diff options
author | Lennart Poettering <lennart@poettering.net> | 2023-01-19 22:45:06 +0100 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2023-01-25 22:12:29 +0100 |
commit | ce92dc27a126a8076ee60913d5d2f43aaa2cd75b (patch) | |
tree | 5dacda2b6e87f0a373cbb7b70045d434710b5353 /src/libsystemd/sd-journal | |
parent | Merge pull request #26204 from poettering/journal-header-compoung-init (diff) | |
download | systemd-ce92dc27a126a8076ee60913d5d2f43aaa2cd75b.tar.xz systemd-ce92dc27a126a8076ee60913d5d2f43aaa2cd75b.zip |
journal-file: make strict order optional
This is a follow-up for 1d8d483f59ffa62974772fb58a8ef4abe88550ec and
makes the strict ordering by realtime clock within each journal file
optional, not mandatory. It then enables it for all journal files
written by journald, but leaves it off on others (for example those
written by journald-remote).
This relaxes the logic behind writing journal files to the status quo
ante for all cases where the journal files are not generated, but are
merged/processed/propagated. Typically when processing journal records
from many files ordering by realtime clock and monotonic clock are
contradictory, and cannot be universally guaranteed as the records are
interleaved. By enforcing strict rules we would thus end up generating
myriads of separate journal files, each with just a few records in them.
Hence, let's loosen restrictions again, but continue to enforce them in
journald, i.e. when we originally create the journal files locally.
Note that generally there's nothing really wrong with having journal
files with non-monotonically ordered entries by realtime clock. Looking
for records will not be deterministic anymore, but that's inherent to a
realtime clock that jumps up and down. So you won't get the "only"
answer, but still *an* answer that is correct if you seek to a realtime
timestamp.
This also adds similar logic on the monotonic clock, which is also only
enabled when generating journal files locally. This should be harder to
trigger (as journald will generate the messages, and should run with a
stable boot id and monotonic clock), but let's better be safe than
sorry, and refuse on the lower layer what makes no sense, even if it's
unlikely the higher layer will ever generate records that aren't ordered
by their monotonic clock.
Diffstat (limited to 'src/libsystemd/sd-journal')
-rw-r--r-- | src/libsystemd/sd-journal/journal-file.c | 45 | ||||
-rw-r--r-- | src/libsystemd/sd-journal/journal-file.h | 7 |
2 files changed, 44 insertions, 8 deletions
diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index c517e31cc4..2bd5dd650d 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -334,7 +334,11 @@ static bool compact_mode_requested(void) { return true; } -static int journal_file_init_header(JournalFile *f, JournalFileFlags file_flags, JournalFile *template) { +static int journal_file_init_header( + JournalFile *f, + JournalFileFlags file_flags, + JournalFile *template) { + bool seal = false; ssize_t k; int r; @@ -2092,11 +2096,37 @@ static int journal_file_append_entry_internal( assert(ts); assert(items || n_items == 0); - if (ts->realtime < le64toh(f->header->tail_entry_realtime)) - return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG), - "Realtime timestamp %" PRIu64 " smaller than previous realtime " - "timestamp %" PRIu64 ", refusing entry.", - ts->realtime, le64toh(f->header->tail_entry_realtime)); + if (f->strict_order) { + /* If requested be stricter with ordering in this journal file, to make searching via + * bisection fully deterministic. This is an optional feature, so that if desired journal + * files can be written where the ordering is not strictly enforced (in which case bisection + * will yield *a* result, but not the *only* result, when searching for points in + * time). Strict ordering mode is enabled when journald originally writes the files, but + * might not necessarily be if other tools (the remoting tools for example) write journal + * files from combined sources. + * + * Typically, if any of the errors generated here are seen journald will just rotate the + * journal files and start anew. 
*/ + + if (ts->realtime < le64toh(f->header->tail_entry_realtime)) + return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG), + "Realtime timestamp %" PRIu64 " smaller than previous realtime " + "timestamp %" PRIu64 ", refusing entry.", + ts->realtime, le64toh(f->header->tail_entry_realtime)); + + if (!sd_id128_is_null(f->header->boot_id) && boot_id) { + + if (!sd_id128_equal(f->header->boot_id, *boot_id)) + return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE), + "Boot ID to write is different from previous boot id, refusing entry."); + + if (ts->monotonic < le64toh(f->header->tail_entry_monotonic)) + return log_debug_errno(SYNTHETIC_ERRNO(ENOTNAM), + "Monotonic timestamp %" PRIu64 " smaller than previous monotonic " + "timestamp %" PRIu64 ", refusing entry.", + ts->monotonic, le64toh(f->header->tail_entry_monotonic)); + } + } osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f)); @@ -3710,6 +3740,8 @@ int journal_file_open( int r; assert(fd >= 0 || fname); + assert(file_flags >= 0); + assert(file_flags <= _JOURNAL_FILE_FLAGS_MAX); assert(mmap_cache); assert(ret); @@ -3733,6 +3765,7 @@ int journal_file_open( .compress_threshold_bytes = compress_threshold_bytes == UINT64_MAX ? 
DEFAULT_COMPRESS_THRESHOLD : MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes), + .strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER), }; if (fname) { diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index a35aa5daef..8c809ed4b9 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -67,6 +67,7 @@ typedef struct JournalFile { int open_flags; bool close_fd:1; bool archive:1; + bool strict_order:1; direction_t last_direction; LocationType location_type; @@ -123,8 +124,10 @@ typedef struct JournalFile { } JournalFile; typedef enum JournalFileFlags { - JOURNAL_COMPRESS = 1 << 0, - JOURNAL_SEAL = 1 << 1, + JOURNAL_COMPRESS = 1 << 0, + JOURNAL_SEAL = 1 << 1, + JOURNAL_STRICT_ORDER = 1 << 2, + _JOURNAL_FILE_FLAGS_MAX = JOURNAL_COMPRESS|JOURNAL_SEAL|JOURNAL_STRICT_ORDER, } JournalFileFlags; typedef struct { |