summaryrefslogtreecommitdiffstats
path: root/src/libsystemd/sd-journal
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2023-01-19 22:45:06 +0100
committerLennart Poettering <lennart@poettering.net>2023-01-25 22:12:29 +0100
commitce92dc27a126a8076ee60913d5d2f43aaa2cd75b (patch)
tree5dacda2b6e87f0a373cbb7b70045d434710b5353 /src/libsystemd/sd-journal
parentMerge pull request #26204 from poettering/journal-header-compoung-init (diff)
downloadsystemd-ce92dc27a126a8076ee60913d5d2f43aaa2cd75b.tar.xz
systemd-ce92dc27a126a8076ee60913d5d2f43aaa2cd75b.zip
journal-file: make strict order optional
This is a follow-up for 1d8d483f59ffa62974772fb58a8ef4abe88550ec and makes the strict ordering by realtime clock within each journal file optional, not mandatory. It then enables it for all journal files written by journald, but leaves it off on others (for example those written by journald-remote). This relaxes the logic behind writing journal files to the status quo ante for all cases where the journal files are not generated, but are merged/processed/propagated. Typically, when processing journal records from many files, ordering by realtime clock and ordering by monotonic clock are contradictory, and cannot be universally guaranteed as the records are interleaved. By enforcing strict rules we would thus end up generating myriads of separate journal files, each with just a few records in them. Hence, let's loosen restrictions again, but continue to enforce them in journald, i.e. when we originally create the journal files locally. Note that generally there's nothing really wrong with having journal files with non-monotonically ordered entries by realtime clock. Looking for records will not be deterministic anymore, but that's inherent to a realtime clock that jumps up and down. So you won't get the "only" answer, but still *an* answer that is correct if you seek for a realtime clock. This also adds similar logic on the monotonic clock, which is also only enabled when generating journal files locally. This should be harder to trigger (as journald will generate the messages, and should run with a stable boot id and monotonic clock), but let's better be safe than sorry, and refuse on the lower layer what makes no sense, even if it's unlikely the higher layer will ever generate records that aren't ordered by their monotonic clock.
Diffstat (limited to 'src/libsystemd/sd-journal')
-rw-r--r--src/libsystemd/sd-journal/journal-file.c45
-rw-r--r--src/libsystemd/sd-journal/journal-file.h7
2 files changed, 44 insertions, 8 deletions
diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c
index c517e31cc4..2bd5dd650d 100644
--- a/src/libsystemd/sd-journal/journal-file.c
+++ b/src/libsystemd/sd-journal/journal-file.c
@@ -334,7 +334,11 @@ static bool compact_mode_requested(void) {
return true;
}
-static int journal_file_init_header(JournalFile *f, JournalFileFlags file_flags, JournalFile *template) {
+static int journal_file_init_header(
+ JournalFile *f,
+ JournalFileFlags file_flags,
+ JournalFile *template) {
+
bool seal = false;
ssize_t k;
int r;
@@ -2092,11 +2096,37 @@ static int journal_file_append_entry_internal(
assert(ts);
assert(items || n_items == 0);
- if (ts->realtime < le64toh(f->header->tail_entry_realtime))
- return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG),
- "Realtime timestamp %" PRIu64 " smaller than previous realtime "
- "timestamp %" PRIu64 ", refusing entry.",
- ts->realtime, le64toh(f->header->tail_entry_realtime));
+ if (f->strict_order) {
+ /* If requested be stricter with ordering in this journal file, to make searching via
+ * bisection fully deterministic. This is an optional feature, so that if desired journal
+ * files can be written where the ordering is not strictly enforced (in which case bisection
+ * will yield *a* result, but not the *only* result, when searching for points in
+ * time). Strict ordering mode is enabled when journald originally writes the files, but
+ * might not necessarily be if other tools (the remoting tools for example) write journal
+ * files from combined sources.
+ *
+ * Typically, if any of the errors generated here are seen journald will just rotate the
+ * journal files and start anew. */
+
+ if (ts->realtime < le64toh(f->header->tail_entry_realtime))
+ return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG),
+ "Realtime timestamp %" PRIu64 " smaller than previous realtime "
+ "timestamp %" PRIu64 ", refusing entry.",
+ ts->realtime, le64toh(f->header->tail_entry_realtime));
+
+ if (!sd_id128_is_null(f->header->boot_id) && boot_id) {
+
+ if (!sd_id128_equal(f->header->boot_id, *boot_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE),
+ "Boot ID to write is different from previous boot id, refusing entry.");
+
+ if (ts->monotonic < le64toh(f->header->tail_entry_monotonic))
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOTNAM),
+ "Monotonic timestamp %" PRIu64 " smaller than previous monotonic "
+ "timestamp %" PRIu64 ", refusing entry.",
+ ts->monotonic, le64toh(f->header->tail_entry_monotonic));
+ }
+ }
osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
@@ -3710,6 +3740,8 @@ int journal_file_open(
int r;
assert(fd >= 0 || fname);
+ assert(file_flags >= 0);
+ assert(file_flags <= _JOURNAL_FILE_FLAGS_MAX);
assert(mmap_cache);
assert(ret);
@@ -3733,6 +3765,7 @@ int journal_file_open(
.compress_threshold_bytes = compress_threshold_bytes == UINT64_MAX ?
DEFAULT_COMPRESS_THRESHOLD :
MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes),
+ .strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER),
};
if (fname) {
diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h
index a35aa5daef..8c809ed4b9 100644
--- a/src/libsystemd/sd-journal/journal-file.h
+++ b/src/libsystemd/sd-journal/journal-file.h
@@ -67,6 +67,7 @@ typedef struct JournalFile {
int open_flags;
bool close_fd:1;
bool archive:1;
+ bool strict_order:1;
direction_t last_direction;
LocationType location_type;
@@ -123,8 +124,10 @@ typedef struct JournalFile {
} JournalFile;
typedef enum JournalFileFlags {
- JOURNAL_COMPRESS = 1 << 0,
- JOURNAL_SEAL = 1 << 1,
+ JOURNAL_COMPRESS = 1 << 0,
+ JOURNAL_SEAL = 1 << 1,
+ JOURNAL_STRICT_ORDER = 1 << 2,
+ _JOURNAL_FILE_FLAGS_MAX = JOURNAL_COMPRESS|JOURNAL_SEAL|JOURNAL_STRICT_ORDER,
} JournalFileFlags;
typedef struct {