author | Lennart Poettering <lennart@poettering.net> | 2020-05-30 00:00:50 +0200
committer | Lennart Poettering <lennart@poettering.net> | 2020-06-25 15:01:45 +0200
commit | 4ce534f4cd950cb22abacdfd4d967b41b890a706 (patch)
tree | 0da9f2465c5b18ce909919724f9a92a466b0d6c0 /src/journal
parent | journal: make signature arrays const (diff)
download | systemd-4ce534f4cd950cb22abacdfd4d967b41b890a706.tar.xz, systemd-4ce534f4cd950cb22abacdfd4d967b41b890a706.zip
journal: use a different hash function for each journal file
This adds a new (incompatible) feature to journal files: if enabled, the
hash function used for the hash tables is no longer the Jenkins hash with a
zero key, but siphash keyed by the file UUID that is included in the
file header anyway. This should make our hash tables more robust against
collision attacks, as long as the attacker has no read access to the
journal files. We switch from Jenkins to siphash simply because it is
better known and it is what the rest of our codebase is standardizing on.

This is a hardening measure: it makes collision attacks harder for clients
that can forge log messages but have no read access to the logs. It has
no effect on clients that do have read access.
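
As an illustration of why keying helps, here is a minimal standalone sketch (not systemd code): the actual implementation uses siphash24() keyed with the journal header's file_id, and jenkins_hash64() for old-style files, as shown in the diff below; the toy_keyed_hash() here is only a self-contained stand-in so the example compiles on its own.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Toy keyed hash: FNV-1a folded over a 16-byte key, then over the data.
     * Only a stand-in for illustration; the commit itself uses siphash24(). */
    static uint64_t toy_keyed_hash(const void *data, size_t sz, const uint8_t key[16]) {
            uint64_t h = 0xcbf29ce484222325ULL;
            for (size_t i = 0; i < 16; i++)
                    h = (h ^ key[i]) * 0x100000001b3ULL;
            for (size_t i = 0; i < sz; i++)
                    h = (h ^ ((const uint8_t*) data)[i]) * 0x100000001b3ULL;
            return h;
    }

    int main(void) {
            const char *payload = "MESSAGE=hello";
            uint8_t file_a[16] = { 0x01 }, file_b[16] = { 0x02 };  /* stand-ins for two file_id UUIDs */

            /* The same payload hashes differently in each "file", so a collision
             * crafted against one file's hash table does not carry over to another. */
            printf("file A: %016" PRIx64 "\n", toy_keyed_hash(payload, strlen(payload), file_a));
            printf("file B: %016" PRIx64 "\n", toy_keyed_hash(payload, strlen(payload), file_b));
            return 0;
    }

Because the key differs per file and is unknown to an attacker without read access, the bucket a crafted record lands in cannot be predicted, which is exactly the property the new keyed-hash flag provides.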
Diffstat (limited to 'src/journal')
-rw-r--r-- | src/journal/journal-def.h | 12
-rw-r--r-- | src/journal/journal-file.c | 102
-rw-r--r-- | src/journal/journal-file.h | 6
-rw-r--r-- | src/journal/journal-internal.h | 2
-rw-r--r-- | src/journal/journal-verify.c | 4
-rw-r--r-- | src/journal/sd-journal.c | 22
-rw-r--r-- | src/journal/test-journal-stream.c | 24
7 files changed, 128 insertions, 44 deletions
diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
index 54260c97b0..431f46bb59 100644
--- a/src/journal/journal-def.h
+++ b/src/journal/journal-def.h
@@ -147,18 +147,22 @@ enum {
 enum {
         HEADER_INCOMPATIBLE_COMPRESSED_XZ  = 1 << 0,
         HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1,
+        HEADER_INCOMPATIBLE_KEYED_HASH     = 1 << 2,
 };
 
-#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+#define HEADER_INCOMPATIBLE_ANY               \
+        (HEADER_INCOMPATIBLE_COMPRESSED_XZ|   \
+         HEADER_INCOMPATIBLE_COMPRESSED_LZ4|  \
+         HEADER_INCOMPATIBLE_KEYED_HASH)
 
 #if HAVE_XZ && HAVE_LZ4
 #  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
 #elif HAVE_XZ
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH)
 #elif HAVE_LZ4
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
 #else
-#  define HEADER_INCOMPATIBLE_SUPPORTED 0
+#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH
 #endif
 
 enum {
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index c77a9436e6..8ae966a6b2 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -16,6 +16,7 @@
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "compress.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
@@ -419,7 +420,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         h.incompatible_flags |= htole32(
                         f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
-                        f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+                        f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+                        f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
 
         h.compatible_flags = htole32(
                         f->seal * HEADER_COMPATIBLE_SEALED);
@@ -486,16 +488,21 @@ static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
                           f->path, type, flags & ~any);
                 flags = (flags & any) & ~supported;
                 if (flags) {
-                        const char* strv[3];
+                        const char* strv[4];
                         unsigned n = 0;
                         _cleanup_free_ char *t = NULL;
 
-                        if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
-                                strv[n++] = "sealed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
-                                strv[n++] = "xz-compressed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
-                                strv[n++] = "lz4-compressed";
+                        if (compatible) {
+                                if (flags & HEADER_COMPATIBLE_SEALED)
+                                        strv[n++] = "sealed";
+                        } else {
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+                                        strv[n++] = "xz-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+                                        strv[n++] = "lz4-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+                                        strv[n++] = "keyed-hash";
+                        }
                         strv[n] = NULL;
                         assert(n < ELEMENTSOF(strv));
 
@@ -595,6 +602,8 @@ static int journal_file_verify_header(JournalFile *f) {
 
         f->seal = JOURNAL_HEADER_SEALED(f->header);
 
+        f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
         return 0;
 }
 
@@ -1334,21 +1343,35 @@ int journal_file_find_field_object_with_hash(
         return 0;
 }
 
+uint64_t journal_file_hash_data(
+                JournalFile *f,
+                const void *data,
+                size_t sz) {
+
+        assert(f);
+        assert(data || sz == 0);
+
+        /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+         * function use siphash. Old journal files use the Jenkins hash. */
+
+        if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                return siphash24(data, sz, f->header->file_id.bytes);
+
+        return jenkins_hash64(data, sz);
+}
+
 int journal_file_find_field_object(
                 JournalFile *f,
                 const void *field, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
-
         return journal_file_find_field_object_with_hash(
                         f,
-                        field, size, hash,
+                        field, size,
+                        journal_file_hash_data(f, field, size),
                         ret, ret_offset);
 }
 
@@ -1446,16 +1469,13 @@ int journal_file_find_data_object(
                 const void *data, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
-
         return journal_file_find_data_object_with_hash(
                         f,
-                        data, size, hash,
+                        data, size,
+                        journal_file_hash_data(f, data, size),
                         ret, ret_offset);
 }
 
@@ -1472,7 +1492,7 @@ static int journal_file_append_field(
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
+        hash = journal_file_hash_data(f, field, size);
 
         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
         if (r < 0)
@@ -1535,7 +1555,7 @@ static int journal_file_append_data(
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
+        hash = journal_file_hash_data(f, data, size);
 
         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
         if (r < 0)
@@ -2028,7 +2048,20 @@ int journal_file_append_entry(
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(o->data.hash);
+                /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+                 * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+                 * specific record, and give records with otherwise identical position (i.e. match in seqno,
+                 * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+                 * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+                 * hash here for that. This also has the benefit that cursors for old and new journal files
+                 * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+                 * files things are easier, we can just take the value from the stored record directly. */
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+                else
+                        xor_hash ^= le64toh(o->data.hash);
+
                 items[i].object_offset = htole64(p);
                 items[i].hash = o->data.hash;
         }
@@ -3149,7 +3182,7 @@ void journal_file_print_header(JournalFile *f) {
                "Sequential number ID: %s\n"
               "State: %s\n"
               "Compatible flags:%s%s\n"
-              "Incompatible flags:%s%s%s\n"
+              "Incompatible flags:%s%s%s%s\n"
               "Header size: %"PRIu64"\n"
               "Arena size: %"PRIu64"\n"
               "Data hash table size: %"PRIu64"\n"
@@ -3174,6 +3207,7 @@ void journal_file_print_header(JournalFile *f) {
               (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
               JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
               JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+              JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
               (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
               le64toh(f->header->header_size),
               le64toh(f->header->arena_size),
@@ -3299,19 +3333,31 @@ int journal_file_open(
 #endif
         };
 
+        /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+         * people really want that */
+        r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+                f->keyed_hash = true;
+        } else
+                f->keyed_hash = r;
+
         if (DEBUG_LOGGING) {
-                static int last_seal = -1, last_compress = -1;
+                static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
                 static uint64_t last_bytes = UINT64_MAX;
                 char bytes[FORMAT_BYTES_MAX];
 
                 if (last_seal != f->seal ||
+                    last_keyed_hash != f->keyed_hash ||
                     last_compress != JOURNAL_FILE_COMPRESS(f) ||
                     last_bytes != f->compress_threshold_bytes) {
 
-                        log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
-                                  yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
+                        log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+                                  yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
                                   format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
                         last_seal = f->seal;
+                        last_keyed_hash = f->keyed_hash;
                         last_compress = JOURNAL_FILE_COMPRESS(f);
                         last_bytes = f->compress_threshold_bytes;
                 }
@@ -3769,7 +3815,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(u->data.hash);
+                if (JOURNAL_HEADER_KEYED_HASH(to->header))
+                        xor_hash ^= jenkins_hash64(data, l);
+                else
+                        xor_hash ^= le64toh(u->data.hash);
+
                 items[i].object_offset = htole64(h);
                 items[i].hash = u->data.hash;
 
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 121e9153a6..732c2f31cd 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -71,6 +71,7 @@ typedef struct JournalFile {
         bool defrag_on_close:1;
         bool close_fd:1;
         bool archive:1;
+        bool keyed_hash:1;
 
         direction_t last_direction;
         LocationType location_type;
@@ -195,6 +196,9 @@ static inline bool VALID_EPOCH(uint64_t u) {
 #define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
         FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
 
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
+
 int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
 
 uint64_t journal_file_entry_n_items(Object *o) _pure_;
@@ -262,3 +266,5 @@ static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
         assert(f);
         return f->compress_xz || f->compress_lz4;
 }
+
+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
index 028f0d9055..a649acf634 100644
--- a/src/journal/journal-internal.h
+++ b/src/journal/journal-internal.h
@@ -32,7 +32,7 @@ struct Match {
         /* For concrete matches */
         char *data;
         size_t size;
-        uint64_t hash;
+        uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */
 
         /* For terms */
         LIST_HEAD(Match, matches);
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index c70ab7aa24..fe9997bc14 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -163,9 +163,9 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                                 return r;
                         }
 
-                        h2 = jenkins_hash64(b, b_size);
+                        h2 = journal_file_hash_data(f, b, b_size);
                 } else
-                        h2 = jenkins_hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                        h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
 
                 if (h1 != h2) {
                         error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
index 5ddca5f93a..515bb82621 100644
--- a/src/journal/sd-journal.c
+++ b/src/journal/sd-journal.c
@@ -279,6 +279,8 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)
         assert(j->level1->type == MATCH_OR_TERM);
         assert(j->level2->type == MATCH_AND_TERM);
 
+        /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+         * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
         hash = jenkins_hash64(data, size);
 
         LIST_FOREACH(matches, l3, j->level2->matches) {
@@ -501,9 +503,16 @@ static int next_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+                 * we can use what we pre-calculated. */
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
+
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
@@ -590,9 +599,14 @@ static int find_location_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
index 6d97bc5ce8..50aab11c6a 100644
--- a/src/journal/test-journal-stream.c
+++ b/src/journal/test-journal-stream.c
@@ -58,7 +58,7 @@ static void verify_contents(sd_journal *j, unsigned skip) {
         assert_se(i == N_ENTRIES);
 }
 
-int main(int argc, char *argv[]) {
+static void run_test(void) {
         JournalFile *one, *two, *three;
         char t[] = "/var/tmp/journal-stream-XXXXXX";
         unsigned i;
@@ -68,12 +68,6 @@ int main(int argc, char *argv[]) {
         size_t l;
         dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
 
-        /* journal_file_open requires a valid machine id */
-        if (access("/etc/machine-id", F_OK) != 0)
-                return log_tests_skipped("/etc/machine-id not found");
-
-        test_setup_logging(LOG_DEBUG);
-
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
         (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
@@ -177,6 +171,22 @@ int main(int argc, char *argv[]) {
                 printf("%.*s\n", (int) l, (const char*) data);
 
         assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+
+        /* journal_file_open requires a valid machine id */
+        if (access("/etc/machine-id", F_OK) != 0)
+                return log_tests_skipped("/etc/machine-id not found");
+
+        test_setup_logging(LOG_DEBUG);
+
+        /* Run this test twice. Once with old hashing and once with new hashing */
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+        run_test();
+
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+        run_test();
 
         return 0;
 }