From 9030aaf9bf0a1eee47a154c316c789e959638b0f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 6 Oct 2009 11:31:15 -0700 Subject: ceph: Kconfig, Makefile Kconfig options and Makefile. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 fs/ceph/Makefile (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile new file mode 100644 index 000000000000..7da6d69dba29 --- /dev/null +++ b/fs/ceph/Makefile @@ -0,0 +1,36 @@ +# +# Makefile for CEPH filesystem. +# + +ifneq ($(KERNELRELEASE),) + +obj-$(CONFIG_CEPH_FS) += ceph.o + +ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ + export.o caps.o snap.o xattr.o \ + messenger.o msgpool.o buffer.o \ + mds_client.o mdsmap.o \ + mon_client.o \ + osd_client.o osdmap.o crush/crush.o crush/mapper.o \ + debugfs.o \ + ceph_fs.o ceph_strings.o ceph_frag.o + +else +#Otherwise we were called directly from the command +# line; invoke the kernel build system. + +KERNELDIR ?= /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) + +default: all + +all: + $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules + +modules_install: + $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_FS=m modules_install + +clean: + $(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +endif -- cgit v1.2.3 From c6cf726316abd613cfb7c325d950f3629f964ec6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 6 Nov 2009 16:39:26 -0800 Subject: ceph: make CRUSH hash functions non-inline These are way to big to be inline. I missed crush/* when doing the inline audit for akpm's review. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 2 +- fs/ceph/README | 1 + fs/ceph/crush/crush.c | 11 ++++++ fs/ceph/crush/crush.h | 11 +----- fs/ceph/crush/hash.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/crush/hash.h | 92 ++++----------------------------------------------- 6 files changed, 107 insertions(+), 96 deletions(-) create mode 100644 fs/ceph/crush/hash.c (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 7da6d69dba29..8bad70aac363 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -11,7 +11,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ messenger.o msgpool.o buffer.o \ mds_client.o mdsmap.o \ mon_client.o \ - osd_client.o osdmap.o crush/crush.o crush/mapper.o \ + osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ ceph_fs.o ceph_strings.o ceph_frag.o diff --git a/fs/ceph/README b/fs/ceph/README index 231a1df5eafe..660e00074d59 100644 --- a/fs/ceph/README +++ b/fs/ceph/README @@ -15,3 +15,4 @@ src/crush/crush.h fs/ceph/crush/crush.h src/crush/mapper.c fs/ceph/crush/mapper.c src/crush/mapper.h fs/ceph/crush/mapper.h src/crush/hash.h fs/ceph/crush/hash.h +src/crush/hash.c fs/ceph/crush/hash.c diff --git a/fs/ceph/crush/crush.c b/fs/ceph/crush/crush.c index 13755cdc4fb3..fabd302e5779 100644 --- a/fs/ceph/crush/crush.c +++ b/fs/ceph/crush/crush.c @@ -10,6 +10,17 @@ #include "crush.h" +const char *crush_bucket_alg_name(int alg) +{ + switch (alg) { + case CRUSH_BUCKET_UNIFORM: return "uniform"; + case CRUSH_BUCKET_LIST: return "list"; + case CRUSH_BUCKET_TREE: return "tree"; + case CRUSH_BUCKET_STRAW: return "straw"; + default: return "unknown"; + } +} + /** * crush_get_bucket_item_weight - Get weight of an item in given bucket * @b: bucket pointer diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index 9ac7e091126f..92c6b3c3a571 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h @@ -97,16 +97,7 @@ enum { CRUSH_BUCKET_TREE = 3, CRUSH_BUCKET_STRAW = 4 }; -static inline const char *crush_bucket_alg_name(int alg) -{ - switch (alg) { - case CRUSH_BUCKET_UNIFORM: return "uniform"; - case CRUSH_BUCKET_LIST: return "list"; - case CRUSH_BUCKET_TREE: return "tree"; - case CRUSH_BUCKET_STRAW: return "straw"; - default: return "unknown"; - } -} +extern const char *crush_bucket_alg_name(int alg); struct crush_bucket { __s32 id; /* this'll be negative */ diff --git a/fs/ceph/crush/hash.c b/fs/ceph/crush/hash.c new file mode 100644 index 000000000000..b438c5d27816 --- /dev/null +++ b/fs/ceph/crush/hash.c @@ -0,0 +1,86 @@ + +#include + +/* + * Robert Jenkins' function for mixing 32-bit values + * http://burtleburtle.net/bob/hash/evahash.html + * a, b = random bits, c = input and output + */ +#define crush_hashmix(a, b, c) do { \ + a = a-b; a = a-c; a = a^(c>>13); \ + b = b-c; b = b-a; b = b^(a<<8); \ + c = c-a; c = c-b; c = c^(b>>13); \ + a = a-b; a = a-c; a = a^(c>>12); \ + b = b-c; b = b-a; b = b^(a<<16); \ + c = c-a; c = c-b; c = c^(b>>5); \ + a = a-b; a = a-c; a = a^(c>>3); \ + b = b-c; b = b-a; b = b^(a<<10); \ + c = c-a; c = c-b; c = c^(b>>15); \ + } while (0) + +#define crush_hash_seed 1315423911 + +__u32 crush_hash32(__u32 a) +{ + __u32 hash = crush_hash_seed ^ a; + __u32 b = a; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(b, x, hash); + crush_hashmix(y, a, hash); + return hash; +} + +__u32 crush_hash32_2(__u32 a, __u32 b) +{ + __u32 hash = crush_hash_seed ^ a ^ b; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(x, a, hash); + crush_hashmix(b, y, hash); + return hash; +} + +__u32 crush_hash32_3(__u32 a, __u32 b, __u32 c) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, x, hash); + crush_hashmix(y, a, hash); + crush_hashmix(b, x, hash); + crush_hashmix(y, c, hash); + return hash; +} + +__u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, __u32 d) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, d, hash); + crush_hashmix(a, x, hash); + crush_hashmix(y, b, hash); + crush_hashmix(c, x, hash); + crush_hashmix(y, d, hash); + return hash; +} + +__u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, __u32 d, __u32 e) +{ + __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; + __u32 x = 231232; + __u32 y = 1232; + crush_hashmix(a, b, hash); + crush_hashmix(c, d, hash); + crush_hashmix(e, x, hash); + crush_hashmix(y, a, hash); + crush_hashmix(b, x, hash); + crush_hashmix(y, c, hash); + crush_hashmix(d, x, hash); + crush_hashmix(y, e, hash); + return hash; +} diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index 42f3312783a1..9ce89f85dc7d 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h @@ -1,90 +1,12 @@ #ifndef _CRUSH_HASH_H #define _CRUSH_HASH_H -/* - * Robert Jenkins' function for mixing 32-bit values - * http://burtleburtle.net/bob/hash/evahash.html - * a, b = random bits, c = input and output - */ -#define crush_hashmix(a, b, c) do { \ - a = a-b; a = a-c; a = a^(c>>13); \ - b = b-c; b = b-a; b = b^(a<<8); \ - c = c-a; c = c-b; c = c^(b>>13); \ - a = a-b; a = a-c; a = a^(c>>12); \ - b = b-c; b = b-a; b = b^(a<<16); \ - c = c-a; c = c-b; c = c^(b>>5); \ - a = a-b; a = a-c; a = a^(c>>3); \ - b = b-c; b = b-a; b = b^(a<<10); \ - c = c-a; c = c-b; c = c^(b>>15); \ - } while (0) - -#define crush_hash_seed 1315423911 - -static inline __u32 crush_hash32(__u32 a) -{ - __u32 hash = crush_hash_seed ^ a; - __u32 b = a; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(b, x, hash); - crush_hashmix(y, a, hash); - return hash; -} - -static inline __u32 crush_hash32_2(__u32 a, __u32 b) -{ - __u32 hash = crush_hash_seed ^ a ^ b; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(x, a, hash); - crush_hashmix(b, y, hash); - return hash; -} - -static inline __u32 crush_hash32_3(__u32 a, __u32 b, __u32 c) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, x, hash); - crush_hashmix(y, a, hash); - crush_hashmix(b, x, hash); - crush_hashmix(y, c, hash); - return hash; -} - -static inline __u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, - __u32 d) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, d, hash); - crush_hashmix(a, x, hash); - crush_hashmix(y, b, hash); - crush_hashmix(c, x, hash); - crush_hashmix(y, d, hash); - return hash; -} - -static inline __u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, - __u32 d, __u32 e) -{ - __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; - __u32 x = 231232; - __u32 y = 1232; - crush_hashmix(a, b, hash); - crush_hashmix(c, d, hash); - crush_hashmix(e, x, hash); - crush_hashmix(y, a, hash); - crush_hashmix(b, x, hash); - crush_hashmix(y, c, hash); - crush_hashmix(d, x, hash); - crush_hashmix(y, e, hash); - return hash; -} +extern __u32 crush_hash32(__u32 a); +extern __u32 crush_hash32_2(__u32 a, __u32 b); +extern __u32 crush_hash32_3(__u32 a, __u32 b, __u32 c); +extern __u32 crush_hash32_4(__u32 a, __u32 b, __u32 c, + __u32 d); +extern __u32 crush_hash32_5(__u32 a, __u32 b, __u32 c, + __u32 d, __u32 e); #endif -- cgit v1.2.3 From 1654dd0cf5ee1827322aca156af7d96d757201c7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 6 Nov 2009 21:55:25 -0800 Subject: ceph: make object hash a pg_pool property The object will be hashed to a placement seed (ps) based on the pg_pool's hash function. This allows new hashes to be introduced into an existing object store, or selection of a hash appropriate to the objects that will be stored in a particular pool. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 2 +- fs/ceph/README | 2 + fs/ceph/ceph_fs.c | 77 ---------------------------------- fs/ceph/ceph_fs.h | 2 - fs/ceph/ceph_hash.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/ceph_hash.h | 13 ++++++ fs/ceph/osdmap.c | 2 +- fs/ceph/rados.h | 1 + fs/ceph/types.h | 1 + 9 files changed, 137 insertions(+), 81 deletions(-) create mode 100644 fs/ceph/ceph_hash.c create mode 100644 fs/ceph/ceph_hash.h (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 8bad70aac363..bdd3e6fc1609 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -13,7 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ - ceph_fs.o ceph_strings.o ceph_frag.o + ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else #Otherwise we were called directly from the command diff --git a/fs/ceph/README b/fs/ceph/README index 660e00074d59..18352fab37c0 100644 --- a/fs/ceph/README +++ b/fs/ceph/README @@ -10,6 +10,8 @@ src/include/rados.h fs/ceph/rados.h src/include/ceph_strings.cc fs/ceph/ceph_strings.c src/include/ceph_frag.h fs/ceph/ceph_frag.h src/include/ceph_frag.cc fs/ceph/ceph_frag.c +src/include/ceph_hash.h fs/ceph/ceph_hash.h +src/include/ceph_hash.cc fs/ceph/ceph_hash.c src/crush/crush.c fs/ceph/crush/crush.c src/crush/crush.h fs/ceph/crush/crush.h src/crush/mapper.c fs/ceph/crush/mapper.c diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index b3ecf1b07521..79d76bc4303f 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c @@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode) } return 0; } - -/* - * Robert Jenkin's hash function. - * http://burtleburtle.net/bob/hash/evahash.html - * This is in the public domain. - */ -#define mix(a, b, c) \ - do { \ - a = a - b; a = a - c; a = a ^ (c >> 13); \ - b = b - c; b = b - a; b = b ^ (a << 8); \ - c = c - a; c = c - b; c = c ^ (b >> 13); \ - a = a - b; a = a - c; a = a ^ (c >> 12); \ - b = b - c; b = b - a; b = b ^ (a << 16); \ - c = c - a; c = c - b; c = c ^ (b >> 5); \ - a = a - b; a = a - c; a = a ^ (c >> 3); \ - b = b - c; b = b - a; b = b ^ (a << 10); \ - c = c - a; c = c - b; c = c ^ (b >> 15); \ - } while (0) - -unsigned int ceph_full_name_hash(const char *str, unsigned int length) -{ - const unsigned char *k = (const unsigned char *)str; - __u32 a, b, c; /* the internal state */ - __u32 len; /* how many key bytes still need mixing */ - - /* Set up the internal state */ - len = length; - a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - b = a; - c = 0; /* variable initialization of internal state */ - - /* handle most of the key */ - while (len >= 12) { - a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + - ((__u32)k[3] << 24)); - b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + - ((__u32)k[7] << 24)); - c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + - ((__u32)k[11] << 24)); - mix(a, b, c); - k = k + 12; - len = len - 12; - } - - /* handle the last 11 bytes */ - c = c + length; - switch (len) { /* all the case statements fall through */ - case 11: - c = c + ((__u32)k[10] << 24); - case 10: - c = c + ((__u32)k[9] << 16); - case 9: - c = c + ((__u32)k[8] << 8); - /* the first byte of c is reserved for the length */ - case 8: - b = b + ((__u32)k[7] << 24); - case 7: - b = b + ((__u32)k[6] << 16); - case 6: - b = b + ((__u32)k[5] << 8); - case 5: - b = b + k[4]; - case 4: - a = a + ((__u32)k[3] << 24); - case 3: - a = a + ((__u32)k[2] << 16); - case 2: - a = a + ((__u32)k[1] << 8); - case 1: - a = a + k[0]; - /* case 0: nothing left to add */ - } - mix(a, b, c); - - return c; -} - diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 25fc537f4140..36becb024788 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -49,8 +49,6 @@ #define CEPH_MAX_MON 31 -unsigned int ceph_full_name_hash(const char *name, unsigned int len); - /* * ceph_file_layout - describe data layout for a file/inode diff --git a/fs/ceph/ceph_hash.c b/fs/ceph/ceph_hash.c new file mode 100644 index 000000000000..ac8be54631fe --- /dev/null +++ b/fs/ceph/ceph_hash.c @@ -0,0 +1,118 @@ + +#include "types.h" + +/* + * Robert Jenkin's hash function. + * http://burtleburtle.net/bob/hash/evahash.html + * This is in the public domain. + */ +#define mix(a, b, c) \ + do { \ + a = a - b; a = a - c; a = a ^ (c >> 13); \ + b = b - c; b = b - a; b = b ^ (a << 8); \ + c = c - a; c = c - b; c = c ^ (b >> 13); \ + a = a - b; a = a - c; a = a ^ (c >> 12); \ + b = b - c; b = b - a; b = b ^ (a << 16); \ + c = c - a; c = c - b; c = c ^ (b >> 5); \ + a = a - b; a = a - c; a = a ^ (c >> 3); \ + b = b - c; b = b - a; b = b ^ (a << 10); \ + c = c - a; c = c - b; c = c ^ (b >> 15); \ + } while (0) + +unsigned ceph_str_hash_rjenkins(const char *str, unsigned length) +{ + const unsigned char *k = (const unsigned char *)str; + __u32 a, b, c; /* the internal state */ + __u32 len; /* how many key bytes still need mixing */ + + /* Set up the internal state */ + len = length; + a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + b = a; + c = 0; /* variable initialization of internal state */ + + /* handle most of the key */ + while (len >= 12) { + a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + + ((__u32)k[3] << 24)); + b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + + ((__u32)k[7] << 24)); + c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + + ((__u32)k[11] << 24)); + mix(a, b, c); + k = k + 12; + len = len - 12; + } + + /* handle the last 11 bytes */ + c = c + length; + switch (len) { /* all the case statements fall through */ + case 11: + c = c + ((__u32)k[10] << 24); + case 10: + c = c + ((__u32)k[9] << 16); + case 9: + c = c + ((__u32)k[8] << 8); + /* the first byte of c is reserved for the length */ + case 8: + b = b + ((__u32)k[7] << 24); + case 7: + b = b + ((__u32)k[6] << 16); + case 6: + b = b + ((__u32)k[5] << 8); + case 5: + b = b + k[4]; + case 4: + a = a + ((__u32)k[3] << 24); + case 3: + a = a + ((__u32)k[2] << 16); + case 2: + a = a + ((__u32)k[1] << 8); + case 1: + a = a + k[0]; + /* case 0: nothing left to add */ + } + mix(a, b, c); + + return c; +} + +/* + * linux dcache hash + */ +unsigned ceph_str_hash_linux(const char *str, unsigned length) +{ + unsigned long hash = 0; + unsigned char c; + + while (length-- > 0) { + c = *str++; + hash = (hash + (c << 4) + (c >> 4)) * 11; + } + return hash; +} + + +unsigned ceph_str_hash(int type, const char *s, unsigned len) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return ceph_str_hash_linux(s, len); + case CEPH_STR_HASH_RJENKINS: + return ceph_str_hash_rjenkins(s, len); + default: + return -1; + } +} + +const char *ceph_str_hash_name(int type) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return "linux"; + case CEPH_STR_HASH_RJENKINS: + return "rjenkins"; + default: + return "unknown"; + } +} diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h new file mode 100644 index 000000000000..5ac470c433c9 --- /dev/null +++ b/fs/ceph/ceph_hash.h @@ -0,0 +1,13 @@ +#ifndef _FS_CEPH_HASH_H +#define _FS_CEPH_HASH_H + +#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ +#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ + +extern unsigned ceph_str_hash_linux(const char *s, unsigned len); +extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); + +extern unsigned ceph_str_hash(int type, const char *s, unsigned len); +extern const char *ceph_str_hash_name(int type); + +#endif diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index a025555b70af..68478270ad70 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -809,7 +809,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, return -EIO; pool = &osdmap->pg_pool[poolid]; - ps = ceph_full_name_hash(oid, strlen(oid)); + ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); if (preferred >= 0) { ps += preferred; num = le32_to_cpu(pool->v.lpg_num); diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 85bdef78d142..fb23ff9297c9 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -84,6 +84,7 @@ struct ceph_pg_pool { __u8 type; /* CEPH_PG_TYPE_* */ __u8 size; /* number of osds in each pg */ __u8 crush_ruleset; /* crush placement rule */ + __u8 object_hash; /* hash mapping object name to ps */ __le32 pg_num, pgp_num; /* number of pg's */ __le32 lpg_num, lpgp_num; /* number of localized pg's */ __le32 last_change; /* most recent epoch changed */ diff --git a/fs/ceph/types.h b/fs/ceph/types.h index 8a514568cab2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/fs/ceph/types.h @@ -9,6 +9,7 @@ #include "ceph_fs.h" #include "ceph_frag.h" +#include "ceph_hash.h" /* * Identify inodes by both their ino AND snapshot id (a u64). -- cgit v1.2.3 From 4e7a5dcd1bbab6560fbc8ada29a840e7a20ed7bc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 18 Nov 2009 16:19:57 -0800 Subject: ceph: negotiate authentication protocol; implement AUTH_NONE protocol When we open a monitor session, we send an initial AUTH message listing the auth protocols we support, our entity name, and (possibly) a previously assigned global_id. The monitor chooses a protocol and responds with an initial message. Initially implement AUTH_NONE, a dummy protocol that provides no security, but works within the new framework. It generates 'authorizers' that are used when connecting to (mds, osd) services that simply state our entity name and global_id. This is a wire protocol change. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 1 + fs/ceph/auth.c | 220 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/auth.h | 77 +++++++++++++++++ fs/ceph/auth_none.c | 120 +++++++++++++++++++++++++++ fs/ceph/auth_none.h | 28 +++++++ fs/ceph/ceph_fs.h | 14 +++- fs/ceph/ceph_strings.c | 13 +++ fs/ceph/decode.h | 6 ++ fs/ceph/mds_client.c | 69 +++++++++++++++- fs/ceph/mds_client.h | 4 + fs/ceph/messenger.c | 54 +++++++++++- fs/ceph/messenger.h | 10 +++ fs/ceph/mon_client.c | 210 +++++++++++++++++++++++++++++++++++++--------- fs/ceph/mon_client.h | 14 ++-- fs/ceph/msgr.h | 21 +++-- fs/ceph/osd_client.c | 63 +++++++++++++- fs/ceph/osd_client.h | 4 + fs/ceph/rados.h | 2 +- fs/ceph/super.c | 32 ++++--- fs/ceph/super.h | 2 + 20 files changed, 888 insertions(+), 76 deletions(-) create mode 100644 fs/ceph/auth.c create mode 100644 fs/ceph/auth.h create mode 100644 fs/ceph/auth_none.c create mode 100644 fs/ceph/auth_none.h (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index bdd3e6fc1609..827629c85768 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -13,6 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ + auth.o auth_none.o \ ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c new file mode 100644 index 000000000000..c4d1eee827a3 --- /dev/null +++ b/fs/ceph/auth.c @@ -0,0 +1,220 @@ +#include "ceph_debug.h" + +#include +#include + +#include "types.h" +#include "auth_none.h" +#include "decode.h" +#include "super.h" + +#include "messenger.h" + +/* + * get protocol handler + */ +static u32 supported_protocols[] = { + CEPH_AUTH_NONE +}; + +int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) +{ + switch (protocol) { + case CEPH_AUTH_NONE: + return ceph_auth_none_init(ac); + default: + return -ENOENT; + } +} + +/* + * setup, teardown. + */ +struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) +{ + struct ceph_auth_client *ac; + int ret; + + dout("auth_init name '%s' secret '%s'\n", name, secret); + + ret = -ENOMEM; + ac = kzalloc(sizeof(*ac), GFP_NOFS); + if (!ac) + goto out; + + ac->negotiating = true; + if (name) + ac->name = name; + else + ac->name = CEPH_AUTH_NAME_DEFAULT; + dout("auth_init name %s secret %s\n", ac->name, secret); + ac->secret = secret; + return ac; + +out: + return ERR_PTR(ret); +} + +void ceph_auth_destroy(struct ceph_auth_client *ac) +{ + dout("auth_destroy %p\n", ac); + if (ac->ops) + ac->ops->destroy(ac); + kfree(ac); +} + +/* + * Reset occurs when reconnecting to the monitor. + */ +void ceph_auth_reset(struct ceph_auth_client *ac) +{ + dout("auth_reset %p\n", ac); + if (ac->ops && !ac->negotiating) + ac->ops->reset(ac); + ac->negotiating = true; +} + +int ceph_entity_name_encode(const char *name, void **p, void *end) +{ + int len = strlen(name); + + if (*p + 2*sizeof(u32) + len > end) + return -ERANGE; + ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT); + ceph_encode_32(p, len); + ceph_encode_copy(p, name, len); + return 0; +} + +/* + * Initiate protocol negotiation with monitor. Include entity name + * and list supported protocols. + */ +int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) +{ + struct ceph_mon_request_header *monhdr = buf; + void *p = monhdr + 1, *end = buf + len, *lenp; + int i, num; + int ret; + + dout("auth_build_hello\n"); + monhdr->have_version = 0; + monhdr->session_mon = cpu_to_le16(-1); + monhdr->session_mon_tid = 0; + + ceph_encode_32(&p, 0); /* no protocol, yet */ + + lenp = p; + p += sizeof(u32); + + num = ARRAY_SIZE(supported_protocols); + ceph_encode_32(&p, num); + for (i = 0; i < num; i++) + ceph_encode_32(&p, supported_protocols[i]); + + ret = ceph_entity_name_encode(ac->name, &p, end); + if (ret < 0) + return ret; + ceph_decode_need(&p, end, sizeof(u64), bad); + ceph_encode_64(&p, ac->global_id); + + ceph_encode_32(&lenp, p - lenp - sizeof(u32)); + return p - buf; + +bad: + return -ERANGE; +} + +/* + * Handle auth message from monitor. + */ +int ceph_handle_auth_reply(struct ceph_auth_client *ac, + void *buf, size_t len, + void *reply_buf, size_t reply_len) +{ + void *p = buf; + void *end = buf + len; + int protocol; + s32 result; + u64 global_id; + void *payload, *payload_end; + int payload_len; + char *result_msg; + int result_msg_len; + int ret = -EINVAL; + + dout("handle_auth_reply %p %p\n", p, end); + ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); + protocol = ceph_decode_32(&p); + result = ceph_decode_32(&p); + global_id = ceph_decode_64(&p); + payload_len = ceph_decode_32(&p); + payload = p; + p += payload_len; + ceph_decode_need(&p, end, sizeof(u32), bad); + result_msg_len = ceph_decode_32(&p); + result_msg = p; + p += result_msg_len; + if (p != end) + goto bad; + + dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len, + result_msg, global_id, payload_len); + + payload_end = payload + payload_len; + + if (global_id && ac->global_id != global_id) { + dout(" set global_id %lld -> %lld\n", ac->global_id, global_id); + ac->global_id = global_id; + } + + if (ac->negotiating) { + /* set up (new) protocol handler? */ + if (ac->protocol && ac->protocol != protocol) { + ac->ops->destroy(ac); + ac->protocol = 0; + ac->ops = NULL; + } + if (ac->protocol != protocol) { + ret = ceph_auth_init_protocol(ac, protocol); + if (ret) { + pr_err("error %d on auth protocol %d init\n", + ret, protocol); + goto out; + } + } + } + + ret = ac->ops->handle_reply(ac, result, payload, payload_end); + if (ret == -EAGAIN) { + struct ceph_mon_request_header *monhdr = reply_buf; + void *p = reply_buf + 1; + void *end = reply_buf + reply_len; + + monhdr->have_version = 0; + monhdr->session_mon = cpu_to_le16(-1); + monhdr->session_mon_tid = 0; + + ceph_encode_32(&p, ac->protocol); + + ret = ac->ops->build_request(ac, p + sizeof(u32), end); + if (ret < 0) { + pr_err("error %d building request\n", ret); + goto out; + } + dout(" built request %d bytes\n", ret); + ceph_encode_32(&p, ret); + return p + ret - reply_buf; + } else if (ret) { + pr_err("authentication error %d\n", ret); + return ret; + } + return 0; + +bad: + pr_err("failed to decode auth msg\n"); +out: + return ret; +} + + diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h new file mode 100644 index 000000000000..4d8cdf6bb3b6 --- /dev/null +++ b/fs/ceph/auth.h @@ -0,0 +1,77 @@ +#ifndef _FS_CEPH_AUTH_H +#define _FS_CEPH_AUTH_H + +#include "types.h" +#include "buffer.h" + +/* + * Abstract interface for communicating with the authenticate module. + * There is some handshake that takes place between us and the monitor + * to acquire the necessary keys. These are used to generate an + * 'authorizer' that we use when connecting to a service (mds, osd). + */ + +struct ceph_auth_client; +struct ceph_authorizer; + +struct ceph_auth_client_ops { + /* + * true if we are authenticated and can connect to + * services. + */ + int (*is_authenticated)(struct ceph_auth_client *ac); + + /* + * build requests and process replies during monitor + * handshake. if handle_reply returns -EAGAIN, we build + * another request. + */ + int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); + int (*handle_reply)(struct ceph_auth_client *ac, int result, + void *buf, void *end); + + /* + * Create authorizer for connecting to a service, and verify + * the response to authenticate the service. + */ + int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_authorizer **a, + void **buf, size_t *len, + void **reply_buf, size_t *reply_len); + int (*verify_authorizer_reply)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len); + void (*destroy_authorizer)(struct ceph_auth_client *ac, + struct ceph_authorizer *a); + + /* reset when we (re)connect to a monitor */ + void (*reset)(struct ceph_auth_client *ac); + + void (*destroy)(struct ceph_auth_client *ac); +}; + +struct ceph_auth_client { + u32 protocol; /* CEPH_AUTH_* */ + void *private; /* for use by protocol implementation */ + const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */ + + bool negotiating; /* true if negotiating protocol */ + const char *name; /* entity name */ + u64 global_id; /* our unique id in system */ + const char *secret; /* our secret key */ + unsigned want_keys; /* which services we want */ +}; + +extern struct ceph_auth_client *ceph_auth_init(const char *name, + const char *secret); +extern void ceph_auth_destroy(struct ceph_auth_client *ac); + +extern void ceph_auth_reset(struct ceph_auth_client *ac); + +extern int ceph_auth_build_hello(struct ceph_auth_client *ac, + void *buf, size_t len); +extern int ceph_handle_auth_reply(struct ceph_auth_client *ac, + void *buf, size_t len, + void *reply_buf, size_t reply_len); +extern int ceph_entity_name_encode(const char *name, void **p, void *end); + +#endif diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c new file mode 100644 index 000000000000..631017eb7117 --- /dev/null +++ b/fs/ceph/auth_none.c @@ -0,0 +1,120 @@ + +#include "ceph_debug.h" + +#include +#include +#include + +#include "auth_none.h" +#include "auth.h" +#include "decode.h" + +static void reset(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + xi->starting = true; + xi->built_authorizer = false; +} + +static void destroy(struct ceph_auth_client *ac) +{ + kfree(ac->private); + ac->private = NULL; +} + +static int is_authenticated(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return !xi->starting; +} + +/* + * the generic auth code decode the global_id, and we carry no actual + * authenticate state, so nothing happens here. + */ +static int handle_reply(struct ceph_auth_client *ac, int result, + void *buf, void *end) +{ + struct ceph_auth_none_info *xi = ac->private; + + xi->starting = false; + return result; +} + +/* + * build an 'authorizer' with our entity_name and global_id. we can + * reuse a single static copy since it is identical for all services + * we connect to. + */ +static int ceph_auth_none_create_authorizer( + struct ceph_auth_client *ac, int peer_type, + struct ceph_authorizer **a, + void **buf, size_t *len, + void **reply_buf, size_t *reply_len) +{ + struct ceph_auth_none_info *ai = ac->private; + struct ceph_none_authorizer *au = &ai->au; + void *p, *end; + int ret; + + if (!ai->built_authorizer) { + p = au->buf; + end = p + sizeof(au->buf); + ret = ceph_entity_name_encode(ac->name, &p, end - 8); + if (ret < 0) + goto bad; + ceph_decode_need(&p, end, sizeof(u64), bad2); + ceph_encode_64(&p, ac->global_id); + au->buf_len = p - (void *)au->buf; + ai->built_authorizer = true; + dout("built authorizer len %d\n", au->buf_len); + } + + *a = (struct ceph_authorizer *)au; + *buf = au->buf; + *len = au->buf_len; + *reply_buf = au->reply_buf; + *reply_len = sizeof(au->reply_buf); + return 0; + +bad2: + ret = -ERANGE; +bad: + return ret; +} + +static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, + struct ceph_authorizer *a) +{ + /* nothing to do */ +} + +static const struct ceph_auth_client_ops ceph_auth_none_ops = { + .reset = reset, + .destroy = destroy, + .is_authenticated = is_authenticated, + .handle_reply = handle_reply, + .create_authorizer = ceph_auth_none_create_authorizer, + .destroy_authorizer = ceph_auth_none_destroy_authorizer, +}; + +int ceph_auth_none_init(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi; + + dout("ceph_auth_none_init %p\n", ac); + xi = kzalloc(sizeof(*xi), GFP_NOFS); + if (!xi) + return -ENOMEM; + + xi->starting = true; + xi->built_authorizer = false; + + ac->protocol = CEPH_AUTH_NONE; + ac->private = xi; + ac->ops = &ceph_auth_none_ops; + return 0; +} + diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h new file mode 100644 index 000000000000..56c05533a31c --- /dev/null +++ b/fs/ceph/auth_none.h @@ -0,0 +1,28 @@ +#ifndef _FS_CEPH_AUTH_NONE_H +#define _FS_CEPH_AUTH_NONE_H + +#include "auth.h" + +/* + * null security mode. + * + * we use a single static authorizer that simply encodes our entity name + * and global id. + */ + +struct ceph_none_authorizer { + char buf[128]; + int buf_len; + char reply_buf[0]; +}; + +struct ceph_auth_none_info { + bool starting; + bool built_authorizer; + struct ceph_none_authorizer au; /* we only need one; it's static */ +}; + +extern int ceph_auth_none_init(struct ceph_auth_client *ac); + +#endif + diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 36becb024788..1e96a9a87d8d 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -75,6 +75,16 @@ struct ceph_file_layout { int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); +/* crypto algorithms */ +#define CEPH_CRYPTO_NONE 0x0 +#define CEPH_CRYPTO_AES 0x1 + +/* security/authentication protocols */ +#define CEPH_AUTH_UNKNOWN 0x0 +#define CEPH_AUTH_NONE 0x1 +#define CEPH_AUTH_CEPHX 0x2 + + /********************************************* * message layer */ @@ -90,12 +100,12 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); /* client <-> monitor */ #define CEPH_MSG_MON_MAP 4 #define CEPH_MSG_MON_GET_MAP 5 -#define CEPH_MSG_CLIENT_MOUNT 10 -#define CEPH_MSG_CLIENT_MOUNT_ACK 11 #define CEPH_MSG_STATFS 13 #define CEPH_MSG_STATFS_REPLY 14 #define CEPH_MSG_MON_SUBSCRIBE 15 #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 +#define CEPH_MSG_AUTH 17 +#define CEPH_MSG_AUTH_REPLY 18 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 90d19d9d8d8f..8e4be6a80c62 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c @@ -3,6 +3,19 @@ */ #include "types.h" +const char *ceph_entity_type_name(int type) +{ + switch (type) { + case CEPH_ENTITY_TYPE_MDS: return "mds"; + case CEPH_ENTITY_TYPE_OSD: return "osd"; + case CEPH_ENTITY_TYPE_MON: return "mon"; + case CEPH_ENTITY_TYPE_CLIENT: return "client"; + case CEPH_ENTITY_TYPE_ADMIN: return "admin"; + case CEPH_ENTITY_TYPE_AUTH: return "auth"; + default: return "unknown"; + } +} + const char *ceph_osd_op_name(int op) { switch (op) { diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h index a382aecc55bb..10de84896244 100644 --- a/fs/ceph/decode.h +++ b/fs/ceph/decode.h @@ -98,6 +98,7 @@ static inline void ceph_encode_addr(struct ceph_entity_addr *a) static inline void ceph_decode_addr(struct ceph_entity_addr *a) { a->in_addr.ss_family = ntohs(a->in_addr.ss_family); + WARN_ON(a->in_addr.ss_family == 512); } /* @@ -123,6 +124,11 @@ static inline void ceph_encode_8(void **p, u8 v) *(u8 *)*p = v; (*p)++; } +static inline void ceph_encode_copy(void **p, const void *s, int len) +{ + memcpy(*p, s, len); + *p += len; +} /* * filepath, string encoders diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 69feeb1c9819..8a285158aecc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -8,6 +8,7 @@ #include "super.h" #include "messenger.h" #include "decode.h" +#include "auth.h" /* * A cluster of MDS (metadata server) daemons is responsible for @@ -274,8 +275,12 @@ void ceph_put_mds_session(struct ceph_mds_session *s) { dout("mdsc put_session %p %d -> %d\n", s, atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); - if (atomic_dec_and_test(&s->s_ref)) + if (atomic_dec_and_test(&s->s_ref)) { + if (s->s_authorizer) + s->s_mdsc->client->monc.auth->ops->destroy_authorizer( + s->s_mdsc->client->monc.auth, s->s_authorizer); kfree(s); + } } /* @@ -2777,9 +2782,15 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); - if (ceph_fsid_compare(&fsid, &mdsc->client->monc.monmap->fsid)) { - pr_err("got mdsmap with wrong fsid\n"); - return; + if (mdsc->client->monc.have_fsid) { + if (ceph_fsid_compare(&fsid, + &mdsc->client->monc.monmap->fsid)) { + pr_err("got mdsmap with wrong fsid\n"); + return; + } + } else { + ceph_fsid_set(&mdsc->client->monc.monmap->fsid, &fsid); + mdsc->client->monc.have_fsid = true; } epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); @@ -2895,10 +2906,60 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(msg); } +/* + * authentication + */ +static int get_authorizer(struct ceph_connection *con, + void **buf, int *len, int *proto, + void **reply_buf, int *reply_len, int force_new) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->client->monc.auth; + int ret = 0; + + if (force_new && s->s_authorizer) { + ac->ops->destroy_authorizer(ac, s->s_authorizer); + s->s_authorizer = NULL; + } + if (s->s_authorizer == NULL) { + if (ac->ops->create_authorizer) { + ret = ac->ops->create_authorizer( + ac, CEPH_ENTITY_TYPE_MDS, + &s->s_authorizer, + &s->s_authorizer_buf, + &s->s_authorizer_buf_len, + &s->s_authorizer_reply_buf, + &s->s_authorizer_reply_buf_len); + if (ret) + return ret; + } + } + + *proto = ac->protocol; + *buf = s->s_authorizer_buf; + *len = s->s_authorizer_buf_len; + *reply_buf = s->s_authorizer_reply_buf; + *reply_len = s->s_authorizer_reply_buf_len; + return 0; +} + + +static int verify_authorizer_reply(struct ceph_connection *con, int len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->client->monc.auth; + + return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); +} + const static struct ceph_connection_operations mds_con_ops = { .get = con_get, .put = con_put, .dispatch = dispatch, + .get_authorizer = get_authorizer, + .verify_authorizer_reply = verify_authorizer_reply, .peer_reset = peer_reset, .alloc_msg = ceph_alloc_msg, .alloc_middle = ceph_alloc_middle, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 7c439488cfab..9faa1b2f79a7 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -100,6 +100,10 @@ struct ceph_mds_session { struct ceph_connection s_con; + struct ceph_authorizer *s_authorizer; + void *s_authorizer_buf, *s_authorizer_reply_buf; + size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; + /* protected by s_cap_lock */ spinlock_t s_cap_lock; u32 s_cap_gen; /* inc each time we get mds stale msg */ diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index d8a6a56a1571..0b1674854b25 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -550,6 +550,27 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. */ +static void prepare_connect_authorizer(struct ceph_connection *con) +{ + void *auth_buf; + int auth_len = 0; + int auth_protocol = 0; + + if (con->ops->get_authorizer) + con->ops->get_authorizer(con, &auth_buf, &auth_len, + &auth_protocol, &con->auth_reply_buf, + &con->auth_reply_buf_len, + con->auth_retry); + + con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); + con->out_connect.authorizer_len = cpu_to_le32(auth_len); + + con->out_kvec[con->out_kvec_left].iov_base = auth_buf; + con->out_kvec[con->out_kvec_left].iov_len = auth_len; + con->out_kvec_left++; + con->out_kvec_bytes += auth_len; +} + /* * We connected to a peer and are saying hello. */ @@ -592,6 +613,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); + con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -611,6 +633,8 @@ static void prepare_write_connect(struct ceph_messenger *msgr, con->out_kvec_cur = con->out_kvec; con->out_more = 0; set_bit(WRITE_PENDING, &con->state); + + prepare_connect_authorizer(con); } @@ -777,6 +801,13 @@ static void prepare_read_connect(struct ceph_connection *con) con->in_base_pos = 0; } +static void prepare_read_connect_retry(struct ceph_connection *con) +{ + dout("prepare_read_connect_retry %p\n", con); + con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) + + sizeof(con->peer_addr_for_me); +} + static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); @@ -853,9 +884,14 @@ static int read_partial_connect(struct ceph_connection *con) ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply); if (ret <= 0) goto out; + ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len), + con->auth_reply_buf); + if (ret <= 0) + goto out; - dout("read_partial_connect %p connect_seq = %u, global_seq = %u\n", - con, le32_to_cpu(con->in_reply.connect_seq), + dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", + con, (int)con->in_reply.tag, + le32_to_cpu(con->in_reply.connect_seq), le32_to_cpu(con->in_reply.global_seq)); out: return ret; @@ -1051,6 +1087,20 @@ static int process_connect(struct ceph_connection *con) set_bit(CLOSED, &con->state); /* in case there's queued work */ return -1; + case CEPH_MSGR_TAG_BADAUTHORIZER: + con->auth_retry++; + dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, + con->auth_retry); + if (con->auth_retry == 2) { + con->error_msg = "connect authorization failure"; + reset_connection(con); + set_bit(CLOSED, &con->state); + return -1; + } + con->auth_retry = 1; + prepare_write_connect(con->msgr, con, 0); + prepare_read_connect_retry(con); + break; case CEPH_MSGR_TAG_RESETSESSION: /* diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4bd85c36308e..f9c9f6487302 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -26,6 +26,12 @@ struct ceph_connection_operations { /* handle an incoming message. */ void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m); + /* authorize an outgoing connection */ + int (*get_authorizer) (struct ceph_connection *con, + void **buf, int *len, int *proto, + void **reply_buf, int *reply_len, int force_new); + int (*verify_authorizer_reply) (struct ceph_connection *con, int len); + /* protocol version mismatch */ void (*bad_proto) (struct ceph_connection *con); @@ -144,6 +150,10 @@ struct ceph_connection { attempt for this connection, client */ u32 peer_global_seq; /* peer's global seq for this connection */ + int auth_retry; /* true if we need a newer authorizer */ + void *auth_reply_buf; /* where to put the authorizer reply */ + int auth_reply_buf_len; + /* out queue */ struct mutex out_mutex; struct list_head out_queue; diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 95b76e761e18..017d5aef8834 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -6,6 +6,7 @@ #include "mon_client.h" #include "super.h" +#include "auth.h" #include "decode.h" /* @@ -38,6 +39,10 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) struct ceph_fsid fsid; u32 epoch, num_mon; u16 version; + u32 len; + + ceph_decode_32_safe(&p, end, len, bad); + ceph_decode_need(&p, end, len, bad); dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p)); @@ -95,8 +100,10 @@ static void __close_session(struct ceph_mon_client *monc) { if (monc->con) { dout("__close_session closing mon%d\n", monc->cur_mon); + ceph_con_revoke(monc->con, monc->m_auth); ceph_con_close(monc->con); monc->cur_mon = -1; + ceph_auth_reset(monc->auth); } } @@ -106,6 +113,7 @@ static void __close_session(struct ceph_mon_client *monc) static int __open_session(struct ceph_mon_client *monc) { char r; + int ret; if (monc->cur_mon < 0) { get_random_bytes(&r, 1); @@ -121,6 +129,15 @@ static int __open_session(struct ceph_mon_client *monc) monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); ceph_con_open(monc->con, &monc->monmap->mon_inst[monc->cur_mon].addr); + + /* initiatiate authentication handshake */ + ret = ceph_auth_build_hello(monc->auth, + monc->m_auth->front.iov_base, + monc->m_auth->front_max); + monc->m_auth->front.iov_len = ret; + monc->m_auth->hdr.front_len = cpu_to_le32(ret); + ceph_msg_get(monc->m_auth); /* keep our ref */ + ceph_con_send(monc->con, monc->m_auth); } else { dout("open_session mon%d already open\n", monc->cur_mon); } @@ -139,7 +156,7 @@ static void __schedule_delayed(struct ceph_mon_client *monc) { unsigned delay; - if (monc->cur_mon < 0 || monc->want_mount || __sub_expired(monc)) + if (monc->cur_mon < 0 || __sub_expired(monc)) delay = 10 * HZ; else delay = 20 * HZ; @@ -161,7 +178,7 @@ static void __send_subscribe(struct ceph_mon_client *monc) struct ceph_mon_subscribe_item *i; void *p, *end; - msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 64, 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); if (!msg) return; @@ -173,7 +190,7 @@ static void __send_subscribe(struct ceph_mon_client *monc) if (monc->want_next_osdmap) { dout("__send_subscribe to 'osdmap' %u\n", (unsigned)monc->have_osdmap); - ceph_encode_32(&p, 2); + ceph_encode_32(&p, 3); ceph_encode_string(&p, end, "osdmap", 6); i = p; i->have = cpu_to_le64(monc->have_osdmap); @@ -181,13 +198,18 @@ static void __send_subscribe(struct ceph_mon_client *monc) p += sizeof(*i); monc->want_next_osdmap = 2; /* requested */ } else { - ceph_encode_32(&p, 1); + ceph_encode_32(&p, 2); } ceph_encode_string(&p, end, "mdsmap", 6); i = p; i->have = cpu_to_le64(monc->have_mdsmap); i->onetime = 0; p += sizeof(*i); + ceph_encode_string(&p, end, "monmap", 6); + i = p; + i->have = 0; + i->onetime = 0; + p += sizeof(*i); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -256,7 +278,7 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); } - +#if 0 /* * mount */ @@ -264,12 +286,8 @@ static void __request_mount(struct ceph_mon_client *monc) { struct ceph_msg *msg; struct ceph_client_mount *h; - int err; dout("__request_mount\n"); - err = __open_session(monc); - if (err) - return; msg = ceph_msg_new(CEPH_MSG_CLIENT_MOUNT, sizeof(*h), 0, 0, NULL); if (IS_ERR(msg)) return; @@ -279,8 +297,12 @@ static void __request_mount(struct ceph_mon_client *monc) h->monhdr.session_mon_tid = 0; ceph_con_send(monc->con, msg); } +#endif -int ceph_monc_request_mount(struct ceph_mon_client *monc) +/* + * + */ +int ceph_monc_open_session(struct ceph_mon_client *monc) { if (!monc->con) { monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); @@ -292,12 +314,14 @@ int ceph_monc_request_mount(struct ceph_mon_client *monc) } mutex_lock(&monc->mutex); - __request_mount(monc); + __open_session(monc); __schedule_delayed(monc); mutex_unlock(&monc->mutex); return 0; } +#if 0 + /* * The monitor responds with mount ack indicate mount success. The * included client ticket allows the client to talk to MDSs and OSDs. @@ -372,9 +396,65 @@ out: mutex_unlock(&monc->mutex); wake_up(&client->mount_wq); } +#endif + +/* + * The monitor responds with mount ack indicate mount success. The + * included client ticket allows the client to talk to MDSs and OSDs. + */ +static void ceph_monc_handle_map(struct ceph_mon_client *monc, struct ceph_msg *msg) +{ + struct ceph_client *client = monc->client; + struct ceph_monmap *monmap = NULL, *old = monc->monmap; + void *p, *end; + + mutex_lock(&monc->mutex); + + dout("handle_monmap\n"); + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + monmap = ceph_monmap_decode(p, end); + if (IS_ERR(monmap)) { + pr_err("problem decoding monmap, %d\n", + (int)PTR_ERR(monmap)); + return; + } + if (monc->have_fsid && + ceph_fsid_compare(&monmap->fsid, &monc->monmap->fsid)) { + print_hex_dump(KERN_ERR, "monmap->fsid: ", DUMP_PREFIX_NONE, 16, 1, + (void *)&monmap->fsid, 16, 0); + print_hex_dump(KERN_ERR, "monc->monmap->fsid: ", DUMP_PREFIX_NONE, 16, 1, + (void *)&monc->monmap->fsid, 16, 0); + + pr_err("fsid mismatch, got a previous map with different fsid"); + kfree(monmap); + return; + } + + client->monc.monmap = monmap; + client->monc.have_fsid = true; + kfree(old); + + mutex_unlock(&monc->mutex); + wake_up(&client->mount_wq); +} + +/* + * init client info after authentication + */ +static void __init_authenticated_client(struct ceph_mon_client *monc) +{ + struct ceph_client *client = monc->client; + client->signed_ticket = NULL; + client->signed_ticket_len = 0; + client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; + client->msgr->inst.name.num = monc->auth->global_id; + ceph_debugfs_client_init(client); +} /* * statfs @@ -414,12 +494,8 @@ static int send_statfs(struct ceph_mon_client *monc, { struct ceph_msg *msg; struct ceph_mon_statfs *h; - int err; dout("send_statfs tid %llu\n", req->tid); - err = __open_session(monc); - if (err) - return err; msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -514,17 +590,14 @@ static void delayed_work(struct work_struct *work) dout("monc delayed_work\n"); mutex_lock(&monc->mutex); - if (monc->want_mount) { - __request_mount(monc); + if (monc->hunting) { + __close_session(monc); + __open_session(monc); /* continue hunting */ } else { - if (monc->hunting) { - __close_session(monc); - __open_session(monc); /* continue hunting */ - } else { - ceph_con_keepalive(monc->con); - } + ceph_con_keepalive(monc->con); + if (monc->auth->ops->is_authenticated(monc->auth)) + __send_subscribe(monc); } - __send_subscribe(monc); __schedule_delayed(monc); mutex_unlock(&monc->mutex); } @@ -555,6 +628,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc) monc->monmap->mon_inst[i].name.num = cpu_to_le64(i); } monc->monmap->num_mon = num_mon; + monc->have_fsid = false; /* release addr memory */ kfree(args->mon_addr); @@ -579,21 +653,37 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->con = NULL; + /* authentication */ + monc->auth = ceph_auth_init(cl->mount_args->name, + cl->mount_args->secret); + if (IS_ERR(monc->auth)) + return PTR_ERR(monc->auth); + monc->auth->want_keys = + CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | + CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; + /* msg pools */ - err = ceph_msgpool_init(&monc->msgpool_mount_ack, 4096, 1, false); - if (err < 0) - goto out; err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, sizeof(struct ceph_mon_subscribe_ack), 1, false); if (err < 0) - goto out; + goto out_monmap; err = ceph_msgpool_init(&monc->msgpool_statfs_reply, sizeof(struct ceph_mon_statfs_reply), 0, false); if (err < 0) - goto out; + goto out_pool1; + err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); + if (err < 0) + goto out_pool2; + + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); + if (IS_ERR(monc->m_auth)) { + err = PTR_ERR(monc->m_auth); + monc->m_auth = NULL; + goto out_pool3; + } monc->cur_mon = -1; - monc->hunting = false; /* not really */ + monc->hunting = true; monc->sub_renew_after = jiffies; monc->sub_sent = 0; @@ -605,7 +695,16 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->have_mdsmap = 0; monc->have_osdmap = 0; monc->want_next_osdmap = 1; - monc->want_mount = true; + return 0; + +out_pool3: + ceph_msgpool_destroy(&monc->msgpool_auth_reply); +out_pool2: + ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); +out_pool1: + ceph_msgpool_destroy(&monc->msgpool_statfs_reply); +out_monmap: + kfree(monc->monmap); out: return err; } @@ -624,14 +723,44 @@ void ceph_monc_stop(struct ceph_mon_client *monc) } mutex_unlock(&monc->mutex); - ceph_msgpool_destroy(&monc->msgpool_mount_ack); + ceph_auth_destroy(monc->auth); + + ceph_msg_put(monc->m_auth); ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); ceph_msgpool_destroy(&monc->msgpool_statfs_reply); + ceph_msgpool_destroy(&monc->msgpool_auth_reply); kfree(monc->monmap); } +static void handle_auth_reply(struct ceph_mon_client *monc, + struct ceph_msg *msg) +{ + int ret; + + mutex_lock(&monc->mutex); + ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, + msg->front.iov_len, + monc->m_auth->front.iov_base, + monc->m_auth->front_max); + if (ret < 0) { + monc->client->mount_err = ret; + wake_up(&monc->client->mount_wq); + } else if (ret > 0) { + monc->m_auth->front.iov_len = ret; + monc->m_auth->hdr.front_len = cpu_to_le32(ret); + ceph_msg_get(monc->m_auth); /* keep our ref */ + ceph_con_send(monc->con, monc->m_auth); + } else if (monc->auth->ops->is_authenticated(monc->auth)) { + dout("authenticated, starting session\n"); + __init_authenticated_client(monc); + __send_subscribe(monc); + __resend_statfs(monc); + } + mutex_unlock(&monc->mutex); +} + /* * handle incoming message */ @@ -644,8 +773,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) return; switch (type) { - case CEPH_MSG_CLIENT_MOUNT_ACK: - handle_mount_ack(monc, msg); + case CEPH_MSG_AUTH_REPLY: + handle_auth_reply(monc, msg); break; case CEPH_MSG_MON_SUBSCRIBE_ACK: @@ -656,6 +785,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_statfs_reply(monc, msg); break; + case CEPH_MSG_MON_MAP: + ceph_monc_handle_map(monc, msg); + break; + case CEPH_MSG_MDS_MAP: ceph_mdsc_handle_map(&monc->client->mdsc, msg); break; @@ -682,12 +815,12 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, int front = le32_to_cpu(hdr->front_len); switch (type) { - case CEPH_MSG_CLIENT_MOUNT_ACK: - return ceph_msgpool_get(&monc->msgpool_mount_ack, front); case CEPH_MSG_MON_SUBSCRIBE_ACK: return ceph_msgpool_get(&monc->msgpool_subscribe_ack, front); case CEPH_MSG_STATFS_REPLY: return ceph_msgpool_get(&monc->msgpool_statfs_reply, front); + case CEPH_MSG_AUTH_REPLY: + return ceph_msgpool_get(&monc->msgpool_auth_reply, front); } return ceph_alloc_msg(con, hdr); } @@ -717,10 +850,7 @@ static void mon_fault(struct ceph_connection *con) if (!monc->hunting) { /* start hunting */ monc->hunting = true; - if (__open_session(monc) == 0) { - __send_subscribe(monc); - __resend_statfs(monc); - } + __open_session(monc); } else { /* already hunting, let's wait a bit */ __schedule_delayed(monc); diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 9f6db45bf469..c75b53302ecc 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -9,6 +9,7 @@ struct ceph_client; struct ceph_mount_args; +struct ceph_auth_client; /* * The monitor map enumerates the set of all monitors. @@ -58,23 +59,26 @@ struct ceph_mon_client { struct mutex mutex; struct delayed_work delayed_work; + struct ceph_auth_client *auth; + struct ceph_msg *m_auth; + bool hunting; int cur_mon; /* last monitor i contacted */ unsigned long sub_sent, sub_renew_after; struct ceph_connection *con; + bool have_fsid; /* msg pools */ - struct ceph_msgpool msgpool_mount_ack; struct ceph_msgpool msgpool_subscribe_ack; struct ceph_msgpool msgpool_statfs_reply; + struct ceph_msgpool msgpool_auth_reply; /* pending statfs requests */ struct radix_tree_root statfs_request_tree; int num_statfs_requests; u64 last_tid; - /* mds/osd map or mount requests */ - bool want_mount; + /* mds/osd map */ int want_next_osdmap; /* 1 = want, 2 = want+asked */ u32 have_osdmap, have_mdsmap; @@ -101,11 +105,11 @@ extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); -extern int ceph_monc_request_mount(struct ceph_mon_client *monc); - extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); +extern int ceph_monc_open_session(struct ceph_mon_client *monc); + #endif diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8e3ea2eb1bf5..c758e8f8f71b 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h @@ -21,7 +21,7 @@ * whenever the wire protocol changes. try to keep this string length * constant. */ -#define CEPH_BANNER "ceph v023" +#define CEPH_BANNER "ceph v024" #define CEPH_BANNER_MAX_LEN 30 @@ -46,11 +46,16 @@ struct ceph_entity_name { __le64 num; } __attribute__ ((packed)); -#define CEPH_ENTITY_TYPE_MON 1 -#define CEPH_ENTITY_TYPE_MDS 2 -#define CEPH_ENTITY_TYPE_OSD 3 -#define CEPH_ENTITY_TYPE_CLIENT 4 -#define CEPH_ENTITY_TYPE_ADMIN 5 +#define CEPH_ENTITY_TYPE_MON 0x01 +#define CEPH_ENTITY_TYPE_MDS 0x02 +#define CEPH_ENTITY_TYPE_OSD 0x04 +#define CEPH_ENTITY_TYPE_CLIENT 0x08 +#define CEPH_ENTITY_TYPE_ADMIN 0x10 +#define CEPH_ENTITY_TYPE_AUTH 0x20 + +#define CEPH_ENTITY_TYPE_ANY 0xFF + +extern const char *ceph_entity_type_name(int type); /* * entity_addr -- network address @@ -94,6 +99,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ /* @@ -104,6 +110,8 @@ struct ceph_msg_connect { __le32 global_seq; /* count connections initiated by this host */ __le32 connect_seq; /* count connections initiated in this session */ __le32 protocol_version; + __le32 authorizer_protocol; + __le32 authorizer_len; __u8 flags; /* CEPH_MSG_CONNECT_* */ } __attribute__ ((packed)); @@ -112,6 +120,7 @@ struct ceph_msg_connect_reply { __le32 global_seq; __le32 connect_seq; __le32 protocol_version; + __le32 authorizer_len; __u8 flags; } __attribute__ ((packed)); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 0a16c4f951f9..ca0ee68c322a 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -11,6 +11,7 @@ #include "osd_client.h" #include "messenger.h" #include "decode.h" +#include "auth.h" const static struct ceph_connection_operations osd_con_ops; @@ -331,6 +332,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) osd->o_con.private = osd; osd->o_con.ops = &osd_con_ops; osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; + return osd; } @@ -880,9 +882,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) /* verify fsid */ ceph_decode_need(&p, end, sizeof(fsid), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); - if (ceph_fsid_compare(&fsid, &osdc->client->monc.monmap->fsid)) { - pr_err("got osdmap with wrong fsid, ignoring\n"); - return; + if (osdc->client->monc.have_fsid) { + if (ceph_fsid_compare(&fsid, + &osdc->client->monc.monmap->fsid)) { + pr_err("got osdmap with wrong fsid, ignoring\n"); + return; + } + } else { + ceph_fsid_set(&osdc->client->monc.monmap->fsid, &fsid); + osdc->client->monc.have_fsid = true; } down_write(&osdc->map_sem); @@ -1302,10 +1310,59 @@ static void put_osd_con(struct ceph_connection *con) put_osd(osd); } +/* + * authentication + */ +static int get_authorizer(struct ceph_connection *con, + void **buf, int *len, int *proto, + void **reply_buf, int *reply_len, int force_new) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + int ret = 0; + + if (force_new && o->o_authorizer) { + ac->ops->destroy_authorizer(ac, o->o_authorizer); + o->o_authorizer = NULL; + } + if (o->o_authorizer == NULL) { + ret = ac->ops->create_authorizer( + ac, CEPH_ENTITY_TYPE_OSD, + &o->o_authorizer, + &o->o_authorizer_buf, + &o->o_authorizer_buf_len, + &o->o_authorizer_reply_buf, + &o->o_authorizer_reply_buf_len); + if (ret) + return ret; + } + + *proto = ac->protocol; + *buf = o->o_authorizer_buf; + *len = o->o_authorizer_buf_len; + *reply_buf = o->o_authorizer_reply_buf; + *reply_len = o->o_authorizer_reply_buf_len; + return 0; +} + + +static int verify_authorizer_reply(struct ceph_connection *con, int len) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + + return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len); +} + + const static struct ceph_connection_operations osd_con_ops = { .get = get_osd_con, .put = put_osd_con, .dispatch = dispatch, + .get_authorizer = get_authorizer, + .verify_authorizer_reply = verify_authorizer_reply, .alloc_msg = alloc_msg, .fault = osd_reset, .alloc_middle = ceph_alloc_middle, diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 766c8dc80aff..3d4ae6595aaa 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -13,6 +13,7 @@ struct ceph_msg; struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; +struct ceph_authorizer; /* * completion callback for async writepages @@ -29,6 +30,9 @@ struct ceph_osd { struct rb_node o_node; struct ceph_connection o_con; struct list_head o_requests; + struct ceph_authorizer *o_authorizer; + void *o_authorizer_buf, *o_authorizer_reply_buf; + size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; }; /* an in-flight request */ diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fb23ff9297c9..12bfb2f7c275 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -157,7 +157,6 @@ struct ceph_eversion { #define CEPH_OSD_OP_MODE_WR 0x2000 #define CEPH_OSD_OP_MODE_RMW 0x3000 #define CEPH_OSD_OP_MODE_SUB 0x4000 -#define CEPH_OSD_OP_MODE_EXEC 0x8000 #define CEPH_OSD_OP_TYPE 0x0f00 #define CEPH_OSD_OP_TYPE_LOCK 0x0100 @@ -285,6 +284,7 @@ enum { CEPH_OSD_FLAG_BALANCE_READS = 256, CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ }; enum { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fe0a5962a082..c901395ae8a1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -128,6 +128,8 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",noasyncreaddir"); if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) seq_printf(m, ",snapdirname=%s", args->snapdir_name); + if (args->name) + seq_printf(m, ",name=%s", args->name); if (args->secret) seq_puts(m, ",secret="); return 0; @@ -224,12 +226,12 @@ const char *ceph_msg_type_name(int type) switch (type) { case CEPH_MSG_SHUTDOWN: return "shutdown"; case CEPH_MSG_PING: return "ping"; + case CEPH_MSG_AUTH: return "auth"; + case CEPH_MSG_AUTH_REPLY: return "auth_reply"; case CEPH_MSG_MON_MAP: return "mon_map"; case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; - case CEPH_MSG_CLIENT_MOUNT: return "client_mount"; - case CEPH_MSG_CLIENT_MOUNT_ACK: return "client_mount_ack"; case CEPH_MSG_STATFS: return "statfs"; case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; @@ -267,6 +269,7 @@ enum { Opt_last_int, /* int args above */ Opt_snapdirname, + Opt_name, Opt_secret, Opt_last_string, /* string args above */ @@ -293,6 +296,7 @@ static match_table_t arg_tokens = { {Opt_readdir_max_entries, "readdir_max_entries=%d"}, /* int args above */ {Opt_snapdirname, "snapdirname=%s"}, + {Opt_name, "name=%s"}, {Opt_secret, "secret=%s"}, /* string args above */ {Opt_ip, "ip=%s"}, @@ -407,6 +411,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, argstr[0].to-argstr[0].from, GFP_KERNEL); break; + case Opt_name: + args->name = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; case Opt_secret: args->secret = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, @@ -476,6 +485,8 @@ static void destroy_mount_args(struct ceph_mount_args *args) dout("destroy_mount_args %p\n", args); kfree(args->snapdir_name); args->snapdir_name = NULL; + kfree(args->name); + args->name = NULL; kfree(args->secret); args->secret = NULL; kfree(args); @@ -657,27 +668,23 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, client->msgr->nocrc = ceph_test_opt(client, NOCRC); } - /* send mount request, and wait for mon, mds, and osd maps */ - err = ceph_monc_request_mount(&client->monc); + /* open session, and wait for mon, mds, and osd maps */ + err = ceph_monc_open_session(&client->monc); if (err < 0) goto out; - while (!have_mon_map(client) && !client->mount_err) { + while (!have_mon_map(client)) { err = -EIO; if (timeout && time_after_eq(jiffies, started + timeout)) goto out; /* wait */ - dout("mount waiting for mount\n"); - err = wait_event_interruptible_timeout(client->mount_wq, - client->mount_err || have_mon_map(client), + dout("mount waiting for mon_map\n"); + err = wait_event_interruptible_timeout(client->mount_wq, /* FIXME */ + have_mon_map(client), timeout); if (err == -EINTR || err == -ERESTARTSYS) goto out; - if (client->mount_err) { - err = client->mount_err; - goto out; - } } dout("mount opening root\n"); @@ -795,7 +802,6 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) client->backing_dev_info.ra_pages = (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; - err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); return err; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8aa1ffba6c0d..e0e8130959b6 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -61,6 +61,7 @@ struct ceph_mount_args { int max_readdir; /* max readdir size */ int osd_timeout; char *snapdir_name; /* default ".snap" */ + char *name; char *secret; int cap_release_safety; }; @@ -75,6 +76,7 @@ struct ceph_mount_args { #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" +#define CEPH_AUTH_NAME_DEFAULT "guest" /* * Delay telling the MDS we no longer want caps, in case we reopen -- cgit v1.2.3 From 58bb3b374b07a2a43315213f00a48a5ffd6d0915 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 23 Dec 2009 12:12:31 -0800 Subject: ceph: support ceph_pagelist for message payload The ceph_pagelist is a simple list of whole pages, strung together via their lru list_head. It facilitates encoding to a "buffer" of unknown size. Allow its use in place of the ceph_msg page vector. This will be used to fix the huge buffer preallocation woes of MDS reconnection. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 2 +- fs/ceph/messenger.c | 24 ++++++++++++++++++++---- fs/ceph/messenger.h | 1 + fs/ceph/pagelist.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/pagelist.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 fs/ceph/pagelist.c create mode 100644 fs/ceph/pagelist.h (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 827629c85768..47caf2f1b75a 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ export.o caps.o snap.o xattr.o \ - messenger.o msgpool.o buffer.o \ + messenger.o msgpool.o buffer.o pagelist.o \ mds_client.o mdsmap.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 68052f664280..c1106e8360f0 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -13,6 +13,7 @@ #include "super.h" #include "messenger.h" #include "decode.h" +#include "pagelist.h" /* * Ceph uses the messenger to exchange ceph_msg messages with other @@ -728,6 +729,11 @@ static int write_partial_msg_pages(struct ceph_connection *con) page = msg->pages[con->out_msg_pos.page]; if (crc) kaddr = kmap(page); + } else if (msg->pagelist) { + page = list_first_entry(&msg->pagelist->head, + struct page, lru); + if (crc) + kaddr = kmap(page); } else { page = con->msgr->zero_page; if (crc) @@ -750,7 +756,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) MSG_DONTWAIT | MSG_NOSIGNAL | MSG_MORE); - if (crc && msg->pages) + if (crc && (msg->pages || msg->pagelist)) kunmap(page); if (ret <= 0) @@ -762,6 +768,9 @@ static int write_partial_msg_pages(struct ceph_connection *con) con->out_msg_pos.page_pos = 0; con->out_msg_pos.page++; con->out_msg_pos.did_page_crc = 0; + if (msg->pagelist) + list_move_tail(&page->lru, + &msg->pagelist->head); } } @@ -1051,13 +1060,13 @@ static int process_banner(struct ceph_connection *con) &con->actual_peer_addr) && !(addr_is_blank(&con->actual_peer_addr.in_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_err("wrong peer, want %s/%d, " - "got %s/%d, wtf\n", + pr_warning("wrong peer, want %s/%d, " + "got %s/%d\n", pr_addr(&con->peer_addr.in_addr), con->peer_addr.nonce, pr_addr(&con->actual_peer_addr.in_addr), con->actual_peer_addr.nonce); - con->error_msg = "protocol error, wrong peer"; + con->error_msg = "wrong peer at address"; return -1; } @@ -2096,6 +2105,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, /* data */ m->nr_pages = calc_pages_for(page_off, page_len); m->pages = pages; + m->pagelist = NULL; dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, m->nr_pages); @@ -2181,6 +2191,12 @@ void ceph_msg_last_put(struct kref *kref) m->nr_pages = 0; m->pages = NULL; + if (m->pagelist) { + ceph_pagelist_release(m->pagelist); + kfree(m->pagelist); + m->pagelist = NULL; + } + if (m->pool) ceph_msgpool_put(m->pool, m); else diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 7e2aab1d3ce2..a7b684145092 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -85,6 +85,7 @@ struct ceph_msg { struct ceph_buffer *middle; struct page **pages; /* data payload. NOT OWNER. */ unsigned nr_pages; /* size of page array */ + struct ceph_pagelist *pagelist; /* instead of pages */ struct list_head list_head; struct kref kref; bool front_is_vmalloc; diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c new file mode 100644 index 000000000000..370e93695474 --- /dev/null +++ b/fs/ceph/pagelist.c @@ -0,0 +1,54 @@ + +#include +#include + +#include "pagelist.h" + +int ceph_pagelist_release(struct ceph_pagelist *pl) +{ + if (pl->mapped_tail) + kunmap(pl->mapped_tail); + while (!list_empty(&pl->head)) { + struct page *page = list_first_entry(&pl->head, struct page, + lru); + list_del(&page->lru); + __free_page(page); + } + return 0; +} + +static int ceph_pagelist_addpage(struct ceph_pagelist *pl) +{ + struct page *page = alloc_page(GFP_NOFS); + if (!page) + return -ENOMEM; + pl->room += PAGE_SIZE; + list_add_tail(&page->lru, &pl->head); + if (pl->mapped_tail) + kunmap(pl->mapped_tail); + pl->mapped_tail = kmap(page); + return 0; +} + +int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len) +{ + while (pl->room < len) { + size_t bit = pl->room; + int ret; + + memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), + buf, bit); + pl->length += bit; + pl->room -= bit; + buf += bit; + len -= bit; + ret = ceph_pagelist_addpage(pl); + if (ret) + return ret; + } + + memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len); + pl->length += len; + pl->room -= len; + return 0; +} diff --git a/fs/ceph/pagelist.h b/fs/ceph/pagelist.h new file mode 100644 index 000000000000..e8a4187e1087 --- /dev/null +++ b/fs/ceph/pagelist.h @@ -0,0 +1,54 @@ +#ifndef __FS_CEPH_PAGELIST_H +#define __FS_CEPH_PAGELIST_H + +#include + +struct ceph_pagelist { + struct list_head head; + void *mapped_tail; + size_t length; + size_t room; +}; + +static inline void ceph_pagelist_init(struct ceph_pagelist *pl) +{ + INIT_LIST_HEAD(&pl->head); + pl->mapped_tail = NULL; + pl->length = 0; + pl->room = 0; +} +extern int ceph_pagelist_release(struct ceph_pagelist *pl); + +extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); + +static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) +{ + __le64 ev = cpu_to_le64(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v) +{ + __le32 ev = cpu_to_le32(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v) +{ + __le16 ev = cpu_to_le16(v); + return ceph_pagelist_append(pl, &ev, sizeof(ev)); +} +static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v) +{ + return ceph_pagelist_append(pl, &v, 1); +} +static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl, + char *s, size_t len) +{ + int ret = ceph_pagelist_encode_32(pl, len); + if (ret) + return ret; + if (len) + return ceph_pagelist_append(pl, s, len); + return 0; +} + +#endif -- cgit v1.2.3 From 8b6e4f2d8b21c25225b1ce8d53a2e03b92cc8522 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 2 Feb 2010 16:07:07 -0800 Subject: ceph: aes crypto and base64 encode/decode helpers Helpers to encrypt/decrypt AES and base64. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/Kconfig | 1 + fs/ceph/Makefile | 1 + fs/ceph/armor.c | 99 ++++++++++++++ fs/ceph/crypto.c | 408 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/crypto.h | 48 +++++++ 5 files changed, 557 insertions(+) create mode 100644 fs/ceph/armor.c create mode 100644 fs/ceph/crypto.c create mode 100644 fs/ceph/crypto.h (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc1fbd956187..04b8280582a9 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -2,6 +2,7 @@ config CEPH_FS tristate "Ceph distributed file system (EXPERIMENTAL)" depends on INET && EXPERIMENTAL select LIBCRC32C + select CONFIG_CRYPTO_AES help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 47caf2f1b75a..85a588e43e7d 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -14,6 +14,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ auth.o auth_none.o \ + crypto.o armor.o \ ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c new file mode 100644 index 000000000000..67b2c030924b --- /dev/null +++ b/fs/ceph/armor.c @@ -0,0 +1,99 @@ + +#include + +/* + * base64 encode/decode. + */ + +const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static int encode_bits(int c) +{ + return pem_key[c]; +} + +static int decode_bits(char c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A'; + if (c >= 'a' && c <= 'z') + return c - 'a' + 26; + if (c >= '0' && c <= '9') + return c - '0' + 52; + if (c == '+') + return 62; + if (c == '/') + return 63; + if (c == '=') + return 0; /* just non-negative, please */ + return -EINVAL; +} + +int ceph_armor(char *dst, const char *src, const char *end) +{ + int olen = 0; + int line = 0; + + while (src < end) { + unsigned char a, b, c; + + a = *src++; + *dst++ = encode_bits(a >> 2); + if (src < end) { + b = *src++; + *dst++ = encode_bits(((a & 3) << 4) | (b >> 4)); + if (src < end) { + c = *src++; + *dst++ = encode_bits(((b & 15) << 2) | + (c >> 6)); + *dst++ = encode_bits(c & 63); + } else { + *dst++ = encode_bits((b & 15) << 2); + *dst++ = '='; + } + } else { + *dst++ = encode_bits(((a & 3) << 4)); + *dst++ = '='; + *dst++ = '='; + } + olen += 4; + line += 4; + if (line == 64) { + line = 0; + *(dst++) = '\n'; + olen++; + } + } + return olen; +} + +int ceph_unarmor(char *dst, const char *src, const char *end) +{ + int olen = 0; + + while (src < end) { + int a, b, c, d; + + if (src < end && src[0] == '\n') + src++; + if (src + 4 > end) + return -EINVAL; + a = decode_bits(src[0]); + b = decode_bits(src[1]); + c = decode_bits(src[2]); + d = decode_bits(src[3]); + if (a < 0 || b < 0 || c < 0 || d < 0) + return -EINVAL; + + *dst++ = (a << 2) | (b >> 4); + if (src[2] == '=') + return olen + 1; + *dst++ = ((b & 15) << 4) | (c >> 2); + if (src[3] == '=') + return olen + 2; + *dst++ = ((c & 3) << 6) | d; + olen += 3; + src += 4; + } + return olen; +} diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c new file mode 100644 index 000000000000..291ac288e791 --- /dev/null +++ b/fs/ceph/crypto.c @@ -0,0 +1,408 @@ + +#include "ceph_debug.h" + +#include +#include +#include + +#include "crypto.h" +#include "decode.h" + +int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) +{ + if (*p + sizeof(u16) + sizeof(key->created) + + sizeof(u16) + key->len > end) + return -ERANGE; + ceph_encode_16(p, key->type); + ceph_encode_copy(p, &key->created, sizeof(key->created)); + ceph_encode_16(p, key->len); + ceph_encode_copy(p, key->key, key->len); + return 0; +} + +int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) +{ + ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); + key->type = ceph_decode_16(p); + ceph_decode_copy(p, &key->created, sizeof(key->created)); + key->len = ceph_decode_16(p); + ceph_decode_need(p, end, key->len, bad); + key->key = kmalloc(key->len, GFP_NOFS); + if (!key->key) + return -ENOMEM; + ceph_decode_copy(p, key->key, key->len); + return 0; + +bad: + dout("failed to decode crypto key\n"); + return -EINVAL; +} + +int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) +{ + int inlen = strlen(inkey); + int blen = inlen * 3 / 4; + void *buf, *p; + int ret; + + dout("crypto_key_unarmor %s\n", inkey); + buf = kmalloc(blen, GFP_NOFS); + if (!buf) + return -ENOMEM; + blen = ceph_unarmor(buf, inkey, inkey+inlen); + if (blen < 0) { + kfree(buf); + return blen; + } + + p = buf; + ret = ceph_crypto_key_decode(key, &p, p + blen); + kfree(buf); + if (ret) + return ret; + dout("crypto_key_unarmor key %p type %d len %d\n", key, + key->type, key->len); + return 0; +} + + + +#define AES_KEY_SIZE 16 + +static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) +{ + return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); +} + +const u8 *aes_iv = "cephsageyudagreg"; + +int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, + const void *src, size_t src_len) +{ + struct scatterlist sg_in[2], sg_out[1]; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; + int ret; + void *iv; + int ivsize; + size_t zero_padding = (0x10 - (src_len & 0x0f)); + char pad[16]; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + memset(pad, zero_padding, zero_padding); + + *dst_len = src_len + zero_padding; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); + sg_init_table(sg_in, 2); + sg_set_buf(&sg_in[0], src, src_len); + sg_set_buf(&sg_in[1], pad, zero_padding); + sg_init_table(sg_out, 1); + sg_set_buf(sg_out, dst, *dst_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + + memcpy(iv, aes_iv, ivsize); + /* + print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); + print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1, + src, src_len, 1); + print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, + pad, zero_padding, 1); + */ + ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + src_len + zero_padding); + crypto_free_blkcipher(tfm); + if (ret < 0) + pr_err("ceph_aes_crypt failed %d\n", ret); + /* + print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ + return 0; +} + +int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, + const void *src1, size_t src1_len, + const void *src2, size_t src2_len) +{ + struct scatterlist sg_in[3], sg_out[1]; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; + int ret; + void *iv; + int ivsize; + size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); + char pad[16]; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + memset(pad, zero_padding, zero_padding); + + *dst_len = src1_len + src2_len + zero_padding; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); + sg_init_table(sg_in, 3); + sg_set_buf(&sg_in[0], src1, src1_len); + sg_set_buf(&sg_in[1], src2, src2_len); + sg_set_buf(&sg_in[2], pad, zero_padding); + sg_init_table(sg_out, 1); + sg_set_buf(sg_out, dst, *dst_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + + memcpy(iv, aes_iv, ivsize); + /* + print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); + print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1, + src1, src1_len, 1); + print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1, + src2, src2_len, 1); + print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, + pad, zero_padding, 1); + */ + ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + src1_len + src2_len + zero_padding); + crypto_free_blkcipher(tfm); + if (ret < 0) + pr_err("ceph_aes_crypt2 failed %d\n", ret); + /* + print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ + return 0; +} + +int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, + const void *src, size_t src_len) +{ + struct scatterlist sg_in[1], sg_out[2]; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm }; + char pad[16]; + void *iv; + int ivsize; + int ret; + int last_byte; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + crypto_blkcipher_setkey((void *)tfm, key, key_len); + sg_init_table(sg_in, 1); + sg_init_table(sg_out, 2); + sg_set_buf(sg_in, src, src_len); + sg_set_buf(&sg_out[0], dst, *dst_len); + sg_set_buf(&sg_out[1], pad, sizeof(pad)); + + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + + memcpy(iv, aes_iv, ivsize); + + /* + print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); + print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, + src, src_len, 1); + */ + + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); + crypto_free_blkcipher(tfm); + if (ret < 0) { + pr_err("ceph_aes_decrypt failed %d\n", ret); + return ret; + } + + if (src_len <= *dst_len) + last_byte = ((char *)dst)[src_len - 1]; + else + last_byte = pad[src_len - *dst_len - 1]; + if (last_byte <= 16 && src_len >= last_byte) { + *dst_len = src_len - last_byte; + } else { + pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", + last_byte, (int)src_len); + return -EPERM; /* bad padding */ + } + /* + print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, + dst, *dst_len, 1); + */ + return 0; +} + +int ceph_aes_decrypt2(const void *key, int key_len, + void *dst1, size_t *dst1_len, + void *dst2, size_t *dst2_len, + const void *src, size_t src_len) +{ + struct scatterlist sg_in[1], sg_out[3]; + struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); + struct blkcipher_desc desc = { .tfm = tfm }; + char pad[16]; + void *iv; + int ivsize; + int ret; + int last_byte; + + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + sg_init_table(sg_in, 1); + sg_set_buf(sg_in, src, src_len); + sg_init_table(sg_out, 3); + sg_set_buf(&sg_out[0], dst1, *dst1_len); + sg_set_buf(&sg_out[1], dst2, *dst2_len); + sg_set_buf(&sg_out[2], pad, sizeof(pad)); + + crypto_blkcipher_setkey((void *)tfm, key, key_len); + iv = crypto_blkcipher_crt(tfm)->iv; + ivsize = crypto_blkcipher_ivsize(tfm); + + memcpy(iv, aes_iv, ivsize); + + /* + print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, + key, key_len, 1); + print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, + src, src_len, 1); + */ + + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); + crypto_free_blkcipher(tfm); + if (ret < 0) { + pr_err("ceph_aes_decrypt failed %d\n", ret); + return ret; + } + + if (src_len <= *dst1_len) + last_byte = ((char *)dst1)[src_len - 1]; + else if (src_len <= *dst1_len + *dst2_len) + last_byte = ((char *)dst2)[src_len - *dst1_len - 1]; + else + last_byte = pad[src_len - *dst1_len - *dst2_len - 1]; + if (last_byte <= 16 && src_len >= last_byte) { + src_len -= last_byte; + } else { + pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", + last_byte, (int)src_len); + return -EPERM; /* bad padding */ + } + + if (src_len < *dst1_len) { + *dst1_len = src_len; + *dst2_len = 0; + } else { + *dst2_len = src_len - *dst1_len; + } + /* + print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, + dst1, *dst1_len, 1); + print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1, + dst2, *dst2_len, 1); + */ + + return 0; +} + + +int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, + const void *src, size_t src_len) +{ + switch (secret->type) { + case CEPH_CRYPTO_NONE: + if (*dst_len < src_len) + return -ERANGE; + memcpy(dst, src, src_len); + *dst_len = src_len; + return 0; + + case CEPH_CRYPTO_AES: + return ceph_aes_decrypt(secret->key, secret->len, dst, + dst_len, src, src_len); + + default: + return -EINVAL; + } +} + +int ceph_decrypt2(struct ceph_crypto_key *secret, + void *dst1, size_t *dst1_len, + void *dst2, size_t *dst2_len, + const void *src, size_t src_len) +{ + size_t t; + + switch (secret->type) { + case CEPH_CRYPTO_NONE: + if (*dst1_len + *dst2_len < src_len) + return -ERANGE; + t = min(*dst1_len, src_len); + memcpy(dst1, src, t); + *dst1_len = t; + src += t; + src_len -= t; + if (src_len) { + t = min(*dst2_len, src_len); + memcpy(dst2, src, t); + *dst2_len = t; + } + return 0; + + case CEPH_CRYPTO_AES: + return ceph_aes_decrypt2(secret->key, secret->len, + dst1, dst1_len, dst2, dst2_len, + src, src_len); + + default: + return -EINVAL; + } +} + +int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, + const void *src, size_t src_len) +{ + switch (secret->type) { + case CEPH_CRYPTO_NONE: + if (*dst_len < src_len) + return -ERANGE; + memcpy(dst, src, src_len); + *dst_len = src_len; + return 0; + + case CEPH_CRYPTO_AES: + return ceph_aes_encrypt(secret->key, secret->len, dst, + dst_len, src, src_len); + + default: + return -EINVAL; + } +} + +int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, + const void *src1, size_t src1_len, + const void *src2, size_t src2_len) +{ + switch (secret->type) { + case CEPH_CRYPTO_NONE: + if (*dst_len < src1_len + src2_len) + return -ERANGE; + memcpy(dst, src1, src1_len); + memcpy(dst + src1_len, src2, src2_len); + *dst_len = src1_len + src2_len; + return 0; + + case CEPH_CRYPTO_AES: + return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, + src1, src1_len, src2, src2_len); + + default: + return -EINVAL; + } +} diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h new file mode 100644 index 000000000000..40b502e6bd89 --- /dev/null +++ b/fs/ceph/crypto.h @@ -0,0 +1,48 @@ +#ifndef _FS_CEPH_CRYPTO_H +#define _FS_CEPH_CRYPTO_H + +#include "types.h" +#include "buffer.h" + +/* + * cryptographic secret + */ +struct ceph_crypto_key { + int type; + struct ceph_timespec created; + int len; + void *key; +}; + +static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) +{ + kfree(key->key); +} + +extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, + void **p, void *end); +extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, + void **p, void *end); +extern int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); + +/* crypto.c */ +extern int ceph_decrypt(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src, size_t src_len); +extern int ceph_encrypt(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src, size_t src_len); +extern int ceph_decrypt2(struct ceph_crypto_key *secret, + void *dst1, size_t *dst1_len, + void *dst2, size_t *dst2_len, + const void *src, size_t src_len); +extern int ceph_encrypt2(struct ceph_crypto_key *secret, + void *dst, size_t *dst_len, + const void *src1, size_t src1_len, + const void *src2, size_t src2_len); + +/* armor.c */ +extern int ceph_armor(char *dst, const void *src, const void *end); +extern int ceph_unarmor(void *dst, const char *src, const char *end); + +#endif -- cgit v1.2.3 From ec0994e48ea2aebf62ff08376227f3a9ccf46262 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 2 Feb 2010 16:25:35 -0800 Subject: ceph: add support for auth_x authentication protocol The auth_x protocol implements support for a kerberos-like mutual authentication infrastructure used by Ceph. We do not simply use vanilla kerberos because of scalability and performance issues when dealing with a large cluster of nodes providing a single logical service. Auth_x provides mutual authentication of client and server and protects against replay and man in the middle attacks. It does not encrypt the full session over the wire, however, so data payload may still be snooped. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/Makefile | 1 + fs/ceph/auth.c | 6 +- fs/ceph/auth_x.c | 656 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/auth_x.h | 49 ++++ fs/ceph/auth_x_protocol.h | 90 +++++++ 5 files changed, 801 insertions(+), 1 deletion(-) create mode 100644 fs/ceph/auth_x.c create mode 100644 fs/ceph/auth_x.h create mode 100644 fs/ceph/auth_x_protocol.h (limited to 'fs/ceph/Makefile') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 85a588e43e7d..6a660e610be8 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -15,6 +15,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ debugfs.o \ auth.o auth_none.o \ crypto.o armor.o \ + auth_x.o \ ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index b34ce0e41b4c..abb204fea6c7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -5,6 +5,7 @@ #include "types.h" #include "auth_none.h" +#include "auth_x.h" #include "decode.h" #include "super.h" @@ -14,7 +15,8 @@ * get protocol handler */ static u32 supported_protocols[] = { - CEPH_AUTH_NONE + CEPH_AUTH_NONE, + CEPH_AUTH_CEPHX }; int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) @@ -22,6 +24,8 @@ int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) switch (protocol) { case CEPH_AUTH_NONE: return ceph_auth_none_init(ac); + case CEPH_AUTH_CEPHX: + return ceph_x_init(ac); default: return -ENOENT; } diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c new file mode 100644 index 000000000000..f0318427b6da --- /dev/null +++ b/fs/ceph/auth_x.c @@ -0,0 +1,656 @@ + +#include "ceph_debug.h" + +#include +#include +#include + +#include "auth_x.h" +#include "auth_x_protocol.h" +#include "crypto.h" +#include "auth.h" +#include "decode.h" + +struct kmem_cache *ceph_x_ticketbuf_cachep; + +#define TEMP_TICKET_BUF_LEN 256 + +static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); + +static int ceph_x_is_authenticated(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_is_authenticated want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return (ac->want_keys & xi->have_keys) == ac->want_keys; +} + +static int ceph_x_encrypt(struct ceph_crypto_key *secret, + void *ibuf, int ilen, void *obuf, size_t olen) +{ + struct ceph_x_encrypt_header head = { + .struct_v = 1, + .magic = cpu_to_le64(CEPHX_ENC_MAGIC) + }; + size_t len = olen - sizeof(u32); + int ret; + + ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, + &head, sizeof(head), ibuf, ilen); + if (ret) + return ret; + ceph_encode_32(&obuf, len); + return len + sizeof(u32); +} + +static int ceph_x_decrypt(struct ceph_crypto_key *secret, + void **p, void *end, void *obuf, size_t olen) +{ + struct ceph_x_encrypt_header head; + size_t head_len = sizeof(head); + int len, ret; + + len = ceph_decode_32(p); + if (*p + len > end) + return -EINVAL; + + dout("ceph_x_decrypt len %d\n", len); + ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, + *p, len); + if (ret) + return ret; + if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) + return -EPERM; + *p += len; + return olen; +} + +/* + * get existing (or insert new) ticket handler + */ +struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, + int service) +{ + struct ceph_x_ticket_handler *th; + struct ceph_x_info *xi = ac->private; + struct rb_node *parent = NULL, **p = &xi->ticket_handlers.rb_node; + + while (*p) { + parent = *p; + th = rb_entry(parent, struct ceph_x_ticket_handler, node); + if (service < th->service) + p = &(*p)->rb_left; + else if (service > th->service) + p = &(*p)->rb_right; + else + return th; + } + + /* add it */ + th = kzalloc(sizeof(*th), GFP_NOFS); + if (!th) + return ERR_PTR(-ENOMEM); + th->service = service; + rb_link_node(&th->node, parent, p); + rb_insert_color(&th->node, &xi->ticket_handlers); + return th; +} + +static void remove_ticket_handler(struct ceph_auth_client *ac, + struct ceph_x_ticket_handler *th) +{ + struct ceph_x_info *xi = ac->private; + + dout("remove_ticket_handler %p %d\n", th, th->service); + rb_erase(&th->node, &xi->ticket_handlers); + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + kfree(th); +} + +static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, + struct ceph_crypto_key *secret, + void *buf, void *end) +{ + struct ceph_x_info *xi = ac->private; + int num; + void *p = buf; + int ret; + char *dbuf; + char *ticket_buf; + u8 struct_v; + + dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); + if (!dbuf) + return -ENOMEM; + + ret = -ENOMEM; + ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, + GFP_NOFS | GFP_ATOMIC); + if (!ticket_buf) + goto out_dbuf; + + ceph_decode_need(&p, end, 1 + sizeof(u32), bad); + struct_v = ceph_decode_8(&p); + if (struct_v != 1) + goto bad; + num = ceph_decode_32(&p); + dout("%d tickets\n", num); + while (num--) { + int type; + u8 struct_v; + struct ceph_x_ticket_handler *th; + void *dp, *dend; + int dlen; + char is_enc; + struct timespec validity; + struct ceph_crypto_key old_key; + void *tp, *tpend; + + ceph_decode_need(&p, end, sizeof(u32) + 1, bad); + + type = ceph_decode_32(&p); + dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); + + struct_v = ceph_decode_8(&p); + if (struct_v != 1) + goto bad; + + th = get_ticket_handler(ac, type); + if (IS_ERR(th)) { + ret = PTR_ERR(th); + goto out; + } + + /* blob for me */ + dlen = ceph_x_decrypt(secret, &p, end, dbuf, + TEMP_TICKET_BUF_LEN); + if (dlen <= 0) { + ret = dlen; + goto out; + } + dout(" decrypted %d bytes\n", dlen); + dend = dbuf + dlen; + dp = dbuf; + + struct_v = ceph_decode_8(&dp); + if (struct_v != 1) + goto bad; + + memcpy(&old_key, &th->session_key, sizeof(old_key)); + ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); + if (ret) + goto out; + + ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); + ceph_decode_timespec(&validity, &th->validity); + th->expires = get_seconds() + validity.tv_sec; + th->renew_after = th->expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", th->expires, + th->renew_after); + + /* ticket blob for service */ + ceph_decode_8_safe(&p, end, is_enc, bad); + tp = ticket_buf; + if (is_enc) { + /* encrypted */ + dout(" encrypted ticket\n"); + dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, + TEMP_TICKET_BUF_LEN); + if (dlen < 0) { + ret = dlen; + goto out; + } + dlen = ceph_decode_32(&tp); + } else { + /* unencrypted */ + ceph_decode_32_safe(&p, end, dlen, bad); + ceph_decode_need(&p, end, dlen, bad); + ceph_decode_copy(&p, ticket_buf, dlen); + } + tpend = tp + dlen; + dout(" ticket blob is %d bytes\n", dlen); + ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); + struct_v = ceph_decode_8(&tp); + th->secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); + if (ret) + goto out; + dout(" got ticket service %d (%s) secret_id %lld len %d\n", + type, ceph_entity_type_name(type), th->secret_id, + (int)th->ticket_blob->vec.iov_len); + xi->have_keys |= th->service; + } + + ret = 0; +out: + kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); +out_dbuf: + kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); + return ret; + +bad: + ret = -EINVAL; + goto out; +} + +static int ceph_x_build_authorizer(struct ceph_auth_client *ac, + struct ceph_x_ticket_handler *th, + struct ceph_x_authorizer *au) +{ + int len; + struct ceph_x_authorize_a *msg_a; + struct ceph_x_authorize_b msg_b; + void *p, *end; + int ret; + int ticket_blob_len = + (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0); + + dout("build_authorizer for %s %p\n", + ceph_entity_type_name(th->service), au); + + len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + + ticket_blob_len + 16; + dout(" need len %d\n", len); + if (au->buf && au->buf->alloc_len < len) { + ceph_buffer_put(au->buf); + au->buf = NULL; + } + if (!au->buf) { + au->buf = ceph_buffer_new(len, GFP_NOFS); + if (!au->buf) + return -ENOMEM; + } + au->service = th->service; + + msg_a = au->buf->vec.iov_base; + msg_a->struct_v = 1; + msg_a->global_id = cpu_to_le64(ac->global_id); + msg_a->service_id = cpu_to_le32(th->service); + msg_a->ticket_blob.struct_v = 1; + msg_a->ticket_blob.secret_id = cpu_to_le64(th->secret_id); + msg_a->ticket_blob.blob_len = cpu_to_le32(ticket_blob_len); + if (ticket_blob_len) { + memcpy(msg_a->ticket_blob.blob, th->ticket_blob->vec.iov_base, + th->ticket_blob->vec.iov_len); + } + dout(" th %p secret_id %lld %lld\n", th, th->secret_id, + le64_to_cpu(msg_a->ticket_blob.secret_id)); + + p = msg_a + 1; + p += ticket_blob_len; + end = au->buf->vec.iov_base + au->buf->vec.iov_len; + + get_random_bytes(&au->nonce, sizeof(au->nonce)); + msg_b.struct_v = 1; + msg_b.nonce = cpu_to_le64(au->nonce); + ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b), + p, end - p); + if (ret < 0) + goto out_buf; + p += ret; + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + dout(" built authorizer nonce %llx len %d\n", au->nonce, + (int)au->buf->vec.iov_len); + return 0; + +out_buf: + ceph_buffer_put(au->buf); + au->buf = NULL; + return ret; +} + +static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th, + void **p, void *end) +{ + ceph_decode_need(p, end, 1 + sizeof(u64), bad); + ceph_encode_8(p, 1); + ceph_encode_64(p, th->secret_id); + if (th->ticket_blob) { + const char *buf = th->ticket_blob->vec.iov_base; + u32 len = th->ticket_blob->vec.iov_len; + + ceph_encode_32_safe(p, end, len, bad); + ceph_encode_copy_safe(p, end, buf, len, bad); + } else { + ceph_encode_32_safe(p, end, 0, bad); + } + + return 0; +bad: + return -ERANGE; +} + +static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) +{ + int want = ac->want_keys; + struct ceph_x_info *xi = ac->private; + int service; + + *pneed = ac->want_keys & ~(xi->have_keys); + + for (service = 1; service <= want; service <<= 1) { + struct ceph_x_ticket_handler *th; + + if (!(ac->want_keys & service)) + continue; + + if (*pneed & service) + continue; + + th = get_ticket_handler(ac, service); + + if (!th) { + *pneed |= service; + continue; + } + + if (get_seconds() >= th->renew_after) + *pneed |= service; + if (get_seconds() >= th->expires) + xi->have_keys &= ~service; + } +} + + +static int ceph_x_build_request(struct ceph_auth_client *ac, + void *buf, void *end) +{ + struct ceph_x_info *xi = ac->private; + int need; + struct ceph_x_request_header *head = buf; + int ret; + struct ceph_x_ticket_handler *th = + get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); + + ceph_x_validate_tickets(ac, &need); + + dout("build_request want %x have %x need %x\n", + ac->want_keys, xi->have_keys, need); + + if (need & CEPH_ENTITY_TYPE_AUTH) { + struct ceph_x_authenticate *auth = (void *)(head + 1); + void *p = auth + 1; + struct ceph_x_challenge_blob tmp; + char tmp_enc[40]; + u64 *u; + + if (p > end) + return -ERANGE; + + dout(" get_auth_session_key\n"); + head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY); + + /* encrypt and hash */ + get_random_bytes(&auth->client_challenge, sizeof(u64)); + tmp.client_challenge = auth->client_challenge; + tmp.server_challenge = cpu_to_le64(xi->server_challenge); + ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), + tmp_enc, sizeof(tmp_enc)); + if (ret < 0) + return ret; + + auth->struct_v = 1; + auth->key = 0; + for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) + auth->key ^= *u; + dout(" server_challenge %llx client_challenge %llx key %llx\n", + xi->server_challenge, le64_to_cpu(auth->client_challenge), + le64_to_cpu(auth->key)); + + /* now encode the old ticket if exists */ + ret = ceph_x_encode_ticket(th, &p, end); + if (ret < 0) + return ret; + + return p - buf; + } + + if (need) { + void *p = head + 1; + struct ceph_x_service_ticket_request *req; + + if (p > end) + return -ERANGE; + head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); + + BUG_ON(!th); + ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); + if (ret) + return ret; + ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base, + xi->auth_authorizer.buf->vec.iov_len); + + req = p; + req->keys = cpu_to_le32(need); + p += sizeof(*req); + return p - buf; + } + + return 0; +} + +static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, + void *buf, void *end) +{ + struct ceph_x_info *xi = ac->private; + struct ceph_x_reply_header *head = buf; + struct ceph_x_ticket_handler *th; + int len = end - buf; + int op; + int ret; + + if (result) + return result; /* XXX hmm? */ + + if (xi->starting) { + /* it's a hello */ + struct ceph_x_server_challenge *sc = buf; + + if (len != sizeof(*sc)) + return -EINVAL; + xi->server_challenge = le64_to_cpu(sc->server_challenge); + dout("handle_reply got server challenge %llx\n", + xi->server_challenge); + xi->starting = false; + xi->have_keys &= ~CEPH_ENTITY_TYPE_AUTH; + return -EAGAIN; + } + + op = le32_to_cpu(head->op); + result = le32_to_cpu(head->result); + dout("handle_reply op %d result %d\n", op, result); + switch (op) { + case CEPHX_GET_AUTH_SESSION_KEY: + /* verify auth key */ + ret = ceph_x_proc_ticket_reply(ac, &xi->secret, + buf + sizeof(*head), end); + break; + + case CEPHX_GET_PRINCIPAL_SESSION_KEY: + th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); + BUG_ON(!th); + ret = ceph_x_proc_ticket_reply(ac, &th->session_key, + buf + sizeof(*head), end); + break; + + default: + return -EINVAL; + } + if (ret) + return ret; + if (ac->want_keys == xi->have_keys) + return 0; + return -EAGAIN; +} + +static int ceph_x_create_authorizer( + struct ceph_auth_client *ac, int peer_type, + struct ceph_authorizer **a, + void **buf, size_t *len, + void **reply_buf, size_t *reply_len) +{ + struct ceph_x_authorizer *au; + struct ceph_x_ticket_handler *th; + int ret; + + th = get_ticket_handler(ac, peer_type); + if (IS_ERR(th)) + return PTR_ERR(th); + + au = kzalloc(sizeof(*au), GFP_NOFS); + if (!au) + return -ENOMEM; + + ret = ceph_x_build_authorizer(ac, th, au); + if (ret) { + kfree(au); + return ret; + } + + *a = (struct ceph_authorizer *)au; + *buf = au->buf->vec.iov_base; + *len = au->buf->vec.iov_len; + *reply_buf = au->reply_buf; + *reply_len = sizeof(au->reply_buf); + return 0; +} + +static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len) +{ + struct ceph_x_authorizer *au = (void *)a; + struct ceph_x_ticket_handler *th; + int ret = 0; + struct ceph_x_authorize_reply reply; + void *p = au->reply_buf; + void *end = p + sizeof(au->reply_buf); + + th = get_ticket_handler(ac, au->service); + if (!th) + return -EIO; /* hrm! */ + ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); + if (ret < 0) + return ret; + if (ret != sizeof(reply)) + return -EPERM; + + if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) + ret = -EPERM; + else + ret = 0; + dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", + au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); + return ret; +} + +static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, + struct ceph_authorizer *a) +{ + struct ceph_x_authorizer *au = (void *)a; + + ceph_buffer_put(au->buf); + kfree(au); +} + + +static void ceph_x_reset(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + + dout("reset\n"); + xi->starting = true; + xi->server_challenge = 0; +} + +static void ceph_x_destroy(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + struct rb_node *p; + + dout("ceph_x_destroy %p\n", ac); + ceph_crypto_key_destroy(&xi->secret); + + while ((p = rb_first(&xi->ticket_handlers)) != NULL) { + struct ceph_x_ticket_handler *th = + rb_entry(p, struct ceph_x_ticket_handler, node); + remove_ticket_handler(ac, th); + } + + kmem_cache_destroy(ceph_x_ticketbuf_cachep); + + kfree(ac->private); + ac->private = NULL; +} + +static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, + int peer_type) +{ + struct ceph_x_ticket_handler *th; + + th = get_ticket_handler(ac, peer_type); + if (th && !IS_ERR(th)) + remove_ticket_handler(ac, th); +} + + +static const struct ceph_auth_client_ops ceph_x_ops = { + .is_authenticated = ceph_x_is_authenticated, + .build_request = ceph_x_build_request, + .handle_reply = ceph_x_handle_reply, + .create_authorizer = ceph_x_create_authorizer, + .verify_authorizer_reply = ceph_x_verify_authorizer_reply, + .destroy_authorizer = ceph_x_destroy_authorizer, + .invalidate_authorizer = ceph_x_invalidate_authorizer, + .reset = ceph_x_reset, + .destroy = ceph_x_destroy, +}; + + +int ceph_x_init(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi; + int ret; + + dout("ceph_x_init %p\n", ac); + xi = kzalloc(sizeof(*xi), GFP_NOFS); + if (!xi) + return -ENOMEM; + + ret = -ENOMEM; + ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf", + TEMP_TICKET_BUF_LEN, 8, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + NULL); + if (!ceph_x_ticketbuf_cachep) + goto done_nomem; + ret = -EINVAL; + if (!ac->secret) { + pr_err("no secret set (for auth_x protocol)\n"); + goto done_nomem; + } + + ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); + if (ret) + goto done_nomem; + + xi->starting = true; + xi->ticket_handlers = RB_ROOT; + + ac->protocol = CEPH_AUTH_CEPHX; + ac->private = xi; + ac->ops = &ceph_x_ops; + return 0; + +done_nomem: + kfree(xi); + if (ceph_x_ticketbuf_cachep) + kmem_cache_destroy(ceph_x_ticketbuf_cachep); + return ret; +} + + diff --git a/fs/ceph/auth_x.h b/fs/ceph/auth_x.h new file mode 100644 index 000000000000..ff6f8180e681 --- /dev/null +++ b/fs/ceph/auth_x.h @@ -0,0 +1,49 @@ +#ifndef _FS_CEPH_AUTH_X_H +#define _FS_CEPH_AUTH_X_H + +#include + +#include "crypto.h" +#include "auth.h" +#include "auth_x_protocol.h" + +/* + * Handle ticket for a single service. + */ +struct ceph_x_ticket_handler { + struct rb_node node; + unsigned service; + + struct ceph_crypto_key session_key; + struct ceph_timespec validity; + + u64 secret_id; + struct ceph_buffer *ticket_blob; + + unsigned long renew_after, expires; +}; + + +struct ceph_x_authorizer { + struct ceph_buffer *buf; + unsigned service; + u64 nonce; + char reply_buf[128]; /* big enough for encrypted blob */ +}; + +struct ceph_x_info { + struct ceph_crypto_key secret; + + bool starting; + u64 server_challenge; + + unsigned have_keys; + struct rb_root ticket_handlers; + + struct ceph_x_authorizer auth_authorizer; +}; + +extern int ceph_x_init(struct ceph_auth_client *ac); + +#endif + diff --git a/fs/ceph/auth_x_protocol.h b/fs/ceph/auth_x_protocol.h new file mode 100644 index 000000000000..671d30576c4f --- /dev/null +++ b/fs/ceph/auth_x_protocol.h @@ -0,0 +1,90 @@ +#ifndef __FS_CEPH_AUTH_X_PROTOCOL +#define __FS_CEPH_AUTH_X_PROTOCOL + +#define CEPHX_GET_AUTH_SESSION_KEY 0x0100 +#define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200 +#define CEPHX_GET_ROTATING_KEY 0x0400 + +/* common bits */ +struct ceph_x_ticket_blob { + __u8 struct_v; + __le64 secret_id; + __le32 blob_len; + char blob[]; +} __attribute__ ((packed)); + + +/* common request/reply headers */ +struct ceph_x_request_header { + __le16 op; +} __attribute__ ((packed)); + +struct ceph_x_reply_header { + __le16 op; + __le32 result; +} __attribute__ ((packed)); + + +/* authenticate handshake */ + +/* initial hello (no reply header) */ +struct ceph_x_server_challenge { + __u8 struct_v; + __le64 server_challenge; +} __attribute__ ((packed)); + +struct ceph_x_authenticate { + __u8 struct_v; + __le64 client_challenge; + __le64 key; + /* ticket blob */ +} __attribute__ ((packed)); + +struct ceph_x_service_ticket_request { + __u8 struct_v; + __le32 keys; +} __attribute__ ((packed)); + +struct ceph_x_challenge_blob { + __le64 server_challenge; + __le64 client_challenge; +} __attribute__ ((packed)); + + + +/* authorize handshake */ + +/* + * The authorizer consists of two pieces: + * a - service id, ticket blob + * b - encrypted with session key + */ +struct ceph_x_authorize_a { + __u8 struct_v; + __le64 global_id; + __le32 service_id; + struct ceph_x_ticket_blob ticket_blob; +} __attribute__ ((packed)); + +struct ceph_x_authorize_b { + __u8 struct_v; + __le64 nonce; +} __attribute__ ((packed)); + +struct ceph_x_authorize_reply { + __u8 struct_v; + __le64 nonce_plus_one; +} __attribute__ ((packed)); + + +/* + * encyption bundle + */ +#define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull + +struct ceph_x_encrypt_header { + __u8 struct_v; + __le64 magic; +} __attribute__ ((packed)); + +#endif -- cgit v1.2.3