diff options
author | Yehuda Sadeh <yehuda@hq.newdream.net> | 2010-04-07 00:14:15 +0200 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-10-21 00:37:28 +0200 |
commit | 3d14c5d2b6e15c21d8e5467dc62d33127c23a644 (patch) | |
tree | 7d123c47847df9d1e865b6b78dc7da3fe739b704 /net | |
parent | ceph-rbd: osdc support for osd call and rollback operations (diff) | |
download | linux-3d14c5d2b6e15c21d8e5467dc62d33127c23a644.tar.xz linux-3d14c5d2b6e15c21d8e5467dc62d33127c23a644.zip |
ceph: factor out libceph from Ceph file system
This factors out protocol and low-level storage parts of ceph into a
separate libceph module living in net/ceph and include/linux/ceph. This
is mostly a matter of moving files around. However, a few key pieces
of the interface change as well:
- ceph_client becomes ceph_fs_client and ceph_client, where the latter
captures the mon and osd clients, and the fs_client gets the mds client
and file system specific pieces.
- Mount option parsing and debugfs setup is correspondingly broken into
two pieces.
- The mon client gets a generic handler callback for otherwise unknown
messages (mds map, in this case).
- The basic supported/required feature bits can be expanded (and are by
ceph_fs_client).
No functional change, aside from some subtle error handling cases that got
cleaned up in the refactoring process.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to '')
-rw-r--r-- | net/Kconfig | 1 | ||||
-rw-r--r-- | net/Makefile | 1 | ||||
-rw-r--r-- | net/ceph/Kconfig | 28 | ||||
-rw-r--r-- | net/ceph/Makefile | 37 | ||||
-rw-r--r-- | net/ceph/armor.c (renamed from fs/ceph/armor.c) | 0 | ||||
-rw-r--r-- | net/ceph/auth.c (renamed from fs/ceph/auth.c) | 10 | ||||
-rw-r--r-- | net/ceph/auth_none.c (renamed from fs/ceph/auth_none.c) | 7 | ||||
-rw-r--r-- | net/ceph/auth_none.h (renamed from fs/ceph/auth_none.h) | 3 | ||||
-rw-r--r-- | net/ceph/auth_x.c (renamed from fs/ceph/auth_x.c) | 9 | ||||
-rw-r--r-- | net/ceph/auth_x.h (renamed from fs/ceph/auth_x.h) | 3 | ||||
-rw-r--r-- | net/ceph/auth_x_protocol.h (renamed from fs/ceph/auth_x_protocol.h) | 0 | ||||
-rw-r--r-- | net/ceph/buffer.c (renamed from fs/ceph/buffer.c) | 9 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 529 | ||||
-rw-r--r-- | net/ceph/ceph_fs.c (renamed from fs/ceph/ceph_fs.c) | 5 | ||||
-rw-r--r-- | net/ceph/ceph_hash.c (renamed from fs/ceph/ceph_hash.c) | 2 | ||||
-rw-r--r-- | net/ceph/ceph_strings.c | 84 | ||||
-rw-r--r-- | net/ceph/crush/crush.c (renamed from fs/ceph/crush/crush.c) | 2 | ||||
-rw-r--r-- | net/ceph/crush/hash.c (renamed from fs/ceph/crush/hash.c) | 2 | ||||
-rw-r--r-- | net/ceph/crush/mapper.c (renamed from fs/ceph/crush/mapper.c) | 4 | ||||
-rw-r--r-- | net/ceph/crypto.c (renamed from fs/ceph/crypto.c) | 4 | ||||
-rw-r--r-- | net/ceph/crypto.h (renamed from fs/ceph/crypto.h) | 4 | ||||
-rw-r--r-- | net/ceph/debugfs.c | 268 | ||||
-rw-r--r-- | net/ceph/messenger.c (renamed from fs/ceph/messenger.c) | 77 | ||||
-rw-r--r-- | net/ceph/mon_client.c (renamed from fs/ceph/mon_client.c) | 73 | ||||
-rw-r--r-- | net/ceph/msgpool.c (renamed from fs/ceph/msgpool.c) | 4 | ||||
-rw-r--r-- | net/ceph/osd_client.c (renamed from fs/ceph/osd_client.c) | 54 | ||||
-rw-r--r-- | net/ceph/osdmap.c (renamed from fs/ceph/osdmap.c) | 17 | ||||
-rw-r--r-- | net/ceph/pagelist.c (renamed from fs/ceph/pagelist.c) | 6 | ||||
-rw-r--r-- | net/ceph/pagevec.c | 223 |
29 files changed, 1342 insertions, 124 deletions
diff --git a/net/Kconfig b/net/Kconfig index e926884c1675..55fd82e9ffd9 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -293,6 +293,7 @@ source "net/wimax/Kconfig" source "net/rfkill/Kconfig" source "net/9p/Kconfig" source "net/caif/Kconfig" +source "net/ceph/Kconfig" endif # if NET diff --git a/net/Makefile b/net/Makefile index ea60fbce9b1b..6b7bfd7f1416 100644 --- a/net/Makefile +++ b/net/Makefile @@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ +obj-$(CONFIG_CEPH_LIB) += ceph/ diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig new file mode 100644 index 000000000000..ad424049b0cf --- /dev/null +++ b/net/ceph/Kconfig @@ -0,0 +1,28 @@ +config CEPH_LIB + tristate "Ceph core library (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL + select LIBCRC32C + select CRYPTO_AES + select CRYPTO + default n + help + Choose Y or M here to include cephlib, which provides the + common functionality to both the Ceph filesystem and + to the rados block device (rbd). + + More information at http://ceph.newdream.net/. + + If unsure, say N. + +config CEPH_LIB_PRETTYDEBUG + bool "Include file:line in ceph debug output" + depends on CEPH_LIB + default n + help + If you say Y here, debug output will include a filename and + line to aid debugging. This increases kernel size and slows + execution slightly when debug call sites are enabled (e.g., + via CONFIG_DYNAMIC_DEBUG). + + If unsure, say N. + diff --git a/net/ceph/Makefile b/net/ceph/Makefile new file mode 100644 index 000000000000..aab1cabb8035 --- /dev/null +++ b/net/ceph/Makefile @@ -0,0 +1,37 @@ +# +# Makefile for CEPH filesystem. +# + +ifneq ($(KERNELRELEASE),) + +obj-$(CONFIG_CEPH_LIB) += libceph.o + +libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ + mon_client.o \ + osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ + debugfs.o \ + auth.o auth_none.o \ + crypto.o armor.o \ + auth_x.o \ + ceph_fs.o ceph_strings.o ceph_hash.o \ + pagevec.o + +else +#Otherwise we were called directly from the command +# line; invoke the kernel build system. + +KERNELDIR ?= /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) + +default: all + +all: + $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules + +modules_install: + $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install + +clean: + $(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +endif diff --git a/fs/ceph/armor.c b/net/ceph/armor.c index eb2a666b0be7..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/net/ceph/armor.c diff --git a/fs/ceph/auth.c b/net/ceph/auth.c index 6d2e30600627..549c1f43e1d5 100644 --- a/fs/ceph/auth.c +++ b/net/ceph/auth.c @@ -1,16 +1,16 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> -#include "types.h" +#include <linux/ceph/types.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/libceph.h> +#include <linux/ceph/messenger.h> #include "auth_none.h" #include "auth_x.h" -#include "decode.h" -#include "super.h" -#include "messenger.h" /* * get protocol handler diff --git a/fs/ceph/auth_none.c b/net/ceph/auth_none.c index ad1dc21286c7..214c2bb43d62 100644 --- a/fs/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -1,14 +1,15 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/err.h> #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/auth.h> + #include "auth_none.h" -#include "auth.h" -#include "decode.h" static void reset(struct ceph_auth_client *ac) { diff --git a/fs/ceph/auth_none.h b/net/ceph/auth_none.h index 8164df1a08be..ed7d088b1bc9 100644 --- a/fs/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -2,8 +2,7 @@ #define _FS_CEPH_AUTH_NONE_H #include <linux/slab.h> - -#include "auth.h" +#include <linux/ceph/auth.h> /* * null security mode. diff --git a/fs/ceph/auth_x.c b/net/ceph/auth_x.c index a2d002cbdec2..7fd5dfcf6e18 100644 --- a/fs/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -1,16 +1,17 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/err.h> #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/auth.h> + +#include "crypto.h" #include "auth_x.h" #include "auth_x_protocol.h" -#include "crypto.h" -#include "auth.h" -#include "decode.h" #define TEMP_TICKET_BUF_LEN 256 diff --git a/fs/ceph/auth_x.h b/net/ceph/auth_x.h index ff6f8180e681..e02da7a5c5a1 100644 --- a/fs/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -3,8 +3,9 @@ #include <linux/rbtree.h> +#include <linux/ceph/auth.h> + #include "crypto.h" -#include "auth.h" #include "auth_x_protocol.h" /* diff --git a/fs/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 671d30576c4f..671d30576c4f 100644 --- a/fs/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h diff --git a/fs/ceph/buffer.c b/net/ceph/buffer.c index cd39f17021de..53d8abfa25d5 100644 --- a/fs/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -1,10 +1,11 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> +#include <linux/module.h> #include <linux/slab.h> -#include "buffer.h" -#include "decode.h" +#include <linux/ceph/buffer.h> +#include <linux/ceph/decode.h> struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -32,6 +33,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) dout("buffer_new %p\n", b); return b; } +EXPORT_SYMBOL(ceph_buffer_new); void ceph_buffer_release(struct kref *kref) { @@ -46,6 +48,7 @@ void ceph_buffer_release(struct kref *kref) } kfree(b); } +EXPORT_SYMBOL(ceph_buffer_release); int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) { diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c new file mode 100644 index 000000000000..f6f2eebc0767 --- /dev/null +++ b/net/ceph/ceph_common.c @@ -0,0 +1,529 @@ + +#include <linux/ceph/ceph_debug.h> +#include <linux/backing-dev.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/inet.h> +#include <linux/in6.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/parser.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/statfs.h> +#include <linux/string.h> + + +#include <linux/ceph/libceph.h> +#include <linux/ceph/debugfs.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/mon_client.h> +#include <linux/ceph/auth.h> + + + +/* + * find filename portion of a path (/foo/bar/baz -> baz) + */ +const char *ceph_file_part(const char *s, int len) +{ + const char *e = s + len; + + while (e != s && *(e-1) != '/') + e--; + return e; +} +EXPORT_SYMBOL(ceph_file_part); + +const char *ceph_msg_type_name(int type) +{ + switch (type) { + case CEPH_MSG_SHUTDOWN: return "shutdown"; + case CEPH_MSG_PING: return "ping"; + case CEPH_MSG_AUTH: return "auth"; + case CEPH_MSG_AUTH_REPLY: return "auth_reply"; + case CEPH_MSG_MON_MAP: return "mon_map"; + case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; + case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; + case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; + case CEPH_MSG_STATFS: return "statfs"; + case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; + case CEPH_MSG_MDS_MAP: return "mds_map"; + case CEPH_MSG_CLIENT_SESSION: return "client_session"; + case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; + case CEPH_MSG_CLIENT_REQUEST: return "client_request"; + case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; + case CEPH_MSG_CLIENT_REPLY: return "client_reply"; + case CEPH_MSG_CLIENT_CAPS: return "client_caps"; + case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; + case CEPH_MSG_CLIENT_SNAP: return "client_snap"; + case CEPH_MSG_CLIENT_LEASE: return "client_lease"; + case CEPH_MSG_OSD_MAP: return "osd_map"; + case CEPH_MSG_OSD_OP: return "osd_op"; + case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; + default: return "unknown"; + } +} +EXPORT_SYMBOL(ceph_msg_type_name); + +/* + * Initially learn our fsid, or verify an fsid matches. + */ +int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) +{ + if (client->have_fsid) { + if (ceph_fsid_compare(&client->fsid, fsid)) { + pr_err("bad fsid, had %pU got %pU", + &client->fsid, fsid); + return -1; + } + } else { + pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); + memcpy(&client->fsid, fsid, sizeof(*fsid)); + ceph_debugfs_client_init(client); + client->have_fsid = true; + } + return 0; +} +EXPORT_SYMBOL(ceph_check_fsid); + +static int strcmp_null(const char *s1, const char *s2) +{ + if (!s1 && !s2) + return 0; + if (s1 && !s2) + return -1; + if (!s1 && s2) + return 1; + return strcmp(s1, s2); +} + +int ceph_compare_options(struct ceph_options *new_opt, + struct ceph_client *client) +{ + struct ceph_options *opt1 = new_opt; + struct ceph_options *opt2 = client->options; + int ofs = offsetof(struct ceph_options, mon_addr); + int i; + int ret; + + ret = memcmp(opt1, opt2, ofs); + if (ret) + return ret; + + ret = strcmp_null(opt1->name, opt2->name); + if (ret) + return ret; + + ret = strcmp_null(opt1->secret, opt2->secret); + if (ret) + return ret; + + /* any matching mon ip implies a match */ + for (i = 0; i < opt1->num_mon; i++) { + if (ceph_monmap_contains(client->monc.monmap, + &opt1->mon_addr[i])) + return 0; + } + return -1; +} +EXPORT_SYMBOL(ceph_compare_options); + + +static int parse_fsid(const char *str, struct ceph_fsid *fsid) +{ + int i = 0; + char tmp[3]; + int err = -EINVAL; + int d; + + dout("parse_fsid '%s'\n", str); + tmp[2] = 0; + while (*str && i < 16) { + if (ispunct(*str)) { + str++; + continue; + } + if (!isxdigit(str[0]) || !isxdigit(str[1])) + break; + tmp[0] = str[0]; + tmp[1] = str[1]; + if (sscanf(tmp, "%x", &d) < 1) + break; + fsid->fsid[i] = d & 0xff; + i++; + str += 2; + } + + if (i == 16) + err = 0; + dout("parse_fsid ret %d got fsid %pU", err, fsid); + return err; +} + +/* + * ceph options + */ +enum { + Opt_osdtimeout, + Opt_osdkeepalivetimeout, + Opt_mount_timeout, + Opt_osd_idle_ttl, + Opt_last_int, + /* int args above */ + Opt_fsid, + Opt_name, + Opt_secret, + Opt_ip, + Opt_last_string, + /* string args above */ + Opt_noshare, + Opt_nocrc, +}; + +static match_table_t opt_tokens = { + {Opt_osdtimeout, "osdtimeout=%d"}, + {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, + {Opt_mount_timeout, "mount_timeout=%d"}, + {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, + /* int args above */ + {Opt_fsid, "fsid=%s"}, + {Opt_name, "name=%s"}, + {Opt_secret, "secret=%s"}, + {Opt_ip, "ip=%s"}, + /* string args above */ + {Opt_noshare, "noshare"}, + {Opt_nocrc, "nocrc"}, + {-1, NULL} +}; + +void ceph_destroy_options(struct ceph_options *opt) +{ + dout("destroy_options %p\n", opt); + kfree(opt->name); + kfree(opt->secret); + kfree(opt); +} +EXPORT_SYMBOL(ceph_destroy_options); + +int ceph_parse_options(struct ceph_options **popt, char *options, + const char *dev_name, const char *dev_name_end, + int (*parse_extra_token)(char *c, void *private), + void *private) +{ + struct ceph_options *opt; + const char *c; + int err = -ENOMEM; + substring_t argstr[MAX_OPT_ARGS]; + + opt = kzalloc(sizeof(*opt), GFP_KERNEL); + if (!opt) + return err; + opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), + GFP_KERNEL); + if (!opt->mon_addr) + goto out; + + dout("parse_options %p options '%s' dev_name '%s'\n", opt, options, + dev_name); + + /* start with defaults */ + opt->flags = CEPH_OPT_DEFAULT; + opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; + opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ + + /* get mon ip(s) */ + /* ip1[:port1][,ip2[:port2]...] */ + err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr, + CEPH_MAX_MON, &opt->num_mon); + if (err < 0) + goto out; + + /* parse mount options */ + while ((c = strsep(&options, ",")) != NULL) { + int token, intval, ret; + if (!*c) + continue; + err = -EINVAL; + token = match_token((char *)c, opt_tokens, argstr); + if (token < 0) { + /* extra? */ + err = parse_extra_token((char *)c, private); + if (err < 0) { + pr_err("bad option at '%s'\n", c); + goto out; + } + continue; + } + if (token < Opt_last_int) { + ret = match_int(&argstr[0], &intval); + if (ret < 0) { + pr_err("bad mount option arg (not int) " + "at '%s'\n", c); + continue; + } + dout("got int token %d val %d\n", token, intval); + } else if (token > Opt_last_int && token < Opt_last_string) { + dout("got string token %d val %s\n", token, + argstr[0].from); + } else { + dout("got token %d\n", token); + } + switch (token) { + case Opt_ip: + err = ceph_parse_ips(argstr[0].from, + argstr[0].to, + &opt->my_addr, + 1, NULL); + if (err < 0) + goto out; + opt->flags |= CEPH_OPT_MYIP; + break; + + case Opt_fsid: + err = parse_fsid(argstr[0].from, &opt->fsid); + if (err == 0) + opt->flags |= CEPH_OPT_FSID; + break; + case Opt_name: + opt->name = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; + case Opt_secret: + opt->secret = kstrndup(argstr[0].from, + argstr[0].to-argstr[0].from, + GFP_KERNEL); + break; + + /* misc */ + case Opt_osdtimeout: + opt->osd_timeout = intval; + break; + case Opt_osdkeepalivetimeout: + opt->osd_keepalive_timeout = intval; + break; + case Opt_osd_idle_ttl: + opt->osd_idle_ttl = intval; + break; + case Opt_mount_timeout: + opt->mount_timeout = intval; + break; + + case Opt_noshare: + opt->flags |= CEPH_OPT_NOSHARE; + break; + + case Opt_nocrc: + opt->flags |= CEPH_OPT_NOCRC; + break; + + default: + BUG_ON(token); + } + } + + /* success */ + *popt = opt; + return 0; + +out: + ceph_destroy_options(opt); + return err; +} +EXPORT_SYMBOL(ceph_parse_options); + +u64 ceph_client_id(struct ceph_client *client) +{ + return client->monc.auth->global_id; +} +EXPORT_SYMBOL(ceph_client_id); + +/* + * create a fresh client instance + */ +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) +{ + struct ceph_client *client; + int err = -ENOMEM; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (client == NULL) + return ERR_PTR(-ENOMEM); + + client->private = private; + client->options = opt; + + mutex_init(&client->mount_mutex); + init_waitqueue_head(&client->auth_wq); + client->auth_err = 0; + + client->extra_mon_dispatch = NULL; + client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; + client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; + + client->msgr = NULL; + + /* subsystems */ + err = ceph_monc_init(&client->monc, client); + if (err < 0) + goto fail; + err = ceph_osdc_init(&client->osdc, client); + if (err < 0) + goto fail_monc; + + return client; + +fail_monc: + ceph_monc_stop(&client->monc); +fail: + kfree(client); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ceph_create_client); + +void ceph_destroy_client(struct ceph_client *client) +{ + dout("destroy_client %p\n", client); + + /* unmount */ + ceph_osdc_stop(&client->osdc); + + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); + + ceph_debugfs_client_cleanup(client); + + if (client->msgr) + ceph_messenger_destroy(client->msgr); + + ceph_destroy_options(client->options); + + kfree(client); + dout("destroy_client %p done\n", client); +} +EXPORT_SYMBOL(ceph_destroy_client); + +/* + * true if we have the mon map (and have thus joined the cluster) + */ +static int have_mon_and_osd_map(struct ceph_client *client) +{ + return client->monc.monmap && client->monc.monmap->epoch && + client->osdc.osdmap && client->osdc.osdmap->epoch; +} + +/* + * mount: join the ceph cluster, and open root directory. + */ +int __ceph_open_session(struct ceph_client *client, unsigned long started) +{ + struct ceph_entity_addr *myaddr = NULL; + int err; + unsigned long timeout = client->options->mount_timeout * HZ; + + /* initialize the messenger */ + if (client->msgr == NULL) { + if (ceph_test_opt(client, MYIP)) + myaddr = &client->options->my_addr; + client->msgr = ceph_messenger_create(myaddr, + client->supported_features, + client->required_features); + if (IS_ERR(client->msgr)) { + client->msgr = NULL; + return PTR_ERR(client->msgr); + } + client->msgr->nocrc = ceph_test_opt(client, NOCRC); + } + + /* open session, and wait for mon and osd maps */ + err = ceph_monc_open_session(&client->monc); + if (err < 0) + return err; + + while (!have_mon_and_osd_map(client)) { + err = -EIO; + if (timeout && time_after_eq(jiffies, started + timeout)) + return err; + + /* wait */ + dout("mount waiting for mon_map\n"); + err = wait_event_interruptible_timeout(client->auth_wq, + have_mon_and_osd_map(client) || (client->auth_err < 0), + timeout); + if (err == -EINTR || err == -ERESTARTSYS) + return err; + if (client->auth_err < 0) + return client->auth_err; + } + + return 0; +} +EXPORT_SYMBOL(__ceph_open_session); + + +int ceph_open_session(struct ceph_client *client) +{ + int ret; + unsigned long started = jiffies; /* note the start time */ + + dout("open_session start\n"); + mutex_lock(&client->mount_mutex); + + ret = __ceph_open_session(client, started); + + mutex_unlock(&client->mount_mutex); + return ret; +} +EXPORT_SYMBOL(ceph_open_session); + + +static int __init init_ceph_lib(void) +{ + int ret = 0; + + ret = ceph_debugfs_init(); + if (ret < 0) + goto out; + + ret = ceph_msgr_init(); + if (ret < 0) + goto out_debugfs; + + pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, + CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, + CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + + return 0; + +out_debugfs: + ceph_debugfs_cleanup(); +out: + return ret; +} + +static void __exit exit_ceph_lib(void) +{ + dout("exit_ceph_lib\n"); + ceph_msgr_exit(); + ceph_debugfs_cleanup(); +} + +module_init(init_ceph_lib); +module_exit(exit_ceph_lib); + +MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); +MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); +MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); +MODULE_DESCRIPTION("Ceph filesystem for Linux"); +MODULE_LICENSE("GPL"); diff --git a/fs/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 3ac6cc7c1156..a3a3a31d3c37 100644 --- a/fs/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -1,7 +1,8 @@ /* * Some non-inline ceph helpers */ -#include "types.h" +#include <linux/module.h> +#include <linux/ceph/types.h> /* * return true if @layout appears to be valid @@ -52,6 +53,7 @@ int ceph_flags_to_mode(int flags) return mode; } +EXPORT_SYMBOL(ceph_flags_to_mode); int ceph_caps_for_mode(int mode) { @@ -70,3 +72,4 @@ int ceph_caps_for_mode(int mode) return caps; } +EXPORT_SYMBOL(ceph_caps_for_mode); diff --git a/fs/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index bd570015d147..815ef8826796 100644 --- a/fs/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -1,5 +1,5 @@ -#include "types.h" +#include <linux/ceph/types.h> /* * Robert Jenkin's hash function. diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c new file mode 100644 index 000000000000..3fbda04de29c --- /dev/null +++ b/net/ceph/ceph_strings.c @@ -0,0 +1,84 @@ +/* + * Ceph string constants + */ +#include <linux/module.h> +#include <linux/ceph/types.h> + +const char *ceph_entity_type_name(int type) +{ + switch (type) { + case CEPH_ENTITY_TYPE_MDS: return "mds"; + case CEPH_ENTITY_TYPE_OSD: return "osd"; + case CEPH_ENTITY_TYPE_MON: return "mon"; + case CEPH_ENTITY_TYPE_CLIENT: return "client"; + case CEPH_ENTITY_TYPE_AUTH: return "auth"; + default: return "unknown"; + } +} + +const char *ceph_osd_op_name(int op) +{ + switch (op) { + case CEPH_OSD_OP_READ: return "read"; + case CEPH_OSD_OP_STAT: return "stat"; + + case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + + case CEPH_OSD_OP_WRITE: return "write"; + case CEPH_OSD_OP_DELETE: return "delete"; + case CEPH_OSD_OP_TRUNCATE: return "truncate"; + case CEPH_OSD_OP_ZERO: return "zero"; + case CEPH_OSD_OP_WRITEFULL: return "writefull"; + case CEPH_OSD_OP_ROLLBACK: return "rollback"; + + case CEPH_OSD_OP_APPEND: return "append"; + case CEPH_OSD_OP_STARTSYNC: return "startsync"; + case CEPH_OSD_OP_SETTRUNC: return "settrunc"; + case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; + + case CEPH_OSD_OP_TMAPUP: return "tmapup"; + case CEPH_OSD_OP_TMAPGET: return "tmapget"; + case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + + case CEPH_OSD_OP_GETXATTR: return "getxattr"; + case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; + case CEPH_OSD_OP_SETXATTR: return "setxattr"; + case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; + case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; + case CEPH_OSD_OP_RMXATTR: return "rmxattr"; + case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; + + case CEPH_OSD_OP_PULL: return "pull"; + case CEPH_OSD_OP_PUSH: return "push"; + case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; + case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; + case CEPH_OSD_OP_SCRUB: return "scrub"; + + case CEPH_OSD_OP_WRLOCK: return "wrlock"; + case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; + case CEPH_OSD_OP_RDLOCK: return "rdlock"; + case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; + case CEPH_OSD_OP_UPLOCK: return "uplock"; + case CEPH_OSD_OP_DNLOCK: return "dnlock"; + + case CEPH_OSD_OP_CALL: return "call"; + + case CEPH_OSD_OP_PGLS: return "pgls"; + } + return "???"; +} + + +const char *ceph_pool_op_name(int op) +{ + switch (op) { + case POOL_OP_CREATE: return "create"; + case POOL_OP_DELETE: return "delete"; + case POOL_OP_AUID_CHANGE: return "auid change"; + case POOL_OP_CREATE_SNAP: return "create snap"; + case POOL_OP_DELETE_SNAP: return "delete snap"; + case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; + case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; + } + return "???"; +} diff --git a/fs/ceph/crush/crush.c b/net/ceph/crush/crush.c index fabd302e5779..d6ebb13a18a4 100644 --- a/fs/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -8,7 +8,7 @@ # define BUG_ON(x) assert(!(x)) #endif -#include "crush.h" +#include <linux/crush/crush.h> const char *crush_bucket_alg_name(int alg) { diff --git a/fs/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5873aed694bf..5bb63e37a8a1 100644 --- a/fs/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -1,6 +1,6 @@ #include <linux/types.h> -#include "hash.h" +#include <linux/crush/hash.h> /* * Robert Jenkins' function for mixing 32-bit values diff --git a/fs/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index a4eec133258e..42599e31dcad 100644 --- a/fs/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -18,8 +18,8 @@ # define kfree(x) free(x) #endif -#include "crush.h" -#include "hash.h" +#include <linux/crush/crush.h> +#include <linux/crush/hash.h> /* * Implement the core CRUSH mapping algorithm. diff --git a/fs/ceph/crypto.c b/net/ceph/crypto.c index a3e627f63293..7b505b0c983f 100644 --- a/fs/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -1,13 +1,13 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <crypto/hash.h> +#include <linux/ceph/decode.h> #include "crypto.h" -#include "decode.h" int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) { diff --git a/fs/ceph/crypto.h b/net/ceph/crypto.h index bdf38607323c..f9eccace592b 100644 --- a/fs/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -1,8 +1,8 @@ #ifndef _FS_CEPH_CRYPTO_H #define _FS_CEPH_CRYPTO_H -#include "types.h" -#include "buffer.h" +#include <linux/ceph/types.h> +#include <linux/ceph/buffer.h> /* * cryptographic secret diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c new file mode 100644 index 000000000000..33d04999f4f2 --- /dev/null +++ b/net/ceph/debugfs.c @@ -0,0 +1,268 @@ +#include <linux/ceph/ceph_debug.h> + +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include <linux/ceph/libceph.h> +#include <linux/ceph/mon_client.h> +#include <linux/ceph/auth.h> +#include <linux/ceph/debugfs.h> + +#ifdef CONFIG_DEBUG_FS + +/* + * Implement /sys/kernel/debug/ceph fun + * + * /sys/kernel/debug/ceph/client* - an instance of the ceph client + * .../osdmap - current osdmap + * .../monmap - current monmap + * .../osdc - active osd requests + * .../monc - mon client state + * .../dentry_lru - dump contents of dentry lru + * .../caps - expose cap (reservation) stats + * .../bdi - symlink to ../../bdi/something + */ + +static struct dentry *ceph_debugfs_dir; + +static int monmap_show(struct seq_file *s, void *p) +{ + int i; + struct ceph_client *client = s->private; + + if (client->monc.monmap == NULL) + return 0; + + seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); + for (i = 0; i < client->monc.monmap->num_mon; i++) { + struct ceph_entity_inst *inst = + &client->monc.monmap->mon_inst[i]; + + seq_printf(s, "\t%s%lld\t%s\n", + ENTITY_NAME(inst->name), + ceph_pr_addr(&inst->addr.in_addr)); + } + return 0; +} + +static int osdmap_show(struct seq_file *s, void *p) +{ + int i; + struct ceph_client *client = s->private; + struct rb_node *n; + + if (client->osdc.osdmap == NULL) + return 0; + seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); + seq_printf(s, "flags%s%s\n", + (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? + " NEARFULL" : "", + (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? + " FULL" : ""); + for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { + struct ceph_pg_pool_info *pool = + rb_entry(n, struct ceph_pg_pool_info, node); + seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", + pool->id, pool->v.pg_num, pool->pg_num_mask, + pool->v.lpg_num, pool->lpg_num_mask); + } + for (i = 0; i < client->osdc.osdmap->max_osd; i++) { + struct ceph_entity_addr *addr = + &client->osdc.osdmap->osd_addr[i]; + int state = client->osdc.osdmap->osd_state[i]; + char sb[64]; + + seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", + i, ceph_pr_addr(&addr->in_addr), + ((client->osdc.osdmap->osd_weight[i]*100) >> 16), + ceph_osdmap_state_str(sb, sizeof(sb), state)); + } + return 0; +} + +static int monc_show(struct seq_file *s, void *p) +{ + struct ceph_client *client = s->private; + struct ceph_mon_generic_request *req; + struct ceph_mon_client *monc = &client->monc; + struct rb_node *rp; + + mutex_lock(&monc->mutex); + + if (monc->have_mdsmap) + seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); + if (monc->have_osdmap) + seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); + if (monc->want_next_osdmap) + seq_printf(s, "want next osdmap\n"); + + for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { + __u16 op; + req = rb_entry(rp, struct ceph_mon_generic_request, node); + op = le16_to_cpu(req->request->hdr.type); + if (op == CEPH_MSG_STATFS) + seq_printf(s, "%lld statfs\n", req->tid); + else + seq_printf(s, "%lld unknown\n", req->tid); + } + + mutex_unlock(&monc->mutex); + return 0; +} + +static int osdc_show(struct seq_file *s, void *pp) +{ + struct ceph_client *client = s->private; + struct ceph_osd_client *osdc = &client->osdc; + struct rb_node *p; + + mutex_lock(&osdc->request_mutex); + for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { + struct ceph_osd_request *req; + struct ceph_osd_request_head *head; + struct ceph_osd_op *op; + int num_ops; + int opcode, olen; + int i; + + req = rb_entry(p, struct ceph_osd_request, r_node); + + seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1, + le32_to_cpu(req->r_pgid.pool), + le16_to_cpu(req->r_pgid.ps)); + + head = req->r_request->front.iov_base; + op = (void *)(head + 1); + + num_ops = le16_to_cpu(head->num_ops); + olen = le32_to_cpu(head->object_len); + seq_printf(s, "%.*s", olen, + (const char *)(head->ops + num_ops)); + + if (req->r_reassert_version.epoch) + seq_printf(s, "\t%u'%llu", + (unsigned)le32_to_cpu(req->r_reassert_version.epoch), + le64_to_cpu(req->r_reassert_version.version)); + else + seq_printf(s, "\t"); + + for (i = 0; i < num_ops; i++) { + opcode = le16_to_cpu(op->op); + seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); + op++; + } + + seq_printf(s, "\n"); + } + mutex_unlock(&osdc->request_mutex); + return 0; +} + +CEPH_DEFINE_SHOW_FUNC(monmap_show) +CEPH_DEFINE_SHOW_FUNC(osdmap_show) +CEPH_DEFINE_SHOW_FUNC(monc_show) +CEPH_DEFINE_SHOW_FUNC(osdc_show) + +int ceph_debugfs_init(void) +{ + ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); + if (!ceph_debugfs_dir) + return -ENOMEM; + return 0; +} + +void ceph_debugfs_cleanup(void) +{ + debugfs_remove(ceph_debugfs_dir); +} + +int ceph_debugfs_client_init(struct ceph_client *client) +{ + int ret = -ENOMEM; + char name[80]; + + snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, + client->monc.auth->global_id); + + client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); + if (!client->debugfs_dir) + goto out; + + client->monc.debugfs_file = debugfs_create_file("monc", + 0600, + client->debugfs_dir, + client, + &monc_show_fops); + if (!client->monc.debugfs_file) + goto out; + + client->osdc.debugfs_file = debugfs_create_file("osdc", + 0600, + client->debugfs_dir, + client, + &osdc_show_fops); + if (!client->osdc.debugfs_file) + goto out; + + client->debugfs_monmap = debugfs_create_file("monmap", + 0600, + client->debugfs_dir, + client, + &monmap_show_fops); + if (!client->debugfs_monmap) + goto out; + + client->debugfs_osdmap = debugfs_create_file("osdmap", + 0600, + client->debugfs_dir, + client, + &osdmap_show_fops); + if (!client->debugfs_osdmap) + goto out; + + return 0; + +out: + ceph_debugfs_client_cleanup(client); + return ret; +} + +void ceph_debugfs_client_cleanup(struct ceph_client *client) +{ + debugfs_remove(client->debugfs_osdmap); + debugfs_remove(client->debugfs_monmap); + debugfs_remove(client->osdc.debugfs_file); + debugfs_remove(client->monc.debugfs_file); + debugfs_remove(client->debugfs_dir); +} + +#else /* CONFIG_DEBUG_FS */ + +int ceph_debugfs_init(void) +{ + return 0; +} + +void ceph_debugfs_cleanup(void) +{ +} + +int ceph_debugfs_client_init(struct ceph_client *client, + int (*module_debugfs_init)(struct ceph_client *)) +{ + return 0; +} + +void ceph_debugfs_client_cleanup(struct ceph_client *client) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +EXPORT_SYMBOL(ceph_debugfs_init); +EXPORT_SYMBOL(ceph_debugfs_cleanup); diff --git a/fs/ceph/messenger.c b/net/ceph/messenger.c index 17a09b32a591..0e8157ee5d43 100644 --- a/fs/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1,4 +1,4 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/crc32c.h> #include <linux/ctype.h> @@ -13,10 +13,10 @@ #include <linux/blkdev.h> #include <net/tcp.h> -#include "super.h" -#include "messenger.h" -#include "decode.h" -#include "pagelist.h" +#include <linux/ceph/libceph.h> +#include <linux/ceph/messenger.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/pagelist.h> /* * Ceph uses the messenger to exchange ceph_msg messages with other @@ -50,7 +50,7 @@ static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; static DEFINE_SPINLOCK(addr_str_lock); static int last_addr_str; -const char *pr_addr(const struct sockaddr_storage *ss) +const char *ceph_pr_addr(const struct sockaddr_storage *ss) { int i; char *s; @@ -81,6 +81,7 @@ const char *pr_addr(const struct sockaddr_storage *ss) return s; } +EXPORT_SYMBOL(ceph_pr_addr); static void encode_my_addr(struct ceph_messenger *msgr) { @@ -93,7 +94,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ struct workqueue_struct *ceph_msgr_wq; -int __init ceph_msgr_init(void) +int ceph_msgr_init(void) { ceph_msgr_wq = create_workqueue("ceph-msgr"); if (IS_ERR(ceph_msgr_wq)) { @@ -104,16 +105,19 @@ int __init ceph_msgr_init(void) } return 0; } +EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { destroy_workqueue(ceph_msgr_wq); } +EXPORT_SYMBOL(ceph_msgr_exit); void ceph_msgr_flush(void) { flush_workqueue(ceph_msgr_wq); } +EXPORT_SYMBOL(ceph_msgr_flush); /* @@ -223,19 +227,19 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) set_sock_callbacks(sock, con); - dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); + dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), sock->sk->sk_state); ret = 0; } if (ret < 0) { pr_err("connect %s error %d\n", - pr_addr(&con->peer_addr.in_addr), ret); + ceph_pr_addr(&con->peer_addr.in_addr), ret); sock_release(sock); con->sock = NULL; con->error_msg = "connect error"; @@ -336,7 +340,8 @@ static void reset_connection(struct ceph_connection *con) */ void ceph_con_close(struct ceph_connection *con) { - dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); + dout("con_close %p peer %s\n", con, + ceph_pr_addr(&con->peer_addr.in_addr)); set_bit(CLOSED, &con->state); /* in case there's queued work */ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ @@ -349,19 +354,21 @@ void ceph_con_close(struct ceph_connection *con) mutex_unlock(&con->mutex); queue_con(con); } +EXPORT_SYMBOL(ceph_con_close); /* * Reopen a closed connection, with a new peer address. */ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) { - dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); + dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); set_bit(OPENING, &con->state); clear_bit(CLOSED, &con->state); memcpy(&con->peer_addr, addr, sizeof(*addr)); con->delay = 0; /* reset backoff memory */ queue_con(con); } +EXPORT_SYMBOL(ceph_con_open); /* * return true if this connection ever successfully opened @@ -408,6 +415,7 @@ void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) INIT_LIST_HEAD(&con->out_sent); INIT_DELAYED_WORK(&con->work, con_work); } +EXPORT_SYMBOL(ceph_con_init); /* @@ -652,7 +660,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); + con->out_connect.features = cpu_to_le64(msgr->supported_features); con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1013,7 +1021,7 @@ static int verify_hello(struct ceph_connection *con) { if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", - pr_addr(&con->peer_addr.in_addr)); + ceph_pr_addr(&con->peer_addr.in_addr)); con->error_msg = "protocol error, bad banner"; return -1; } @@ -1116,7 +1124,7 @@ int ceph_parse_ips(const char *c, const char *end, addr_set_port(ss, port); - dout("parse_ips got %s\n", pr_addr(ss)); + dout("parse_ips got %s\n", ceph_pr_addr(ss)); if (p == end) break; @@ -1136,6 +1144,7 @@ bad: pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); return -EINVAL; } +EXPORT_SYMBOL(ceph_parse_ips); static int process_banner(struct ceph_connection *con) { @@ -1157,9 +1166,9 @@ static int process_banner(struct ceph_connection *con) !(addr_is_blank(&con->actual_peer_addr.in_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warning("wrong peer, want %s/%d, got %s/%d\n", - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), (int)le32_to_cpu(con->peer_addr.nonce), - pr_addr(&con->actual_peer_addr.in_addr), + ceph_pr_addr(&con->actual_peer_addr.in_addr), (int)le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; @@ -1177,7 +1186,7 @@ static int process_banner(struct ceph_connection *con) addr_set_port(&con->msgr->inst.addr.in_addr, port); encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", - pr_addr(&con->msgr->inst.addr.in_addr)); + ceph_pr_addr(&con->msgr->inst.addr.in_addr)); } set_bit(NEGOTIATING, &con->state); @@ -1198,8 +1207,8 @@ static void fail_protocol(struct ceph_connection *con) static int process_connect(struct ceph_connection *con) { - u64 sup_feat = CEPH_FEATURE_SUPPORTED; - u64 req_feat = CEPH_FEATURE_REQUIRED; + u64 sup_feat = con->msgr->supported_features; + u64 req_feat = con->msgr->required_features; u64 server_feat = le64_to_cpu(con->in_reply.features); dout("process_connect on %p tag %d\n", con, (int)con->in_tag); @@ -1209,7 +1218,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; fail_protocol(con); @@ -1219,7 +1228,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; @@ -1253,7 +1262,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_connect.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr)); + ceph_pr_addr(&con->peer_addr.in_addr)); reset_connection(con); prepare_write_connect(con->msgr, con, 0); prepare_read_connect(con); @@ -1298,7 +1307,7 @@ static int process_connect(struct ceph_connection *con) pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; fail_protocol(con); @@ -1525,7 +1534,7 @@ static int read_partial_message(struct ceph_connection *con) if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld, expected %lld\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); @@ -2023,9 +2032,9 @@ out: static void ceph_fault(struct ceph_connection *con) { pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), - pr_addr(&con->peer_addr.in_addr), con->error_msg); + ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", - con, con->state, pr_addr(&con->peer_addr.in_addr)); + con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); if (test_bit(LOSSYTX, &con->state)) { dout("fault on LOSSYTX channel\n"); @@ -2085,7 +2094,9 @@ out: /* * create a new messenger instance */ -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) +struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, + u32 supported_features, + u32 required_features) { struct ceph_messenger *msgr; @@ -2093,6 +2104,9 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) if (msgr == NULL) return ERR_PTR(-ENOMEM); + msgr->supported_features = supported_features; + msgr->required_features = required_features; + spin_lock_init(&msgr->global_seq_lock); /* the zero page is needed if a request is "canceled" while the message @@ -2115,6 +2129,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) dout("messenger_create %p\n", msgr); return msgr; } +EXPORT_SYMBOL(ceph_messenger_create); void ceph_messenger_destroy(struct ceph_messenger *msgr) { @@ -2124,6 +2139,7 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr) kfree(msgr); dout("destroyed messenger %p\n", msgr); } +EXPORT_SYMBOL(ceph_messenger_destroy); /* * Queue up an outgoing message on the given connection. @@ -2160,6 +2176,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } +EXPORT_SYMBOL(ceph_con_send); /* * Revoke a message that was previously queued for send @@ -2225,6 +2242,7 @@ void ceph_con_keepalive(struct ceph_connection *con) test_and_set_bit(WRITE_PENDING, &con->state) == 0) queue_con(con); } +EXPORT_SYMBOL(ceph_con_keepalive); /* @@ -2299,6 +2317,7 @@ out: pr_err("msg_new can't create type %d front %d\n", type, front_len); return NULL; } +EXPORT_SYMBOL(ceph_msg_new); /* * Allocate "middle" portion of a message, if it is needed and wasn't @@ -2410,6 +2429,7 @@ void ceph_msg_last_put(struct kref *kref) else ceph_msg_kfree(m); } +EXPORT_SYMBOL(ceph_msg_last_put); void ceph_msg_dump(struct ceph_msg *msg) { @@ -2430,3 +2450,4 @@ void ceph_msg_dump(struct ceph_msg *msg) DUMP_PREFIX_OFFSET, 16, 1, &msg->footer, sizeof(msg->footer), true); } +EXPORT_SYMBOL(ceph_msg_dump); diff --git a/fs/ceph/mon_client.c b/net/ceph/mon_client.c index b2a5a3e4a671..8a079399174a 100644 --- a/fs/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1,14 +1,16 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> +#include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/sched.h> -#include "mon_client.h" -#include "super.h" -#include "auth.h" -#include "decode.h" +#include <linux/ceph/mon_client.h> +#include <linux/ceph/libceph.h> +#include <linux/ceph/decode.h> + +#include <linux/ceph/auth.h> /* * Interact with Ceph monitor cluster. Handle requests for new map @@ -74,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) m->num_mon); for (i = 0; i < m->num_mon; i++) dout("monmap_decode mon%d is %s\n", i, - pr_addr(&m->mon_inst[i].addr.in_addr)); + ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); return m; bad: @@ -191,30 +193,33 @@ static void __send_subscribe(struct ceph_mon_client *monc) struct ceph_msg *msg = monc->m_subscribe; struct ceph_mon_subscribe_item *i; void *p, *end; + int num; p = msg->front.iov_base; end = p + msg->front_max; - dout("__send_subscribe to 'mdsmap' %u+\n", - (unsigned)monc->have_mdsmap); + num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; + ceph_encode_32(&p, num); + if (monc->want_next_osdmap) { dout("__send_subscribe to 'osdmap' %u\n", (unsigned)monc->have_osdmap); - ceph_encode_32(&p, 3); ceph_encode_string(&p, end, "osdmap", 6); i = p; i->have = cpu_to_le64(monc->have_osdmap); i->onetime = 1; p += sizeof(*i); monc->want_next_osdmap = 2; /* requested */ - } else { - ceph_encode_32(&p, 2); } - ceph_encode_string(&p, end, "mdsmap", 6); - i = p; - i->have = cpu_to_le64(monc->have_mdsmap); - i->onetime = 0; - p += sizeof(*i); + if (monc->want_mdsmap) { + dout("__send_subscribe to 'mdsmap' %u+\n", + (unsigned)monc->have_mdsmap); + ceph_encode_string(&p, end, "mdsmap", 6); + i = p; + i->have = cpu_to_le64(monc->have_mdsmap); + i->onetime = 0; + p += sizeof(*i); + } ceph_encode_string(&p, end, "monmap", 6); i = p; i->have = 0; @@ -243,7 +248,8 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); if (monc->hunting) { pr_info("mon%d %s session established\n", - monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr)); + monc->cur_mon, + ceph_pr_addr(&monc->con->peer_addr.in_addr)); monc->hunting = false; } dout("handle_subscribe_ack after %d seconds\n", seconds); @@ -266,6 +272,7 @@ int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) mutex_unlock(&monc->mutex); return 0; } +EXPORT_SYMBOL(ceph_monc_got_mdsmap); int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) { @@ -310,6 +317,7 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); return 0; } +EXPORT_SYMBOL(ceph_monc_open_session); /* * The monitor responds with mount ack indicate mount success. The @@ -540,6 +548,7 @@ out: kref_put(&req->kref, release_generic_request); return err; } +EXPORT_SYMBOL(ceph_monc_do_statfs); /* * pool ops @@ -651,6 +660,7 @@ int ceph_monc_create_snapid(struct ceph_mon_client *monc, pool, 0, (char *)snapid, sizeof(*snapid)); } +EXPORT_SYMBOL(ceph_monc_create_snapid); int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) @@ -708,9 +718,9 @@ static void delayed_work(struct work_struct *work) */ static int build_initial_monmap(struct ceph_mon_client *monc) { - struct ceph_mount_args *args = monc->client->mount_args; - struct ceph_entity_addr *mon_addr = args->mon_addr; - int num_mon = args->num_mon; + struct ceph_options *opt = monc->client->options; + struct ceph_entity_addr *mon_addr = opt->mon_addr; + int num_mon = opt->num_mon; int i; /* build initial monmap */ @@ -728,11 +738,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) } monc->monmap->num_mon = num_mon; monc->have_fsid = false; - - /* release addr memory */ - kfree(args->mon_addr); - args->mon_addr = NULL; - args->num_mon = 0; return 0; } @@ -753,8 +758,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->con = NULL; /* authentication */ - monc->auth = ceph_auth_init(cl->mount_args->name, - cl->mount_args->secret); + monc->auth = ceph_auth_init(cl->options->name, + cl->options->secret); if (IS_ERR(monc->auth)) return PTR_ERR(monc->auth); monc->auth->want_keys = @@ -808,6 +813,7 @@ out_monmap: out: return err; } +EXPORT_SYMBOL(ceph_monc_init); void ceph_monc_stop(struct ceph_mon_client *monc) { @@ -832,6 +838,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) kfree(monc->monmap); } +EXPORT_SYMBOL(ceph_monc_stop); static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) @@ -889,6 +896,7 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc) mutex_unlock(&monc->mutex); return ret; } +EXPORT_SYMBOL(ceph_monc_validate_auth); /* * handle incoming message @@ -922,15 +930,16 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) ceph_monc_handle_map(monc, msg); break; - case CEPH_MSG_MDS_MAP: - ceph_mdsc_handle_map(&monc->client->mdsc, msg); - break; - case CEPH_MSG_OSD_MAP: ceph_osdc_handle_map(&monc->client->osdc, msg); break; default: + /* can the chained handler handle it? */ + if (monc->client->extra_mon_dispatch && + monc->client->extra_mon_dispatch(monc->client, msg) == 0) + break; + pr_err("received unknown message type %d %s\n", type, ceph_msg_type_name(type)); } @@ -994,7 +1003,7 @@ static void mon_fault(struct ceph_connection *con) if (monc->con && !monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, - pr_addr(&monc->con->peer_addr.in_addr)); + ceph_pr_addr(&monc->con->peer_addr.in_addr)); __close_session(monc); if (!monc->hunting) { diff --git a/fs/ceph/msgpool.c b/net/ceph/msgpool.c index dd65a6438131..d5f2d97ac05c 100644 --- a/fs/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -1,11 +1,11 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> #include <linux/err.h> #include <linux/sched.h> #include <linux/types.h> #include <linux/vmalloc.h> -#include "msgpool.h" +#include <linux/ceph/msgpool.h> static void *alloc_fn(gfp_t gfp_mask, void *arg) { diff --git a/fs/ceph/osd_client.c b/net/ceph/osd_client.c index 0edb43f1ef26..79391994b3ed 100644 --- a/fs/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1,5 +1,6 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> +#include <linux/module.h> #include <linux/err.h> #include <linux/highmem.h> #include <linux/mm.h> @@ -10,12 +11,12 @@ #include <linux/bio.h> #endif -#include "super.h" -#include "osd_client.h" -#include "messenger.h" -#include "decode.h" -#include "auth.h" -#include "pagelist.h" +#include <linux/ceph/libceph.h> +#include <linux/ceph/osd_client.h> +#include <linux/ceph/messenger.h> +#include <linux/ceph/decode.h> +#include <linux/ceph/auth.h> +#include <linux/ceph/pagelist.h> #define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 @@ -70,11 +71,14 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, op->extent.length = objlen; } req->r_num_pages = calc_pages_for(off, *plen); + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", *bno, objoff, objlen, req->r_num_pages); } +EXPORT_SYMBOL(ceph_calc_raw_layout); /* * Implement client access to distributed object storage cluster. @@ -154,19 +158,7 @@ void ceph_osdc_release_request(struct kref *kref) else kfree(req); } - -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - return 1; - default: - return 0; - } -} +EXPORT_SYMBOL(ceph_osdc_release_request); static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) { @@ -268,6 +260,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, return req; } +EXPORT_SYMBOL(ceph_osdc_alloc_request); static void osd_req_encode_op(struct ceph_osd_request *req, struct ceph_osd_op *dst, @@ -403,6 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, msg->hdr.front_len = cpu_to_le32(msg_size); return; } +EXPORT_SYMBOL(ceph_osdc_build_request); /* * build new request AND message, calculate layout, and adjust file @@ -460,6 +454,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, return req; } +EXPORT_SYMBOL(ceph_osdc_new_request); /* * We keep osd requests in an rbtree, sorted by ->r_tid. @@ -614,7 +609,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, dout("__move_osd_to_lru %p\n", osd); BUG_ON(!list_empty(&osd->o_osd_lru)); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); - osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ; + osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; } static void __remove_osd_from_lru(struct ceph_osd *osd) @@ -708,7 +703,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, - osdc->client->mount_args->osd_keepalive_timeout * HZ); + osdc->client->options->osd_keepalive_timeout * HZ); } static void __cancel_osd_timeout(struct ceph_osd_client *osdc) @@ -909,9 +904,9 @@ static void handle_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, timeout_work.work); struct ceph_osd_request *req, *last_req = NULL; struct ceph_osd *osd; - unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; + unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = - osdc->client->mount_args->osd_keepalive_timeout * HZ; + osdc->client->options->osd_keepalive_timeout * HZ; unsigned long last_stamp = 0; struct rb_node *p; struct list_head slow_osds; @@ -998,7 +993,7 @@ static void handle_osds_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, osds_timeout_work.work); unsigned long delay = - osdc->client->mount_args->osd_idle_ttl * HZ >> 2; + osdc->client->options->osd_idle_ttl * HZ >> 2; dout("osds timeout\n"); down_read(&osdc->map_sem); @@ -1360,6 +1355,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, up_read(&osdc->map_sem); return rc; } +EXPORT_SYMBOL(ceph_osdc_start_request); /* * wait for a request to complete @@ -1382,6 +1378,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); return req->r_result; } +EXPORT_SYMBOL(ceph_osdc_wait_request); /* * sync - wait for all in-flight requests to flush. avoid starvation. @@ -1415,6 +1412,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) mutex_unlock(&osdc->request_mutex); dout("sync done (thru tid %llu)\n", last_tid); } +EXPORT_SYMBOL(ceph_osdc_sync); /* * init, shutdown @@ -1440,7 +1438,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); schedule_delayed_work(&osdc->osds_timeout_work, - round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); + round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); err = -ENOMEM; osdc->req_mempool = mempool_create_kmalloc_pool(10, @@ -1466,6 +1464,7 @@ out_mempool: out: return err; } +EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1480,6 +1479,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } +EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. If we cross a stripe boundary, shorten @@ -1517,6 +1517,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout("readpages result %d\n", rc); return rc; } +EXPORT_SYMBOL(ceph_osdc_readpages); /* * do a synchronous write on N pages @@ -1559,6 +1560,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages result %d\n", rc); return rc; } +EXPORT_SYMBOL(ceph_osdc_writepages); /* * handle incoming message diff --git a/fs/ceph/osdmap.c b/net/ceph/osdmap.c index 3ccd11761cb6..d73f3f6efa36 100644 --- a/fs/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1,14 +1,15 @@ -#include "ceph_debug.h" +#include <linux/ceph/ceph_debug.h> +#include <linux/module.h> #include <linux/slab.h> #include <asm/div64.h> -#include "super.h" -#include "osdmap.h" -#include "crush/hash.h" -#include "crush/mapper.h" -#include "decode.h" +#include <linux/ceph/libceph.h> +#include <linux/ceph/osdmap.h> +#include <linux/ceph/decode.h> +#include <linux/crush/hash.h> +#include <linux/crush/mapper.h> char *ceph_osdmap_state_str(char *str, int len, int state) { @@ -429,6 +430,7 @@ int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) } return -ENOENT; } +EXPORT_SYMBOL(ceph_pg_poolid_by_name); static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) { @@ -979,6 +981,7 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); } +EXPORT_SYMBOL(ceph_calc_file_object_mapping); /* * calculate an object layout (i.e. pgid) from an oid, @@ -1024,6 +1027,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ol->ol_stripe_unit = fl->fl_object_stripe_unit; return 0; } +EXPORT_SYMBOL(ceph_calc_object_layout); /* * Calculate raw osd vector for the given pgid. Return pointer to osd @@ -1121,3 +1125,4 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) return osds[i]; return -1; } +EXPORT_SYMBOL(ceph_calc_pg_primary); diff --git a/fs/ceph/pagelist.c b/net/ceph/pagelist.c index 326e1c04176f..3b146cfac182 100644 --- a/fs/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -1,9 +1,9 @@ +#include <linux/module.h> #include <linux/gfp.h> #include <linux/pagemap.h> #include <linux/highmem.h> - -#include "pagelist.h" +#include <linux/ceph/pagelist.h> static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) { @@ -25,6 +25,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) } return 0; } +EXPORT_SYMBOL(ceph_pagelist_release); static int ceph_pagelist_addpage(struct ceph_pagelist *pl) { @@ -61,3 +62,4 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len) pl->room -= len; return 0; } +EXPORT_SYMBOL(ceph_pagelist_append); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c new file mode 100644 index 000000000000..54caf0687155 --- /dev/null +++ b/net/ceph/pagevec.c @@ -0,0 +1,223 @@ +#include <linux/ceph/ceph_debug.h> + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <linux/writeback.h> + +#include <linux/ceph/libceph.h> + +/* + * build a vector of user pages + */ +struct page **ceph_get_direct_page_vector(const char __user *data, + int num_pages, + loff_t off, size_t len) +{ + struct page **pages; + int rc; + + pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); + if (!pages) + return ERR_PTR(-ENOMEM); + + down_read(¤t->mm->mmap_sem); + rc = get_user_pages(current, current->mm, (unsigned long)data, + num_pages, 0, 0, pages, NULL); + up_read(¤t->mm->mmap_sem); + if (rc < 0) + goto fail; + return pages; + +fail: + kfree(pages); + return ERR_PTR(rc); +} +EXPORT_SYMBOL(ceph_get_direct_page_vector); + +void ceph_put_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + kfree(pages); +} +EXPORT_SYMBOL(ceph_put_page_vector); + +void ceph_release_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + __free_pages(pages[i], 0); + kfree(pages); +} +EXPORT_SYMBOL(ceph_release_page_vector); + +/* + * allocate a vector new pages + */ +struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) +{ + struct page **pages; + int i; + + pages = kmalloc(sizeof(*pages) * num_pages, flags); + if (!pages) + return ERR_PTR(-ENOMEM); + for (i = 0; i < num_pages; i++) { + pages[i] = __page_cache_alloc(flags); + if (pages[i] == NULL) { + ceph_release_page_vector(pages, i); + return ERR_PTR(-ENOMEM); + } + } + return pages; +} +EXPORT_SYMBOL(ceph_alloc_page_vector); + +/* + * copy user data into a page vector + */ +int ceph_copy_user_to_page_vector(struct page **pages, + const char __user *data, + loff_t off, size_t len) +{ + int i = 0; + int po = off & ~PAGE_CACHE_MASK; + int left = len; + int l, bad; + + while (left > 0) { + l = min_t(int, PAGE_CACHE_SIZE-po, left); + bad = copy_from_user(page_address(pages[i]) + po, data, l); + if (bad == l) + return -EFAULT; + data += l - bad; + left -= l - bad; + po += l - bad; + if (po == PAGE_CACHE_SIZE) { + po = 0; + i++; + } + } + return len; +} +EXPORT_SYMBOL(ceph_copy_user_to_page_vector); + +int ceph_copy_to_page_vector(struct page **pages, + const char *data, + loff_t off, size_t len) +{ + int i = 0; + size_t po = off & ~PAGE_CACHE_MASK; + size_t left = len; + size_t l; + + while (left > 0) { + l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); + data += l; + left -= l; + po += l; + if (po == PAGE_CACHE_SIZE) { + po = 0; + i++; + } + } + return len; +} +EXPORT_SYMBOL(ceph_copy_to_page_vector); + +int ceph_copy_from_page_vector(struct page **pages, + char *data, + loff_t off, size_t len) +{ + int i = 0; + size_t po = off & ~PAGE_CACHE_MASK; + size_t left = len; + size_t l; + + while (left > 0) { + l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); + data += l; + left -= l; + po += l; + if (po == PAGE_CACHE_SIZE) { + po = 0; + i++; + } + } + return len; +} +EXPORT_SYMBOL(ceph_copy_from_page_vector); + +/* + * copy user data from a page vector into a user pointer + */ +int ceph_copy_page_vector_to_user(struct page **pages, + char __user *data, + loff_t off, size_t len) +{ + int i = 0; + int po = off & ~PAGE_CACHE_MASK; + int left = len; + int l, bad; + + while (left > 0) { + l = min_t(int, left, PAGE_CACHE_SIZE-po); + bad = copy_to_user(data, page_address(pages[i]) + po, l); + if (bad == l) + return -EFAULT; + data += l - bad; + left -= l - bad; + if (po) { + po += l - bad; + if (po == PAGE_CACHE_SIZE) + po = 0; + } + i++; + } + return len; +} +EXPORT_SYMBOL(ceph_copy_page_vector_to_user); + +/* + * Zero an extent within a page vector. Offset is relative to the + * start of the first page. + */ +void ceph_zero_page_vector_range(int off, int len, struct page **pages) +{ + int i = off >> PAGE_CACHE_SHIFT; + + off &= ~PAGE_CACHE_MASK; + + dout("zero_page_vector_page %u~%u\n", off, len); + + /* leading partial page? */ + if (off) { + int end = min((int)PAGE_CACHE_SIZE, off + len); + dout("zeroing %d %p head from %d\n", i, pages[i], + (int)off); + zero_user_segment(pages[i], off, end); + len -= (end - off); + i++; + } + while (len >= PAGE_CACHE_SIZE) { + dout("zeroing %d %p len=%d\n", i, pages[i], len); + zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); + len -= PAGE_CACHE_SIZE; + i++; + } + /* trailing partial page? */ + if (len) { + dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); + zero_user_segment(pages[i], 0, len); + } +} +EXPORT_SYMBOL(ceph_zero_page_vector_range); + |