summaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/auth_x.c76
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c74
-rw-r--r--net/ceph/ceph_strings.c14
-rw-r--r--net/ceph/crush/crush.c14
-rw-r--r--net/ceph/crush/crush_ln_table.h166
-rw-r--r--net/ceph/crush/mapper.c118
-rw-r--r--net/ceph/debugfs.c26
-rw-r--r--net/ceph/messenger.c80
-rw-r--r--net/ceph/mon_client.c139
-rw-r--r--net/ceph/osd_client.c182
-rw-r--r--net/ceph/osdmap.c25
-rw-r--r--net/ceph/pagevec.c6
14 files changed, 671 insertions, 254 deletions
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 7e38b729696a..ba6eb17226da 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/messenger.h>
#include "crypto.h"
#include "auth_x.h"
@@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout("build_authorizer for %s %p\n",
ceph_entity_type_name(th->service), au);
+ ceph_crypto_key_destroy(&au->session_key);
+ ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
+ if (ret)
+ return ret;
+
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
ceph_x_encrypt_buflen(ticket_blob_len);
dout(" need len %d\n", maxlen);
@@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
}
if (!au->buf) {
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
- if (!au->buf)
+ if (!au->buf) {
+ ceph_crypto_key_destroy(&au->session_key);
return -ENOMEM;
+ }
}
au->service = th->service;
au->secret_id = th->secret_id;
@@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
get_random_bytes(&au->nonce, sizeof(au->nonce));
msg_b.struct_v = 1;
msg_b.nonce = cpu_to_le64(au->nonce);
- ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
+ ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
p, end - p);
if (ret < 0)
goto out_buf;
@@ -560,6 +568,8 @@ static int ceph_x_create_authorizer(
auth->authorizer_buf_len = au->buf->vec.iov_len;
auth->authorizer_reply_buf = au->reply_buf;
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+ auth->sign_message = ac->ops->sign_message;
+ auth->check_message_signature = ac->ops->check_message_signature;
return 0;
}
@@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a, size_t len)
{
struct ceph_x_authorizer *au = (void *)a;
- struct ceph_x_ticket_handler *th;
int ret = 0;
struct ceph_x_authorize_reply reply;
void *preply = &reply;
void *p = au->reply_buf;
void *end = p + sizeof(au->reply_buf);
- th = get_ticket_handler(ac, au->service);
- if (IS_ERR(th))
- return PTR_ERR(th);
- ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
+ ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply));
if (ret < 0)
return ret;
if (ret != sizeof(reply))
@@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
{
struct ceph_x_authorizer *au = (void *)a;
+ ceph_crypto_key_destroy(&au->session_key);
ceph_buffer_put(au->buf);
kfree(au);
}
@@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
memset(&th->validity, 0, sizeof(th->validity));
}
+static int calcu_signature(struct ceph_x_authorizer *au,
+ struct ceph_msg *msg, __le64 *sig)
+{
+ int ret;
+ char tmp_enc[40];
+ __le32 tmp[5] = {
+ cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc,
+ msg->footer.middle_crc, msg->footer.data_crc,
+ };
+ ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp),
+ tmp_enc, sizeof(tmp_enc));
+ if (ret < 0)
+ return ret;
+ *sig = *(__le64*)(tmp_enc + 4);
+ return 0;
+}
+
+static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
+ struct ceph_msg *msg)
+{
+ int ret;
+ if (!auth->authorizer)
+ return 0;
+ ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &msg->footer.sig);
+ if (ret < 0)
+ return ret;
+ msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED;
+ return 0;
+}
+
+static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
+ struct ceph_msg *msg)
+{
+ __le64 sig_check;
+ int ret;
+
+ if (!auth->authorizer)
+ return 0;
+ ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &sig_check);
+ if (ret < 0)
+ return ret;
+ if (sig_check == msg->footer.sig)
+ return 0;
+ if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED)
+ dout("ceph_x_check_message_signature %p has signature %llx "
+ "expect %llx\n", msg, msg->footer.sig, sig_check);
+ else
+ dout("ceph_x_check_message_signature %p sender did not set "
+ "CEPH_MSG_FOOTER_SIGNED\n", msg);
+ return -EBADMSG;
+}
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
@@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
.destroy = ceph_x_destroy,
+ .sign_message = ceph_x_sign_message,
+ .check_message_signature = ceph_x_check_message_signature,
};
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 65ee72082d99..e8b7c6917d47 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
struct ceph_x_authorizer {
+ struct ceph_crypto_key session_key;
struct ceph_buffer *buf;
unsigned int service;
u64 nonce;
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 621b5f65407f..add5f921a0ff 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -6,7 +6,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
+#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref)
struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
dout("buffer_release %p\n", b);
- ceph_kvfree(b->vec.iov_base);
+ kvfree(b->vec.iov_base);
kfree(b);
}
EXPORT_SYMBOL(ceph_buffer_release);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 58fbfe134f93..79e8f71aef5b 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags)
return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
}
-void ceph_kvfree(const void *ptr)
-{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
-}
-
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
{
@@ -245,6 +237,10 @@ enum {
Opt_noshare,
Opt_crc,
Opt_nocrc,
+ Opt_cephx_require_signatures,
+ Opt_nocephx_require_signatures,
+ Opt_tcp_nodelay,
+ Opt_notcp_nodelay,
};
static match_table_t opt_tokens = {
@@ -263,6 +259,10 @@ static match_table_t opt_tokens = {
{Opt_noshare, "noshare"},
{Opt_crc, "crc"},
{Opt_nocrc, "nocrc"},
+ {Opt_cephx_require_signatures, "cephx_require_signatures"},
+ {Opt_nocephx_require_signatures, "nocephx_require_signatures"},
+ {Opt_tcp_nodelay, "tcp_nodelay"},
+ {Opt_notcp_nodelay, "notcp_nodelay"},
{-1, NULL}
};
@@ -462,6 +462,20 @@ ceph_parse_options(char *options, const char *dev_name,
opt->flags |= CEPH_OPT_NOCRC;
break;
+ case Opt_cephx_require_signatures:
+ opt->flags &= ~CEPH_OPT_NOMSGAUTH;
+ break;
+ case Opt_nocephx_require_signatures:
+ opt->flags |= CEPH_OPT_NOMSGAUTH;
+ break;
+
+ case Opt_tcp_nodelay:
+ opt->flags |= CEPH_OPT_TCP_NODELAY;
+ break;
+ case Opt_notcp_nodelay:
+ opt->flags &= ~CEPH_OPT_TCP_NODELAY;
+ break;
+
default:
BUG_ON(token);
}
@@ -476,6 +490,43 @@ out:
}
EXPORT_SYMBOL(ceph_parse_options);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+ struct ceph_options *opt = client->options;
+ size_t pos = m->count;
+
+ if (opt->name)
+ seq_printf(m, "name=%s,", opt->name);
+ if (opt->key)
+ seq_puts(m, "secret=<hidden>,");
+
+ if (opt->flags & CEPH_OPT_FSID)
+ seq_printf(m, "fsid=%pU,", &opt->fsid);
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ seq_puts(m, "noshare,");
+ if (opt->flags & CEPH_OPT_NOCRC)
+ seq_puts(m, "nocrc,");
+ if (opt->flags & CEPH_OPT_NOMSGAUTH)
+ seq_puts(m, "nocephx_require_signatures,");
+ if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+ seq_puts(m, "notcp_nodelay,");
+
+ if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+ if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+ if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, "osdkeepalivetimeout=%d,",
+ opt->osd_keepalive_timeout);
+
+ /* drop redundant comma */
+ if (m->count != pos)
+ m->count--;
+
+ return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
u64 ceph_client_id(struct ceph_client *client)
{
return client->monc.auth->global_id;
@@ -504,6 +555,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
init_waitqueue_head(&client->auth_wq);
client->auth_err = 0;
+ if (!ceph_test_opt(client, NOMSGAUTH))
+ required_features |= CEPH_FEATURE_MSG_AUTH;
+
client->extra_mon_dispatch = NULL;
client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
supported_features;
@@ -513,10 +567,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
/* msgr */
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
+
ceph_messenger_init(&client->msgr, myaddr,
client->supported_features,
client->required_features,
- ceph_test_opt(client, NOCRC));
+ ceph_test_opt(client, NOCRC),
+ ceph_test_opt(client, TCP_NODELAY));
/* subsystems */
err = ceph_monc_init(&client->monc, client);
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 30560202f57b..139a9cb19b0c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -42,17 +42,3 @@ const char *ceph_osd_state_name(int s)
return "???";
}
}
-
-const char *ceph_pool_op_name(int op)
-{
- switch (op) {
- case POOL_OP_CREATE: return "create";
- case POOL_OP_DELETE: return "delete";
- case POOL_OP_AUID_CHANGE: return "auid change";
- case POOL_OP_CREATE_SNAP: return "create snap";
- case POOL_OP_DELETE_SNAP: return "delete snap";
- case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
- case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
- }
- return "???";
-}
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 16bc199d9a62..9d84ce4ea0df 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
+ case CRUSH_BUCKET_STRAW2: return "straw2";
default: return "unknown";
}
}
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
+ case CRUSH_BUCKET_STRAW2:
+ return ((struct crush_bucket_straw2 *)b)->item_weights[p];
}
return 0;
}
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b);
}
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+ kfree(b->item_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
void crush_destroy_bucket(struct crush_bucket *b)
{
switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
case CRUSH_BUCKET_STRAW:
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
break;
+ case CRUSH_BUCKET_STRAW2:
+ crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+ break;
}
}
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644
index 000000000000..6192c7fc958c
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+ 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+ 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+ 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+ 0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+ 0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+ 0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+ 0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+ 0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+ 0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+ 0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+ 0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+ 0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+ 0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+ 0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+ 0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+ 0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+ 0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+ 0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+ 0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+ 0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+ 0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+ 0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+ 0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+ 0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+ 0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+ 0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+ 0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+ 0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+ 0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+ 0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+ 0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+ 0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+ 0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+ 0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+ 0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+ 0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+ 0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+ 0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+ 0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+ 0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+ 0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+ 0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+ 0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+ 0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+ 0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+ 0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+ 0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+ 0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+ 0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+ 0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+ 0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+ 0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+ 0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+ 0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+ 0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+ 0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+ 0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+ 0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+ 0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+ 0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+ 0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+ 0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+ 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+ 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+ 0x0000800000000000ll, 0x0000ffff00000000ll,
+ };
+
+
+ // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
+static int64_t __LL_tbl[256] = {
+ 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+ 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+ 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+ 0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+ 0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+ 0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+ 0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+ 0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+ 0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+ 0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+ 0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+ 0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+ 0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+ 0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+ 0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+ 0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+ 0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+ 0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+ 0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+ 0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+ 0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+ 0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+ 0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+ 0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+ 0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+ 0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+ 0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+ 0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+ 0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+ 0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+ 0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+ 0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+ 0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+ 0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+ 0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+ 0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+ 0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+ 0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+ 0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+ 0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+ 0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+ 0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+ 0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+ 0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+ 0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+ 0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+ 0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+ 0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+ 0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+ 0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+ 0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+ 0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+ 0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+ 0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+ 0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+ 0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+ 0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+ 0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+ 0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+ 0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+ 0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+ 0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+ 0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+ 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..5b47736d27d9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
#include <linux/crush/crush.h>
#include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
/*
* Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
return bucket->h.items[high];
}
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+ unsigned x=xin, x1;
+ int iexpon, index1, index2;
+ uint64_t RH, LH, LL, xl64, result;
+
+ x++;
+
+ // normalize input
+ iexpon = 15;
+ while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+ index1 = (x>>8)<<1;
+ // RH ~ 2^56/index1
+ RH = __RH_LH_tbl[index1 - 256];
+ // LH ~ 2^48 * log2(index1/256)
+ LH = __RH_LH_tbl[index1 + 1 - 256];
+
+ // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+ xl64 = (int64_t)x * RH;
+ xl64 >>= 48;
+ x1 = xl64;
+
+ result = iexpon;
+ result <<= (12 + 32);
+
+ index2 = x1 & 0xff;
+ // LL ~ 2^48*log2(1.0+index2/2^15)
+ LL = __LL_tbl[index2];
+
+ LH = LH + LL;
+
+ LH >>= (48-12 - 32);
+ result += LH;
+
+ return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+ int x, int r)
+{
+ unsigned i, high = 0;
+ unsigned u;
+ unsigned w;
+ __s64 ln, draw, high_draw = 0;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ w = bucket->item_weights[i];
+ if (w) {
+ u = crush_hash32_3(bucket->h.hash, x,
+ bucket->h.items[i], r);
+ u &= 0xffff;
+
+ /*
+ * for some reason slightly less than 0x10000 produces
+ * a slightly more accurate distribution... probably a
+ * rounding effect.
+ *
+ * the natural log lookup table maps [0,0xffff]
+ * (corresponding to real numbers [1/0x10000, 1] to
+ * [0, 0xffffffffffff] (corresponding to real numbers
+ * [-11.090355,0]).
+ */
+ ln = crush_ln(u) - 0x1000000000000ll;
+
+ /*
+ * divide by 16.16 fixed-point weight. note
+ * that the ln value is negative, so a larger
+ * weight means a larger (less negative) value
+ * for draw.
+ */
+ draw = div64_s64(ln, w);
+ } else {
+ draw = S64_MIN;
+ }
+
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
case CRUSH_BUCKET_STRAW:
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
+ case CRUSH_BUCKET_STRAW2:
+ return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+ x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
+
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index d2d525529f87..593dc2eabcc8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
* .../monmap - current monmap
* .../osdc - active osd requests
* .../monc - mon client state
+ * .../client_options - libceph-only (i.e. not rbd or cephfs) options
* .../dentry_lru - dump contents of dentry lru
* .../caps - expose cap (reservation) stats
* .../bdi - symlink to ../../bdi/something
@@ -127,8 +128,6 @@ static int monc_show(struct seq_file *s, void *p)
op = le16_to_cpu(req->request->hdr.type);
if (op == CEPH_MSG_STATFS)
seq_printf(s, "%llu statfs\n", req->tid);
- else if (op == CEPH_MSG_POOLOP)
- seq_printf(s, "%llu poolop\n", req->tid);
else if (op == CEPH_MSG_MON_GET_VERSION)
seq_printf(s, "%llu mon_get_version", req->tid);
else
@@ -179,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
return 0;
}
+static int client_options_show(struct seq_file *s, void *p)
+{
+ struct ceph_client *client = s->private;
+ int ret;
+
+ ret = ceph_print_client_options(s, client);
+ if (ret)
+ return ret;
+
+ seq_putc(s, '\n');
+ return 0;
+}
+
CEPH_DEFINE_SHOW_FUNC(monmap_show)
CEPH_DEFINE_SHOW_FUNC(osdmap_show)
CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
int ceph_debugfs_init(void)
{
@@ -244,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
if (!client->debugfs_osdmap)
goto out;
+ client->debugfs_options = debugfs_create_file("client_options",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &client_options_show_fops);
+ if (!client->debugfs_options)
+ goto out;
+
return 0;
out:
@@ -254,6 +275,7 @@ out:
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
dout("ceph_debugfs_client_cleanup %p\n", client);
+ debugfs_remove(client->debugfs_options);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8d1653caffdb..967080a9f043 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
IPPROTO_TCP, &sock);
if (ret)
return ret;
- sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
+ sock->sk->sk_allocation = GFP_NOFS;
#ifdef CONFIG_LOCKDEP
lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -505,12 +505,18 @@ static int ceph_tcp_connect(struct ceph_connection *con)
pr_err("connect %s error %d\n",
ceph_pr_addr(&con->peer_addr.in_addr), ret);
sock_release(sock);
- con->error_msg = "connect error";
-
return ret;
}
- sk_set_memalloc(sock->sk);
+ if (con->msgr->tcp_nodelay) {
+ int optval = 1;
+
+ ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
+ (char *)&optval, sizeof(optval));
+ if (ret)
+ pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
+ ret);
+ }
con->sock = sock;
return 0;
@@ -1196,8 +1202,18 @@ static void prepare_write_message_footer(struct ceph_connection *con)
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
- con->out_kvec[v].iov_len = sizeof(m->footer);
- con->out_kvec_bytes += sizeof(m->footer);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
+ if (con->ops->sign_message)
+ con->ops->sign_message(con, m);
+ else
+ m->footer.sig = 0;
+ con->out_kvec[v].iov_len = sizeof(m->footer);
+ con->out_kvec_bytes += sizeof(m->footer);
+ } else {
+ m->old_footer.flags = m->footer.flags;
+ con->out_kvec[v].iov_len = sizeof(m->old_footer);
+ con->out_kvec_bytes += sizeof(m->old_footer);
+ }
con->out_kvec_left++;
con->out_more = m->more_to_follow;
con->out_msg_done = true;
@@ -2127,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the
* client.
*/
- pr_err("process_connect got WAIT as client\n");
con->error_msg = "protocol error, got WAIT as client";
return -1;
default:
- pr_err("connect protocol error, will retry\n");
con->error_msg = "protocol error, garbage tag during connect";
return -1;
}
@@ -2249,6 +2263,7 @@ static int read_partial_message(struct ceph_connection *con)
int ret;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
+ bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
u64 seq;
u32 crc;
@@ -2263,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
if (cpu_to_le32(crc) != con->in_hdr.crc) {
- pr_err("read_partial_message bad hdr "
- " crc %u != expected %u\n",
+ pr_err("read_partial_message bad hdr crc %u != expected %u\n",
crc, con->in_hdr.crc);
return -EBADMSG;
}
@@ -2294,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
con->error_msg = "bad message sequence # for incoming message";
- return -EBADMSG;
+ return -EBADE;
}
/* allocate message? */
@@ -2361,12 +2375,21 @@ static int read_partial_message(struct ceph_connection *con)
}
/* footer */
- size = sizeof (m->footer);
+ if (need_sign)
+ size = sizeof(m->footer);
+ else
+ size = sizeof(m->old_footer);
+
end += size;
ret = read_partial(con, end, size, &m->footer);
if (ret <= 0)
return ret;
+ if (!need_sign) {
+ m->footer.flags = m->old_footer.flags;
+ m->footer.sig = 0;
+ }
+
dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
m, front_len, m->footer.front_crc, middle_len,
m->footer.middle_crc, data_len, m->footer.data_crc);
@@ -2390,6 +2413,12 @@ static int read_partial_message(struct ceph_connection *con)
return -EBADMSG;
}
+ if (need_sign && con->ops->check_message_signature &&
+ con->ops->check_message_signature(con, m)) {
+ pr_err("read_partial_message %p signature check failed\n", m);
+ return -EBADMSG;
+ }
+
return 1; /* done! */
}
@@ -2626,6 +2655,8 @@ more:
switch (ret) {
case -EBADMSG:
con->error_msg = "bad crc";
+ /* fall through */
+ case -EBADE:
ret = -EIO;
break;
case -EIO:
@@ -2772,11 +2803,8 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
- unsigned long pflags = current->flags;
bool fault;
- current->flags |= PF_MEMALLOC;
-
mutex_lock(&con->mutex);
while (true) {
int ret;
@@ -2807,7 +2835,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on read";
+ if (!con->error_msg)
+ con->error_msg = "socket error on read";
fault = true;
break;
}
@@ -2816,7 +2845,8 @@ static void con_work(struct work_struct *work)
if (ret < 0) {
if (ret == -EAGAIN)
continue;
- con->error_msg = "socket error on write";
+ if (!con->error_msg)
+ con->error_msg = "socket error on write";
fault = true;
}
@@ -2830,8 +2860,6 @@ static void con_work(struct work_struct *work)
con_fault_finish(con);
con->ops->put(con);
-
- tsk_restore_flags(current, pflags, PF_MEMALLOC);
}
/*
@@ -2840,11 +2868,13 @@ static void con_work(struct work_struct *work)
*/
static void con_fault(struct ceph_connection *con)
{
- pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+ pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+ con->error_msg = NULL;
+
WARN_ON(con->state != CON_STATE_CONNECTING &&
con->state != CON_STATE_NEGOTIATING &&
con->state != CON_STATE_OPEN);
@@ -2896,7 +2926,8 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u64 supported_features,
u64 required_features,
- bool nocrc)
+ bool nocrc,
+ bool tcp_nodelay)
{
msgr->supported_features = supported_features;
msgr->required_features = required_features;
@@ -2911,6 +2942,7 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
msgr->nocrc = nocrc;
+ msgr->tcp_nodelay = tcp_nodelay;
atomic_set(&msgr->stopping, 0);
@@ -3264,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
*/
if (*skip)
return 0;
- con->error_msg = "error allocating memory for incoming message";
+ con->error_msg = "error allocating memory for incoming message";
return -ENOMEM;
}
memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
@@ -3288,7 +3320,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
static void ceph_msg_free(struct ceph_msg *m)
{
dout("%s %p\n", __func__, m);
- ceph_kvfree(m->front.iov_base);
+ kvfree(m->front.iov_base);
kmem_cache_free(ceph_msg_cache, m);
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index a83062ceeec9..2b3cf05e87b0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -410,7 +410,7 @@ out_unlocked:
}
/*
- * generic requests (e.g., statfs, poolop)
+ * generic requests (currently statfs, mon_get_version)
*/
static struct ceph_mon_generic_request *__lookup_generic_req(
struct ceph_mon_client *monc, u64 tid)
@@ -569,7 +569,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
return;
bad:
- pr_err("corrupt generic reply, tid %llu\n", tid);
+ pr_err("corrupt statfs reply, tid %llu\n", tid);
ceph_msg_dump(msg);
}
@@ -588,7 +588,6 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
kref_init(&req->kref);
req->buf = buf;
- req->buf_len = sizeof(*buf);
init_completion(&req->completion);
err = -ENOMEM;
@@ -611,7 +610,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
err = do_generic_request(monc, req);
out:
- kref_put(&req->kref, release_generic_request);
+ put_generic_request(req);
return err;
}
EXPORT_SYMBOL(ceph_monc_do_statfs);
@@ -647,7 +646,7 @@ static void handle_get_version_reply(struct ceph_mon_client *monc,
return;
bad:
- pr_err("corrupt mon_get_version reply\n");
+ pr_err("corrupt mon_get_version reply, tid %llu\n", tid);
ceph_msg_dump(msg);
}
@@ -670,7 +669,6 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
kref_init(&req->kref);
req->buf = newest;
- req->buf_len = sizeof(*newest);
init_completion(&req->completion);
req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
@@ -701,134 +699,12 @@ int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
mutex_unlock(&monc->mutex);
out:
- kref_put(&req->kref, release_generic_request);
+ put_generic_request(req);
return err;
}
EXPORT_SYMBOL(ceph_monc_do_get_version);
/*
- * pool ops
- */
-static int get_poolop_reply_buf(const char *src, size_t src_len,
- char *dst, size_t dst_len)
-{
- u32 buf_len;
-
- if (src_len != sizeof(u32) + dst_len)
- return -EINVAL;
-
- buf_len = le32_to_cpu(*(u32 *)src);
- if (buf_len != dst_len)
- return -EINVAL;
-
- memcpy(dst, src + sizeof(u32), dst_len);
- return 0;
-}
-
-static void handle_poolop_reply(struct ceph_mon_client *monc,
- struct ceph_msg *msg)
-{
- struct ceph_mon_generic_request *req;
- struct ceph_mon_poolop_reply *reply = msg->front.iov_base;
- u64 tid = le64_to_cpu(msg->hdr.tid);
-
- if (msg->front.iov_len < sizeof(*reply))
- goto bad;
- dout("handle_poolop_reply %p tid %llu\n", msg, tid);
-
- mutex_lock(&monc->mutex);
- req = __lookup_generic_req(monc, tid);
- if (req) {
- if (req->buf_len &&
- get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply),
- msg->front.iov_len - sizeof(*reply),
- req->buf, req->buf_len) < 0) {
- mutex_unlock(&monc->mutex);
- goto bad;
- }
- req->result = le32_to_cpu(reply->reply_code);
- get_generic_request(req);
- }
- mutex_unlock(&monc->mutex);
- if (req) {
- complete(&req->completion);
- put_generic_request(req);
- }
- return;
-
-bad:
- pr_err("corrupt generic reply, tid %llu\n", tid);
- ceph_msg_dump(msg);
-}
-
-/*
- * Do a synchronous pool op.
- */
-static int do_poolop(struct ceph_mon_client *monc, u32 op,
- u32 pool, u64 snapid,
- char *buf, int len)
-{
- struct ceph_mon_generic_request *req;
- struct ceph_mon_poolop *h;
- int err;
-
- req = kzalloc(sizeof(*req), GFP_NOFS);
- if (!req)
- return -ENOMEM;
-
- kref_init(&req->kref);
- req->buf = buf;
- req->buf_len = len;
- init_completion(&req->completion);
-
- err = -ENOMEM;
- req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS,
- true);
- if (!req->request)
- goto out;
- req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS,
- true);
- if (!req->reply)
- goto out;
-
- /* fill out request */
- req->request->hdr.version = cpu_to_le16(2);
- h = req->request->front.iov_base;
- h->monhdr.have_version = 0;
- h->monhdr.session_mon = cpu_to_le16(-1);
- h->monhdr.session_mon_tid = 0;
- h->fsid = monc->monmap->fsid;
- h->pool = cpu_to_le32(pool);
- h->op = cpu_to_le32(op);
- h->auid = 0;
- h->snapid = cpu_to_le64(snapid);
- h->name_len = 0;
-
- err = do_generic_request(monc, req);
-
-out:
- kref_put(&req->kref, release_generic_request);
- return err;
-}
-
-int ceph_monc_create_snapid(struct ceph_mon_client *monc,
- u32 pool, u64 *snapid)
-{
- return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
- pool, 0, (char *)snapid, sizeof(*snapid));
-
-}
-EXPORT_SYMBOL(ceph_monc_create_snapid);
-
-int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
- u32 pool, u64 snapid)
-{
- return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
- pool, snapid, NULL, 0);
-
-}
-
-/*
* Resend pending generic requests.
*/
static void __resend_generic_request(struct ceph_mon_client *monc)
@@ -1112,10 +988,6 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_get_version_reply(monc, msg);
break;
- case CEPH_MSG_POOLOP_REPLY:
- handle_poolop_reply(monc, msg);
- break;
-
case CEPH_MSG_MON_MAP:
ceph_monc_handle_map(monc, msg);
break;
@@ -1154,7 +1026,6 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MON_SUBSCRIBE_ACK:
m = ceph_msg_get(monc->m_subscribe_ack);
break;
- case CEPH_MSG_POOLOP_REPLY:
case CEPH_MSG_STATFS_REPLY:
return get_generic_reply(con, hdr, skip);
case CEPH_MSG_AUTH_REPLY:
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6f164289bde8..c4ec9239249a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
ceph_osd_data_release(&op->cls.request_data);
ceph_osd_data_release(&op->cls.response_data);
break;
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ ceph_osd_data_release(&op->xattr.osd_data);
+ break;
default:
break;
}
@@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
size_t payload_len = 0;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
- opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE);
op->extent.offset = offset;
op->extent.length = length;
@@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
}
EXPORT_SYMBOL(osd_req_op_cls_init);
+int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode, const char *name, const void *value,
+ size_t size, u8 cmp_op, u8 cmp_mode)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_pagelist *pagelist;
+ size_t payload_len;
+
+ BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
+
+ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+ if (!pagelist)
+ return -ENOMEM;
+
+ ceph_pagelist_init(pagelist);
+
+ payload_len = strlen(name);
+ op->xattr.name_len = payload_len;
+ ceph_pagelist_append(pagelist, name, payload_len);
+
+ op->xattr.value_len = size;
+ ceph_pagelist_append(pagelist, value, size);
+ payload_len += size;
+
+ op->xattr.cmp_op = cmp_op;
+ op->xattr.cmp_mode = cmp_mode;
+
+ ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
+ op->payload_len = payload_len;
+ return 0;
+}
+EXPORT_SYMBOL(osd_req_op_xattr_init);
+
void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag)
@@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_ZERO:
- case CEPH_OSD_OP_DELETE:
case CEPH_OSD_OP_TRUNCATE:
if (src->op == CEPH_OSD_OP_WRITE)
request_data_len = src->extent.length;
@@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
dst->alloc_hint.expected_write_size =
cpu_to_le64(src->alloc_hint.expected_write_size);
break;
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
+ dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
+ dst->xattr.cmp_op = src->xattr.cmp_op;
+ dst->xattr.cmp_mode = src->xattr.cmp_mode;
+ osd_data = &src->xattr.osd_data;
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ request_data_len = osd_data->pagelist->length;
+ break;
+ case CEPH_OSD_OP_CREATE:
+ case CEPH_OSD_OP_DELETE:
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
struct ceph_vino vino,
- u64 off, u64 *plen, int num_ops,
+ u64 off, u64 *plen,
+ unsigned int which, int num_ops,
int opcode, int flags,
struct ceph_snap_context *snapc,
u32 truncate_seq,
@@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u64 objnum = 0;
u64 objoff = 0;
u64 objlen = 0;
- u32 object_size;
- u64 object_base;
int r;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
- opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
return ERR_PTR(r);
}
- object_size = le32_to_cpu(layout->fl_object_size);
- object_base = off - objoff;
- if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
- if (truncate_size <= object_base) {
- truncate_size = 0;
- } else {
- truncate_size -= object_base;
- if (truncate_size > object_size)
- truncate_size = object_size;
+ if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
+ osd_req_op_init(req, which, opcode);
+ } else {
+ u32 object_size = le32_to_cpu(layout->fl_object_size);
+ u32 object_base = off - objoff;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
}
+ osd_req_op_extent_init(req, which, opcode, objoff, objlen,
+ truncate_size, truncate_seq);
}
- osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
- truncate_size, truncate_seq);
-
- /*
- * A second op in the ops array means the caller wants to
- * also issue a include a 'startsync' command so that the
- * osd will flush data quickly.
- */
- if (num_ops > 1)
- osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
-
req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
@@ -993,10 +1035,11 @@ static void put_osd(struct ceph_osd *osd)
{
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
- if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
+ if (atomic_dec_and_test(&osd->o_ref)) {
struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
- ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
+ if (osd->o_auth.authorizer)
+ ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -1006,14 +1049,24 @@ static void put_osd(struct ceph_osd *osd)
*/
static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{
- dout("__remove_osd %p\n", osd);
+ dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
WARN_ON(!list_empty(&osd->o_requests));
WARN_ON(!list_empty(&osd->o_linger_requests));
- rb_erase(&osd->o_node, &osdc->osds);
list_del_init(&osd->o_osd_lru);
- ceph_con_close(&osd->o_con);
- put_osd(osd);
+ rb_erase(&osd->o_node, &osdc->osds);
+ RB_CLEAR_NODE(&osd->o_node);
+}
+
+static void remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
+{
+ dout("%s %p osd%d\n", __func__, osd, osd->o_osd);
+
+ if (!RB_EMPTY_NODE(&osd->o_node)) {
+ ceph_con_close(&osd->o_con);
+ __remove_osd(osdc, osd);
+ put_osd(osd);
+ }
}
static void remove_all_osds(struct ceph_osd_client *osdc)
@@ -1023,7 +1076,7 @@ static void remove_all_osds(struct ceph_osd_client *osdc)
while (!RB_EMPTY_ROOT(&osdc->osds)) {
struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
struct ceph_osd, o_node);
- __remove_osd(osdc, osd);
+ remove_osd(osdc, osd);
}
mutex_unlock(&osdc->request_mutex);
}
@@ -1064,7 +1117,7 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
if (time_before(jiffies, osd->lru_ttl))
break;
- __remove_osd(osdc, osd);
+ remove_osd(osdc, osd);
}
mutex_unlock(&osdc->request_mutex);
}
@@ -1079,8 +1132,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests)) {
- __remove_osd(osdc, osd);
-
+ remove_osd(osdc, osd);
return -ENODEV;
}
@@ -1254,8 +1306,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
if (list_empty(&req->r_osd_item))
req->r_osd = NULL;
}
-
- list_del_init(&req->r_req_lru_item); /* can be on notarget */
ceph_osdc_put_request(req);
}
@@ -1884,6 +1934,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
{
struct rb_node *p, *n;
+ dout("%s %p\n", __func__, osdc);
for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
@@ -1964,20 +2015,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
err = __map_request(osdc, req,
force_resend || force_resend_writes);
dout("__map_request returned %d\n", err);
- if (err == 0)
- continue; /* no change and no osd was specified */
if (err < 0)
continue; /* hrm! */
- if (req->r_osd == NULL) {
- dout("tid %llu maps to no valid osd\n", req->r_tid);
- needmap++; /* request a newer map */
- continue;
- }
+ if (req->r_osd == NULL || err > 0) {
+ if (req->r_osd == NULL) {
+ dout("lingering %p tid %llu maps to no osd\n",
+ req, req->r_tid);
+ /*
+ * A homeless lingering request makes
+ * no sense, as it's job is to keep
+ * a particular OSD connection open.
+ * Request a newer map and kick the
+ * request, knowing that it won't be
+ * resent until we actually get a map
+ * that can tell us where to send it.
+ */
+ needmap++;
+ }
- dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
- req->r_osd ? req->r_osd->o_osd : -1);
- __register_request(osdc, req);
- __unregister_linger_request(osdc, req);
+ dout("kicking lingering %p tid %llu osd%d\n", req,
+ req->r_tid, req->r_osd ? req->r_osd->o_osd : -1);
+ __register_request(osdc, req);
+ __unregister_linger_request(osdc, req);
+ }
}
reset_changed_osds(osdc);
mutex_unlock(&osdc->request_mutex);
@@ -2626,7 +2686,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
- req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
+ req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, truncate_seq, truncate_size,
false);
@@ -2669,7 +2729,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
int page_align = off & ~PAGE_MASK;
BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
- req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
+ req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
@@ -2920,6 +2980,20 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_auth_handshake *auth = &o->o_auth;
+ return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_auth_handshake *auth = &o->o_auth;
+ return ceph_auth_check_message_signature(auth, msg);
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = get_osd_con,
.put = put_osd_con,
@@ -2928,5 +3002,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
+ .sign_message = sign_message,
+ .check_message_signature = check_message_signature,
.fault = osd_reset,
};
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..15796696d64e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
return -EINVAL;
}
+static int crush_decode_straw2_bucket(void **p, void *end,
+ struct crush_bucket_straw2 *b)
+{
+ int j;
+ dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+ b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+ if (b->item_weights == NULL)
+ return -ENOMEM;
+ ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+ for (j = 0; j < b->h.size; j++)
+ b->item_weights[j] = ceph_decode_32(p);
+ return 0;
+bad:
+ return -EINVAL;
+}
+
static int skip_name_map(void **p, void *end)
{
int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
case CRUSH_BUCKET_STRAW:
size = sizeof(struct crush_bucket_straw);
break;
+ case CRUSH_BUCKET_STRAW2:
+ size = sizeof(struct crush_bucket_straw2);
+ break;
default:
err = -EINVAL;
goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (err < 0)
goto bad;
break;
+ case CRUSH_BUCKET_STRAW2:
+ err = crush_decode_straw2_bucket(p, end,
+ (struct crush_bucket_straw2 *)b);
+ if (err < 0)
+ goto bad;
+ break;
}
}
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 555013034f7a..096d91447e06 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -23,17 +23,15 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
if (!pages)
return ERR_PTR(-ENOMEM);
- down_read(&current->mm->mmap_sem);
while (got < num_pages) {
- rc = get_user_pages(current, current->mm,
+ rc = get_user_pages_unlocked(current, current->mm,
(unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, write_page, 0, pages + got, NULL);
+ num_pages - got, write_page, 0, pages + got);
if (rc < 0)
break;
BUG_ON(rc == 0);
got += rc;
}
- up_read(&current->mm->mmap_sem);
if (rc < 0)
goto fail;
return pages;