From 3d09fc424406b5610964507b3eb73cebbc3b4c38 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:30 -0500 Subject: tipc: eliminate case of writing to freed memory In the function tipc_nodesub_notify() we call a function pointer aggregated into the object to be notified, whereafter we set the function pointer to NULL. However, in some cases the function pointed to will free the struct containing the function pointer, resulting in a write to already freed memory. This bug seems to always have been there, without causing any notable harm. In this commit we fix the problem by inverting the order of the zeroing and the function call. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node_subscr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 7c59ab1d6ecb..2d13eea8574a 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -84,11 +84,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) void tipc_nodesub_notify(struct list_head *nsub_list) { struct tipc_node_subscr *ns, *safe; + net_ev_handler handle_node_down; list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { - if (ns->handle_node_down) { - ns->handle_node_down(ns->usr_handle); + handle_node_down = ns->handle_node_down; + if (handle_node_down) { ns->handle_node_down = NULL; + handle_node_down(ns->usr_handle); } } } -- cgit v1.2.3 From e4de5fab806f74622600ab7fd6ed22b7f911a8c5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:31 -0500 Subject: tipc: use negative error return values in functions In some places, TIPC functions return positive integers as return codes. This goes against standard Linux coding practice, and may even cause problems in some cases. We now change the return values of the functions filter_rcv() and filter_connect() to become signed integers, and return negative error codes when needed. The codes we use in these particular cases are still TIPC specific, since they are both part of the TIPC API and have no correspondence in errno.h. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ef0475568f9e..9a95dab13b7e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1269,17 +1269,16 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @msg: message * - * Returns TIPC error status code and socket error status code - * once it encounters some errors + * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise */ -static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; struct tipc_port *port = &tsk->port; struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - u32 retval = TIPC_ERR_NO_PORT; + int retval = -TIPC_ERR_NO_PORT; int res; if (msg_mcast(msg)) @@ -1382,32 +1381,33 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * * Called with socket lock already taken; port lock may also be taken. * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message + * to be rejected.
*/ -static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); - u32 res = TIPC_OK; + int rc = TIPC_OK; /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; if (sock->state == SS_READY) { if (msg_connected(msg)) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; } else { - res = filter_connect(tsk, &buf); - if (res != TIPC_OK || buf == NULL) - return res; + rc = filter_connect(tsk, &buf); + if (rc != TIPC_OK || buf == NULL) + return rc; } /* Reject message if there isn't room to queue it */ if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) - return TIPC_ERR_OVERLOAD; + return -TIPC_ERR_OVERLOAD; /* Enqueue message */ TIPC_SKB_CB(buf)->handle = NULL; @@ -1429,13 +1429,13 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { - u32 res; + int rc; struct tipc_sock *tsk = tipc_sk(sk); uint truesize = buf->truesize; - res = filter_rcv(sk, buf); - if (unlikely(res)) - tipc_reject_msg(buf, res); + rc = filter_rcv(sk, buf); + if (unlikely(rc)) + tipc_reject_msg(buf, -rc); if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) atomic_add(truesize, &tsk->dupl_rcvcnt); @@ -1455,7 +1455,7 @@ int tipc_sk_rcv(struct sk_buff *buf) struct tipc_port *port; struct sock *sk; u32 dport = msg_destport(buf_msg(buf)); - int err = TIPC_OK; + int rc = TIPC_OK; uint limit; /* Forward unresolved named message */ @@ -1467,7 +1467,7 @@ int tipc_sk_rcv(struct sk_buff *buf) /* Validate destination */ port = tipc_port_lock(dport); if (unlikely(!port)) { - err = TIPC_ERR_NO_PORT; + rc = -TIPC_ERR_NO_PORT; goto exit; } @@ -1478,22 +1478,22 @@ int tipc_sk_rcv(struct sk_buff *buf) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, buf); + rc = filter_rcv(sk, buf); } else { if (sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); if (sk_add_backlog(sk, buf, limit)) - err = TIPC_ERR_OVERLOAD; + rc = -TIPC_ERR_OVERLOAD; } bh_unlock_sock(sk); tipc_port_unlock(port); - if (likely(!err)) + if (likely(!rc)) return 0; exit: - tipc_reject_msg(buf, err); + tipc_reject_msg(buf, -rc); return -EHOSTUNREACH; } -- cgit v1.2.3 From 4f1688b2c63cd86f0d7bcf95a9b3040e38bd3c1a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:32 -0500 Subject: tipc: introduce send functions for chained buffers in link The current link implementation provides several different transmit functions, depending on the characteristics of the message to be sent: if it is an iovec or an sk_buff, if it needs fragmentation or not, if the caller holds the node_lock or not. The permutation of these options gives us an unwanted amount of unnecessarily complex code. As a first step towards simplifying the send path for all messages, we introduce two new send functions at link level, tipc_link_xmit2() and __tipc_link_xmit2(). The former looks up a link to the message destination, and if one is found, it grabs the node lock and calls the second function, which works exclusively inside the node lock protection. If no link is found, and the destination is on the same node, it delivers the message directly to the local destination socket. 
The new functions take a buffer chain where all packet headers are already prepared, and the correct MTU has been used. These two functions will later replace all other link-level transmit functions. The functions are not backwards compatible, so we have added them as new functions with temporary names. They are tested, but have no users yet. Those will be added later in this series. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- net/tipc/link.h | 2 + net/tipc/msg.c | 96 +++++++++++++++++++++++++++++++++----- net/tipc/msg.h | 25 +++++++++- 4 files changed, 250 insertions(+), 13 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index ad2c57f5868d..68d2afb44f2f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -850,6 +850,144 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) return res; } +/* tipc_link_cong: determine return value and how to treat the + * sent buffer during link congestion. + * - For plain, errorless user data messages we keep the buffer and + * return -ELINKCONG. + * - For all other messages we discard the buffer and return -EHOSTUNREACH + * - For TIPC internal messages we also reset the link + */ +static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint imp = tipc_msg_tot_importance(msg); + u32 oport = msg_tot_origport(msg); + + if (likely(imp <= TIPC_CRITICAL_IMPORTANCE)) { + if (!msg_errcode(msg) && !msg_reroute_cnt(msg)) { + link_schedule_port(link, oport, psz); + return -ELINKCONG; + } + } else { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + } + kfree_skb_list(buf); + return -EHOSTUNREACH; +} + +/** + * __tipc_link_xmit2(): same as tipc_link_xmit2, but destlink is known & locked + * @link: link to use + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket + * user data messages) or -EHOSTUNREACH (all other messages/senders) + * Only the socket functions tipc_send_stream() and tipc_send_packet() need + * to act on the return value, since they may need to do more send attempts. + */ +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint qsz = link->out_queue_size; + uint sndlim = link->queue_limit[0]; + uint imp = tipc_msg_tot_importance(msg); + uint mtu = link->max_pkt; + uint ack = mod(link->next_in_no - 1); + uint seqno = link->next_out_no; + uint bc_last_in = link->owner->bclink.last_in; + struct tipc_media_addr *addr = &link->media_addr; + struct sk_buff *next = buf->next; + + /* Match queue limits against msg importance: */ + if (unlikely(qsz >= link->queue_limit[imp])) + return tipc_link_cong(link, buf); + + /* Has valid packet limit been used ?
*/ + if (unlikely(psz > mtu)) { + kfree_skb_list(buf); + return -EMSGSIZE; + } + + /* Prepare each packet for sending, and add to outqueue: */ + while (buf) { + next = buf->next; + msg = buf_msg(buf); + msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_bcast_ack(msg, bc_last_in); + + if (!link->first_out) { + link->first_out = buf; + } else if (qsz < sndlim) { + link->last_out->next = buf; + } else if (tipc_msg_bundle(link->last_out, buf, mtu)) { + link->stats.sent_bundled++; + buf = next; + next = buf->next; + continue; + } else if (tipc_msg_make_bundle(&buf, mtu, link->addr)) { + link->stats.sent_bundled++; + link->stats.sent_bundles++; + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } else { + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } + + /* Send packet if possible: */ + if (likely(++qsz <= sndlim)) { + tipc_bearer_send(link->bearer_id, buf, addr); + link->next_out = next; + link->unacked_window = 0; + } + seqno++; + link->last_out = buf; + buf = next; + } + link->next_out_no = seqno; + link->out_queue_size = qsz; + return 0; +} + +/** + * tipc_link_xmit2() is the general link level function for message sending + * @buf: chain of buffers containing message + * @dsz: amount of user data to be sent + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) +{ + struct tipc_link *link = NULL; + struct tipc_node *node; + int rc = -EHOSTUNREACH; + + node = tipc_node_find(dnode); + if (node) { + tipc_node_lock(node); + link = node->active_links[selector & 1]; + if (link) + rc = __tipc_link_xmit2(link, buf); + tipc_node_unlock(node); + } + + if (link) + return rc; + + if (likely(in_own_node(dnode))) + return tipc_sk_rcv(buf); + + kfree_skb_list(buf); + return rc; +} + /* * tipc_link_sync_xmit - synchronize broadcast link endpoints. 
* @@ -1238,7 +1376,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); + msg_set_type(msg, BUNDLE_CLOSED); l_ptr->next_out = buf->next; return 0; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 200d518b218e..227ff8120897 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -227,8 +227,10 @@ void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); +int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_names_xmit(struct list_head *message_list, u32 dest); int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_iovec_xmit_fast(struct tipc_port *sender, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8be6e94a1ca9..e02afc96edd7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -37,20 +37,11 @@ #include "core.h" #include "msg.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +static unsigned int align(unsigned int i) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return (i + 3) & ~3u; } - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { @@ -152,3 +143,86 @@ out_free: kfree_skb(*buf); return 0; } + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bbuf: the existing buffer ("bundle") + * @buf: buffer to be appended + * @mtu: max allowable size for the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu) +{ + struct tipc_msg *bmsg = buf_msg(bbuf); + struct tipc_msg *msg = buf_msg(buf); + unsigned int bsz = msg_size(bmsg); + unsigned int msz = msg_size(msg); + u32 start = align(bsz); + u32 max = mtu - INT_H_SIZE; + u32 pad = start - bsz; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (likely(msg_type(bmsg) != BUNDLE_OPEN)) + return false; + if (unlikely(skb_tailroom(bbuf) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bbuf, pad + msz); + skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + bbuf->next = buf->next; + kfree_skb(buf); + return true; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @buf: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. 
(Not always present in header) + * Replaces buffer if successful + * Returns true if sucess, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) +{ + struct sk_buff *bbuf; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*buf); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == CHANGEOVER_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bbuf = tipc_buf_acquire(max); + if (!bbuf) + return false; + + skb_trim(bbuf, INT_H_SIZE); + bmsg = buf_msg(bbuf); + tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + bbuf->next = (*buf)->next; + tipc_msg_bundle(bbuf, *buf, mtu); + *buf = bbuf; + return true; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 503511903d1d..41a05fa8d608 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -463,6 +463,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define FRAGMENT 1 #define LAST_FRAGMENT 2 +/* Bundling protocol message types + */ +#define BUNDLE_OPEN 0 +#define BUNDLE_CLOSED 1 + /* * Link management protocol message types */ @@ -706,12 +711,30 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -u32 tipc_msg_tot_importance(struct tipc_msg *m); +static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_importance(msg_get_wrapped(m)); + return msg_importance(m); +} + +static inline u32 msg_tot_origport(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_origport(msg_get_wrapped(m)); + return msg_origport(m); +} + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); + int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, unsigned int len, int max_size, struct sk_buff **buf); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); + #endif -- cgit v1.2.3 From 16e166b88cdfd508b88934ec0c8a0ec97e6b30f8 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:33 -0500 Subject: tipc: make link mtu easily accessible from socket Message fragmentation is currently performed at link level, inside the protection of node_lock. This potentially binds up the sending link structure for a long time, instead of letting it do other tasks, such as handle reception of new packets. In this commit, we make the MTUs of each active link become easily accessible from the socket level, i.e., without taking any spinlock or dereferencing the target link pointer. This way, we make it possible to perform fragmentation in the sending socket, before sending the whole fragment chain to the link for transport. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/node.c | 25 +++++++++++++++++++++---- net/tipc/node.h | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/node.c b/net/tipc/node.c index 5b44c3041be4..d959343043fd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012 Ericsson AB + * Copyright (c) 2000-2006, 2012-2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -155,21 +155,25 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (!active[0]) { active[0] = active[1] = l_ptr; node_established_contact(n_ptr); - return; + goto exit; } if (l_ptr->priority < active[0]->priority) { pr_info("New link <%s> becomes standby\n", l_ptr->name); - return; + goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; - return; + goto exit; } pr_info("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) pr_info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; +exit: + /* Leave room for changeover header when returning 'mtu' to users: */ + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; } /** @@ -229,6 +233,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); + + /* Leave room for changeover header when returning 'mtu' to users: */ + if (active[0]) { + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + return; + } + + /* Loopback link went down? No fragmentation needed from now on. */ + if (n_ptr->addr == tipc_own_addr) { + n_ptr->act_mtus[0] = MAX_MSG_SIZE; + n_ptr->act_mtus[1] = MAX_MSG_SIZE; + } } int tipc_node_active_links(struct tipc_node *n_ptr) diff --git a/net/tipc/node.h b/net/tipc/node.h index 9087063793f2..b61716a8218e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -41,6 +41,7 @@ #include "addr.h" #include "net.h" #include "bearer.h" +#include "msg.h" /* * Out-of-range value for node signature @@ -105,6 +106,7 @@ struct tipc_node { spinlock_t lock; struct hlist_node hash; struct tipc_link *active_links[2]; + u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; unsigned int action_flags; struct tipc_node_bclink bclink; @@ -143,4 +145,19 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } +static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +{ + struct tipc_node *node; + u32 mtu; + + node = tipc_node_find(addr); + + if (likely(node)) + mtu = node->act_mtus[selector & 1]; + else + mtu = MAX_MSG_SIZE; + + return mtu; +} + #endif -- cgit v1.2.3 From 067608e9d019d6477fd45dd948e81af0e5bf599f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:34 -0500 Subject: tipc: introduce direct iovec to buffer chain fragmentation function Fragmentation at message sending is currently performed in two places in link.c, depending on whether data to be transmitted is delivered in the form of an iovec or as a big sk_buff. Those functions are also tightly entangled with the send functions that are using them. We now introduce a re-entrant, standalone function, tipc_msg_build2(), that builds a packet chain directly from an iovec. 
Each fragment is sized according to the MTU value given by the caller, and is prepended with a correctly built fragment header, when needed. The function is independent from who is calling and where the chain will be delivered, as long as the caller is able to indicate a correct MTU. The function is tested, but not called by anybody yet. Since it is incompatible with the existing tipc_msg_build(), and we cannot yet remove that function, we have given it a temporary name. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/msg.h | 3 ++ 2 files changed, 105 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e02afc96edd7..4093b4c94d26 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -144,6 +144,108 @@ out_free: return 0; } + +/** + * tipc_msg_build2 - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @iov: User data + * @offset: Posision in iov to start copying from + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @chain: Buffer or chain of buffers to be returned to caller + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *buf, *prev; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + buf = tipc_buf_acquire(msz); + *chain = buf; + if (unlikely(!buf)) + return -ENOMEM; + skb_copy_to_linear_data(buf, mhdr, mhsz); + pktpos = buf->data + mhsz; + if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + + /* Prepare first fragment */ + *chain = buf = tipc_buf_acquire(pktmax); + if (!buf) + return -ENOMEM; + pktpos = buf->data; + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + offset += pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + prev = buf; + buf = tipc_buf_acquire(pktsz); + if (!buf) { + rc = -ENOMEM; + goto error; + } + prev->next = buf; + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos = buf->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + + msg_set_type(buf_msg(buf), LAST_FRAGMENT); + return dsz; +error: + kfree_skb_list(*chain); + *chain = NULL; + return rc; +} + /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one * @bbuf: the existing buffer ("bundle") diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 
41a05fa8d608..5e1339d60c69 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -737,4 +737,7 @@ bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int mtu , struct sk_buff **chain); + #endif -- cgit v1.2.3 From 8db1bae30b7cd3c3abc05f467d0f7c69b33b80e9 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:35 -0500 Subject: tipc: separate building and sending of rejected messages The way we build and send rejected messages is currently perceived as hard to follow, partly because we let the transmission go via deep call chains through functions such as tipc_reject_msg() and net_route_msg(). We want to remove those functions, and make the call sequences shallower and simpler. For this purpose, we separate building and sending of rejected messages. We build the reject message using the new function tipc_msg_reverse(), and let the transmission go via the newly introduced tipc_link_xmit2() function, as all transmission eventually will do. We also ensure that all calls to tipc_link_xmit2() are made outside port_lock/bh_lock_sock. Finally, we replace all calls to tipc_reject_msg() with the two new calls at all locations in the code that we want to keep. The remaining calls are made from code that we are planning to remove, along with tipc_reject_msg() itself. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 42 ++++++++++++++++++++++++++++++++++++++ net/tipc/msg.h | 2 ++ net/tipc/socket.c | 28 ++++++++++++++++++++-------- 3 files changed, 64 insertions(+), 8 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4093b4c94d26..6070dd0ec634 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -37,6 +37,8 @@ #include "core.h" #include "msg.h" +#define MAX_FORWARD_SIZE 1024 + static unsigned int align(unsigned int i) { return (i + 3) & ~3u; @@ -328,3 +330,43 @@ bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) *buf = bbuf; return true; } + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +{ + struct tipc_msg *msg = buf_msg(buf); + uint imp = msg_importance(msg); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf) || !msg_isdata(msg)) + goto exit; + if (msg_dest_droppable(msg) || msg_errcode(msg)) + goto exit; + + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + msg_set_importance(msg, min_t(uint, ++imp, TIPC_CRITICAL_IMPORTANCE)); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, tipc_own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + return false; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5e1339d60c69..38050941a504 100644 ---
a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -725,6 +725,8 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9a95dab13b7e..5961a6335f49 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -39,6 +39,7 @@ #include "node.h" #include +#include "link.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ @@ -123,9 +124,12 @@ static void advance_rx_queue(struct sock *sk) static void reject_rx_queue(struct sock *sk) { struct sk_buff *buf; + u32 dnode; - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); + } } /** @@ -303,6 +307,7 @@ static int tipc_release(struct socket *sock) struct tipc_sock *tsk; struct tipc_port *port; struct sk_buff *buf; + u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -331,7 +336,8 @@ static int tipc_release(struct socket *sock) sock->state = SS_DISCONNECTING; tipc_port_disconnect(port->ref); } - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); } } @@ -1430,14 +1436,15 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { int rc; + u32 onode; struct tipc_sock *tsk = tipc_sk(sk); uint truesize = buf->truesize; rc = filter_rcv(sk, buf); - if (unlikely(rc)) - tipc_reject_msg(buf, -rc); - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + if (unlikely(rc && tipc_msg_reverse(buf, &onode, -rc))) + tipc_link_xmit2(buf, onode, 0); + else if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) atomic_add(truesize, &tsk->dupl_rcvcnt); return 0; @@ -1457,6 +1464,7 @@ int tipc_sk_rcv(struct sk_buff *buf) u32 dport = msg_destport(buf_msg(buf)); int rc = TIPC_OK; uint limit; + u32 dnode; /* Forward unresolved named message */ if (unlikely(!dport)) { @@ -1493,7 +1501,9 @@ int tipc_sk_rcv(struct sk_buff *buf) if (likely(!rc)) return 0; exit: - tipc_reject_msg(buf, -rc); + if (!tipc_msg_reverse(buf, &dnode, -rc)) + return -EHOSTUNREACH; + tipc_link_xmit2(buf, dnode, 0); return -EHOSTUNREACH; } @@ -1758,6 +1768,7 @@ static int tipc_shutdown(struct socket *sock, int how) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; struct sk_buff *buf; + u32 peer; int res; if (how != SHUT_RDWR) @@ -1778,7 +1789,8 @@ restart: goto restart; } tipc_port_disconnect(port->ref); - tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); + if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) + tipc_link_xmit2(buf, peer, 0); } else { tipc_port_shutdown(port->ref); } -- cgit v1.2.3 From 5a379074a7dd6d288ec9e6472769ba0e0c54dd85 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:36 -0500 Subject: tipc: introduce message evaluation function When a message arrives in a node and finds no destination socket, we may need to drop it, reject it, or forward it after a secondary destination lookup. The latter two cases currently result in a code path that is perceived as complex, because it follows a deep call chain via obscure functions such as net_route_named_msg() and net_route_msg().
We now introduce a function, tipc_msg_eval(), that takes the decision about whether such a message should be rejected or forwarded, but leaves it to the caller to actually perform the indicated action. If the decision is 'reject', it is still the task of the recently introduced function tipc_msg_reverse() to take the final decision about whether the message is rejectable or not. In the latter case it drops the message. As a result of this change, we can finally eliminate the function net_route_named_msg(), and hence become independent of net_route_msg(). Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 38 ++++++++++++++++++++++++++++++++++++++ net/tipc/msg.h | 2 ++ net/tipc/net.c | 33 ++++++--------------------------- net/tipc/socket.c | 16 +++++----------- 4 files changed, 51 insertions(+), 38 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 6070dd0ec634..7bfc4422bf2c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -36,6 +36,8 @@ #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" #define MAX_FORWARD_SIZE 1024 @@ -370,3 +372,39 @@ exit: kfree_skb(buf); return false; } + +/** + * tipc_msg_eval: determine fate of message that found no destination + * @buf: the buffer containing the message. + * @dnode: return value: next-hop node, if message to be forwarded + * @err: error code to use, if message to be rejected + * + * Does not consume buffer + * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error + * code if message to be rejected + */ +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +{ + struct tipc_msg *msg = buf_msg(buf); + u32 dport; + + if (msg_type(msg) != TIPC_NAMED_MSG) + return -TIPC_ERR_NO_PORT; + if (skb_linearize(buf)) + return -TIPC_ERR_NO_NAME; + if (msg_data_sz(msg) > MAX_FORWARD_SIZE) + return -TIPC_ERR_NO_NAME; + if (msg_reroute_cnt(msg) > 0) + return -TIPC_ERR_NO_NAME; + + *dnode = addr_domain(msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(msg_nametype(msg), + msg_nameinst(msg), + dnode); + if (!dport) + return -TIPC_ERR_NO_NAME; + msg_incr_reroute_cnt(msg); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + return TIPC_OK; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 38050941a504..7d574346e75e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -727,6 +727,8 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); diff --git a/net/tipc/net.c b/net/tipc/net.c index f64375e7f99f..5f7d6ffb5465 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -1,7 +1,7 @@ /* * net/tipc/net.c: TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -103,29 +103,6 @@ * This is always used within the scope of a tipc_nametbl_lock(read). * - A local spin_lock protecting the queue of subscriber events. 
*/ - -static void net_route_named_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 dnode; - u32 dport; - - if (!msg_named(msg)) { - kfree_skb(buf); - return; - } - - dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - if (dport) { - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); - tipc_net_route_msg(buf); - return; - } - tipc_reject_msg(buf, TIPC_ERR_NO_NAME); -} - void tipc_net_route_msg(struct sk_buff *buf) { struct tipc_msg *msg; @@ -141,10 +118,12 @@ void tipc_net_route_msg(struct sk_buff *buf) if (msg_isdata(msg)) { if (msg_mcast(msg)) tipc_port_mcast_rcv(buf, NULL); - else if (msg_destport(msg)) + else if (msg_destport(msg)) { tipc_sk_rcv(buf); - else - net_route_named_msg(buf); + } else { + pr_warn("Cannot route msg; no destination\n"); + kfree_skb(buf); + } return; } switch (msg_user(msg)) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5961a6335f49..e642ed5b3602 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1466,16 +1466,10 @@ int tipc_sk_rcv(struct sk_buff *buf) uint limit; u32 dnode; - /* Forward unresolved named message */ - if (unlikely(!dport)) { - tipc_net_route_msg(buf); - return 0; - } - - /* Validate destination */ + /* Validate destination and message */ port = tipc_port_lock(dport); if (unlikely(!port)) { - rc = -TIPC_ERR_NO_PORT; + rc = tipc_msg_eval(buf, &dnode); goto exit; } @@ -1494,17 +1488,17 @@ int tipc_sk_rcv(struct sk_buff *buf) if (sk_add_backlog(sk, buf, limit)) rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); tipc_port_unlock(port); if (likely(!rc)) return 0; exit: - if (!tipc_msg_reverse(buf, &dnode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) return -EHOSTUNREACH; + tipc_link_xmit2(buf, dnode, 0); - return -EHOSTUNREACH; + return (rc < 0) ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) -- cgit v1.2.3 From e2dafe87d328774a94fdd77718422b9cbd97ed47 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:37 -0500 Subject: tipc: RDM/DGRAM transport uses new fragmenting and sending functions We merge the code for sending port name and port identity addressed messages into the corresponding send functions in socket.c, and start using the new fragmenting and transmit functions we just have introduced. This saves a call level and quite a few code lines, as well as making this part of the code easier to follow. As a consequence, the functions tipc_send2name() and tipc_send2port() in port.c can be removed. For practical reasons, we break out the code for sending multicast messages from tipc_sendmsg() and move it into a separate function, tipc_sendmcast(), but we do not yet convert it into using the new build/send functions. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/port.c | 90 ------------------------------- net/tipc/port.h | 11 ---- net/tipc/socket.c | 158 +++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 110 insertions(+), 149 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/port.c b/net/tipc/port.c index 5fd7acce01ea..8350ec932514 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -806,93 +806,3 @@ int tipc_send(struct tipc_port *p_ptr, } return -ELINKCONG; } - -/** - * tipc_send2name - send message sections to port name - */ -int tipc_send2name(struct tipc_port *p_ptr, - struct tipc_name const *name, - unsigned int domain, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_hdr_sz(msg, NAMED_H_SIZE); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - - if (likely(destport || destnode)) { - if (likely(in_own_node(destnode))) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, - len, TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; - } - return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NAME); -} - -/** - * tipc_send2port - send message sections to port identity - */ -int tipc_send2port(struct tipc_port *p_ptr, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - dest->node); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index cf4ca5b1d9a4..e566d55e2655 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -140,17 +140,6 @@ int tipc_send(struct tipc_port *port, struct iovec const *msg_sect, unsigned int len); -int tipc_send2name(struct tipc_port *port, - struct tipc_name const *name, - u32 domain, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2port(struct tipc_port *port, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len); - int tipc_port_mcast_xmit(struct tipc_port *port, struct tipc_name_seq const *seq, struct iovec const *msg, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e642ed5b3602..5690751cbfa5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,8 +36,9 @@ #include "core.h" #include "port.h" +#include "name_table.h" #include "node.h" - +#include "link.h" #include #include 
"link.h" @@ -545,6 +546,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) { struct tipc_cfg_msg_hdr hdr; + if (unlikely(dest->addrtype == TIPC_ADDR_ID)) + return 0; if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) return 0; if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) @@ -587,13 +590,49 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return 0; } +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @iov: message data to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct iovec *iov, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + int rc; + + do { + if (sock->state != SS_READY) { + rc = -EOPNOTSUPP; + break; + } + rc = tipc_port_mcast_xmit(&tsk->port, seq, iov, dsz); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) + sock->state = SS_CONNECTING; + break; + } + if (rc != -ELINKCONG) + break; + rc = tipc_wait_for_sndmsg(sock, &timeo); + } while (!rc); + + return rc; +} /** * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dsz: amount of user data to be sent * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -603,93 +642,116 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) + struct msghdr *m, size_t dsz) { + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int needs_conn; + struct tipc_msg *mhdr = &port->phdr; + struct iovec *iov = m->msg_iov; + u32 dnode, dport; + struct sk_buff *buf; + struct tipc_name_seq *seq = &dest->addr.nameseq; + u32 mtu; long timeo; - int res = -EINVAL; + int rc = -EINVAL; if (unlikely(!dest)) return -EDESTADDRREQ; + if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if (total_len > TIPC_MAX_USER_MSG_SIZE) + + if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) lock_sock(sk); - needs_conn = (sock->state != SS_READY); - if (unlikely(needs_conn)) { + if (unlikely(sock->state != SS_READY)) { if (sock->state == SS_LISTENING) { - res = -EPIPE; + rc = -EPIPE; goto exit; } if (sock->state != SS_UNCONNECTED) { - res = -EISCONN; + rc = -EISCONN; goto exit; } if (tsk->port.published) { - res = -EOPNOTSUPP; + rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { tsk->port.conn_type = dest->addr.name.name.type; tsk->port.conn_instance = dest->addr.name.name.instance; } - - /* Abort any pending connection attempts (very unlikely) */ - reject_rx_queue(sk); } + rc = dest_name_check(dest, m); + if (rc) + goto exit; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - do { - if (dest->addrtype == TIPC_ADDR_NAME) { - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_send2name(port, - 
&dest->addr.name.name, - dest->addr.name.domain, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(port, - &dest->addr.id, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_MCAST) { - if (needs_conn) { - res = -EOPNOTSUPP; - break; - } - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_port_mcast_xmit(port, - &dest->addr.nameseq, - m->msg_iov, - total_len); + + if (dest->addrtype == TIPC_ADDR_MCAST) { + rc = tipc_sendmcast(sock, seq, iov, dsz, timeo); + goto exit; + } else if (dest->addrtype == TIPC_ADDR_NAME) { + u32 type = dest->addr.name.name.type; + u32 inst = dest->addr.name.name.instance; + u32 domain = dest->addr.name.domain; + + dnode = domain; + msg_set_type(mhdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(mhdr, NAMED_H_SIZE); + msg_set_nametype(mhdr, type); + msg_set_nameinst(mhdr, inst); + msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); + dport = tipc_nametbl_translate(type, inst, &dnode); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dport); + if (unlikely(!dport && !dnode)) { + rc = -EHOSTUNREACH; + goto exit; } - if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(mhdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(mhdr, 0); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dest->addr.id.ref); + msg_set_hdr_sz(mhdr, BASIC_H_SIZE); + } + +new_mtu: + mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + if (rc < 0) + goto exit; + + do { + rc = tipc_link_xmit2(buf, dnode, tsk->port.ref); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) sock->state = SS_CONNECTING; + rc = dsz; break; } - res = tipc_wait_for_sndmsg(sock, &timeo); - if (res) + if (rc == -EMSGSIZE) + goto new_mtu; + + if (rc != -ELINKCONG) break; - } while (1); + + rc = tipc_wait_for_sndmsg(sock, &timeo); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + + return rc; } static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) -- cgit v1.2.3 From 4ccfe5e0419eefcab3010ff6a87ffb03aef86c5d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:38 -0500 Subject: tipc: connection oriented transport uses new send functions We move the message sending across established connections to use the message preparation and send functions introduced earlier in this series. We now do the message preparation and call to the link send function directly from the socket, instead of going via the port layer. As a consequence of this change, the functions tipc_send(), tipc_port_iovec_rcv(), tipc_port_iovec_reject() and tipc_reject_msg() become unreferenced and can be eliminated from port.c. For the same reason, the functions tipc_link_xmit_fast(), tipc_link_iovec_xmit_long() and tipc_link_iovec_fast() can be eliminated from link.c. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/link.c | 249 ------------------------------------------------------ net/tipc/port.c | 142 +------------------------------ net/tipc/port.h | 12 --- net/tipc/socket.c | 180 +++++++++++++++------------------------ 4 files changed, 70 insertions(+), 513 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 68d2afb44f2f..93a8033263c0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -82,9 +82,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); @@ -1070,252 +1067,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } } -/* - * tipc_link_xmit_fast: Entry for data messages where the - * destination link is known and the header is complete, - * inclusive total message length. Very time critical. - * Link is locked. Returns user data length. - */ -static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) -{ - struct tipc_msg *msg = buf_msg(buf); - int res = msg_data_sz(msg); - - if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->bearer_id, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - else - *used_max_pkt = l_ptr->max_pkt; - } - return __tipc_link_xmit(l_ptr, buf); /* All other cases */ -} - -/* - * tipc_link_iovec_xmit_fast: Entry for messages where the - * destination processor is known and the header is complete, - * except for total message length. - * Returns user data length or errno. - */ -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_msg *hdr = &sender->phdr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct tipc_node *node; - int res; - u32 selector = msg_origport(hdr) & 1; - -again: - /* - * Try building message using port's max_pkt hint. - * (Must not hold any locks while building message.) 
- */ - res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf); - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - return res; - - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[selector]; - if (likely(l_ptr)) { - if (likely(buf)) { - res = tipc_link_xmit_fast(l_ptr, buf, - &sender->max_pkt); -exit: - tipc_node_unlock(node); - return res; - } - - /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr)) { - res = link_schedule_port(l_ptr, - sender->ref, res); - goto exit; - } - - /* - * Message size exceeds max_pkt hint; update hint, - * then re-try fast path or fragment the message - */ - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - - - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) - goto again; - - return tipc_link_iovec_long_xmit(sender, msg_sect, - len, destaddr); - } - tipc_node_unlock(node); - } - - /* Couldn't find a link to the destination node */ - kfree_skb(buf); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); - return -ENETUNREACH; -} - -/* - * tipc_link_iovec_long_xmit(): Entry for long messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Link and bearer congestion status have been checked to be ok, - * and are ignored if they change. - * - * Note that fragments do not use the full link MTU so that they won't have - * to undergo refragmentation if link changeover causes them to be sent - * over another link with an additional tunnel header added as prefix. - * (Refragmentation will still occur if the other link has a smaller MTU.) - * - * Returns user data length or errno. - */ -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_link *l_ptr; - struct tipc_node *node; - struct tipc_msg *hdr = &sender->phdr; - u32 dsz = len; - u32 max_pkt, fragm_sz, rest; - struct tipc_msg fragm_hdr; - struct sk_buff *buf, *buf_chain, *prev; - u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar __user *sect_crs; - int curr_sect; - u32 fragm_no; - int res = 0; - -again: - fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; - /* leave room for tunnel header in case of link changeover */ - fragm_sz = max_pkt - INT_H_SIZE; - /* leave room for fragmentation header in each fragment */ - rest = dsz; - fragm_crs = 0; - fragm_rest = 0; - sect_rest = 0; - sect_crs = NULL; - curr_sect = -1; - - /* Prepare reusable fragment header */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(hdr)); - msg_set_size(&fragm_hdr, max_pkt); - msg_set_fragm_no(&fragm_hdr, 1); - - /* Prepare header of first fragment */ - buf_chain = buf = tipc_buf_acquire(max_pkt); - if (!buf) - return -ENOMEM; - buf->next = NULL; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - hsz = msg_hdr_sz(hdr); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - - /* Chop up message */ - fragm_crs = INT_H_SIZE + hsz; - fragm_rest = fragm_sz - hsz; - - do { /* For all sections */ - u32 sz; - - if (!sect_rest) { - sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = msg_sect[curr_sect].iov_base; - } - - if (sect_rest < fragm_rest) - sz = sect_rest; - else - sz = fragm_rest; - - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { - res = -EFAULT; -error: - kfree_skb_list(buf_chain); - return res; - } - sect_crs += sz; - sect_rest -= sz; - fragm_crs += sz; - fragm_rest -= sz; - 
rest -= sz; - - if (!fragm_rest && rest) { - - /* Initiate new fragment: */ - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } else { - msg_set_type(&fragm_hdr, FRAGMENT); - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - msg_set_fragm_no(&fragm_hdr, ++fragm_no); - prev = buf; - buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) { - res = -ENOMEM; - goto error; - } - - buf->next = NULL; - prev->next = buf; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - fragm_crs = INT_H_SIZE; - fragm_rest = fragm_sz; - } - } while (rest > 0); - - /* - * Now we have a buffer chain. Select a link and check - * that packet size is still OK - */ - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[sender->ref & 1]; - if (!l_ptr) { - tipc_node_unlock(node); - goto reject; - } - if (l_ptr->max_pkt < max_pkt) { - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - kfree_skb_list(buf_chain); - goto again; - } - } else { -reject: - kfree_skb_list(buf_chain); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, - TIPC_ERR_NO_NODE); - return -ENETUNREACH; - } - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - tipc_node_unlock(node); - return dsz; -} - /* * tipc_link_push_packet: Push one unsent packet to the media */ diff --git a/net/tipc/port.c b/net/tipc/port.c index 8350ec932514..606ff1a78e2b 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -211,6 +211,7 @@ u32 tipc_port_init(struct tipc_port *p_ptr, } p_ptr->max_pkt = MAX_PKT_DEFAULT; + p_ptr->sent = 1; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); @@ -279,92 +280,6 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, return buf; } -int tipc_reject_msg(struct sk_buff *buf, u32 err) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *rbuf; - struct tipc_msg *rmsg; - int hdr_sz; - u32 imp; - u32 data_sz = msg_data_sz(msg); - u32 src_node; - u32 rmsg_sz; - - /* discard rejected message if it shouldn't be returned to sender */ - if (WARN(!msg_isdata(msg), - "attempt to reject message with user=%u", msg_user(msg))) { - dump_stack(); - goto exit; - } - if (msg_errcode(msg) || msg_dest_droppable(msg)) - goto exit; - - /* - * construct returned message by copying rejected message header and - * data (or subset), then updating header fields that need adjusting - */ - hdr_sz = msg_hdr_sz(msg); - rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); - - rbuf = tipc_buf_acquire(rmsg_sz); - if (rbuf == NULL) - goto exit; - - rmsg = buf_msg(rbuf); - skb_copy_to_linear_data(rbuf, msg, rmsg_sz); - - if (msg_connected(rmsg)) { - imp = msg_importance(rmsg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(rmsg, ++imp); - } - msg_set_non_seq(rmsg, 0); - msg_set_size(rmsg, rmsg_sz); - msg_set_errcode(rmsg, err); - msg_set_prevnode(rmsg, tipc_own_addr); - msg_swap_words(rmsg, 4, 5); - if (!msg_short(rmsg)) - msg_swap_words(rmsg, 6, 7); - - /* send self-abort message when rejecting on a connected port */ - if (msg_connected(msg)) { - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - - if (p_ptr) { - struct sk_buff *abuf = NULL; - - if (p_ptr->connected) - abuf = port_build_self_abort_msg(p_ptr, err); - 
tipc_port_unlock(p_ptr); - tipc_net_route_msg(abuf); - } - } - - /* send returned message & dispose of rejected message */ - src_node = msg_prevnode(msg); - if (in_own_node(src_node)) - tipc_sk_rcv(rbuf); - else - tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); -exit: - kfree_skb(buf); - return data_sz; -} - -int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (!buf) - return res; - - return tipc_reject_msg(buf, err); -} - static void port_timeout(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); @@ -698,7 +613,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, (net_ev_handler)port_handle_node_down); res = 0; exit: - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_node_get_mtu(peer->node, ref); return res; } @@ -753,56 +668,3 @@ int tipc_port_shutdown(u32 ref) tipc_net_route_msg(buf); return tipc_port_disconnect(ref); } - -/* - * tipc_port_iovec_rcv: Concatenate and deliver sectioned - * message for this node. - */ -static int tipc_port_iovec_rcv(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (likely(buf)) - tipc_sk_rcv(buf); - return res; -} - -/** - * tipc_send - send message sections on connection - */ -int tipc_send(struct tipc_port *p_ptr, - struct iovec const *msg_sect, - unsigned int len) -{ - u32 destnode; - int res; - - if (!p_ptr->connected) - return -EINVAL; - - p_ptr->congested = 1; - if (!tipc_port_congested(p_ptr)) { - destnode = tipc_port_peernode(p_ptr); - if (likely(!in_own_node(destnode))) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - - if (likely(res != -ELINKCONG)) { - p_ptr->congested = 0; - if (res > 0) - p_ptr->sent++; - return res; - } - } - if (tipc_port_unreliable(p_ptr)) { - p_ptr->congested = 0; - return len; - } - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index e566d55e2655..231d9488189c 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -104,8 +104,6 @@ struct tipc_port_list; u32 tipc_port_init(struct tipc_port *p_ptr, const unsigned int importance); -int tipc_reject_msg(struct sk_buff *buf, u32 err); - void tipc_acknowledge(u32 port_ref, u32 ack); void tipc_port_destroy(struct tipc_port *p_ptr); @@ -136,21 +134,11 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); * TIPC messaging routines */ -int tipc_send(struct tipc_port *port, - struct iovec const *msg_sect, - unsigned int len); - int tipc_port_mcast_xmit(struct tipc_port *port, struct tipc_name_seq const *seq, struct iovec const *msg, unsigned int len); -int tipc_port_iovec_reject(struct tipc_port *p_ptr, - struct tipc_msg *hdr, - struct iovec const *msg_sect, - unsigned int len, - int err); - struct sk_buff *tipc_port_get_ports(void); void tipc_port_proto_rcv(struct sk_buff *buf); void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5690751cbfa5..bfe79bbd83a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -206,6 +206,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tsk->conn_timeout = 
CONN_TIMEOUT_DEFAULT; + tsk->port.sent = 0; atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); @@ -784,30 +785,40 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) } /** - * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held + * tipc_send_stream - send stream-oriented data + * @iocb: (unused) * @sock: socket structure - * @m: message to send - * @total_len: length of message + * @m: data to send + * @dsz: total length of data to be transmitted * - * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. + * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; + struct tipc_msg *mhdr = &port->phdr; + struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int res = -EINVAL; + u32 ref = port->ref; + int rc = -EINVAL; long timeo; + u32 dnode; + uint mtu, send, sent = 0; /* Handle implied connection establishment */ - if (unlikely(dest)) - return tipc_sendmsg(iocb, sock, m, total_len); - - if (total_len > TIPC_MAX_USER_MSG_SIZE) + if (unlikely(dest)) { + rc = tipc_sendmsg(iocb, sock, m, dsz); + if (dsz && (dsz == rc)) + tsk->port.sent = 1; + return rc; + } + if (dsz > (uint)INT_MAX) return -EMSGSIZE; if (iocb) @@ -815,123 +826,68 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - res = -EPIPE; + rc = -EPIPE; else - res = -ENOTCONN; + rc = -ENOTCONN; goto exit; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + dnode = tipc_port_peernode(port); + port->congested = 1; + +next: + mtu = port->max_pkt; + send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); + rc = tipc_msg_build2(mhdr, m->msg_iov, sent, send, mtu, &buf); + if (unlikely(rc < 0)) + goto exit; do { - res = tipc_send(&tsk->port, m->msg_iov, total_len); - if (likely(res != -ELINKCONG)) - break; - res = tipc_wait_for_sndpkt(sock, &timeo); - if (res) - break; - } while (1); + port->congested = 1; + if (likely(!tipc_port_congested(port))) { + rc = tipc_link_xmit2(buf, dnode, ref); + if (likely(!rc)) { + port->sent++; + sent += send; + if (sent == dsz) + break; + goto next; + } + if (rc == -EMSGSIZE) { + port->max_pkt = tipc_node_get_mtu(dnode, ref); + goto next; + } + if (rc != -ELINKCONG) + break; + } + rc = tipc_wait_for_sndpkt(sock, &timeo); + } while (!rc); + + port->congested = 0; exit: if (iocb) release_sock(sk); - return res; + return sent ? sent : rc; } /** - * tipc_send_stream - send stream-oriented data - * @iocb: (unused) + * tipc_send_packet - send a connection-oriented message + * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure - * @m: data to send - * @total_len: total length of data to be sent + * @m: message to send + * @dsz: length of data to be transmitted * - * Used for SOCK_STREAM data. + * Used for SOCK_SEQPACKET messages. 
* - * Returns the number of bytes sent on success (or partial success), - * or errno if no data sent + * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { - struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct msghdr my_msg; - struct iovec my_iov; - struct iovec *curr_iov; - int curr_iovlen; - char __user *curr_start; - u32 hdr_size; - int curr_left; - int bytes_to_send; - int bytes_sent; - int res; - - lock_sock(sk); - - /* Handle special cases where there is no connection */ - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) - res = tipc_send_packet(NULL, sock, m, total_len); - else - res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; - goto exit; - } - - if (unlikely(m->msg_name)) { - res = -EISCONN; - goto exit; - } - - if (total_len > (unsigned int)INT_MAX) { - res = -EMSGSIZE; - goto exit; - } - - /* - * Send each iovec entry using one or more messages - * - * Note: This algorithm is good for the most likely case - * (i.e. one large iovec entry), but could be improved to pass sets - * of small iovec entries into send_packet(). - */ - curr_iov = m->msg_iov; - curr_iovlen = m->msg_iovlen; - my_msg.msg_iov = &my_iov; - my_msg.msg_iovlen = 1; - my_msg.msg_flags = m->msg_flags; - my_msg.msg_name = NULL; - bytes_sent = 0; - - hdr_size = msg_hdr_sz(&tsk->port.phdr); - - while (curr_iovlen--) { - curr_start = curr_iov->iov_base; - curr_left = curr_iov->iov_len; - - while (curr_left) { - bytes_to_send = tsk->port.max_pkt - hdr_size; - if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) - bytes_to_send = TIPC_MAX_USER_MSG_SIZE; - if (curr_left < bytes_to_send) - bytes_to_send = curr_left; - my_iov.iov_base = curr_start; - my_iov.iov_len = bytes_to_send; - res = tipc_send_packet(NULL, sock, &my_msg, - bytes_to_send); - if (res < 0) { - if (bytes_sent) - res = bytes_sent; - goto exit; - } - curr_left -= bytes_to_send; - curr_start += bytes_to_send; - bytes_sent += bytes_to_send; - } + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; - curr_iov++; - } - res = bytes_sent; -exit: - release_sock(sk); - return res; + return tipc_send_stream(iocb, sock, m, dsz); } /** -- cgit v1.2.3 From b786e2b0faea1265d72533d59ec4482f764ad60f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:39 -0500 Subject: tipc: let port protocol senders use new link send function Several functions in port.c, related to the port protocol and connection shutdown, need to send messages. We now convert them to use the new link send function. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/port.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/port.c b/net/tipc/port.c index 606ff1a78e2b..60aede075b52 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -236,6 +236,8 @@ u32 tipc_port_init(struct tipc_port *p_ptr, void tipc_port_destroy(struct tipc_port *p_ptr) { struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; + u32 peer; tipc_withdraw(p_ptr, 0, NULL); @@ -247,14 +249,15 @@ void tipc_port_destroy(struct tipc_port *p_ptr) if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); + msg = buf_msg(buf); + peer = msg_destnode(msg); + tipc_link_xmit2(buf, peer, msg_link_selector(msg)); } - spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); list_del(&p_ptr->wait_list); spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); - tipc_net_route_msg(buf); } /* @@ -276,6 +279,7 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, msg_set_destport(msg, tipc_port_peerport(p_ptr)); msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); + buf->next = NULL; } return buf; } @@ -284,6 +288,7 @@ static void port_timeout(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; @@ -302,7 +307,8 @@ static void port_timeout(unsigned long ref) k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -310,12 +316,14 @@ static void port_handle_node_down(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -327,6 +335,7 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 er struct tipc_msg *msg = buf_msg(buf); msg_swap_words(msg, 4, 5); msg_swap_words(msg, 6, 7); + buf->next = NULL; } return buf; } @@ -351,6 +360,7 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er if (imp < TIPC_CRITICAL_IMPORTANCE) msg_set_importance(msg, ++imp); msg_set_errcode(msg, err); + buf->next = NULL; } return buf; } @@ -401,7 +411,7 @@ void tipc_port_proto_rcv(struct sk_buff *buf) p_ptr->probing_state = CONFIRMED; tipc_port_unlock(p_ptr); exit: - tipc_net_route_msg(r_buf); + tipc_link_xmit2(r_buf, msg_destnode(msg), msg_link_selector(msg)); kfree_skb(buf); } @@ -496,6 +506,7 @@ void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; struct sk_buff *buf = NULL; + struct tipc_msg *msg; p_ptr = tipc_port_lock(ref); if (!p_ptr) @@ -505,7 +516,10 @@ void tipc_acknowledge(u32 ref, u32 ack) buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); } tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + if (!buf) + return; + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, @@ -656,6 +670,7 @@ int tipc_port_disconnect(u32 ref) */ int tipc_port_shutdown(u32 ref) { + struct tipc_msg *msg; struct tipc_port *p_ptr; struct sk_buff *buf = NULL; @@ -665,6 +680,7 @@ int tipc_port_shutdown(u32 ref) buf = 
port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); return tipc_port_disconnect(ref); } -- cgit v1.2.3 From ec8a2e5621db2da24badb3969eda7fd359e1869f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:40 -0500 Subject: tipc: same receive code path for connection protocol and data messages As a preparation to eliminate port_lock we need to bring reception of connection protocol messages under proper protection of bh_lock_sock or socket owner. We fix this by letting those messages follow the same code path as incoming data messages. As a side effect of this change, the last reference to the function net_route_msg() disappears, and we can eliminate that function. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 18 ++++++++++++------ net/tipc/net.c | 40 ---------------------------------------- net/tipc/net.h | 2 -- net/tipc/port.c | 7 +------ net/tipc/port.h | 2 +- net/tipc/socket.c | 5 +++++ 6 files changed, 19 insertions(+), 55 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 93a8033263c0..96a8072f73cc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1479,6 +1479,7 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: tipc_node_unlock(n_ptr); tipc_sk_rcv(buf); continue; @@ -1493,10 +1494,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) tipc_node_unlock(n_ptr); tipc_named_rcv(buf); continue; - case CONN_MANAGER: - tipc_node_unlock(n_ptr); - tipc_port_proto_rcv(buf); - continue; case BCAST_PROTOCOL: tipc_link_sync_rcv(n_ptr, buf); break; @@ -2106,6 +2103,7 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; struct sk_buff *obuf; + struct tipc_msg *omsg; while (msgcount--) { obuf = buf_extract(buf, pos); @@ -2113,8 +2111,16 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pr_warn("Link unable to unbundle message(s)\n"); break; } - pos += align(msg_size(buf_msg(obuf))); - tipc_net_route_msg(obuf); + omsg = buf_msg(obuf); + pos += align(msg_size(omsg)); + if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { + tipc_named_rcv(obuf); + } else { + pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); + kfree_skb(obuf); + } } kfree_skb(buf); } diff --git a/net/tipc/net.c b/net/tipc/net.c index 5f7d6ffb5465..7fcc94998fea 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -103,46 +103,6 @@ * This is always used within the scope of a tipc_nametbl_lock(read). * - A local spin_lock protecting the queue of subscriber events. */ -void tipc_net_route_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg; - u32 dnode; - - if (!buf) - return; - msg = buf_msg(buf); - - /* Handle message for this node */ - dnode = msg_short(msg) ? 
tipc_own_addr : msg_destnode(msg); - if (tipc_in_scope(dnode, tipc_own_addr)) { - if (msg_isdata(msg)) { - if (msg_mcast(msg)) - tipc_port_mcast_rcv(buf, NULL); - else if (msg_destport(msg)) { - tipc_sk_rcv(buf); - } else { - pr_warn("Cannot route msg; no destination\n"); - kfree_skb(buf); - } - return; - } - switch (msg_user(msg)) { - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); - break; - case CONN_MANAGER: - tipc_port_proto_rcv(buf); - break; - default: - kfree_skb(buf); - } - return; - } - - /* Handle message for another node */ - skb_trim(buf, msg_size(msg)); - tipc_link_xmit(buf, dnode, msg_link_selector(msg)); -} int tipc_net_start(u32 addr) { diff --git a/net/tipc/net.h b/net/tipc/net.h index c6c2b46f7c28..59ef3388be2c 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -void tipc_net_route_msg(struct sk_buff *buf); - int tipc_net_start(u32 addr); void tipc_net_stop(void); diff --git a/net/tipc/port.c b/net/tipc/port.c index 60aede075b52..dcc630948e04 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -365,16 +365,14 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er return buf; } -void tipc_port_proto_rcv(struct sk_buff *buf) +void tipc_port_proto_rcv(struct tipc_port *p_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr; struct sk_buff *r_buf = NULL; u32 destport = msg_destport(msg); int wakeable; /* Validate connection */ - p_ptr = tipc_port_lock(destport); if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { r_buf = tipc_buf_acquire(BASIC_H_SIZE); if (r_buf) { @@ -385,8 +383,6 @@ void tipc_port_proto_rcv(struct sk_buff *buf) msg_set_origport(msg, destport); msg_set_destport(msg, msg_origport(msg)); } - if (p_ptr) - tipc_port_unlock(p_ptr); goto exit; } @@ -409,7 +405,6 @@ void tipc_port_proto_rcv(struct sk_buff *buf) break; } p_ptr->probing_state = CONFIRMED; - tipc_port_unlock(p_ptr); exit: tipc_link_xmit2(r_buf, msg_destnode(msg), msg_link_selector(msg)); kfree_skb(buf); diff --git a/net/tipc/port.h b/net/tipc/port.h index 231d9488189c..3f28fd420ff9 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -140,7 +140,7 @@ int tipc_port_mcast_xmit(struct tipc_port *port, unsigned int len); struct sk_buff *tipc_port_get_ports(void); -void tipc_port_proto_rcv(struct sk_buff *buf); +void tipc_port_proto_rcv(struct tipc_port *port, struct sk_buff *buf); void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bfe79bbd83a1..d838a4b66d3a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1416,6 +1416,11 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) unsigned int limit = rcvbuf_limit(sk, buf); int rc = TIPC_OK; + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_port_proto_rcv(&tsk->port, buf); + return TIPC_OK; + } + /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) return -TIPC_ERR_NO_PORT; -- cgit v1.2.3 From ac0074ee70ddb32f62d918b31cb20e3c947c75a1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:41 -0500 Subject: tipc: clean up connection protocol reception function We simplify the code for receiving connection probes, leveraging the recently introduced tipc_msg_reverse() function. 
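In essence (a sketch only, not part of the patch; the helper name is invented), a probe is now answered by reversing the received header in place and asking the caller to transmit the result:

/* Sketch only: answer a CONN_PROBE by turning the received buffer into
 * the reply. The reply is not sent from here; a nonzero return asks the
 * caller to forward it to the node address written into *dnode.
 */
static int example_probe_rcv(struct sk_buff *buf, u32 *dnode)
{
	struct tipc_msg *msg = buf_msg(buf);

	if (!tipc_msg_reverse(buf, dnode, TIPC_OK))
		return 0;	/* no reply could be built */
	msg_set_type(msg, CONN_PROBE_REPLY);
	return 1;	/* caller sends the reply towards *dnode */
}
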
We also stick to the principle of sending a possible response message directly from the calling (tipc_sk_rcv or backlog_rcv) functions, hence making the call chain shallower and easier to follow. We make one small protocol change here, allowed according to the spec. If a protocol message arrives from a remote socket that is not the one we are connected to, we are currently generating a connection abort message and send it to the source. This behavior is unnecessary, and might even be a security risk, so instead we now choose to only ignore the message. The consequnce for the sender is that he will need longer time to discover his mistake (until the next timeout), but this is an extreme corner case, and may happen anyway under other circumstances, so we deem this change acceptable. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.c | 10 ++++++--- net/tipc/port.c | 53 +++------------------------------------------ net/tipc/port.h | 1 - net/tipc/socket.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-------- net/tipc/socket.h | 3 +++ 5 files changed, 68 insertions(+), 63 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 7bfc4422bf2c..6ec958401f78 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -348,13 +348,17 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) struct tipc_msg ohdr; uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); - if (skb_linearize(buf) || !msg_isdata(msg)) + if (skb_linearize(buf)) + goto exit; + if (msg_dest_droppable(msg)) goto exit; - if (msg_dest_droppable(msg) || msg_errcode(msg)) + if (msg_errcode(msg)) goto exit; memcpy(&ohdr, msg, msg_hdr_sz(msg)); - msg_set_importance(msg, min_t(uint, ++imp, TIPC_CRITICAL_IMPORTANCE)); + imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); + if (msg_isdata(msg)) + msg_set_importance(msg, imp); msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); diff --git a/net/tipc/port.c b/net/tipc/port.c index dcc630948e04..9f53d5ac35e1 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -42,8 +42,6 @@ /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define CONFIRMED 0 -#define PROBING 1 #define MAX_REJECT_SIZE 1024 @@ -299,11 +297,11 @@ static void port_timeout(unsigned long ref) } /* Last probe answered ? 
*/ - if (p_ptr->probing_state == PROBING) { + if (p_ptr->probing_state == TIPC_CONN_PROBING) { buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); } else { buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = PROBING; + p_ptr->probing_state = TIPC_CONN_PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } tipc_port_unlock(p_ptr); @@ -365,51 +363,6 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er return buf; } -void tipc_port_proto_rcv(struct tipc_port *p_ptr, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *r_buf = NULL; - u32 destport = msg_destport(msg); - int wakeable; - - /* Validate connection */ - if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { - r_buf = tipc_buf_acquire(BASIC_H_SIZE); - if (r_buf) { - msg = buf_msg(r_buf); - tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, - BASIC_H_SIZE, msg_orignode(msg)); - msg_set_errcode(msg, TIPC_ERR_NO_PORT); - msg_set_origport(msg, destport); - msg_set_destport(msg, msg_origport(msg)); - } - goto exit; - } - - /* Process protocol message sent by peer */ - switch (msg_type(msg)) { - case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested; - p_ptr->acked += msg_msgcnt(msg); - if (!tipc_port_congested(p_ptr)) { - p_ptr->congested = 0; - if (wakeable) - tipc_port_wakeup(p_ptr); - } - break; - case CONN_PROBE: - r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); - break; - default: - /* CONN_PROBE_REPLY or unrecognized - no action required */ - break; - } - p_ptr->probing_state = CONFIRMED; -exit: - tipc_link_xmit2(r_buf, msg_destnode(msg), msg_link_selector(msg)); - kfree_skb(buf); -} - static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) { struct publication *publ; @@ -613,7 +566,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; - p_ptr->probing_state = CONFIRMED; + p_ptr->probing_state = TIPC_CONN_OK; p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); diff --git a/net/tipc/port.h b/net/tipc/port.h index 3f28fd420ff9..3a4f808135c3 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -140,7 +140,6 @@ int tipc_port_mcast_xmit(struct tipc_port *port, unsigned int len); struct sk_buff *tipc_port_get_ports(void); -void tipc_port_proto_rcv(struct tipc_port *port, struct sk_buff *buf); void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d838a4b66d3a..1762323156af 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -46,6 +46,7 @@ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define TIPC_FWD_MSG 1 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); @@ -533,6 +534,46 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, return mask; } +/** + * tipc_sk_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @dnode: node to send response message to, if any + * @buf: buffer containing protocol message + * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if + * (CONN_PROBE_REPLY) message should be forwarded. 
+ */ +int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port *port = &tsk->port; + int wakeable; + + /* Ignore if connection cannot be validated: */ + if (!port->connected || !tipc_port_peer_msg(port, msg)) + goto exit; + + port->probing_state = TIPC_CONN_OK; + + if (msg_type(msg) == CONN_ACK) { + wakeable = tipc_port_congested(port) && port->congested; + port->acked += msg_msgcnt(msg); + if (!tipc_port_congested(port)) { + port->congested = 0; + if (wakeable) + tipc_port_wakeup(port); + } + } else if (msg_type(msg) == CONN_PROBE) { + if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + return TIPC_OK; + msg_set_type(msg, CONN_PROBE_REPLY); + return TIPC_FWD_MSG; + } + /* Do nothing if msg_type() == CONN_PROBE_REPLY */ +exit: + kfree_skb(buf); + return TIPC_OK; +} + /** * dest_name_check - verify user is permitted to send to specified port name * @dest: destination address @@ -1406,7 +1447,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * Called with socket lock already taken; port lock may also be taken. * * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message - * to be rejected. + * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded */ static int filter_rcv(struct sock *sk, struct sk_buff *buf) { @@ -1414,12 +1455,11 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); + u32 onode; int rc = TIPC_OK; - if (unlikely(msg_user(msg) == CONN_MANAGER)) { - tipc_port_proto_rcv(&tsk->port, buf); - return TIPC_OK; - } + if (unlikely(msg_user(msg) == CONN_MANAGER)) + return tipc_sk_proto_rcv(tsk, &onode, buf); /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) @@ -1465,10 +1505,16 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) rc = filter_rcv(sk, buf); - if (unlikely(rc && tipc_msg_reverse(buf, &onode, -rc))) - tipc_link_xmit2(buf, onode, 0); - else if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + if (likely(!rc)) { + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, &tsk->dupl_rcvcnt); + return 0; + } + + if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) + return 0; + + tipc_link_xmit2(buf, onode, 0); return 0; } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 3afcd2a70b31..69fd06bce68a 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -38,6 +38,9 @@ #include "port.h" #include +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API -- cgit v1.2.3 From 60120526c26f42fd658e32bf4a6d548483d09da8 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Jun 2014 20:41:42 -0500 Subject: tipc: simplify connection congestion handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a consequence of the recently introduced serialized access to the socket in commit 8d94168a761819d10252bab1f8de6d7b202c3baa ("tipc: same receive code path for connection protocol and data messages") we can make a number of simplifications in the detection and handling of connection congestion situations. - We don't need to keep two counters, one for sent messages and one for acked messages. 
There is no longer any risk for races between acknowledge messages arriving in BH and data message sending running in user context. So we merge this into one counter, 'sent_unacked', which is incremented at sending and subtracted from at acknowledge reception. - We don't need to set the 'congested' field in tipc_port to true before we sent the message, and clear it when sending is successful. (As a matter of fact, it was never necessary; the field was set in link_schedule_port() before any wakeup could arrive anyway.) - We keep the conditions for link congestion and connection connection congestion separated. There would otherwise be a risk that an arriving acknowledge message may wake up a user sleeping because of link congestion. - We can simplify reception of acknowledge messages. We also make some cosmetic/structural changes: - We rename the 'congested' field to the more correct 'link_cong´. - We rename 'conn_unacked' to 'rcv_unacked' - We move the above mentioned fields from struct tipc_port to struct tipc_sock. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 10 +++++++--- net/tipc/port.c | 12 ++---------- net/tipc/port.h | 16 ---------------- net/tipc/socket.c | 48 +++++++++++++++++++++++------------------------- net/tipc/socket.h | 11 +++++++++++ 5 files changed, 43 insertions(+), 54 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 96a8072f73cc..a081e7d08d22 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -332,13 +332,15 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); if (p_ptr) { if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->congested = 1; + tsk = tipc_port_to_sock(p_ptr); + tsk->link_cong = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -352,6 +354,7 @@ exit: void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; @@ -367,10 +370,11 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) wait_list) { if (win <= 0) break; + tsk = tipc_port_to_sock(p_ptr); list_del_init(&p_ptr->wait_list); spin_lock_bh(p_ptr->lock); - p_ptr->congested = 0; - tipc_port_wakeup(p_ptr); + tsk->link_cong = 0; + tipc_sock_wakeup(tsk); win -= p_ptr->waiting_pkts; spin_unlock_bh(p_ptr->lock); } diff --git a/net/tipc/port.c b/net/tipc/port.c index 9f53d5ac35e1..0d09dcb6da18 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -186,12 +186,6 @@ exit: tipc_port_list_free(dp); } - -void tipc_port_wakeup(struct tipc_port *port) -{ - tipc_sock_wakeup(tipc_port_to_sock(port)); -} - /* tipc_port_init - intiate TIPC port and lock it * * Returns obtained reference if initialization is successful, zero otherwise @@ -209,7 +203,6 @@ u32 tipc_port_init(struct tipc_port *p_ptr, } p_ptr->max_pkt = MAX_PKT_DEFAULT; - p_ptr->sent = 1; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); @@ -459,10 +452,9 @@ void tipc_acknowledge(u32 ref, u32 ack) p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->connected) { - p_ptr->conn_unacked -= ack; + if (p_ptr->connected) 
buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); - } + tipc_port_unlock(p_ptr); if (!buf) return; diff --git a/net/tipc/port.h b/net/tipc/port.h index 3a4f808135c3..0e47052daa29 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -53,17 +53,13 @@ * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: # of non-empty messages sent by port - * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -76,17 +72,13 @@ struct tipc_port { int connected; u32 conn_type; u32 conn_instance; - u32 conn_unacked; int published; - u32 congested; u32 max_pkt; u32 ref; struct tipc_msg phdr; struct list_head port_list; struct list_head wait_list; u32 waiting_pkts; - u32 sent; - u32 acked; struct list_head publications; u32 pub_count; u32 probing_state; @@ -120,8 +112,6 @@ int tipc_port_disconnect(u32 portref); int tipc_port_shutdown(u32 ref); -void tipc_port_wakeup(struct tipc_port *port); - /* * The following routines require that the port be locked on entry */ @@ -161,12 +151,6 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) spin_unlock_bh(p_ptr->lock); } -static inline int tipc_port_congested(struct tipc_port *p_ptr) -{ - return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN); -} - - static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) { return msg_destnode(&p_ptr->phdr); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1762323156af..ede78b144dcf 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -207,7 +207,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; - tsk->port.sent = 0; + tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); @@ -513,12 +513,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong) mask |= POLLOUT; break; case SS_READY: case SS_CONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -546,7 +546,7 @@ int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port *port = &tsk->port; - int wakeable; + int conn_cong; /* Ignore if connection cannot be validated: */ if (!port->connected || !tipc_port_peer_msg(port, msg)) @@ -555,13 +555,10 @@ int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) port->probing_state = TIPC_CONN_OK; if (msg_type(msg) == CONN_ACK) { - wakeable = tipc_port_congested(port) && 
port->congested; - port->acked += msg_msgcnt(msg); - if (!tipc_port_congested(port)) { - port->congested = 0; - if (wakeable) - tipc_port_wakeup(port); - } + conn_cong = tipc_sk_conn_cong(tsk); + tsk->sent_unacked -= msg_msgcnt(msg); + if (conn_cong) + tipc_sock_wakeup(tsk); } else if (msg_type(msg) == CONN_PROBE) { if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) return TIPC_OK; @@ -626,7 +623,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->port.congested); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -800,7 +797,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; DEFINE_WAIT(wait); int done; @@ -819,7 +815,9 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, - (!port->congested || !port->connected)); + (!tsk->link_cong && + !tipc_sk_conn_cong(tsk)) || + !tsk->port.connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -856,7 +854,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, if (unlikely(dest)) { rc = tipc_sendmsg(iocb, sock, m, dsz); if (dsz && (dsz == rc)) - tsk->port.sent = 1; + tsk->sent_unacked = 1; return rc; } if (dsz > (uint)INT_MAX) @@ -875,7 +873,6 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); dnode = tipc_port_peernode(port); - port->congested = 1; next: mtu = port->max_pkt; @@ -884,11 +881,10 @@ next: if (unlikely(rc < 0)) goto exit; do { - port->congested = 1; - if (likely(!tipc_port_congested(port))) { + if (likely(!tipc_sk_conn_cong(tsk))) { rc = tipc_link_xmit2(buf, dnode, ref); if (likely(!rc)) { - port->sent++; + tsk->sent_unacked++; sent += send; if (sent == dsz) break; @@ -903,8 +899,6 @@ next: } rc = tipc_wait_for_sndpkt(sock, &timeo); } while (!rc); - - port->congested = 0; exit: if (iocb) release_sock(sk); @@ -1169,8 +1163,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } exit: @@ -1278,8 +1274,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 69fd06bce68a..2cdede9eda1b 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -48,6 +48,9 @@ * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # 
messages read by user, but not yet acked back to peer */ struct tipc_sock { @@ -55,6 +58,9 @@ struct tipc_sock { struct tipc_port port; unsigned int conn_timeout; atomic_t dupl_rcvcnt; + int link_cong; + uint sent_unacked; + uint rcv_unacked; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) @@ -72,6 +78,11 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } +static inline int tipc_sk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} + int tipc_sk_rcv(struct sk_buff *buf); #endif -- cgit v1.2.3 From 7ae934bebe289362e5b1f5ea50e4c347863ae202 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 1 Jul 2014 10:22:40 +0200 Subject: tipc: refactor message delivery out of tipc_rcv This is a cosmetic change, separating message delivery from the link state processing. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 124 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 44 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index a081e7d08d22..18b1524098c6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -88,6 +88,8 @@ static void link_print(struct tipc_link *l_ptr, const char *str); static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); /* * Simple link routines @@ -1458,54 +1460,14 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); - /* Deliver packet/message to correct user: */ - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) { - if (!tipc_link_tunnel_rcv(n_ptr, &buf)) { - tipc_node_unlock(n_ptr); - continue; - } - msg = buf_msg(buf); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - l_ptr->stats.recv_fragments++; - if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) { - l_ptr->stats.recv_fragmented++; - msg = buf_msg(buf); - } else { - if (!l_ptr->reasm_buf) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); - continue; - } - } - - switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - case CONN_MANAGER: - tipc_node_unlock(n_ptr); - tipc_sk_rcv(buf); - continue; - case MSG_BUNDLER: - l_ptr->stats.recv_bundles++; - l_ptr->stats.recv_bundled += msg_msgcnt(msg); + if (tipc_link_prepare_input(l_ptr, &buf)) { tipc_node_unlock(n_ptr); - tipc_link_bundle_rcv(buf); continue; - case NAME_DISTRIBUTOR: - n_ptr->bclink.recv_permitted = true; - tipc_node_unlock(n_ptr); - tipc_named_rcv(buf); - continue; - case BCAST_PROTOCOL: - tipc_link_sync_rcv(n_ptr, buf); - break; - default: - kfree_skb(buf); - break; } tipc_node_unlock(n_ptr); + msg = buf_msg(buf); + if (tipc_link_input(l_ptr, buf) != 0) + goto discard; continue; unlock_discard: tipc_node_unlock(n_ptr); @@ -1514,6 +1476,80 @@ discard: } } +/** + * tipc_link_prepare_input - process TIPC link messages + * + * returns nonzero if the message was consumed + * + * Node lock must be held + */ +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +{ + struct tipc_node *n; + struct tipc_msg *msg; + 
int res = -EINVAL; + + n = l->owner; + msg = buf_msg(*buf); + switch (msg_user(msg)) { + case CHANGEOVER_PROTOCOL: + if (tipc_link_tunnel_rcv(n, buf)) + res = 0; + break; + case MSG_FRAGMENTER: + l->stats.recv_fragments++; + if (tipc_buf_append(&l->reasm_buf, buf)) { + l->stats.recv_fragmented++; + res = 0; + } else if (!l->reasm_buf) { + tipc_link_reset(l); + } + break; + case MSG_BUNDLER: + l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(msg); + res = 0; + break; + case NAME_DISTRIBUTOR: + n->bclink.recv_permitted = true; + res = 0; + break; + case BCAST_PROTOCOL: + tipc_link_sync_rcv(n, *buf); + break; + default: + res = 0; + } + return res; +} +/** + * tipc_link_input - Deliver message too higher layers + */ +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + int res = 0; + + switch (msg_user(msg)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + tipc_sk_rcv(buf); + break; + case NAME_DISTRIBUTOR: + tipc_named_rcv(buf); + break; + case MSG_BUNDLER: + tipc_link_bundle_rcv(buf); + break; + default: + res = -EINVAL; + } + return res; +} + /** * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue * -- cgit v1.2.3 From 3f53bd8f8b563cc6024d25c1665d8358745f48b2 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 1 Jul 2014 10:22:41 +0200 Subject: tipc: fix link acknowledge logic in receive path Link state acks triggered from the receive path is done before the last received packet have been processed by the link layer. The effect of this is that the last received packet will not be included in the ack. This causes problems if the link window is set to TIPC_MIN_LINK_WIN, where the ack interval will be equal to the link tolerance, and the link enters a stop-and-go behavior. We move the ack logic to after link state processing, just before the packet is delivered to higher layers. Signed-off-by: Erik Hugne Signed-off-by: Carl Sigurjonsson Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/link.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 18b1524098c6..a235b245f682 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1422,11 +1422,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(!list_empty(&l_ptr->waiting_ports))) tipc_link_wakeup_ports(l_ptr, 0); - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { - l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); - } - /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { @@ -1460,6 +1455,11 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); + if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + l_ptr->stats.sent_acks++; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + } + if (tipc_link_prepare_input(l_ptr, &buf)) { tipc_node_unlock(n_ptr); continue; -- cgit v1.2.3 From 70452dcb6d401349fbd6db55dfab112fb44639be Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Sun, 6 Jul 2014 20:38:50 -0400 Subject: tipc: fix a memleak when sending data This fixes a regression bug caused by: 067608e9d019d6477fd45dd948e81af0e5bf599f ("tipc: introduce direct iovec to buffer chain fragmentation function") If data is sent on a nonblocking socket and the destination link is congested, the buffer chain is leaked. We fix this by freeing the chain in this case. Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ede78b144dcf..8c5600cfcc3e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -784,8 +784,9 @@ new_mtu: break; rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + kfree_skb_list(buf); } while (!rc); - exit: if (iocb) release_sock(sk); @@ -898,6 +899,8 @@ next: break; } rc = tipc_wait_for_sndpkt(sock, &timeo); + if (rc) + kfree_skb_list(buf); } while (!rc); exit: if (iocb) -- cgit v1.2.3 From 7ceaa583becacb985ad5803bac0970b8f8cc0d74 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 14 Jul 2014 18:30:52 +0200 Subject: tipc: remove unnecessary break after return Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8c5600cfcc3e..de01622672b2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2028,7 +2028,6 @@ int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) return 0; } return -EADDRNOTAVAIL; - break; default: return -ENOIOCTLCMD; } -- cgit v1.2.3 From dbdf6d24ad37d63938f29a2d134a1a9f6e9e673c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:40:58 -0400 Subject: tipc: make name table distributor use new send function In a previous commit series ("tipc: new unicast transmission code") we introduced a new message sending function, tipc_link_xmit2(), and moved the unicast data users over to use that function. We now let the internal name table distributor do the same. The interaction between the name distributor and the node/link layer also becomes significantly simpler, so we can eliminate the function tipc_link_names_xmit(). 
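In outline (a sketch only, not part of the patch; the helper name is invented), the bulk update now just linearizes the accumulated buffer list and hands the chain to the new send function:

/* Sketch only: the bulk name table update is accumulated as sk_buffs
 * linked through a list_head, then converted into a plain ->next chain
 * and passed to the chain-aware link send function.
 */
static void example_named_bulk_xmit(struct list_head *msg_list, u32 dnode)
{
	struct sk_buff *chain;

	if (list_empty(msg_list))
		return;
	/* first buffer on the list becomes the head of the chain */
	chain = (struct sk_buff *)msg_list->next;
	/* break the circular list so the last buffer points to NULL */
	((struct sk_buff *)msg_list->prev)->next = NULL;
	/* the destination node address doubles as link selector here */
	tipc_link_xmit2(chain, dnode, dnode);
}
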
Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 41 --------------------------- net/tipc/link.h | 1 - net/tipc/name_distr.c | 76 +++++++++++++++++++++++++++++---------------------- net/tipc/name_distr.h | 2 +- net/tipc/node.c | 13 ++------- 5 files changed, 48 insertions(+), 85 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index a235b245f682..367b0f5886f8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1032,47 +1032,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) kfree_skb(buf); } -/* - * tipc_link_names_xmit - send name table entries to new neighbor - * - * Send routine for bulk delivery of name table messages when contact - * with a new neighbor occurs. No link congestion checking is performed - * because name table messages *must* be delivered. The messages must be - * small enough not to require fragmentation. - * Called without any locks held. - */ -void tipc_link_names_xmit(struct list_head *message_list, u32 dest) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct sk_buff *temp_buf; - - if (list_empty(message_list)) - return; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[0]; - if (l_ptr) { - /* convert circular list to linear list */ - ((struct sk_buff *)message_list->prev)->next = NULL; - link_add_chain_to_outqueue(l_ptr, - (struct sk_buff *)message_list->next, 0); - tipc_link_push_queue(l_ptr); - INIT_LIST_HEAD(message_list); - } - tipc_node_unlock(n_ptr); - } - - /* discard the messages if they couldn't be sent */ - list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { - list_del((struct list_head *)buf); - kfree_skb(buf); - } -} - /* * tipc_link_push_packet: Push one unsent packet to the media */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 227ff8120897..04a59c58d385 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -228,7 +228,6 @@ void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); -void tipc_link_names_xmit(struct list_head *message_list, u32 dest); int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 8ce730984aa1..d16f9475fa76 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -101,24 +101,22 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) void named_cluster_distribute(struct sk_buff *buf) { - struct sk_buff *buf_copy; - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; + struct sk_buff *obuf; + struct tipc_node *node; + u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[n_ptr->addr & 1]; - if (l_ptr) { - buf_copy = skb_copy(buf, GFP_ATOMIC); - if (!buf_copy) { - tipc_node_unlock(n_ptr); - break; - } - msg_set_destnode(buf_msg(buf_copy), n_ptr->addr); - __tipc_link_xmit(l_ptr, buf_copy); - } - tipc_node_unlock(n_ptr); + list_for_each_entry_rcu(node, &tipc_node_list, list) { + dnode = node->addr; + if (in_own_node(dnode)) + continue; + if (!tipc_node_active_links(node)) + continue; + obuf 
= skb_copy(buf, GFP_ATOMIC); + if (!obuf) + break; + msg_set_destnode(buf_msg(obuf), dnode); + tipc_link_xmit2(obuf, dnode, dnode); } rcu_read_unlock(); @@ -175,34 +173,44 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) return buf; } -/* +/** * named_distribute - prepare name info for bulk distribution to another node + * @msg_list: list of messages (buffers) to be returned from this function + * @dnode: node to be updated + * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct list_head *message_list, u32 node, - struct publ_list *pls, u32 max_item_buf) +static void named_distribute(struct list_head *msg_list, u32 dnode, + struct publ_list *pls) { struct publication *publ; struct sk_buff *buf = NULL; struct distr_item *item = NULL; - u32 left = 0; - u32 rest = pls->size * ITEM_SIZE; + uint dsz = pls->size * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint rem = dsz; + uint msg_rem = 0; list_for_each_entry(publ, &pls->list, local_list) { + /* Prepare next buffer: */ if (!buf) { - left = (rest <= max_item_buf) ? rest : max_item_buf; - rest -= left; - buf = named_prepare_buf(PUBLICATION, left, node); + msg_rem = min_t(uint, rem, msg_dsz); + rem -= msg_rem; + buf = named_prepare_buf(PUBLICATION, msg_rem, dnode); if (!buf) { pr_warn("Bulk publication failure\n"); return; } item = (struct distr_item *)msg_data(buf_msg(buf)); } + + /* Pack publication into message: */ publ_to_item(item, publ); item++; - left -= ITEM_SIZE; - if (!left) { - list_add_tail((struct list_head *)buf, message_list); + msg_rem -= ITEM_SIZE; + + /* Append full buffer to list: */ + if (!msg_rem) { + list_add_tail((struct list_head *)buf, msg_list); buf = NULL; } } @@ -211,16 +219,20 @@ static void named_distribute(struct list_head *message_list, u32 node, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(u32 max_item_buf, u32 node) +void tipc_named_node_up(u32 dnode) { - LIST_HEAD(message_list); + LIST_HEAD(msg_list); + struct sk_buff *buf_chain; read_lock_bh(&tipc_nametbl_lock); - named_distribute(&message_list, node, &publ_cluster, max_item_buf); - named_distribute(&message_list, node, &publ_zone, max_item_buf); + named_distribute(&msg_list, dnode, &publ_cluster); + named_distribute(&msg_list, dnode, &publ_zone); read_unlock_bh(&tipc_nametbl_lock); - tipc_link_names_xmit(&message_list, node); + /* Convert circular list to linear list and send: */ + buf_chain = (struct sk_buff *)msg_list.next; + ((struct sk_buff *)msg_list.prev)->next = NULL; + tipc_link_xmit2(buf_chain, dnode, dnode); } /** diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index b2eed4ec1526..8afe32b7fc9a 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -70,7 +70,7 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct publication *publ); struct sk_buff *tipc_named_withdraw(struct publication *publ); void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(u32 max_item_buf, u32 node); +void tipc_named_node_up(u32 dnode); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index d959343043fd..f7069299943f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -474,8 +474,6 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { LIST_HEAD(nsub_list); - struct tipc_link *link; - int pkt_sz 
= 0; u32 addr = 0; if (likely(!node->action_flags)) { @@ -488,18 +486,13 @@ void tipc_node_unlock(struct tipc_node *node) node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } if (node->action_flags & TIPC_NOTIFY_NODE_UP) { - link = node->active_links[0]; node->action_flags &= ~TIPC_NOTIFY_NODE_UP; - if (link) { - pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) * - ITEM_SIZE; - addr = node->addr; - } + addr = node->addr; } spin_unlock_bh(&node->lock); if (!list_empty(&nsub_list)) tipc_nodesub_notify(&nsub_list); - if (pkt_sz) - tipc_named_node_up(pkt_sz, addr); + if (addr) + tipc_named_node_up(addr); } -- cgit v1.2.3 From 25b660c7e202d533e4985380b24729fd12de2b5e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:40:59 -0400 Subject: tipc: let internal link users call the new link send function We convert the link internal users (changeover protocol, broadcast synchronization) to use the new packet send function. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 367b0f5886f8..d1255ba51216 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -999,7 +999,7 @@ int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) * * Called with node locked */ -static void tipc_link_sync_xmit(struct tipc_link *l) +static void tipc_link_sync_xmit(struct tipc_link *link) { struct sk_buff *buf; struct tipc_msg *msg; @@ -1009,10 +1009,9 @@ static void tipc_link_sync_xmit(struct tipc_link *l) return; msg = buf_msg(buf); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, l->addr); - msg_set_last_bcast(msg, l->owner->bclink.acked); - link_add_chain_to_outqueue(l, buf, 0); - tipc_link_push_queue(l); + tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + msg_set_last_bcast(msg, link->owner->bclink.acked); + __tipc_link_xmit2(link, buf); } /* @@ -1859,7 +1858,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, } skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length); - __tipc_link_xmit(tunnel, buf); + __tipc_link_xmit2(tunnel, buf); } @@ -1892,7 +1891,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (buf) { skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); msg_set_size(&tunnel_hdr, INT_H_SIZE); - __tipc_link_xmit(tunnel, buf); + __tipc_link_xmit2(tunnel, buf); } else { pr_warn("%sunable to send changeover msg\n", link_co_err); @@ -1965,7 +1964,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data, length); - __tipc_link_xmit(tunnel, outbuf); + __tipc_link_xmit2(tunnel, outbuf); if (!tipc_link_is_up(l_ptr)) return; iter = iter->next; -- cgit v1.2.3 From 078bec826f7b73cf2a2397680537bcb7e075b492 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:00 -0400 Subject: tipc: add new functions for multicast and broadcast distribution We add a new broadcast link transmit function in bclink.c and a new receive function in socket.c. The purpose is to move the branching between external and internal destination down to the link layer, just as we have done with unicast in earlier commits. We also make use of the new link-independent fragmentation support that was introduced in an earlier commit series. 
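In rough outline (a sketch only, not part of the patch; locking and the case where no other cluster nodes exist are omitted), the new transmit function does the following:

/* Sketch only: a reassembled clone of the fragment chain is delivered
 * to local sockets, while the original chain goes out on the broadcast
 * link 'bcl'.
 */
int example_bclink_xmit(struct sk_buff *chain)
{
	struct sk_buff *clone;
	int rc;

	clone = tipc_msg_reassemble(chain);	/* copy for local delivery */
	if (unlikely(!clone)) {
		kfree_skb_list(chain);
		return -EHOSTUNREACH;
	}
	rc = __tipc_link_xmit(bcl, chain);	/* broadcast to all other nodes */
	if (likely(!rc))
		tipc_sk_mcast_rcv(clone);	/* deliver the copy locally */
	else
		kfree_skb(clone);
	return rc;
}
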
This gives a shorter and simpler code path, and makes it possible to obtain copy-free buffer delivery to all node local destination sockets. The new transmission code is added in parallel with the existing one, and will be used by the socket multicast send function in the next commit in this series. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- net/tipc/bcast.h | 4 +++- net/tipc/msg.c | 35 +++++++++++++++++++++++++++++++++++ net/tipc/msg.h | 2 ++ net/tipc/socket.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/tipc/socket.h | 2 ++ 6 files changed, 136 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 55c6c9d3e1ce..ac947251dd37 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, Ericsson AB + * Copyright (c) 2004-2006, 2014, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -38,6 +38,8 @@ #include "core.h" #include "link.h" #include "port.h" +#include "socket.h" +#include "msg.h" #include "bcast.h" #include "name_distr.h" @@ -138,6 +140,11 @@ static void tipc_bclink_unlock(void) tipc_link_reset_all(node); } +uint tipc_bclink_get_mtu(void) +{ + return MAX_PKT_DEFAULT_MCAST; +} + void tipc_bclink_set_flags(unsigned int flags) { bclink->flags |= flags; @@ -408,6 +415,52 @@ exit: return res; } +/* tipc_bclink_xmit2 - broadcast buffer chain to all nodes in cluster + * and to identified node local sockets + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_bclink_xmit2(struct sk_buff *buf) +{ + int rc = 0; + int bc = 0; + struct sk_buff *clbuf; + + /* Prepare clone of message for local node */ + clbuf = tipc_msg_reassemble(buf); + if (unlikely(!clbuf)) { + kfree_skb_list(buf); + return -EHOSTUNREACH; + } + + /* Broadcast to all other nodes */ + if (likely(bclink)) { + tipc_bclink_lock(); + if (likely(bclink->bcast_nodes.count)) { + rc = __tipc_link_xmit(bcl, buf); + if (likely(!rc)) { + bclink_set_last_sent(); + bcl->stats.queue_sz_counts++; + bcl->stats.accu_queue_sz += bcl->out_queue_size; + } + bc = 1; + } + tipc_bclink_unlock(); + } + + if (unlikely(!bc)) + kfree_skb_list(buf); + + /* Deliver message clone */ + if (likely(!rc)) + tipc_sk_mcast_rcv(clbuf); + else + kfree_skb(clbuf); + + return rc; +} + /** * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet * diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 00330c45df3e..d90645f408aa 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2003-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -98,5 +98,7 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +uint tipc_bclink_get_mtu(void); +int tipc_bclink_xmit2(struct sk_buff *buf); #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ce6d929d66d2..9682296f5e7c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -417,3 +417,38 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) msg_set_destport(msg, dport); return TIPC_OK; } + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain) +{ + struct sk_buff *buf = chain; + struct sk_buff *frag = buf; + struct sk_buff *head = NULL; + int hdr_sz; + + /* Copy header if single buffer */ + if (!buf->next) { + hdr_sz = skb_headroom(buf) + msg_hdr_sz(buf_msg(buf)); + return __pskb_copy(buf, hdr_sz, GFP_ATOMIC); + } + + /* Clone all fragments and reassemble */ + while (buf) { + frag = skb_clone(buf, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + buf = buf->next; + } + return frag; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return NULL; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7d574346e75e..a15d59601bf9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -744,4 +744,6 @@ bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, int offset, int dsz, int mtu , struct sk_buff **chain); +struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain); + #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index de01622672b2..8d30995682b1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -534,6 +534,46 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, return mask; } +/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets + */ +void tipc_sk_mcast_rcv(struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port_list dports = {0, NULL, }; + struct tipc_port_list *item; + struct sk_buff *b; + uint i, last, dst = 0; + u32 scope = TIPC_CLUSTER_SCOPE; + + if (in_own_node(msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list: */ + tipc_nametbl_mc_translate(msg_nametype(msg), + msg_namelower(msg), + msg_nameupper(msg), + scope, + &dports); + last = dports.count; + if (!last) { + kfree_skb(buf); + return; + } + + for (item = &dports; item; item = item->next) { + for (i = 0; i < PLSIZE && ++dst <= last; i++) { + b = (dst != last) ? 
skb_clone(buf, GFP_ATOMIC) : buf; + if (!b) { + pr_warn("Failed do clone mcast rcv buffer\n"); + continue; + } + msg_set_destport(msg, item->ports[i]); + tipc_sk_rcv(b); + } + } + tipc_port_list_free(&dports); +} + /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 2cdede9eda1b..43b75b3ceced 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -85,4 +85,6 @@ static inline int tipc_sk_conn_cong(struct tipc_sock *tsk) int tipc_sk_rcv(struct sk_buff *buf); +void tipc_sk_mcast_rcv(struct sk_buff *buf); + #endif -- cgit v1.2.3 From 0abd8ff21f19adddc465538354e9baaca63df073 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:01 -0400 Subject: tipc: start using the new multicast functions In this commit, we convert the socket multicast send function to directly call the new multicast/broadcast function (tipc_bclink_xmit2()) introduced in the previous commit. We do this instead of letting the call go via the now obsolete tipc_port_mcast_xmit(), hence saving a call level and some code complexity. We also remove the initial destination lookup at the message sending side, and replace that with an unconditional lookup at the receiving side, including on the sending node itself. This makes the destination lookup and message transfer more uniform than before. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 7 ++--- net/tipc/socket.c | 90 ++++++++++++++++++++++++++++++++----------------------- 2 files changed, 56 insertions(+), 41 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ac947251dd37..4a1c9afc9d74 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -496,7 +496,7 @@ void tipc_bclink_rcv(struct sk_buff *buf) struct tipc_node *node; u32 next_in; u32 seqno; - int deferred; + int deferred = 0; /* Screen out unwanted broadcast messages */ @@ -547,7 +547,7 @@ receive: tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_port_mcast_rcv(buf, NULL); + tipc_sk_mcast_rcv(buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { @@ -626,8 +626,7 @@ receive: node->bclink.deferred_size += deferred; bclink_update_last_sent(node, seqno); buf = NULL; - } else - deferred = 0; + } tipc_bclink_lock(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8d30995682b1..b28eea50c7fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -53,6 +53,7 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -534,6 +535,58 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, return mask; } +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @iov: message data to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct iovec *iov, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + 
struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr; + struct sk_buff *buf; + uint mtu; + int rc; + + msg_set_type(mhdr, TIPC_MCAST_MSG); + msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); + msg_set_destport(mhdr, 0); + msg_set_destnode(mhdr, 0); + msg_set_nametype(mhdr, seq->type); + msg_set_namelower(mhdr, seq->lower); + msg_set_nameupper(mhdr, seq->upper); + msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + +new_mtu: + mtu = tipc_bclink_get_mtu(); + rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + if (unlikely(rc < 0)) + return rc; + + do { + rc = tipc_bclink_xmit(buf); + if (likely(rc >= 0)) { + rc = dsz; + break; + } + if (rc == -EMSGSIZE) + goto new_mtu; + if (rc != -ELINKCONG) + break; + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); + return rc; +} + /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ void tipc_sk_mcast_rcv(struct sk_buff *buf) @@ -669,43 +722,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return 0; } -/** - * tipc_sendmcast - send multicast message - * @sock: socket structure - * @seq: destination address - * @iov: message data to send - * @dsz: total length of message data - * @timeo: timeout to wait for wakeup - * - * Called from function tipc_sendmsg(), which has done all sanity checks - * Returns the number of bytes sent on success, or errno - */ -static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, - struct iovec *iov, size_t dsz, long timeo) -{ - struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - int rc; - - do { - if (sock->state != SS_READY) { - rc = -EOPNOTSUPP; - break; - } - rc = tipc_port_mcast_xmit(&tsk->port, seq, iov, dsz); - if (likely(rc >= 0)) { - if (sock->state != SS_READY) - sock->state = SS_CONNECTING; - break; - } - if (rc != -ELINKCONG) - break; - rc = tipc_wait_for_sndmsg(sock, &timeo); - } while (!rc); - - return rc; -} - /** * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held -- cgit v1.2.3 From c4116e10579c5bbbfc3cd2ad0324ee0d8691e531 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:02 -0400 Subject: tipc: remove unreferenced functions We can now remove a number of functions which have become obsolete and unreferenced through this commit series. There are no functional changes in this commit. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 26 ------ net/tipc/bcast.h | 1 - net/tipc/link.c | 247 ------------------------------------------------------- net/tipc/link.h | 6 -- net/tipc/msg.c | 35 -------- net/tipc/msg.h | 3 - net/tipc/port.c | 112 ------------------------- net/tipc/port.h | 10 --- 8 files changed, 440 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4a1c9afc9d74..2f3256da6f88 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -389,32 +389,6 @@ static void bclink_peek_nack(struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* - * tipc_bclink_xmit - broadcast a packet to all nodes in cluster - */ -int tipc_bclink_xmit(struct sk_buff *buf) -{ - int res; - - tipc_bclink_lock(); - - if (!bclink->bcast_nodes.count) { - res = msg_data_sz(buf_msg(buf)); - kfree_skb(buf); - goto exit; - } - - res = __tipc_link_xmit(bcl, buf); - if (likely(res >= 0)) { - bclink_set_last_sent(); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += bcl->out_queue_size; - } -exit: - tipc_bclink_unlock(); - return res; -} - /* tipc_bclink_xmit2 - broadcast buffer chain to all nodes in cluster * and to identified node local sockets * @buf: chain of buffers containing message diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d90645f408aa..af6b6579f5c7 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -89,7 +89,6 @@ void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -int tipc_bclink_xmit(struct sk_buff *buf); void tipc_bclink_rcv(struct sk_buff *buf); u32 tipc_bclink_get_last_sent(void); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); diff --git a/net/tipc/link.c b/net/tipc/link.c index d1255ba51216..28730ddf4b78 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -85,7 +85,6 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); @@ -679,180 +678,6 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* - * link_bundle_buf(): Append contents of a buffer to - * the tail of an existing one. 
- */ -static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, - struct sk_buff *buf) -{ - struct tipc_msg *bundler_msg = buf_msg(bundler); - struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 bundle_size = msg_size(bundler_msg); - u32 to_pos = align(bundle_size); - u32 pad = to_pos - bundle_size; - - if (msg_user(bundler_msg) != MSG_BUNDLER) - return 0; - if (msg_type(bundler_msg) != OPEN_MSG) - return 0; - if (skb_tailroom(bundler) < (pad + size)) - return 0; - if (l_ptr->max_pkt < (to_pos + size)) - return 0; - - skb_put(bundler, pad + size); - skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size); - msg_set_size(bundler_msg, to_pos + size); - msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); - kfree_skb(buf); - l_ptr->stats.sent_bundled++; - return 1; -} - -static void link_add_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf, - struct tipc_msg *msg) -{ - u32 ack = mod(l_ptr->next_in_no - 1); - u32 seqno = mod(l_ptr->next_out_no++); - - msg_set_word(msg, 2, ((ack << 16) | seqno)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - buf->next = NULL; - if (l_ptr->first_out) { - l_ptr->last_out->next = buf; - l_ptr->last_out = buf; - } else - l_ptr->first_out = l_ptr->last_out = buf; - - l_ptr->out_queue_size++; - if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; -} - -static void link_add_chain_to_outqueue(struct tipc_link *l_ptr, - struct sk_buff *buf_chain, - u32 long_msgno) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - - if (!l_ptr->next_out) - l_ptr->next_out = buf_chain; - while (buf_chain) { - buf = buf_chain; - buf_chain = buf_chain->next; - - msg = buf_msg(buf); - msg_set_long_msgno(msg, long_msgno); - link_add_to_outqueue(l_ptr, buf, msg); - } -} - -/* - * tipc_link_xmit() is the 'full path' for messages, called from - * inside TIPC when the 'fast path' in tipc_send_xmit - * has failed, and from link_send() - */ -int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 size = msg_size(msg); - u32 dsz = msg_data_sz(msg); - u32 queue_size = l_ptr->out_queue_size; - u32 imp = tipc_msg_tot_importance(msg); - u32 queue_limit = l_ptr->queue_limit[imp]; - u32 max_packet = l_ptr->max_pkt; - - /* Match msg importance against queue limits: */ - if (unlikely(queue_size >= queue_limit)) { - if (imp <= TIPC_CRITICAL_IMPORTANCE) { - link_schedule_port(l_ptr, msg_origport(msg), size); - kfree_skb(buf); - return -ELINKCONG; - } - kfree_skb(buf); - if (imp > CONN_MANAGER) { - pr_warn("%s<%s>, send queue full", link_rst_msg, - l_ptr->name); - tipc_link_reset(l_ptr); - } - return dsz; - } - - /* Fragmentation needed ? */ - if (size > max_packet) - return tipc_link_frag_xmit(l_ptr, buf); - - /* Packet can be queued or sent. */ - if (likely(!link_congested(l_ptr))) { - link_add_to_outqueue(l_ptr, buf, msg); - - tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return dsz; - } - /* Congestion: can message be bundled ? 
*/ - if ((msg_user(msg) != CHANGEOVER_PROTOCOL) && - (msg_user(msg) != MSG_FRAGMENTER)) { - - /* Try adding message to an existing bundle */ - if (l_ptr->next_out && - link_bundle_buf(l_ptr, l_ptr->last_out, buf)) - return dsz; - - /* Try creating a new bundle */ - if (size <= max_packet * 2 / 3) { - struct sk_buff *bundler = tipc_buf_acquire(max_packet); - struct tipc_msg bundler_hdr; - - if (bundler) { - tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - INT_H_SIZE, l_ptr->addr); - skb_copy_to_linear_data(bundler, &bundler_hdr, - INT_H_SIZE); - skb_trim(bundler, INT_H_SIZE); - link_bundle_buf(l_ptr, bundler, buf); - buf = bundler; - msg = buf_msg(buf); - l_ptr->stats.sent_bundles++; - } - } - } - if (!l_ptr->next_out) - l_ptr->next_out = buf; - link_add_to_outqueue(l_ptr, buf, msg); - return dsz; -} - -/* - * tipc_link_xmit(): same as __tipc_link_xmit(), but the link to use - * has not been selected yet, and the the owner node is not locked - * Called by TIPC internal users, e.g. the name distributor - */ -int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res = -ELINKCONG; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = __tipc_link_xmit(l_ptr, buf); - else - kfree_skb(buf); - tipc_node_unlock(n_ptr); - } else { - kfree_skb(buf); - } - return res; -} - /* tipc_link_cong: determine return value and how to treat the * sent buffer during link congestion. * - For plain, errorless user data messages we keep the buffer and @@ -2123,78 +1948,6 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) kfree_skb(buf); } -/* - * Fragmentation/defragmentation: - */ - -/* - * tipc_link_frag_xmit: Entry for buffers needing fragmentation. - * The buffer is complete, inclusive total message length. - * Returns user data length. 
- */ -static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf) -{ - struct sk_buff *buf_chain = NULL; - struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain; - struct tipc_msg *inmsg = buf_msg(buf); - struct tipc_msg fragm_hdr; - u32 insize = msg_size(inmsg); - u32 dsz = msg_data_sz(inmsg); - unchar *crs = buf->data; - u32 rest = insize; - u32 pack_sz = l_ptr->max_pkt; - u32 fragm_sz = pack_sz - INT_H_SIZE; - u32 fragm_no = 0; - u32 destaddr; - - if (msg_short(inmsg)) - destaddr = l_ptr->addr; - else - destaddr = msg_destnode(inmsg); - - /* Prepare reusable fragment header: */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, destaddr); - - /* Chop up message: */ - while (rest > 0) { - struct sk_buff *fragm; - - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } - fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (fragm == NULL) { - kfree_skb(buf); - kfree_skb_list(buf_chain); - return -ENOMEM; - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - fragm_no++; - msg_set_fragm_no(&fragm_hdr, fragm_no); - skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs, - fragm_sz); - buf_chain_tail->next = fragm; - buf_chain_tail = fragm; - - rest -= fragm_sz; - crs += fragm_sz; - msg_set_type(&fragm_hdr, FRAGMENT); - } - kfree_skb(buf); - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - - return dsz; -} - static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) { if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 04a59c58d385..f0ebeb677a8b 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -226,15 +226,9 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); -int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); -int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); void tipc_link_bundle_rcv(struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 9682296f5e7c..e3102ea494d4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -60,41 +60,6 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, msg_set_destnode(m, destnode); } -/** - * tipc_msg_build - create message using specified header and data - * - * Note: Caller must not hold any locks in case copy_from_user() is interrupted! 
- * - * Returns message data size or errno - */ -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - unsigned int len, int max_size, struct sk_buff **buf) -{ - int dsz, sz, hsz; - unsigned char *to; - - dsz = len; - hsz = msg_hdr_sz(hdr); - sz = hsz + dsz; - msg_set_size(hdr, sz); - if (unlikely(sz > max_size)) { - *buf = NULL; - return dsz; - } - - *buf = tipc_buf_acquire(sz); - if (!(*buf)) - return -ENOMEM; - skb_copy_to_linear_data(*buf, hdr, hsz); - to = (*buf)->data + hsz; - if (len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) { - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; - } - return dsz; -} - /* tipc_buf_append(): Append a buffer to the fragment list of another buffer * @*headbuf: in: NULL for first frag, otherwise value returned from prev call * out: set when successful non-complete reassembly, otherwise NULL diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a15d59601bf9..868fe22e3452 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -732,9 +732,6 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); -int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - unsigned int len, int max_size, struct sk_buff **buf); - int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); diff --git a/net/tipc/port.c b/net/tipc/port.c index 0d09dcb6da18..835dca1fd7ad 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -74,118 +74,6 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) (!peernode && (orignode == tipc_own_addr)); } -/** - * tipc_port_mcast_xmit - send a multicast message to local and remote - * destinations - */ -int tipc_port_mcast_xmit(struct tipc_port *oport, - struct tipc_name_seq const *seq, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *hdr; - struct sk_buff *buf; - struct sk_buff *ibuf = NULL; - struct tipc_port_list dports = {0, NULL, }; - int ext_targets; - int res; - - /* Create multicast message */ - hdr = &oport->phdr; - msg_set_type(hdr, TIPC_MCAST_MSG); - msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); - msg_set_destport(hdr, 0); - msg_set_destnode(hdr, 0); - msg_set_nametype(hdr, seq->type); - msg_set_namelower(hdr, seq->lower); - msg_set_nameupper(hdr, seq->upper); - msg_set_hdr_sz(hdr, MCAST_H_SIZE); - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (unlikely(!buf)) - return res; - - /* Figure out where to send multicast message */ - ext_targets = tipc_nametbl_mc_translate(seq->type, seq->lower, seq->upper, - TIPC_NODE_SCOPE, &dports); - - /* Send message to destinations (duplicate it only if necessary) */ - if (ext_targets) { - if (dports.count != 0) { - ibuf = skb_copy(buf, GFP_ATOMIC); - if (ibuf == NULL) { - tipc_port_list_free(&dports); - kfree_skb(buf); - return -ENOMEM; - } - } - res = tipc_bclink_xmit(buf); - if ((res < 0) && (dports.count != 0)) - kfree_skb(ibuf); - } else { - ibuf = buf; - } - - if (res >= 0) { - if (ibuf) - tipc_port_mcast_rcv(ibuf, &dports); - } else { - tipc_port_list_free(&dports); - } - return res; -} - -/** - * tipc_port_mcast_rcv - deliver multicast message to all destination ports - * - * If there is no port list, perform a lookup to create one - */ -void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp) -{ - struct tipc_msg *msg; - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item = dp; - int cnt = 0; - - msg = 
buf_msg(buf); - - /* Create destination port list, if one wasn't supplied */ - if (dp == NULL) { - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - TIPC_CLUSTER_SCOPE, - &dports); - item = dp = &dports; - } - - /* Deliver a copy of message to each destination port */ - if (dp->count != 0) { - msg_set_destnode(msg, tipc_own_addr); - if (dp->count == 1) { - msg_set_destport(msg, dp->ports[0]); - tipc_sk_rcv(buf); - tipc_port_list_free(dp); - return; - } - for (; cnt < dp->count; cnt++) { - int index = cnt % PLSIZE; - struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); - - if (b == NULL) { - pr_warn("Unable to deliver multicast message(s)\n"); - goto exit; - } - if ((index == 0) && (cnt != 0)) - item = item->next; - msg_set_destport(buf_msg(b), item->ports[index]); - tipc_sk_rcv(b); - } - } -exit: - kfree_skb(buf); - tipc_port_list_free(dp); -} - /* tipc_port_init - intiate TIPC port and lock it * * Returns obtained reference if initialization is successful, zero otherwise diff --git a/net/tipc/port.h b/net/tipc/port.h index 0e47052daa29..3f93454592b6 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -120,17 +120,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, struct tipc_portid const *peer); int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); -/* - * TIPC messaging routines - */ - -int tipc_port_mcast_xmit(struct tipc_port *port, - struct tipc_name_seq const *seq, - struct iovec const *msg, - unsigned int len); - struct sk_buff *tipc_port_get_ports(void); -void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); /** -- cgit v1.2.3 From 9fbfb8b120bd4fe89cd70d6c8841e6e1cfab2609 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:03 -0400 Subject: tipc: rename temporarily named functions After the previous commit, we can now give the functions with temporary names, such as tipc_link_xmit2(), tipc_msg_build2() etc., their proper names. There are no functional changes in this commit. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 6 +++--- net/tipc/bcast.h | 2 +- net/tipc/link.c | 18 +++++++++--------- net/tipc/link.h | 4 ++-- net/tipc/msg.c | 6 +++--- net/tipc/msg.h | 4 ++-- net/tipc/name_distr.c | 4 ++-- net/tipc/port.c | 10 +++++----- net/tipc/socket.c | 20 ++++++++++---------- 9 files changed, 37 insertions(+), 37 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2f3256da6f88..d890d480ae3b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -389,13 +389,13 @@ static void bclink_peek_nack(struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* tipc_bclink_xmit2 - broadcast buffer chain to all nodes in cluster - * and to identified node local sockets +/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster + * and to identified node local sockets * @buf: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit2(struct sk_buff *buf) +int tipc_bclink_xmit(struct sk_buff *buf) { int rc = 0; int bc = 0; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index af6b6579f5c7..4875d9536aee 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -98,6 +98,6 @@ int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit2(struct sk_buff *buf); +int tipc_bclink_xmit(struct sk_buff *buf); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 28730ddf4b78..fb1485dc6736 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -706,7 +706,7 @@ static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) } /** - * __tipc_link_xmit2(): same as tipc_link_xmit2, but destlink is known & locked + * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use * @buf: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG @@ -715,7 +715,7 @@ static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) * Only the socket functions tipc_send_stream() and tipc_send_packet() need * to act on the return value, since they may need to do more send attempts. 
*/ -int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) +int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); uint psz = msg_size(msg); @@ -783,7 +783,7 @@ int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) } /** - * tipc_link_xmit2() is the general link level function for message sending + * tipc_link_xmit() is the general link level function for message sending * @buf: chain of buffers containing message * @dsz: amount of user data to be sent * @dnode: address of destination node @@ -791,7 +791,7 @@ int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) +int tipc_link_xmit(struct sk_buff *buf, u32 dnode, u32 selector) { struct tipc_link *link = NULL; struct tipc_node *node; @@ -802,7 +802,7 @@ int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) tipc_node_lock(node); link = node->active_links[selector & 1]; if (link) - rc = __tipc_link_xmit2(link, buf); + rc = __tipc_link_xmit(link, buf); tipc_node_unlock(node); } @@ -836,7 +836,7 @@ static void tipc_link_sync_xmit(struct tipc_link *link) msg = buf_msg(buf); tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); - __tipc_link_xmit2(link, buf); + __tipc_link_xmit(link, buf); } /* @@ -1683,7 +1683,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, } skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length); - __tipc_link_xmit2(tunnel, buf); + __tipc_link_xmit(tunnel, buf); } @@ -1716,7 +1716,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (buf) { skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); msg_set_size(&tunnel_hdr, INT_H_SIZE); - __tipc_link_xmit2(tunnel, buf); + __tipc_link_xmit(tunnel, buf); } else { pr_warn("%sunable to send changeover msg\n", link_co_err); @@ -1789,7 +1789,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE); skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data, length); - __tipc_link_xmit2(tunnel, outbuf); + __tipc_link_xmit(tunnel, outbuf); if (!tipc_link_is_up(l_ptr)) return; iter = iter->next; diff --git a/net/tipc/link.h b/net/tipc/link.h index f0ebeb677a8b..782983ccd323 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -226,8 +226,8 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); -int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); +int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); +int __tipc_link_xmit(struct tipc_link *link, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); void tipc_link_bundle_rcv(struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e3102ea494d4..b6f45d029933 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -120,7 +120,7 @@ out_free: /** - * tipc_msg_build2 - create buffer chain containing specified header and data + * tipc_msg_build - create buffer chain 
containing specified header and data * @mhdr: Message header, to be prepended to data * @iov: User data * @offset: Posision in iov to start copying from @@ -129,8 +129,8 @@ out_free: * @chain: Buffer or chain of buffers to be returned to caller * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, - int offset, int dsz, int pktmax , struct sk_buff **chain) +int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 868fe22e3452..462fa194a6af 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -738,8 +738,8 @@ bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); -int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, - int offset, int dsz, int mtu , struct sk_buff **chain); +int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int mtu , struct sk_buff **chain); struct sk_buff *tipc_msg_reassemble(struct sk_buff *chain); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index d16f9475fa76..dcc15bcd5692 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -116,7 +116,7 @@ void named_cluster_distribute(struct sk_buff *buf) if (!obuf) break; msg_set_destnode(buf_msg(obuf), dnode); - tipc_link_xmit2(obuf, dnode, dnode); + tipc_link_xmit(obuf, dnode, dnode); } rcu_read_unlock(); @@ -232,7 +232,7 @@ void tipc_named_node_up(u32 dnode) /* Convert circular list to linear list and send: */ buf_chain = (struct sk_buff *)msg_list.next; ((struct sk_buff *)msg_list.prev)->next = NULL; - tipc_link_xmit2(buf_chain, dnode, dnode); + tipc_link_xmit(buf_chain, dnode, dnode); } /** diff --git a/net/tipc/port.c b/net/tipc/port.c index 835dca1fd7ad..7e096a5e7701 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -130,7 +130,7 @@ void tipc_port_destroy(struct tipc_port *p_ptr) tipc_nodesub_unsubscribe(&p_ptr->subscription); msg = buf_msg(buf); peer = msg_destnode(msg); - tipc_link_xmit2(buf, peer, msg_link_selector(msg)); + tipc_link_xmit(buf, peer, msg_link_selector(msg)); } spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -187,7 +187,7 @@ static void port_timeout(unsigned long ref) } tipc_port_unlock(p_ptr); msg = buf_msg(buf); - tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -202,7 +202,7 @@ static void port_handle_node_down(unsigned long ref) buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); tipc_port_unlock(p_ptr); msg = buf_msg(buf); - tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -347,7 +347,7 @@ void tipc_acknowledge(u32 ref, u32 ack) if (!buf) return; msg = buf_msg(buf); - tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); } int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, @@ -509,6 +509,6 @@ int tipc_port_shutdown(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); msg = buf_msg(buf); - tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); + tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); return tipc_port_disconnect(ref); } diff --git 
a/net/tipc/socket.c b/net/tipc/socket.c index b28eea50c7fc..5ad42f6137d8 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -131,7 +131,7 @@ static void reject_rx_queue(struct sock *sk) while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit2(buf, dnode, 0); + tipc_link_xmit(buf, dnode, 0); } } @@ -341,7 +341,7 @@ static int tipc_release(struct socket *sock) tipc_port_disconnect(port->ref); } if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit2(buf, dnode, 0); + tipc_link_xmit(buf, dnode, 0); } } @@ -566,7 +566,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); - rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); if (unlikely(rc < 0)) return rc; @@ -821,12 +821,12 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, new_mtu: mtu = tipc_node_get_mtu(dnode, tsk->port.ref); - rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); if (rc < 0) goto exit; do { - rc = tipc_link_xmit2(buf, dnode, tsk->port.ref); + rc = tipc_link_xmit(buf, dnode, tsk->port.ref); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -934,12 +934,12 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, next: mtu = port->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - rc = tipc_msg_build2(mhdr, m->msg_iov, sent, send, mtu, &buf); + rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf); if (unlikely(rc < 0)) goto exit; do { if (likely(!tipc_sk_conn_cong(tsk))) { - rc = tipc_link_xmit2(buf, dnode, ref); + rc = tipc_link_xmit(buf, dnode, ref); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1571,7 +1571,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) return 0; - tipc_link_xmit2(buf, onode, 0); + tipc_link_xmit(buf, onode, 0); return 0; } @@ -1623,7 +1623,7 @@ exit: if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) return -EHOSTUNREACH; - tipc_link_xmit2(buf, dnode, 0); + tipc_link_xmit(buf, dnode, 0); return (rc < 0) ? -EHOSTUNREACH : 0; } @@ -1910,7 +1910,7 @@ restart: } tipc_port_disconnect(port->ref); if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit2(buf, peer, 0); + tipc_link_xmit(buf, peer, 0); } else { tipc_port_shutdown(port->ref); } -- cgit v1.2.3 From 6f92ee54b316c116e125a6bb268abe308e4c14e6 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 16 Jul 2014 20:41:04 -0400 Subject: tipc: ensure sequential message delivery across dual bearers When we run broadcast packets over dual bearers/interfaces, the current transmission code is flipping bearers between each sent packet, with the purpose of leveraging the double bandwidth available. The receiving bclink is resequencing the packets if needed, so all messages are delivered upwards from the broadcast link in the correct order, even if they may arrive in concurrent interrupts. However, at the moment of delivery upwards to the socket, we release all spinlocks (bclink_lock, node_lock), so it is still possible that arriving messages bypass each other before they reach the socket queue. We fix this by applying the same technique we are using for unicast traffic. 
We use a link selector (i.e., the last bit of sending port number) to ensure that messages from the same sender socket always are sent over the same bearer. This guarantees sequential delivery between socket pairs, which is sufficient to satisfy the protocol spec, as well as all known user requirements. Signed-off-by: Jon Maloy Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d890d480ae3b..dd13bfa09333 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -637,6 +637,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; + struct tipc_msg *msg = buf_msg(buf); /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -644,10 +645,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, * since they are sent in an unreliable manner and don't need it */ if (likely(!msg_non_seq(buf_msg(buf)))) { - struct tipc_msg *msg; - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg = buf_msg(buf); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); bcl->stats.sent_info++; @@ -664,12 +662,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *b = p; + struct tipc_bearer *bp[2] = {p, s}; + struct tipc_bearer *b = bp[msg_link_selector(msg)]; struct sk_buff *tbuf; if (!p) break; /* No more bearers to try */ - + if (!b) + b = p; tipc_nmap_diff(&bcbearer->remains, &b->nodes, &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) @@ -686,13 +686,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } - - /* Swap bearers for next packet */ - if (s) { - bcbearer->bpairs[bp_index].primary = s; - bcbearer->bpairs[bp_index].secondary = p; - } - if (bcbearer->remains_new.count == 0) break; /* All targets reached */ -- cgit v1.2.3 From 52f50ce556af7008e1c2088d8b2b778ef5e79ff1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 20 Jul 2014 13:14:28 +0800 Subject: tipc: fix sparse non static symbol warnings Fixes the following sparse warnings: net/tipc/socket.c:545:5: warning: symbol 'tipc_sk_proto_rcv' was not declared. Should it be static? net/tipc/socket.c:2015:5: warning: symbol 'tipc_ioctl' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5ad42f6137d8..8477d08a6aa0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -635,7 +635,8 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf) * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if * (CONN_PROBE_REPLY) message should be forwarded. 
*/ -int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) +static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port *port = &tsk->port; @@ -2068,7 +2069,7 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) { struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; -- cgit v1.2.3 From 13e9b9972fa0f34059e737ae215a26e43966b46f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 25 Jul 2014 14:48:09 -0400 Subject: tipc: make tipc_buf_append() more robust As per comment from David Miller, we try to make the buffer reassembly function more resilient to user errors than it is today. - We check that the "*buf" parameter always is set, since this is mandatory input. - We ensure that *buf->next always is set to NULL before linking in the buffer, instead of relying of the caller to have done this. - We ensure that the "tail" pointer in the head buffer's control block is initialized to NULL when the first fragment arrives. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6f45d029933..9680be6d388a 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -72,27 +72,38 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) struct sk_buff *head = *headbuf; struct sk_buff *frag = *buf; struct sk_buff *tail; - struct tipc_msg *msg = buf_msg(frag); - u32 fragid = msg_type(msg); - bool headstolen; + struct tipc_msg *msg; + u32 fragid; int delta; + bool headstolen; + if (!frag) + goto err; + + msg = buf_msg(frag); + fragid = msg_type(msg); + frag->next = NULL; skb_pull(frag, msg_hdr_sz(msg)); if (fragid == FIRST_FRAGMENT) { - if (head || skb_unclone(frag, GFP_ATOMIC)) - goto out_free; + if (unlikely(head)) + goto err; + if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + goto err; head = *headbuf = frag; skb_frag_list_init(head); + TIPC_SKB_CB(head)->tail = NULL; *buf = NULL; return 0; } + if (!head) - goto out_free; - tail = TIPC_SKB_CB(head)->tail; + goto err; + if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { + tail = TIPC_SKB_CB(head)->tail; if (!skb_has_frag_list(head)) skb_shinfo(head)->frag_list = frag; else @@ -102,6 +113,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) head->len += frag->len; TIPC_SKB_CB(head)->tail = frag; } + if (fragid == LAST_FRAGMENT) { *buf = head; TIPC_SKB_CB(head)->tail = NULL; @@ -110,7 +122,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } *buf = NULL; return 0; -out_free: + +err: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); kfree_skb(*headbuf); -- cgit v1.2.3 From ad025a56a5487d5d443ee790c04517581d39b21b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 28 Jul 2014 21:30:14 +0800 Subject: tipc: remove duplicated include from socket.c Remove duplicated include. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8477d08a6aa0..7d423ee10897 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -40,7 +40,6 @@ #include "node.h" #include "link.h" #include -#include "link.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -- cgit v1.2.3
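
As a minimal, self-contained illustration of the bearer-selection rule described in the dual-bearer commit above: this is a hypothetical userspace sketch, not kernel code, and the names pick_bearer and BEARER_* are invented for the example; the kernel itself derives the selector with msg_link_selector() inside tipc_bcbearer_send().

/*
 * Sketch only: mirrors the rule "the last bit of the sending port number
 * selects the bearer", with a fallback to the primary bearer when no
 * secondary bearer is configured, as in the patched tipc_bcbearer_send().
 */
#include <stdio.h>

enum bearer { BEARER_PRIMARY = 0, BEARER_SECONDARY = 1 };

static enum bearer pick_bearer(unsigned int sending_port, int have_secondary)
{
	unsigned int selector = sending_port & 1;	/* last bit of the port number */

	if (selector && have_secondary)
		return BEARER_SECONDARY;
	return BEARER_PRIMARY;				/* fall back to the primary bearer */
}

int main(void)
{
	unsigned int port;

	/* Each port always maps to the same bearer, so packets from one
	 * sending socket can never overtake each other on the other bearer. */
	for (port = 1000; port < 1004; port++)
		printf("port %u -> bearer %d\n", port, pick_bearer(port, 1));
	return 0;
}

Because the mapping depends only on the sender's port number, all broadcast packets from a given socket traverse one bearer, which is what restores sequential delivery between socket pairs while still spreading different senders across both bearers.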