diff options
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r-- | net/tipc/socket.c | 884 |
1 files changed, 740 insertions, 144 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ff8c8118d56e..75275c5cf929 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,17 +35,67 @@ */ #include "core.h" -#include "port.h" #include "name_table.h" #include "node.h" #include "link.h" #include <linux/export.h> +#include "config.h" +#include "socket.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define TIPC_FWD_MSG 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 + +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @published: non-zero if port has one or more associated names + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + * @port_list: adjacent ports in TIPC's global list of ports + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime + * @probing_state: + * @probing_interval: + * @timer: + * @port: port - interacts with 'sk' and with the rest of the TIPC stack + * @peer_name: the peer of the connection, if any + * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer + */ +struct tipc_sock { + struct sock sk; + int connected; + u32 conn_type; + u32 conn_instance; + int published; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; + struct list_head sock_list; + struct list_head publications; + u32 pub_count; + u32 probing_state; + u32 probing_interval; + struct timer_list timer; + uint conn_timeout; + atomic_t dupl_rcvcnt; + bool link_cong; + uint sent_unacked; + uint rcv_unacked; +}; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); @@ -53,6 +103,16 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); +static void tipc_sk_timeout(unsigned long ref); +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); +static void tipc_sk_ref_discard(u32 ref); +static struct tipc_sock *tipc_sk_get(u32 ref); +static struct tipc_sock *tipc_sk_get_next(u32 *ref); +static void tipc_sk_put(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -106,24 +166,75 @@ static struct proto tipc_proto_kern; * - port reference */ -#include "socket.h" +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ + return msg_destnode(&tsk->phdr); +} + +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ + return msg_destport(&tsk->phdr); +} + +static bool tsk_unreliable(struct tipc_sock *tsk) +{ + return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ + msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ + return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ + msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ + return msg_importance(&tsk->phdr); +} + +static int tsk_set_importance(struct tipc_sock *tsk, int imp) +{ + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tsk->phdr, (u32)imp); + return 0; +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +static int tsk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} /** - * advance_rx_queue - discard first buffer in socket receive queue + * tsk_advance_rx_queue - discard first buffer in socket receive queue * * Caller must hold socket lock */ -static void advance_rx_queue(struct sock *sk) +static void tsk_advance_rx_queue(struct sock *sk) { kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); } /** - * reject_rx_queue - reject all buffers in socket receive queue + * tsk_rej_rx_queue - reject all buffers in socket receive queue * * Caller must hold socket lock */ -static void reject_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *buf; u32 dnode; @@ -134,6 +245,38 @@ static void reject_rx_queue(struct sock *sk) } } +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. + */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ + u32 peer_port = tsk_peer_port(tsk); + u32 orig_node; + u32 peer_node; + + if (unlikely(!tsk->connected)) + return false; + + if (unlikely(msg_origport(msg) != peer_port)) + return false; + + orig_node = msg_orignode(msg); + peer_node = tsk_peer_node(tsk); + + if (likely(orig_node == peer_node)) + return true; + + if (!orig_node && (peer_node == tipc_own_addr)) + return true; + + if (!peer_node && (orig_node == tipc_own_addr)) + return true; + + return false; +} + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -153,7 +296,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, socket_state state; struct sock *sk; struct tipc_sock *tsk; - struct tipc_port *port; + struct tipc_msg *msg; u32 ref; /* Validate arguments */ @@ -188,20 +331,24 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - port = &tsk->port; - - ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE); + ref = tipc_sk_ref_acquire(tsk); if (!ref) { - pr_warn("Socket registration failed, ref. table exhausted\n"); - sk_free(sk); + pr_warn("Socket create failed; reference table exhausted\n"); return -ENOMEM; } + tsk->max_pkt = MAX_PKT_DEFAULT; + tsk->ref = ref; + INIT_LIST_HEAD(&tsk->publications); + msg = &tsk->phdr; + tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; - sock_init_data(sock, sk); + k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -209,12 +356,11 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); - tipc_port_unlock(port); if (sock->state == SS_READY) { - tipc_port_set_unreturnable(port, true); + tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) - tipc_port_set_unreliable(port, true); + tsk_set_unreliable(tsk, true); } return 0; } @@ -308,7 +454,6 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct tipc_sock *tsk; - struct tipc_port *port; struct sk_buff *buf; u32 dnode; @@ -320,13 +465,13 @@ static int tipc_release(struct socket *sock) return 0; tsk = tipc_sk(sk); - port = &tsk->port; lock_sock(sk); /* * Reject all unreceived messages, except on an active connection * (which disconnects locally & sends a 'FIN+' to peer) */ + dnode = tsk_peer_node(tsk); while (sock->state != SS_DISCONNECTING) { buf = __skb_dequeue(&sk->sk_receive_queue); if (buf == NULL) @@ -337,17 +482,27 @@ static int tipc_release(struct socket *sock) if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; - tipc_port_disconnect(port->ref); + tsk->connected = 0; + tipc_node_remove_conn(dnode, tsk->ref); } if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit(buf, dnode, 0); } } - /* Destroy TIPC port; also disconnects an active connection and - * sends a 'FIN-' to peer. - */ - tipc_port_destroy(port); + tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_ref_discard(tsk->ref); + k_cancel_timer(&tsk->timer); + if (tsk->connected) { + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, dnode, tipc_own_addr, + tsk_peer_port(tsk), + tsk->ref, TIPC_ERR_NO_PORT); + if (buf) + tipc_link_xmit(buf, dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->ref); + } + k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -355,7 +510,6 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); sock->sk = NULL; @@ -387,7 +541,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, lock_sock(sk); if (unlikely(!uaddr_len)) { - res = tipc_withdraw(&tsk->port, 0, NULL); + res = tipc_sk_withdraw(tsk, 0, NULL); goto exit; } @@ -415,8 +569,8 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, } res = (addr->scope > 0) ? - tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq); + tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : + tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); exit: release_sock(sk); return res; @@ -446,10 +600,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, if ((sock->state != SS_CONNECTED) && ((peer != 2) || (sock->state != SS_DISCONNECTING))) return -ENOTCONN; - addr->addr.id.ref = tipc_port_peerport(&tsk->port); - addr->addr.id.node = tipc_port_peernode(&tsk->port); + addr->addr.id.ref = tsk_peer_port(tsk); + addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->port.ref; + addr->addr.id.ref = tsk->ref; addr->addr.id.node = tipc_own_addr; } @@ -518,7 +672,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, break; case SS_READY: case SS_CONNECTED: - if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) + if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -549,7 +703,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct iovec *iov, size_t dsz, long timeo) { struct sock *sk = sock->sk; - struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr; + struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff *buf; uint mtu; int rc; @@ -579,6 +733,7 @@ new_mtu: goto new_mtu; if (rc != -ELINKCONG) break; + tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) kfree_skb_list(buf); @@ -638,20 +793,19 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *port = &tsk->port; int conn_cong; /* Ignore if connection cannot be validated: */ - if (!port->connected || !tipc_port_peer_msg(port, msg)) + if (!tsk_peer_msg(tsk, msg)) goto exit; - port->probing_state = TIPC_CONN_OK; + tsk->probing_state = TIPC_CONN_OK; if (msg_type(msg) == CONN_ACK) { - conn_cong = tipc_sk_conn_cong(tsk); + conn_cong = tsk_conn_cong(tsk); tsk->sent_unacked -= msg_msgcnt(msg); if (conn_cong) - tipc_sock_wakeup(tsk); + tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) return TIPC_OK; @@ -742,8 +896,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; - struct tipc_msg *mhdr = &port->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct iovec *iov = m->msg_iov; u32 dnode, dport; struct sk_buff *buf; @@ -774,13 +927,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, rc = -EISCONN; goto exit; } - if (tsk->port.published) { + if (tsk->published) { rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { - tsk->port.conn_type = dest->addr.name.name.type; - tsk->port.conn_instance = dest->addr.name.name.instance; + tsk->conn_type = dest->addr.name.name.type; + tsk->conn_instance = dest->addr.name.name.instance; } } rc = dest_name_check(dest, m); @@ -820,13 +973,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + mtu = tipc_node_get_mtu(dnode, tsk->ref); rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); if (rc < 0) goto exit; do { - rc = tipc_link_xmit(buf, dnode, tsk->port.ref); + TIPC_SKB_CB(buf)->wakeup_pending = tsk->link_cong; + rc = tipc_link_xmit(buf, dnode, tsk->ref); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -835,10 +989,9 @@ new_mtu: } if (rc == -EMSGSIZE) goto new_mtu; - if (rc != -ELINKCONG) break; - + tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) kfree_skb_list(buf); @@ -873,8 +1026,8 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && - !tipc_sk_conn_cong(tsk)) || - !tsk->port.connected); + !tsk_conn_cong(tsk)) || + !tsk->connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -897,11 +1050,10 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; - struct tipc_msg *mhdr = &port->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = port->ref; + u32 ref = tsk->ref; int rc = -EINVAL; long timeo; u32 dnode; @@ -929,16 +1081,16 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - dnode = tipc_port_peernode(port); + dnode = tsk_peer_node(tsk); next: - mtu = port->max_pkt; + mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf); if (unlikely(rc < 0)) goto exit; do { - if (likely(!tipc_sk_conn_cong(tsk))) { + if (likely(!tsk_conn_cong(tsk))) { rc = tipc_link_xmit(buf, dnode, ref); if (likely(!rc)) { tsk->sent_unacked++; @@ -948,11 +1100,12 @@ next: goto next; } if (rc == -EMSGSIZE) { - port->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(dnode, ref); goto next; } if (rc != -ELINKCONG) break; + tsk->link_cong = 1; } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) @@ -984,29 +1137,25 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, return tipc_send_stream(iocb, sock, m, dsz); } -/** - * auto_connect - complete connection setup to a remote port - * @tsk: tipc socket structure - * @msg: peer's response message - * - * Returns 0 on success, errno otherwise +/* tipc_sk_finish_conn - complete the setup of a connection */ -static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg) +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, + u32 peer_node) { - struct tipc_port *port = &tsk->port; - struct socket *sock = tsk->sk.sk_socket; - struct tipc_portid peer; - - peer.ref = msg_origport(msg); - peer.node = msg_orignode(msg); - - __tipc_port_connect(port->ref, port, &peer); - - if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&port->phdr, (u32)msg_importance(msg)); - sock->state = SS_CONNECTED; - return 0; + struct tipc_msg *msg = &tsk->phdr; + + msg_set_destnode(msg, peer_node); + msg_set_destport(msg, peer_port); + msg_set_type(msg, TIPC_CONN_MSG); + msg_set_lookup_scope(msg, 0); + msg_set_hdr_sz(msg, SHORT_H_SIZE); + + tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_state = TIPC_CONN_OK; + tsk->connected = 1; + k_start_timer(&tsk->timer, tsk->probing_interval); + tipc_node_add_conn(peer_node, tsk->ref, peer_port); + tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); } /** @@ -1033,17 +1182,17 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) } /** - * anc_data_recv - optionally capture ancillary data for received message + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message * @m: descriptor for message info * @msg: received message header - * @tport: TIPC port associated with message + * @tsk: TIPC port associated with message * * Note: Ancillary data is not captured if not requested by receiver. * * Returns 0 if successful, otherwise errno */ -static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) +static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, + struct tipc_sock *tsk) { u32 anc_data[3]; u32 err; @@ -1086,10 +1235,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, anc_data[2] = msg_nameupper(msg); break; case TIPC_CONN_MSG: - has_name = (tport->conn_type != 0); - anc_data[0] = tport->conn_type; - anc_data[1] = tport->conn_instance; - anc_data[2] = tport->conn_instance; + has_name = (tsk->conn_type != 0); + anc_data[0] = tsk->conn_type; + anc_data[1] = tsk->conn_instance; + anc_data[2] = tsk->conn_instance; break; default: has_name = 0; @@ -1103,6 +1252,24 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } +static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +{ + struct sk_buff *buf = NULL; + struct tipc_msg *msg; + u32 peer_port = tsk_peer_port(tsk); + u32 dnode = tsk_peer_node(tsk); + + if (!tsk->connected) + return; + buf = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, + tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + if (!buf) + return; + msg = buf_msg(buf); + msg_set_msgcnt(msg, ack); + tipc_link_xmit(buf, dnode, msg_link_selector(msg)); +} + static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; @@ -1153,7 +1320,6 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1188,7 +1354,7 @@ restart: /* Discard an empty non-errored message & try again */ if ((!sz) && (!err)) { - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); goto restart; } @@ -1196,7 +1362,7 @@ restart: set_orig_addr(m, msg); /* Capture ancillary data (optional) */ - res = anc_data_recv(m, msg, port); + res = tipc_sk_anc_data_recv(m, msg, tsk); if (res) goto exit; @@ -1223,10 +1389,10 @@ restart: if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_acknowledge(port->ref, tsk->rcv_unacked); + tipc_sk_send_ack(tsk, tsk->rcv_unacked); tsk->rcv_unacked = 0; } - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); } exit: release_sock(sk); @@ -1250,7 +1416,6 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1288,14 +1453,14 @@ restart: /* Discard an empty non-errored message & try again */ if ((!sz) && (!err)) { - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); goto restart; } /* Optionally capture sender's address & ancillary data of first msg */ if (sz_copied == 0) { set_orig_addr(m, msg); - res = anc_data_recv(m, msg, port); + res = tipc_sk_anc_data_recv(m, msg, tsk); if (res) goto exit; } @@ -1334,10 +1499,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_acknowledge(port->ref, tsk->rcv_unacked); + tipc_sk_send_ack(tsk, tsk->rcv_unacked); tsk->rcv_unacked = 0; } - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); } /* Loop around if more data is required */ @@ -1396,12 +1561,9 @@ static void tipc_data_ready(struct sock *sk) static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; - struct tipc_port *port = &tsk->port; struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - int retval = -TIPC_ERR_NO_PORT; - int res; if (msg_mcast(msg)) return retval; @@ -1409,16 +1571,23 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) switch ((int)sock->state) { case SS_CONNECTED: /* Accept only connection-based messages sent by peer */ - if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) { + if (tsk_peer_msg(tsk, msg)) { if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - __tipc_port_disconnect(port); + tsk->connected = 0; + /* let timer expire on it's own */ + tipc_node_remove_conn(tsk_peer_node(tsk), + tsk->ref); } retval = TIPC_OK; } break; case SS_CONNECTING: /* Accept only ACK or NACK message */ + + if (unlikely(!msg_connected(msg))) + break; + if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; sk->sk_err = ECONNREFUSED; @@ -1426,17 +1595,17 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) break; } - if (unlikely(!msg_connected(msg))) - break; - - res = auto_connect(tsk, msg); - if (res) { + if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) { sock->state = SS_DISCONNECTING; - sk->sk_err = -res; + sk->sk_err = EINVAL; retval = TIPC_OK; break; } + tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg)); + msg_set_importance(&tsk->phdr, msg_importance(msg)); + sock->state = SS_CONNECTED; + /* If an incoming message is an 'ACK-', it should be * discarded here because it doesn't contain useful * data. In addition, we should try to wake up @@ -1518,6 +1687,13 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) if (unlikely(msg_user(msg) == CONN_MANAGER)) return tipc_sk_proto_rcv(tsk, &onode, buf); + if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { + kfree_skb(buf); + tsk->link_cong = 0; + sk->sk_write_space(sk); + return TIPC_OK; + } + /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) return -TIPC_ERR_NO_PORT; @@ -1585,7 +1761,6 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) int tipc_sk_rcv(struct sk_buff *buf) { struct tipc_sock *tsk; - struct tipc_port *port; struct sock *sk; u32 dport = msg_destport(buf_msg(buf)); int rc = TIPC_OK; @@ -1593,13 +1768,11 @@ int tipc_sk_rcv(struct sk_buff *buf) u32 dnode; /* Validate destination and message */ - port = tipc_port_lock(dport); - if (unlikely(!port)) { + tsk = tipc_sk_get(dport); + if (unlikely(!tsk)) { rc = tipc_msg_eval(buf, &dnode); goto exit; } - - tsk = tipc_port_to_sock(port); sk = &tsk->sk; /* Queue message */ @@ -1615,8 +1788,7 @@ int tipc_sk_rcv(struct sk_buff *buf) rc = -TIPC_ERR_OVERLOAD; } bh_unlock_sock(sk); - tipc_port_unlock(port); - + tipc_sk_put(tsk); if (likely(!rc)) return 0; exit: @@ -1803,10 +1975,8 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) { struct sock *new_sk, *sk = sock->sk; struct sk_buff *buf; - struct tipc_port *new_port; + struct tipc_sock *new_tsock; struct tipc_msg *msg; - struct tipc_portid peer; - u32 new_ref; long timeo; int res; @@ -1828,8 +1998,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; new_sk = new_sock->sk; - new_port = &tipc_sk(new_sk)->port; - new_ref = new_port->ref; + new_tsock = tipc_sk(new_sk); msg = buf_msg(buf); /* we lock on new_sk; but lockdep sees the lock on sk */ @@ -1839,18 +2008,16 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) * Reject any stray messages received by new socket * before the socket lock was taken (very, very unlikely) */ - reject_rx_queue(new_sk); + tsk_rej_rx_queue(new_sk); /* Connect new socket to it's peer */ - peer.ref = msg_origport(msg); - peer.node = msg_orignode(msg); - tipc_port_connect(new_ref, &peer); + tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); new_sock->state = SS_CONNECTED; - tipc_port_set_importance(new_port, msg_importance(msg)); + tsk_set_importance(new_tsock, msg_importance(msg)); if (msg_named(msg)) { - new_port->conn_type = msg_nametype(msg); - new_port->conn_instance = msg_nameinst(msg); + new_tsock->conn_type = msg_nametype(msg); + new_tsock->conn_instance = msg_nameinst(msg); } /* @@ -1860,7 +2027,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) if (!msg_data_sz(msg)) { struct msghdr m = {NULL,}; - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); tipc_send_packet(NULL, new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); @@ -1886,9 +2053,8 @@ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; struct sk_buff *buf; - u32 peer; + u32 dnode; int res; if (how != SHUT_RDWR) @@ -1908,15 +2074,21 @@ restart: kfree_skb(buf); goto restart; } - tipc_port_disconnect(port->ref); - if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit(buf, peer, 0); + if (tipc_msg_reverse(buf, &dnode, TIPC_CONN_SHUTDOWN)) + tipc_link_xmit(buf, dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->ref); } else { - tipc_port_shutdown(port->ref); + dnode = tsk_peer_node(tsk); + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, + 0, dnode, tipc_own_addr, + tsk_peer_port(tsk), + tsk->ref, TIPC_CONN_SHUTDOWN); + tipc_link_xmit(buf, dnode, tsk->ref); } - + tsk->connected = 0; sock->state = SS_DISCONNECTING; - + tipc_node_remove_conn(dnode, tsk->ref); /* fall through */ case SS_DISCONNECTING: @@ -1937,6 +2109,432 @@ restart: return res; } +static void tipc_sk_timeout(unsigned long ref) +{ + struct tipc_sock *tsk; + struct sock *sk; + struct sk_buff *buf = NULL; + u32 peer_port, peer_node; + + tsk = tipc_sk_get(ref); + if (!tsk) + return; + + sk = &tsk->sk; + bh_lock_sock(sk); + if (!tsk->connected) { + bh_unlock_sock(sk); + goto exit; + } + peer_port = tsk_peer_port(tsk); + peer_node = tsk_peer_node(tsk); + + if (tsk->probing_state == TIPC_CONN_PROBING) { + /* Previous probe not answered -> self abort */ + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tipc_own_addr, + peer_node, ref, peer_port, + TIPC_ERR_NO_PORT); + } else { + buf = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, + 0, peer_node, tipc_own_addr, + peer_port, ref, TIPC_OK); + tsk->probing_state = TIPC_CONN_PROBING; + k_start_timer(&tsk->timer, tsk->probing_interval); + } + bh_unlock_sock(sk); + if (buf) + tipc_link_xmit(buf, peer_node, ref); +exit: + tipc_sk_put(tsk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct publication *publ; + u32 key; + + if (tsk->connected) + return -EINVAL; + key = tsk->ref + tsk->pub_count + 1; + if (key == tsk->ref) + return -EADDRINUSE; + + publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, + scope, tsk->ref, key); + if (unlikely(!publ)) + return -EINVAL; + + list_add(&publ->pport_list, &tsk->publications); + tsk->pub_count++; + tsk->published = 1; + return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct publication *publ; + struct publication *safe; + int rc = -EINVAL; + + list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { + if (seq) { + if (publ->scope != scope) + continue; + if (publ->type != seq->type) + continue; + if (publ->lower != seq->lower) + continue; + if (publ->upper != seq->upper) + break; + tipc_nametbl_withdraw(publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + break; + } + tipc_nametbl_withdraw(publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + } + if (list_empty(&tsk->publications)) + tsk->published = 0; + return rc; +} + +static int tipc_sk_show(struct tipc_sock *tsk, char *buf, + int len, int full_id) +{ + struct publication *publ; + int ret; + + if (full_id) + ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", + tipc_zone(tipc_own_addr), + tipc_cluster(tipc_own_addr), + tipc_node(tipc_own_addr), tsk->ref); + else + ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + + if (tsk->connected) { + u32 dport = tsk_peer_port(tsk); + u32 destnode = tsk_peer_node(tsk); + + ret += tipc_snprintf(buf + ret, len - ret, + " connected to <%u.%u.%u:%u>", + tipc_zone(destnode), + tipc_cluster(destnode), + tipc_node(destnode), dport); + if (tsk->conn_type != 0) + ret += tipc_snprintf(buf + ret, len - ret, + " via {%u,%u}", tsk->conn_type, + tsk->conn_instance); + } else if (tsk->published) { + ret += tipc_snprintf(buf + ret, len - ret, " bound to"); + list_for_each_entry(publ, &tsk->publications, pport_list) { + if (publ->lower == publ->upper) + ret += tipc_snprintf(buf + ret, len - ret, + " {%u,%u}", publ->type, + publ->lower); + else + ret += tipc_snprintf(buf + ret, len - ret, + " {%u,%u,%u}", publ->type, + publ->lower, publ->upper); + } + } + ret += tipc_snprintf(buf + ret, len - ret, "\n"); + return ret; +} + +struct sk_buff *tipc_sk_socks_show(void) +{ + struct sk_buff *buf; + struct tlv_desc *rep_tlv; + char *pb; + int pb_len; + struct tipc_sock *tsk; + int str_len = 0; + u32 ref = 0; + + buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); + if (!buf) + return NULL; + rep_tlv = (struct tlv_desc *)buf->data; + pb = TLV_DATA(rep_tlv); + pb_len = ULTRA_STRING_MAX_LEN; + + tsk = tipc_sk_get_next(&ref); + for (; tsk; tsk = tipc_sk_get_next(&ref)) { + lock_sock(&tsk->sk); + str_len += tipc_sk_show(tsk, pb + str_len, + pb_len - str_len, 0); + release_sock(&tsk->sk); + tipc_sk_put(tsk); + } + str_len += 1; /* for "\0" */ + skb_put(buf, TLV_SPACE(str_len)); + TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); + + return buf; +} + +/* tipc_sk_reinit: set non-zero address in all existing sockets + * when we go from standalone to network mode. + */ +void tipc_sk_reinit(void) +{ + struct tipc_msg *msg; + u32 ref = 0; + struct tipc_sock *tsk = tipc_sk_get_next(&ref); + + for (; tsk; tsk = tipc_sk_get_next(&ref)) { + lock_sock(&tsk->sk); + msg = &tsk->phdr; + msg_set_prevnode(msg, tipc_own_addr); + msg_set_orignode(msg, tipc_own_addr); + release_sock(&tsk->sk); + tipc_sk_put(tsk); + } +} + +/** + * struct reference - TIPC socket reference entry + * @tsk: pointer to socket associated with reference entry + * @ref: reference value for socket (combines instance & array index info) + */ +struct reference { + struct tipc_sock *tsk; + u32 ref; +}; + +/** + * struct tipc_ref_table - table of TIPC socket reference entries + * @entries: pointer to array of reference entries + * @capacity: array index of first unusable entry + * @init_point: array index of first uninitialized entry + * @first_free: array index of first unused socket reference entry + * @last_free: array index of last unused socket reference entry + * @index_mask: bitmask for array index portion of reference values + * @start_mask: initial value for instance value portion of reference values + */ +struct ref_table { + struct reference *entries; + u32 capacity; + u32 init_point; + u32 first_free; + u32 last_free; + u32 index_mask; + u32 start_mask; +}; + +/* Socket reference table consists of 2**N entries. + * + * State Socket ptr Reference + * ----- ---------- --------- + * In use non-NULL XXXX|own index + * (XXXX changes each time entry is acquired) + * Free NULL YYYY|next free index + * (YYYY is one more than last used XXXX) + * Uninitialized NULL 0 + * + * Entry 0 is not used; this allows index 0 to denote the end of the free list. + * + * Note that a reference value of 0 does not necessarily indicate that an + * entry is uninitialized, since the last entry in the free list could also + * have a reference value of 0 (although this is unlikely). + */ + +static struct ref_table tipc_ref_table; + +static DEFINE_RWLOCK(ref_table_lock); + +/** + * tipc_ref_table_init - create reference table for sockets + */ +int tipc_sk_ref_table_init(u32 req_sz, u32 start) +{ + struct reference *table; + u32 actual_sz; + + /* account for unused entry, then round up size to a power of 2 */ + + req_sz++; + for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { + /* do nothing */ + }; + + /* allocate table & mark all entries as uninitialized */ + table = vzalloc(actual_sz * sizeof(struct reference)); + if (table == NULL) + return -ENOMEM; + + tipc_ref_table.entries = table; + tipc_ref_table.capacity = req_sz; + tipc_ref_table.init_point = 1; + tipc_ref_table.first_free = 0; + tipc_ref_table.last_free = 0; + tipc_ref_table.index_mask = actual_sz - 1; + tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + + return 0; +} + +/** + * tipc_ref_table_stop - destroy reference table for sockets + */ +void tipc_sk_ref_table_stop(void) +{ + if (!tipc_ref_table.entries) + return; + vfree(tipc_ref_table.entries); + tipc_ref_table.entries = NULL; +} + +/* tipc_ref_acquire - create reference to a socket + * + * Register an socket pointer in the reference table. + * Returns a unique reference value that is used from then on to retrieve the + * socket pointer, or to determine if the socket has been deregistered. + */ +u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +{ + u32 index; + u32 index_mask; + u32 next_plus_upper; + u32 ref = 0; + struct reference *entry; + + if (unlikely(!tsk)) { + pr_err("Attempt to acquire ref. to non-existent obj\n"); + return 0; + } + if (unlikely(!tipc_ref_table.entries)) { + pr_err("Ref. table not found in acquisition attempt\n"); + return 0; + } + + /* Take a free entry, if available; otherwise initialize a new one */ + write_lock_bh(&ref_table_lock); + index = tipc_ref_table.first_free; + entry = &tipc_ref_table.entries[index]; + + if (likely(index)) { + index = tipc_ref_table.first_free; + entry = &tipc_ref_table.entries[index]; + index_mask = tipc_ref_table.index_mask; + next_plus_upper = entry->ref; + tipc_ref_table.first_free = next_plus_upper & index_mask; + ref = (next_plus_upper & ~index_mask) + index; + entry->tsk = tsk; + } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { + index = tipc_ref_table.init_point++; + entry = &tipc_ref_table.entries[index]; + ref = tipc_ref_table.start_mask + index; + } + + if (ref) { + entry->ref = ref; + entry->tsk = tsk; + } + write_unlock_bh(&ref_table_lock); + return ref; +} + +/* tipc_sk_ref_discard - invalidate reference to an socket + * + * Disallow future references to an socket and free up the entry for re-use. + */ +void tipc_sk_ref_discard(u32 ref) +{ + struct reference *entry; + u32 index; + u32 index_mask; + + if (unlikely(!tipc_ref_table.entries)) { + pr_err("Ref. table not found during discard attempt\n"); + return; + } + + index_mask = tipc_ref_table.index_mask; + index = ref & index_mask; + entry = &tipc_ref_table.entries[index]; + + write_lock_bh(&ref_table_lock); + + if (unlikely(!entry->tsk)) { + pr_err("Attempt to discard ref. to non-existent socket\n"); + goto exit; + } + if (unlikely(entry->ref != ref)) { + pr_err("Attempt to discard non-existent reference\n"); + goto exit; + } + + /* Mark entry as unused; increment instance part of entry's + * reference to invalidate any subsequent references + */ + + entry->tsk = NULL; + entry->ref = (ref & ~index_mask) + (index_mask + 1); + + /* Append entry to free entry list */ + if (unlikely(tipc_ref_table.first_free == 0)) + tipc_ref_table.first_free = index; + else + tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; + tipc_ref_table.last_free = index; +exit: + write_unlock_bh(&ref_table_lock); +} + +/* tipc_sk_get - find referenced socket and return pointer to it + */ +struct tipc_sock *tipc_sk_get(u32 ref) +{ + struct reference *entry; + struct tipc_sock *tsk; + + if (unlikely(!tipc_ref_table.entries)) + return NULL; + read_lock_bh(&ref_table_lock); + entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; + tsk = entry->tsk; + if (likely(tsk && (entry->ref == ref))) + sock_hold(&tsk->sk); + else + tsk = NULL; + read_unlock_bh(&ref_table_lock); + return tsk; +} + +/* tipc_sk_get_next - lock & return next socket after referenced one +*/ +struct tipc_sock *tipc_sk_get_next(u32 *ref) +{ + struct reference *entry; + struct tipc_sock *tsk = NULL; + uint index = *ref & tipc_ref_table.index_mask; + + read_lock_bh(&ref_table_lock); + while (++index < tipc_ref_table.capacity) { + entry = &tipc_ref_table.entries[index]; + if (!entry->tsk) + continue; + tsk = entry->tsk; + sock_hold(&tsk->sk); + *ref = entry->ref; + break; + } + read_unlock_bh(&ref_table_lock); + return tsk; +} + +static void tipc_sk_put(struct tipc_sock *tsk) +{ + sock_put(&tsk->sk); +} + /** * tipc_setsockopt - set socket option * @sock: socket structure @@ -1955,7 +2553,6 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; u32 value; int res; @@ -1973,16 +2570,16 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - res = tipc_port_set_importance(port, value); + res = tsk_set_importance(tsk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) - tipc_port_set_unreliable(port, value); + tsk_set_unreliable(tsk, value); else res = -ENOPROTOOPT; break; case TIPC_DEST_DROPPABLE: - tipc_port_set_unreturnable(port, value); + tsk_set_unreturnable(tsk, value); break; case TIPC_CONN_TIMEOUT: tipc_sk(sk)->conn_timeout = value; @@ -2015,7 +2612,6 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; int len; u32 value; int res; @@ -2032,16 +2628,16 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - value = tipc_port_importance(port); + value = tsk_importance(tsk); break; case TIPC_SRC_DROPPABLE: - value = tipc_port_unreliable(port); + value = tsk_unreliable(tsk); break; case TIPC_DEST_DROPPABLE: - value = tipc_port_unreturnable(port); + value = tsk_unreturnable(tsk); break; case TIPC_CONN_TIMEOUT: - value = tipc_sk(sk)->conn_timeout; + value = tsk->conn_timeout; /* no need to set "res", since already 0 at this point */ break; case TIPC_NODE_RECVQ_DEPTH: |