diff options
Diffstat (limited to 'net')
155 files changed, 1915 insertions, 1118 deletions
diff --git a/net/802/garp.c b/net/802/garp.c index 2dac647ff420..7f50d47470bd 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -401,9 +401,9 @@ static void garp_join_timer_arm(struct garp_applicant *app) mod_timer(&app->join_timer, jiffies + delay); } -static void garp_join_timer(unsigned long data) +static void garp_join_timer(struct timer_list *t) { - struct garp_applicant *app = (struct garp_applicant *)data; + struct garp_applicant *app = from_timer(app, t, join_timer); spin_lock(&app->lock); garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); @@ -584,7 +584,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl) spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->garp_port->applicants[appl->type], app); - setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app); + timer_setup(&app->join_timer, garp_join_timer, 0); garp_join_timer_arm(app); return 0; diff --git a/net/802/mrp.c b/net/802/mrp.c index be4dd3165347..a808dd5bbb27 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -586,9 +586,9 @@ static void mrp_join_timer_arm(struct mrp_applicant *app) mod_timer(&app->join_timer, jiffies + delay); } -static void mrp_join_timer(unsigned long data) +static void mrp_join_timer(struct timer_list *t) { - struct mrp_applicant *app = (struct mrp_applicant *)data; + struct mrp_applicant *app = from_timer(app, t, join_timer); spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); @@ -605,9 +605,9 @@ static void mrp_periodic_timer_arm(struct mrp_applicant *app) jiffies + msecs_to_jiffies(mrp_periodic_time)); } -static void mrp_periodic_timer(unsigned long data) +static void mrp_periodic_timer(struct timer_list *t) { - struct mrp_applicant *app = (struct mrp_applicant *)data; + struct mrp_applicant *app = from_timer(app, t, periodic_timer); spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_PERIODIC); @@ -865,10 +865,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); - setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); + timer_setup(&app->join_timer, mrp_join_timer, 0); mrp_join_timer_arm(app); - setup_timer(&app->periodic_timer, mrp_periodic_timer, - (unsigned long)app); + timer_setup(&app->periodic_timer, mrp_periodic_timer, 0); mrp_periodic_timer_arm(app); return 0; diff --git a/net/9p/client.c b/net/9p/client.c index 4674235b0d9b..b433aff5ff13 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -82,7 +82,7 @@ int p9_show_client_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->msize != 8192) seq_printf(m, ",msize=%u", clnt->msize); - seq_printf(m, "trans=%s", clnt->trans_mod->name); + seq_printf(m, ",trans=%s", clnt->trans_mod->name); switch (clnt->proto_version) { case p9_proto_legacy: @@ -773,8 +773,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) } again: /* Wait for the response */ - err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); /* * Make sure our req is coherent with regard to updates in other diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 903a190319b9..80f5c79053a4 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -724,12 +724,12 @@ static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) - seq_printf(m, "port=%u", clnt->trans_opts.tcp.port); + seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) - seq_printf(m, "rfd=%u", clnt->trans_opts.fd.rfd); + seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) - seq_printf(m, "wfd=%u", clnt->trans_opts.fd.wfd); + seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } @@ -839,7 +839,6 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); - sock_release(csocket); kfree(p); return PTR_ERR(file); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index f24b25c25106..f3a4efcf1456 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -286,8 +286,8 @@ req_retry: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + err = wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; @@ -327,7 +327,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, * Other zc request to finish here */ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, + err = wait_event_killable(vp_wq, (atomic_read(&vp_pinned) < chan->p9_max_pages)); if (err == -ERESTARTSYS) return err; @@ -471,8 +471,8 @@ req_retry_pinned: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + err = wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) goto err_out; @@ -489,8 +489,7 @@ req_retry_pinned: virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); - err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); /* * Non kernel buffers are pinned, unpin them */ diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 6ad3e043c617..325c56043007 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -156,8 +156,8 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) ring = &priv->rings[num]; again: - while (wait_event_interruptible(ring->wq, - p9_xen_write_todo(ring, size)) != 0) + while (wait_event_killable(ring->wq, + p9_xen_write_todo(ring, size)) != 0) ; spin_lock_irqsave(&ring->lock, flags); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 8ad3ec2610b6..309d7dbb36e8 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -310,7 +310,7 @@ static void __aarp_expire_device(struct aarp_entry **n, struct net_device *dev) } /* Handle the timer event */ -static void aarp_expire_timeout(unsigned long unused) +static void aarp_expire_timeout(struct timer_list *unused) { int ct; @@ -884,7 +884,7 @@ void __init aarp_proto_init(void) aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); if (!aarp_dl) printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); - setup_timer(&aarp_timer, aarp_expire_timeout, 0); + timer_setup(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); register_netdevice_notifier(&aarp_notifier); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 5d035c1f1156..03a9fc0771c0 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -158,9 +158,9 @@ found: return s; } -static void atalk_destroy_timer(unsigned long data) +static void atalk_destroy_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); if (sk_has_allocations(sk)) { sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; @@ -175,8 +175,7 @@ static inline void atalk_destroy_socket(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); if (sk_has_allocations(sk)) { - setup_timer(&sk->sk_timer, atalk_destroy_timer, - (unsigned long)sk); + timer_setup(&sk->sk_timer, atalk_destroy_timer, 0); sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; add_timer(&sk->sk_timer); } else diff --git a/net/atm/lec.c b/net/atm/lec.c index c976196da3ea..6676e3433261 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1798,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, else send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); entry->timer.expires = jiffies + (1 * HZ); - entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp; + entry->timer.function = lec_arp_expire_arp; add_timer(&entry->timer); found = priv->mcast_vcc; } @@ -1998,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->old_recv_push = old_push; entry->status = ESI_UNKNOWN; entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; + entry->timer.function = lec_arp_expire_vcc; hlist_add_head(&entry->next, &priv->lec_no_forward); add_timer(&entry->timer); dump_arp_table(priv); @@ -2082,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->status = ESI_UNKNOWN; hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; + entry->timer.function = lec_arp_expire_vcc; add_timer(&entry->timer); pr_debug("After vcc was added\n"); dump_arp_table(priv); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index e882d8b5db05..7c6a1cc760a2 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = { struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; -static DEFINE_TIMER(mpc_timer, NULL); +static DEFINE_TIMER(mpc_timer, mpc_cache_check); static struct mpoa_client *find_mpc_by_itfnum(int itf) @@ -1413,7 +1413,6 @@ static void mpc_timer_refresh(void) { mpc_timer.expires = jiffies + (MPC_P2 * HZ); checking_time = mpc_timer.expires; - mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check; add_timer(&mpc_timer); } diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 4b90033f35a8..15cd2139381e 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -488,9 +488,9 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) * Switch to Slow Start, set the ss_threshold to half of the current cwnd and * reset the cwnd to 3*MSS */ -static void batadv_tp_sender_timeout(unsigned long arg) +static void batadv_tp_sender_timeout(struct timer_list *t) { - struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; + struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer); struct batadv_priv *bat_priv = tp_vars->bat_priv; if (atomic_read(&tp_vars->sending) == 0) @@ -1020,8 +1020,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, atomic64_set(&tp_vars->tot_sent, 0); kref_get(&tp_vars->refcount); - setup_timer(&tp_vars->timer, batadv_tp_sender_timeout, - (unsigned long)tp_vars); + timer_setup(&tp_vars->timer, batadv_tp_sender_timeout, 0); tp_vars->bat_priv = bat_priv; tp_vars->start_time = jiffies; @@ -1109,9 +1108,9 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars) * reached without received ack * @arg: address of the related tp_vars */ -static void batadv_tp_receiver_shutdown(unsigned long arg) +static void batadv_tp_receiver_shutdown(struct timer_list *t) { - struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; + struct batadv_tp_vars *tp_vars = from_timer(tp_vars, t, timer); struct batadv_tp_unacked *un, *safe; struct batadv_priv *bat_priv; @@ -1373,8 +1372,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); kref_get(&tp_vars->refcount); - setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown, - (unsigned long)tp_vars); + timer_setup(&tp_vars->timer, batadv_tp_receiver_shutdown, 0); batadv_tp_reset_receiver_timer(tp_vars); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 8112893037bd..f2cec70d520c 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -398,9 +398,9 @@ static int hidp_raw_request(struct hid_device *hid, unsigned char reportnum, } } -static void hidp_idle_timeout(unsigned long arg) +static void hidp_idle_timeout(struct timer_list *t) { - struct hidp_session *session = (struct hidp_session *) arg; + struct hidp_session *session = from_timer(session, t, timer); /* The HIDP user-space API only contains calls to add and remove * devices. There is no way to forward events of any kind. Therefore, @@ -944,8 +944,7 @@ static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, /* device management */ INIT_WORK(&session->dev_init, hidp_session_dev_work); - setup_timer(&session->timer, hidp_idle_timeout, - (unsigned long)session); + timer_setup(&session->timer, hidp_idle_timeout, 0); /* session data */ mutex_init(&session->report_mutex); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4a0b41d75c84..b98225d65e87 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -233,9 +233,9 @@ static int rfcomm_check_security(struct rfcomm_dlc *d) d->out); } -static void rfcomm_session_timeout(unsigned long arg) +static void rfcomm_session_timeout(struct timer_list *t) { - struct rfcomm_session *s = (void *) arg; + struct rfcomm_session *s = from_timer(s, t, timer); BT_DBG("session %p state %ld", s, s->state); @@ -258,9 +258,9 @@ static void rfcomm_session_clear_timer(struct rfcomm_session *s) } /* ---- RFCOMM DLCs ---- */ -static void rfcomm_dlc_timeout(unsigned long arg) +static void rfcomm_dlc_timeout(struct timer_list *t) { - struct rfcomm_dlc *d = (void *) arg; + struct rfcomm_dlc *d = from_timer(d, t, timer); BT_DBG("dlc %p state %ld", d, d->state); @@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) if (!d) return NULL; - setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d); + timer_setup(&d->timer, rfcomm_dlc_timeout, 0); skb_queue_head_init(&d->tx_queue); mutex_init(&d->lock); @@ -650,7 +650,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) BT_DBG("session %p sock %p", s, sock); - setup_timer(&s->timer, rfcomm_session_timeout, (unsigned long) s); + timer_setup(&s->timer, rfcomm_session_timeout, 0); INIT_LIST_HEAD(&s->dlcs); s->state = state; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 795e920a3281..08df57665e1f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -73,9 +73,9 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) -static void sco_sock_timeout(unsigned long arg) +static void sco_sock_timeout(struct timer_list *t) { - struct sock *sk = (struct sock *)arg; + struct sock *sk = from_timer(sk, t, sk_timer); BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -487,7 +487,7 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; - setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk); + timer_setup(&sk->sk_timer, sco_sock_timeout, 0); bt_sock_link(&sco_sk_list, sk); return sk; diff --git a/net/can/proc.c b/net/can/proc.c index d979b3dc49a6..0c59f876fe6f 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); - if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { + if (net->can.can_stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", can_stats->total_rx_match_ratio); @@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v) user_reset = 1; - if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { + if (net->can.can_stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", can_pstats->stats_reset + 1); } else { diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 67bb1f11e613..9a5850f264ed 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -47,28 +47,38 @@ unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length) /* handle the last 11 bytes */ c = c + length; - switch (len) { /* all the case statements fall through */ + switch (len) { case 11: c = c + ((__u32)k[10] << 24); + /* fall through */ case 10: c = c + ((__u32)k[9] << 16); + /* fall through */ case 9: c = c + ((__u32)k[8] << 8); /* the first byte of c is reserved for the length */ + /* fall through */ case 8: b = b + ((__u32)k[7] << 24); + /* fall through */ case 7: b = b + ((__u32)k[6] << 16); + /* fall through */ case 6: b = b + ((__u32)k[5] << 8); + /* fall through */ case 5: b = b + k[4]; + /* fall through */ case 4: a = a + ((__u32)k[3] << 24); + /* fall through */ case 3: a = a + ((__u32)k[2] << 16); + /* fall through */ case 2: a = a + ((__u32)k[1] << 8); + /* fall through */ case 1: a = a + k[0]; /* case 0: nothing left to add */ diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 489610ac1cdd..bf9d079cbafd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -37,7 +37,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf) return -ENOTSUPP; } - WARN_ON(!key->len); + if (!key->len) + return -EINVAL; + key->key = kmemdup(buf, key->len, GFP_NOIO); if (!key->key) { ret = -ENOMEM; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad93342c90d7..8a4d3758030b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -430,6 +430,7 @@ static void ceph_sock_state_change(struct sock *sk) switch (sk->sk_state) { case TCP_CLOSE: dout("%s TCP_CLOSE\n", __func__); + /* fall through */ case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 9ae1bab8c05d..1547107f4854 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1279,9 +1279,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, /* * Older OSDs don't set reply tid even if the orignal - * request had a non-zero tid. Workaround this weirdness - * by falling through to the allocate case. + * request had a non-zero tid. Work around this weirdness + * by allocating a new message. */ + /* fall through */ case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: diff --git a/net/core/dev.c b/net/core/dev.c index 8ee29f4f5fa9..f47e96b62308 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1106,7 +1106,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) * when the name is long and there isn't enough space left * for the digits, or if all bits are used. */ - return p ? -ENFILE : -EEXIST; + return -ENFILE; } static int dev_alloc_name_ns(struct net *net, @@ -2746,7 +2746,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL; + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; return skb->ip_summed == CHECKSUM_NONE; } @@ -7139,13 +7140,17 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - if (bpf_op == ops->ndo_bpf) - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - dev); - else - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + bpf_op == ops->ndo_bpf); if (IS_ERR(prog)) return PTR_ERR(prog); + + if (!(flags & XDP_FLAGS_HW_MODE) && + bpf_prog_is_dev_bound(prog->aux)) { + NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); + bpf_prog_put(prog); + return -EINVAL; + } } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 70ccda233bd1..c7785efeea57 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -144,9 +144,9 @@ static void send_dm_alert(struct work_struct *work) * in the event that more drops will arrive during the * hysteresis period. */ -static void sched_send_work(unsigned long _data) +static void sched_send_work(struct timer_list *t) { - struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; + struct per_cpu_dm_data *data = from_timer(data, t, send_timer); schedule_work(&data->dm_alert_work); } @@ -412,8 +412,7 @@ static int __init init_net_drop_monitor(void) for_each_possible_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); INIT_WORK(&data->dm_alert_work, send_dm_alert); - setup_timer(&data->send_timer, sched_send_work, - (unsigned long)data); + timer_setup(&data->send_timer, sched_send_work, 0); spin_lock_init(&data->lock); reset_per_cpu_data(data); } diff --git a/net/core/filter.c b/net/core/filter.c index 1afa17935954..6a85e67fafce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1646,9 +1646,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_PTR_TO_MEM_OR_NULL, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 7c1ffd6f9501..9834cfa21b21 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -76,9 +76,9 @@ static void est_fetch_counters(struct net_rate_estimator *e, } -static void est_timer(unsigned long arg) +static void est_timer(struct timer_list *t) { - struct net_rate_estimator *est = (struct net_rate_estimator *)arg; + struct net_rate_estimator *est = from_timer(est, t, timer); struct gnet_stats_basic_packed b; u64 rate, brate; @@ -170,7 +170,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, } est->next_jiffies = jiffies + ((HZ/4) << intvl_log); - setup_timer(&est->timer, est_timer, (unsigned long)est); + timer_setup(&est->timer, est_timer, 0); mod_timer(&est->timer, est->next_jiffies); rcu_assign_pointer(*rate_est, est); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6ea3a1a7f36a..d1f5fe986edd 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -51,7 +51,7 @@ do { \ #define PNEIGH_HASHMASK 0xF -static void neigh_timer_handler(unsigned long arg); +static void neigh_timer_handler(struct timer_list *t); static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); @@ -331,7 +331,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device n->output = neigh_blackhole; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); - setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); + timer_setup(&n->timer, neigh_timer_handler, 0); NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; @@ -903,10 +903,10 @@ static void neigh_probe(struct neighbour *neigh) /* Called when a timer expires for a neighbour entry. */ -static void neigh_timer_handler(unsigned long arg) +static void neigh_timer_handler(struct timer_list *t) { unsigned long now, next; - struct neighbour *neigh = (struct neighbour *)arg; + struct neighbour *neigh = from_timer(neigh, t, timer); unsigned int state; int notify = 0; @@ -1391,9 +1391,9 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) } EXPORT_SYMBOL(neigh_direct_output); -static void neigh_proxy_process(unsigned long arg) +static void neigh_proxy_process(struct timer_list *t) { - struct neigh_table *tbl = (struct neigh_table *)arg; + struct neigh_table *tbl = from_timer(tbl, t, proxy_timer); long sched_next = 0; unsigned long now = jiffies; struct sk_buff *skb, *n; @@ -1573,7 +1573,7 @@ void neigh_table_init(int index, struct neigh_table *tbl) INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, tbl->parms.reachable_time); - setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); + timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 1c4810919a0a..b9057478d69c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/module.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index abd07a443219..178bb9833311 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b68168fcc06a..9d43c1f40274 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -259,6 +259,7 @@ int dccp_disconnect(struct sock *sk, int flags) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); int err = 0; const int old_state = sk->sk_state; @@ -278,6 +279,10 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; + dp->dccps_hc_tx_ccid = NULL; __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_write_queue); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b36dceab0dc1..324cb9f2f551 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -125,7 +125,7 @@ static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); static int dn_route_input(struct sk_buff *); -static void dn_run_flush(unsigned long dummy); +static void dn_run_flush(struct timer_list *unused); static struct dn_rt_hash_bucket *dn_rt_hash_table; static unsigned int dn_rt_hash_mask; @@ -183,7 +183,7 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } -static void dn_dst_check_expire(unsigned long dummy) +static void dn_dst_check_expire(struct timer_list *unused) { int i; struct dn_route *rt; @@ -357,7 +357,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou return 0; } -static void dn_run_flush(unsigned long dummy) +static void dn_run_flush(struct timer_list *unused) { int i; struct dn_route *rt, *next; @@ -1875,7 +1875,7 @@ void __init dn_route_init(void) kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); dst_entries_init(&dn_dst_ops); - setup_timer(&dn_route_timer, dn_dst_check_expire, 0); + timer_setup(&dn_route_timer, dn_dst_check_expire, 0); dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; add_timer(&dn_route_timer); diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index f430daed24a0..aa4155875ca8 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -34,11 +34,11 @@ #define SLOW_INTERVAL (HZ/2) -static void dn_slow_timer(unsigned long arg); +static void dn_slow_timer(struct timer_list *t); void dn_start_slow_timer(struct sock *sk) { - setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); + timer_setup(&sk->sk_timer, dn_slow_timer, 0); sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); } @@ -47,9 +47,9 @@ void dn_stop_slow_timer(struct sock *sk) sk_stop_timer(sk, &sk->sk_timer); } -static void dn_slow_timer(unsigned long arg) +static void dn_slow_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)arg; + struct sock *sk = from_timer(sk, t, sk_timer); struct dn_scp *scp = DN_SK(sk); bh_lock_sock(sk); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 44e3fb7dec8c..1e287420ff49 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -51,9 +51,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) INIT_LIST_HEAD(&dst->list); list_add_tail(&dsa_tree_list, &dst->list); - /* Initialize the reference counter to the number of switches, not 1 */ kref_init(&dst->refcount); - refcount_set(&dst->refcount.refcount, 0); return dst; } @@ -64,20 +62,23 @@ static void dsa_tree_free(struct dsa_switch_tree *dst) kfree(dst); } -static struct dsa_switch_tree *dsa_tree_touch(int index) +static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) { - struct dsa_switch_tree *dst; - - dst = dsa_tree_find(index); - if (!dst) - dst = dsa_tree_alloc(index); + if (dst) + kref_get(&dst->refcount); return dst; } -static void dsa_tree_get(struct dsa_switch_tree *dst) +static struct dsa_switch_tree *dsa_tree_touch(int index) { - kref_get(&dst->refcount); + struct dsa_switch_tree *dst; + + dst = dsa_tree_find(index); + if (dst) + return dsa_tree_get(dst); + else + return dsa_tree_alloc(index); } static void dsa_tree_release(struct kref *ref) @@ -91,7 +92,8 @@ static void dsa_tree_release(struct kref *ref) static void dsa_tree_put(struct dsa_switch_tree *dst) { - kref_put(&dst->refcount, dsa_tree_release); + if (dst) + kref_put(&dst->refcount, dsa_tree_release); } static bool dsa_port_is_dsa(struct dsa_port *port) @@ -765,6 +767,7 @@ int dsa_register_switch(struct dsa_switch *ds) mutex_lock(&dsa2_mutex); err = dsa_switch_probe(ds); + dsa_tree_put(ds->dst); mutex_unlock(&dsa2_mutex); return err; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d6e7a642493b..a95a55f79137 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -16,7 +16,6 @@ #include <linux/of_net.h> #include <linux/of_mdio.h> #include <linux/mdio.h> -#include <linux/list.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ce4aa827be05..f00499a46927 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1223,9 +1223,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool fixedid = false, gso_partial, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; + unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; @@ -1260,6 +1261,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1279,7 +1281,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) { + if (udpfrag) { + iph->frag_off = htons(offset >> 3); + if (skb->next) + iph->frag_off |= htons(IP_MF); + offset += skb->len - nhoff - ihl; + tot_len = skb->len - nhoff; + } else if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a4573bccd6da..7a93359fbc72 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1428,7 +1428,7 @@ skip: static bool inetdev_valid_mtu(unsigned int mtu) { - return mtu >= 68; + return mtu >= IPV4_MIN_MTU; } static void inetdev_send_gratuitous_arp(struct net_device *dev, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index ab183af0b5b6..726f6b608274 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -89,6 +89,7 @@ #include <linux/rtnetlink.h> #include <linux/times.h> #include <linux/pkt_sched.h> +#include <linux/byteorder/generic.h> #include <net/net_namespace.h> #include <net/arp.h> @@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +/* source address selection per RFC 3376 section 4.2.13 */ +static __be32 igmpv3_get_srcaddr(struct net_device *dev, + const struct flowi4 *fl4) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return htonl(INADDR_ANY); + + for_ifa(in_dev) { + if (inet_ifa_match(fl4->saddr, ifa)) + return fl4->saddr; + } endfor_ifa(in_dev); + + return htonl(INADDR_ANY); +} + static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; @@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; - pip->saddr = fl4.saddr; + pip->saddr = igmpv3_get_srcaddr(dev, &fl4); pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); @@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, - int type, struct igmpv3_grec **ppgr) + int type, struct igmpv3_grec **ppgr, unsigned int mtu) { struct net_device *dev = pmc->interface->dev; struct igmpv3_report *pih; struct igmpv3_grec *pgr; - if (!skb) - skb = igmpv3_newpack(dev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = igmpv3_newpack(dev, mtu); + if (!skb) + return NULL; + } pgr = skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV4_MIN_MTU) + return skb; + isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; truncate = type == IGMPV3_MODE_IS_EXCLUDE || @@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); } } first = 1; @@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, pgr->grec_nsrcs = htons(scount); if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -538,7 +562,7 @@ empty_source: igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) @@ -752,18 +776,18 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return ip_local_out(net, skb->sk, skb); } -static void igmp_gq_timer_expire(unsigned long data) +static void igmp_gq_timer_expire(struct timer_list *t) { - struct in_device *in_dev = (struct in_device *)data; + struct in_device *in_dev = from_timer(in_dev, t, mr_gq_timer); in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); in_dev_put(in_dev); } -static void igmp_ifc_timer_expire(unsigned long data) +static void igmp_ifc_timer_expire(struct timer_list *t) { - struct in_device *in_dev = (struct in_device *)data; + struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); igmpv3_send_cr(in_dev); if (in_dev->mr_ifc_count) { @@ -784,9 +808,9 @@ static void igmp_ifc_event(struct in_device *in_dev) } -static void igmp_timer_expire(unsigned long data) +static void igmp_timer_expire(struct timer_list *t) { - struct ip_mc_list *im = (struct ip_mc_list *)data; + struct ip_mc_list *im = from_timer(im, t, timer); struct in_device *in_dev = im->interface; spin_lock(&im->lock); @@ -1385,7 +1409,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) refcount_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); + timer_setup(&im->timer, igmp_timer_expire, 0); im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; #endif @@ -1695,10 +1719,8 @@ void ip_mc_init_dev(struct in_device *in_dev) ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST - setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, - (unsigned long)in_dev); - setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, - (unsigned long)in_dev); + timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0); + timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0); in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; #endif diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c690cd0d9b3f..b563e0c46bac 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -93,7 +93,7 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, } /* - * Enter the time wait state. + * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. */ @@ -111,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; - spin_lock_bh(&bhead->lock); + spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); @@ -137,7 +137,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(lock); + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fe6fee728ce4..5ddb1cb52bd4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); - if (mtu < 68) - mtu = 68; + if (mtu < IPV4_MIN_MTU) + mtu = IPV4_MIN_MTU; return mtu; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 40a43ad294cb..fd5f19c988e4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -112,7 +112,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, bool all); -static void ipmr_expire_process(unsigned long arg); +static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES #define ipmr_for_each_table(mrt, net) \ @@ -375,8 +375,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); - setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, - (unsigned long)mrt); + timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); mrt->mroute_reg_vif_num = -1; #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -804,9 +803,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) } /* Timer process for the unresolved queue. */ -static void ipmr_expire_process(unsigned long arg) +static void ipmr_expire_process(struct timer_list *t) { - struct mr_table *mrt = (struct mr_table *)arg; + struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); unsigned long now; unsigned long expires; struct mfc_cache *c, *next; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 33b70bfd1122..125c1eab3eaa 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -513,11 +513,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; + int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields + */ + hdrincl = inet->hdrincl; /* * Check the flags. */ @@ -593,7 +598,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ - if (inet->hdrincl) + if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) @@ -615,12 +620,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), + (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); - if (!inet->hdrincl) { + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -645,7 +650,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bf97317e6c97..f08eebe60446 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2412,6 +2412,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; tcp_clear_retrans(tp); inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 69ee877574d0..8322f26e770e 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -110,7 +110,8 @@ struct bbr { u32 lt_last_lost; /* LT intvl start: tp->lost */ u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_cnt:3, /* number of rounds without large bw gains */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ unused_b:5; @@ -180,7 +181,7 @@ static bool bbr_full_bw_reached(const struct sock *sk) { const struct bbr *bbr = inet_csk_ca(sk); - return bbr->full_bw_cnt >= bbr_full_bw_cnt; + return bbr->full_bw_reached; } /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ @@ -717,6 +718,7 @@ static void bbr_check_full_bw_reached(struct sock *sk, return; } ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; } /* If pipe is probably full, drain the queue and then enter steady-state. */ @@ -850,6 +852,7 @@ static void bbr_init(struct sock *sk) bbr->restore_cwnd = 0; bbr->round_start = 0; bbr->idle_restart = 0; + bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; bbr->cycle_mstamp = 0; @@ -871,6 +874,11 @@ static u32 bbr_sndbuf_expand(struct sock *sk) */ static u32 bbr_undo_cwnd(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; + bbr_reset_lt_bw_sampling(sk); return tcp_sk(sk)->snd_cwnd; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 734cfc8ff76e..9550cc42de2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -579,6 +579,7 @@ void tcp_rcv_space_adjust(struct sock *sk) int time; int copied; + tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; @@ -1941,6 +1942,8 @@ void tcp_enter_loss(struct sock *sk) if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; + /* Mark SACK reneging until we recover from this loss event. */ + tp->is_sack_reneg = 1; } tcp_clear_all_retrans_hints(tp); @@ -2326,6 +2329,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) } tp->snd_cwnd_stamp = tcp_jiffies32; tp->undo_marker = 0; + tp->rack.advanced = 1; /* Force RACK to re-exam losses */ } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2364,6 +2368,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) return true; } tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; return false; } @@ -2397,8 +2402,10 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - if (frto_undo || tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + } return true; } return false; @@ -3495,6 +3502,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct tcp_sacktag_state sack_state; struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; + bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; @@ -3611,7 +3619,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, sack_state.rate); + tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6bc0c4d19c6..94e28350f420 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, 0, - tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); @@ -1591,6 +1591,34 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_filter); +static void tcp_v4_restore_cb(struct sk_buff *skb) +{ + memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4, + sizeof(struct inet_skb_parm)); +} + +static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, + const struct tcphdr *th) +{ + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() + * barrier() makes sure compiler wont play fool^Waliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), + sizeof(struct inet_skb_parm)); + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff * 4); + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->has_rxtstamp = + skb->tstamp || skb_hwtstamps(skb)->hwtstamp; +} + /* * From tcp_input.c */ @@ -1631,24 +1659,6 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); - /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() - * barrier() makes sure compiler wont play fool^Waliasing games. - */ - memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), - sizeof(struct inet_skb_parm)); - barrier(); - - TCP_SKB_CB(skb)->seq = ntohl(th->seq); - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff * 4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); - TCP_SKB_CB(skb)->sacked = 0; - TCP_SKB_CB(skb)->has_rxtstamp = - skb->tstamp || skb_hwtstamps(skb)->hwtstamp; - lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); @@ -1679,14 +1689,19 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; } if (nsk == sk) { reqsk_put(req); + tcp_v4_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; @@ -1712,6 +1727,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); + tcp_v4_fill_cb(skb, iph, th); skb->dev = NULL; @@ -1742,6 +1758,8 @@ no_tcp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { csum_error: __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); @@ -1768,6 +1786,8 @@ do_time_wait: goto discard_it; } + tcp_v4_fill_cb(skb, iph, th); + if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; @@ -1784,6 +1804,7 @@ do_time_wait: if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; + tcp_v4_restore_cb(skb); refcounted = false; goto process; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e36eff0403f4..b079b619b60c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -310,10 +310,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; + /* tw_timer is pinned, so we need to make sure BH are disabled + * in following section, otherwise timer handler could run before + * we complete the initialization. + */ + local_bh_disable(); inet_twsk_schedule(tw, timeo); /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 3330a370d306..c61240e43923 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct rate_sample *rs) + bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -124,8 +124,12 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ - /* Return an invalid sample if no timing information is available. */ - if (!rs->prior_mstamp) { + /* Return an invalid sample if no timing information is available or + * in recovery from loss with SACK reneging. Rate samples taken during + * a SACK reneging event may overestimate bw by including packets that + * were SACKed before the reneg. + */ + if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index d3ea89020c69..3a81720ac0c4 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -55,7 +55,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + if ((tp->rack.reord || inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery) && + min_rtt != ~0U) { reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); reo_wnd = min(reo_wnd, tp->srtt_us >> 3); } @@ -79,12 +80,12 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) */ remaining = tp->rack.rtt_us + reo_wnd - tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); - if (remaining < 0) { + if (remaining <= 0) { tcp_rack_mark_skb_lost(sk, skb); list_del_init(&skb->tcp_tsorted_anchor); } else { - /* Record maximum wait time (+1 to avoid 0) */ - *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); + /* Record maximum wait time */ + *reo_timeout = max_t(u32, *reo_timeout, remaining); } } } @@ -116,13 +117,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, { u32 rtt_us; - if (tp->rack.mstamp && - !tcp_rack_sent_after(xmit_time, tp->rack.mstamp, - end_seq, tp->rack.end_seq)) - return; - rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time); - if (sacked & TCPCB_RETRANS) { + if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior * retransmission) was sacked. @@ -133,13 +129,15 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, * so it's at least one RTT (i.e., retransmission is at least * an RTT later). */ - if (rtt_us < tcp_min_rtt(tp)) - return; + return; } - tp->rack.rtt_us = rtt_us; - tp->rack.mstamp = xmit_time; - tp->rack.end_seq = end_seq; tp->rack.advanced = 1; + tp->rack.rtt_us = rtt_us; + if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp, + end_seq, tp->rack.end_seq)) { + tp->rack.mstamp = xmit_time; + tp->rack.end_seq = end_seq; + } } /* We have waited long enough to accommodate reordering. Mark the expired diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e360d55be555..01801b77bd0d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,16 +187,57 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + __wsum csum; + struct udphdr *uh; + struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { segs = skb_udp_tunnel_segment(skb, features, false); + goto out; + } + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + uh = udp_hdr(skb); + iph = ip_hdr(skb); + + uh->check = 0; + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is no outer header we can fake a checksum offload + * due to the fact that we have already done the checksum in + * software prior to segmenting the frame. + */ + if (!skb->encap_hdr_csum) + features |= NETIF_F_HW_CSUM; + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + segs = skb_segment(skb, features); +out: return segs; } @@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_tunnel_segment, + .gso_segment = udp4_ufo_fragment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a0ae1c9d37df..f49bd7897e95 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -188,7 +188,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id); static void addrconf_dad_run(struct inet6_dev *idev); -static void addrconf_rs_timer(unsigned long data); +static void addrconf_rs_timer(struct timer_list *t); static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); @@ -388,8 +388,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; INIT_LIST_HEAD(&ndev->addr_list); - setup_timer(&ndev->rs_timer, addrconf_rs_timer, - (unsigned long)ndev); + timer_setup(&ndev->rs_timer, addrconf_rs_timer, 0); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); if (ndev->cnf.stable_secret.initialized) @@ -3741,9 +3740,9 @@ restart: return 0; } -static void addrconf_rs_timer(unsigned long data) +static void addrconf_rs_timer(struct timer_list *t) { - struct inet6_dev *idev = (struct inet6_dev *)data; + struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2e2804f5823e..f5285f4e1d08 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -70,7 +70,7 @@ static int fib6_walk_continue(struct fib6_walker *w); * result of redirects, path MTU changes, etc. */ -static void fib6_gc_timer_cb(unsigned long arg); +static void fib6_gc_timer_cb(struct timer_list *t); #define FOR_WALKERS(net, w) \ list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh) @@ -2026,9 +2026,11 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) spin_unlock_bh(&net->ipv6.fib6_gc_lock); } -static void fib6_gc_timer_cb(unsigned long arg) +static void fib6_gc_timer_cb(struct timer_list *t) { - fib6_run_gc(0, (struct net *)arg, true); + struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer); + + fib6_run_gc(0, arg, true); } static int __net_init fib6_net_init(struct net *net) @@ -2043,7 +2045,7 @@ static int __net_init fib6_net_init(struct net *net) spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); INIT_LIST_HEAD(&net->ipv6.fib6_walkers); - setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); + timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0); net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); if (!net->ipv6.rt6_stats) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9f2e73c71768..7f59c8fabeeb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -46,7 +46,7 @@ static atomic_t fl_size = ATOMIC_INIT(0); static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; -static void ip6_fl_gc(unsigned long dummy); +static void ip6_fl_gc(struct timer_list *unused); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); /* FL hash table lock: it protects only of GC */ @@ -127,7 +127,7 @@ static void fl_release(struct ip6_flowlabel *fl) spin_unlock_bh(&ip6_fl_lock); } -static void ip6_fl_gc(unsigned long dummy) +static void ip6_fl_gc(struct timer_list *unused) { int i; unsigned long now = jiffies; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3d3092adf1d2..db84f523656d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -904,7 +904,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, if (t->parms.collect_md) { tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) - return 0; + goto drop; } ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9c24b85949c1..a2e1a864eb46 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr6_table *mrt, bool all); -static void ipmr_expire_process(unsigned long arg); +static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ @@ -320,8 +320,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) INIT_LIST_HEAD(&mrt->mfc6_unres_queue); - setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, - (unsigned long)mrt); + timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); #ifdef CONFIG_IPV6_PIMSM_V2 mrt->mroute_reg_vif_num = -1; @@ -888,9 +887,9 @@ static void ipmr_do_expire_process(struct mr6_table *mrt) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); } -static void ipmr_expire_process(unsigned long arg) +static void ipmr_expire_process(struct timer_list *t) { - struct mr6_table *mrt = (struct mr6_table *)arg; + struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer); if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 12b7c27ce5ce..844642682b83 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -75,10 +75,10 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); -static void igmp6_timer_handler(unsigned long data); +static void igmp6_timer_handler(struct timer_list *t); -static void mld_gq_timer_expire(unsigned long data); -static void mld_ifc_timer_expire(unsigned long data); +static void mld_gq_timer_expire(struct timer_list *t); +static void mld_ifc_timer_expire(struct timer_list *t); static void mld_ifc_event(struct inet6_dev *idev); static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); @@ -839,7 +839,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, if (!mc) return NULL; - setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); + timer_setup(&mc->mca_timer, igmp6_timer_handler, 0); mc->mca_addr = *addr; mc->idev = idev; /* reference taken by caller */ @@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, struct mld2_grec **ppgr) + int type, struct mld2_grec **ppgr, unsigned int mtu) { - struct net_device *dev = pmc->idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr; - if (!skb) - skb = mld_newpack(pmc->idev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = mld_newpack(pmc->idev, mtu); + if (!skb) + return NULL; + } pgr = skb_put(skb, sizeof(struct mld2_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV6_MIN_MTU) + return skb; + isquery = type == MLD2_MODE_IS_INCLUDE || type == MLD2_MODE_IS_EXCLUDE; truncate = type == MLD2_MODE_IS_EXCLUDE || @@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); } } first = 1; @@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -1814,7 +1819,7 @@ empty_source: mld_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) @@ -2083,9 +2088,9 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev) } } -static void mld_dad_timer_expire(unsigned long data) +static void mld_dad_timer_expire(struct timer_list *t) { - struct inet6_dev *idev = (struct inet6_dev *)data; + struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer); mld_send_initial_cr(idev); if (idev->mc_dad_count) { @@ -2432,18 +2437,18 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma) } } -static void mld_gq_timer_expire(unsigned long data) +static void mld_gq_timer_expire(struct timer_list *t) { - struct inet6_dev *idev = (struct inet6_dev *)data; + struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer); idev->mc_gq_running = 0; mld_send_report(idev, NULL); in6_dev_put(idev); } -static void mld_ifc_timer_expire(unsigned long data) +static void mld_ifc_timer_expire(struct timer_list *t) { - struct inet6_dev *idev = (struct inet6_dev *)data; + struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer); mld_send_cr(idev); if (idev->mc_ifc_count) { @@ -2462,9 +2467,9 @@ static void mld_ifc_event(struct inet6_dev *idev) mld_ifc_start_timer(idev, 1); } -static void igmp6_timer_handler(unsigned long data) +static void igmp6_timer_handler(struct timer_list *t) { - struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; + struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer); if (mld_in_v1_mode(ma->idev)) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); @@ -2552,14 +2557,11 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); spin_lock_init(&idev->mc_lock); idev->mc_gq_running = 0; - setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, - (unsigned long)idev); + timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0); idev->mc_tomb = NULL; idev->mc_ifc_count = 0; - setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, - (unsigned long)idev); - setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, - (unsigned long)idev); + timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0); + timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0); ipv6_mc_reset(idev); write_unlock_bh(&idev->lock); } diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4a7e5ffa5108..4fe7c90962dd 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -31,6 +31,37 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, return id; } +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return 0; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, + &addrs[1], &addrs[0]); + return htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 05eb7bc36156..7a8d1500d374 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -472,6 +472,11 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, &match->rt6i_siblings, rt6i_siblings) { route_choosen--; if (route_choosen == 0) { + struct inet6_dev *idev = sibling->rt6i_idev; + + if (!netif_carrier_ok(sibling->dst.dev) && + idev->cnf.ignore_routes_with_linkdown) + break; if (rt6_score_route(sibling, oif, strict) < 0) break; match = sibling; @@ -1019,7 +1024,7 @@ static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) { struct net_device *dev = rt->dst.dev; - if (rt->rt6i_flags & RTF_LOCAL) { + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d60ddcb0bfe2..d7dc23c1b2ca 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1098,6 +1098,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; t->fwmark = fwmark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6bb98c93edfe..7178476b3d2f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), 0, 0); } @@ -1454,7 +1454,6 @@ process: struct sock *nsk; sk = req->rsk_listener; - tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1467,8 +1466,12 @@ process: sock_hold(sk); refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false); + } if (!nsk) { reqsk_put(req); goto discard_and_relse; @@ -1492,8 +1495,6 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - tcp_v6_fill_cb(skb, hdr, th); - if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; @@ -1501,6 +1502,7 @@ process: goto discard_and_relse; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); + tcp_v6_fill_cb(skb, hdr, th); skb->dev = NULL; @@ -1590,7 +1592,6 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_restore_cb(skb); tcp_v6_send_reset(sk, skb); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 455fd4e39333..a0f89ad76f9d 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,15 +17,94 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + unsigned int unfrag_ip6hlen, unfrag_len; + struct frag_hdr *fptr; + u8 *packet_start, *prevhdr; + u8 nexthdr; + u8 frag_hdr_sz = sizeof(struct frag_hdr); + __wsum csum; + int tnl_hlen; + int err; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); + else { + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + + uh = udp_hdr(skb); + ipv6h = ipv6_hdr(skb); + + uh->check = 0; + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = udp_v6_check(skb->len, &ipv6h->saddr, + &ipv6h->daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is no outer header we can fake a checksum offload + * due to the fact that we have already done the checksum in + * software prior to segmenting the frame. + */ + if (!skb->encap_hdr_csum) + features |= NETIF_F_HW_CSUM; + + /* Check if there is enough headroom to insert fragment header. */ + tnl_hlen = skb_tnl_header_len(skb); + if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); + fptr->nexthdr = nexthdr; + fptr->reserved = 0; + fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb); + + /* Fragment the skb. ipv6 header and the remaining fields of the + * fragment header are updated in ipv6_gso_segment() + */ + segs = skb_segment(skb, features); + } +out: return segs; } @@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_tunnel_segment, + .gso_segment = udp6_ufo_fragment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 0b750a22c4b9..d4e98f20fc2a 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1625,60 +1625,30 @@ static struct proto kcm_proto = { }; /* Clone a kcm socket. */ -static int kcm_clone(struct socket *osock, struct kcm_clone *info, - struct socket **newsockp) +static struct file *kcm_clone(struct socket *osock) { struct socket *newsock; struct sock *newsk; - struct file *newfile; - int err, newfd; - err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out; + return ERR_PTR(-ENFILE); newsock->type = osock->type; newsock->ops = osock->ops; __module_get(newsock->ops->owner); - newfd = get_unused_fd_flags(0); - if (unlikely(newfd < 0)) { - err = newfd; - goto out_fd_fail; - } - - newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (IS_ERR(newfile)) { - err = PTR_ERR(newfile); - goto out_sock_alloc_fail; - } - newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, &kcm_proto, true); if (!newsk) { - err = -ENOMEM; - goto out_sk_alloc_fail; + sock_release(newsock); + return ERR_PTR(-ENOMEM); } - sock_init_data(newsock, newsk); init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); - fd_install(newfd, newfile); - *newsockp = newsock; - info->fd = newfd; - - return 0; - -out_sk_alloc_fail: - fput(newfile); -out_sock_alloc_fail: - put_unused_fd(newfd); -out_fd_fail: - sock_release(newsock); -out: - return err; + return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); } static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) @@ -1708,17 +1678,25 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } case SIOCKCMCLONE: { struct kcm_clone info; - struct socket *newsock = NULL; - - err = kcm_clone(sock, &info, &newsock); - if (!err) { - if (copy_to_user((void __user *)arg, &info, - sizeof(info))) { - err = -EFAULT; - sys_close(info.fd); - } - } + struct file *file; + + info.fd = get_unused_fd_flags(0); + if (unlikely(info.fd < 0)) + return info.fd; + file = kcm_clone(sock); + if (IS_ERR(file)) { + put_unused_fd(info.fd); + return PTR_ERR(file); + } + if (copy_to_user((void __user *)arg, &info, + sizeof(info))) { + put_unused_fd(info.fd); + fput(file); + return -EFAULT; + } + fd_install(info.fd, file); + err = 0; break; } default: diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 8bb469cb3abe..5d4ae01951b5 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -42,7 +42,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb) { del_timer(&lapb->t1timer); - lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry; + lapb->t1timer.function = lapb_t1timer_expiry; lapb->t1timer.expires = jiffies + lapb->t1; add_timer(&lapb->t1timer); @@ -52,7 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb) { del_timer(&lapb->t2timer); - lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry; + lapb->t2timer.function = lapb_t2timer_expiry; lapb->t2timer.expires = jiffies + lapb->t2; add_timer(&lapb->t2timer); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 88cc1ae935ea..d444752dbf40 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -151,21 +151,17 @@ EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. */ -static void sta_rx_agg_session_timer_expired(unsigned long data) +static void sta_rx_agg_session_timer_expired(struct timer_list *t) { - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and various sta_info are needed here, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); + struct tid_ampdu_rx *tid_rx_timer = + from_timer(tid_rx_timer, t, session_timer); + struct sta_info *sta = tid_rx_timer->sta; + u8 tid = tid_rx_timer->tid; struct tid_ampdu_rx *tid_rx; unsigned long timeout; rcu_read_lock(); - tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]); + tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_rx) { rcu_read_unlock(); return; @@ -180,21 +176,18 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); ht_dbg(sta->sdata, "RX session timer expired on %pM tid %d\n", - sta->sta.addr, (u16)*ptid); + sta->sta.addr, tid); - set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); + set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } -static void sta_rx_agg_reorder_timer_expired(unsigned long data) +static void sta_rx_agg_reorder_timer_expired(struct timer_list *t) { - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); + struct tid_ampdu_rx *tid_rx = from_timer(tid_rx, t, reorder_timer); rcu_read_lock(); - ieee80211_release_reorder_timeout(sta, *ptid); + ieee80211_release_reorder_timeout(tid_rx->sta, tid_rx->tid); rcu_read_unlock(); } @@ -356,14 +349,12 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, spin_lock_init(&tid_agg_rx->reorder_lock); /* rx timer */ - setup_deferrable_timer(&tid_agg_rx->session_timer, - sta_rx_agg_session_timer_expired, - (unsigned long)&sta->timer_to_tid[tid]); + timer_setup(&tid_agg_rx->session_timer, + sta_rx_agg_session_timer_expired, TIMER_DEFERRABLE); /* rx reorder timer */ - setup_timer(&tid_agg_rx->reorder_timer, - sta_rx_agg_reorder_timer_expired, - (unsigned long)&sta->timer_to_tid[tid]); + timer_setup(&tid_agg_rx->reorder_timer, + sta_rx_agg_reorder_timer_expired, 0); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = @@ -399,6 +390,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, tid_agg_rx->auto_seq = auto_seq; tid_agg_rx->started = false; tid_agg_rx->reorder_buf_filtered = 0; + tid_agg_rx->tid = tid; + tid_agg_rx->sta = sta; status = WLAN_STATUS_SUCCESS; /* activate it for RX */ diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index bef516ec47f9..5f8ab5be369f 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -330,6 +330,11 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_lock_bh(&sta->lock); + /* free struct pending for start, if present */ + tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; + kfree(tid_tx); + sta->ampdu_mlme.tid_start_tx[tid] = NULL; + tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) { spin_unlock_bh(&sta->lock); @@ -422,15 +427,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, * add Block Ack response will arrive from the recipient. * If this timer expires sta_addba_resp_timer_expired will be executed. */ -static void sta_addba_resp_timer_expired(unsigned long data) +static void sta_addba_resp_timer_expired(struct timer_list *t) { - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and both sta_info and TID are needed, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u16 tid = *(u8 *)data; - struct sta_info *sta = container_of((void *)data, - struct sta_info, timer_to_tid[tid]); + struct tid_ampdu_tx *tid_tx_timer = + from_timer(tid_tx_timer, t, addba_resp_timer); + struct sta_info *sta = tid_tx_timer->sta; + u8 tid = tid_tx_timer->tid; struct tid_ampdu_tx *tid_tx; /* check if the TID waits for addBA response */ @@ -525,21 +527,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) * After accepting the AddBA Response we activated a timer, * resetting it after each frame that we send. */ -static void sta_tx_agg_session_timer_expired(unsigned long data) +static void sta_tx_agg_session_timer_expired(struct timer_list *t) { - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and various sta_info are needed here, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); + struct tid_ampdu_tx *tid_tx_timer = + from_timer(tid_tx_timer, t, session_timer); + struct sta_info *sta = tid_tx_timer->sta; + u8 tid = tid_tx_timer->tid; struct tid_ampdu_tx *tid_tx; unsigned long timeout; rcu_read_lock(); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[*ptid]); + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); if (!tid_tx || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { rcu_read_unlock(); return; @@ -555,9 +553,9 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); ht_dbg(sta->sdata, "tx session timer expired on %pM tid %d\n", - sta->sta.addr, (u16)*ptid); + sta->sta.addr, tid); - ieee80211_stop_tx_ba_session(&sta->sta, *ptid); + ieee80211_stop_tx_ba_session(&sta->sta, tid); } int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, @@ -670,16 +668,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); tid_tx->timeout = timeout; + tid_tx->sta = sta; + tid_tx->tid = tid; /* response timer */ - setup_timer(&tid_tx->addba_resp_timer, - sta_addba_resp_timer_expired, - (unsigned long)&sta->timer_to_tid[tid]); + timer_setup(&tid_tx->addba_resp_timer, sta_addba_resp_timer_expired, 0); /* tx timer */ - setup_deferrable_timer(&tid_tx->session_timer, - sta_tx_agg_session_timer_expired, - (unsigned long)&sta->timer_to_tid[tid]); + timer_setup(&tid_tx->session_timer, + sta_tx_agg_session_timer_expired, TIMER_DEFERRABLE); /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 41f5e48f8021..1621b6ab17ba 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -291,13 +291,14 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, int i; mutex_lock(&sta->ampdu_mlme.mtx); - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - ___ieee80211_stop_tx_ba_session(sta, i, reason); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && reason != AGG_STOP_PEER_REQUEST); - } + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) + ___ieee80211_stop_tx_ba_session(sta, i, reason); mutex_unlock(&sta->ampdu_mlme.mtx); /* stopping might queue the work again - so cancel only afterwards */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index e9c6aa3ed05b..db07e0de9a03 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1711,10 +1711,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -static void ieee80211_ibss_timer(unsigned long data) +static void ieee80211_ibss_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.ibss.timer); ieee80211_queue_work(&sdata->local->hw, &sdata->work); } @@ -1723,8 +1723,7 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - setup_timer(&ifibss->timer, ieee80211_ibss_timer, - (unsigned long) sdata); + timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); INIT_WORK(&ifibss->csa_connection_drop_work, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 68f874e73561..885d00b41911 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1057,6 +1057,7 @@ struct tpt_led_trigger { const struct ieee80211_tpt_blink *blink_table; unsigned int blink_table_len; struct timer_list timer; + struct ieee80211_local *local; unsigned long prev_traffic; unsigned long tx_bytes, rx_bytes; unsigned int active, want; @@ -1932,7 +1933,7 @@ static inline int ieee80211_ac_from_tid(int tid) void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); -void ieee80211_dynamic_ps_timer(unsigned long data); +void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 0505845b7ab8..ba0b507ea691 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -248,10 +248,10 @@ static unsigned long tpt_trig_traffic(struct ieee80211_local *local, return DIV_ROUND_UP(delta, 1024 / 8); } -static void tpt_trig_timer(unsigned long data) +static void tpt_trig_timer(struct timer_list *t) { - struct ieee80211_local *local = (void *)data; - struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer); + struct ieee80211_local *local = tpt_trig->local; struct led_classdev *led_cdev; unsigned long on, off, tpt; int i; @@ -306,8 +306,9 @@ __ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; tpt_trig->want = flags; + tpt_trig->local = local; - setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); + timer_setup(&tpt_trig->timer, tpt_trig_timer, 0); local->tpt_led_trigger = tpt_trig; @@ -326,7 +327,7 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) tpt_trig_traffic(local, tpt_trig); tpt_trig->running = true; - tpt_trig_timer((unsigned long)local); + tpt_trig_timer(&tpt_trig->timer); mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8aa1f5b6a051..e054a2fd8d38 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -633,8 +633,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_dynamic_ps_enable_work); INIT_WORK(&local->dynamic_ps_disable_work, ieee80211_dynamic_ps_disable_work); - setup_timer(&local->dynamic_ps_timer, - ieee80211_dynamic_ps_timer, (unsigned long) local); + timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0); INIT_WORK(&local->sched_scan_stopped_work, ieee80211_sched_scan_stopped_work); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 7a76c4a6df30..5e27364e10ac 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -37,9 +37,10 @@ void ieee80211s_stop(void) kmem_cache_destroy(rm_cache); } -static void ieee80211_mesh_housekeeping_timer(unsigned long data) +static void ieee80211_mesh_housekeeping_timer(struct timer_list *t) { - struct ieee80211_sub_if_data *sdata = (void *) data; + struct ieee80211_sub_if_data *sdata = + from_timer(sdata, t, u.mesh.housekeeping_timer); struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -528,18 +529,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, return 0; } -static void ieee80211_mesh_path_timer(unsigned long data) +static void ieee80211_mesh_path_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mesh.mesh_path_timer); ieee80211_queue_work(&sdata->local->hw, &sdata->work); } -static void ieee80211_mesh_path_root_timer(unsigned long data) +static void ieee80211_mesh_path_root_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mesh.mesh_path_root_timer); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); @@ -1442,9 +1443,8 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; static u8 zero_addr[ETH_ALEN] = {}; - setup_timer(&ifmsh->housekeeping_timer, - ieee80211_mesh_housekeeping_timer, - (unsigned long) sdata); + timer_setup(&ifmsh->housekeeping_timer, + ieee80211_mesh_housekeeping_timer, 0); ifmsh->accepting_plinks = true; atomic_set(&ifmsh->mpaths, 0); @@ -1458,12 +1458,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) mesh_pathtbl_init(sdata); - setup_timer(&ifmsh->mesh_path_timer, - ieee80211_mesh_path_timer, - (unsigned long) sdata); - setup_timer(&ifmsh->mesh_path_root_timer, - ieee80211_mesh_path_root_timer, - (unsigned long) sdata); + timer_setup(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, 0); + timer_setup(&ifmsh->mesh_path_root_timer, + ieee80211_mesh_path_root_timer, 0); INIT_LIST_HEAD(&ifmsh->preq_queue.list); skb_queue_head_init(&ifmsh->ps.bc_buf); spin_lock_init(&ifmsh->mesh_preq_queue_lock); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 465b7853edc0..ee56f18cad3f 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -296,7 +296,7 @@ void mesh_path_tx_pending(struct mesh_path *mpath); int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); -void mesh_path_timer(unsigned long data); +void mesh_path_timer(struct timer_list *t); void mesh_path_flush_by_nexthop(struct sta_info *sta); void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 146ec6c0f12f..4394463a0c2e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -797,7 +797,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath; u8 ttl, flags, hopcount; const u8 *orig_addr; - u32 orig_sn, metric, metric_txsta, interval; + u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval; bool root_is_gate; ttl = rann->rann_ttl; @@ -808,7 +808,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, interval = le32_to_cpu(rann->rann_interval); hopcount = rann->rann_hopcount; hopcount++; - metric = le32_to_cpu(rann->rann_metric); + orig_metric = le32_to_cpu(rann->rann_metric); /* Ignore our own RANNs */ if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -825,7 +825,10 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, return; } - metric_txsta = airtime_link_metric_get(local, sta); + last_hop_metric = airtime_link_metric_get(local, sta); + new_metric = orig_metric + last_hop_metric; + if (new_metric < orig_metric) + new_metric = MAX_METRIC; mpath = mesh_path_lookup(sdata, orig_addr); if (!mpath) { @@ -838,7 +841,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if (!(SN_LT(mpath->sn, orig_sn)) && - !(mpath->sn == orig_sn && metric < mpath->rann_metric)) { + !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) { rcu_read_unlock(); return; } @@ -856,7 +859,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } mpath->sn = orig_sn; - mpath->rann_metric = metric + metric_txsta; + mpath->rann_metric = new_metric; mpath->is_root = true; /* Recording RANNs sender address to send individually * addressed PREQs destined for root mesh STA */ @@ -876,7 +879,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, orig_sn, 0, NULL, 0, broadcast_addr, hopcount, ttl, interval, - metric + metric_txsta, 0, sdata); + new_metric, 0, sdata); } rcu_read_unlock(); @@ -1194,9 +1197,9 @@ endlookup: return err; } -void mesh_path_timer(unsigned long data) +void mesh_path_timer(struct timer_list *t) { - struct mesh_path *mpath = (void *) data; + struct mesh_path *mpath = from_timer(mpath, t, timer); struct ieee80211_sub_if_data *sdata = mpath->sdata; int ret; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 97269caafecd..86c8dfef56a4 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -399,8 +399,7 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&new_mpath->frame_queue); new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); - setup_timer(&new_mpath->timer, mesh_path_timer, - (unsigned long) new_mpath); + timer_setup(&new_mpath->timer, mesh_path_timer, 0); return new_mpath; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e4ededa1909d..c244691deab9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -895,7 +895,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true); if (!skb) return; @@ -1066,10 +1066,10 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) } EXPORT_SYMBOL(ieee80211_chswitch_done); -static void ieee80211_chswitch_timer(unsigned long data) +static void ieee80211_chswitch_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mgd.chswitch_timer); ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); } @@ -1577,9 +1577,9 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } } -void ieee80211_dynamic_ps_timer(unsigned long data) +void ieee80211_dynamic_ps_timer(struct timer_list *t) { - struct ieee80211_local *local = (void *) data; + struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer); ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } @@ -3711,10 +3711,10 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, sdata_unlock(sdata); } -static void ieee80211_sta_timer(unsigned long data) +static void ieee80211_sta_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mgd.timer); ieee80211_queue_work(&sdata->local->hw, &sdata->work); } @@ -3991,10 +3991,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -static void ieee80211_sta_bcn_mon_timer(unsigned long data) +static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mgd.bcn_mon_timer); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) @@ -4005,10 +4005,10 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) &sdata->u.mgd.beacon_connection_loss_work); } -static void ieee80211_sta_conn_mon_timer(unsigned long data) +static void ieee80211_sta_conn_mon_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = - (struct ieee80211_sub_if_data *) data; + from_timer(sdata, t, u.mgd.conn_mon_timer); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; @@ -4139,14 +4139,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_mgd_work); INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - setup_timer(&ifmgd->timer, ieee80211_sta_timer, - (unsigned long) sdata); - setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, - (unsigned long) sdata); - setup_timer(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, - (unsigned long) sdata); - setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, - (unsigned long) sdata); + timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0); + timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0); + timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); + timer_setup(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 0); INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 88e6ebbbe24f..d351dc1162be 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -150,9 +150,10 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) sdata_unlock(sdata); } -static void ieee80211_ocb_housekeeping_timer(unsigned long data) +static void ieee80211_ocb_housekeeping_timer(struct timer_list *t) { - struct ieee80211_sub_if_data *sdata = (void *)data; + struct ieee80211_sub_if_data *sdata = + from_timer(sdata, t, u.ocb.housekeeping_timer); struct ieee80211_local *local = sdata->local; struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; @@ -165,9 +166,8 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; - setup_timer(&ifocb->housekeeping_timer, - ieee80211_ocb_housekeeping_timer, - (unsigned long)sdata); + timer_setup(&ifocb->housekeeping_timer, + ieee80211_ocb_housekeeping_timer, 0); INIT_LIST_HEAD(&ifocb->incomplete_stations); spin_lock_init(&ifocb->incomplete_lock); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a3060e55122c..0c5627f8a104 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -379,14 +379,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - /* - * timer_to_tid must be initialized with identity mapping - * to enable session_timer's data differentiation. See - * sta_rx_agg_session_timer_expired for usage. - */ - sta->timer_to_tid[i] = i; - } for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); @@ -1064,9 +1056,9 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, return ret; } -static void sta_info_cleanup(unsigned long data) +static void sta_info_cleanup(struct timer_list *t) { - struct ieee80211_local *local = (struct ieee80211_local *) data; + struct ieee80211_local *local = from_timer(local, t, sta_cleanup); struct sta_info *sta; bool timer_needed = false; @@ -1098,8 +1090,7 @@ int sta_info_init(struct ieee80211_local *local) mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); - setup_timer(&local->sta_cleanup, sta_info_cleanup, - (unsigned long)local); + timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); return 0; } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5c54acd10562..cd53619435b6 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -126,6 +126,8 @@ enum ieee80211_agg_stop_reason { AGG_STOP_DESTROY_STA, }; +struct sta_info; + /** * struct tid_ampdu_tx - TID aggregation information (Tx). * @@ -133,8 +135,10 @@ enum ieee80211_agg_stop_reason { * @session_timer: check if we keep Tx-ing on the TID (by timeout value) * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect + * @sta: station we are attached to * @dialog_token: dialog token for aggregation session * @timeout: session timeout value to be filled in ADDBA requests + * @tid: TID number * @state: session state (see above) * @last_tx: jiffies of last tx activity * @stop_initiator: initiator of a session stop @@ -158,6 +162,7 @@ struct tid_ampdu_tx { struct timer_list session_timer; struct timer_list addba_resp_timer; struct sk_buff_head pending; + struct sta_info *sta; unsigned long state; unsigned long last_tx; u16 timeout; @@ -169,6 +174,7 @@ struct tid_ampdu_tx { u16 failed_bar_ssn; bool bar_pending; bool amsdu; + u8 tid; }; /** @@ -181,12 +187,14 @@ struct tid_ampdu_tx { * @reorder_time: jiffies when skb was added * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) * @reorder_timer: releases expired frames from the reorder buffer. + * @sta: station we are attached to * @last_rx: jiffies of last rx activity * @head_seq_num: head sequence number in reordering buffer. * @stored_mpdu_num: number of MPDUs in reordering buffer * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). + * @tid: TID number * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and @@ -208,6 +216,7 @@ struct tid_ampdu_rx { u64 reorder_buf_filtered; struct sk_buff_head *reorder_buf; unsigned long *reorder_time; + struct sta_info *sta; struct timer_list session_timer; struct timer_list reorder_timer; unsigned long last_rx; @@ -216,6 +225,7 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; + u8 tid; u8 auto_seq:1, removed:1, started:1; @@ -447,7 +457,6 @@ struct ieee80211_sta_rx_stats { * plus one for non-QoS frames) * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state - * @timer_to_tid: identity mapping to ID timers * @mesh: mesh STA information * @debugfs_dir: debug filesystem directory dentry * @dead: set to true when sta is unlinked @@ -554,7 +563,6 @@ struct sta_info { * Aggregation information, locked with lock. */ struct sta_ampdu_mlme ampdu_mlme; - u8 timer_to_tid[IEEE80211_NUM_TIDS]; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *debugfs_dir; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b8154474b9e..3160954fc406 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4438,13 +4438,15 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + bool qos_ok) { struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; struct ieee80211_local *local; struct sk_buff *skb; + bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; @@ -4453,7 +4455,17 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, ifmgd = &sdata->u.mgd; local = sdata->local; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (qos_ok) { + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, ifmgd->bssid); + qos = sta && sta->sta.wme; + rcu_read_unlock(); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); if (!skb) return NULL; @@ -4463,6 +4475,19 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_TODS); + if (qos) { + __le16 qos = cpu_to_le16(7); + + BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_STYPE_NULLFUNC) != + IEEE80211_STYPE_QOS_NULLFUNC); + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC); + skb->priority = 7; + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb_put_data(skb, &qos, sizeof(qos)); + } + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a2b904a718c6..c989211bbabc 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -184,9 +184,9 @@ report: nd->handler(nd); } -static void ncsi_channel_monitor(unsigned long data) +static void ncsi_channel_monitor(struct timer_list *t) { - struct ncsi_channel *nc = (struct ncsi_channel *)data; + struct ncsi_channel *nc = from_timer(nc, t, monitor.timer); struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_channel_mode *ncm; @@ -313,8 +313,7 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; nc->monitor.enabled = false; - setup_timer(&nc->monitor.timer, - ncsi_channel_monitor, (unsigned long)nc); + timer_setup(&nc->monitor.timer, ncsi_channel_monitor, 0); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) @@ -529,9 +528,9 @@ struct ncsi_dev *ncsi_find_dev(struct net_device *dev) return NULL; } -static void ncsi_request_timeout(unsigned long data) +static void ncsi_request_timeout(struct timer_list *t) { - struct ncsi_request *nr = (struct ncsi_request *)data; + struct ncsi_request *nr = from_timer(nr, t, timer); struct ncsi_dev_priv *ndp = nr->ndp; unsigned long flags; @@ -1577,9 +1576,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; - setup_timer(&ndp->requests[i].timer, - ncsi_request_timeout, - (unsigned long)&ndp->requests[i]); + timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); } spin_lock_irqsave(&ncsi_dev_lock, flags); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 64778f9a8548..d6748a8a79c5 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -67,9 +67,9 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report); -static void nf_ct_expectation_timed_out(unsigned long ul_expect) +static void nf_ct_expectation_timed_out(struct timer_list *t) { - struct nf_conntrack_expect *exp = (void *)ul_expect; + struct nf_conntrack_expect *exp = from_timer(exp, t, timeout); spin_lock_bh(&nf_conntrack_expect_lock); nf_ct_unlink_expect(exp); @@ -368,8 +368,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) /* two references : one for hash insert, one for the timer */ refcount_add(2, &exp->use); - setup_timer(&exp->timeout, nf_ct_expectation_timed_out, - (unsigned long)exp); + timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0); helper = rcu_dereference_protected(master_help->helper, lockdep_is_held(&nf_conntrack_expect_lock)); if (helper) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 66d72a8fa87f..382d49792f42 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,6 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_labels.h> -#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 1f511ed0fea3..e955bec0acc6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -151,7 +151,7 @@ instance_put(struct nfulnl_instance *inst) call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } -static void nfulnl_timer(unsigned long data); +static void nfulnl_timer(struct timer_list *t); static struct nfulnl_instance * instance_create(struct net *net, u_int16_t group_num, @@ -184,7 +184,7 @@ instance_create(struct net *net, u_int16_t group_num, /* needs to be two, since we _put() after creation */ refcount_set(&inst->use, 2); - setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); + timer_setup(&inst->timer, nfulnl_timer, 0); inst->net = get_net(net); inst->peer_user_ns = user_ns; @@ -377,9 +377,9 @@ __nfulnl_flush(struct nfulnl_instance *inst) } static void -nfulnl_timer(unsigned long data) +nfulnl_timer(struct timer_list *t) { - struct nfulnl_instance *inst = (struct nfulnl_instance *)data; + struct nfulnl_instance *inst = from_timer(inst, t, timer); spin_lock_bh(&inst->lock); if (inst->skb) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index daf45da448fa..ee3421ad108d 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -107,9 +107,9 @@ static void idletimer_tg_work(struct work_struct *work) sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); } -static void idletimer_tg_expired(unsigned long data) +static void idletimer_tg_expired(struct timer_list *t) { - struct idletimer_tg *timer = (struct idletimer_tg *) data; + struct idletimer_tg *timer = from_timer(timer, t, timer); pr_debug("timer %s expired\n", timer->attr.attr.name); @@ -143,8 +143,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) list_add(&info->timer->entry, &idletimer_tg_list); - setup_timer(&info->timer->timer, idletimer_tg_expired, - (unsigned long) info->timer); + timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; mod_timer(&info->timer->timer, diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 3ba31c194cce..0971634e5444 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -85,9 +85,10 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static void led_timeout_callback(unsigned long data) +static void led_timeout_callback(struct timer_list *t) { - struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data; + struct xt_led_info_internal *ledinternal = from_timer(ledinternal, t, + timer); led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); } @@ -143,8 +144,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) /* See if we need to set up a timer */ if (ledinfo->delay > 0) - setup_timer(&ledinternal->timer, led_timeout_callback, - (unsigned long)ledinternal); + timer_setup(&ledinternal->timer, led_timeout_callback, 0); list_add_tail(&ledinternal->list, &xt_led_triggers); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b9e0ee4e22f5..79cc1bf36e4a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, struct sock *sk = skb->sk; int ret = -ENOMEM; + if (!net_eq(dev_net(dev), sock_net(sk))) + return 0; + dev_hold(dev); if (is_vmalloc_addr(skb->head)) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 2dec3583c97d..7ed9d4422a73 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer; + sk->sk_timer.function = nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; add_timer(&sk->sk_timer); } else diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 989ae647825e..215ad22a9647 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -15,7 +15,7 @@ #include <net/netrom.h> #include <linux/init.h> -static void nr_loopback_timer(unsigned long); +static void nr_loopback_timer(struct timer_list *); static struct sk_buff_head loopback_queue; static DEFINE_TIMER(loopback_timer, nr_loopback_timer); @@ -48,7 +48,7 @@ int nr_loopback_queue(struct sk_buff *skb) return 1; } -static void nr_loopback_timer(unsigned long param) +static void nr_loopback_timer(struct timer_list *unused) { struct sk_buff *skb; ax25_address *nr_dest; diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 43569aea0f5e..cbd51ed5a2d7 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -45,7 +45,7 @@ void nr_init_timers(struct sock *sk) timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry; + sk->sk_timer.function = nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index c25e9b4179c3..074960154993 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -591,18 +591,18 @@ static int nci_close_device(struct nci_dev *ndev) } /* NCI command timer function */ -static void nci_cmd_timer(unsigned long arg) +static void nci_cmd_timer(struct timer_list *t) { - struct nci_dev *ndev = (void *) arg; + struct nci_dev *ndev = from_timer(ndev, t, cmd_timer); atomic_set(&ndev->cmd_cnt, 1); queue_work(ndev->cmd_wq, &ndev->cmd_work); } /* NCI data exchange timer function */ -static void nci_data_timer(unsigned long arg) +static void nci_data_timer(struct timer_list *t) { - struct nci_dev *ndev = (void *) arg; + struct nci_dev *ndev = from_timer(ndev, t, data_timer); set_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); queue_work(ndev->rx_wq, &ndev->rx_work); @@ -1232,10 +1232,8 @@ int nci_register_device(struct nci_dev *ndev) skb_queue_head_init(&ndev->rx_q); skb_queue_head_init(&ndev->tx_q); - setup_timer(&ndev->cmd_timer, nci_cmd_timer, - (unsigned long) ndev); - setup_timer(&ndev->data_timer, nci_data_timer, - (unsigned long) ndev); + timer_setup(&ndev->cmd_timer, nci_cmd_timer, 0); + timer_setup(&ndev->data_timer, nci_data_timer, 0); mutex_init(&ndev->req_lock); INIT_LIST_HEAD(&ndev->conn_info_list); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0dab33fb9844..ef38e5aecd28 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -308,6 +308,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { + unsigned int gso_type = skb_shinfo(skb)->gso_type; + struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -318,9 +320,21 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; + if (gso_type & SKB_GSO_UDP) { + /* The initial flow key extracted by ovs_flow_key_extract() + * in this case is for a first fragment, so we need to + * properly mark later fragments. + */ + later_key = *key; + later_key.ip.frag = OVS_FRAG_TYPE_LATER; + } + /* Queue all of the segments. */ skb = segs; do { + if (gso_type & SKB_GSO_UDP && skb != segs) + key = &later_key; + err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 864ddb1e3642..dbe2379329c5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -631,7 +631,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF)) + if (nh->frag_off & htons(IP_MF) || + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -747,6 +748,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.frag = OVS_FRAG_TYPE_FIRST; + /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dc424798ba6f..624ea74353dd 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2241,14 +2241,11 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) #define MAX_ACTIONS_BUFSIZE (32 * 1024) -static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size %u bytes exceeds max", size); - return ERR_PTR(-EINVAL); - } + WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -2321,12 +2318,15 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + OVS_NLERR(log, "Flow action size exceeds max %u", + MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); + } new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = nla_alloc_flow_actions(new_acts_size, log); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -3059,7 +3059,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, { int err; - *sfa = nla_alloc_flow_actions(nla_len(attr), log); + *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 737092ca9b4e..da215e5c1399 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1687,7 +1687,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) atomic_long_set(&rollover->num, 0); atomic_long_set(&rollover->num_huge, 0); atomic_long_set(&rollover->num_failed, 0); - po->rollover = rollover; } if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { @@ -1745,6 +1744,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); po->fanout = match; + po->rollover = rollover; + rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); __fanout_link(sk, po); err = 0; @@ -1758,10 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: - if (err && rollover) { - kfree_rcu(rollover, rcu); - po->rollover = NULL; - } + kfree(rollover); mutex_unlock(&fanout_mutex); return err; } @@ -1785,11 +1783,6 @@ static struct packet_fanout *fanout_release(struct sock *sk) list_del(&f->list); else f = NULL; - - if (po->rollover) { - kfree_rcu(po->rollover, rcu); - po->rollover = NULL; - } } mutex_unlock(&fanout_mutex); @@ -3029,6 +3022,7 @@ static int packet_release(struct socket *sock) synchronize_net(); if (f) { + kfree(po->rollover); fanout_release_data(f); kfree(f); } @@ -3097,6 +3091,10 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (need_rehook) { if (po->running) { rcu_read_unlock(); + /* prevents packet_notifier() from calling + * register_prot_hook() + */ + po->num = 0; __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3105,6 +3103,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, dev->ifindex); } + BUG_ON(po->running); po->num = proto; po->prot_hook.type = proto; @@ -3843,7 +3842,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; - struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3922,18 +3920,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - rcu_read_lock(); - rollover = rcu_dereference(po->rollover); - if (rollover) { - rstats.tp_all = atomic_long_read(&rollover->num); - rstats.tp_huge = atomic_long_read(&rollover->num_huge); - rstats.tp_failed = atomic_long_read(&rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); - } - rcu_read_unlock(); - if (!rollover) + if (!po->rollover) return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; diff --git a/net/packet/internal.h b/net/packet/internal.h index 562fbc155006..a1d2b2319ae9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -95,7 +95,6 @@ struct packet_fanout { struct packet_rollover { int sock; - struct rcu_head rcu; atomic_long_t num; atomic_long_t num_huge; atomic_long_t num_failed; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8886f15abe90..bc2f1e0977d6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, long i; int ret; - if (rs->rs_bound_addr == 0) { + if (rs->rs_bound_addr == 0 || !rs->rs_transport) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index cda4c6678ef1..62055d3069d2 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -37,7 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) { del_timer(&neigh->ftimer); - neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry; + neigh->ftimer.function = rose_ftimer_expiry; neigh->ftimer.expires = jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); @@ -48,7 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh) { del_timer(&neigh->t0timer); - neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry; + neigh->t0timer.function = rose_t0timer_expiry; neigh->t0timer.expires = jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index ea613b2a9735..74555fb95615 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -36,7 +36,7 @@ void rose_start_heartbeat(struct sock *sk) { del_timer(&sk->sk_timer); - sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry; + sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; add_timer(&sk->sk_timer); @@ -48,7 +48,7 @@ void rose_start_t1timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; + rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; add_timer(&rose->timer); @@ -60,7 +60,7 @@ void rose_start_t2timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; + rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; add_timer(&rose->timer); @@ -72,7 +72,7 @@ void rose_start_t3timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; + rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; add_timer(&rose->timer); @@ -84,7 +84,7 @@ void rose_start_hbtimer(struct sock *sk) del_timer(&rose->timer); - rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; + rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; add_timer(&rose->timer); @@ -97,7 +97,7 @@ void rose_start_idletimer(struct sock *sk) del_timer(&rose->idletimer); if (rose->idle > 0) { - rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry; + rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; add_timer(&rose->idletimer); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9b5c46b052fd..dcd818fa837e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -285,6 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, bool upgrade) { struct rxrpc_conn_parameters cp; + struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); int ret; @@ -302,6 +303,10 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (key && !key->payload.data[0]) key = NULL; /* a no-security key */ + memset(&p, 0, sizeof(p)); + p.user_call_ID = user_call_ID; + p.tx_total_len = tx_total_len; + memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.key = key; @@ -309,8 +314,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, - gfp); + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; @@ -856,6 +860,7 @@ static void rxrpc_sock_destructor(struct sock *sk) static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); @@ -863,6 +868,19 @@ static int rxrpc_release_sock(struct sock *sk) sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -878,6 +896,8 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); + rxrpc_queue_work(&rxnet->service_conn_reaper); + rxrpc_queue_work(&rxnet->client_conn_reaper); rxrpc_put_local(rx->local); rx->local = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b2151993d384..416688381eb7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -79,17 +79,20 @@ struct rxrpc_net { struct list_head conn_proc_list; /* List of conns in this namespace for proc */ struct list_head service_conns; /* Service conns in this namespace */ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ - struct delayed_work service_conn_reaper; + struct work_struct service_conn_reaper; + struct timer_list service_conn_reap_timer; unsigned int nr_client_conns; unsigned int nr_active_client_conns; bool kill_all_client_conns; + bool live; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head waiting_client_conns; struct list_head active_client_conns; struct list_head idle_client_conns; - struct delayed_work client_conn_reaper; + struct work_struct client_conn_reaper; + struct timer_list client_conn_reap_timer; struct list_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ @@ -265,6 +268,7 @@ struct rxrpc_local { rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; + bool service_closed; /* Service socket closed */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -338,8 +342,17 @@ enum rxrpc_conn_flag { RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ + RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ + RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ + RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */ + RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */ }; +#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \ + (1UL << RXRPC_CONN_FINAL_ACK_1) | \ + (1UL << RXRPC_CONN_FINAL_ACK_2) | \ + (1UL << RXRPC_CONN_FINAL_ACK_3)) + /* * Events that can be raised upon a connection. */ @@ -393,6 +406,7 @@ struct rxrpc_connection { #define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { + unsigned long final_ack_at; /* Time at which to issue final ACK */ struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -404,6 +418,7 @@ struct rxrpc_connection { }; } channels[RXRPC_MAXCALLS]; + struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ @@ -457,9 +472,10 @@ enum rxrpc_call_flag { enum rxrpc_call_event { RXRPC_CALL_EV_ACK, /* need to generate ACK */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ RXRPC_CALL_EV_PING, /* Ping send required */ + RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ + RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ }; /* @@ -503,10 +519,16 @@ struct rxrpc_call { struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct mutex user_mutex; /* User access mutex */ - ktime_t ack_at; /* When deferred ACK needs to happen */ - ktime_t resend_at; /* When next resend needs to happen */ - ktime_t ping_at; /* When next to send a ping */ - ktime_t expire_at; /* When the call times out */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long ack_lost_at; /* When ACK is figured as lost */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long ping_at; /* When next to send a ping */ + unsigned long keepalive_at; /* When next to send a keepalive ping */ + unsigned long expect_rx_by; /* When we expect to get a packet by */ + unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ + unsigned long expect_term_by; /* When we expect call termination by */ + u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ + u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -609,6 +631,8 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ + rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ + rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ }; /* @@ -632,6 +656,35 @@ struct rxrpc_ack_summary { u8 cumulative_acks; }; +/* + * sendmsg() cmsg-specified parameters. + */ +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + +struct rxrpc_call_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + struct { + u32 hard; /* Maximum lifetime (sec) */ + u32 idle; /* Max time since last data packet (msec) */ + u32 normal; /* Max time since last call packet (msec) */ + } timeouts; + u8 nr_timeouts; /* Number of timeouts specified */ +}; + +struct rxrpc_send_params { + struct rxrpc_call_params call; + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + #include <trace/events/rxrpc.h> /* @@ -657,12 +710,19 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); +static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + trace_rxrpc_timer(call, why, now); + timer_reduce(&call->timer, expire_at); +} + /* * call_object.c */ @@ -672,11 +732,11 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - unsigned long, s64, gfp_t); + struct rxrpc_call_params *, gfp_t); int rxrpc_retry_client_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, @@ -803,8 +863,8 @@ static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, */ extern unsigned int rxrpc_max_client_connections; extern unsigned int rxrpc_reap_client_connections; -extern unsigned int rxrpc_conn_idle_client_expiry; -extern unsigned int rxrpc_conn_idle_client_fast_expiry; +extern unsigned long rxrpc_conn_idle_client_expiry; +extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); @@ -825,6 +885,7 @@ void rxrpc_process_connection(struct work_struct *); * conn_object.c */ extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, @@ -861,6 +922,12 @@ static inline void rxrpc_put_connection(struct rxrpc_connection *conn) rxrpc_put_service_conn(conn); } +static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, + unsigned long expire_at) +{ + timer_reduce(&conn->timer, expire_at); +} + /* * conn_service.c */ @@ -930,13 +997,13 @@ static inline void rxrpc_queue_local(struct rxrpc_local *local) * misc.c */ extern unsigned int rxrpc_max_backlog __read_mostly; -extern unsigned int rxrpc_requested_ack_delay; -extern unsigned int rxrpc_soft_ack_delay; -extern unsigned int rxrpc_idle_ack_delay; +extern unsigned long rxrpc_requested_ack_delay; +extern unsigned long rxrpc_soft_ack_delay; +extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned int rxrpc_resend_timeout; +extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; @@ -954,7 +1021,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *, bool); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cbd1701e813a..3028298ca561 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -94,7 +94,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. */ - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3574508baf9a..ad2ab1103189 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -22,80 +22,6 @@ #include "ar-internal.h" /* - * Set the timer - */ -void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - unsigned long t_j, now_j = jiffies; - ktime_t t; - bool queue = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - t = call->expire_at; - if (!ktime_after(t, now)) { - trace_rxrpc_timer(call, why, now, now_j); - queue = true; - goto out; - } - - if (!ktime_after(call->resend_at, now)) { - call->resend_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - queue = true; - } else if (ktime_before(call->resend_at, t)) { - t = call->resend_at; - } - - if (!ktime_after(call->ack_at, now)) { - call->ack_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - queue = true; - } else if (ktime_before(call->ack_at, t)) { - t = call->ack_at; - } - - if (!ktime_after(call->ping_at, now)) { - call->ping_at = call->expire_at; - if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) - queue = true; - } else if (ktime_before(call->ping_at, t)) { - t = call->ping_at; - } - - t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); - t_j += jiffies; - - /* We have to make sure that the calculated jiffies value falls - * at or after the nsec value, or we may loop ceaselessly - * because the timer times out, but we haven't reached the nsec - * timeout yet. - */ - t_j++; - - if (call->timer.expires != t_j || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t_j); - trace_rxrpc_timer(call, why, now, now_j); - } - } - -out: - if (queue) - rxrpc_queue_call(call); -} - -/* - * Set the timer - */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, - ktime_t now) -{ - read_lock_bh(&call->state_lock); - __rxrpc_set_timer(call, why, now); - read_unlock_bh(&call->state_lock); -} - -/* * Propose a PING ACK be sent. */ static void rxrpc_propose_ping(struct rxrpc_call *call, @@ -106,12 +32,13 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) rxrpc_queue_call(call); } else { - ktime_t now = ktime_get_real(); - ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + unsigned long now = jiffies; + unsigned long ping_at = now + rxrpc_idle_ack_delay; - if (ktime_before(ping_at, call->ping_at)) { - call->ping_at = ping_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + if (time_before(ping_at, call->ping_at)) { + WRITE_ONCE(call->ping_at, ping_at); + rxrpc_reduce_call_timer(call, ping_at, now, + rxrpc_timer_set_for_ping); } } } @@ -125,8 +52,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned int expiry = rxrpc_soft_ack_delay; - ktime_t now, ack_at; + unsigned long expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; /* Pings are handled specially because we don't want to accidentally @@ -190,11 +116,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - now = ktime_get_real(); - ack_at = ktime_add_ms(now, expiry); - if (ktime_before(ack_at, call->ack_at)) { - call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); + unsigned long now = jiffies, ack_at; + + if (call->peer->rtt_usage > 0) + ack_at = nsecs_to_jiffies(call->peer->rtt); + else + ack_at = expiry; + + ack_at += now; + if (time_before(ack_at, call->ack_at)) { + WRITE_ONCE(call->ack_at, ack_at); + rxrpc_reduce_call_timer(call, ack_at, now, + rxrpc_timer_set_for_ack); } } @@ -227,18 +160,28 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) +static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; - ktime_t max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - max_age = ktime_sub_ms(now, rxrpc_resend_timeout); + if (call->peer->rtt_usage > 1) + timeout = ns_to_ktime(call->peer->rtt * 3 / 2); + else + timeout = ms_to_ktime(rxrpc_resend_timeout); + min_timeo = ns_to_ktime((1000000000 / HZ) * 4); + if (ktime_before(timeout, min_timeo)) + timeout = min_timeo; + + now = ktime_get_real(); + max_age = ktime_sub(now, timeout); spin_lock_bh(&call->lock); @@ -282,7 +225,9 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now))); + resend_at += jiffies + rxrpc_resend_timeout; + WRITE_ONCE(call->resend_at, resend_at); if (unacked) rxrpc_congestion_timeout(call); @@ -292,14 +237,15 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto out; } @@ -364,7 +310,8 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - ktime_t now; + rxrpc_serial_t *send_ack; + unsigned long now, next, t; rxrpc_see_call(call); @@ -384,22 +331,89 @@ recheck_state: goto out_put; } - now = ktime_get_real(); - if (ktime_before(call->expire_at, now)) { + /* Work out if any timeouts tripped */ + now = jiffies; + t = READ_ONCE(call->expect_rx_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_req_by); + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_term_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->ack_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); + cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK, &call->events); + } + + t = READ_ONCE(call->ack_lost_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); + cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); + } + + t = READ_ONCE(call->keepalive_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); + cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ack_ping_for_keepalive); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->ping_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); + cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->resend_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); + cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_RESEND, &call->events); + } + + /* Process events */ + if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + send_ack = NULL; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { + call->acks_lost_top = call->tx_top; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + send_ack = &call->acks_lost_ping; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + send_ack) { if (call->ackr_reason) { - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, send_ack); goto recheck_state; } } if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { - rxrpc_send_ack_packet(call, true); + rxrpc_send_ack_packet(call, true, NULL); goto recheck_state; } @@ -408,7 +422,24 @@ recheck_state: goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + /* Make sure the timer is restarted */ + next = call->expect_rx_by; + +#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } + + set(call->expect_req_by); + set(call->expect_term_by); + set(call->ack_at); + set(call->ack_lost_at); + set(call->resend_at); + set(call->keepalive_at); + set(call->ping_at); + + now = jiffies; + if (time_after_eq(now, next)) + goto recheck_state; + + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4c7fbc6dcce7..0b2db38dd32d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -45,16 +45,20 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { struct kmem_cache *rxrpc_call_jar; -static void rxrpc_call_timer_expired(unsigned long _call) +static void rxrpc_call_timer_expired(struct timer_list *t) { - struct rxrpc_call *call = (struct rxrpc_call *)_call; + struct rxrpc_call *call = from_timer(call, t, timer); _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + rxrpc_queue_call(call); + } } +static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; + /* * find an extant server call * - called in process context with IRQs enabled @@ -95,7 +99,7 @@ found_extant_call: /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_call *call; @@ -114,8 +118,15 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) goto nomem_2; mutex_init(&call->user_mutex); - setup_timer(&call->timer, rxrpc_call_timer_expired, - (unsigned long)call); + + /* Prevent lockdep reporting a deadlock false positive between the afs + * filesystem and sys_sendmsg() via the mmap sem. + */ + if (rx->sk.sk_kern_sock) + lockdep_set_class(&call->user_mutex, + &rxrpc_call_user_mutex_lock_class_key); + + timer_setup(&call->timer, rxrpc_call_timer_expired, 0); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); @@ -129,6 +140,8 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -151,7 +164,8 @@ nomem: /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; @@ -159,7 +173,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, _enter(""); - call = rxrpc_alloc_call(gfp); + call = rxrpc_alloc_call(rx, gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -178,15 +192,17 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - ktime_t now = ktime_get_real(), expire_at; - - expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); - call->expire_at = expire_at; - call->ack_at = expire_at; - call->ping_at = expire_at; - call->resend_at = expire_at; - call->timer.expires = jiffies + LONG_MAX / 2; - rxrpc_set_timer(call, rxrpc_timer_begin, now); + unsigned long now = jiffies; + unsigned long j = now + MAX_JIFFY_OFFSET; + + call->ack_at = j; + call->ack_lost_at = j; + call->resend_at = j; + call->ping_at = j; + call->expect_rx_by = j; + call->expect_req_by = j; + call->expect_term_by = j; + call->timer.expires = now; } /* @@ -197,8 +213,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, - unsigned long user_call_ID, - s64 tx_total_len, + struct rxrpc_call_params *p, gfp_t gfp) __releases(&rx->sk.sk_lock.slock) { @@ -208,18 +223,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, const void *here = __builtin_return_address(0); int ret; - _enter("%p,%lx", rx, user_call_ID); + _enter("%p,%lx", rx, p->user_call_ID); - call = rxrpc_alloc_client_call(srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); return call; } - call->tx_total_len = tx_total_len; + call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), - here, (const void *)user_call_ID); + here, (const void *)p->user_call_ID); /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. @@ -235,16 +250,16 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); - if (user_call_ID < xcall->user_call_ID) + if (p->user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; - else if (user_call_ID > xcall->user_call_ID) + else if (p->user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); - call->user_call_ID = user_call_ID; + call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f9624bd311c..7f74ca3059f8 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -85,8 +85,8 @@ __read_mostly unsigned int rxrpc_max_client_connections = 1000; __read_mostly unsigned int rxrpc_reap_client_connections = 900; -__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; -__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; /* * We use machine-unique IDs for our client connections. @@ -554,6 +554,11 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + /* Cancel the final ACK on the previous call if it hasn't been sent yet + * as the DATA packet will implicitly ACK it. + */ + clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + write_lock_bh(&call->state_lock); if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; @@ -686,7 +691,7 @@ int rxrpc_connect_call(struct rxrpc_call *call, _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper.work); + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); rxrpc_cull_active_client_conns(rxnet); ret = rxrpc_get_client_conn(call, cp, srx, gfp); @@ -752,6 +757,18 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) } /* + * Set the reap timer. + */ +static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +{ + unsigned long now = jiffies; + unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; + + if (rxnet->live) + timer_reduce(&rxnet->client_conn_reap_timer, reap_at); +} + +/* * Disconnect a client call. */ void rxrpc_disconnect_client_call(struct rxrpc_call *call) @@ -813,6 +830,19 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) goto out_2; } + /* Schedule the final ACK to be transmitted in a short while so that it + * can be skipped if we find a follow-on call. The first DATA packet + * of the follow on call will implicitly ACK this call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + unsigned long final_ack_at = jiffies + 2; + + WRITE_ONCE(chan->final_ack_at, final_ack_at); + smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ + set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + rxrpc_reduce_conn_timer(conn, final_ack_at); + } + /* Things are more complex and we need the cache lock. We might be * able to simply idle the conn or it might now be lurking on the wait * list. It might even get moved back to the active list whilst we're @@ -878,9 +908,7 @@ idle_connection: list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); if (rxnet->idle_client_conns.next == &conn->cache_link && !rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - rxrpc_conn_idle_client_expiry); + rxrpc_set_client_reap_timer(rxnet); } else { trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; @@ -1018,8 +1046,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, client_conn_reaper); + container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; bool did_discard = false; @@ -1061,6 +1088,8 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; @@ -1096,9 +1125,8 @@ not_yet_expired: */ _debug("not yet"); if (!rxnet->kill_all_client_conns) - queue_delayed_work(rxrpc_workqueue, - &rxnet->client_conn_reaper, - conn_expires_at - now); + timer_reduce(&rxnet->client_conn_reap_timer, + conn_expires_at); out: spin_unlock(&rxnet->client_conn_cache_lock); @@ -1118,9 +1146,9 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) rxnet->kill_all_client_conns = true; spin_unlock(&rxnet->client_conn_cache_lock); - cancel_delayed_work(&rxnet->client_conn_reaper); + del_timer_sync(&rxnet->client_conn_reap_timer); - if (!queue_delayed_work(rxrpc_workqueue, &rxnet->client_conn_reaper, 0)) + if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) _debug("destroy: queue failed"); _leave(""); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 59a51a56e7c8..4ca11be6be3c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -24,31 +24,28 @@ * Retransmit terminal ACK or ABORT of the previous call. */ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int channel) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; struct msghdr msg; - struct kvec iov; + struct kvec iov[3]; struct { struct rxrpc_wire_header whdr; union { - struct { - __be32 code; - } abort; - struct { - struct rxrpc_ackpacket ack; - u8 padding[3]; - struct rxrpc_ackinfo info; - }; + __be32 abort_code; + struct rxrpc_ackpacket ack; }; } __attribute__((packed)) pkt; + struct rxrpc_ackinfo ack_info; size_t len; - u32 serial, mtu, call_id; + int ioc; + u32 serial, mtu, call_id, padding; _enter("%d", conn->debug_id); - chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. @@ -56,7 +53,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, call_id = READ_ONCE(chan->last_call); /* Sync with __rxrpc_disconnect_call() */ smp_rmb(); - if (call_id != sp->hdr.callNumber) + if (skb && call_id != sp->hdr.callNumber) return; msg.msg_name = &conn->params.peer->srx.transport; @@ -65,9 +62,16 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, msg.msg_controllen = 0; msg.msg_flags = 0; - pkt.whdr.epoch = htonl(sp->hdr.epoch); - pkt.whdr.cid = htonl(sp->hdr.cid); - pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt.whdr); + iov[1].iov_base = &padding; + iov[1].iov_len = 3; + iov[2].iov_base = &ack_info; + iov[2].iov_len = sizeof(ack_info); + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(conn->proto.cid); + pkt.whdr.callNumber = htonl(call_id); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -79,27 +83,35 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, len = sizeof(pkt.whdr); switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - pkt.abort.code = htonl(chan->last_abort); - len += sizeof(pkt.abort); + pkt.abort_code = htonl(chan->last_abort); + iov[0].iov_len += sizeof(pkt.abort_code); + len += sizeof(pkt.abort_code); + ioc = 1; break; case RXRPC_PACKET_TYPE_ACK: mtu = conn->params.peer->if_mtu; mtu -= conn->params.peer->hdrsize; pkt.ack.bufferSpace = 0; - pkt.ack.maxSkew = htons(skb->priority); - pkt.ack.firstPacket = htonl(chan->last_seq); - pkt.ack.previousPacket = htonl(chan->last_seq - 1); - pkt.ack.serial = htonl(sp->hdr.serial); - pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.maxSkew = htons(skb ? skb->priority : 0); + pkt.ack.firstPacket = htonl(chan->last_seq + 1); + pkt.ack.previousPacket = htonl(chan->last_seq); + pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); + pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - pkt.info.rxMTU = htonl(rxrpc_rx_mtu); - pkt.info.maxMTU = htonl(mtu); - pkt.info.rwind = htonl(rxrpc_rx_window_size); - pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + ack_info.rxMTU = htonl(rxrpc_rx_mtu); + ack_info.maxMTU = htonl(mtu); + ack_info.rwind = htonl(rxrpc_rx_window_size); + ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); pkt.whdr.flags |= RXRPC_SLOW_START_OK; - len += sizeof(pkt.ack) + sizeof(pkt.info); + padding = 0; + iov[0].iov_len += sizeof(pkt.ack); + len += sizeof(pkt.ack) + 3 + sizeof(ack_info); + ioc = 3; break; + + default: + return; } /* Resync with __rxrpc_disconnect_call() and check that the last call @@ -109,9 +121,6 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, if (READ_ONCE(chan->last_call) != call_id) return; - iov.iov_base = &pkt; - iov.iov_len = len; - serial = atomic_inc_return(&conn->serial); pkt.whdr.serial = htonl(serial); @@ -126,7 +135,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; } - kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); _leave(""); return; } @@ -272,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb); + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); return 0; case RXRPC_PACKET_TYPE_BUSY: @@ -379,6 +389,48 @@ abort: } /* + * Process delayed final ACKs that we haven't subsumed into a subsequent call. + */ +static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn) +{ + unsigned long j = jiffies, next_j; + unsigned int channel; + bool set; + +again: + next_j = j + LONG_MAX; + set = false; + for (channel = 0; channel < RXRPC_MAXCALLS; channel++) { + struct rxrpc_channel *chan = &conn->channels[channel]; + unsigned long ack_at; + + if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) + continue; + + smp_rmb(); /* vs rxrpc_disconnect_client_call */ + ack_at = READ_ONCE(chan->final_ack_at); + + if (time_before(j, ack_at)) { + if (time_before(ack_at, next_j)) { + next_j = ack_at; + set = true; + } + continue; + } + + if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, + &conn->flags)) + rxrpc_conn_retransmit_call(conn, NULL, channel); + } + + j = jiffies; + if (time_before_eq(next_j, j)) + goto again; + if (set) + rxrpc_reduce_conn_timer(conn, next_j); +} + +/* * connection-level event processor */ void rxrpc_process_connection(struct work_struct *work) @@ -394,6 +446,10 @@ void rxrpc_process_connection(struct work_struct *work) if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn); + /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fe575798592f..c628351eb900 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -20,10 +20,19 @@ /* * Time till a connection expires after last use (in seconds). */ -unsigned int rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_destroy_connection(struct rcu_head *); +static void rxrpc_connection_timer(struct timer_list *timer) +{ + struct rxrpc_connection *conn = + container_of(timer, struct rxrpc_connection, timer); + + rxrpc_queue_conn(conn); +} + /* * allocate a new connection */ @@ -38,6 +47,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); INIT_LIST_HEAD(&conn->waiting_calls); + timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, &rxrpc_process_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); @@ -301,21 +311,29 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) } /* + * Set the service connection reap timer. + */ +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at) +{ + if (rxnet->live) + timer_reduce(&rxnet->service_conn_reap_timer, reap_at); +} + +/* * Release a service connection */ void rxrpc_put_service_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet; const void *here = __builtin_return_address(0); int n; n = atomic_dec_return(&conn->usage); trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); - if (n == 0) { - rxnet = conn->params.local->rxnet; - rxrpc_queue_delayed_work(&rxnet->service_conn_reaper, 0); - } + if (n == 1) + rxrpc_set_service_reap_timer(conn->params.local->rxnet, + jiffies + rxrpc_connection_expiry); } /* @@ -332,6 +350,7 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) _net("DESTROY CONN %d", conn->debug_id); + del_timer_sync(&conn->timer); rxrpc_purge_queue(&conn->rx_queue); conn->security->clear(conn); @@ -351,17 +370,15 @@ void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; struct rxrpc_net *rxnet = - container_of(to_delayed_work(work), - struct rxrpc_net, service_conn_reaper); - unsigned long reap_older_than, earliest, idle_timestamp, now; + container_of(work, struct rxrpc_net, service_conn_reaper); + unsigned long expire_at, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); now = jiffies; - reap_older_than = now - rxrpc_connection_expiry * HZ; - earliest = ULONG_MAX; + earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { @@ -371,15 +388,21 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - idle_timestamp = READ_ONCE(conn->idle_timestamp); - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, atomic_read(&conn->usage), - (long)reap_older_than - (long)idle_timestamp); - - if (time_after(idle_timestamp, reap_older_than)) { - if (time_before(idle_timestamp, earliest)) - earliest = idle_timestamp; - continue; + if (rxnet->live) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } } /* The usage count sits at 1 whilst the object is unused on the @@ -387,6 +410,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; + trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0); if (rxrpc_conn_is_client(conn)) BUG(); @@ -397,11 +421,10 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } write_unlock(&rxnet->conn_lock); - if (earliest != ULONG_MAX) { - _debug("reschedule reaper %ld", (long) earliest - now); + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, - earliest - now); + rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { @@ -429,9 +452,8 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) rxrpc_destroy_all_client_connections(rxnet); - rxrpc_connection_expiry = 0; - cancel_delayed_work(&rxnet->client_conn_reaper); - rxrpc_queue_delayed_work(&rxnet->client_conn_reaper, 0); + del_timer_sync(&rxnet->service_conn_reap_timer); + rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); write_lock(&rxnet->conn_lock); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1b592073ec96..6fc61400337f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -318,16 +318,18 @@ bad_state: static bool rxrpc_receiving_reply(struct rxrpc_call *call) { struct rxrpc_ack_summary summary = { 0 }; + unsigned long now, timo; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { spin_lock_bh(&call->lock); call->ackr_reason = 0; - call->resend_at = call->expire_at; - call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply, - ktime_get_real()); + now = jiffies; + timo = now + MAX_JIFFY_OFFSET; + WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->ack_at, timo); + trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) @@ -437,6 +439,19 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, if (state >= RXRPC_CALL_COMPLETE) return; + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + unsigned long timo = READ_ONCE(call->next_req_timo); + unsigned long now, expect_req_by; + + if (timo) { + now = jiffies; + expect_req_by = now + timo; + WRITE_ONCE(call->expect_req_by, expect_req_by); + rxrpc_reduce_call_timer(call, expect_req_by, now, + rxrpc_timer_set_for_idle); + } + } + /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ @@ -616,6 +631,43 @@ found: } /* + * Process the response to a ping that we sent to find out if we lost an ACK. + * + * If we got back a ping response that indicates a lower tx_top than what we + * had at the time of the ping transmission, we adjudge all the DATA packets + * sent between the response tx_top and the ping-time tx_top to have been lost. + */ +static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) +{ + rxrpc_seq_t top, bottom, seq; + bool resend = false; + + spin_lock_bh(&call->lock); + + bottom = call->tx_hard_ack + 1; + top = call->acks_lost_top; + if (before(bottom, top)) { + for (seq = bottom; before_eq(seq, top); seq++) { + int ix = seq & RXRPC_RXTX_BUFF_MASK; + u8 annotation = call->rxtx_annotations[ix]; + u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; + + if (anno_type != RXRPC_TX_ANNO_UNACK) + continue; + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = annotation; + resend = true; + } + } + + spin_unlock_bh(&call->lock); + + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + +/* * Process a ping response. */ static void rxrpc_input_ping_response(struct rxrpc_call *call, @@ -630,6 +682,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, smp_rmb(); ping_serial = call->ping_serial; + if (orig_serial == call->acks_lost_ping) + rxrpc_input_check_for_lost_ack(call); + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) return; @@ -908,9 +963,20 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long timo; _enter("%p,%p", call, skb); + timo = READ_ONCE(call->next_rx_timo); + if (timo) { + unsigned long now = jiffies, expect_rx_by; + + expect_rx_by = jiffies + timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, now, + rxrpc_timer_set_for_normal); + } + switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb, skew); @@ -1147,7 +1213,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reupgrade; conn->service_id = sp->hdr.serviceId; } - + if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 1a2d4b112064..c1d9e7fd7448 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -21,33 +21,28 @@ unsigned int rxrpc_max_backlog __read_mostly = 10; /* - * Maximum lifetime of a call (in mx). - */ -unsigned int rxrpc_max_call_lifetime = 60 * 1000; - -/* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in ms). + * packet with RXRPC_REQUEST_ACK set (in jiffies). */ -unsigned int rxrpc_requested_ack_delay = 1; +unsigned long rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in ms). + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * 1000; +unsigned long rxrpc_soft_ack_delay = HZ; /* - * How long to wait before scheduling an ACK with subtype IDLE (in ms). + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; +unsigned long rxrpc_idle_ack_delay = HZ / 2; /* * Receive window size in packets. This indicates the maximum number of @@ -75,7 +70,7 @@ unsigned int rxrpc_rx_jumbo_max = 4; /* * Time till packet resend (in milliseconds). */ -unsigned int rxrpc_resend_timeout = 4 * 1000; +unsigned long rxrpc_resend_timeout = 4 * HZ; const s8 rxrpc_ack_priority[] = { [0] = 0, diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 7edceb8522f5..f18c9248e0d4 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -14,6 +14,24 @@ unsigned int rxrpc_net_id; +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, client_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->client_conn_reaper); +} + +static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, service_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->service_conn_reaper); +} + /* * Initialise a per-network namespace record. */ @@ -22,6 +40,7 @@ static __net_init int rxrpc_init_net(struct net *net) struct rxrpc_net *rxnet = rxrpc_net(net); int ret; + rxnet->live = true; get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); rxnet->epoch |= RXRPC_RANDOM_EPOCH; @@ -31,8 +50,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->conn_proc_list); INIT_LIST_HEAD(&rxnet->service_conns); rwlock_init(&rxnet->conn_lock); - INIT_DELAYED_WORK(&rxnet->service_conn_reaper, - rxrpc_service_connection_reaper); + INIT_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + timer_setup(&rxnet->service_conn_reap_timer, + rxrpc_service_conn_reap_timeout, 0); rxnet->nr_client_conns = 0; rxnet->nr_active_client_conns = 0; @@ -42,8 +63,10 @@ static __net_init int rxrpc_init_net(struct net *net) INIT_LIST_HEAD(&rxnet->waiting_client_conns); INIT_LIST_HEAD(&rxnet->active_client_conns); INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_DELAYED_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); + INIT_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + timer_setup(&rxnet->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); INIT_LIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); @@ -60,6 +83,7 @@ static __net_init int rxrpc_init_net(struct net *net) return 0; err_proc: + rxnet->live = false; return ret; } @@ -70,6 +94,7 @@ static __net_exit void rxrpc_exit_net(struct net *net) { struct rxrpc_net *rxnet = rxrpc_net(net); + rxnet->live = false; rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_locals(rxnet); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f47659c7b224..42410e910aff 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -33,6 +33,24 @@ struct rxrpc_abort_buffer { }; /* + * Arrange for a keepalive ping a certain time after we last transmitted. This + * lets the far side know we're still interested in this call and helps keep + * the route through any intervening firewall open. + * + * Receiving a response to the ping will prevent the ->expect_rx_by timer from + * expiring. + */ +static void rxrpc_set_keepalive(struct rxrpc_call *call) +{ + unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + + keepalive_at += now; + WRITE_ONCE(call->keepalive_at, keepalive_at); + rxrpc_reduce_call_timer(call, keepalive_at, now, + rxrpc_timer_set_for_keepalive); +} + +/* * Fill out an ACK packet. */ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, @@ -95,7 +113,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, /* * Send an ACK call packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) { struct rxrpc_connection *conn = NULL; struct rxrpc_ack_buffer *pkt; @@ -165,6 +184,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) ntohl(pkt->ack.firstPacket), ntohl(pkt->ack.serial), pkt->ack.reason, pkt->ack.nAcks); + if (_serial) + *_serial = serial; if (ping) { call->ping_serial = serial; @@ -202,6 +223,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) call->ackr_seen = top; spin_unlock_bh(&call->lock); } + + rxrpc_set_keepalive(call); } out: @@ -323,7 +346,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * ACKs if a DATA packet appears to have been lost. */ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && - (retrans || + (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || + retrans || call->cong_mode == RXRPC_CALL_SLOW_START || (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), @@ -370,8 +394,23 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = now; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + if (call->peer->rtt_usage > 1) { + unsigned long nowj = jiffies, ack_lost_at; + + ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); + if (ack_lost_at < 1) + ack_lost_at = 1; + + ack_lost_at += nowj; + WRITE_ONCE(call->ack_lost_at, ack_lost_at); + rxrpc_reduce_call_timer(call, ack_lost_at, nowj, + rxrpc_timer_set_for_lost_ack); + } } } + + rxrpc_set_keepalive(call); + _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8510a98b87e1..cc21e8db25b0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -144,11 +144,13 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); +#if 0 // TODO: May want to transmit final ACK under some circumstances anyway if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); } +#endif write_lock_bh(&call->state_lock); @@ -161,7 +163,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->ack_at = call->expire_at; + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; write_unlock_bh(&call->state_lock); rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, rxrpc_propose_ack_processing_op); @@ -217,10 +219,10 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) after_eq(top, call->ackr_seen + 2) || (hard_ack == top && after(hard_ack, call->ackr_consumed))) rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, - true, false, + true, true, rxrpc_propose_ack_rotate_rx); - if (call->ackr_reason) - rxrpc_send_ack_packet(call, false); + if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) + rxrpc_send_ack_packet(call, false, NULL); } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 7d2595582c09..09f2a3e05221 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -21,22 +21,6 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - -struct rxrpc_send_params { - s64 tx_total_len; /* Total Tx data length (if send data) */ - unsigned long user_call_ID; /* User's call ID */ - u32 abort_code; /* Abort code to Tx (if abort) */ - enum rxrpc_command command : 8; /* The command to implement */ - bool exclusive; /* Shared or exclusive call */ - bool upgrade; /* If the connection is upgradeable */ -}; - /* * Wait for space to appear in the Tx queue or a signal to occur. */ @@ -174,6 +158,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long now; rxrpc_seq_t seq = sp->hdr.seq; int ret, ix; u8 annotation = RXRPC_TX_ANNO_UNACK; @@ -213,11 +198,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; - call->ack_at = call->expire_at; + now = jiffies; + WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; - __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, - ktime_get_real()); + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); if (!last) break; /* Fall through */ @@ -239,14 +224,19 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - ktime_t now = ktime_get_real(), resend_at; - - resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - - if (ktime_before(resend_at, call->resend_at)) { - call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); - } + unsigned long now = jiffies, resend_at; + + if (call->peer->rtt_usage > 1) + resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); + else + resend_at = rxrpc_resend_timeout; + if (resend_at < 1) + resend_at = 1; + + resend_at += now; + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); } rxrpc_free_skb(skb, rxrpc_skb_tx_freed); @@ -295,7 +285,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_ack_packet(call, false); + rxrpc_send_ack_packet(call, false, NULL); if (!skb) { size_t size, chunk, max, space; @@ -480,11 +470,11 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; - p->user_call_ID = *(u32 *)CMSG_DATA(cmsg); + p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; - p->user_call_ID = *(unsigned long *) + p->call.user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } got_user_ID = true; @@ -522,11 +512,24 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) break; case RXRPC_TX_LENGTH: - if (p->tx_total_len != -1 || len != sizeof(__s64)) + if (p->call.tx_total_len != -1 || len != sizeof(__s64)) + return -EINVAL; + p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->call.tx_total_len < 0) return -EINVAL; - p->tx_total_len = *(__s64 *)CMSG_DATA(cmsg); - if (p->tx_total_len < 0) + break; + + case RXRPC_SET_CALL_TIMEOUT: + if (len & 3 || len < 4 || len > 12) return -EINVAL; + memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); + p->call.nr_timeouts = len / 4; + if (p->call.timeouts.hard > INT_MAX / HZ) + return -ERANGE; + if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) + return -ERANGE; + if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) + return -ERANGE; break; default: @@ -536,7 +539,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) if (!got_user_ID) return -EINVAL; - if (p->tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; _leave(" = 0"); return 0; @@ -576,8 +579,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, p->user_call_ID, - p->tx_total_len, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); /* The socket is now unlocked */ _leave(" = %p\n", call); @@ -594,15 +596,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_call_state state; struct rxrpc_call *call; + unsigned long now, j; int ret; struct rxrpc_send_params p = { - .tx_total_len = -1, - .user_call_ID = 0, - .abort_code = 0, - .command = RXRPC_CMD_SEND_DATA, - .exclusive = false, - .upgrade = true, + .call.tx_total_len = -1, + .call.user_call_ID = 0, + .call.nr_timeouts = 0, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = false, }; _enter(""); @@ -615,15 +619,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; - call = rxrpc_accept_call(rx, p.user_call_ID, NULL); + call = rxrpc_accept_call(rx, p.call.user_call_ID, NULL); /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); - rxrpc_put_call(call, rxrpc_call_put); - return 0; + ret = 0; + goto out_put_unlock; } - call = rxrpc_find_call_by_user_ID(rx, p.user_call_ID); + call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); if (!call) { ret = -EBADSLT; if (p.command != RXRPC_CMD_SEND_DATA) @@ -653,14 +657,39 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) goto error_put; } - if (p.tx_total_len != -1) { + if (p.call.tx_total_len != -1) { ret = -EINVAL; if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) goto error_put; - call->tx_total_len = p.tx_total_len; + call->tx_total_len = p.call.tx_total_len; + } + } + + switch (p.call.nr_timeouts) { + case 3: + j = msecs_to_jiffies(p.call.timeouts.normal); + if (p.call.timeouts.normal > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_rx_timo, j); + /* Fall through */ + case 2: + j = msecs_to_jiffies(p.call.timeouts.idle); + if (p.call.timeouts.idle > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_req_timo, j); + /* Fall through */ + case 1: + if (p.call.timeouts.hard > 0) { + j = msecs_to_jiffies(p.call.timeouts.hard); + now = jiffies; + j += now; + WRITE_ONCE(call->expect_term_by, j); + rxrpc_reduce_call_timer(call, j, now, + rxrpc_timer_set_for_hard); } + break; } state = READ_ONCE(call->state); @@ -689,6 +718,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len, NULL); } +out_put_unlock: mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 34c706d2f79c..4a7af7aff37d 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -21,6 +21,8 @@ static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; +static const unsigned long one_jiffy = 1; +static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; /* * RxRPC operating parameters. @@ -29,64 +31,60 @@ static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; * information on the individual parameters. */ static struct ctl_table rxrpc_sysctl_table[] = { - /* Values measured in milliseconds */ + /* Values measured in milliseconds but used in jiffies */ { .procname = "req_ack_delay", .data = &rxrpc_requested_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&zero, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "soft_ack_delay", .data = &rxrpc_soft_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_ack_delay", .data = &rxrpc_idle_ack_delay, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, - }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_expiry", .data = &rxrpc_conn_idle_client_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, { .procname = "idle_conn_fast_expiry", .data = &rxrpc_conn_idle_client_fast_expiry, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, - - /* Values measured in seconds but used in jiffies */ { - .procname = "max_call_lifetime", - .data = &rxrpc_max_call_lifetime, - .maxlen = sizeof(unsigned int), + .procname = "resend_timeout", + .data = &rxrpc_resend_timeout, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, - .extra1 = (void *)&one, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, }, /* Non-time values */ diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 1c40caadcff9..d836f998117b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 1e3f10e5da99..6445184b2759 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -22,7 +22,6 @@ #include <net/pkt_sched.h> #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> -#include <linux/rtnetlink.h> static int skbmark_encode(struct sk_buff *skb, void *skbdata, struct tcf_meta_info *e) diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 2ea1f26c9e96..7221437ca3a6 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -22,7 +22,6 @@ #include <net/pkt_sched.h> #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> -#include <linux/rtnetlink.h> static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, struct tcf_meta_info *e) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 8b5abcd2f32f..9438969290a6 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -96,23 +96,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } -static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) +static void tcf_sample_cleanup(struct tc_action *a, int bind) { - struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); + struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; - psample_group = rcu_dereference_protected(s->psample_group, 1); + psample_group = rtnl_dereference(s->psample_group); RCU_INIT_POINTER(s->psample_group, NULL); psample_group_put(psample_group); } -static void tcf_sample_cleanup(struct tc_action *a, int bind) -{ - struct tcf_sample *s = to_sample(a); - - call_rcu(&s->rcu, tcf_sample_cleanup_rcu); -} - static bool tcf_sample_dev_ok_push(struct net_device *dev) { switch (dev->type) { @@ -264,7 +257,6 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { - rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ab255b421781..f40256a3e7f0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -23,7 +23,6 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <linux/err.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -205,13 +204,14 @@ static void tcf_chain_head_change(struct tcf_chain *chain, static void tcf_chain_flush(struct tcf_chain *chain) { - struct tcf_proto *tp; + struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); tcf_chain_head_change(chain, NULL); - while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { + while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_chain_put(chain); tcf_proto_destroy(tp); + tp = rtnl_dereference(chain->filter_chain); + tcf_chain_put(chain); } } @@ -335,7 +335,8 @@ static void tcf_block_put_final(struct work_struct *work) struct tcf_chain *chain, *tmp; rtnl_lock(); - /* Only chain 0 should be still here. */ + + /* At this point, all the chains should have refcnt == 1. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) tcf_chain_put(chain); rtnl_unlock(); @@ -343,15 +344,21 @@ static void tcf_block_put_final(struct work_struct *work) } /* XXX: Standalone actions are not allowed to jump to any chain, and bound - * actions should be all removed after flushing. However, filters are now - * destroyed in tc filter workqueue with RTNL lock, they can not race here. + * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + /* Hold a refcnt for all chains, except 0, so that they don't disappear + * while we are iterating. + */ + list_for_each_entry(chain, &block->chain_list, list) + if (chain->index) + tcf_chain_hold(chain); + + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); tcf_block_offload_unbind(block, q, ei); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fb680dafac5a..6fe798c2df1a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -258,11 +258,8 @@ static int cls_bpf_init(struct tcf_proto *tp) return 0; } -static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +static void cls_bpf_free_parms(struct cls_bpf_prog *prog) { - tcf_exts_destroy(&prog->exts); - tcf_exts_put_net(&prog->exts); - if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); else @@ -270,6 +267,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog->bpf_name); kfree(prog->bpf_ops); +} + +static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) +{ + tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); + + cls_bpf_free_parms(prog); kfree(prog); } @@ -382,15 +387,13 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, { struct bpf_prog *fp; char *name = NULL; + bool skip_sw; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW; - if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) - fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, - qdisc_dev(tp->q)); - else - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -516,12 +519,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); - if (ret) { - if (!oldprog) - idr_remove_ext(&head->handle_idr, prog->handle); - __cls_bpf_delete_prog(prog); - return ret; - } + if (ret) + goto errout_parms; if (!tc_in_hw(prog->gen_flags)) prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; @@ -539,6 +538,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = prog; return 0; +errout_parms: + cls_bpf_free_parms(prog); errout_idr: if (!oldprog) idr_remove_ext(&head->handle_idr, prog->handle); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ac152b4f4247..507859cdd1cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -45,7 +45,6 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> -#include <linux/netdevice.h> #include <linux/idr.h> struct tc_u_knode { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6361be7881f1..525eb3a6d625 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1158,9 +1158,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL) return -EINVAL; + err = tcf_block_get(&q->link.block, &q->link.filter_list, sch); + if (err) + goto put_rtab; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - goto put_rtab; + goto put_block; q->link.sibling = &q->link; q->link.common.classid = sch->handle; @@ -1194,6 +1198,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) cbq_addprio(q, &q->link); return 0; +put_block: + tcf_block_put(q->link.block); + put_rtab: qdisc_put_rtab(q->link.R_tab); return err; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index b30a2c70bd48..531250fceb9e 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -369,6 +369,9 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) ctl = nla_data(tb[TCA_CHOKE_PARMS]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; + if (ctl->limit > CHOKE_MAX_QUEUE) return -EINVAL; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3839cbbdc32b..cd1b200acae7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -26,6 +26,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/if_vlan.h> +#include <linux/if_macvlan.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> @@ -277,6 +278,8 @@ unsigned long dev_trans_start(struct net_device *dev) if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); + else if (netif_is_macvlan(dev)) + dev = macvlan_dev_real_dev(dev); res = netdev_get_tx_queue(dev, 0)->trans_start; for (i = 1; i < dev->num_tx_queues; i++) { val = netdev_get_tx_queue(dev, i)->trans_start; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 17c7130454bd..bc30f9186ac6 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -356,6 +356,9 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; + if (!q) { table->tab[dp] = q = *prealloc; *prealloc = NULL; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7f8ea9e297c3..9d874e60e032 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -212,6 +212,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + return -EINVAL; if (ctl->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 890f4a4564e7..930e5bd26d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -639,6 +639,9 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog)) + return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) @@ -724,6 +727,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; + q->sch = sch; timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); err = tcf_block_get(&q->block, &q->filter_list, sch); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7b261afc47b9..7f8baa48e7c2 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -53,6 +53,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_delay = 1; + msg->abandoned = 0; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); } @@ -304,6 +305,13 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (!chunk->asoc->peer.prsctp_capable) return 0; + if (chunk->msg->abandoned) + return 1; + + if (!chunk->has_tsn && + !(chunk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG)) + return 0; + if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = @@ -316,6 +324,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } + chunk->msg->abandoned = 1; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { @@ -324,10 +333,12 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + chunk->msg->abandoned = 1; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && time_after(jiffies, chunk->msg->expires_at)) { + chunk->msg->abandoned = 1; return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4db012aa25f7..7d67feeeffc1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -364,10 +364,12 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { struct sctp_stream_out *streamout; - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; list_del_init(&chk->transmitted_list); sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); @@ -377,7 +379,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - if (!chk->tsn_gap_acked) { + if (queue != &asoc->outqueue.retransmit && + !chk->tsn_gap_acked) { if (chk->transport) chk->transport->flight_size -= sctp_data_size(chk); @@ -403,10 +406,13 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->sched->unsched_all(&asoc->stream); list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { - if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) + if (!chk->msg->abandoned && + (!(chk->chunk_hdr->flags & SCTP_DATA_FIRST_FRAG) || + !SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)) continue; + chk->msg->abandoned = 1; sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; @@ -1434,7 +1440,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* If this chunk has not been acked, stop * considering it as 'outstanding'. */ - if (!tchunk->tsn_gap_acked) { + if (transmitted_queue != &q->retransmit && + !tchunk->tsn_gap_acked) { if (tchunk->transport) tchunk->transport->flight_size -= sctp_data_size(tchunk); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f5172c21349b..6a38c2503649 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1499,6 +1499,7 @@ static __init int sctp_init(void) INIT_LIST_HEAD(&sctp_address_families); sctp_v4_pf_init(); sctp_v6_pf_init(); + sctp_sched_ops_init(); status = register_pernet_subsys(&sctp_defaults_ops); if (status) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3204a9b29407..3253f724a995 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -188,13 +188,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, list_for_each_entry(chunk, &t->transmitted, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->retransmit, list) + list_for_each_entry(chunk, &q->retransmit, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->sacked, list) + list_for_each_entry(chunk, &q->sacked, transmitted_list) cb(chunk); - list_for_each_entry(chunk, &q->abandoned, list) + list_for_each_entry(chunk, &q->abandoned, transmitted_list) cb(chunk); list_for_each_entry(chunk, &q->out_chunk_list, list) @@ -3891,13 +3891,17 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, struct sctp_association *asoc; int retval = -EINVAL; - if (optlen < sizeof(struct sctp_reset_streams)) + if (optlen < sizeof(*params)) return -EINVAL; params = memdup_user(optval, optlen); if (IS_ERR(params)) return PTR_ERR(params); + if (params->srs_number_streams * sizeof(__u16) > + optlen - sizeof(*params)) + goto out; + asoc = sctp_id2assoc(sk, params->srs_assoc_id); if (!asoc) goto out; @@ -5080,7 +5084,6 @@ static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *p *newfile = sock_alloc_file(newsock, 0, NULL); if (IS_ERR(*newfile)) { put_unused_fd(retval); - sock_release(newsock); retval = PTR_ERR(*newfile); *newfile = NULL; return retval; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index a11db21dc8a0..76ea66be0bbe 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -64,7 +64,7 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, */ /* Mark as failed send. */ - sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + sctp_chunk_fail(ch, (__force __u32)SCTP_ERROR_INV_STRM); if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; @@ -254,6 +254,30 @@ static int sctp_send_reconf(struct sctp_association *asoc, return retval; } +static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, + __u16 str_nums, __be16 *str_list) +{ + struct sctp_association *asoc; + __u16 i; + + asoc = container_of(stream, struct sctp_association, stream); + if (!asoc->outqueue.out_qlen) + return true; + + if (!str_nums) + return false; + + for (i = 0; i < str_nums; i++) { + __u16 sid = ntohs(str_list[i]); + + if (stream->out[sid].ext && + !list_empty(&stream->out[sid].ext->outq)) + return false; + } + + return true; +} + int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params) { @@ -317,6 +341,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, for (i = 0; i < str_nums; i++) nstr_list[i] = htons(str_list[i]); + if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { + retval = -EAGAIN; + goto out; + } + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); kfree(nstr_list); @@ -377,6 +406,9 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) if (asoc->strreset_outstanding) return -EINPROGRESS; + if (!sctp_outq_is_empty(&asoc->outqueue)) + return -EAGAIN; + chunk = sctp_make_strreset_tsnreq(asoc); if (!chunk) return -ENOMEM; @@ -563,7 +595,7 @@ struct sctp_chunk *sctp_process_strreset_outreq( flags = SCTP_STREAM_RESET_INCOMING_SSN; } - nums = (ntohs(param.p->length) - sizeof(*outreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); if (nums) { str_p = outreq->list_of_streams; for (i = 0; i < nums; i++) { @@ -627,7 +659,7 @@ struct sctp_chunk *sctp_process_strreset_inreq( goto out; } - nums = (ntohs(param.p->length) - sizeof(*inreq)) / 2; + nums = (ntohs(param.p->length) - sizeof(*inreq)) / sizeof(__u16); str_p = inreq->list_of_streams; for (i = 0; i < nums; i++) { if (ntohs(str_p[i]) >= stream->outcnt) { @@ -636,6 +668,12 @@ struct sctp_chunk *sctp_process_strreset_inreq( } } + if (!sctp_stream_outq_is_empty(stream, nums, str_p)) { + result = SCTP_STRRESET_IN_PROGRESS; + asoc->strreset_inseq--; + goto err; + } + chunk = sctp_make_strreset_req(asoc, nums, str_p, 1, 0); if (!chunk) goto out; @@ -687,12 +725,18 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( i = asoc->strreset_inseq - request_seq - 1; result = asoc->strreset_result[i]; if (result == SCTP_STRRESET_PERFORMED) { - next_tsn = asoc->next_tsn; + next_tsn = asoc->ctsn_ack_point + 1; init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; } goto err; } + + if (!sctp_outq_is_empty(&asoc->outqueue)) { + result = SCTP_STRRESET_IN_PROGRESS; + goto err; + } + asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) @@ -703,9 +747,10 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( goto out; } - /* G3: The same processing as though a SACK chunk with no gap report - * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were - * received MUST be performed. + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. */ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); @@ -720,10 +765,9 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); - /* G4: The same processing as though a FWD-TSN chunk (as defined in - * [RFC3758]) with all streams affected and a new cumulative TSN - * ACK of the Receiver's Next TSN minus 1 were received MUST be - * performed. + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. */ sctp_outq_free(&asoc->outqueue); @@ -927,7 +971,8 @@ struct sctp_chunk *sctp_process_strreset_resp( outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; - nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; + nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / + sizeof(__u16); if (result == SCTP_STRRESET_PERFORMED) { if (nums) { @@ -956,7 +1001,8 @@ struct sctp_chunk *sctp_process_strreset_resp( inreq = (struct sctp_strreset_inreq *)req; str_p = inreq->list_of_streams; - nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / + sizeof(__u16); *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); @@ -975,6 +1021,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) { __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( &asoc->peer.tsn_map); + LIST_HEAD(temp); sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); @@ -983,7 +1030,13 @@ struct sctp_chunk *sctp_process_strreset_resp( SCTP_TSN_MAP_INITIAL, stsn, GFP_ATOMIC); + /* Clean up sacked and abandoned queues only. As the + * out_chunk_list may not be empty, splice it to temp, + * then get it back after sctp_outq_free is done. + */ + list_splice_init(&asoc->outqueue.out_chunk_list, &temp); sctp_outq_free(&asoc->outqueue); + list_splice_init(&temp, &asoc->outqueue.out_chunk_list); asoc->next_tsn = rtsn; asoc->ctsn_ack_point = asoc->next_tsn - 1; diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 0b83ec51e43b..d8c162a4089c 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -119,16 +119,27 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .unsched_all = sctp_sched_fcfs_unsched_all, }; +static void sctp_sched_ops_fcfs_init(void) +{ + sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); +} + /* API to other parts of the stack */ -extern struct sctp_sched_ops sctp_sched_prio; -extern struct sctp_sched_ops sctp_sched_rr; +static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; -static struct sctp_sched_ops *sctp_sched_ops[] = { - &sctp_sched_fcfs, - &sctp_sched_prio, - &sctp_sched_rr, -}; +void sctp_sched_ops_register(enum sctp_sched_type sched, + struct sctp_sched_ops *sched_ops) +{ + sctp_sched_ops[sched] = sched_ops; +} + +void sctp_sched_ops_init(void) +{ + sctp_sched_ops_fcfs_init(); + sctp_sched_ops_prio_init(); + sctp_sched_ops_rr_init(); +} int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 384dbf3c8760..7997d35dd0fd 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -333,7 +333,7 @@ static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) sctp_sched_prio_unsched(soute); } -struct sctp_sched_ops sctp_sched_prio = { +static struct sctp_sched_ops sctp_sched_prio = { .set = sctp_sched_prio_set, .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, @@ -345,3 +345,8 @@ struct sctp_sched_ops sctp_sched_prio = { .sched_all = sctp_sched_prio_sched_all, .unsched_all = sctp_sched_prio_unsched_all, }; + +void sctp_sched_ops_prio_init(void) +{ + sctp_sched_ops_register(SCTP_SS_PRIO, &sctp_sched_prio); +} diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 7612a438c5b9..1155692448f1 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -187,7 +187,7 @@ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) sctp_sched_rr_unsched(stream, soute); } -struct sctp_sched_ops sctp_sched_rr = { +static struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, @@ -199,3 +199,8 @@ struct sctp_sched_ops sctp_sched_rr = { .sched_all = sctp_sched_rr_sched_all, .unsched_all = sctp_sched_rr_unsched_all, }; + +void sctp_sched_ops_rr_init(void) +{ + sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); +} diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2578fbd95664..94f21116dac5 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -562,7 +562,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - struct smc_buf_desc *buf_desc = NULL; + struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); struct list_head *buf_list; int bufsize, bufsize_short; int sk_buf_size; @@ -575,7 +575,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ sk_buf_size = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); + for (bufsize_short = smc_compress_bufsize(sk_buf_size); bufsize_short >= 0; bufsize_short--) { if (is_rmb) { diff --git a/net/socket.c b/net/socket.c index 42d8e9c9ccd5..05f361faec45 100644 --- a/net/socket.c +++ b/net/socket.c @@ -406,8 +406,10 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) name.len = strlen(name.name); } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); - if (unlikely(!path.dentry)) + if (unlikely(!path.dentry)) { + sock_release(sock); return ERR_PTR(-ENOMEM); + } path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); @@ -415,9 +417,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); if (IS_ERR(file)) { - /* drop dentry, keep inode */ + /* drop dentry, keep inode for a bit */ ihold(d_inode(path.dentry)); path_put(&path); + /* ... and now kill it properly */ + sock_release(sock); return file; } @@ -1330,19 +1334,9 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) retval = sock_create(family, type, protocol, &sock); if (retval < 0) - goto out; - - retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); - if (retval < 0) - goto out_release; - -out: - /* It may be already another descriptor 8) Not kernel problem. */ - return retval; + return retval; -out_release: - sock_release(sock); - return retval; + return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); } /* @@ -1366,87 +1360,72 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; /* + * reserve descriptors and make sure we won't fail + * to return them to userland. + */ + fd1 = get_unused_fd_flags(flags); + if (unlikely(fd1 < 0)) + return fd1; + + fd2 = get_unused_fd_flags(flags); + if (unlikely(fd2 < 0)) { + put_unused_fd(fd1); + return fd2; + } + + err = put_user(fd1, &usockvec[0]); + if (err) + goto out; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out; + + /* * Obtain the first socket and check if the underlying protocol * supports the socketpair call. */ err = sock_create(family, type, protocol, &sock1); - if (err < 0) + if (unlikely(err < 0)) goto out; err = sock_create(family, type, protocol, &sock2); - if (err < 0) - goto out_release_1; - - err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) - goto out_release_both; - - fd1 = get_unused_fd_flags(flags); - if (unlikely(fd1 < 0)) { - err = fd1; - goto out_release_both; + if (unlikely(err < 0)) { + sock_release(sock1); + goto out; } - fd2 = get_unused_fd_flags(flags); - if (unlikely(fd2 < 0)) { - err = fd2; - goto out_put_unused_1; + err = sock1->ops->socketpair(sock1, sock2); + if (unlikely(err < 0)) { + sock_release(sock2); + sock_release(sock1); + goto out; } newfile1 = sock_alloc_file(sock1, flags, NULL); if (IS_ERR(newfile1)) { err = PTR_ERR(newfile1); - goto out_put_unused_both; + sock_release(sock2); + goto out; } newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - goto out_fput_1; + fput(newfile1); + goto out; } - err = put_user(fd1, &usockvec[0]); - if (err) - goto out_fput_both; - - err = put_user(fd2, &usockvec[1]); - if (err) - goto out_fput_both; - audit_fd_pair(fd1, fd2); fd_install(fd1, newfile1); fd_install(fd2, newfile2); - /* fd1 and fd2 may be already another descriptors. - * Not kernel problem. - */ - return 0; -out_fput_both: - fput(newfile2); - fput(newfile1); - put_unused_fd(fd2); - put_unused_fd(fd1); - goto out; - -out_fput_1: - fput(newfile1); - put_unused_fd(fd2); - put_unused_fd(fd1); - sock_release(sock2); - goto out; - -out_put_unused_both: +out: put_unused_fd(fd2); -out_put_unused_1: put_unused_fd(fd1); -out_release_both: - sock_release(sock2); -out_release_1: - sock_release(sock1); -out: return err; } @@ -1562,7 +1541,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, if (IS_ERR(newfile)) { err = PTR_ERR(newfile); put_unused_fd(newfd); - sock_release(newsock); goto out_put; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7b1ee5a0b03c..5dd4e6c9fef2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -264,7 +264,7 @@ out: return status; } -static struct cache_detail rsi_cache_template = { +static const struct cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", @@ -524,7 +524,7 @@ out: return status; } -static struct cache_detail rsc_cache_template = { +static const struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .name = "auth.rpcsec.context", @@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g return stat; if (integ_len > buf->len) return stat; - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) - BUG(); + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { + WARN_ON_ONCE(1); + return stat; + } /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) - BUG(); + return stat; if (mic.len > RPC_MAX_AUTH_SIZE) return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); @@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) BUG_ON(integ_len % 4); *p++ = htonl(integ_len); *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) - BUG(); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { + WARN_ON_ONCE(1); + goto out_err; + } if (resbuf->tail[0].iov_base == NULL) { if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto out_err; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 79d55d949d9a..e68943895be4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,7 +1674,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) } EXPORT_SYMBOL_GPL(cache_unregister_net); -struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) +struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; int i; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a801da812f86..e2a4184f3c5d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1841,6 +1841,7 @@ call_bind_status(struct rpc_task *task) case -ECONNABORTED: case -ENOTCONN: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -ENOBUFS: @@ -1917,6 +1918,7 @@ call_connect_status(struct rpc_task *task) /* fall through */ case -ECONNRESET: case -ECONNABORTED: + case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: case -EADDRINUSE: @@ -2022,6 +2024,7 @@ call_transmit_status(struct rpc_task *task) */ case -ECONNREFUSED: case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: @@ -2071,6 +2074,7 @@ call_bc_transmit(struct rpc_task *task) switch (task->tk_status) { case 0: /* Success */ + case -ENETDOWN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: @@ -2139,6 +2143,7 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { case -EHOSTDOWN: + case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EPERM: diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 71de77bd4423..f9307bd6644b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, - struct net *net, const int family, - const unsigned short port, int flags) +static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp = NULL; int cpu; - bool queued = false; if (!svc_xprt_has_something_to_do(xprt)) goto out; @@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) atomic_long_inc(&pool->sp_stats.packets); -redo_search: + dprintk("svc: transport %p put into queue\n", xprt); + spin_lock_bh(&pool->sp_lock); + list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; + spin_unlock_bh(&pool->sp_lock); + /* find a thread for this xprt */ rcu_read_lock(); list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* Do a lockless check first */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; - - /* - * Once the xprt has been queued, it can only be dequeued by - * the task that intends to service it. All we can do at that - * point is to try to wake this thread back up so that it can - * do so. - */ - if (!queued) { - spin_lock_bh(&rqstp->rq_lock); - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { - /* already busy, move on... */ - spin_unlock_bh(&rqstp->rq_lock); - continue; - } - - /* this one will do */ - rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); - spin_unlock_bh(&rqstp->rq_lock); - } - rcu_read_unlock(); - atomic_long_inc(&pool->sp_stats.threads_woken); wake_up_process(rqstp->rq_task); - put_cpu(); - goto out; - } - rcu_read_unlock(); - - /* - * We didn't find an idle thread to use, so we need to queue the xprt. - * Do so and then search again. If we find one, we can't hook this one - * up to it directly but we can wake the thread up in the hopes that it - * will pick it up once it searches for a xprt to service. - */ - if (!queued) { - queued = true; - dprintk("svc: transport %p put into queue\n", xprt); - spin_lock_bh(&pool->sp_lock); - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - pool->sp_stats.sockets_queued++; - spin_unlock_bh(&pool->sp_lock); - goto redo_search; + goto out_unlock; } + set_bit(SP_CONGESTED, &pool->sp_flags); rqstp = NULL; +out_unlock: + rcu_read_unlock(); put_cpu(); out: trace_svc_xprt_do_enqueue(xprt, rqstp); @@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp) static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { - struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; long time_left = 0; /* rq_xprt should be clear on entry */ WARN_ON_ONCE(rqstp->rq_xprt); - /* Normally we will wait up to 5 seconds for any required - * cache information to be provided. - */ - rqstp->rq_chandle.thread_wait = 5*HZ; - - xprt = svc_xprt_dequeue(pool); - if (xprt) { - rqstp->rq_xprt = xprt; - - /* As there is a shortage of threads and this request - * had to be queued, don't allow the thread to wait so - * long for cache updates. - */ - rqstp->rq_chandle.thread_wait = 1*HZ; - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - return xprt; - } + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + smp_mb__before_atomic(); + clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb(); + smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) time_left = schedule_timeout(timeout); @@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) try_to_freeze(); - spin_lock_bh(&rqstp->rq_lock); set_bit(RQ_BUSY, &rqstp->rq_flags); - spin_unlock_bh(&rqstp->rq_lock); - - xprt = rqstp->rq_xprt; - if (xprt != NULL) - return xprt; + smp_mb__after_atomic(); + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); @@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); +out_found: + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + if (!test_bit(SP_CONGESTED, &pool->sp_flags)) + rqstp->rq_chandle.thread_wait = 5*HZ; + else + rqstp->rq_chandle.thread_wait = 1*HZ; + return rqstp->rq_xprt; } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) @@ -785,7 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt serv->sv_tmpcnt++; if (serv->sv_temptimer.function == NULL) { /* setup timer to age temp transports */ - serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts; + serv->sv_temptimer.function = svc_age_temp_xprts; mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index f81eaa8e0888..740b67d5a733 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -569,7 +569,7 @@ static int unix_gid_show(struct seq_file *m, return 0; } -static struct cache_detail unix_gid_cache_template = { +static const struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, .name = "auth.unix.gid", @@ -862,7 +862,7 @@ struct auth_ops svcauth_unix = { .set_client = svcauth_unix_set_client, }; -static struct cache_detail ip_map_cache_template = { +static const struct cache_detail ip_map_cache_template = { .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .name = "auth.unix.ip", diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 992594b7cc6b..af7893501e40 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, if (ret) goto out_err; + /* Bump page refcnt so Send completion doesn't release + * the rq_buffer before all retransmits are complete. + */ + get_page(virt_to_page(rqst->rq_buffer)); ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); if (ret) goto out_unmap; @@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - /* svc_rdma_sendto releases this page */ page = alloc_page(RPCRDMA_DEF_GFP); if (!page) return -ENOMEM; @@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; + put_page(virt_to_page(rqst->rq_buffer)); kfree(rqst->rq_rbuffer); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5caf8e722a11..46ec069150d5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context) ib_event_msg(event->event), event->event, event->element.qp); set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); break; } } @@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) goto out; - svc_xprt_enqueue(&xprt->sc_xprt); - goto out; + goto out_enqueue; flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) @@ -333,6 +333,8 @@ flushed: set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); +out_enqueue: + svc_xprt_enqueue(&xprt->sc_xprt); out: svc_xprt_put(&xprt->sc_xprt); } @@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("svcrdma: Send: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), @@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_DEVICE_REMOVAL: dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", xprt, cma_id); - if (xprt) + if (xprt) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); + } break; default: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9cc850c2719e..6d0cc3b8f932 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2440,7 +2440,9 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ case -ECONNREFUSED: case -ECONNRESET: + case -ENETDOWN: case -ENETUNREACH: + case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: /* diff --git a/net/tipc/group.c b/net/tipc/group.c index 7821085a7dd8..95fec2c057d6 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -497,6 +497,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, while ((skb = skb_peek(defq))) { hdr = buf_msg(skb); mtyp = msg_type(hdr); + blks = msg_blocks(hdr); deliver = true; ack = false; update = false; @@ -539,14 +540,13 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); if (leave) { - tipc_group_delete_member(grp, m); __skb_queue_purge(defq); + tipc_group_delete_member(grp, m); break; } if (!update) continue; - blks = msg_blocks(hdr); tipc_group_update_rcv_win(grp, blks, node, port, xmitq); } return; diff --git a/net/tipc/server.c b/net/tipc/server.c index acaef80fb88c..d60c30342327 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -314,6 +314,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); if (!newcon->usr_data) { sock_release(newsock); + conn_put(newcon); return -ENOMEM; } @@ -511,7 +512,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, s = con->server; scbr = s->tipc_conn_new(*conid); if (!scbr) { - tipc_close_conn(con); + conn_put(con); return false; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5d18c0caa92b..41127d0b925e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, __skb_dequeue(arrvq); __skb_queue_tail(inputq, skb); } - refcount_dec(&skb->users); + kfree_skb(skb); spin_unlock_bh(&inputq->lock); continue; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 5583df708b8c..a827547aa102 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -487,7 +487,7 @@ static void hvs_release(struct vsock_sock *vsk) lock_sock(sk); - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; vsock_remove_sock(vsk); release_sock(sk); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 391775e3575c..a7a73ffe675b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -797,11 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk) /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data - * left in our consume queue. + * left in our consume queue. If the local endpoint is a host, + * we can't call vsock_stream_has_data, since that may block, + * but a host endpoint can't read data once the VM has + * detached, so there is no available data in that case. */ - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = TCP_CLOSE; - + if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || + vsock_stream_has_data(vsk) <= 0) { if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., @@ -811,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } + sk->sk_state = TCP_CLOSE; } sk->sk_state_change(sk); } @@ -2144,7 +2148,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.4.0-k"); +MODULE_VERSION("1.0.5.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index da91bb547db3..1abcc4fc4df1 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -20,6 +20,10 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + # may need to update this when certificates are changed and are + # using a different algorithm, though right now they shouldn't + # (this is here rather than below to allow it to be a module) + select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS ---help--- cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. @@ -113,6 +117,9 @@ config CFG80211_EXTRA_REGDB_KEYDIR certificates like in the kernel sources (net/wireless/certs/) that shall be accepted for a signed regulatory database. + Note that you need to also select the correct CRYPTO_<hash> modules + for your certificates, and if cfg80211 is built-in they also must be. + config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 278d979c211a..d7d6cb00c47b 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -25,17 +25,45 @@ endif $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) @$(kecho) " GEN $@" - @echo '#include "reg.h"' > $@ - @echo 'const u8 shipped_regdb_certs[] = {' >> $@ - @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done - @echo '};' >> $@ - @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ + @(set -e; \ + allf=""; \ + for f in $^ ; do \ + # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ + thisf=$$(od -An -v -tx1 < $$f | \ + sed -e 's/ /\n/g' | \ + sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ + sed -e 's/^/0x/;s/$$/,/'); \ + # file should not be empty - maybe command substitution failed? \ + test ! -z "$$thisf";\ + allf=$$allf$$thisf;\ + done; \ + ( \ + echo '#include "reg.h"'; \ + echo 'const u8 shipped_regdb_certs[] = {'; \ + echo "$$allf"; \ + echo '};'; \ + echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ + ) >> $@) $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @$(kecho) " GEN $@" - @echo '#include "reg.h"' > $@ - @echo 'const u8 extra_regdb_certs[] = {' >> $@ - @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done - @echo '};' >> $@ - @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ + @(set -e; \ + allf=""; \ + for f in $^ ; do \ + # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ + thisf=$$(od -An -v -tx1 < $$f | \ + sed -e 's/ /\n/g' | \ + sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ + sed -e 's/^/0x/;s/$$/,/'); \ + # file should not be empty - maybe command substitution failed? \ + test ! -z "$$thisf";\ + allf=$$allf$$thisf;\ + done; \ + ( \ + echo '#include "reg.h"'; \ + echo 'const u8 extra_regdb_certs[] = {'; \ + echo "$$allf"; \ + echo '};'; \ + echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ + ) >> $@) diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index 459611577d3d..801d4781a73b 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(lib80211_crypto_lock); static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force); static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); -static void lib80211_crypt_deinit_handler(unsigned long data); +static void lib80211_crypt_deinit_handler(struct timer_list *t); int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, spinlock_t *lock) @@ -55,8 +55,8 @@ int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, info->lock = lock; INIT_LIST_HEAD(&info->crypt_deinit_list); - setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, - (unsigned long)info); + timer_setup(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, + 0); return 0; } @@ -116,9 +116,10 @@ static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) spin_unlock_irqrestore(info->lock, flags); } -static void lib80211_crypt_deinit_handler(unsigned long data) +static void lib80211_crypt_deinit_handler(struct timer_list *t) { - struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; + struct lib80211_crypt_info *info = from_timer(info, t, + crypt_deinit_timer); unsigned long flags; lib80211_crypt_deinit_entries(info, 0); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a0e1951227fa..b1ac23ca20c8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2605,10 +2605,32 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - if (wdev->ssid_len) { - if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) + wdev_lock(wdev); + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + if (wdev->ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_ADHOC: { + const u8 *ssid_ie; + if (!wdev->current_bss) + break; + ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, + WLAN_EID_SSID); + if (!ssid_ie) + break; + if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) + goto nla_put_failure; + break; + } + default: + /* nothing */ + break; } + wdev_unlock(wdev); genlmsg_end(msg, hdr); return 0; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3871998059de..78e71b0390be 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3644,27 +3644,14 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, } } -int __init regulatory_init(void) +static int __init regulatory_init_db(void) { - int err = 0; + int err; err = load_builtin_regdb_keys(); if (err) return err; - reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); - if (IS_ERR(reg_pdev)) - return PTR_ERR(reg_pdev); - - spin_lock_init(®_requests_lock); - spin_lock_init(®_pending_beacons_lock); - spin_lock_init(®_indoor_lock); - - rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); - - user_alpha2[0] = '9'; - user_alpha2[1] = '7'; - /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { @@ -3692,6 +3679,31 @@ int __init regulatory_init(void) return 0; } +#ifndef MODULE +late_initcall(regulatory_init_db); +#endif + +int __init regulatory_init(void) +{ + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); + if (IS_ERR(reg_pdev)) + return PTR_ERR(reg_pdev); + + spin_lock_init(®_requests_lock); + spin_lock_init(®_pending_beacons_lock); + spin_lock_init(®_indoor_lock); + + rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); + + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + +#ifdef MODULE + return regulatory_init_db(); +#else + return 0; +#endif +} void regulatory_exit(void) { diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ea87143314f3..562cc11131f6 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -415,7 +415,7 @@ static void __x25_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; - sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer; + sk->sk_timer.function = x25_destroy_timer; add_timer(&sk->sk_timer); } else { /* drop last reference so sock_put will free */ diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index e0cd04d28352..a6a8ab09b914 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -36,7 +36,7 @@ LIST_HEAD(x25_neigh_list); DEFINE_RWLOCK(x25_neigh_list_lock); -static void x25_t20timer_expiry(unsigned long); +static void x25_t20timer_expiry(struct timer_list *); static void x25_transmit_restart_confirmation(struct x25_neigh *nb); static void x25_transmit_restart_request(struct x25_neigh *nb); @@ -49,9 +49,9 @@ static inline void x25_start_t20timer(struct x25_neigh *nb) mod_timer(&nb->t20timer, jiffies + nb->t20); } -static void x25_t20timer_expiry(unsigned long param) +static void x25_t20timer_expiry(struct timer_list *t) { - struct x25_neigh *nb = (struct x25_neigh *)param; + struct x25_neigh *nb = from_timer(nb, t, t20timer); x25_transmit_restart_request(nb); @@ -252,7 +252,7 @@ void x25_link_device_up(struct net_device *dev) return; skb_queue_head_init(&nb->queue); - setup_timer(&nb->t20timer, x25_t20timer_expiry, (unsigned long)nb); + timer_setup(&nb->t20timer, x25_t20timer_expiry, 0); dev_hold(dev); nb->dev = dev; diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index 1dfba3c23459..fa3461002b3e 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -36,7 +36,7 @@ void x25_init_timers(struct sock *sk) timer_setup(&x25->timer, x25_timer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry; + sk->sk_timer.function = x25_heartbeat_expiry; } void x25_start_heartbeat(struct sock *sk) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1f5cee2269af..065d89606888 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -556,7 +556,7 @@ out: return HRTIMER_NORESTART; } -static void xfrm_replay_timer_handler(unsigned long data); +static void xfrm_replay_timer_handler(struct timer_list *t); struct xfrm_state *xfrm_state_alloc(struct net *net) { @@ -574,8 +574,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->byspi); tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME, HRTIMER_MODE_ABS); - setup_timer(&x->rtimer, xfrm_replay_timer_handler, - (unsigned long)x); + timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); x->curlft.add_time = get_seconds(); x->lft.soft_byte_limit = XFRM_INF; x->lft.soft_packet_limit = XFRM_INF; @@ -1879,9 +1878,9 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net) } EXPORT_SYMBOL(xfrm_state_walk_done); -static void xfrm_replay_timer_handler(unsigned long data) +static void xfrm_replay_timer_handler(struct timer_list *t) { - struct xfrm_state *x = (struct xfrm_state *)data; + struct xfrm_state *x = from_timer(x, t, rtimer); spin_lock(&x->lock); |