From 00af5c69598212cf6cd4ecb4ca89785118aeecad Mon Sep 17 00:00:00 2001 From: roel kluin Date: Wed, 29 Oct 2008 15:55:53 -0400 Subject: cipso: unsigned buf_len cannot be negative unsigned buf_len cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Paul Moore --- net/ipv4/cipso_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 490e035c6d90..2e78f6bd9775 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2063,9 +2063,10 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, u32 opt_len; int len_delta; - buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); - if (buf_len < 0) - return buf_len; + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + return ret_val; + buf_len = ret_val; opt_len = (buf_len + 3) & ~3; /* we overwrite any existing options to ensure that we have enough -- cgit v1.2.3 From f8a024796b2bbec3d1a4ad5aae6173cfb18226b4 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 29 Oct 2008 16:09:12 -0400 Subject: netlabel: Fix compiler warnings in netlabel_mgmt.c Fix the compiler warnings below, thanks to Andrew Morton for finding them. net/netlabel/netlabel_mgmt.c: In function `netlbl_mgmt_listentry': net/netlabel/netlabel_mgmt.c:268: warning: 'ret_val' might be used uninitialized in this function Signed-off-by: Paul Moore --- net/netlabel/netlabel_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index ee769ecaa13c..0a0ef17b2a40 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -265,7 +265,7 @@ add_failure: static int netlbl_mgmt_listentry(struct sk_buff *skb, struct netlbl_dom_map *entry) { - int ret_val; + int ret_val = 0; struct nlattr *nla_a; struct nlattr *nla_b; struct netlbl_af4list *iter4; -- cgit v1.2.3 From 47b676c0e03dcfd88de91f6f24a06653cfdf32af Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Thu, 30 Oct 2008 10:44:48 -0400 Subject: netlabel: Fix compilation warnings in net/netlabel/netlabel_addrlist.c Enable netlabel auditing functions only when CONFIG_AUDIT is set Signed-off-by: Manish Katiyar Signed-off-by: Paul Moore --- net/netlabel/netlabel_addrlist.c | 2 ++ net/netlabel/netlabel_addrlist.h | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'net') diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index b0925a303353..249f6b92f153 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -315,6 +315,7 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, * Audit Helper Functions */ +#ifdef CONFIG_AUDIT /** * netlbl_af4list_audit_addr - Audit an IPv4 address * @audit_buf: audit buffer @@ -386,3 +387,4 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, } } #endif /* IPv6 */ +#endif /* CONFIG_AUDIT */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 0242bead405f..07ae7fd82be1 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -120,9 +120,19 @@ struct netlbl_af4list *netlbl_af4list_search(__be32 addr, struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, __be32 mask, struct list_head *head); + +#ifdef CONFIG_AUDIT void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, __be32 addr, __be32 mask); +#else +static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + return; +} +#endif #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -179,11 +189,23 @@ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, const struct in6_addr *mask, struct list_head *head); + +#ifdef CONFIG_AUDIT void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, const struct in6_addr *addr, const struct in6_addr *mask); +#else +static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + return; +} +#endif #endif /* IPV6 */ #endif -- cgit v1.2.3 From 485ac57bc1238719b1508f91b0f9eeda4a3c84bb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 30 Oct 2008 23:55:16 -0700 Subject: netns: add register_pernet_gen_subsys/unregister_pernet_gen_subsys netns ops which are registered with register_pernet_gen_device() are shutdown strictly before those which are registered with register_pernet_subsys(). Sometimes this leads to opposite (read: buggy) shutdown ordering between two modules. Add register_pernet_gen_subsys()/unregister_pernet_gen_subsys() for modules which aren't elite enough for entry in struct net, and which can't use register_pernet_gen_device(). PPTP conntracking module is such one. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 ++ net/core/net_namespace.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 708009be88b6..700c53a3c6fa 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -214,6 +214,8 @@ struct pernet_operations { extern int register_pernet_subsys(struct pernet_operations *); extern void unregister_pernet_subsys(struct pernet_operations *); +extern int register_pernet_gen_subsys(int *id, struct pernet_operations *); +extern void unregister_pernet_gen_subsys(int id, struct pernet_operations *); extern int register_pernet_device(struct pernet_operations *); extern void unregister_pernet_device(struct pernet_operations *); extern int register_pernet_gen_device(int *id, struct pernet_operations *); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f1d07b5c1e17..1895a4ca9c4f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -325,6 +325,38 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); +int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) +{ + int rv; + + mutex_lock(&net_mutex); +again: + rv = ida_get_new_above(&net_generic_ids, 1, id); + if (rv < 0) { + if (rv == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + goto out; + } + rv = register_pernet_operations(first_device, ops); + if (rv < 0) + ida_remove(&net_generic_ids, *id); + mutex_unlock(&net_mutex); +out: + return rv; +} +EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); + +void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(ops); + ida_remove(&net_generic_ids, id); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); + /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem -- cgit v1.2.3 From 61e5744849cb936bf4361181b8f9ebccebf4d9db Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 30 Oct 2008 23:55:44 -0700 Subject: netfilter: nf_conntrack_proto_gre: switch to register_pernet_gen_subsys() register_pernet_gen_device() can't be used is nf_conntrack_pptp module is also used (compiled in or loaded). Right now, proto_gre_net_exit() is called before nf_conntrack_pptp_net_exit(). The former shutdowns and frees GRE piece of netns, however the latter absolutely needs it to flush keymap. Oops is inevitable. Switch to shiny new register_pernet_gen_subsys() to get correct ordering in netns ops list. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a2cdbcbf64c4..4ab62ad85dd4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -335,7 +335,7 @@ static int __init nf_ct_proto_gre_init(void) rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); if (rv < 0) return rv; - rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops); + rv = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops); if (rv < 0) nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); return rv; @@ -344,7 +344,7 @@ static int __init nf_ct_proto_gre_init(void) static void nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); - unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops); + unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops); } module_init(nf_ct_proto_gre_init); -- cgit v1.2.3 From a432226614c5616e3cfd211e0acffa0acfb4770c Mon Sep 17 00:00:00 2001 From: "fernando@oss.ntt.co" Date: Thu, 23 Oct 2008 04:27:19 +0000 Subject: xfrm: do not leak ESRCH to user space I noticed that, under certain conditions, ESRCH can be leaked from the xfrm layer to user space through sys_connect. In particular, this seems to happen reliably when the kernel fails to resolve a template either because the AF_KEY receive buffer being used by racoon is full or because the SA entry we are trying to use is in XFRM_STATE_EXPIRED state. However, since this could be a transient issue it could be argued that EAGAIN would be more appropriate. Besides this error code is not even documented in the man page for sys_connect (as of man-pages 3.07). Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 832b47c1de80..25872747762c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1251,6 +1251,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, -EINVAL : -EAGAIN); xfrm_state_put(x); } + else if (error == -ESRCH) + error = -EAGAIN; if (!tmpl->optional) goto fail; -- cgit v1.2.3 From 920da6923cf03c8a78fbaffa408f8ab37f6abfc1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 31 Oct 2008 16:41:26 -0700 Subject: key: fix setkey(8) policy set breakage Steps to reproduce: #/usr/sbin/setkey -f flush; spdflush; add 192.168.0.42 192.168.0.1 ah 24500 -A hmac-md5 "1234567890123456"; add 192.168.0.42 192.168.0.1 esp 24501 -E 3des-cbc "123456789012123456789012"; spdadd 192.168.0.42 192.168.0.1 any -P out ipsec esp/transport//require ah/transport//require; setkey: invalid keymsg length Policy dump will bail out with the same message after that. -recv(4, "\2\16\0\0\32\0\3\0\0\0\0\0\37\r\0\0\3\0\5\0\377 \0\0\2\0\0\0\300\250\0*\0"..., 32768, 0) = 208 +recv(4, "\2\16\0\0\36\0\3\0\0\0\0\0H\t\0\0\3\0\5\0\377 \0\0\2\0\0\0\300\250\0*\0"..., 32768, 0) = 208 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/key/af_key.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index e55e0441e4d9..3440a4637f01 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2075,7 +2075,6 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in req_size += socklen * 2; } else { size -= 2*socklen; - socklen = 0; } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; -- cgit v1.2.3 From 233e70f4228e78eb2f80dc6650f65d3ae3dbf17c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 31 Oct 2008 23:28:30 +0000 Subject: saner FASYNC handling on file close As it is, all instances of ->release() for files that have ->fasync() need to remember to evict file from fasync lists; forgetting that creates a hole and we actually have a bunch that *does* forget. So let's keep our lives simple - let __fput() check FASYNC in file->f_flags and call ->fasync() there if it's been set. And lose that crap in ->release() instances - leaving it there is still valid, but we don't have to bother anymore. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 5 ----- drivers/char/hpet.c | 3 --- drivers/char/ipmi/ipmi_devintf.c | 2 -- drivers/char/ipmi/ipmi_watchdog.c | 1 - drivers/char/random.c | 7 ------- drivers/char/rtc.c | 2 -- drivers/char/sonypi.c | 1 - drivers/gpu/drm/drm_fops.c | 2 -- drivers/hid/usbhid/hiddev.c | 2 -- drivers/ieee1394/dv1394.c | 3 --- drivers/infiniband/core/uverbs_main.c | 2 -- drivers/input/evdev.c | 1 - drivers/input/joydev.c | 1 - drivers/input/misc/hp_sdc_rtc.c | 13 ------------- drivers/input/mousedev.c | 1 - drivers/input/serio/serio_raw.c | 1 - drivers/message/fusion/mptctl.c | 7 ------- drivers/message/i2o/i2o_config.c | 21 +++++---------------- drivers/misc/sony-laptop.c | 1 - drivers/net/tun.c | 2 -- drivers/rtc/rtc-dev.c | 3 --- drivers/scsi/megaraid/megaraid_sas.c | 12 ------------ drivers/scsi/sg.c | 1 - drivers/staging/me4000/me4000.c | 3 --- drivers/telephony/ixj.c | 1 - drivers/uio/uio.c | 3 --- drivers/usb/gadget/inode.c | 1 - fs/file_table.c | 4 ++++ fs/fuse/dev.c | 1 - fs/inotify_user.c | 3 --- fs/pipe.c | 3 --- net/socket.c | 1 - sound/core/control.c | 1 - sound/core/init.c | 5 ++++- sound/core/pcm_native.c | 1 - sound/core/timer.c | 1 - 36 files changed, 13 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ada4605d1223..6543a5547c84 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -1995,11 +1995,6 @@ pfm_close(struct inode *inode, struct file *filp) return -EBADF; } - if (filp->f_flags & FASYNC) { - DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); - pfm_do_fasync(-1, filp, ctx, 0); - } - PROTECT_CTX(ctx, flags); state = ctx->ctx_state; diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 408f5f92cb4e..53fdc7ff3870 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -427,9 +427,6 @@ static int hpet_release(struct inode *inode, struct file *file) if (irq) free_irq(irq, devp); - if (file->f_flags & FASYNC) - hpet_fasync(-1, file, 0); - file->private_data = NULL; return 0; } diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 1d7b429f7ffa..41fc11dc921c 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -162,8 +162,6 @@ static int ipmi_release(struct inode *inode, struct file *file) if (rv) return rv; - ipmi_fasync (-1, file, 0); - /* FIXME - free the messages in the list. */ kfree(priv); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 235fab0bdf79..a4d57e31f713 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -870,7 +870,6 @@ static int ipmi_close(struct inode *ino, struct file *filep) clear_bit(0, &ipmi_wdog_open); } - ipmi_fasync(-1, filep, 0); expect_close = 0; return 0; diff --git a/drivers/char/random.c b/drivers/char/random.c index 705a839f1796..675076f5fca8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1139,18 +1139,12 @@ static int random_fasync(int fd, struct file *filp, int on) return fasync_helper(fd, filp, on, &fasync); } -static int random_release(struct inode *inode, struct file *filp) -{ - return fasync_helper(-1, filp, 0, &fasync); -} - const struct file_operations random_fops = { .read = random_read, .write = random_write, .poll = random_poll, .unlocked_ioctl = random_ioctl, .fasync = random_fasync, - .release = random_release, }; const struct file_operations urandom_fops = { @@ -1158,7 +1152,6 @@ const struct file_operations urandom_fops = { .write = random_write, .unlocked_ioctl = random_ioctl, .fasync = random_fasync, - .release = random_release, }; /*************************************************************** diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 32dc89720d58..20d6efb6324e 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -788,8 +788,6 @@ static int rtc_release(struct inode *inode, struct file *file) } spin_unlock_irq(&rtc_lock); - if (file->f_flags & FASYNC) - rtc_fasync(-1, file, 0); no_irq: #endif diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 85e0eb76eeab..2457b07dabd6 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -898,7 +898,6 @@ static int sonypi_misc_fasync(int fd, struct file *filp, int on) static int sonypi_misc_release(struct inode *inode, struct file *file) { - sonypi_misc_fasync(-1, file, 0); mutex_lock(&sonypi_device.lock); sonypi_device.open_count--; mutex_unlock(&sonypi_device.lock); diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 0d46627663b1..78eeed5caaff 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -406,8 +406,6 @@ int drm_release(struct inode *inode, struct file *filp) if (dev->driver->driver_features & DRIVER_GEM) drm_gem_release(dev, file_priv); - drm_fasync(-1, filp, 0); - mutex_lock(&dev->ctxlist_mutex); if (!list_empty(&dev->ctxlist)) { struct drm_ctx_list *pos, *n; diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 3ac320785fc5..83e851a5ed30 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -242,8 +242,6 @@ static int hiddev_release(struct inode * inode, struct file * file) struct hiddev_list *list = file->private_data; unsigned long flags; - hiddev_fasync(-1, file, 0); - spin_lock_irqsave(&list->hiddev->list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hiddev->list_lock, flags); diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index 2f83543a9dfc..965cfdb84ebc 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1828,9 +1828,6 @@ static int dv1394_release(struct inode *inode, struct file *file) /* OK to free the DMA buffer, no more mappings can exist */ do_dv1394_shutdown(video, 1); - /* clean up async I/O users */ - dv1394_fasync(-1, file, 0); - /* give someone else a turn */ clear_bit(0, &video->open); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index d85af1b67027..eb36a81dd09b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -358,8 +358,6 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) } spin_unlock_irq(&file->lock); - ib_uverbs_event_fasync(-1, filp, 0); - if (file->is_async) { ib_unregister_event_handler(&file->uverbs_file->event_handler); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 3524bef62be6..1070db330d35 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -235,7 +235,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_ungrab(evdev, client); mutex_unlock(&evdev->mutex); - evdev_fasync(-1, file, 0); evdev_detach_client(evdev, client); kfree(client); diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 65d7077a75a1..a85b1485e774 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -244,7 +244,6 @@ static int joydev_release(struct inode *inode, struct file *file) struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; - joydev_fasync(-1, file, 0); joydev_detach_client(joydev, client); kfree(client); diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 82ec6b1b6467..216a559f55ea 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -71,7 +71,6 @@ static int hp_sdc_rtc_ioctl(struct inode *inode, struct file *file, static unsigned int hp_sdc_rtc_poll(struct file *file, poll_table *wait); static int hp_sdc_rtc_open(struct inode *inode, struct file *file); -static int hp_sdc_rtc_release(struct inode *inode, struct file *file); static int hp_sdc_rtc_fasync (int fd, struct file *filp, int on); static int hp_sdc_rtc_read_proc(char *page, char **start, off_t off, @@ -414,17 +413,6 @@ static int hp_sdc_rtc_open(struct inode *inode, struct file *file) return 0; } -static int hp_sdc_rtc_release(struct inode *inode, struct file *file) -{ - /* Turn off interrupts? */ - - if (file->f_flags & FASYNC) { - hp_sdc_rtc_fasync (-1, file, 0); - } - - return 0; -} - static int hp_sdc_rtc_fasync (int fd, struct file *filp, int on) { return fasync_helper (fd, filp, on, &hp_sdc_rtc_async_queue); @@ -680,7 +668,6 @@ static const struct file_operations hp_sdc_rtc_fops = { .poll = hp_sdc_rtc_poll, .ioctl = hp_sdc_rtc_ioctl, .open = hp_sdc_rtc_open, - .release = hp_sdc_rtc_release, .fasync = hp_sdc_rtc_fasync, }; diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 8137e50ded87..d8c056fe7e98 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -519,7 +519,6 @@ static int mousedev_release(struct inode *inode, struct file *file) struct mousedev_client *client = file->private_data; struct mousedev *mousedev = client->mousedev; - mousedev_fasync(-1, file, 0); mousedev_detach_client(mousedev, client); kfree(client); diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c index 470770c09260..06bbd0e74c6f 100644 --- a/drivers/input/serio/serio_raw.c +++ b/drivers/input/serio/serio_raw.c @@ -135,7 +135,6 @@ static int serio_raw_release(struct inode *inode, struct file *file) mutex_lock(&serio_raw_mutex); - serio_raw_fasync(-1, file, 0); serio_raw_cleanup(serio_raw); mutex_unlock(&serio_raw_mutex); diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index f5233f3d9eff..b89f476cd0a9 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -559,12 +559,6 @@ mptctl_fasync(int fd, struct file *filep, int mode) return ret; } -static int -mptctl_release(struct inode *inode, struct file *filep) -{ - return fasync_helper(-1, filep, 0, &async_queue); -} - /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* * MPT ioctl handler @@ -2706,7 +2700,6 @@ mptctl_hp_targetinfo(unsigned long arg) static const struct file_operations mptctl_fops = { .owner = THIS_MODULE, .llseek = no_llseek, - .release = mptctl_release, .fasync = mptctl_fasync, .unlocked_ioctl = mptctl_ioctl, #ifdef CONFIG_COMPAT diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index a3fabdbe6ca6..f3384c32b9a1 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -1097,28 +1097,17 @@ static int cfg_fasync(int fd, struct file *fp, int on) static int cfg_release(struct inode *inode, struct file *file) { ulong id = (ulong) file->private_data; - struct i2o_cfg_info *p1, *p2; + struct i2o_cfg_info *p, **q; unsigned long flags; lock_kernel(); - p1 = p2 = NULL; - spin_lock_irqsave(&i2o_config_lock, flags); - for (p1 = open_files; p1;) { - if (p1->q_id == id) { - - if (p1->fasync) - cfg_fasync(-1, file, 0); - if (p2) - p2->next = p1->next; - else - open_files = p1->next; - - kfree(p1); + for (q = &open_files; (p = *q) != NULL; q = &p->next) { + if (p->q_id == id) { + *q = p->next; + kfree(p); break; } - p2 = p1; - p1 = p1->next; } spin_unlock_irqrestore(&i2o_config_lock, flags); unlock_kernel(); diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c index f483c4221f76..06f07e19dc70 100644 --- a/drivers/misc/sony-laptop.c +++ b/drivers/misc/sony-laptop.c @@ -1920,7 +1920,6 @@ static int sonypi_misc_fasync(int fd, struct file *filp, int on) static int sonypi_misc_release(struct inode *inode, struct file *file) { - sonypi_misc_fasync(-1, file, 0); atomic_dec(&sonypi_compat.open_count); return 0; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 6daea0c91862..33b6d1b122fb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1070,8 +1070,6 @@ static int tun_chr_close(struct inode *inode, struct file *file) DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); - tun_chr_fasync(-1, file, 0); - rtnl_lock(); /* Detach from net device */ diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index 079e9ed907e0..ecdea44ae4e5 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -446,9 +446,6 @@ static int rtc_dev_release(struct inode *inode, struct file *file) if (rtc->ops->release) rtc->ops->release(rtc->dev.parent); - if (file->f_flags & FASYNC) - rtc_dev_fasync(-1, file, 0); - clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags); return 0; } diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index afe1de998763..a454f94623d7 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -2987,17 +2987,6 @@ static int megasas_mgmt_open(struct inode *inode, struct file *filep) return 0; } -/** - * megasas_mgmt_release - char node "release" entry point - */ -static int megasas_mgmt_release(struct inode *inode, struct file *filep) -{ - filep->private_data = NULL; - fasync_helper(-1, filep, 0, &megasas_async_queue); - - return 0; -} - /** * megasas_mgmt_fasync - Async notifier registration from applications * @@ -3345,7 +3334,6 @@ megasas_mgmt_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations megasas_mgmt_fops = { .owner = THIS_MODULE, .open = megasas_mgmt_open, - .release = megasas_mgmt_release, .fasync = megasas_mgmt_fasync, .unlocked_ioctl = megasas_mgmt_ioctl, #ifdef CONFIG_COMPAT diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9adf35bd8b56..5103855242ae 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -327,7 +327,6 @@ sg_release(struct inode *inode, struct file *filp) if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); - sg_fasync(-1, filp, 0); /* remove filp from async notification list */ if (0 == sg_remove_sfp(sdp, sfp)) { /* Returns 1 when sdp gone */ if (!sdp->detached) { scsi_device_put(sdp->device); diff --git a/drivers/staging/me4000/me4000.c b/drivers/staging/me4000/me4000.c index 0b33773bb4f6..cf8b01bcac8d 100644 --- a/drivers/staging/me4000/me4000.c +++ b/drivers/staging/me4000/me4000.c @@ -1633,9 +1633,6 @@ static int me4000_release(struct inode *inode_p, struct file *file_p) free_irq(ext_int_context->irq, ext_int_context); - /* Delete the fasync structure and free memory */ - me4000_ext_int_fasync(0, file_p, 0); - /* Mark as unused */ ext_int_context->in_use = 0; } else { diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index 41b6530b8f25..a913efc69669 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -2328,7 +2328,6 @@ static int ixj_release(struct inode *inode, struct file *file_p) j->rec_codec = j->play_codec = 0; j->rec_frame_size = j->play_frame_size = 0; j->flags.cidsent = j->flags.cidring = 0; - ixj_fasync(-1, file_p, 0); /* remove from list of async notification */ if(j->cardtype == QTI_LINEJACK && !j->readers && !j->writers) { ixj_set_port(j, PORT_PSTN); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index f9b4647255aa..2d2440cd57a9 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -367,9 +367,6 @@ static int uio_release(struct inode *inode, struct file *filep) ret = idev->info->release(idev->info, inode); module_put(idev->owner); - - if (filep->f_flags & FASYNC) - ret = uio_fasync(-1, filep, 0); kfree(listener); return ret; } diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index f4585d3e90d7..eeb26c0f88e5 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1251,7 +1251,6 @@ dev_release (struct inode *inode, struct file *fd) * alternatively, all host requests will time out. */ - fasync_helper (-1, fd, 0, &dev->fasync); kfree (dev->buf); dev->buf = NULL; put_dev (dev); diff --git a/fs/file_table.c b/fs/file_table.c index efc06faede6c..5ad0eca6eea2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -269,6 +269,10 @@ void __fput(struct file *file) eventpoll_release(file); locks_remove_flock(file); + if (unlikely(file->f_flags & FASYNC)) { + if (file->f_op && file->f_op->fasync) + file->f_op->fasync(-1, file, 0); + } if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 87250b6a8682..b72361479be2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1056,7 +1056,6 @@ static int fuse_dev_release(struct inode *inode, struct file *file) end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); spin_unlock(&fc->lock); - fasync_helper(-1, file, 0, &fc->fasync); fuse_conn_put(fc); } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index d85c7d931cdf..d367e9b92862 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -537,9 +537,6 @@ static int inotify_release(struct inode *ignored, struct file *file) inotify_dev_event_dequeue(dev); mutex_unlock(&dev->ev_mutex); - if (file->f_flags & FASYNC) - inotify_fasync(-1, file, 0); - /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); diff --git a/fs/pipe.c b/fs/pipe.c index fcba6542b8d0..7aea8b89baac 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -717,14 +717,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) static int pipe_read_release(struct inode *inode, struct file *filp) { - pipe_read_fasync(-1, filp, 0); return pipe_release(inode, 1, 0); } static int pipe_write_release(struct inode *inode, struct file *filp) { - pipe_write_fasync(-1, filp, 0); return pipe_release(inode, 0, 1); } @@ -733,7 +731,6 @@ pipe_rdwr_release(struct inode *inode, struct file *filp) { int decr, decw; - pipe_rdwr_fasync(-1, filp, 0); decr = (filp->f_mode & FMODE_READ) != 0; decw = (filp->f_mode & FMODE_WRITE) != 0; return pipe_release(inode, decr, decw); diff --git a/net/socket.c b/net/socket.c index 2b7a4b5c9b72..57550c3bcabe 100644 --- a/net/socket.c +++ b/net/socket.c @@ -990,7 +990,6 @@ static int sock_close(struct inode *inode, struct file *filp) printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } - sock_fasync(-1, filp, 0); sock_release(SOCKET_I(inode)); return 0; } diff --git a/sound/core/control.c b/sound/core/control.c index b0bf42691047..636b3b52ef8b 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -113,7 +113,6 @@ static int snd_ctl_release(struct inode *inode, struct file *file) unsigned int idx; ctl = file->private_data; - fasync_helper(-1, file, 0, &ctl->fasync); file->private_data = NULL; card = ctl->card; write_lock_irqsave(&card->ctl_files_rwlock, flags); diff --git a/sound/core/init.c b/sound/core/init.c index ef2352c2e451..b47ff8b44be8 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -264,8 +264,11 @@ static int snd_disconnect_release(struct inode *inode, struct file *file) } spin_unlock(&shutdown_lock); - if (likely(df)) + if (likely(df)) { + if ((file->f_flags & FASYNC) && df->disconnected_f_op->fasync) + df->disconnected_f_op->fasync(-1, file, 0); return df->disconnected_f_op->release(inode, file); + } panic("%s(%p, %p) failed!", __func__, inode, file); } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index aef18682c035..a789efc9df39 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2169,7 +2169,6 @@ static int snd_pcm_release(struct inode *inode, struct file *file) if (snd_BUG_ON(!substream)) return -ENXIO; pcm = substream->pcm; - fasync_helper(-1, file, 0, &substream->runtime->fasync); mutex_lock(&pcm->open_mutex); snd_pcm_release_substream(substream); kfree(pcm_file); diff --git a/sound/core/timer.c b/sound/core/timer.c index e582face89d2..c584408c9f17 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1263,7 +1263,6 @@ static int snd_timer_user_release(struct inode *inode, struct file *file) if (file->private_data) { tu = file->private_data; file->private_data = NULL; - fasync_helper(-1, file, 0, &tu->fasync); if (tu->timeri) snd_timer_close(tu->timeri); kfree(tu->queue); -- cgit v1.2.3 From d1a203eac0ec13cd1c0ba610fe7a55c9bc40473b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 1 Nov 2008 21:01:09 -0700 Subject: net: add documentation for skb recycling Commit 04a4bb55bcf35b63d40fd2725e58599ff8310dd7 ("net: add skb_recycle_check() to enable netdriver skb recycling") added a method for network drivers to recycle skbuffs, but while use of this mechanism was documented in the commit message, it should really have been added as a docbook comment as well -- this patch does that. Signed-off-by: Stephen Hemminger Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/core/skbuff.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e22e3a35359..ebb6b94f8af2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -449,6 +449,18 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +/** + * skb_recycle_check - check if skb can be reused for receive + * @skb: buffer + * @skb_size: minimum receive buffer size + * + * Checks that the skb passed in is not shared or cloned, and + * that it is linear and its head portion at least as large as + * skb_size so that it can be recycled as a receive buffer. + * If these conditions are met, this function does any necessary + * reference count dropping and cleans up the skbuff as if it + * just came from __alloc_skb(). + */ int skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; -- cgit v1.2.3 From 920a46115ca3fa88990276d98520abab85495b2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Nov 2008 21:22:23 -0700 Subject: udp: multicast packets need to check namespace Current UDP multicast delivery is not namespace aware. Signed-off-by: Eric Dumazet Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/udp.c | 12 +++++++----- net/ipv6/udp.c | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2095abc3caba..cf02701ced48 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -284,7 +284,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, } EXPORT_SYMBOL_GPL(udp4_lib_lookup); -static inline struct sock *udp_v4_mcast_next(struct sock *sk, +static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) @@ -296,7 +296,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (s->sk_hash != hnum || + if (!net_eq(sock_net(s), net) || + s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -1079,15 +1080,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { struct sock *sknext = NULL; do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + daddr, uh->source, saddr, + dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51da8c092fa..71e259e866a1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,7 +328,7 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct sock *sk, +static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, struct in6_addr *loc_addr, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) @@ -340,7 +340,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (sock_net(s) != sock_net(sk)) + if (!net_eq(sock_net(s), net)) continue; if (s->sk_hash == num && s->sk_family == PF_INET6) { @@ -383,14 +383,14 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = inet6_iif(skb); - sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { kfree_skb(skb); goto out; } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { -- cgit v1.2.3 From 48dcc33e5e11de0f76b65b113988dbc930d17395 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Sat, 1 Nov 2008 21:37:27 -0700 Subject: af_unix: netns: fix problem of return value fix problem of return value net/unix/af_unix.c: unix_net_init() when error appears, it should return 'error', not always return 0. Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dc504d308ec0..4d3c6071b9a4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2213,7 +2213,7 @@ static int unix_net_init(struct net *net) #endif error = 0; out: - return 0; + return error; } static void unix_net_exit(struct net *net) -- cgit v1.2.3 From f26ba1751145edbf52b2c89a40e389f2fbdfc1af Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:11:01 +0000 Subject: udp: Fix the SNMP counter of UDP_MIB_INDATAGRAMS If UDP echo is sent to xinetd/echo-dgram, the UDP reply will be received at the sender. But the SNMP counter of UDP_MIB_INDATAGRAMS will be not increased, UDP6_MIB_INDATAGRAMS will be increased instead. Endpoint A Endpoint B UDP Echo request -----------> (IPv4, Dst port=7) <---------- UDP Echo Reply (IPv4, Src port=7) This bug is come from this patch cb75994ec311b2cd50e5205efdcc0696abd6675d. It do counter UDP[6]_MIB_INDATAGRAMS until udp[v6]_recvmsg. Because xinetd used IPv6 socket to receive UDP messages, thus, when received UDP packet, the UDP6_MIB_INDATAGRAMS will be increased in function udpv6_recvmsg() even if the packet is a IPv4 UDP packet. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/udp.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 71e259e866a1..18696af106d6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -138,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -158,6 +159,8 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial @@ -180,9 +183,14 @@ try_again: if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -196,7 +204,7 @@ try_again: sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -207,7 +215,7 @@ try_again: } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { -- cgit v1.2.3 From 0856f93958c488f0cc656be53c26dfd20663bdb3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Nov 2008 16:14:27 +0000 Subject: udp: Fix the SNMP counter of UDP_MIB_INERRORS UDP packets received in udpv6_recvmsg() are not only IPv6 UDP packets, but also have IPv4 UDP packets, so when do the counter of UDP_MIB_INERRORS in udpv6_recvmsg(), we should check whether the packet is a IPv6 UDP packet or a IPv4 UDP packet. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/udp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 18696af106d6..8b48512ebf6a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -236,8 +236,14 @@ out: csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) -- cgit v1.2.3 From a1caa32295d67284ecba18cd8db692c7166f0706 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Mon, 3 Nov 2008 01:30:23 -0800 Subject: XFRM: copy_to_user_kmaddress() reports local address twice While adding support for MIGRATE/KMADDRESS in strongSwan (as specified in draft-ebalard-mext-pfkey-enhanced-migrate-00), Andreas Steffen noticed that XFRMA_KMADDRESS attribute passed to userland contains the local address twice (remote provides local address instead of remote one). This bug in copy_to_user_kmaddress() affects only key managers that use native XFRM interface (key managers that use PF_KEY are not affected). For the record, the bug was in the initial changeset I posted which added support for KMADDRESS (13c1d18931ebb5cf407cb348ef2cd6284d68902d 'xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate)'). Signed-off-by: Arnaud Ebalard Reported-by: Andreas Steffen Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4a8a1abb59ee..a278a6f3b991 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1816,7 +1816,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); - memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -- cgit v1.2.3 From bbb770e7ab9a436752babfc8765e422d7481be1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 3 Nov 2008 19:11:29 -0800 Subject: xfrm: Fix xfrm_policy_gc_lock handling. From: Alexey Dobriyan Based upon a lockdep trace by Simon Arlott. xfrm_policy_kill() can be called from both BH and non-BH contexts, so we have to grab xfrm_policy_gc_lock with BH disabling. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 25872747762c..058f04f54b90 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } -- cgit v1.2.3 From 79654a7698195fa043063092f5c1ca5245276fba Mon Sep 17 00:00:00 2001 From: Andreas Steffen Date: Tue, 4 Nov 2008 14:49:19 -0800 Subject: xfrm: Have af-specific init_tempsel() initialize family field of temporary selector While adding MIGRATE support to strongSwan, Andreas Steffen noticed that the selectors provided in XFRM_MSG_ACQUIRE have their family field uninitialized (those in MIGRATE do have their family set). Looking at the code, this is because the af-specific init_tempsel() (called via afinfo->init_tempsel() in xfrm_init_tempsel()) do not set the value. Reported-by: Andreas Steffen Acked-by: Herbert Xu Signed-off-by: Arnaud Ebalard --- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_state.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d7..55dc6beab9aa 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 89884a4f23aa..60c78cfc2737 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -34,6 +34,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; -- cgit v1.2.3 From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... [ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ net/8021q/vlan_core.c | 46 +++++++++++++++++++++++++++++++++------------- net/core/dev.c | 3 +++ 3 files changed, 43 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062d..a5cb0c3f6dcf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b6..68ced4bf158c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. */ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..9174c77d3112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; -- cgit v1.2.3 From b22cecdd8fa4667ebab02def0866387e709927ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 5 Nov 2008 01:35:55 -0800 Subject: net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412e..0a04faa22116 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; -- cgit v1.2.3 From e3ec6cfc260e2322834e200c2fa349cdf104fd13 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 5 Nov 2008 01:43:57 -0800 Subject: ipv6: fix run pending DAD when interface becomes ready With some net devices types, an IPv6 address configured while the interface was down can stay 'tentative' forever, even after the interface is set up. In some case, pending IPv6 DADs are not executed when the device becomes ready. I observed this while doing some tests with kvm. If I assign an IPv6 address to my interface eth0 (kvm driver rtl8139) when it is still down then the address is flagged tentative (IFA_F_TENTATIVE). Then, I set eth0 up, and to my surprise, the address stays 'tentative', no DAD is executed and the address can't be pinged. I also observed the same behaviour, without kvm, with virtual interfaces types macvlan and veth. Some easy steps to reproduce the issue with macvlan: 1. ip link add link eth0 type macvlan 2. ip -6 addr add 2003::ab32/64 dev macvlan0 3. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... 4. ip link set macvlan0 up 5. ip addr show dev macvlan0 ... inet6 2003::ab32/64 scope global tentative ... Address is still tentative I think there's a bug in net/ipv6/addrconf.c, addrconf_notify(): addrconf_dad_run() is not always run when the interface is flagged IF_READY. Currently it is only run when receiving NETDEV_CHANGE event. Looks like some (virtual) devices doesn't send this event when becoming up. For both NETDEV_UP and NETDEV_CHANGE events, when the interface becomes ready, run_pending should be set to 1. Patch below. 'run_pending = 1' could be moved below the if/else block but it makes the code less readable. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eea9542728ca..d9da5eb9dcb2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2483,8 +2483,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) + if (idev) { idev->if_flags |= IF_READY; + run_pending = 1; + } } else { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ -- cgit v1.2.3 From efb9a8c28ca0edd9e2572117105ebad9bbc0c368 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 5 Nov 2008 03:03:18 -0800 Subject: netfilter: netns ct: walk netns list under RTNL netns list (just list) is under RTNL. But helper and proto unregistration happen during rmmod when RTNL is not held, and that's how it was tested: modprobe/rmmod vs clone(CLONE_NEWNET)/exit. BUG: unable to handle kernel paging request at 0000000000100100 <=== IP: [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] PGD 15e300067 PUD 15e1d8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/kernel/uevent_seqnum CPU 0 Modules linked in: nf_conntrack_proto_sctp(-) nf_conntrack_proto_dccp(-) af_packet iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 iptable_filter ip_tables xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 sr_mod cdrom [last unloaded: nf_conntrack_proto_sctp] Pid: 16758, comm: rmmod Not tainted 2.6.28-rc2-netns-xfrm #3 RIP: 0010:[] [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP: 0018:ffff88015dc1fec8 EFLAGS: 00010212 RAX: 0000000000000000 RBX: 00000000001000f8 RCX: 0000000000000000 RDX: ffffffffa009575c RSI: 0000000000000003 RDI: ffffffffa00956b5 RBP: ffff88015dc1fed8 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88015dc1fe48 R12: ffffffffa0458f60 R13: 0000000000000880 R14: 00007fff4c361d30 R15: 0000000000000880 FS: 00007f624435a6f0(0000) GS:ffffffff80521580(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000100100 CR3: 0000000168969000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 16758, threadinfo ffff88015dc1e000, task ffff880179864218) Stack: ffffffffa0459100 0000000000000000 ffff88015dc1fee8 ffffffffa0457934 ffff88015dc1ff78 ffffffff80253fef 746e6e6f635f666e 6f72705f6b636172 00707463735f6f74 ffffffff8024cb30 00000000023b8010 0000000000000000 Call Trace: [] nf_conntrack_proto_sctp_fini+0x10/0x1e [nf_conntrack_proto_sctp] [] sys_delete_module+0x19f/0x1fe [] ? trace_hardirqs_on_caller+0xf0/0x114 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: 13 35 e0 e8 c4 6c 1a e0 48 8b 1d 6d c6 46 e0 eb 16 48 89 df 4c 89 e2 48 c7 c6 fc 85 09 a0 e8 61 cd ff ff 48 8b 5b 08 48 83 eb 08 <48> 8b 43 08 0f 18 08 48 8d 43 08 48 3d 60 4f 50 80 75 d3 5b 41 RIP [] nf_conntrack_l4proto_unregister+0x96/0xae [nf_conntrack] RSP CR2: 0000000000100100 ---[ end trace bde8ac82debf7192 ]--- Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_helper.c | 3 +++ net/netfilter/nf_conntrack_proto.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c06b9f86ad4..c39b6a994133 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -167,10 +168,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); + rtnl_lock(); spin_lock_bh(&nf_conntrack_lock); for_each_net(net) __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a59a307e685d..592d73344d46 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -221,8 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l3proto, proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -333,8 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ + rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); -- cgit v1.2.3 From 518a09ef11f8454f4676125d47c3e775b300c6a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Nov 2008 03:36:01 -0800 Subject: tcp: Fix recvmsg MSG_PEEK influence of blocking behavior. Vito Caputo noticed that tcp_recvmsg() returns immediately from partial reads when MSG_PEEK is used. In particular, this means that SO_RCVLOWAT is not respected. Simply remove the test. And this matches the behavior of several other systems, including BSD. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eccb7165a80c..c5aca0bb116a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) -- cgit v1.2.3 From 517ac45af4b55913587279d89001171c222f22e7 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:30:13 -0500 Subject: 9p: rdma: Set trans prior to requesting async connection ops The RDMA connection manager is fundamentally asynchronous. Since the async callback context is the client pointer, the transport in the client struct needs to be set prior to calling the first async op. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 8d6cc4777aae..4e9d2e673cf4 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,6 +589,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (IS_ERR(rdma->cm_id)) goto error; + /* Associate the client with the transport */ + client->trans = rdma; + /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); @@ -669,7 +672,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; - client->trans = rdma; client->status = Connected; return 0; -- cgit v1.2.3 From cac23d6505546f4cfa42d949ec46d431a44bd39c Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:31:02 -0500 Subject: 9p: Make all client spin locks IRQ safe The client lock must be IRQ safe. Some of the lock acquisition paths took regular spin locks. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 67717f69412e..f4e6c05b3c68 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -613,6 +613,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) { int err; struct p9_fid *fid; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); @@ -632,9 +633,9 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) fid->clnt = clnt; fid->aux = NULL; - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); return fid; @@ -646,13 +647,14 @@ error: static void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; + unsigned long flags; P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); + spin_lock_irqsave(&clnt->lock, flags); list_del(&fid->flist); - spin_unlock(&clnt->lock); + spin_unlock_irqrestore(&clnt->lock, flags); kfree(fid); } -- cgit v1.2.3 From 82b189eaaf6186b7694317632255fa87460820a0 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:32:28 -0500 Subject: 9p: Remove unneeded free of fcall for Flush T and R fcall are reused until the client is destroyed. There does not need to be a special case for Flush Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index f4e6c05b3c68..26ca8ab45196 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -311,12 +311,6 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) p9_idpool_put(tag, c->tagpool); - - /* if this was a flush request we have to free response fcall */ - if (r->rc->id == P9_RFLUSH) { - kfree(r->tc); - kfree(r->rc); - } } /** -- cgit v1.2.3 From 45abdf1c7be80d6ec3b0b14e59ee75a0d5d9fb37 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 23 Oct 2008 16:33:25 -0500 Subject: p9: Fix leak of waitqueue in request allocation path If a T or R fcall cannot be allocated, the function returns an error but neglects to free the wait queue that was successfully allocated. If it comes through again a second time this wq will be overwritten with a new allocation and the old allocation will be leaked. Also, if the client is subsequently closed, the close path will attempt to clean up these allocations, so set the req fields to NULL to avoid duplicate free. Signed-off-by: Tom Tucker Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 26ca8ab45196..b56d808e63a9 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -189,6 +189,9 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; return ERR_PTR(-ENOMEM); } req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); -- cgit v1.2.3 From 1558c6214904c636d5a37f05f84202d6cdd9cff8 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Mon, 27 Oct 2008 13:15:16 -0500 Subject: 9p: rdma: remove duplicated #include Removed duplicated #include in net/9p/trans_rdma.c. Signed-off-by: Huang Weiyi Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 4e9d2e673cf4..2f1fe5fc1228 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -45,7 +45,6 @@ #include #include #include -#include #define P9_PORT 5640 #define P9_RDMA_SQ_DEPTH 32 -- cgit v1.2.3 From 9f3e9bbe62b0bdbbaa7c689a68a22a7d3c1670f0 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 28 Oct 2008 14:22:43 -0500 Subject: unsigned fid->fid cannot be negative Signed-off-by: Roel Kluin Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index b56d808e63a9..6e800dd51f09 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -608,7 +608,7 @@ reterr: static struct p9_fid *p9_fid_create(struct p9_client *clnt) { - int err; + int ret; struct p9_fid *fid; unsigned long flags; @@ -617,11 +617,12 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) if (!fid) return ERR_PTR(-ENOMEM); - fid->fid = p9_idpool_get(clnt->fidpool); + ret = p9_idpool_get(clnt->fidpool); if (fid->fid < 0) { - err = -ENOSPC; + ret = -ENOSPC; goto error; } + fid->fid = ret; memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; @@ -638,7 +639,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) error: kfree(fid); - return ERR_PTR(err); + return ERR_PTR(ret); } static void p9_fid_destroy(struct p9_fid *fid) -- cgit v1.2.3 From b0d5fdef521b1eadb3fc2c1283000af7ef0297bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 4 Nov 2008 20:46:46 -0800 Subject: net/9p: fix printk format warnings Fix printk format warnings in net/9p. Built cleanly on 7 arches. net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:820: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:867: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:932: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:982: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'u64' net/9p/client.c:1025: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1227: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 7 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 12 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 8 has type 'u64' net/9p/client.c:1252: warning: format '%llx' expects type 'long long unsigned int', but argument 13 has type 'u64' Signed-off-by: Randy Dunlap Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 6e800dd51f09..4b529454616d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -818,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -865,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) } P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, qid.path, qid.version); + qid.type, + (unsigned long long)qid.path, + qid.version); memmove(&afid->qid, &qid, sizeof(struct p9_qid)); p9_free_req(clnt, req); @@ -930,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, for (count = 0; count < nwqids; count++) P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", - count, wqids[count].type, wqids[count].path, + count, wqids[count].type, + (unsigned long long)wqids[count].path, wqids[count].version); if (nwname) @@ -980,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode) } P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1023,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", - qid.type, qid.path, qid.version, iounit); + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1230,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, - ret->qid.path, ret->qid.version, ret->mode, - ret->atime, ret->mtime, ret->length, ret->name, - ret->uid, ret->gid, ret->muid, ret->extension, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); free_and_error: @@ -1255,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, - wst->qid.path, wst->qid.version, wst->mode, - wst->atime, wst->mtime, wst->length, wst->name, - wst->uid, wst->gid, wst->muid, wst->extension, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); err = 0; clnt = fid->clnt; -- cgit v1.2.3 From 4a9d916717de0aab4313d43817164577255242fb Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Thu, 30 Oct 2008 22:46:48 +0000 Subject: Fix logic error in rfkill_check_duplicity > I'll have a prod at why the [hso] rfkill stuff isn't working next Ok, I believe this is due to the addition of rfkill_check_duplicity in rfkill and the fact that test_bit actually returns a negative value rather than the postive one expected (which is of course equally true). So when the second WLAN device (the hso device, with the EEE PC WLAN being the first) comes along rfkill_check_duplicity returns a negative value and so rfkill_register returns an error. Patch below fixes this for me. Signed-Off-By: Jonathan McDowell Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- net/rfkill/rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index f949a482b007..25ba3bd57e66 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -603,7 +603,7 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill) } /* 0: first switch of its kind */ - return test_bit(rfkill->type, seen); + return (test_bit(rfkill->type, seen)) ? 1 : 0; } static int rfkill_add_switch(struct rfkill *rfkill) -- cgit v1.2.3 From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ include/net/scm.h | 5 +++-- net/core/scm.c | 24 +++++++++++++++++++++--- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..33e9986beb86 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -14,8 +14,9 @@ struct scm_fp_list { - int count; - struct file *fp[SCM_MAX_FD]; + struct list_head list; + int count; + struct file *fp[SCM_MAX_FD]; }; struct scm_cookie diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..ab242cc1acca 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; + INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } @@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { + INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From 70e90679ffce0937deb77e2bd8bd918a24a897fd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 6 Nov 2008 23:08:37 -0800 Subject: af_key: mark policy as dead before destroying xfrm_policy_destroy() will oops if not dead policy is passed to it. On error path in pfkey_compile_policy() exactly this happens. Oopsable for CAP_NET_ADMIN owners. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 3440a4637f01..5b22e011653b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3188,6 +3188,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, return xp; out: + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } -- cgit v1.2.3 From 6209344f5a3795d34b7f2c0061f49802283b6bdd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 9 Nov 2008 15:23:57 +0100 Subject: net: unix: fix inflight counting bug in garbage collector Previously I assumed that the receive queues of candidates don't change during the GC. This is only half true, nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it. This can result in inc_inflight_move_tail() erronously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run. Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves. Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued. Reported-by: Andrea Bittau Signed-off-by: Miklos Szeredi CC: stable@kernel.org Signed-off-by: Linus Torvalds --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 31 ++++++++++++++++++++++++------- net/unix/garbage.c | 49 +++++++++++++++++++++++++++++++++++++------------ 3 files changed, 62 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7dd29b7e461d..c29ff1da8a18 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -54,6 +54,7 @@ struct unix_sock { atomic_long_t inflight; spinlock_t lock; unsigned int gc_candidate : 1; + unsigned int gc_maybe_cycle : 1; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4d3c6071b9a4..eb90f77bb0e2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb) sock_wfree(skb); } -static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + + /* + * Need to duplicate file references for the sake of garbage + * collection. Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. + */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + for (i=scm->fp->count-1; i>=0; i--) unix_inflight(scm->fp->fp[i]); - UNIXCB(skb).fp = scm->fp; skb->destructor = unix_destruct_fds; - scm->fp = NULL; + return 0; } /* @@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) + goto out_free; + } unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = min_t(int, size, skb_tailroom(skb)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) { + kfree_skb(skb); + goto out_err; + } + } if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { kfree_skb(skb); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2a27b84f740b..6d4a9a8de5ef 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), */ struct sock *sk = unix_get_socket(*fp++); if (sk) { - hit = true; - func(unix_sk(sk)); + struct unix_sock *u = unix_sk(sk); + + /* + * Ignore non-candidates, they could + * have been added to the queues after + * starting the garbage collection + */ + if (u->gc_candidate) { + hit = true; + func(u); + } } } if (hit && hitlist != NULL) { @@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); /* - * If this is still a candidate, move it to the end of the - * list, so that it's checked even if it was already passed - * over + * If this still might be part of a cycle, move it to the end + * of the list, so that it's checked even if it was already + * passed over */ - if (u->gc_candidate) + if (u->gc_maybe_cycle) list_move_tail(&u->link, &gc_candidates); } @@ -267,6 +276,7 @@ void unix_gc(void) struct unix_sock *next; struct sk_buff_head hitlist; struct list_head cursor; + LIST_HEAD(not_cycle_list); spin_lock(&unix_gc_lock); @@ -282,10 +292,14 @@ void unix_gc(void) * * Holding unix_gc_lock will protect these candidates from * being detached, and hence from gaining an external - * reference. This also means, that since there are no - * possible receivers, the receive queues of these sockets are - * static during the GC, even though the dequeue is done - * before the detach without atomicity guarantees. + * reference. Since there are no possible receivers, all + * buffers currently on the candidates' queues stay there + * during the garbage collection. + * + * We also know that no new candidate can be added onto the + * receive queues. Other, non candidate sockets _can_ be + * added to queue, so we must make sure only to touch + * candidates. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; @@ -299,6 +313,7 @@ void unix_gc(void) if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); u->gc_candidate = 1; + u->gc_maybe_cycle = 1; } } @@ -325,13 +340,23 @@ void unix_gc(void) list_move(&cursor, &u->link); if (atomic_long_read(&u->inflight) > 0) { - list_move_tail(&u->link, &gc_inflight_list); - u->gc_candidate = 0; + list_move_tail(&u->link, ¬_cycle_list); + u->gc_maybe_cycle = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); } } list_del(&cursor); + /* + * not_cycle_list contains those sockets which do not make up a + * cycle. Restore these to the inflight list. + */ + while (!list_empty(¬_cycle_list)) { + u = list_entry(not_cycle_list.next, struct unix_sock, link); + u->gc_candidate = 0; + list_move_tail(&u->link, &gc_inflight_list); + } + /* * Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs -- cgit v1.2.3 From 309f796f301bf1c2731d9b9eb8642c76b523ebf7 Mon Sep 17 00:00:00 2001 From: Ferenc Wagner Date: Mon, 10 Nov 2008 13:37:40 -0800 Subject: vlan: Fix typos in proc output string Signed-off-by: Ferenc Wagner Signed-off-by: David S. Miller --- net/8021q/vlanproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 0feefa4e1a4b..3628e0a81b40 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -314,7 +314,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) dev_info->ingress_priority_map[6], dev_info->ingress_priority_map[7]); - seq_printf(seq, "EGRESSS priority Mappings: "); + seq_printf(seq, " EGRESS priority mappings: "); for (i = 0; i < 16; i++) { const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; -- cgit v1.2.3 From 87b30a6530218cde431a659f2c118cb72175507c Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Mon, 10 Nov 2008 16:34:11 -0800 Subject: ipv6: fix ip6_mr_init error path The order of cleanup operations in the error/exit section of ip6_mr_init() is completely inversed. It should be the other way around. Also a del_timer() is missing in the error path. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 182f8a177e7f..52a7eb0e2c2c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -981,14 +981,15 @@ int __init ip6_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip6_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip6_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } -- cgit v1.2.3 From b7b45f47d6f8c83a0f958d7e2924468b6942dd9e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 10 Nov 2008 16:46:06 -0800 Subject: netfilter: payload_len is be16, add size of struct rather than size of pointer payload_len is a be16 value, not cpu_endian, also the size of a ponter to a struct ipv6hdr was being added, not the size of the struct itself. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 02ddc2b3ce2e..e90d52f199bc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -713,7 +713,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph = ipv6_hdr(skb); iph->version = 6; iph->nexthdr = IPPROTO_IPV6; - iph->payload_len = old_iph->payload_len + sizeof(old_iph); + iph->payload_len = old_iph->payload_len; + be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); iph->daddr = rt->rt6i_dst.addr; -- cgit v1.2.3 From 013cd397532e5803a1625954a884d021653da720 Mon Sep 17 00:00:00 2001 From: Jianjun Kong Date: Mon, 10 Nov 2008 21:37:39 -0800 Subject: mac80211: fix a buffer overrun in station debug code net/mac80211/debugfs_sta.c The trailing zero was written to state[4], it's out of bounds. Signed-off-by: Jianjun Kong Signed-off-by: David S. Miller --- net/mac80211/debugfs_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 189d0bafa91a..b85c4f27b361 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -199,7 +199,7 @@ static ssize_t sta_agg_status_write(struct file *file, /* toggle Rx aggregation command */ tid_num = tid_num - 100; if (tid_static_rx[tid_num] == 1) { - strcpy(state, "off "); + strcpy(state, "off"); ieee80211_sta_stop_rx_ba_session(sta->sdata, da, tid_num, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); sta->ampdu_mlme.tid_state_rx[tid_num] |= -- cgit v1.2.3 From b971e7ac834e9f4bda96d5a96ae9abccd01c1dd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Nov 2008 21:43:08 -0800 Subject: net: fix /proc/net/snmp as memory corruptor icmpmsg_put() can happily corrupt kernel memory, using a static table and forgetting to reset an array index in a loop. Remove the static array since its not safe without proper locking. Signed-off-by: Alexey Dobriyan Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/proc.c | 58 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f5a403f6f6b..a631a1f110ca 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_SENTINEL }; +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, + unsigned short *type, int count) +{ + int j; + + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", + type[j] & 0x100 ? "Out" : "In", + type[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", vals[j]); + } +} + static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 - int j, i, count; - static int out[PERLINE]; + int i, count; + unsigned short type[PERLINE]; + unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) - out[count++] = i; - if (count < PERLINE) - continue; - - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", - i & 0xff); - seq_printf(seq, "\nIcmpMsg: "); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, - out[j])); - seq_putc(seq, '\n'); - } - if (count) { - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : - "In", out[j] & 0xff); - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %lu", snmp_fold_field((void **) - net->mib.icmpmsg_statistics, out[j])); + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + if (val) { + type[count] = i; + vals[count++] = val; + } + if (count == PERLINE) { + icmpmsg_put_line(seq, vals, type, count); + count = 0; + } } + icmpmsg_put_line(seq, vals, type, count); #undef PERLINE } -- cgit v1.2.3 From 5cd33db2120b5ce972568711156f91da83bff2d7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 10 Nov 2008 21:45:05 -0800 Subject: net: fix setting of skb->tail in skb_recycle_check() Since skb_reset_tail_pointer() reads skb->data, we need to set skb->data before calling skb_reset_tail_pointer(). This was causing spurious skb_over_panic()s from skb_put() being called on a recycled skb that had its skb->tail set to beyond where it should have been. Bug report from Peter van Valderen . Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ebb6b94f8af2..d49ef8301b5b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -486,8 +486,8 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->frag_list = NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); - skb_reset_tail_pointer(skb); skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); return 1; } -- cgit v1.2.3 From 14ee6742b1b5df275cd2d771b4562b4f808c9419 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 10 Nov 2008 21:52:42 -0800 Subject: dsa: fix skb->pkt_type when mac address of slave interface differs When a dsa slave interface has a mac address that differs from that of the master interface, eth_type_trans() won't explicitly set skb->pkt_type back to PACKET_HOST -- we need to do this ourselves before calling eth_type_trans(). Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/dsa/tag_dsa.c | 1 + net/dsa/tag_edsa.c | 1 + net/dsa/tag_trailer.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index bdc0510b53b7..31866543332e 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -159,6 +159,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->last_rx = jiffies; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index f985ea993843..9f4ce55eae59 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -178,6 +178,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->last_rx = jiffies; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d3117764b2c2..efd26697e716 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -95,6 +95,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = ds->ports[source_port]; skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); skb->dev->last_rx = jiffies; -- cgit v1.2.3 From df02c6ff2e3937379b31ea161b53229134fe92f7 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 10 Nov 2008 21:53:12 -0800 Subject: dsa: fix master interface allmulti/promisc handling Before commit b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 ("net: only invoke dev->change_rx_flags when device is UP"), the dsa driver could sort-of get away with only fiddling with the master interface's allmulti/promisc counts in ->change_rx_flags() and not touching them in ->open() or ->stop(). After this commit (note that it was merged almost simultaneously with the dsa patches, which is why this wasn't caught initially), the breakage that was already there became more apparent. Since it makes no sense to keep the master interface's allmulti or promisc count pinned for a slave interface that is down, copy the vlan driver's sync logic (which does exactly what we want) over to dsa to fix this. Bug report from Dirk Teurlings and Peter van Valderen . Signed-off-by: Lennert Buytenhek Tested-by: Dirk Teurlings Tested-by: Peter van Valderen Signed-off-by: David S. Miller --- net/dsa/slave.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 37616884b8a9..1af5a79309e9 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -10,6 +10,7 @@ #include #include +#include #include #include "dsa_priv.h" @@ -49,11 +50,57 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ static int dsa_slave_open(struct net_device *dev) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + int err; + + if (!(master->flags & IFF_UP)) + return -ENETDOWN; + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { + err = dev_unicast_add(master, dev->dev_addr, ETH_ALEN); + if (err < 0) + goto out; + } + + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(master, 1); + if (err < 0) + goto del_unicast; + } + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(master, 1); + if (err < 0) + goto clear_allmulti; + } + return 0; + +clear_allmulti: + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); +del_unicast: + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); +out: + return err; } static int dsa_slave_close(struct net_device *dev) { + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_unsync(master, dev); + dev_unicast_unsync(master, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(master, -1); + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + return 0; } @@ -77,9 +124,30 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) dev_unicast_sync(master, dev); } -static int dsa_slave_set_mac_address(struct net_device *dev, void *addr) +static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { - memcpy(dev->dev_addr, addr + 2, 6); + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + struct sockaddr *addr = a; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (!(dev->flags & IFF_UP)) + goto out; + + if (compare_ether_addr(addr->sa_data, master->dev_addr)) { + err = dev_unicast_add(master, addr->sa_data, ETH_ALEN); + if (err < 0) + return err; + } + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + +out: + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } -- cgit v1.2.3 From 8f65b5354b1a34a536641bd915958662e8af5320 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Wed, 12 Nov 2008 01:41:09 -0800 Subject: tcp_htcp: last_cong bug fix This patch fixes a minor bug in tcp_htcp.c which has been highlighted by Lachlan Andrew and Lawrence Stewart. Currently, the time since the last congestion event, which is stored in variable last_cong, is reset whenever there is a state change into TCP_CA_Open. This includes transitions of the type TCP_CA_Open->TCP_CA_Disorder->TCP_CA_Open which are not associated with backoff of cwnd. The patch changes last_cong to be updated only on transitions into TCP_CA_Open that occur after experiencing the congestion-related states TCP_CA_Loss, TCP_CA_Recovery, TCP_CA_CWR. Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_htcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index af99776146ff..937549b8a921 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -69,9 +69,12 @@ static u32 htcp_cwnd_undo(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = ca->undo_last_cong; - ca->maxRTT = ca->undo_maxRTT; - ca->old_maxB = ca->undo_old_maxB; + if (ca->undo_last_cong) { + ca->last_cong = ca->undo_last_cong; + ca->maxRTT = ca->undo_maxRTT; + ca->old_maxB = ca->undo_old_maxB; + ca->undo_last_cong = 0; + } return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } @@ -268,7 +271,10 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = jiffies; + if (ca->undo_last_cong) { + ca->last_cong = jiffies; + ca->undo_last_cong = 0; + } } break; case TCP_CA_CWR: -- cgit v1.2.3 From d35aac10eb7bcb3b80bef16b60844af0313f47f7 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 12 Nov 2008 01:54:56 -0800 Subject: net: put_cmsg_compat + SO_TIMESTAMP[NS]: use same name for value as caller In __sock_recv_timestamp() the additional SCM_TIMESTAMP[NS] is used. This has the same value as SO_TIMESTAMP[NS], so this is a purely cosmetic change. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 67fb6a3834a3..6ce1a1cadcc0 100644 --- a/net/compat.c +++ b/net/compat.c @@ -226,14 +226,14 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat return 0; /* XXX: return error? check spec. */ } - if (level == SOL_SOCKET && type == SO_TIMESTAMP) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMP) { struct timeval *tv = (struct timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } - if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { struct timespec *ts = (struct timespec *)data; cts.tv_sec = ts->tv_sec; cts.tv_nsec = ts->tv_nsec; -- cgit v1.2.3 From db7fb86b0ca565cf3537401612581a8158025cc2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Nov 2008 11:28:04 +0100 Subject: mac80211: fix notify_mac function The ieee80211_notify_mac() function uses ieee80211_sta_req_auth() which in turn calls ieee80211_set_disassoc() which calls a few functions that need to be able to sleep, so ieee80211_notify_mac() cannot use RCU locking for the interface list and must use rtnl locking instead. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87665d7bb4f9..14d165f0df75 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2570,14 +2570,14 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, switch (notif_type) { case IEEE80211_NOTIFY_RE_ASSOC: - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + rtnl_lock(); + list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; ieee80211_sta_req_auth(sdata, &sdata->u.sta); } - rcu_read_unlock(); + rtnl_unlock(); break; } } -- cgit v1.2.3 From 6e093d9dfffc9a02cd54d36904c62f705f09900a Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 12 Nov 2008 22:59:21 -0800 Subject: ipv6: routing header fixes This patch fixes two bugs: 1. setsockopt() of anything but a Type 2 routing header should return EINVAL instead of EPERM. Noticed by Shan Wei (shanwei@cn.fujitsu.com). 2. setsockopt()/sendmsg() of a Type 2 routing header with invalid length or segments should return EINVAL. These values are statically fixed in RFC 3775, unlike the variable Type 0 was. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 5 +++++ net/ipv6/ipv6_sockglue.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 410046a8cc91..e44deb8d4df2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -661,6 +661,11 @@ int datagram_send_ctl(struct net *net, switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) { + err = -EINVAL; + goto exit_f; + } break; #endif default: diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4e5eac301f91..2aa294be0c79 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -366,11 +366,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } /* routing header option needs extra check */ + retv = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) + goto sticky_done; + break; #endif default: -- cgit v1.2.3 From 9c0188acf6dd6990bac9cd906cd554a1476c6d12 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Nov 2008 23:23:51 -0800 Subject: net: shy netns_ok check Failure to pass netns_ok check is SILENT, except some MIB counter is incremented somewhere. And adding "netns_ok = 1" (after long head-scratching session) is usually the last step in making some protocol netns-ready... Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 861978a4f1a8..cfb38ac9d698 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -209,9 +209,17 @@ static int ip_local_deliver_finish(struct sk_buff *skb) hash = protocol & (MAX_INET_PROTOS - 1); ipprot = rcu_dereference(inet_protos[hash]); - if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { + if (ipprot != NULL) { int ret; + if (!net_eq(net, &init_net) && !ipprot->netns_ok) { + if (net_ratelimit()) + printk("%s: proto %d isn't netns-ready\n", + __func__, protocol); + kfree_skb(skb); + goto out; + } + if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); -- cgit v1.2.3 From 1fa989e80a9a104bf3b81842a5f4c1867d7aa9c4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 12 Nov 2008 11:05:17 +0000 Subject: 9p: restrict RDMA usage Make 9p's RDMA option depend on INET since it uses Infiniband rdma_* functions and that code depends on INET. Otherwise 9p can try to use symbols which don't exist. ERROR: "rdma_destroy_id" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_connect" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_create_id" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_create_qp" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_resolve_route" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_disconnect" [net/9p/9pnet_rdma.ko] undefined! ERROR: "rdma_resolve_addr" [net/9p/9pnet_rdma.ko] undefined! I used an if/endif block so that the menu items would remain presented together. Also correct an article adjective. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/9p/Kconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/9p/Kconfig b/net/9p/Kconfig index c42c0c400bf9..0663f99e977a 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,22 +13,24 @@ menuconfig NET_9P If unsure, say N. +if NET_9P + config NET_9P_VIRTIO - depends on NET_9P && EXPERIMENTAL && VIRTIO + depends on EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" help This builds support for a transports between guest partitions and a host partition. config NET_9P_RDMA - depends on NET_9P && INFINIBAND && EXPERIMENTAL + depends on INET && INFINIBAND && EXPERIMENTAL tristate "9P RDMA Transport (Experimental)" help - This builds support for a RDMA transport. + This builds support for an RDMA transport. config NET_9P_DEBUG bool "Debug information" - depends on NET_9P help Say Y if you want the 9P subsystem to log debug information. +endif -- cgit v1.2.3 From e8f6fbf62de37cbc2e179176ac7010d5f4396b67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 01:38:36 +0000 Subject: lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this warning: net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used this is a lockdep macro problem in the !LOCKDEP case. We cannot convert it to an inline because the macro works on multiple types, but we can mark the parameter used. [ also clean up a misaligned tab in sock_lock_init_class_and_name() ] [ also remove #ifdefs from around af_family_clock_key strings - which were certainly added to get rid of the ugly build warnings. ] Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- include/linux/lockdep.h | 5 +++-- include/net/sock.h | 2 +- net/core/sock.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 331e5f1c2d8e..29aec6e10020 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -331,10 +331,11 @@ static inline void lockdep_on(void) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ - do { (void)(key); } while (0) + do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) diff --git a/include/net/sock.h b/include/net/sock.h index c04f9e18ea22..2f47107f6d0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ - sk->sk_lock.owned = 0; \ + sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..341e39456952 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -136,7 +136,6 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_MAX" }; -#endif /* * sk_callback_lock locking rules are per-address-family, -- cgit v1.2.3 From 5421ae0153b4ba0469967cfd8de96144e3bf3979 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 14 Nov 2008 14:51:45 -0800 Subject: scm: fix scm_fp_list->list initialization made in wrong place This is the next page of the scm recursion story (the commit f8d570a4 net: Fix recursive descent in __scm_destroy()). In function scm_fp_dup(), the INIT_LIST_HEAD(&fpl->list) of newly created fpl is done *before* the subsequent memcpy from the old structure and thus the freshly initialized list is overwritten. But that's OK, since this initialization is not required at all, since the fpl->list is list_add-ed at the destruction time in any case (and is unused in other code), so I propose to drop both initializations, rather than moving it after the memcpy. Please, correct me if I miss something significant. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/scm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index ab242cc1acca..b12303dd39d9 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -75,7 +75,6 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (!fpl) return -ENOMEM; *fplp = fpl; - INIT_LIST_HEAD(&fpl->list); fpl->count = 0; } fpp = &fpl->fp[fpl->count]; @@ -301,7 +300,6 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); if (new_fpl) { - INIT_LIST_HEAD(&new_fpl->list); for (i=fpl->count-1; i>=0; i--) get_file(fpl->fp[i]); memcpy(new_fpl, fpl, sizeof(*fpl)); -- cgit v1.2.3 From ebfe92ca65c780334bdf847ddc4eca15835bd9c0 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Sun, 16 Nov 2008 19:48:49 -0800 Subject: Phonet: refuse to send bigger than MTU packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index defeb7a0d502..7ab30f668b5a 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -144,8 +144,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, struct phonethdr *ph; int err; - if (skb->len + 2 > 0xffff) { - /* Phonet length field would overflow */ + if (skb->len + 2 > 0xffff /* Phonet length field limit */ || + skb->len + sizeof(struct phonethdr) > dev->mtu) { err = -EMSGSIZE; goto drop; } -- cgit v1.2.3 From 5f9021cfdc3524a4c5e3d7ae2d049eb7adcd6776 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 16 Nov 2008 23:20:31 -0800 Subject: rtnetlink: propagate error from dev_change_flags in do_setlink() Unlike ifconfig, iproute doesn't report an error when setting an interface up fails: (example: put wireless network mac80211 interface into repeater mode with iwconfig but do not set a peer MAC address, it should fail with -ENOLINK) without patch: # ip link set wlan0 up ; echo $? 0 # with patch: # ip link set wlan0 up ; echo $? RTNETLINK answers: Link has been severed 2 # Propagate the return value from dev_change_flags() to fix this. Signed-off-by: Patrick McHardy Tested-by: Johannes Berg Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31f29d2989fd..4dfb6b4d4559 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -878,7 +878,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | (dev->flags & ~ifm->ifi_change); - dev_change_flags(dev, flags); + err = dev_change_flags(dev, flags); + if (err < 0) + goto errout; } if (tb[IFLA_TXQLEN]) -- cgit v1.2.3 From 8e3bad65a59915f2ddc40f62a180ad81695d8440 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2008 10:59:59 +0100 Subject: mac80211: remove ieee80211_notify_mac Before ieee80211_notify_mac() was added, it was presented with the use case of using it to tell mac80211 that the association may have been lost because the firmware crashed/reset. Since then, it has also been used by iwlwifi to (slightly) speed up re-association after resume, a workaround around the fact that mac80211 has no suspend/resume handling yet. It is also not used by any other drivers, so clearly it cannot be necessary for "good enough" suspend/resume. Unfortunately, the callback suffers from a severe problem: It only works for station mode. If suspend/resume happens while in IBSS or any other mode (but station), then the callback is pointless. Recently, it has created a number of locking issues, first because it required rtnl locking rather than RCU due to calling sleeping functions within the critical section, and now because it's called by iwlwifi from the mac80211 workqueue that may not use the rtnl because it is flushed under rtnl. (cf. http://bugzilla.kernel.org/show_bug.cgi?id=12046) I think, therefore, that we should take a step back, remove it entirely for now and add the small feature it provided properly. For suspend and resume we will need to introduce new hooks, and for the case where the firmware was reset the driver will probably simply just pretend it has done a suspend/resume cycle to get mac80211 to reprogram the hardware completely, not just try to connect to the current AP again in station mode. When doing so, we will need to take into account locking issues and possibly defer to schedule_work from within mac80211 for the resume operation, while the suspend operation must be done directly. Proper suspend/resume should also not necessarily try to reconnect to the current AP, the time spent in suspend may have been short enough to not be disconnected from the AP, mac80211 will detect that the AP went out of range quickly if it did, and if the association is lost then the AP will disassoc as soon as a data frame is sent. We might also take into account WWOL then, and have mac80211 program the hardware into such a mode where it is available and requested. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 1 - drivers/net/wireless/iwlwifi/iwl3945-base.c | 1 - include/net/mac80211.h | 20 -------------------- net/mac80211/mlme.c | 22 ---------------------- 4 files changed, 44 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 8d690a0eb1a9..6751bb2b8ae2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2341,7 +2341,6 @@ static void iwl_bg_alive_start(struct work_struct *data) mutex_lock(&priv->mutex); iwl_alive_start(priv); mutex_unlock(&priv->mutex); - ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); } static void iwl4965_bg_rf_kill(struct work_struct *work) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 285b53e7e261..45a6b0c35695 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6012,7 +6012,6 @@ static void iwl3945_bg_alive_start(struct work_struct *data) mutex_lock(&priv->mutex); iwl3945_alive_start(priv); mutex_unlock(&priv->mutex); - ieee80211_notify_mac(priv->hw, IEEE80211_NOTIFY_RE_ASSOC); } static void iwl3945_bg_rf_kill(struct work_struct *work) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8856e2d60e9f..73d81bc6aa75 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -73,14 +73,6 @@ * not do so then mac80211 may add this under certain circumstances. */ -/** - * enum ieee80211_notification_type - Low level driver notification - * @IEEE80211_NOTIFY_RE_ASSOC: start the re-association sequence - */ -enum ieee80211_notification_types { - IEEE80211_NOTIFY_RE_ASSOC, -}; - /** * struct ieee80211_ht_bss_info - describing BSS's HT characteristics * @@ -1797,18 +1789,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid); void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, u16 tid); -/** - * ieee80211_notify_mac - low level driver notification - * @hw: pointer as obtained from ieee80211_alloc_hw(). - * @notif_type: enum ieee80211_notification_types - * - * This function must be called by low level driver to inform mac80211 of - * low level driver status change or force mac80211 to re-assoc for low - * level driver internal error that require re-assoc. - */ -void ieee80211_notify_mac(struct ieee80211_hw *hw, - enum ieee80211_notification_types notif_type); - /** * ieee80211_find_sta - find a station * diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 14d165f0df75..409bb7716236 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2560,25 +2560,3 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) ieee80211_restart_sta_timer(sdata); rcu_read_unlock(); } - -/* driver notification call */ -void ieee80211_notify_mac(struct ieee80211_hw *hw, - enum ieee80211_notification_types notif_type) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - - switch (notif_type) { - case IEEE80211_NOTIFY_RE_ASSOC: - rtnl_lock(); - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_STATION) - continue; - - ieee80211_sta_req_auth(sdata, &sdata->u.sta); - } - rtnl_unlock(); - break; - } -} -EXPORT_SYMBOL(ieee80211_notify_mac); -- cgit v1.2.3 From c3e388964baa00d8c3960f23e8c8a1fb3966759e Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 19 Nov 2008 14:07:41 -0800 Subject: net: fix ip_mr_init() error path Similarly to IPv6 ip6_mr_init() (fixed last week), the order of cleanup operations in the error/exit section of ip_mr_init() is completely inversed. It should be the other way around. Also a del_timer() is missing in the error path. I should have guessed last week that this same error existed in ipmr.c too, as ip6mr.c is largely inspired by ipmr.c. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b42e082cc170..25924b1eb2ef 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1945,13 +1945,14 @@ int __init ip_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } -- cgit v1.2.3 From bfdbc0acadcc761b94814d78f0acec90f0d760de Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Wed, 19 Nov 2008 14:09:47 -0800 Subject: pktgen: fix multiple queue warning As number of TX queues in unrelated to number of CPU's we remove this test and just make sure nxtq never gets exceeded. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a47f5bad110d..8997e912aaaf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1973,13 +1973,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* make sure that we don't pick a non-existing transmit queue */ ntxq = pkt_dev->odev->real_num_tx_queues; - if (ntxq > num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { - printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU " - "disabled because CPU count (%d) exceeds number " - "of tx queues (%d) on %s\n", num_online_cpus(), ntxq, - pkt_dev->odev->name); - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; - } + if (ntxq <= pkt_dev->queue_map_min) { printk(KERN_WARNING "pktgen: WARNING: Requested " "queue_map_min (zero-based) (%d) exceeds valid range " @@ -2202,6 +2196,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) } pkt_dev->cur_queue_map = t; } + pkt_dev->cur_queue_map = pkt_dev->cur_queue_map % pkt_dev->odev->real_num_tx_queues; } /* Increment/randomize headers according to flags and current values -- cgit v1.2.3 From 566521d63720ab47576afb85147e5652993bf1e6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 19 Nov 2008 14:17:41 -0800 Subject: phonet: fix compilation with gcc-3.4 CC [M] net/phonet/af_phonet.o net/phonet/af_phonet.c: In function `pn_socket_create': net/phonet/af_phonet.c:38: sorry, unimplemented: inlining failed in call to 'phonet_proto_put': function body not available net/phonet/af_phonet.c:99: sorry, unimplemented: called from here make[3]: *** [net/phonet/af_phonet.o] Error 1 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 52 +++++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 7ab30f668b5a..9d211f12582b 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -33,9 +33,30 @@ #include #include -static struct net_proto_family phonet_proto_family; -static struct phonet_protocol *phonet_proto_get(int protocol); -static inline void phonet_proto_put(struct phonet_protocol *pp); +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} /* protocol family functions */ @@ -375,10 +396,6 @@ static struct packet_type phonet_packet_type = { .func = phonet_rcv, }; -/* Transport protocol registration */ -static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static DEFINE_SPINLOCK(proto_tab_lock); - int __init_or_module phonet_proto_register(int protocol, struct phonet_protocol *pp) { @@ -412,27 +429,6 @@ void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) } EXPORT_SYMBOL(phonet_proto_unregister); -static struct phonet_protocol *phonet_proto_get(int protocol) -{ - struct phonet_protocol *pp; - - if (protocol >= PHONET_NPROTO) - return NULL; - - spin_lock(&proto_tab_lock); - pp = proto_tab[protocol]; - if (pp && !try_module_get(pp->prot->owner)) - pp = NULL; - spin_unlock(&proto_tab_lock); - - return pp; -} - -static inline void phonet_proto_put(struct phonet_protocol *pp) -{ - module_put(pp->prot->owner); -} - /* Module registration */ static int __init phonet_init(void) { -- cgit v1.2.3 From b47300168e770b60ab96c8924854c3b0eb4260eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Nov 2008 15:33:54 -0800 Subject: net: Do not fire linkwatch events until the device is registered. Several device drivers try to do things like netif_carrier_off() before register_netdev() is invoked. This is bogus, but too many drivers do this to fix them all up in one go. Reported-by: Folkert van Heusden Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 93cd30ce6501..cdcd16fcfeda 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -285,8 +287,11 @@ EXPORT_SYMBOL(netif_carrier_on); */ void netif_carrier_off(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); + } } EXPORT_SYMBOL(netif_carrier_off); -- cgit v1.2.3 From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 19 Nov 2008 15:36:14 -0800 Subject: reintroduce accept4 Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: Ulrich Drepper Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/unistd_64.h | 4 +- include/linux/net.h | 6 +-- include/linux/syscalls.h | 3 +- kernel/sys_ni.c | 2 +- net/compat.c | 50 +++---------------------- net/socket.c | 80 +++++----------------------------------- 6 files changed, 21 insertions(+), 124 deletions(-) (limited to 'net') diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 834b2c1d89fb..d2e415e6666f 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_paccept 288 -__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_accept4 288 +__SYSCALL(__NR_accept4, sys_accept4) #define __NR_signalfd4 289 __SYSCALL(__NR_signalfd4, sys_signalfd4) #define __NR_eventfd2 290 diff --git a/include/linux/net.h b/include/linux/net.h index 6dc14a240042..4515efae4c39 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -40,7 +40,7 @@ #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ -#define SYS_PACCEPT 18 /* sys_paccept(2) */ +#define SYS_ACCEPT4 18 /* sys_accept4(2) */ typedef enum { SS_FREE = 0, /* not allocated */ @@ -100,7 +100,7 @@ enum sock_type { * remaining bits are used as flags. */ #define SOCK_TYPE_MASK 0xf -/* Flags for socket, socketpair, paccept */ +/* Flags for socket, socketpair, accept4 */ #define SOCK_CLOEXEC O_CLOEXEC #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK @@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); -extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags); #define net_random() random32() #define net_srandom(seed) srandom32((__force u32)seed) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d6ff145919ca..04fb47bfb920 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname, asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, - const __user sigset_t *, size_t, int); +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a77b27b11b04..e14a23281707 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair); cond_syscall(sys_bind); cond_syscall(sys_listen); cond_syscall(sys_accept); -cond_syscall(sys_paccept); +cond_syscall(sys_accept4); cond_syscall(sys_connect); cond_syscall(sys_getsockname); cond_syscall(sys_getpeername); diff --git a/net/compat.c b/net/compat.c index 6ce1a1cadcc0..a3a2ba0fac08 100644 --- a/net/compat.c +++ b/net/compat.c @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt); static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6)}; + AL(4)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize, int flags) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} - asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_PACCEPT) + if (call < SYS_SOCKET || call > SYS_ACCEPT4) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; - case SYS_PACCEPT: - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), - compat_ptr(a[3]), a[4], a[5]); + case SYS_ACCEPT4: + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; diff --git a/net/socket.c b/net/socket.c index 57550c3bcabe..92764d836891 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1426,8 +1426,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1510,66 +1510,10 @@ out_fd: goto out_put; } -#if 0 -#ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret < 0 && signal_pending(current)) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} -#else -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - /* The platform does not support restoring the signal mask in the - * return path. So we do not allow using paccept() with a signal - * mask. */ - if (sigmask) - return -EINVAL; - - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); -} -#endif -#endif - asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } /* @@ -2096,7 +2040,7 @@ static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6) + AL(4) }; #undef AL @@ -2115,7 +2059,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_PACCEPT) + if (call < 1 || call > SYS_ACCEPT4) return -EINVAL; /* copy_from_user should be SMP safe. */ @@ -2143,9 +2087,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) err = sys_listen(a0, a1); break; case SYS_ACCEPT: - err = - do_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], 0); + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2192,12 +2135,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; - case SYS_PACCEPT: - err = - sys_paccept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], - (const sigset_t __user *) a[3], - a[4], a[5]); + case SYS_ACCEPT4: + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], a[3]); break; default: err = -EINVAL; -- cgit v1.2.3 From a134f85c131ffd56720e38af2967ec6265480757 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 20 Nov 2008 01:07:24 -0800 Subject: TPROXY: fill struct flowi->flags in udp_sendmsg() udp_sendmsg() didn't fill struct flowi->flags, which means that the route lookup would fail for non-local IPs even if the IP_TRANSPARENT sockopt was set. This prevents sendto() to work properly for UDP sockets, whereas bind(foreign-ip) + connect() + send() worked fine. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- net/ipv4/udp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf02701ced48..98c1fd09be88 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,6 +633,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .saddr = saddr, .tos = tos } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; -- cgit v1.2.3 From c82838458200ec4167ce7083b0a17474150c5bf7 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 20 Nov 2008 01:08:06 -0800 Subject: TPROXY: supply a struct flowi->flags argument in inet_sk_rebuild_header() inet_sk_rebuild_header() does a new route lookup if the dst_entry associated with a socket becomes stale. However inet_sk_rebuild_header() didn't use struct flowi->flags, causing the route lookup to fail for foreign-bound IP_TRANSPARENT sockets, causing an error state to be set for the sockets in question. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1fbff5fa4241..1aa2dc9e380e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1117,6 +1117,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, -- cgit v1.2.3 From 3aa4614da741f10b09559a5675c79e2eff5cccd8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:07:14 -0800 Subject: pkt_sched: fix missing check for packet overrun in qdisc_dump_stab() nla_nest_start() might return NULL, causing a NULL pointer dereference. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b16ad2972c6b..6ab4a2f92ca0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -417,6 +417,8 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) struct nlattr *nest; nest = nla_nest_start(skb, TCA_STAB); + if (nest == NULL) + goto nla_put_failure; NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); nla_nest_end(skb, nest); -- cgit v1.2.3 From eedd726efbc439dbed94fb8577e5533a986b341f Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 20 Nov 2008 04:16:12 -0800 Subject: ipv6: use seq_release_private for ip6mr.c /proc entries In ip6mr.c, /proc entries /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif are opened with seq_open_private(), thus seq_release_private() should be used to release them. Should fix a small memory leak. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52a7eb0e2c2c..0524769632e7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -224,7 +224,7 @@ static struct file_operations ip6mr_vif_fops = { .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) @@ -338,7 +338,7 @@ static struct file_operations ip6mr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif -- cgit v1.2.3 From 5ece6c2ddd6f7da9e95dc325c742c0f5afbcecbe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 20 Nov 2008 04:20:10 -0800 Subject: net: fix tiny output corruption of /proc/net/snmp6 Because "name" is static, it can be occasionally be filled with somewhat garbage if two processes read /proc/net/snmp6. Also, remove useless casts and "-1" -- snprintf() correctly terminates it's output. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 07f0b76e7427..97c17fdd6f75 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -132,7 +132,7 @@ static struct snmp_mib snmp6_udplite6_list[] = { static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { - static char name[32]; + char name[32]; int i; /* print by name -- deprecated items */ @@ -144,7 +144,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) p = icmp6type2name[icmptype]; if (!p) /* don't print un-named types here */ continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%s%s", + snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? "Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, snmp_fold_field(mib, i)); @@ -157,7 +157,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) val = snmp_fold_field(mib, i); if (!val) continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%sType%u", + snprintf(name, sizeof(name), "Icmp6%sType%u", i & 0x100 ? "Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } -- cgit v1.2.3 From 23918b03060f6e572168fdde1798a905679d2e06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Nov 2008 16:06:21 -0500 Subject: SUNRPC: Fix a performance regression in the RPC authentication code Fix a regression reported by Max Kellermann whereby kernel profiling showed that his clients were spending 45% of their time in rpcauth_lookup_credcache. It turns out that although his processes had identical uid/gid/groups, generic_match() was failing to detect this, because the task->group_info pointers were not shared. This again lead to the creation of a huge number of identical credentials at the RPC layer. The regression is fixed by comparing the contents of task->group_info if the actual pointers are not identical. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- net/sunrpc/auth_generic.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 744b79fdcb19..4028502f0528 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -133,13 +133,29 @@ static int generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + int i; if (gcred->acred.uid != acred->uid || gcred->acred.gid != acred->gid || - gcred->acred.group_info != acred->group_info || gcred->acred.machine_cred != acred->machine_cred) - return 0; + goto out_nomatch; + + /* Optimisation in the case where pointers are identical... */ + if (gcred->acred.group_info == acred->group_info) + goto out_match; + + /* Slow path... */ + if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) + goto out_nomatch; + for (i = 0; i < gcred->acred.group_info->ngroups; i++) { + if (GROUP_AT(gcred->acred.group_info, i) != + GROUP_AT(acred->group_info, i)) + goto out_nomatch; + } +out_match: return 1; +out_nomatch: + return 0; } void __init rpc_init_generic_auth(void) -- cgit v1.2.3 From 33cf71cee14743185305c61625c4544885055733 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Fri, 21 Nov 2008 16:42:58 -0800 Subject: tcp: Do not use TSO/GSO when there is urgent data This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=12014 Since most (if not all) implementations of TSO and even the in-kernel software GSO do not update the urgent pointer when splitting a large segment, it is necessary to turn off TSO/GSO for all outgoing traffic with the URG pointer set. Looking at tcp_current_mss (and the preceding comment) I even think this was the original intention. However, this approach is insufficient, because TSO/GSO is turned off only for newly created frames, not for frames which were already pending at the arrival of a message with MSG_OOB set. These frames were created when TSO/GSO was enabled, so they may be large, and they will have the urgent pointer set in tcp_transmit_skb(). With this patch, such large packets will be fragmented again before going to the transmit routine. As a side note, at least the following NICs are known to screw up the urgent pointer in the TCP header when doing TSO: Intel 82566MM (PCI ID 8086:1049) Intel 82566DC (PCI ID 8086:104b) Intel 82541GI (PCI ID 8086:1076) Broadcom NetXtreme II BCM5708 (PCI ID 14e4:164c) Signed-off-by: Petr Tesarik Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba85d8831893..85b07eba1879 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,7 +722,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + tcp_urg_mode(tcp_sk(sk))) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1163,7 +1164,9 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { + if (!tso_segs || + (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || + tcp_urg_mode(tcp_sk(sk))))) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } -- cgit v1.2.3 From 7e56b5d698707a9934833c47b24d78fb0bcaf764 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 Nov 2008 16:45:22 -0800 Subject: net: Fix memory leak in the proto_register function If the slub allocator is used, kmem_cache_create() may merge two or more kmem_cache's into one but the cache name pointer is not updated and kmem_cache_name() is no longer guaranteed to return the pointer passed to the former function. This patch stores the kmalloc'ed pointers in the corresponding request_sock_ops and timewait_sock_ops structures. Signed-off-by: Catalin Marinas Acked-by: Arnaldo Carvalho de Melo Reviewed-by: Christoph Lameter Signed-off-by: David S. Miller --- include/net/request_sock.h | 1 + include/net/timewait_sock.h | 1 + net/core/sock.c | 31 ++++++++++++------------------- 3 files changed, 14 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cac811e51f6d..c7190846e128 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -31,6 +31,7 @@ struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; + char *slab_name; int (*rtx_syn_ack)(struct sock *sk, struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 1e1ee3253fd8..97c3b14da55d 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -16,6 +16,7 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; + char *twsk_slab_name; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); diff --git a/net/core/sock.c b/net/core/sock.c index 341e39456952..edf7220889a4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2035,9 +2035,6 @@ static inline void release_proto_idx(struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2051,12 +2048,12 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->rsk_prot != NULL) { static const char mask[] = "request_sock_%s"; - request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (request_sock_slab_name == NULL) + prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + if (prot->rsk_prot->slab_name == NULL) goto out_free_sock_slab; - sprintf(request_sock_slab_name, mask, prot->name); - prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name, + sprintf(prot->rsk_prot->slab_name, mask, prot->name); + prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, prot->rsk_prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2070,14 +2067,14 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; - timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); - if (timewait_sock_slab_name == NULL) + if (prot->twsk_prot->twsk_slab_name == NULL) goto out_free_request_sock_slab; - sprintf(timewait_sock_slab_name, mask, prot->name); + sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name); prot->twsk_prot->twsk_slab = - kmem_cache_create(timewait_sock_slab_name, + kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -2093,14 +2090,14 @@ int proto_register(struct proto *prot, int alloc_slab) return 0; out_free_timewait_sock_slab_name: - kfree(timewait_sock_slab_name); + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: if (prot->rsk_prot && prot->rsk_prot->slab) { kmem_cache_destroy(prot->rsk_prot->slab); prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(request_sock_slab_name); + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; @@ -2123,18 +2120,14 @@ void proto_unregister(struct proto *prot) } if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { - const char *name = kmem_cache_name(prot->rsk_prot->slab); - kmem_cache_destroy(prot->rsk_prot->slab); - kfree(name); + kfree(prot->rsk_prot->slab_name); prot->rsk_prot->slab = NULL; } if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(name); + kfree(prot->twsk_prot->twsk_slab_name); prot->twsk_prot->twsk_slab = NULL; } } -- cgit v1.2.3 From 2da2c21d7508d34bc6d600df665d84871b65d2b9 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 23 Nov 2008 09:58:08 -0600 Subject: Add a reference to sunrpc in svc_addsock The svc_addsock function adds transport instances without taking a reference on the sunrpc.ko module, however, the generic transport destruction code drops a reference when a transport instance is destroyed. Add a try_module_get call to the svc_addsock function for transport instances added by this function. Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields Tested-by: Jeff Moyer --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 95293f549e9c..a1951dcc5776 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1183,7 +1183,11 @@ int svc_addsock(struct svc_serv *serv, else if (so->state > SS_UNCONNECTED) err = -EISCONN; else { - svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); + if (!try_module_get(THIS_MODULE)) + err = -ENOENT; + else + svsk = svc_setup_socket(serv, so, &err, + SVC_SOCK_DEFAULTS); if (svsk) { struct sockaddr_storage addr; struct sockaddr *sin = (struct sockaddr *)&addr; @@ -1196,7 +1200,8 @@ int svc_addsock(struct svc_serv *serv, spin_unlock_bh(&serv->sv_lock); svc_xprt_received(&svsk->sk_xprt); err = 0; - } + } else + module_put(THIS_MODULE); } if (err) { sockfd_put(so); -- cgit v1.2.3 From b54ad409fd09a395b839fb81f300880d76861c0e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 24 Nov 2008 15:56:17 -0800 Subject: netfilter: ctnetlink: fix conntrack creation race Conntrack creation through ctnetlink has two races: - the timer may expire and free the conntrack concurrently, causing an invalid memory access when attempting to put it in the hash tables - an identical conntrack entry may be created in the packet processing path in the time between the lookup and hash insertion Hold the conntrack lock between the lookup and insertion to avoid this. Reported-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 -- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 622d7c671cb7..233fdd2d7d21 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -305,9 +305,7 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a040d46f85d6..3b009a3e8549 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1090,7 +1090,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1212,13 +1212,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, atomic_inc(&master_ct->ct_general.use); } - spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, master_ct); + spin_unlock_bh(&nf_conntrack_lock); + if (err < 0 && master_ct) nf_ct_put(master_ct); -- cgit v1.2.3 From 631339f1e544a4d39a63cfe6708c5bddcd5a2c48 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Nov 2008 16:06:50 -0800 Subject: bridge: netfilter: fix update_pmtu crash with GRE As GRE tries to call the update_pmtu function on skb->dst and bridge supplies an skb->dst that has a NULL ops field, all is not well. This patch fixes this by giving the bridge device an ops field with an update_pmtu function. For the moment I've left all other fields blank but we can fill them in later should the need arise. Based on report and patch by Philip Craig. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fa5cda4e552a..45f61c348e36 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,6 +101,18 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +{ +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = __constant_htons(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .entry_size = sizeof(struct rtable), + .entries = ATOMIC_INIT(0), +}; + /* * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter @@ -117,6 +129,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->u.dst.path = &rt->u.dst; rt->u.dst.metrics[RTAX_MTU - 1] = 1500; rt->u.dst.flags = DST_NOXFRM; + rt->u.dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) -- cgit v1.2.3 From 244f46ae6e9e18f6fc0be7d1f49febde4762c34b Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Mon, 24 Nov 2008 11:49:40 +0000 Subject: rose: zero length frame filtering in af_rose.c Since changeset e79ad711a0108475c1b3a03815527e7237020b08 from mainline, >From David S. Miller, empty packet can be transmitted on connected socket for datagram protocols. However, this patch broke a high level application using ROSE network protocol with connected datagram. Bulletin Board Stations perform bulletins forwarding between BBS stations via ROSE network using a forward protocol. Now, if for some reason, a buffer in the application software happens to be empty at a specific moment, ROSE sends an empty packet via unfiltered packet socket. When received, this ROSE packet introduces perturbations of data exchange of BBS forwarding, for the application message forwarding protocol is waiting for something else. We agree that a more careful programming of the application protocol would avoid this situation and we are willing to debug it. But, as an empty frame is no use and does not have any meaning for ROSE protocol, we may consider filtering zero length data both when sending and receiving socket data. The proposed patch repaired BBS data exchange through ROSE network that were broken since 2.6.22.11 kernel. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/rose/af_rose.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index a7f1ce11bc22..0c1cc7612800 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,6 +1072,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; + /* ROSE empty frame has no meaning : don't send */ + if (len == 0) + return 0; + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1265,6 +1269,12 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* ROSE empty frame has no meaning : ignore it */ + if (copied == 0) { + skb_free_datagram(sk, skb); + return copied; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From 3dd3b79aeadc6f6abc5cc78724d7df3dfcc1bd0b Mon Sep 17 00:00:00 2001 From: Abhijeet Kolekar Date: Thu, 20 Nov 2008 10:20:31 -0800 Subject: mac80211 : Fix setting ad-hoc mode and non-ibss channel Patch fixes the kernel trace when user tries to set ad-hoc mode on non IBSS channel. e.g iwconfig wlan0 chan 36 mode ad-hoc Signed-off-by: Abhijeet Kolekar Signed-off-by: John W. Linville --- net/mac80211/wext.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 742f811ca416..ab4ddba874be 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -271,6 +271,7 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, __u32 *mode, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int type; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -281,6 +282,13 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: + /* Setting ad-hoc mode on non ibss channel is not + * supported. + */ + if (local->oper_channel && + (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) + return -EOPNOTSUPP; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: -- cgit v1.2.3 From 020cf6ba7a91ccc5db359f91e9abba175fd3a0aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 20:09:54 +0100 Subject: net/wireless/reg.c: fix bad WARN_ON in if statement fix: net/wireless/reg.c:348:29: error: macro "if" passed 2 arguments, but takes just 1 triggered by the branch-tracer. Signed-off-by: Ingo Molnar Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 626dbb688499..eb3b1a9f9b12 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -343,9 +343,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return 0; return -EALREADY; } - if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + if (WARN(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2), "Invalid Country IE regulatory hint passed " - "to the wireless core\n") + "to the wireless core\n")) return -EINVAL; /* We ignore Country IE hints for now, as we haven't yet * added the dot11MultiDomainCapabilityEnabled flag -- cgit v1.2.3 From 8f480c0e4e120911a673ed7385359bf76ae01963 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 25 Nov 2008 21:08:13 -0800 Subject: net: make skb_truesize_bug() call WARN() The truesize message check is important enough to make it print "BUG" to the user console... lets also make it important enough to spit a backtrace/module list etc so that kerneloops.org can track them. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d49ef8301b5b..65f7757465bd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -149,7 +149,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) void skb_truesize_bug(struct sk_buff *skb) { - printk(KERN_ERR "SKB BUG: Invalid truesize (%u) " + WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " "len=%u, sizeof(sk_buff)=%Zd\n", skb->truesize, skb->len, sizeof(struct sk_buff)); } -- cgit v1.2.3 From 3ec192559033ed457f0d7856838654c100fc659f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Nov 2008 03:57:44 -0800 Subject: netfilter: ctnetlink: fix GFP_KERNEL allocation under spinlock The previous fix for the conntrack creation race (netfilter: ctnetlink: fix conntrack creation race) missed a GFP_KERNEL allocation that is now performed while holding a spinlock. Switch to GFP_ATOMIC. Reported-and-tested-by: Zoltan Borbely Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3b009a3e8549..5f4a6516b3b6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1138,7 +1138,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } - nf_ct_acct_ext_add(ct, GFP_KERNEL); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3 From 7e5ab54296a41f590c9cbc976e1c86272f3a7e00 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 26 Nov 2008 15:26:43 -0800 Subject: Phonet: fix oops in phonet_address_del() on non-Phonet device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A NULL dereference would occur when trying to delete an addres from a network device that does not have any Phonet address. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 53be9fc82aaa..f93ff8ef47d0 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -115,7 +115,7 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; - if (bitmap_empty(pnd->addrs, 64)) + else if (bitmap_empty(pnd->addrs, 64)) __phonet_device_free(pnd); spin_unlock_bh(&pndevs.lock); return err; -- cgit v1.2.3 From 5f23b734963ec7eaa3ebcd9050da0c9b7d143dd3 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Wed, 26 Nov 2008 15:32:27 -0800 Subject: net: Fix soft lockups/OOM issues w/ unix garbage collector This is an implementation of David Miller's suggested fix in: https://bugzilla.redhat.com/show_bug.cgi?id=470201 It has been updated to use wait_event() instead of wait_event_interruptible(). Paraphrasing the description from the above report, it makes sendmsg() block while UNIX garbage collection is in progress. This avoids a situation where child processes continue to queue new FDs over a AF_UNIX socket to a parent which is in the exit path and running garbage collection on these FDs. This contention can result in soft lockups and oom-killing of unrelated processes. Signed-off-by: dann frazier Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 2 ++ net/unix/garbage.c | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c29ff1da8a18..1614d78c60ed 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -9,6 +9,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); +extern void wait_for_unix_gc(void); #define UNIX_HASH_SIZE 256 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb90f77bb0e2..66d5ac4773ab 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1343,6 +1343,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; + wait_for_unix_gc(); err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; @@ -1493,6 +1494,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; + wait_for_unix_gc(); err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6d4a9a8de5ef..abb3ab34cb1e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ static LIST_HEAD(gc_inflight_list); static LIST_HEAD(gc_candidates); static DEFINE_SPINLOCK(unix_gc_lock); +static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; @@ -266,12 +268,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) list_move_tail(&u->link, &gc_candidates); } -/* The external entry point: unix_gc() */ +static bool gc_in_progress = false; -void unix_gc(void) +void wait_for_unix_gc(void) { - static bool gc_in_progress = false; + wait_event(unix_gc_wait, gc_in_progress == false); +} +/* The external entry point: unix_gc() */ +void unix_gc(void) +{ struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -376,6 +382,7 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); gc_in_progress = false; + wake_up(&unix_gc_wait); out: spin_unlock(&unix_gc_lock); -- cgit v1.2.3 From d5654efd3ff1cd0baa935a0c9a5d89862f07d009 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 3 Dec 2008 00:27:18 -0800 Subject: xfrm: Fix kernel panic when flush and dump SPD entries After flush the SPD entries, dump the SPD entries will cause kernel painc. Used the following commands to reproduct: - echo 'spdflush;' | setkey -c - echo 'spdadd 3ffe:501:ffff:ff01::/64 3ffe:501:ffff:ff04::/64 any -P out ipsec \ ah/tunnel/3ffe:501:ffff:ff00:200:ff:fe00:b0b0-3ffe:501:ffff:ff02:200:ff:fe00:a1a1/require;\ spddump;' | setkey -c - echo 'spdflush; spddump;' | setkey -c - echo 'spdadd 3ffe:501:ffff:ff01::/64 3ffe:501:ffff:ff04::/64 any -P out ipsec \ ah/tunnel/3ffe:501:ffff:ff00:200:ff:fe00:b0b0-3ffe:501:ffff:ff02:200:ff:fe00:a1a1/require;\ spddump;' | setkey -c This is because when flush the SPD entries, the SPD entry is not remove from the list. This patch fix the problem by remove the SPD entry from the list. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 058f04f54b90..fb216c9adf86 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -817,6 +817,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, -- cgit v1.2.3 From d25830e5507f6bc815f5dd7e2eb65f172e878a2b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 3 Dec 2008 00:37:04 -0800 Subject: netlabel: Fix a potential NULL pointer dereference Fix a potential NULL pointer dereference seen when trying to remove a static label configuration with an invalid address/mask combination. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e8a5c32b0f10..90c8506a0aac 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -574,9 +574,10 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr4_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr4_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); @@ -634,9 +635,10 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); - if (list_entry == NULL) + if (list_entry != NULL) + entry = netlbl_unlhsh_addr6_entry(list_entry); + else ret_val = -ENOENT; - entry = netlbl_unlhsh_addr6_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); -- cgit v1.2.3 From bd7df219202f44e71e2e975a0fb5f76f946c1aef Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Mon, 1 Dec 2008 02:37:20 +0000 Subject: Phonet: do not dump addresses from other namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b1770d66bc8d..242fe8f8c322 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -123,6 +123,7 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct phonet_device *pnd; int dev_idx = 0, dev_start_idx = cb->args[0]; int addr_idx = 0, addr_start_idx = cb->args[1]; @@ -131,6 +132,8 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(pnd, &pndevs.list, list) { u8 addr; + if (!net_eq(dev_net(pnd->netdev), net)) + continue; if (dev_idx > dev_start_idx) addr_start_idx = 0; if (dev_idx++ < dev_start_idx) -- cgit v1.2.3 From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 3 Dec 2008 15:52:35 -0800 Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter Due to a wrong safety check in af_can.c it was not possible to filter for SFF frames with a specific CAN identifier without getting the same selected CAN identifier from a received EFF frame also. This fix has a minimum (but user visible) impact on the CAN filter API and therefore the CAN version is set to a new date. Indeed the 'old' API is still working as-is. But when now setting CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic than before - but still the stuff that you expected to get for your defined filter ... Thanks to Kurt Van Dijck for pointing at this issue and for the review. Signed-off-by: Oliver Hartkopp Acked-by: Kurt Van Dijck Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 +- net/can/af_can.c | 63 ++++++++++++++++++++++++++++++++++++------------ net/can/bcm.c | 7 +++--- 3 files changed, 53 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e9ca210ffa5b..f50785ad4781 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20071116" +#define CAN_VERSION "20081130" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" diff --git a/net/can/af_can.c b/net/can/af_can.c index 7d4d2b3c137e..d8173e50cb87 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -319,23 +319,52 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) return n ? d : NULL; } +/** + * find_rcv_list - determine optimal filterlist inside device filter struct + * @can_id: pointer to CAN identifier of a given can_filter + * @mask: pointer to CAN mask of a given can_filter + * @d: pointer to the device filter struct + * + * Description: + * Returns the optimal filterlist to reduce the filter handling in the + * receive path. This function is called by service functions that need + * to register or unregister a can_filter in the filter lists. + * + * A filter matches in general, when + * + * & mask == can_id & mask + * + * so every bit set in the mask (even CAN_EFF_FLAG, CAN_RTR_FLAG) describe + * relevant bits for the filter. + * + * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can + * filter for error frames (CAN_ERR_FLAG bit set in mask). For error frames + * there is a special filterlist and a special rx path filter handling. + * + * Return: + * Pointer to optimal filterlist for the given can_id/mask pair. + * Constistency checked mask. + * Reduced can_id to have a preprocessed filter compare value. + */ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, struct dev_rcv_lists *d) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ - /* filter error frames */ + /* filter for error frames in extra filterlist */ if (*mask & CAN_ERR_FLAG) { - /* clear CAN_ERR_FLAG in list entry */ + /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; return &d->rx[RX_ERR]; } - /* ensure valid values in can_mask */ - if (*mask & CAN_EFF_FLAG) - *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG); - else - *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG); + /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ + +#define CAN_EFF_RTR_FLAGS (CAN_EFF_FLAG | CAN_RTR_FLAG) + + /* ensure valid values in can_mask for 'SFF only' frame filtering */ + if ((*mask & CAN_EFF_FLAG) && !(*can_id & CAN_EFF_FLAG)) + *mask &= (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS); /* reduce condition testing at receive time */ *can_id &= *mask; @@ -348,15 +377,19 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (!(*mask)) return &d->rx[RX_ALL]; - /* use extra filterset for the subscription of exactly *ONE* can_id */ - if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) { - /* RFC: a use-case for hash-tables in the future? */ - return &d->rx[RX_EFF]; + /* extra filterlists for the subscription of a single non-RTR can_id */ + if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) + && !(*can_id & CAN_RTR_FLAG)) { + + if (*can_id & CAN_EFF_FLAG) { + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { + /* RFC: a future use-case for hash-tables? */ + return &d->rx[RX_EFF]; + } + } else { + if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_sff[*can_id]; } - } else { - if (*mask == CAN_SFF_MASK) - return &d->rx_sff[*can_id]; } /* default: filter via can_id/can_mask */ diff --git a/net/can/bcm.c b/net/can/bcm.c index d0dd382001e2..da0d426c0ce4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -64,10 +64,11 @@ #define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */ /* get best masking value for can_rx_register() for a given single can_id */ -#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \ - (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK)) +#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ + (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ + (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20080415" +#define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; -- cgit v1.2.3 From f8269a495a1924f8b023532dd3e77423432db810 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Wed, 3 Dec 2008 21:24:48 -0800 Subject: tcp: make urg+gso work for real this time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I should have noticed this earlier... :-) The previous solution to URG+GSO/TSO will cause SACK block tcp_fragment to do zig-zig patterns, or even worse, a steep downward slope into packet counting because each skb pcount would be truncated to pcount of 2 and then the following fragments of the later portion would restore the window again. Basically this reverts "tcp: Do not use TSO/GSO when there is urgent data" (33cf71cee1). It also removes some unnecessary code from tcp_current_mss that didn't work as intented either (could be that something was changed down the road, or it might have been broken since the dawn of time) because it only works once urg is already written while this bug shows up starting from ~64k before the urg point. The retransmissions already are split to mss sized chunks, so only new data sending paths need splitting in case they have a segment otherwise suitable for gso/tso. The actually check can be improved to be more narrow but since this is late -rc already, I'll postpone thinking the more fine-grained things. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 85b07eba1879..fe3b4bdfd251 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -722,8 +722,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - tcp_urg_mode(tcp_sk(sk))) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1029,10 +1028,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. - * - * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up - * cannot be large. However, taking into account rare use of URG, this - * is not a big flaw. */ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) { @@ -1047,7 +1042,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) + if (large_allowed && sk_can_gso(sk)) doing_tso = 1; if (dst) { @@ -1164,9 +1159,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || - (tso_segs > 1 && (tcp_skb_mss(skb) != mss_now || - tcp_urg_mode(tcp_sk(sk))))) { + if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1519,6 +1512,10 @@ static int tcp_mtu_probe(struct sock *sk) * send_head. This happens as incoming acks open up the remote * window for us. * + * LARGESEND note: !tcp_urg_mode is overkill, only frames between + * snd_up-64k-mss .. snd_up cannot be large. However, taking into + * account rare use of URG, this is not a big flaw. + * * Returns 1, if no segments are in flight and we have queued segments, but * cannot send anything now because of SWS or another problem. */ @@ -1570,7 +1567,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) } limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); @@ -1619,6 +1616,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; @@ -1633,7 +1631,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!tso_segs); limit = mss_now; - if (tso_segs > 1) + if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, cwnd_quota); -- cgit v1.2.3 From 17b24b3c97498935a2ef9777370b1151dfed3f6f Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Thu, 4 Dec 2008 14:58:13 -0800 Subject: ATM: CVE-2008-5079: duplicate listen() on socket corrupts the vcc table As reported by Hugo Dias that it is possible to cause a local denial of service attack by calling the svc_listen function twice on the same socket and reading /proc/net/atm/*vc Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/svc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/svc.c b/net/atm/svc.c index de1e4f2f3a43..8fb54dc870b3 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -293,7 +293,10 @@ static int svc_listen(struct socket *sock,int backlog) error = -EINVAL; goto out; } - vcc_insert_socket(sk); + if (test_bit(ATM_VF_LISTEN, &vcc->flags)) { + error = -EADDRINUSE; + goto out; + } set_bit(ATM_VF_WAITING, &vcc->flags); prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_listen,NULL,NULL,&vcc->local); @@ -307,6 +310,7 @@ static int svc_listen(struct socket *sock,int backlog) goto out; } set_bit(ATM_VF_LISTEN,&vcc->flags); + vcc_insert_socket(sk); sk->sk_max_ack_backlog = backlog > 0 ? backlog : ATM_BACKLOG_DEFAULT; error = -sk->sk_err; out: -- cgit v1.2.3 From f706644d55f90e8306d87060168fef33804d6dd9 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 4 Dec 2008 15:01:08 -0800 Subject: can: omit received RTR frames for single ID filter lists Since commit d253eee20195b25e298bf162a6e72f14bf4803e5 the single CAN identifier filter lists handle only non-RTR CAN frames. So we need to omit the check of these filter lists when receiving RTR CAN frames. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index d8173e50cb87..3dadb338addd 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -622,7 +622,10 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } - /* check CAN_ID specific entries */ + /* check filterlists for single non-RTR can_ids */ + if (can_id & CAN_RTR_FLAG) + return matches; + if (can_id & CAN_EFF_FLAG) { hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { -- cgit v1.2.3 From a6af2d6ba5797c556fba0cd3a19e5f3bc9a99b76 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Thu, 4 Dec 2008 17:17:18 -0800 Subject: tcp: tcp_vegas ssthresh bug fix This patch fixes a bug in tcp_vegas.c. At the moment this code leaves ssthresh untouched. However, this means that the vegas congestion control algorithm is effectively unable to reduce cwnd below the ssthresh value (if the vegas update lowers the cwnd below ssthresh, then slow start is activated to raise it back up). One example where this matters is when during slow start cwnd overshoots the link capacity and a flow then exits slow start with ssthresh set to a value above where congestion avoidance would like to adjust it. Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 14504dada116..7cd22262de3a 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -326,6 +326,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) tp->snd_cwnd = tp->snd_cwnd_clamp; + + tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ -- cgit v1.2.3 From 5cf12e8dc641ef028f0cf9c317a9567e6b794de1 Mon Sep 17 00:00:00 2001 From: Shaddy Baddah Date: Fri, 28 Nov 2008 17:08:10 +1100 Subject: mac80211: use unaligned safe memcmp() in-place of compare_ether_addr() After fixing zd1211rw: use unaligned safe memcmp() in-place of compare_ether_addr(), I started to see kernel log messages detailing unaligned access: Kernel unaligned access at TPC[100f7f44] sta_info_get+0x24/0x68 [mac80211] As with the aforementioned patch, the unaligned access was eminating from a compare_ether_addr() call. Concerned that whilst it was safe to assume that unalignment was the norm for the zd1211rw, and take preventative measures, it may not be the case or acceptable to use the easy fix of changing the call to memcmp(). My research however indicated that it was OK to do this, as there are a few instances where memcmp() is the preferred mechanism for doing mac address comparisons throughout the module. Signed-off-by: Shaddy Baddah Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7fef8ea1f5ec..d254446b85b5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,7 +99,7 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->sta.addr, addr) == 0) + if (memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference(sta->hnext); } -- cgit v1.2.3 From c49b9f295e513753e6d9bb4444ba502f1aa59b29 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 7 Dec 2008 23:53:46 -0800 Subject: tproxy: fixe a possible read from an invalid location in the socket match TIME_WAIT sockets need to be handled specially, and the socket match casted inet_timewait_sock instances to inet_sock, which are not compatible. Handle this special case by checking sk->sk_state. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- net/netfilter/xt_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02a8fed21082..1acc089be7e9 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -141,7 +141,7 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (inet_sk(sk)->rcv_saddr == 0); + bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); nf_tproxy_put_sock(sk); if (wildcard) -- cgit v1.2.3