diff options
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r-- | fs/eventpoll.c | 79 |
1 files changed, 73 insertions, 6 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aabdfc38cf24..629e9ed99d0f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait); +} + /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { @@ -422,6 +427,31 @@ out_unlock: return error; } +/* + * As described in commit 0ccf831cb lockdep: annotate epoll + * the use of wait queues used by epoll is done in a very controlled + * manner. Wake ups can nest inside each other, but are never done + * with the same locking. For example: + * + * dfd = socket(...); + * efd1 = epoll_create(); + * efd2 = epoll_create(); + * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); + * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); + * + * When a packet arrives to the device underneath "dfd", the net code will + * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a + * callback wakeup entry on that queue, and the wake_up() performed by the + * "dfd" net code will end up in ep_poll_callback(). At this point epoll + * (efd1) notices that it may have some event ready, so it needs to wake up + * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() + * that ends up in another wake_up(), after having checked about the + * recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to + * avoid stack blasting. + * + * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle + * this special case of epoll. + */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, unsigned long events, int subclass) @@ -467,6 +497,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq) put_cpu(); } +static void ep_remove_wait_queue(struct eppoll_entry *pwq) +{ + wait_queue_head_t *whead; + + rcu_read_lock(); + /* If it is cleared by POLLFREE, it should be rcu-safe */ + whead = rcu_dereference(pwq->whead); + if (whead) + remove_wait_queue(whead, &pwq->wait); + rcu_read_unlock(); +} + /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held (or "epmutex" if called from @@ -481,7 +523,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) pwq = list_first_entry(lsthead, struct eppoll_entry, llink); list_del(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); + ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } @@ -682,9 +724,12 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, void *priv) { struct epitem *epi, *tmp; + poll_table pt; + init_poll_funcptr(&pt, NULL); list_for_each_entry_safe(epi, tmp, head, rdllink) { - if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + pt._key = epi->event.events; + if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & epi->event.events) return POLLIN | POLLRDNORM; else { @@ -842,6 +887,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + if ((unsigned long)key & POLLFREE) { + ep_pwq_from_wait(wait)->whead = NULL; + /* + * whead = NULL above can race with ep_remove_wait_queue() + * which can do another remove_wait_queue() after us, so we + * can't use __remove_wait_queue(). whead->lock is held by + * the caller. + */ + list_del_init(&wait->task_list); + } + spin_lock_irqsave(&ep->lock, flags); /* @@ -960,6 +1016,10 @@ static int path_count[PATH_ARR_SIZE]; static int path_count_inc(int nests) { + /* Allow an arbitrary number of depth 1 paths */ + if (nests == 0) + return 0; + if (++path_count[nests] > path_limits[nests]) return -1; return 0; @@ -1017,13 +1077,11 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) */ static int reverse_path_check(void) { - int length = 0; int error = 0; struct file *current_file; /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { - length++; path_count_init(); error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, reverse_path_check_proc, current_file, @@ -1065,6 +1123,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); + epq.pt._key = event->events; /* * Attach the item to the poll hooks and get current event bits. @@ -1159,6 +1218,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even { int pwake = 0; unsigned int revents; + poll_table pt; + + init_poll_funcptr(&pt, NULL); /* * Set the new event interest mask before calling f_op->poll(); @@ -1166,13 +1228,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; + pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ - revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); + revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt); /* * If the item is "hot" and it is not registered inside the ready @@ -1207,6 +1270,9 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, unsigned int revents; struct epitem *epi; struct epoll_event __user *uevent; + poll_table pt; + + init_poll_funcptr(&pt, NULL); /* * We can loop without lock because we are passed a task private list. @@ -1219,7 +1285,8 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, list_del_init(&epi->rdllink); - revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) & + pt._key = epi->event.events; + revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) & epi->event.events; /* |