#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/wait.h>
#include <linux/eventpoll.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <asm/uaccess.h>
/* Epoll private bits inside the event mask */
#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)

/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4

#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))

/* Marker for epitems that are not linked on the ->ovflist chain */
#define EP_UNACTIVE_PTR ((void *) -1L)

#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
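/*
 * Illustration (not part of this file): the EP_PRIVATE_BITS flags are
 * consumed by epoll itself rather than reported by the target file. A
 * minimal userspace sketch of arming a one-shot, edge-triggered watch
 * ("epfd" and "sock_fd" are placeholder descriptors):
 *
 *	struct epoll_event ev = {
 *		.events = EPOLLIN | EPOLLET | EPOLLONESHOT,
 *		.data.fd = sock_fd,
 *	};
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, sock_fd, &ev);
 *	// After the event fires once, the watch stays disarmed until
 *	// userspace re-arms it with EPOLL_CTL_MOD.
 */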
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

static long zero;
static long long_max = LONG_MAX;

/* Sysctl entry exposing the per-user watch limit (fs.epoll.max_user_watches) */
ctl_table epoll_table[] = {
	{
		.procname	= "max_user_watches",
		.data		= &max_user_watches,
		.maxlen		= sizeof(max_user_watches),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &zero,
		.extra2		= &long_max,
	},
	{ }
};
#endif /* CONFIG_SYSCTL */
/* Tells if a given file is an eventpoll file */
static inline int is_file_epoll(struct file *f)
{
	return f->f_op == &eventpoll_fops;
}
/* Tells us if the item is currently linked */
static inline int ep_is_linked(struct list_head *p)
{
	return !list_empty(p);
}
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
	return op != EPOLL_CTL_DEL;
}
/* Initialize the poll safe wake up structure */
static void ep_nested_calls_init(struct nested_calls *ncalls)
{
	INIT_LIST_HEAD(&ncalls->tasks_call_list);
	spin_lock_init(&ncalls->lock);
}
/*
 * Checks if ready events are available. Called without any lock held;
 * callers that sleep recheck the result under ep->lock.
 */
static inline int ep_events_available(struct eventpoll *ep)
{
	return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
}
/*
 * ep_call_nested - Perform a bound (possibly) nested call, by checking
 * that the recursion limit is not exceeded, and that the same nested
 * call (by the meaning of same cookie) is not re-entered.
 */
static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
			  int (*nproc)(void *, void *, int), void *priv,
			  void *cookie, void *ctx)
{
	int error, call_nests = 0;
	unsigned long flags;
	struct list_head *lsthead = &ncalls->tasks_call_list;
	struct nested_call_node *tncur;
	struct nested_call_node tnode;

	spin_lock_irqsave(&ncalls->lock, flags);

	/*
	 * Try to see if the current task is already inside this wakeup call.
	 * We use a list here, since the population inside this set is always
	 * very much limited.
	 */
	list_for_each_entry(tncur, lsthead, llink) {
		if (tncur->ctx == ctx &&
		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
			/* Loop detected or maximum nest level reached */
			error = -1;
			goto out_unlock;
		}
	}

	/* Add the current task and cookie to the list */
	tnode.ctx = ctx;
	tnode.cookie = cookie;
	list_add(&tnode.llink, lsthead);

	spin_unlock_irqrestore(&ncalls->lock, flags);

	/* Call the nested function */
	error = (*nproc)(priv, cookie, call_nests);

	/* Remove the current task from the list */
	spin_lock_irqsave(&ncalls->lock, flags);
	list_del(&tnode.llink);
out_unlock:
	spin_unlock_irqrestore(&ncalls->lock, flags);

	return error;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
				     unsigned long events, int subclass)
{
	unsigned long flags;

	spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
	wake_up_locked_poll(wqueue, events);
	spin_unlock_irqrestore(&wqueue->lock, flags);
}
#else
static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
				     unsigned long events, int subclass)
{
	wake_up_poll(wqueue, events);
}
#endif

static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
{
	ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
			  1 + call_nests);
	return 0;
}

/* Perform a safe wake up of the poll wait list, bounding the recursion */
static void ep_poll_safewake(wait_queue_head_t *wq)
{
	int this_cpu = get_cpu();

	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);

	put_cpu();
}
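/*
 * Example scenario (not from this file) of the nesting the above calls
 * guard against: an epoll fd registered inside another epoll set makes
 * wakeups recurse through ep_poll_safewake(). A userspace sketch that
 * creates such a chain ("inner" and "outer" are placeholder names):
 *
 *	int inner = epoll_create1(0);
 *	int outer = epoll_create1(0);
 *	struct epoll_event ev = { .events = EPOLLIN, .data.fd = inner };
 *	epoll_ctl(outer, EPOLL_CTL_ADD, inner, &ev);
 *	// A wakeup on a file watched by "inner" must now be propagated
 *	// to "outer" as well; EP_MAX_NESTS bounds how deep such chains
 *	// may wake through.
 */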
static void ep_remove_wait_queue(struct eppoll_entry *pwq)
{
	wait_queue_head_t *whead;

	rcu_read_lock();
	/* If it is cleared by POLLFREE, it should be rcu-safe */
	whead = rcu_dereference(pwq->whead);
	if (whead)
		remove_wait_queue(whead, &pwq->wait);
	rcu_read_unlock();
}

/*
 * Unlink and free all the poll wait queue hooks attached to @epi.
 * Must be called with "mtx" held.
 */
static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
{
	struct list_head *lsthead = &epi->pwqlist;
	struct eppoll_entry *pwq;

	while (!list_empty(lsthead)) {
		pwq = list_first_entry(lsthead, struct eppoll_entry, llink);

		list_del(&pwq->llink);
		ep_remove_wait_queue(pwq);
		kmem_cache_free(pwq_cache, pwq);
	}
}
/*
 * ep_scan_ready_list - Scans the ready list in a way that makes it possible
 * for the scan code to call f_op->poll(), with O(NumReady) performance.
 */
static int ep_scan_ready_list(struct eventpoll *ep,
			      int (*sproc)(struct eventpoll *,
					   struct list_head *, void *),
			      void *priv, int depth)
{
	int error, pwake = 0;
	unsigned long flags;
	struct epitem *epi, *nepi;
	LIST_HEAD(txlist);

	mutex_lock_nested(&ep->mtx, depth);

	/*
	 * Steal the ready list, and set ep->ovflist so that events arriving
	 * while we scan without locks are not lost.
	 */
	spin_lock_irqsave(&ep->lock, flags);
	list_splice_init(&ep->rdllist, &txlist);
	ep->ovflist = NULL;
	spin_unlock_irqrestore(&ep->lock, flags);

	/* Now call the callback function */
	error = (*sproc)(ep, &txlist, priv);

	spin_lock_irqsave(&ep->lock, flags);
	/* Re-insert items queued on ->ovflist while "sproc" was running */
	for (nepi = ep->ovflist; (epi = nepi) != NULL;
	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
		/* "txlist" might already contain the item */
		if (!ep_is_linked(&epi->rdllink)) {
			list_add_tail(&epi->rdllink, &ep->rdllist);
			__pm_stay_awake(epi->ws);
		}
	}
	ep->ovflist = EP_UNACTIVE_PTR;

	/* Quickly re-inject items left on "txlist" */
	list_splice(&txlist, &ep->rdllist);
	__pm_relax(ep->ws);

	if (!list_empty(&ep->rdllist)) {
		/* Wake up (if active) both wait lists; poll_wait is delayed */
		if (waitqueue_active(&ep->wq))
			wake_up_locked(&ep->wq);
		if (waitqueue_active(&ep->poll_wait))
			pwake++;
	}
	spin_unlock_irqrestore(&ep->lock, flags);

	mutex_unlock(&ep->mtx);

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(&ep->poll_wait);

	return error;
}
/*
 * Removes a "struct epitem" from the eventpoll RB tree and deallocates
 * all the associated resources. Must be called with "mtx" held.
 */
static int ep_remove(struct eventpoll *ep, struct epitem *epi)
{
	unsigned long flags;
	struct file *file = epi->ffd.file;

	/* Removes poll wait queue hooks */
	ep_unregister_pollwait(ep, epi);

	/* Remove the current item from the list of epoll hooks */
	spin_lock(&file->f_lock);
	if (ep_is_linked(&epi->fllink))
		list_del_init(&epi->fllink);
	spin_unlock(&file->f_lock);

	rb_erase(&epi->rbn, &ep->rbr);

	spin_lock_irqsave(&ep->lock, flags);
	if (ep_is_linked(&epi->rdllink))
		list_del_init(&epi->rdllink);
	spin_unlock_irqrestore(&ep->lock, flags);

	wakeup_source_unregister(epi->ws);

	/* At this point it is safe to free the eventpoll item */
	kmem_cache_free(epi_cache, epi);

	atomic_long_dec(&ep->user->epoll_watches);

	return 0;
}
static void ep_free(struct eventpoll *ep)
{
	struct rb_node *rbp;
	struct epitem *epi;

	/* ... */

	/* Walk the tree, unregistering the poll wait queue hooks */
	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
		epi = rb_entry(rbp, struct epitem, rbn);
		ep_unregister_pollwait(ep, epi);
	}

	/* ... */
}
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
	struct eventpoll *ep = file->private_data;

	if (ep)
		ep_free(ep);

	return 0;
}

static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
			       void *priv)
{
	struct epitem *epi, *tmp;
	poll_table pt;

	init_poll_funcptr(&pt, NULL);

	list_for_each_entry_safe(epi, tmp, head, rdllink) {
		pt._key = epi->event.events;
		if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
		    epi->event.events)
			return POLLIN | POLLRDNORM;
		else {
			/*
			 * The item was dropped into the ready list by the
			 * poll callback but is not actually ready, as far
			 * as the caller-requested events go; remove it.
			 */
			__pm_relax(epi->ws);
			list_del_init(&epi->rdllink);
		}
	}

	return 0;
}
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
	return ep_scan_ready_list(priv, ep_read_events_proc, NULL,
				  call_nests + 1);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
	int pollflags;
	struct eventpoll *ep = file->private_data;

	/* Insert inside our poll wait queue */
	poll_wait(file, &ep->poll_wait, wait);

	/*
	 * Proceed to find out if wanted events are really available inside
	 * the ready list. This needs ep_call_nested() supervision, since
	 * the f_op->poll() calls done on the listed files could re-enter here.
	 */
	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
				   ep_poll_readyevents_proc, ep, ep, current);

	return pollflags != -1 ? pollflags : 0;
}
/* File callbacks that implement the eventpoll file behaviour */
static const struct file_operations eventpoll_fops = {
	.release	= ep_eventpoll_release,
	.poll		= ep_eventpoll_poll,
	.llseek		= noop_llseek,
};
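/*
 * Because eventpoll_fops provides ->poll, an epoll fd can itself be
 * watched. A minimal userspace sketch (descriptor names are
 * placeholders) that ends up exercising ep_eventpoll_poll():
 *
 *	struct epoll_event ev = { .events = EPOLLIN, .data.fd = epfd };
 *	epoll_ctl(other_epfd, EPOLL_CTL_ADD, epfd, &ev);
 *	// other_epfd reports EPOLLIN once epfd has ready events.
 */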
/*
 * This is called from eventpoll_release() to unlink files from the eventpoll
 * interface, cleaning up files that were closed without first being removed.
 */
void eventpoll_release_file(struct file *file)
{
	struct list_head *lsthead = &file->f_ep_links;
	struct eventpoll *ep;
	struct epitem *epi;

	mutex_lock(&epmutex);

	while (!list_empty(lsthead)) {
		epi = list_first_entry(lsthead, struct epitem, fllink);

		ep = epi->ep;
		list_del_init(&epi->fllink);
		mutex_lock_nested(&ep->mtx, 0);
		ep_remove(ep, epi);
		mutex_unlock(&ep->mtx);
	}

	mutex_unlock(&epmutex);
}
static int ep_alloc(struct eventpoll **pep)
{
	/* ... allocate and initialize a "struct eventpoll" in *pep ... */
}
/*
 * Search the file inside the eventpoll tree. The RB tree operations
 * are protected by the "mtx" mutex, held by the caller.
 */
static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
{
	int kcmp;
	struct rb_node *rbp;
	struct epitem *epi, *epir = NULL;
	struct epoll_filefd ffd;

	ep_set_ffd(&ffd, file, fd);
	for (rbp = ep->rbr.rb_node; rbp; ) {
		epi = rb_entry(rbp, struct epitem, rbn);
		kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
		if (kcmp > 0)
			rbp = rbp->rb_right;
		else if (kcmp < 0)
			rbp = rbp->rb_left;
		else {
			epir = epi;
			break;
		}
	}

	return epir;
}
/*
 * This is the callback that is passed to the wait queue wakeup mechanism.
 * It is called by the stored file descriptors when they have events to report.
 */
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	int pwake = 0;
	unsigned long flags;
	struct epitem *epi = ep_item_from_wait(wait);
	struct eventpoll *ep = epi->ep;

	if ((unsigned long)key & POLLFREE) {
		ep_pwq_from_wait(wait)->whead = NULL;
		/* can't use __remove_wait_queue(); whead->lock is held by caller */
		list_del_init(&wait->task_list);
	}

	spin_lock_irqsave(&ep->lock, flags);

	/* No poll(2) event in the mask: disabled, e.g. by EPOLLONESHOT */
	if (!(epi->event.events & ~EP_PRIVATE_BITS))
		goto out_unlock;

	/* Not every device reports events in "key", hence the NULL test */
	if (key && !((unsigned long) key & epi->event.events))
		goto out_unlock;

	/* Events arriving while transferring to userspace go to ->ovflist */
	if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
		if (epi->next == EP_UNACTIVE_PTR) {
			epi->next = ep->ovflist;
			ep->ovflist = epi;
			if (epi->ws)
				__pm_stay_awake(ep->ws);
		}
		goto out_unlock;
	}

	/* If this file is already in the ready list we exit soon */
	if (!ep_is_linked(&epi->rdllink)) {
		list_add_tail(&epi->rdllink, &ep->rdllist);
		__pm_stay_awake(epi->ws);
	}

	/* Wake up (if active) both the eventpoll wait list and ->poll() one */
	if (waitqueue_active(&ep->wq))
		wake_up_locked(&ep->wq);
	if (waitqueue_active(&ep->poll_wait))
		pwake++;

out_unlock:
	spin_unlock_irqrestore(&ep->lock, flags);

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(&ep->poll_wait);

	return 1;
}
/*
 * This is the callback used by the "fd" passed to the poll table, to
 * attach our wait queue hooks to the target file wait queue heads.
 */
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
				 poll_table *pt)
{
	struct epitem *epi = ep_item_from_epqueue(pt);
	struct eppoll_entry *pwq;

	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
		pwq->whead = whead;
		pwq->base = epi;
		add_wait_queue(whead, &pwq->wait);
		list_add_tail(&pwq->llink, &epi->pwqlist);
		epi->nwait++;
	} else {
		/* We have to signal that an error occurred */
		epi->nwait = -1;
	}
}
static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
{
	int kcmp;
	struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
	struct epitem *epic;

	while (*p) {
		parent = *p;
		epic = rb_entry(parent, struct epitem, rbn);
		kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
		if (kcmp > 0)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&epi->rbn, parent, p);
	rb_insert_color(&epi->rbn, &ep->rbr);
}
#define PATH_ARR_SIZE 5
/*
 * These are the number of paths of length 1 to 5 that we are allowing
 * to emanate from a single file of interest, which bounds both the path
 * length and the total number of wakeup paths to that file.
 */
static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
static int path_count[PATH_ARR_SIZE];

static int path_count_inc(int nests)
{
	/* Allow an arbitrary number of depth 1 paths */
	if (nests == 0)
		return 0;

	if (++path_count[nests] > path_limits[nests])
		return -1;
	return 0;
}

static void path_count_init(void)
{
	int i;

	for (i = 0; i < PATH_ARR_SIZE; i++)
		path_count[i] = 0;
}
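/*
 * Worked example (illustrative): with the limits above, a single target
 * file may be reached by at most 1000 paths of length 1, 500 of length
 * 2, 100 of length 3, 50 of length 4 and 10 of length 5. Once a bucket
 * overflows, path_count_inc() returns -1 and reverse_path_check()
 * makes the pending EPOLL_CTL_ADD fail with -EINVAL.
 */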
static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
{
	int error = 0;
	struct file *file = priv;
	struct file *child_file;
	struct epitem *epi;

	list_for_each_entry(epi, &file->f_ep_links, fllink) {
		child_file = epi->ep->file;
		if (is_file_epoll(child_file)) {
			if (list_empty(&child_file->f_ep_links)) {
				if (path_count_inc(call_nests)) {
					error = -1;
					break;
				}
			} else {
				error = ep_call_nested(&poll_loop_ncalls,
							EP_MAX_NESTS,
							reverse_path_check_proc,
							child_file, child_file,
							current);
			}
			if (error != 0)
				break;
		} else {
			printk(KERN_ERR "reverse_path_check_proc: "
				"file is not an ep!\n");
		}
	}
	return error;
}

/*
 * reverse_path_check - The tfile_check_list is a list of files with links
 * that are proposed to be newly added. Make sure these links do not add
 * so many paths that we would spend all our time waking up eventpoll objects.
 */
static int reverse_path_check(void)
{
	int error = 0;
	struct file *current_file;

	/* let's call this for all tfiles */
	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
		path_count_init();
		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
					reverse_path_check_proc, current_file,
					current_file, current);
		if (error)
			break;
	}
	return error;
}
static int ep_create_wakeup_source(struct epitem *epi)
{
	const char *name;

	if (!epi->ep->ws) {
		epi->ep->ws = wakeup_source_register("eventpoll");
		if (!epi->ep->ws)
			return -ENOMEM;
	}

	name = epi->ffd.file->f_path.dentry->d_name.name;
	epi->ws = wakeup_source_register(name);
	if (!epi->ws)
		return -ENOMEM;

	return 0;
}

static void ep_destroy_wakeup_source(struct epitem *epi)
{
	wakeup_source_unregister(epi->ws);
	epi->ws = NULL;
}
/*
 * Must be called with "mtx" held.
 */
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
		     struct file *tfile, int fd)
{
	int error, revents, pwake = 0;
	unsigned long flags;
	long user_watches;
	struct epitem *epi;
	struct ep_pqueue epq;

	user_watches = atomic_long_read(&ep->user->epoll_watches);
	if (unlikely(user_watches >= max_user_watches))
		return -ENOSPC;
	if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
		return -ENOMEM;

	/* Item initialization follows here ... */
	INIT_LIST_HEAD(&epi->rdllink);
	INIT_LIST_HEAD(&epi->fllink);
	INIT_LIST_HEAD(&epi->pwqlist);
	epi->ep = ep;
	ep_set_ffd(&epi->ffd, tfile, fd);
	epi->event = *event;
	epi->nwait = 0;
	epi->next = EP_UNACTIVE_PTR;
	if (epi->event.events & EPOLLWAKEUP) {
		error = ep_create_wakeup_source(epi);
		if (error)
			goto error_create_wakeup_source;
	} else {
		epi->ws = NULL;
	}

	/* Initialize the poll table using the queue callback */
	epq.epi = epi;
	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
	epq.pt._key = event->events;

	/*
	 * Attach the item to the poll hooks and get current event bits.
	 * We can safely use the file* here because its usage count has
	 * been increased by the caller of this function.
	 */
	revents = tfile->f_op->poll(tfile, &epq.pt);

	/* A wait queue allocation may have failed under memory pressure */
	error = -ENOMEM;
	if (epi->nwait < 0)
		goto error_unregister;

	/* Add the current item to the list of active epoll hooks for this file */
	spin_lock(&tfile->f_lock);
	list_add_tail(&epi->fllink, &tfile->f_ep_links);
	spin_unlock(&tfile->f_lock);

	/* Add the current item to the RB tree (protected by "mtx") */
	ep_rbtree_insert(ep, epi);

	/* Now check if we've created too many backpaths */
	error = -EINVAL;
	if (reverse_path_check())
		goto error_remove_epi;

	/* We have to drop the new item inside our item list to keep it sync */
	spin_lock_irqsave(&ep->lock, flags);

	/* If the file is already "ready" we drop it inside the ready list */
	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
		list_add_tail(&epi->rdllink, &ep->rdllist);
		__pm_stay_awake(epi->ws);

		/* Notify waiting tasks that events are available */
		if (waitqueue_active(&ep->wq))
			wake_up_locked(&ep->wq);
		if (waitqueue_active(&ep->poll_wait))
			pwake++;
	}

	spin_unlock_irqrestore(&ep->lock, flags);

	atomic_long_inc(&ep->user->epoll_watches);

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(&ep->poll_wait);

	return 0;

error_remove_epi:
	spin_lock(&tfile->f_lock);
	if (ep_is_linked(&epi->fllink))
		list_del_init(&epi->fllink);
	spin_unlock(&tfile->f_lock);

	rb_erase(&epi->rbn, &ep->rbr);

error_unregister:
	ep_unregister_pollwait(ep, epi);

	/*
	 * An event could have arrived on one of the allocated wait queues;
	 * ep->ovflist needs no care here, it is used/cleaned only under "mtx".
	 */
	spin_lock_irqsave(&ep->lock, flags);
	if (ep_is_linked(&epi->rdllink))
		list_del_init(&epi->rdllink);
	spin_unlock_irqrestore(&ep->lock, flags);

	wakeup_source_unregister(epi->ws);

error_create_wakeup_source:
	kmem_cache_free(epi_cache, epi);

	return error;
}
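/*
 * Userspace view (illustrative): when the per-user watch accounting in
 * ep_insert() rejects an insert, epoll_ctl(2) fails with ENOSPC:
 *
 *	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0 &&
 *	    errno == ENOSPC) {
 *		// fs.epoll.max_user_watches has been exhausted
 *	}
 */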
/*
 * Modify the interest event mask by dropping an event if the new mask
 * has a match in the current file status. Must be called with "mtx" held.
 */
static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
{
	int pwake = 0;
	unsigned int revents;
	poll_table pt;

	init_poll_funcptr(&pt, NULL);

	/*
	 * Set the new event interest mask before calling f_op->poll();
	 * otherwise we might miss an event that happens between the
	 * f_op->poll() call and the new event set registering.
	 */
	epi->event.events = event->events;
	pt._key = event->events;
	epi->event.data = event->data; /* protected by mtx */
	if (epi->event.events & EPOLLWAKEUP) {
		if (!epi->ws)
			ep_create_wakeup_source(epi);
	} else if (epi->ws) {
		ep_destroy_wakeup_source(epi);
	}

	/*
	 * Get current event bits. We can safely use the file* here because
	 * its usage count has been increased by the caller of this function.
	 */
	revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);

	/*
	 * If the item is "hot" and it is not registered inside the ready
	 * list, push it inside.
	 */
	if (revents & event->events) {
		spin_lock_irq(&ep->lock);
		if (!ep_is_linked(&epi->rdllink)) {
			list_add_tail(&epi->rdllink, &ep->rdllist);
			__pm_stay_awake(epi->ws);

			/* Notify waiting tasks that events are available */
			if (waitqueue_active(&ep->wq))
				wake_up_locked(&ep->wq);
			if (waitqueue_active(&ep->poll_wait))
				pwake++;
		}
		spin_unlock_irq(&ep->lock);
	}

	/* We have to call this outside the lock */
	if (pwake)
		ep_poll_safewake(&ep->poll_wait);

	return 0;
}
static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
			       void *priv)
{
	struct ep_send_events_data *esed = priv;
	int eventcnt;
	unsigned int revents;
	struct epitem *epi;
	struct epoll_event __user *uevent;
	poll_table pt;

	init_poll_funcptr(&pt, NULL);

	/*
	 * We can loop without lock because we are passed a task private list.
	 * Items cannot vanish during the loop because ep_scan_ready_list() is
	 * holding "mtx" during this call.
	 */
	for (eventcnt = 0, uevent = esed->events;
	     !list_empty(head) && eventcnt < esed->maxevents;) {
		epi = list_first_entry(head, struct epitem, rdllink);

		/* Activate ep->ws before deactivating epi->ws to avoid
		 * triggering auto-suspend (epi->ws may be re-activated below). */
		if (epi->ws && epi->ws->active)
			__pm_stay_awake(ep->ws);
		__pm_relax(epi->ws);
		list_del_init(&epi->rdllink);

		pt._key = epi->event.events;
		revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
			epi->event.events;

		/*
		 * If the event mask intersects the caller-requested one,
		 * deliver the event to userspace.
		 */
		if (revents) {
			if (__put_user(revents, &uevent->events) ||
			    __put_user(epi->event.data, &uevent->data)) {
				list_add(&epi->rdllink, head);
				__pm_stay_awake(epi->ws);
				return eventcnt ? eventcnt : -EFAULT;
			}
			eventcnt++;
			uevent++;
			if (epi->event.events & EPOLLONESHOT)
				epi->event.events &= EP_PRIVATE_BITS;
			else if (!(epi->event.events & EPOLLET)) {
				/* Level-triggered: re-insert for the next epoll_wait() */
				list_add_tail(&epi->rdllink, &ep->rdllist);
				__pm_stay_awake(epi->ws);
			}
		}
	}

	return eventcnt;
}
static int ep_send_events(struct eventpoll *ep,
			  struct epoll_event __user *events, int maxevents)
{
	struct ep_send_events_data esed;

	esed.maxevents = maxevents;
	esed.events = events;

	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
}
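/*
 * Illustration (not from this file): ep_send_events() delivers at most
 * "maxevents" entries per call, so userspace typically loops; handle()
 * below is a placeholder:
 *
 *	struct epoll_event evs[64];
 *	int n = epoll_wait(epfd, evs, 64, -1);
 *	for (int i = 0; i < n; i++)
 *		handle(evs[i].data.fd, evs[i].events);
 */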
static inline struct timespec ep_set_mstimeout(long ms)
{
	struct timespec now, ts = {
		.tv_sec = ms / MSEC_PER_SEC,
		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
	};

	ktime_get_ts(&now);
	return timespec_add_safe(now, ts);
}
/*
 * ep_poll - Retrieves ready events, and delivers them to the caller-supplied
 * event buffer.
 */
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
		   int maxevents, long timeout)
{
	int res = 0, eavail, timed_out = 0;
	unsigned long flags;
	long slack = 0;
	wait_queue_t wait;
	ktime_t expires, *to = NULL;

	if (timeout > 0) {
		struct timespec end_time = ep_set_mstimeout(timeout);

		slack = select_estimate_accuracy(&end_time);
		to = &expires;
		*to = timespec_to_ktime(end_time);
	} else if (timeout == 0) {
		/* Non blocking operation: skip the wait queue loop */
		timed_out = 1;
		spin_lock_irqsave(&ep->lock, flags);
		goto check_events;
	}

fetch_events:
	spin_lock_irqsave(&ep->lock, flags);

	if (!ep_events_available(ep)) {
		/*
		 * No event available; sleep here and let ep_poll_callback()
		 * wake us up when events arrive.
		 */
		init_waitqueue_entry(&wait, current);
		__add_wait_queue_exclusive(&ep->wq, &wait);

		for (;;) {
			/*
			 * Set the task state to TASK_INTERRUPTIBLE before the
			 * checks, so a wakeup in between is not lost.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			if (ep_events_available(ep) || timed_out)
				break;
			if (signal_pending(current)) {
				res = -EINTR;
				break;
			}

			spin_unlock_irqrestore(&ep->lock, flags);
			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
				timed_out = 1;

			spin_lock_irqsave(&ep->lock, flags);
		}
		__remove_wait_queue(&ep->wq, &wait);

		set_current_state(TASK_RUNNING);
	}
check_events:
	/* Is it worth to try to dig for events ? */
	eavail = ep_events_available(ep);

	spin_unlock_irqrestore(&ep->lock, flags);

	/*
	 * Try to transfer events to user space. If we get 0 events and there
	 * is still timeout left over, we go trying again in search of more luck.
	 */
	if (!res && eavail &&
	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
		goto fetch_events;

	return res;
}
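/*
 * Timeout semantics sketch (userspace, illustrative): a negative timeout
 * blocks indefinitely, zero performs a non-blocking check (the "timed_out"
 * path above), and a positive value is converted into an absolute expiry
 * by ep_set_mstimeout():
 *
 *	epoll_wait(epfd, evs, 64, -1);   // block until an event arrives
 *	epoll_wait(epfd, evs, 64, 0);    // poll once, return immediately
 *	epoll_wait(epfd, evs, 64, 250);  // wait at most 250 ms
 */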
static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
{
	int error = 0;
	struct file *file = priv;
	struct eventpoll *ep = file->private_data;
	struct eventpoll *ep_tovisit;
	struct rb_node *rbp;
	struct epitem *epi;

	mutex_lock_nested(&ep->mtx, call_nests + 1);
	ep->visited = 1;
	list_add(&ep->visited_list_link, &visited_list);
	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
		epi = rb_entry(rbp, struct epitem, rbn);
		if (unlikely(is_file_epoll(epi->ffd.file))) {
			ep_tovisit = epi->ffd.file->private_data;
			if (ep_tovisit->visited)
				continue;
			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
					       ep_loop_check_proc, epi->ffd.file,
					       ep_tovisit, current);
			if (error != 0)
				break;
		} else {
			/*
			 * A file not associated with an ep: queue it on the
			 * tfile_check_list (if not already there), so that
			 * reverse_path_check() can verify during ep_insert()
			 * that the new links don't add too many wakeup paths.
			 */
			if (list_empty(&epi->ffd.file->f_tfile_llink))
				list_add(&epi->ffd.file->f_tfile_llink,
					 &tfile_check_list);
		}
	}
	mutex_unlock(&ep->mtx);

	return error;
}
/*
 * ep_loop_check - Verifies that adding an epoll file inside another epoll
 * structure does not create closed loops or too deep chains.
 */
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
	int ret;
	struct eventpoll *ep_cur, *ep_next;

	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
			     ep_loop_check_proc, file, ep, current);
	/* clear visited list */
	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
				 visited_list_link) {
		ep_cur->visited = 0;
		list_del(&ep_cur->visited_list_link);
	}
	return ret;
}
static void clear_tfile_check_list(void)
{
	struct file *file;

	/* first clear the tfile_check_list */
	while (!list_empty(&tfile_check_list)) {
		file = list_first_entry(&tfile_check_list, struct file,
					f_tfile_llink);
		list_del_init(&file->f_tfile_llink);
	}
	INIT_LIST_HEAD(&tfile_check_list);
}
/*
 * Open an eventpoll file descriptor.
 */
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
	int error, fd;
	struct eventpoll *ep = NULL;
	struct file *file;

	if (flags & ~EPOLL_CLOEXEC)
		return -EINVAL;

	/* Create the internal data structure ("struct eventpoll") */
	error = ep_alloc(&ep);
	if (error < 0)
		return error;

	/* Set up an anonymous inode file and a free file descriptor */
	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
	if (fd < 0) {
		error = fd;
		goto out_free_ep;
	}
	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
				 O_RDWR | (flags & O_CLOEXEC));
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto out_free_fd;
	}
	ep->file = file;
	fd_install(fd, file);
	return fd;

out_free_fd:
	put_unused_fd(fd);
out_free_ep:
	ep_free(ep);
	return error;
}
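/*
 * Userspace sketch (illustrative) of the matching syscall:
 *
 *	int epfd = epoll_create1(EPOLL_CLOEXEC);
 *	if (epfd < 0)
 *		perror("epoll_create1");
 */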
/*
 * The following function implements the controller interface for the
 * eventpoll file: insertion, removal and change of file descriptors
 * inside the interest set.
 */
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
		struct epoll_event __user *, event)
{
	int error;
	int did_lock_epmutex = 0;
	struct file *file, *tfile;
	struct eventpoll *ep;
	struct epitem *epi;
	struct epoll_event epds;

	error = -EFAULT;
	if (ep_op_has_event(op) &&
	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
		goto error_return;

	/* Get the "struct file *" for the eventpoll file and the target file */
	error = -EBADF;
	file = fget(epfd);
	if (!file)
		goto error_return;
	tfile = fget(fd);
	if (!tfile)
		goto error_fput;

	/* The target file descriptor must support poll */
	error = -EPERM;
	if (!tfile->f_op || !tfile->f_op->poll)
		goto error_tgt_fput;

	/*
	 * The file underneath epfd must be an eventpoll file, and we do not
	 * permit adding an epoll file descriptor inside itself.
	 */
	error = -EINVAL;
	if (file == tfile || !is_file_epoll(file))
		goto error_tgt_fput;

	ep = file->private_data;

	/*
	 * Inserting an epoll fd inside another epoll fd can create closed
	 * loops; serialize via "epmutex" and run the loop/path checks here.
	 */
	if (op == EPOLL_CTL_ADD) {
		mutex_lock(&epmutex);
		did_lock_epmutex = 1;
		if (is_file_epoll(tfile)) {
			error = -ELOOP;
			if (ep_loop_check(ep, tfile) != 0) {
				clear_tfile_check_list();
				goto error_tgt_fput;
			}
		} else
			list_add(&tfile->f_tfile_llink, &tfile_check_list);
	}

	mutex_lock_nested(&ep->mtx, 0);

	/* A NULL result from ep_find() means the fd is not registered yet */
	epi = ep_find(ep, tfile, fd);

	error = -EINVAL;
	switch (op) {
	case EPOLL_CTL_ADD:
		if (!epi) {
			epds.events |= POLLERR | POLLHUP;
			error = ep_insert(ep, &epds, tfile, fd);
		} else
			error = -EEXIST;
		clear_tfile_check_list();
		break;
	case EPOLL_CTL_DEL:
		if (epi)
			error = ep_remove(ep, epi);
		else
			error = -ENOENT;
		break;
	case EPOLL_CTL_MOD:
		if (epi) {
			epds.events |= POLLERR | POLLHUP;
			error = ep_modify(ep, epi, &epds);
		} else
			error = -ENOENT;
		break;
	}
	mutex_unlock(&ep->mtx);

error_tgt_fput:
	if (did_lock_epmutex)
		mutex_unlock(&epmutex);

	fput(tfile);
error_fput:
	fput(file);
error_return:

	return error;
}
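/*
 * Userspace sketch (illustrative) of the three epoll_ctl(2) operations
 * dispatched above:
 *
 *	struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };
 *	epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);   // ep_insert()
 *	ev.events = EPOLLIN | EPOLLOUT;
 *	epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);   // ep_modify()
 *	epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);  // ep_remove()
 */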
SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
		int, maxevents, int, timeout)
{
	int error;
	struct fd f;
	struct eventpoll *ep;

	/* The maximum number of events must be greater than zero */
	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/* Verify that the area passed by the user is writeable */
	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
		return -EFAULT;

	/* Get the "struct file *" for the eventpoll file */
	f = fdget(epfd);
	if (!f.file)
		return -EBADF;

	/* The file underneath the fd must be an eventpoll file */
	error = -EINVAL;
	if (!is_file_epoll(f.file))
		goto error_fput;

	/* "private_data" is guaranteed to hold our own data structure here */
	ep = f.file->private_data;

	/* Time to fish for events ... */
	error = ep_poll(ep, events, maxevents, timeout);

error_fput:
	fdput(f);
	return error;
}
SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
		size_t, sigsetsize)
{
	int error;
	sigset_t ksigmask, sigsaved;

	/* If the caller wants a certain signal mask set during the wait,
	 * apply it here. */
	if (sigmask) {
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
			return -EFAULT;
		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
	}

	error = sys_epoll_wait(epfd, events, maxevents, timeout);

	/*
	 * If we changed the signal mask, restore the original one. If a
	 * signal arrived while waiting, do not restore it yet; let
	 * do_signal() deliver the signal on the way back to userspace,
	 * before the signal mask is restored.
	 */
	if (sigmask) {
		if (error == -EINTR) {
			memcpy(&current->saved_sigmask, &sigsaved,
			       sizeof(sigsaved));
			set_restore_sigmask();
		} else
			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
	}

	return error;
}
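/*
 * Userspace sketch (illustrative): epoll_pwait(2) atomically swaps the
 * signal mask for the duration of the wait, as restored above:
 *
 *	sigset_t mask;
 *	sigemptyset(&mask);		// allow all signals while waiting
 *	epoll_pwait(epfd, evs, 64, -1, &mask);
 */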
static int __init eventpoll_init(void)
{
	struct sysinfo si;

	si_meminfo(&si);
	/* Allow the top 4% of low memory to be used for epoll watches (per user) */
	max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) /
		EP_ITEM_COST;
	BUG_ON(max_user_watches < 0);

	/* Initialize the structure used for epoll fd inclusion loop checks */
	ep_nested_calls_init(&poll_loop_ncalls);

	/* Initialize the structure used to perform safe poll wait head wake ups */
	ep_nested_calls_init(&poll_safewake_ncalls);

	/* Initialize the structure used to perform file's f_op->poll() calls */
	ep_nested_calls_init(&poll_readywalk_ncalls);

	/* Allocate the slab caches for "struct epitem" and "struct eppoll_entry" */
	epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	pwq_cache = kmem_cache_create("eventpoll_pwq",
			sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);

	return 0;
}
fs_initcall(eventpoll_init);