1 /* 2 * fs/eventfd.c 3 * 4 * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> 5 * 6 */ 7 8 #include <linux/file.h> 9 #include <linux/poll.h> 10 #include <linux/init.h> 11 #include <linux/fs.h> 12 #include <linux/sched.h> 13 #include <linux/kernel.h> 14 #include <linux/list.h> 15 #include <linux/spinlock.h> 16 #include <linux/anon_inodes.h> 17 #include <linux/eventfd.h> 18 #include <linux/syscalls.h> 19 #include <linux/module.h> 20 21 struct eventfd_ctx { 22 wait_queue_head_t wqh; 23 /* 24 * Every time that a write(2) is performed on an eventfd, the 25 * value of the __u64 being written is added to "count" and a 26 * wakeup is performed on "wqh". A read(2) will return the "count" 27 * value to userspace, and will reset "count" to zero. The kernel 28 * size eventfd_signal() also, adds to the "count" counter and 29 * issue a wakeup. 30 */ 31 __u64 count; 32 unsigned int flags; 33 }; 34 35 /* 36 * Adds "n" to the eventfd counter "count". Returns "n" in case of 37 * success, or a value lower then "n" in case of coutner overflow. 38 * This function is supposed to be called by the kernel in paths 39 * that do not allow sleeping. In this function we allow the counter 40 * to reach the ULLONG_MAX value, and we signal this as overflow 41 * condition by returining a POLLERR to poll(2). 42 */ 43 int eventfd_signal(struct file *file, int n) 44 { 45 struct eventfd_ctx *ctx = file->private_data; 46 unsigned long flags; 47 48 if (n < 0) 49 return -EINVAL; 50 spin_lock_irqsave(&ctx->wqh.lock, flags); 51 if (ULLONG_MAX - ctx->count < n) 52 n = (int) (ULLONG_MAX - ctx->count); 53 ctx->count += n; 54 if (waitqueue_active(&ctx->wqh)) 55 wake_up_locked_poll(&ctx->wqh, POLLIN); 56 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 57 58 return n; 59 } 60 EXPORT_SYMBOL_GPL(eventfd_signal); 61 62 static int eventfd_release(struct inode *inode, struct file *file) 63 { 64 kfree(file->private_data); 65 return 0; 66 } 67 68 static unsigned int eventfd_poll(struct file *file, poll_table *wait) 69 { 70 struct eventfd_ctx *ctx = file->private_data; 71 unsigned int events = 0; 72 unsigned long flags; 73 74 poll_wait(file, &ctx->wqh, wait); 75 76 spin_lock_irqsave(&ctx->wqh.lock, flags); 77 if (ctx->count > 0) 78 events |= POLLIN; 79 if (ctx->count == ULLONG_MAX) 80 events |= POLLERR; 81 if (ULLONG_MAX - 1 > ctx->count) 82 events |= POLLOUT; 83 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 84 85 return events; 86 } 87 88 static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, 89 loff_t *ppos) 90 { 91 struct eventfd_ctx *ctx = file->private_data; 92 ssize_t res; 93 __u64 ucnt = 0; 94 DECLARE_WAITQUEUE(wait, current); 95 96 if (count < sizeof(ucnt)) 97 return -EINVAL; 98 spin_lock_irq(&ctx->wqh.lock); 99 res = -EAGAIN; 100 if (ctx->count > 0) 101 res = sizeof(ucnt); 102 else if (!(file->f_flags & O_NONBLOCK)) { 103 __add_wait_queue(&ctx->wqh, &wait); 104 for (res = 0;;) { 105 set_current_state(TASK_INTERRUPTIBLE); 106 if (ctx->count > 0) { 107 res = sizeof(ucnt); 108 break; 109 } 110 if (signal_pending(current)) { 111 res = -ERESTARTSYS; 112 break; 113 } 114 spin_unlock_irq(&ctx->wqh.lock); 115 schedule(); 116 spin_lock_irq(&ctx->wqh.lock); 117 } 118 __remove_wait_queue(&ctx->wqh, &wait); 119 __set_current_state(TASK_RUNNING); 120 } 121 if (likely(res > 0)) { 122 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; 123 ctx->count -= ucnt; 124 if (waitqueue_active(&ctx->wqh)) 125 wake_up_locked_poll(&ctx->wqh, POLLOUT); 126 } 127 spin_unlock_irq(&ctx->wqh.lock); 128 if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) 129 return -EFAULT; 130 131 return res; 132 } 133 134 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, 135 loff_t *ppos) 136 { 137 struct eventfd_ctx *ctx = file->private_data; 138 ssize_t res; 139 __u64 ucnt; 140 DECLARE_WAITQUEUE(wait, current); 141 142 if (count < sizeof(ucnt)) 143 return -EINVAL; 144 if (copy_from_user(&ucnt, buf, sizeof(ucnt))) 145 return -EFAULT; 146 if (ucnt == ULLONG_MAX) 147 return -EINVAL; 148 spin_lock_irq(&ctx->wqh.lock); 149 res = -EAGAIN; 150 if (ULLONG_MAX - ctx->count > ucnt) 151 res = sizeof(ucnt); 152 else if (!(file->f_flags & O_NONBLOCK)) { 153 __add_wait_queue(&ctx->wqh, &wait); 154 for (res = 0;;) { 155 set_current_state(TASK_INTERRUPTIBLE); 156 if (ULLONG_MAX - ctx->count > ucnt) { 157 res = sizeof(ucnt); 158 break; 159 } 160 if (signal_pending(current)) { 161 res = -ERESTARTSYS; 162 break; 163 } 164 spin_unlock_irq(&ctx->wqh.lock); 165 schedule(); 166 spin_lock_irq(&ctx->wqh.lock); 167 } 168 __remove_wait_queue(&ctx->wqh, &wait); 169 __set_current_state(TASK_RUNNING); 170 } 171 if (likely(res > 0)) { 172 ctx->count += ucnt; 173 if (waitqueue_active(&ctx->wqh)) 174 wake_up_locked_poll(&ctx->wqh, POLLIN); 175 } 176 spin_unlock_irq(&ctx->wqh.lock); 177 178 return res; 179 } 180 181 static const struct file_operations eventfd_fops = { 182 .release = eventfd_release, 183 .poll = eventfd_poll, 184 .read = eventfd_read, 185 .write = eventfd_write, 186 }; 187 188 struct file *eventfd_fget(int fd) 189 { 190 struct file *file; 191 192 file = fget(fd); 193 if (!file) 194 return ERR_PTR(-EBADF); 195 if (file->f_op != &eventfd_fops) { 196 fput(file); 197 return ERR_PTR(-EINVAL); 198 } 199 200 return file; 201 } 202 EXPORT_SYMBOL_GPL(eventfd_fget); 203 204 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) 205 { 206 int fd; 207 struct eventfd_ctx *ctx; 208 209 /* Check the EFD_* constants for consistency. */ 210 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); 211 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 212 213 if (flags & ~EFD_FLAGS_SET) 214 return -EINVAL; 215 216 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 217 if (!ctx) 218 return -ENOMEM; 219 220 init_waitqueue_head(&ctx->wqh); 221 ctx->count = count; 222 ctx->flags = flags; 223 224 /* 225 * When we call this, the initialization must be complete, since 226 * anon_inode_getfd() will install the fd. 227 */ 228 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 229 flags & EFD_SHARED_FCNTL_FLAGS); 230 if (fd < 0) 231 kfree(ctx); 232 return fd; 233 } 234 235 SYSCALL_DEFINE1(eventfd, unsigned int, count) 236 { 237 return sys_eventfd2(count, 0); 238 } 239 240