/* SPDX-License-Identifier: MIT */

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

struct io_sq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
};

struct io_cq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	struct io_uring_cqe *cqes;
};

struct io_uring_sq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *kflags;
	unsigned int *kdropped;
	unsigned int *array;
	struct io_uring_sqe *sqes;

	unsigned int sqe_head;
	unsigned int sqe_tail;

	size_t ring_sz;
};

struct io_uring_cq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
};

struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	int ring_fd;
	unsigned flags;
};

#if defined(__x86_64__) || defined(__i386__)
#define read_barrier()	__asm__ __volatile__("":::"memory")
#define write_barrier()	__asm__ __volatile__("":::"memory")
#else
#define read_barrier()	__sync_synchronize()
#define write_barrier()	__sync_synchronize()
#endif

static inline int io_uring_mmap(int fd, struct io_uring_params *p,
				struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	void *ptr;
	int ret;

	if (p->flags & IORING_SETUP_NO_SQARRAY) {
		sq->ring_sz = p->cq_off.cqes;
		sq->ring_sz += p->cq_entries * sizeof(struct io_uring_cqe);
	} else {
		sq->ring_sz = p->sq_off.array;
		sq->ring_sz += p->sq_entries * sizeof(unsigned int);
	}

	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (ptr == MAP_FAILED)
		return -errno;
	sq->khead = ptr + p->sq_off.head;
	sq->ktail = ptr + p->sq_off.tail;
	sq->kring_mask = ptr + p->sq_off.ring_mask;
	sq->kring_entries = ptr + p->sq_off.ring_entries;
	sq->kflags = ptr + p->sq_off.flags;
	sq->kdropped = ptr + p->sq_off.dropped;
	if (!(p->flags & IORING_SETUP_NO_SQARRAY))
		sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		munmap(sq->khead, sq->ring_sz);
		return ret;
	}

	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (ptr == MAP_FAILED) {
		ret = -errno;
		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
		goto err;
	}
	cq->khead = ptr + p->cq_off.head;
	cq->ktail = ptr + p->cq_off.tail;
	cq->kring_mask = ptr + p->cq_off.ring_mask;
	cq->kring_entries = ptr + p->cq_off.ring_entries;
	cq->koverflow = ptr + p->cq_off.overflow;
	cq->cqes = ptr + p->cq_off.cqes;
	return 0;
}

static inline int io_uring_setup(unsigned int entries,
				 struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

static inline int io_uring_enter(int fd, unsigned int to_submit,
				 unsigned int min_complete,
				 unsigned int flags, sigset_t *sig)
{
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
		       flags, sig, _NSIG / 8);
}

static inline int io_uring_queue_init_params(unsigned int entries,
					     struct io_uring *ring,
					     struct io_uring_params *p)
{
	int fd, ret;

	memset(ring, 0, sizeof(*ring));

	fd = io_uring_setup(entries, p);
	if (fd < 0)
		return fd;
	ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
	if (!ret) {
		ring->ring_fd = fd;
		ring->flags = p->flags;
	} else {
		close(fd);
	}
	return ret;
}

static inline int io_uring_queue_init(unsigned int entries,
				      struct io_uring *ring,
				      unsigned int flags)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = flags;

	return io_uring_queue_init_params(entries, ring, &p);
}

/* Get a sqe */
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
		return NULL;
	return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
}

static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned int mask = *cq->kring_mask;
	unsigned int head = *cq->khead;
	int ret;

	*cqe_ptr = NULL;
	do {
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		ret = io_uring_enter(ring->ring_fd, 0, 1,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}

static inline int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned int mask = *sq->kring_mask;
	unsigned int ktail, submitted, to_submit;
	int ret;

	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;

	if (!(ring->flags & IORING_SETUP_NO_SQARRAY)) {
		for (submitted = 0; submitted < to_submit; submitted++) {
			read_barrier();
			sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
		}
	} else {
		ktail += to_submit;
		sq->sqe_head += to_submit;
		submitted = to_submit;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}
submit:
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
			     IORING_ENTER_GETEVENTS, NULL);
	return ret < 0 ? -errno : ret;
}

static inline void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	munmap(sq->khead, sq->ring_sz);
	close(ring->ring_fd);
}

/* Prepare an SQE; submission happens separately via io_uring_submit() */
static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
				     int sockfd,
				     int level, int optname,
				     const void *optval,
				     int optlen)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_URING_CMD;
	sqe->fd = sockfd;
	sqe->cmd_op = op;

	sqe->level = level;
	sqe->optname = optname;
	sqe->optval = (unsigned long long)optval;
	sqe->optlen = optlen;
}

static inline int io_uring_register_buffers(struct io_uring *ring,
					    const struct iovec *iovecs,
					    unsigned int nr_iovecs)
{
	int ret;

	ret = syscall(__NR_io_uring_register, ring->ring_fd,
		      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
	return (ret < 0) ? -errno : ret;
}

static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_SEND;
	sqe->fd = sockfd;
	sqe->addr = (unsigned long)buf;
	sqe->len = len;
	sqe->msg_flags = (__u32)flags;
}

static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
					const void *buf, size_t len, int flags,
					unsigned int zc_flags)
{
	io_uring_prep_send(sqe, sockfd, buf, len, flags);
	sqe->opcode = (__u8)IORING_OP_SEND_ZC;
	sqe->ioprio = zc_flags;
}

static inline void io_uring_cqe_seen(struct io_uring *ring)
{
	*(&ring->cq)->khead += 1;
	write_barrier();
}
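
/*
 * Example usage: a minimal sketch (kept as a comment; not part of the helpers
 * above) showing how these wrappers fit together to send one buffer.
 * "sockfd" is a hypothetical, already-connected socket supplied by the
 * caller, and error handling is abbreviated.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	static const char buf[] = "ping";
 *
 *	if (io_uring_queue_init(16, &ring, 0) < 0)
 *		return -1;
 *	sqe = io_uring_get_sqe(&ring);
 *	if (!sqe)
 *		return -1;
 *	io_uring_prep_send(sqe, sockfd, buf, sizeof(buf) - 1, 0);
 *	if (io_uring_submit(&ring) < 0)
 *		return -1;
 *	if (!io_uring_wait_cqe(&ring, &cqe)) {
 *		// cqe->res is the send() result: bytes sent or a negative errno
 *		io_uring_cqe_seen(&ring);
 *	}
 *	io_uring_queue_exit(&ring);
 */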