/* SPDX-License-Identifier: MIT */

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct io_sq_ring {
        unsigned int *head;
        unsigned int *tail;
        unsigned int *ring_mask;
        unsigned int *ring_entries;
        unsigned int *flags;
        unsigned int *array;
};

struct io_cq_ring {
        unsigned int *head;
        unsigned int *tail;
        unsigned int *ring_mask;
        unsigned int *ring_entries;
        struct io_uring_cqe *cqes;
};

struct io_uring_sq {
        unsigned int *khead;
        unsigned int *ktail;
        unsigned int *kring_mask;
        unsigned int *kring_entries;
        unsigned int *kflags;
        unsigned int *kdropped;
        unsigned int *array;
        struct io_uring_sqe *sqes;

        unsigned int sqe_head;
        unsigned int sqe_tail;

        size_t ring_sz;
};

struct io_uring_cq {
        unsigned int *khead;
        unsigned int *ktail;
        unsigned int *kring_mask;
        unsigned int *kring_entries;
        unsigned int *koverflow;
        struct io_uring_cqe *cqes;

        size_t ring_sz;
};

struct io_uring {
        struct io_uring_sq sq;
        struct io_uring_cq cq;
        int ring_fd;
};

#if defined(__x86_64) || defined(__i386__)
#define read_barrier()  __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
#else
#define read_barrier()  __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif

/* Map the SQ ring, the SQE array and the CQ ring of an io_uring fd */
static inline int io_uring_mmap(int fd, struct io_uring_params *p,
                                struct io_uring_sq *sq, struct io_uring_cq *cq)
{
        size_t size;
        void *ptr;
        int ret;

        sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int);
        ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
                   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
        if (ptr == MAP_FAILED)
                return -errno;
        sq->khead = ptr + p->sq_off.head;
        sq->ktail = ptr + p->sq_off.tail;
        sq->kring_mask = ptr + p->sq_off.ring_mask;
        sq->kring_entries = ptr + p->sq_off.ring_entries;
        sq->kflags = ptr + p->sq_off.flags;
        sq->kdropped = ptr + p->sq_off.dropped;
        sq->array = ptr + p->sq_off.array;

        size = p->sq_entries * sizeof(struct io_uring_sqe);
        sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
        if (sq->sqes == MAP_FAILED) {
                ret = -errno;
err:
                munmap(sq->khead, sq->ring_sz);
                return ret;
        }

        cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
        ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
                   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
        if (ptr == MAP_FAILED) {
                ret = -errno;
                munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
                goto err;
        }
        cq->khead = ptr + p->cq_off.head;
        cq->ktail = ptr + p->cq_off.tail;
        cq->kring_mask = ptr + p->cq_off.ring_mask;
        cq->kring_entries = ptr + p->cq_off.ring_entries;
        cq->koverflow = ptr + p->cq_off.overflow;
        cq->cqes = ptr + p->cq_off.cqes;
        return 0;
}

/* Raw syscall wrappers */
static inline int io_uring_setup(unsigned int entries,
                                 struct io_uring_params *p)
{
        return syscall(__NR_io_uring_setup, entries, p);
}

static inline int io_uring_enter(int fd, unsigned int to_submit,
                                 unsigned int min_complete,
                                 unsigned int flags, sigset_t *sig)
{
        return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
                       flags, sig, _NSIG / 8);
}

/* Create an io_uring instance and map its rings */
static inline int io_uring_queue_init(unsigned int entries,
                                      struct io_uring *ring,
                                      unsigned int flags)
{
        struct io_uring_params p;
        int fd, ret;

        memset(ring, 0, sizeof(*ring));
        memset(&p, 0, sizeof(p));
        p.flags = flags;

        fd = io_uring_setup(entries, &p);
        if (fd < 0)
                return fd;
        ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
        if (!ret)
                ring->ring_fd = fd;
        else
                close(fd);
        return ret;
}

/* Get the next free SQE slot, or NULL if the submission ring is full */
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;

        if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
                return NULL;
        return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
}

/* Wait until at least one completion is available and return it (unconsumed) */
static inline int io_uring_wait_cqe(struct io_uring *ring,
                                    struct io_uring_cqe **cqe_ptr)
{
        struct io_uring_cq *cq = &ring->cq;
        const unsigned int mask = *cq->kring_mask;
        unsigned int head = *cq->khead;
        int ret;

        *cqe_ptr = NULL;
        do {
                read_barrier();
                if (head != *cq->ktail) {
                        *cqe_ptr = &cq->cqes[head & mask];
                        break;
                }
                ret = io_uring_enter(ring->ring_fd, 0, 1,
                                     IORING_ENTER_GETEVENTS, NULL);
                if (ret < 0)
                        return -errno;
        } while (1);

        return 0;
}

/* Flush locally queued SQEs into the SQ ring and submit them to the kernel */
static inline int io_uring_submit(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;
        const unsigned int mask = *sq->kring_mask;
        unsigned int ktail, submitted, to_submit;
        int ret;

        read_barrier();
        if (*sq->khead != *sq->ktail) {
                submitted = *sq->kring_entries;
                goto submit;
        }
        if (sq->sqe_head == sq->sqe_tail)
                return 0;

        ktail = *sq->ktail;
        to_submit = sq->sqe_tail - sq->sqe_head;
        for (submitted = 0; submitted < to_submit; submitted++) {
                read_barrier();
                sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
        }
        if (!submitted)
                return 0;

        if (*sq->ktail != ktail) {
                write_barrier();
                *sq->ktail = ktail;
                write_barrier();
        }
submit:
        ret = io_uring_enter(ring->ring_fd, submitted, 0,
                             IORING_ENTER_GETEVENTS, NULL);
        return ret < 0 ? -errno : ret;
}

/* Unmap the rings and close the io_uring fd */
static inline void io_uring_queue_exit(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;

        munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
        munmap(sq->khead, sq->ring_sz);
        close(ring->ring_fd);
}

/* Prepare a socket command SQE (IORING_OP_URING_CMD); it is sent via io_uring_submit() */
static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
                                     int sockfd,
                                     int level, int optname,
                                     const void *optval,
                                     int optlen)
{
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = (__u8)IORING_OP_URING_CMD;
        sqe->fd = sockfd;
        sqe->cmd_op = op;

        sqe->level = level;
        sqe->optname = optname;
        sqe->optval = (unsigned long long)optval;
        sqe->optlen = optlen;
}

static inline int io_uring_register_buffers(struct io_uring *ring,
                                            const struct iovec *iovecs,
                                            unsigned int nr_iovecs)
{
        int ret;

        ret = syscall(__NR_io_uring_register, ring->ring_fd,
                      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
        return (ret < 0) ? -errno : ret;
}

static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
                                      const void *buf, size_t len, int flags)
{
        memset(sqe, 0, sizeof(*sqe));
        sqe->opcode = (__u8)IORING_OP_SEND;
        sqe->fd = sockfd;
        sqe->addr = (unsigned long)buf;
        sqe->len = len;
        sqe->msg_flags = (__u32)flags;
}

static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
                                        const void *buf, size_t len, int flags,
                                        unsigned int zc_flags)
{
        io_uring_prep_send(sqe, sockfd, buf, len, flags);
        sqe->opcode = (__u8)IORING_OP_SEND_ZC;
        sqe->ioprio = zc_flags;
}

/* Mark one CQE as consumed by advancing the CQ head */
static inline void io_uring_cqe_seen(struct io_uring *ring)
{
        *(&ring->cq)->khead += 1;
        write_barrier();
}
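
/*
 * Illustrative sketch, not part of the original header: one possible way to
 * combine the helpers above to send a buffer on an already-connected socket
 * and reap the completion. The socket, buffer and length are caller-provided
 * assumptions, the ring size of 16 is arbitrary, and error handling is kept
 * minimal.
 */
static inline int io_uring_example_send(int sockfd, const void *buf, size_t len)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int ret;

        ret = io_uring_queue_init(16, &ring, 0);
        if (ret < 0)
                return ret;

        sqe = io_uring_get_sqe(&ring);          /* NULL only if the SQ ring is full */
        if (!sqe) {
                io_uring_queue_exit(&ring);
                return -EBUSY;
        }
        io_uring_prep_send(sqe, sockfd, buf, len, 0);

        ret = io_uring_submit(&ring);           /* flush the SQE to the kernel */
        if (ret >= 0)
                ret = io_uring_wait_cqe(&ring, &cqe);
        if (!ret) {
                ret = cqe->res;                 /* bytes sent, or -errno */
                io_uring_cqe_seen(&ring);       /* consume the CQE */
        }

        io_uring_queue_exit(&ring);
        return ret;
}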