/*-
 * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <ck_epoch.h>

#include <linux/rcupdate.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/kernel.h>

struct callback_head;
struct writer_epoch_record {
	ck_epoch_record_t epoch_record;
	struct mtx head_lock;
	struct mtx sync_lock;
	struct task task;
	STAILQ_HEAD(, callback_head) head;
} __aligned(CACHE_LINE_SIZE);

struct callback_head {
	STAILQ_ENTRY(callback_head) entry;
	rcu_callback_t func;
};

struct srcu_epoch_record {
	ck_epoch_record_t epoch_record;
	struct mtx read_lock;
	struct mtx sync_lock;
};

/*
 * Verify that "struct rcu_head" is big enough to hold "struct
 * callback_head". This has been done to avoid having to add special
 * compile flags for including ck_epoch.h to all clients of the
 * LinuxKPI.
 */
CTASSERT(sizeof(struct rcu_head) >= sizeof(struct callback_head));

/*
 * Verify that "epoch_record" is at beginning of "struct
 * writer_epoch_record":
 */
CTASSERT(offsetof(struct writer_epoch_record, epoch_record) == 0);

/*
 * Verify that "epoch_record" is at beginning of "struct
 * srcu_epoch_record":
 */
CTASSERT(offsetof(struct srcu_epoch_record, epoch_record) == 0);

static ck_epoch_t linux_epoch;
static MALLOC_DEFINE(M_LRCU, "lrcu", "Linux RCU");
static DPCPU_DEFINE(ck_epoch_record_t *, linux_reader_epoch_record);
static DPCPU_DEFINE(struct writer_epoch_record *, linux_writer_epoch_record);

static void linux_rcu_cleaner_func(void *, int);

static void
linux_rcu_runtime_init(void *arg __unused)
{
	int i;

	ck_epoch_init(&linux_epoch);

	/* setup reader records */
	CPU_FOREACH(i) {
		ck_epoch_record_t *record;

		record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);
		ck_epoch_register(&linux_epoch, record);

		DPCPU_ID_SET(i, linux_reader_epoch_record, record);
	}

	/* setup writer records */
	CPU_FOREACH(i) {
		struct writer_epoch_record *record;

		record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);

		ck_epoch_register(&linux_epoch, &record->epoch_record);
		mtx_init(&record->head_lock, "LRCU-HEAD", NULL, MTX_DEF);
		mtx_init(&record->sync_lock, "LRCU-SYNC", NULL, MTX_DEF);
		TASK_INIT(&record->task, 0, linux_rcu_cleaner_func, record);
		STAILQ_INIT(&record->head);

		DPCPU_ID_SET(i, linux_writer_epoch_record, record);
	}
}
SYSINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_init, NULL);

static void
linux_rcu_runtime_uninit(void *arg __unused)
{
	ck_stack_entry_t *cursor;
	ck_stack_entry_t *next;
	int i;

	/* make sure all callbacks have been called */
	linux_rcu_barrier();

	/* destroy all writer record mutexes */
	CPU_FOREACH(i) {
		struct writer_epoch_record *record;

		record = DPCPU_ID_GET(i, linux_writer_epoch_record);

		mtx_destroy(&record->head_lock);
		mtx_destroy(&record->sync_lock);
	}

	/* free all registered reader and writer records */
	CK_STACK_FOREACH_SAFE(&linux_epoch.records, cursor, next) {
		ck_epoch_record_t *record;

		record = container_of(cursor,
		    struct ck_epoch_record, record_next);
		free(record, M_LRCU);
	}
}
SYSUNINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_uninit, NULL);

static inline struct srcu_epoch_record *
linux_srcu_get_record(void)
{
	struct srcu_epoch_record *record;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_srcu_get_record() might sleep");

	/*
	 * NOTE: The only records that are unregistered and can be
	 * recycled are srcu_epoch_records.
	 */
	record = (struct srcu_epoch_record *)ck_epoch_recycle(&linux_epoch);
	if (__predict_true(record != NULL))
		return (record);

	record = malloc(sizeof(*record), M_LRCU, M_WAITOK | M_ZERO);
	mtx_init(&record->read_lock, "SRCU-READ", NULL, MTX_DEF | MTX_NOWITNESS);
	mtx_init(&record->sync_lock, "SRCU-SYNC", NULL, MTX_DEF | MTX_NOWITNESS);
	ck_epoch_register(&linux_epoch, &record->epoch_record);

	return (record);
}

static inline void
linux_rcu_synchronize_sub(struct writer_epoch_record *record)
{

	/* protect access to epoch_record */
	mtx_lock(&record->sync_lock);
	ck_epoch_synchronize(&record->epoch_record);
	mtx_unlock(&record->sync_lock);
}

static void
linux_rcu_cleaner_func(void *context, int pending __unused)
{
	struct writer_epoch_record *record;
	struct callback_head *rcu;
	STAILQ_HEAD(, callback_head) head;

	record = context;

	/* move current callbacks into own queue */
	mtx_lock(&record->head_lock);
	STAILQ_INIT(&head);
	STAILQ_CONCAT(&head, &record->head);
	mtx_unlock(&record->head_lock);

	/* synchronize */
	linux_rcu_synchronize_sub(record);

	/* dispatch all callbacks, if any */
	while ((rcu = STAILQ_FIRST(&head)) != NULL) {
		uintptr_t offset;

		STAILQ_REMOVE_HEAD(&head, entry);

		offset = (uintptr_t)rcu->func;

		if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
			kfree((char *)rcu - offset);
		else
			rcu->func((struct rcu_head *)rcu);
	}
}

void
linux_rcu_read_lock(void)
{
	ck_epoch_record_t *record;

	/*
	 * Pin thread to current CPU so that the unlock code gets the
	 * same per-CPU reader epoch record:
	 */
	sched_pin();

	record = DPCPU_GET(linux_reader_epoch_record);

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_begin(). Else this function supports recursion.
	 */
	critical_enter();
	ck_epoch_begin(record, NULL);
	critical_exit();
}

void
linux_rcu_read_unlock(void)
{
	ck_epoch_record_t *record;

	record = DPCPU_GET(linux_reader_epoch_record);

	/*
	 * Use a critical section to prevent recursion inside
	 * ck_epoch_end(). Else this function supports recursion.
	 */
	critical_enter();
	ck_epoch_end(record, NULL);
	critical_exit();

	sched_unpin();
}

void
linux_synchronize_rcu(void)
{
	linux_rcu_synchronize_sub(DPCPU_GET(linux_writer_epoch_record));
}

void
linux_rcu_barrier(void)
{
	int i;

	CPU_FOREACH(i) {
		struct writer_epoch_record *record;

		record = DPCPU_ID_GET(i, linux_writer_epoch_record);

		linux_rcu_synchronize_sub(record);

		/* wait for callbacks to complete */
		taskqueue_drain(taskqueue_fast, &record->task);
	}
}

void
linux_call_rcu(struct rcu_head *context, rcu_callback_t func)
{
	struct callback_head *rcu = (struct callback_head *)context;
	struct writer_epoch_record *record;

	record = DPCPU_GET(linux_writer_epoch_record);

	mtx_lock(&record->head_lock);
	rcu->func = func;
	STAILQ_INSERT_TAIL(&record->head, rcu, entry);
	taskqueue_enqueue(taskqueue_fast, &record->task);
	mtx_unlock(&record->head_lock);
}

int
init_srcu_struct(struct srcu_struct *srcu)
{
	struct srcu_epoch_record *record;

	record = linux_srcu_get_record();
	srcu->ss_epoch_record = record;
	return (0);
}

void
cleanup_srcu_struct(struct srcu_struct *srcu)
{
	struct srcu_epoch_record *record;

	record = srcu->ss_epoch_record;
	srcu->ss_epoch_record = NULL;

	ck_epoch_unregister(&record->epoch_record);
}

int
srcu_read_lock(struct srcu_struct *srcu)
{
	struct srcu_epoch_record *record;

	record = srcu->ss_epoch_record;

	mtx_lock(&record->read_lock);
	ck_epoch_begin(&record->epoch_record, NULL);
	mtx_unlock(&record->read_lock);

	return (0);
}

void
srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
{
	struct srcu_epoch_record *record;

	record = srcu->ss_epoch_record;

	mtx_lock(&record->read_lock);
	ck_epoch_end(&record->epoch_record, NULL);
	mtx_unlock(&record->read_lock);
}

void
synchronize_srcu(struct srcu_struct *srcu)
{
	struct srcu_epoch_record *record;

	record = srcu->ss_epoch_record;

	mtx_lock(&record->sync_lock);
	ck_epoch_synchronize(&record->epoch_record);
	mtx_unlock(&record->sync_lock);
}

void
srcu_barrier(struct srcu_struct *srcu)
{
	struct srcu_epoch_record *record;

	record = srcu->ss_epoch_record;

	mtx_lock(&record->sync_lock);
	ck_epoch_barrier(&record->epoch_record);
	mtx_unlock(&record->sync_lock);
}
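
/*
 * Illustrative sketch only, not part of the LinuxKPI: roughly how a
 * consumer would drive the classic RCU path implemented above, assuming
 * the usual rcu_read_lock()/rcu_read_unlock(), rcu_dereference(),
 * rcu_assign_pointer() and kfree_rcu() wrappers from <linux/rcupdate.h>.
 * The "foo" structure and helpers are hypothetical and exist only to
 * show the call pattern; the block is compiled out.
 */
#if 0
struct foo {
	struct rcu_head rcu;
	int value;
};

static struct foo *foo_ptr;	/* RCU-protected pointer */

static int
foo_read_value(void)
{
	struct foo *p;
	int v;

	/* Readers are pinned and epoch-protected for the duration. */
	rcu_read_lock();
	p = rcu_dereference(foo_ptr);
	v = (p != NULL) ? p->value : -1;
	rcu_read_unlock();

	return (v);
}

static void
foo_replace(struct foo *newp)
{
	struct foo *oldp;

	/* Publish the new version, then defer freeing the old one. */
	oldp = foo_ptr;
	rcu_assign_pointer(foo_ptr, newp);
	if (oldp != NULL)
		kfree_rcu(oldp, rcu);	/* dispatched by linux_rcu_cleaner_func() */
}
#endif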