/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 * (With MINORBITS == 20 and AOE_PARTITIONS == 16, N_DEVS works out
 * to 65536 whole devices.)
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	n = aoemaj * NPERSHELF + aoemin;
	if (aoemin >= NPERSHELF || n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
	} else {
		spin_lock_irqsave(&used_minors_lock, flags);
		if (test_bit(n, used_minors)) {
			pr_err("aoe: %s %lu\n",
				"existing device already has static minor number",
				n);
			error = -1;
		} else
			set_bit(n, used_minors);
		spin_unlock_irqrestore(&used_minors_lock, flags);
	}

	*sysminor = n;
	return error;
}

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
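/*
 * Purely illustrative sketch (not driver code): the lookup/put pairing
 * described above.  "shelf" and "slot" are hypothetical AoE address
 * values; passing 0 for do_alloc means no device is created on a miss.
 *
 *	struct aoedev *d;
 *
 *	d = aoedev_by_aoeaddr(shelf, slot, 0);
 *	if (d == NULL)
 *		return;
 *	// ... use d; aoedev_flush skips devices whose ref is held ...
 *	aoedev_put(d);
 */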

/* drop a reference taken by aoedev_by_aoeaddr */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}

/* placeholder timer handler: re-arm once a second until the device is killed */
static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}

/* fail the in-process request: fail its buf, mark the remaining bios
 * as not up to date, and end the request once no bios are outstanding */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}

void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			list_del(pos);
			if (f->buf) {
				f->buf->nframesout--;
				aoe_failbuf(d, f->buf);
			}
			aoe_freetframe(f);
		}
	}
	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		t->maxout = t->nframes;
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}

static void
aoedev_freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;

	cancel_work_sync(&d->work);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + NTARGETS;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);
	kfree(d);
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;
	char buf[16];
	int all = 0;

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref) {
			spin_unlock(&d->lock);
			dd = &d->next;
			continue;
		}
		*dd = d->next;
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;
		rmd = d;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve, even by just returning an error,
 * that I'm still punting on "solving" it: the skb is reported and leaked.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d = devlist; d; d = d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			d->ref++;
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	skb_queue_head_init(&d->skbpool);
	init_timer(&d->timer);
	d->timer.data = (ulong) d;
	d->timer.function = dummy_timer;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->mintimer = MINTIMER;
	d->next = devlist;
	devlist = d;
 out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}

static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}

void
aoedev_exit(void)
{
	struct aoedev *d;
	ulong flags;

	aoe_flush_iocq();
	while ((d = devlist)) {
		devlist = d->next;

		spin_lock_irqsave(&d->lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock_irqrestore(&d->lock, flags);

		del_timer_sync(&d->timer);
		aoedev_freedev(d);
	}
}

int __init
aoedev_init(void)
{
	return 0;
}