/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
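/* For example, with MINORBITS == 20 and the usual AOE_PARTITIONS
 * of 16, N_DEVS works out to (1U<<20)/16 == 65536 addressable
 * devices, each owning AOE_PARTITIONS consecutive minor numbers.
 */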

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}

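/* Static mapping used when aoe_dyndevs is zero: the AoE shelf
 * (major) and slot (minor) map directly to a device number,
 * n = aoemaj * NPERSHELF + aoemin.  For example, device e1.2
 * gets n = 1 * 16 + 2 == 18.
 */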
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	n = aoemaj * NPERSHELF + aoemin;
	if (aoemin >= NPERSHELF || n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
	} else {
		spin_lock_irqsave(&used_minors_lock, flags);
		if (test_bit(n, used_minors)) {
			pr_err("aoe: %s %lu\n",
				"existing device already has static minor number",
				n);
			error = -1;
		} else
			set_bit(n, used_minors);
		spin_unlock_irqrestore(&used_minors_lock, flags);
	}

	*sysminor = n;
	return error;
}

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically take a reference on it and are responsible for
 * performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
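
/* A minimal sketch of the expected calling pattern (the caller
 * shown here is illustrative, not taken from this file):
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d == NULL)
 *		return;
 *	... use d ...
 *	aoedev_put(d);
 */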

void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}

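/* Self-rearming one-second heartbeat.  It does no work itself,
 * and it stops rearming once DEVFL_TKILL is set, so that a later
 * del_timer_sync() can retire the timer for good.
 */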
static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}

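/* Fail the in-process request: error out its partially-filled
 * buffer, mark each bio still chained on d->ip.nxbio as not up
 * to date, and complete the request once the outstanding bio
 * count stashed in rq->special reaches zero.
 */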
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}

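/* Take the device down: fail every frame on the active lists,
 * reset each target's window, fail the in-process request and
 * all requests still pending on the block queue, and zero the
 * advertised capacity.
 */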
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			list_del(pos);
			if (f->buf) {
				f->buf->nframesout--;
				aoe_failbuf(d, f->buf);
			}
			aoe_freetframe(f);
		}
	}
	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		t->maxout = t->nframes;
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}

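/* Final teardown of a device that is off the devlist with its
 * timer stopped.  This may sleep (cancel_work_sync, and
 * blk_cleanup_queue when a disk was allocated), so it must not
 * be called under a spinlock.
 */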
static void
aoedev_freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;

	cancel_work_sync(&d->work);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + NTARGETS;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);
	kfree(d);
}

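/* Remove unused devices from the devlist.  Userland writes a
 * string to the flush character device; "all" also removes
 * devices that are still up, while anything else removes only
 * down devices.  Devices that are open, referenced, or busy
 * with gendisk allocation or resizing are skipped either way.
 * (In practice this is typically driven by the aoe-flush tool
 * from aoetools, though any write to the flush device works.)
 */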
int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;
	char buf[16];
	int all = 0;

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref) {
			spin_unlock(&d->lock);
			dd = &d->next;
			continue;
		}
		*dd = d->next;
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;
		rmd = d;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}

/* Wait for the network layer to drop its reference on the skb
 * before freeing it, polling dataref for up to Tms milliseconds
 * in Sms-millisecond steps; if the reference is still held after
 * that, leak the skb rather than free it out from under the
 * driver.  Such a leak has been confirmed to occur once, with
 * Tms=3*1000, due to the driver changing link and not processing
 * its transmit ring.  Solving the problem by returning an error
 * is hard enough that I'm still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}

/* Find the device, or allocate it if do_alloc is set.  Either way
 * the returned device carries a reference that the caller must
 * eventually drop with aoedev_put.
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d = devlist; d; d = d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			d->ref++;
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	skb_queue_head_init(&d->skbpool);
	init_timer(&d->timer);
	d->timer.data = (ulong) d;
	d->timer.function = dummy_timer;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->mintimer = MINTIMER;
	d->next = devlist;
	devlist = d;
 out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}

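/* Release a target: drop the references held on its network
 * interfaces, then free every frame on its free list along with
 * each frame's skb, and finally the target itself.
 */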
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}

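/* Module-unload teardown: flush the kthread I/O queue, then take
 * down and free every remaining device.
 */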
void
aoedev_exit(void)
{
	struct aoedev *d;
	ulong flags;

	aoe_flush_iocq();
	while ((d = devlist)) {
		devlist = d->next;

		spin_lock_irqsave(&d->lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock_irqrestore(&d->lock, flags);

		del_timer_sync(&d->timer);
		aoedev_freedev(d);
	}
}

int __init
aoedev_init(void)
{
	return 0;
}