xref: /linux/drivers/infiniband/hw/qib/qib_diag.c (revision e7d759f31ca295d589f7420719c311870bb3166f)
1 /*
2  * Copyright (c) 2012 Intel Corporation. All rights reserved.
3  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 /*
36  * This file contains support for diagnostic functions.  It is accessed by
37  * opening the qib_diag device, normally minor number 129.  Diagnostic use
38  * of the QLogic_IB chip may render the chip or board unusable until the
39  * driver is unloaded, or in some cases, until the system is rebooted.
40  *
41  * Accesses to the chip through this interface are not similar to going
42  * through the /sys/bus/pci resource mmap interface.
43  */
44 
45 #include <linux/io.h>
46 #include <linux/pci.h>
47 #include <linux/poll.h>
48 #include <linux/vmalloc.h>
49 #include <linux/export.h>
50 #include <linux/fs.h>
51 #include <linux/uaccess.h>
52 
53 #include "qib.h"
54 #include "qib_common.h"
55 
56 #undef pr_fmt
57 #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
58 
/*
 * Handshake state of a diag client.  Every opener has to read and then
 * write offset 0 before anything else is permitted, so that a stray
 * cat or od on the device node cannot poke the chip.  The values are
 * ordered: the read/write handlers compare the state against READY.
 */
enum diag_state {
	UNUSED = 0,	/* struct sits on the free pool */
	OPENED,		/* device opened, handshake read still pending */
	INIT,		/* read done, handshake write still pending */
	READY,		/* handshake finished, all accesses allowed */
};
65 
66 /* State for an individual client. PID so children cannot abuse handshake */
67 static struct qib_diag_client {
68 	struct qib_diag_client *next;
69 	struct qib_devdata *dd;
70 	pid_t pid;
71 	enum diag_state state;
72 } *client_pool;
73 
74 /*
75  * Get a client struct. Recycled if possible, else kmalloc.
76  * Must be called with qib_mutex held
77  */
78 static struct qib_diag_client *get_client(struct qib_devdata *dd)
79 {
80 	struct qib_diag_client *dc;
81 
82 	dc = client_pool;
83 	if (dc)
84 		/* got from pool remove it and use */
85 		client_pool = dc->next;
86 	else
87 		/* None in pool, alloc and init */
88 		dc = kmalloc(sizeof(*dc), GFP_KERNEL);
89 
90 	if (dc) {
91 		dc->next = NULL;
92 		dc->dd = dd;
93 		dc->pid = current->pid;
94 		dc->state = OPENED;
95 	}
96 	return dc;
97 }
98 
99 /*
100  * Return to pool. Must be called with qib_mutex held
101  */
102 static void return_client(struct qib_diag_client *dc)
103 {
104 	struct qib_devdata *dd = dc->dd;
105 	struct qib_diag_client *tdc, *rdc;
106 
107 	rdc = NULL;
108 	if (dc == dd->diag_client) {
109 		dd->diag_client = dc->next;
110 		rdc = dc;
111 	} else {
112 		tdc = dc->dd->diag_client;
113 		while (tdc) {
114 			if (dc == tdc->next) {
115 				tdc->next = dc->next;
116 				rdc = dc;
117 				break;
118 			}
119 			tdc = tdc->next;
120 		}
121 	}
122 	if (rdc) {
123 		rdc->state = UNUSED;
124 		rdc->dd = NULL;
125 		rdc->pid = 0;
126 		rdc->next = client_pool;
127 		client_pool = rdc;
128 	}
129 }
130 
131 static int qib_diag_open(struct inode *in, struct file *fp);
132 static int qib_diag_release(struct inode *in, struct file *fp);
133 static ssize_t qib_diag_read(struct file *fp, char __user *data,
134 			     size_t count, loff_t *off);
135 static ssize_t qib_diag_write(struct file *fp, const char __user *data,
136 			      size_t count, loff_t *off);
137 
138 static const struct file_operations diag_file_ops = {
139 	.owner = THIS_MODULE,
140 	.write = qib_diag_write,
141 	.read = qib_diag_read,
142 	.open = qib_diag_open,
143 	.release = qib_diag_release,
144 	.llseek = default_llseek,
145 };
146 
147 static atomic_t diagpkt_count = ATOMIC_INIT(0);
148 static struct cdev *diagpkt_cdev;
149 static struct device *diagpkt_device;
150 
151 static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data,
152 				 size_t count, loff_t *off);
153 
154 static const struct file_operations diagpkt_file_ops = {
155 	.owner = THIS_MODULE,
156 	.write = qib_diagpkt_write,
157 	.llseek = noop_llseek,
158 };
159 
160 int qib_diag_add(struct qib_devdata *dd)
161 {
162 	char name[16];
163 	int ret = 0;
164 
165 	if (atomic_inc_return(&diagpkt_count) == 1) {
166 		ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt",
167 				    &diagpkt_file_ops, &diagpkt_cdev,
168 				    &diagpkt_device);
169 		if (ret)
170 			goto done;
171 	}
172 
173 	snprintf(name, sizeof(name), "ipath_diag%d", dd->unit);
174 	ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name,
175 			    &diag_file_ops, &dd->diag_cdev,
176 			    &dd->diag_device);
177 done:
178 	return ret;
179 }
180 
181 static void qib_unregister_observers(struct qib_devdata *dd);
182 
183 void qib_diag_remove(struct qib_devdata *dd)
184 {
185 	struct qib_diag_client *dc;
186 
187 	if (atomic_dec_and_test(&diagpkt_count))
188 		qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
189 
190 	qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
191 
192 	/*
193 	 * Return all diag_clients of this device. There should be none,
194 	 * as we are "guaranteed" that no clients are still open
195 	 */
196 	while (dd->diag_client)
197 		return_client(dd->diag_client);
198 
199 	/* Now clean up all unused client structs */
200 	while (client_pool) {
201 		dc = client_pool;
202 		client_pool = dc->next;
203 		kfree(dc);
204 	}
205 	/* Clean up observer list */
206 	qib_unregister_observers(dd);
207 }
208 
209 /* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem *
210  *
211  * @dd: the qlogic_ib device
212  * @offs: the offset in chip-space
213  * @cntp: Pointer to max (byte) count for transfer starting at offset
214  * This returns a u32 __iomem * so it can be used for both 64 and 32-bit
215  * mapping. It is needed because with the use of PAT for control of
216  * write-combining, the logically contiguous address-space of the chip
217  * may be split into virtually non-contiguous spaces, with different
218  * attributes, which are them mapped to contiguous physical space
219  * based from the first BAR.
220  *
221  * The code below makes the same assumptions as were made in
222  * init_chip_wc_pat() (qib_init.c), copied here:
223  * Assumes chip address space looks like:
224  *		- kregs + sregs + cregs + uregs (in any order)
225  *		- piobufs (2K and 4K bufs in either order)
226  *	or:
227  *		- kregs + sregs + cregs (in any order)
228  *		- piobufs (2K and 4K bufs in either order)
229  *		- uregs
230  *
231  * If cntp is non-NULL, returns how many bytes from offset can be accessed
232  * Returns 0 if the offset is not mapped.
233  */
234 static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset,
235 				       u32 *cntp)
236 {
237 	u32 kreglen;
238 	u32 snd_bottom, snd_lim = 0;
239 	u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase;
240 	u32 __iomem *map = NULL;
241 	u32 cnt = 0;
242 	u32 tot4k, offs4k;
243 
244 	/* First, simplest case, offset is within the first map. */
245 	kreglen = (dd->kregend - dd->kregbase) * sizeof(u64);
246 	if (offset < kreglen) {
247 		map = krb32 + (offset / sizeof(u32));
248 		cnt = kreglen - offset;
249 		goto mapped;
250 	}
251 
252 	/*
253 	 * Next check for user regs, the next most common case,
254 	 * and a cheap check because if they are not in the first map
255 	 * they are last in chip.
256 	 */
257 	if (dd->userbase) {
258 		/* If user regs mapped, they are after send, so set limit. */
259 		u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase;
260 
261 		if (!dd->piovl15base)
262 			snd_lim = dd->uregbase;
263 		krb32 = (u32 __iomem *)dd->userbase;
264 		if (offset >= dd->uregbase && offset < ulim) {
265 			map = krb32 + (offset - dd->uregbase) / sizeof(u32);
266 			cnt = ulim - offset;
267 			goto mapped;
268 		}
269 	}
270 
271 	/*
272 	 * Lastly, check for offset within Send Buffers.
273 	 * This is gnarly because struct devdata is deliberately vague
274 	 * about things like 7322 VL15 buffers, and we are not in
275 	 * chip-specific code here, so should not make many assumptions.
276 	 * The one we _do_ make is that the only chip that has more sndbufs
277 	 * than we admit is the 7322, and it has userregs above that, so
278 	 * we know the snd_lim.
279 	 */
280 	/* Assume 2K buffers are first. */
281 	snd_bottom = dd->pio2k_bufbase;
282 	if (snd_lim == 0) {
283 		u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign);
284 
285 		snd_lim = snd_bottom + tot2k;
286 	}
287 	/* If 4k buffers exist, account for them by bumping
288 	 * appropriate limit.
289 	 */
290 	tot4k = dd->piobcnt4k * dd->align4k;
291 	offs4k = dd->piobufbase >> 32;
292 	if (dd->piobcnt4k) {
293 		if (snd_bottom > offs4k)
294 			snd_bottom = offs4k;
295 		else {
296 			/* 4k above 2k. Bump snd_lim, if needed*/
297 			if (!dd->userbase || dd->piovl15base)
298 				snd_lim = offs4k + tot4k;
299 		}
300 	}
301 	/*
302 	 * Judgement call: can we ignore the space between SendBuffs and
303 	 * UserRegs, where we would like to see vl15 buffs, but not more?
304 	 */
305 	if (offset >= snd_bottom && offset < snd_lim) {
306 		offset -= snd_bottom;
307 		map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32));
308 		cnt = snd_lim - offset;
309 	}
310 
311 	if (!map && offs4k && dd->piovl15base) {
312 		snd_lim = offs4k + tot4k + 2 * dd->align4k;
313 		if (offset >= (offs4k + tot4k) && offset < snd_lim) {
314 			map = (u32 __iomem *)dd->piovl15base +
315 				((offset - (offs4k + tot4k)) / sizeof(u32));
316 			cnt = snd_lim - offset;
317 		}
318 	}
319 
320 mapped:
321 	if (cntp)
322 		*cntp = cnt;
323 	return map;
324 }
325 
326 /*
327  * qib_read_umem64 - read a 64-bit quantity from the chip into user space
328  * @dd: the qlogic_ib device
329  * @uaddr: the location to store the data in user memory
330  * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
331  * @count: number of bytes to copy (multiple of 32 bits)
332  *
333  * This function also localizes all chip memory accesses.
334  * The copy should be written such that we read full cacheline packets
335  * from the chip.  This is usually used for a single qword
336  *
337  * NOTE:  This assumes the chip address is 64-bit aligned.
338  */
339 static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr,
340 			   u32 regoffs, size_t count)
341 {
342 	const u64 __iomem *reg_addr;
343 	const u64 __iomem *reg_end;
344 	u32 limit;
345 	int ret;
346 
347 	reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
348 	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
349 		ret = -EINVAL;
350 		goto bail;
351 	}
352 	if (count >= limit)
353 		count = limit;
354 	reg_end = reg_addr + (count / sizeof(u64));
355 
356 	/* not very efficient, but it works for now */
357 	while (reg_addr < reg_end) {
358 		u64 data = readq(reg_addr);
359 
360 		if (copy_to_user(uaddr, &data, sizeof(u64))) {
361 			ret = -EFAULT;
362 			goto bail;
363 		}
364 		reg_addr++;
365 		uaddr += sizeof(u64);
366 	}
367 	ret = 0;
368 bail:
369 	return ret;
370 }
371 
372 /*
373  * qib_write_umem64 - write a 64-bit quantity to the chip from user space
374  * @dd: the qlogic_ib device
375  * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
376  * @uaddr: the source of the data in user memory
377  * @count: the number of bytes to copy (multiple of 32 bits)
378  *
379  * This is usually used for a single qword
380  * NOTE:  This assumes the chip address is 64-bit aligned.
381  */
382 
383 static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs,
384 			    const void __user *uaddr, size_t count)
385 {
386 	u64 __iomem *reg_addr;
387 	const u64 __iomem *reg_end;
388 	u32 limit;
389 	int ret;
390 
391 	reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
392 	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
393 		ret = -EINVAL;
394 		goto bail;
395 	}
396 	if (count >= limit)
397 		count = limit;
398 	reg_end = reg_addr + (count / sizeof(u64));
399 
400 	/* not very efficient, but it works for now */
401 	while (reg_addr < reg_end) {
402 		u64 data;
403 
404 		if (copy_from_user(&data, uaddr, sizeof(data))) {
405 			ret = -EFAULT;
406 			goto bail;
407 		}
408 		writeq(data, reg_addr);
409 
410 		reg_addr++;
411 		uaddr += sizeof(u64);
412 	}
413 	ret = 0;
414 bail:
415 	return ret;
416 }
417 
418 /*
419  * qib_read_umem32 - read a 32-bit quantity from the chip into user space
420  * @dd: the qlogic_ib device
421  * @uaddr: the location to store the data in user memory
422  * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
423  * @count: number of bytes to copy
424  *
425  * read 32 bit values, not 64 bit; for memories that only
426  * support 32 bit reads; usually a single dword.
427  */
428 static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr,
429 			   u32 regoffs, size_t count)
430 {
431 	const u32 __iomem *reg_addr;
432 	const u32 __iomem *reg_end;
433 	u32 limit;
434 	int ret;
435 
436 	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
437 	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
438 		ret = -EINVAL;
439 		goto bail;
440 	}
441 	if (count >= limit)
442 		count = limit;
443 	reg_end = reg_addr + (count / sizeof(u32));
444 
445 	/* not very efficient, but it works for now */
446 	while (reg_addr < reg_end) {
447 		u32 data = readl(reg_addr);
448 
449 		if (copy_to_user(uaddr, &data, sizeof(data))) {
450 			ret = -EFAULT;
451 			goto bail;
452 		}
453 
454 		reg_addr++;
455 		uaddr += sizeof(u32);
456 
457 	}
458 	ret = 0;
459 bail:
460 	return ret;
461 }
462 
463 /*
464  * qib_write_umem32 - write a 32-bit quantity to the chip from user space
465  * @dd: the qlogic_ib device
466  * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
467  * @uaddr: the source of the data in user memory
468  * @count: number of bytes to copy
469  *
470  * write 32 bit values, not 64 bit; for memories that only
471  * support 32 bit write; usually a single dword.
472  */
473 
474 static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs,
475 			    const void __user *uaddr, size_t count)
476 {
477 	u32 __iomem *reg_addr;
478 	const u32 __iomem *reg_end;
479 	u32 limit;
480 	int ret;
481 
482 	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
483 	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
484 		ret = -EINVAL;
485 		goto bail;
486 	}
487 	if (count >= limit)
488 		count = limit;
489 	reg_end = reg_addr + (count / sizeof(u32));
490 
491 	while (reg_addr < reg_end) {
492 		u32 data;
493 
494 		if (copy_from_user(&data, uaddr, sizeof(data))) {
495 			ret = -EFAULT;
496 			goto bail;
497 		}
498 		writel(data, reg_addr);
499 
500 		reg_addr++;
501 		uaddr += sizeof(u32);
502 	}
503 	ret = 0;
504 bail:
505 	return ret;
506 }
507 
508 static int qib_diag_open(struct inode *in, struct file *fp)
509 {
510 	int unit = iminor(in) - QIB_DIAG_MINOR_BASE;
511 	struct qib_devdata *dd;
512 	struct qib_diag_client *dc;
513 	int ret;
514 
515 	mutex_lock(&qib_mutex);
516 
517 	dd = qib_lookup(unit);
518 
519 	if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
520 	    !dd->kregbase) {
521 		ret = -ENODEV;
522 		goto bail;
523 	}
524 
525 	dc = get_client(dd);
526 	if (!dc) {
527 		ret = -ENOMEM;
528 		goto bail;
529 	}
530 	dc->next = dd->diag_client;
531 	dd->diag_client = dc;
532 	fp->private_data = dc;
533 	ret = 0;
534 bail:
535 	mutex_unlock(&qib_mutex);
536 
537 	return ret;
538 }
539 
540 /**
541  * qib_diagpkt_write - write an IB packet
542  * @fp: the diag data device file pointer
543  * @data: qib_diag_pkt structure saying where to get the packet
544  * @count: size of data to write
545  * @off: unused by this code
546  */
547 static ssize_t qib_diagpkt_write(struct file *fp,
548 				 const char __user *data,
549 				 size_t count, loff_t *off)
550 {
551 	u32 __iomem *piobuf;
552 	u32 plen, pbufn, maxlen_reserve;
553 	struct qib_diag_xpkt dp;
554 	u32 *tmpbuf = NULL;
555 	struct qib_devdata *dd;
556 	struct qib_pportdata *ppd;
557 	ssize_t ret = 0;
558 
559 	if (count != sizeof(dp)) {
560 		ret = -EINVAL;
561 		goto bail;
562 	}
563 	if (copy_from_user(&dp, data, sizeof(dp))) {
564 		ret = -EFAULT;
565 		goto bail;
566 	}
567 
568 	dd = qib_lookup(dp.unit);
569 	if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) {
570 		ret = -ENODEV;
571 		goto bail;
572 	}
573 	if (!(dd->flags & QIB_INITTED)) {
574 		/* no hardware, freeze, etc. */
575 		ret = -ENODEV;
576 		goto bail;
577 	}
578 
579 	if (dp.version != _DIAG_XPKT_VERS) {
580 		qib_dev_err(dd, "Invalid version %u for diagpkt_write\n",
581 			    dp.version);
582 		ret = -EINVAL;
583 		goto bail;
584 	}
585 	/* send count must be an exact number of dwords */
586 	if (dp.len & 3) {
587 		ret = -EINVAL;
588 		goto bail;
589 	}
590 	if (!dp.port || dp.port > dd->num_pports) {
591 		ret = -EINVAL;
592 		goto bail;
593 	}
594 	ppd = &dd->pport[dp.port - 1];
595 
596 	/*
597 	 * need total length before first word written, plus 2 Dwords. One Dword
598 	 * is for padding so we get the full user data when not aligned on
599 	 * a word boundary. The other Dword is to make sure we have room for the
600 	 * ICRC which gets tacked on later.
601 	 */
602 	maxlen_reserve = 2 * sizeof(u32);
603 	if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
604 		ret = -EINVAL;
605 		goto bail;
606 	}
607 
608 	plen = sizeof(u32) + dp.len;
609 
610 	tmpbuf = vmalloc(plen);
611 	if (!tmpbuf) {
612 		ret = -ENOMEM;
613 		goto bail;
614 	}
615 
616 	if (copy_from_user(tmpbuf,
617 			   u64_to_user_ptr(dp.data),
618 			   dp.len)) {
619 		ret = -EFAULT;
620 		goto bail;
621 	}
622 
623 	plen >>= 2;             /* in dwords */
624 
625 	if (dp.pbc_wd == 0)
626 		dp.pbc_wd = plen;
627 
628 	piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn);
629 	if (!piobuf) {
630 		ret = -EBUSY;
631 		goto bail;
632 	}
633 	/* disarm it just to be extra sure */
634 	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn));
635 
636 	/* disable header check on pbufn for this packet */
637 	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL);
638 
639 	writeq(dp.pbc_wd, piobuf);
640 	/*
641 	 * Copy all but the trigger word, then flush, so it's written
642 	 * to chip before trigger word, then write trigger word, then
643 	 * flush again, so packet is sent.
644 	 */
645 	if (dd->flags & QIB_PIO_FLUSH_WC) {
646 		qib_flush_wc();
647 		qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
648 		qib_flush_wc();
649 		__raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
650 	} else
651 		qib_pio_copy(piobuf + 2, tmpbuf, plen);
652 
653 	if (dd->flags & QIB_USE_SPCL_TRIG) {
654 		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
655 
656 		qib_flush_wc();
657 		__raw_writel(0xaebecede, piobuf + spcl_off);
658 	}
659 
660 	/*
661 	 * Ensure buffer is written to the chip, then re-enable
662 	 * header checks (if supported by chip).  The txchk
663 	 * code will ensure seen by chip before returning.
664 	 */
665 	qib_flush_wc();
666 	qib_sendbuf_done(dd, pbufn);
667 	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
668 
669 	ret = sizeof(dp);
670 
671 bail:
672 	vfree(tmpbuf);
673 	return ret;
674 }
675 
676 static int qib_diag_release(struct inode *in, struct file *fp)
677 {
678 	mutex_lock(&qib_mutex);
679 	return_client(fp->private_data);
680 	fp->private_data = NULL;
681 	mutex_unlock(&qib_mutex);
682 	return 0;
683 }
684 
685 /*
686  * Chip-specific code calls to register its interest in
687  * a specific range.
688  */
689 struct diag_observer_list_elt {
690 	struct diag_observer_list_elt *next;
691 	const struct diag_observer *op;
692 };
693 
694 int qib_register_observer(struct qib_devdata *dd,
695 			  const struct diag_observer *op)
696 {
697 	struct diag_observer_list_elt *olp;
698 	unsigned long flags;
699 
700 	if (!dd || !op)
701 		return -EINVAL;
702 	olp = vmalloc(sizeof(*olp));
703 	if (!olp)
704 		return -ENOMEM;
705 
706 	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
707 	olp->op = op;
708 	olp->next = dd->diag_observer_list;
709 	dd->diag_observer_list = olp;
710 	spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
711 
712 	return 0;
713 }
714 
715 /* Remove all registered observers when device is closed */
716 static void qib_unregister_observers(struct qib_devdata *dd)
717 {
718 	struct diag_observer_list_elt *olp;
719 	unsigned long flags;
720 
721 	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
722 	olp = dd->diag_observer_list;
723 	while (olp) {
724 		/* Pop one observer, let go of lock */
725 		dd->diag_observer_list = olp->next;
726 		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
727 		vfree(olp);
728 		/* try again. */
729 		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
730 		olp = dd->diag_observer_list;
731 	}
732 	spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
733 }
734 
735 /*
736  * Find the observer, if any, for the specified address. Initial implementation
737  * is simple stack of observers. This must be called with diag transaction
738  * lock held.
739  */
740 static const struct diag_observer *diag_get_observer(struct qib_devdata *dd,
741 						     u32 addr)
742 {
743 	struct diag_observer_list_elt *olp;
744 	const struct diag_observer *op = NULL;
745 
746 	olp = dd->diag_observer_list;
747 	while (olp) {
748 		op = olp->op;
749 		if (addr >= op->bottom && addr <= op->top)
750 			break;
751 		olp = olp->next;
752 	}
753 	if (!olp)
754 		op = NULL;
755 
756 	return op;
757 }
758 
759 static ssize_t qib_diag_read(struct file *fp, char __user *data,
760 			     size_t count, loff_t *off)
761 {
762 	struct qib_diag_client *dc = fp->private_data;
763 	struct qib_devdata *dd = dc->dd;
764 	ssize_t ret;
765 
766 	if (dc->pid != current->pid) {
767 		ret = -EPERM;
768 		goto bail;
769 	}
770 
771 	if (count == 0)
772 		ret = 0;
773 	else if ((count % 4) || (*off % 4))
774 		/* address or length is not 32-bit aligned, hence invalid */
775 		ret = -EINVAL;
776 	else if (dc->state < READY && (*off || count != 8))
777 		ret = -EINVAL;  /* prevent cat /dev/qib_diag* */
778 	else {
779 		unsigned long flags;
780 		u64 data64 = 0;
781 		int use_32;
782 		const struct diag_observer *op;
783 
784 		use_32 = (count % 8) || (*off % 8);
785 		ret = -1;
786 		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
787 		/*
788 		 * Check for observer on this address range.
789 		 * we only support a single 32 or 64-bit read
790 		 * via observer, currently.
791 		 */
792 		op = diag_get_observer(dd, *off);
793 		if (op) {
794 			u32 offset = *off;
795 
796 			ret = op->hook(dd, op, offset, &data64, 0, use_32);
797 		}
798 		/*
799 		 * We need to release lock before any copy_to_user(),
800 		 * whether implicit in qib_read_umem* or explicit below.
801 		 */
802 		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
803 		if (!op) {
804 			if (use_32)
805 				/*
806 				 * Address or length is not 64-bit aligned;
807 				 * do 32-bit rd
808 				 */
809 				ret = qib_read_umem32(dd, data, (u32) *off,
810 						      count);
811 			else
812 				ret = qib_read_umem64(dd, data, (u32) *off,
813 						      count);
814 		} else if (ret == count) {
815 			/* Below finishes case where observer existed */
816 			ret = copy_to_user(data, &data64, use_32 ?
817 					   sizeof(u32) : sizeof(u64));
818 			if (ret)
819 				ret = -EFAULT;
820 		}
821 	}
822 
823 	if (ret >= 0) {
824 		*off += count;
825 		ret = count;
826 		if (dc->state == OPENED)
827 			dc->state = INIT;
828 	}
829 bail:
830 	return ret;
831 }
832 
833 static ssize_t qib_diag_write(struct file *fp, const char __user *data,
834 			      size_t count, loff_t *off)
835 {
836 	struct qib_diag_client *dc = fp->private_data;
837 	struct qib_devdata *dd = dc->dd;
838 	ssize_t ret;
839 
840 	if (dc->pid != current->pid) {
841 		ret = -EPERM;
842 		goto bail;
843 	}
844 
845 	if (count == 0)
846 		ret = 0;
847 	else if ((count % 4) || (*off % 4))
848 		/* address or length is not 32-bit aligned, hence invalid */
849 		ret = -EINVAL;
850 	else if (dc->state < READY &&
851 		((*off || count != 8) || dc->state != INIT))
852 		/* No writes except second-step of init seq */
853 		ret = -EINVAL;  /* before any other write allowed */
854 	else {
855 		unsigned long flags;
856 		const struct diag_observer *op = NULL;
857 		int use_32 =  (count % 8) || (*off % 8);
858 
859 		/*
860 		 * Check for observer on this address range.
861 		 * We only support a single 32 or 64-bit write
862 		 * via observer, currently. This helps, because
863 		 * we would otherwise have to jump through hoops
864 		 * to make "diag transaction" meaningful when we
865 		 * cannot do a copy_from_user while holding the lock.
866 		 */
867 		if (count == 4 || count == 8) {
868 			u64 data64;
869 			u32 offset = *off;
870 
871 			ret = copy_from_user(&data64, data, count);
872 			if (ret) {
873 				ret = -EFAULT;
874 				goto bail;
875 			}
876 			spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
877 			op = diag_get_observer(dd, *off);
878 			if (op)
879 				ret = op->hook(dd, op, offset, &data64, ~0Ull,
880 					       use_32);
881 			spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
882 		}
883 
884 		if (!op) {
885 			if (use_32)
886 				/*
887 				 * Address or length is not 64-bit aligned;
888 				 * do 32-bit write
889 				 */
890 				ret = qib_write_umem32(dd, (u32) *off, data,
891 						       count);
892 			else
893 				ret = qib_write_umem64(dd, (u32) *off, data,
894 						       count);
895 		}
896 	}
897 
898 	if (ret >= 0) {
899 		*off += count;
900 		ret = count;
901 		if (dc->state == INIT)
902 			dc->state = READY; /* all read/write OK now */
903 	}
904 bail:
905 	return ret;
906 }
907