1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/drivers/char/xillybus/xillybus_core.c
4  *
5  * Copyright 2011 Xillybus Ltd, http://xillybus.com
6  *
7  * Driver for the Xillybus FPGA/host framework.
8  *
9  * This driver interfaces with a special IP core in an FPGA, setting up
10  * a pipe between a hardware FIFO in the programmable logic and a device
11  * file in the host. The number of such pipes and their attributes are
12  * set up on the logic. This driver detects these automatically and
13  * creates the device files accordingly.
14  */
15 
16 #include <linux/list.h>
17 #include <linux/device.h>
18 #include <linux/module.h>
19 #include <linux/io.h>
20 #include <linux/dma-mapping.h>
21 #include <linux/interrupt.h>
22 #include <linux/sched.h>
23 #include <linux/fs.h>
24 #include <linux/spinlock.h>
25 #include <linux/mutex.h>
26 #include <linux/crc32.h>
27 #include <linux/poll.h>
28 #include <linux/delay.h>
29 #include <linux/slab.h>
30 #include <linux/workqueue.h>
31 #include "xillybus.h"
32 #include "xillybus_class.h"
33 
34 MODULE_DESCRIPTION("Xillybus core functions");
35 MODULE_AUTHOR("Eli Billauer, Xillybus Ltd.");
36 MODULE_VERSION("1.10");
37 MODULE_ALIAS("xillybus_core");
38 MODULE_LICENSE("GPL v2");
39 
40 /* General timeout is 100 ms, rx timeout is 10 ms */
41 #define XILLY_RX_TIMEOUT (10*HZ/1000)
42 #define XILLY_TIMEOUT (100*HZ/1000)
43 
44 #define fpga_msg_ctrl_reg              0x0008
45 #define fpga_dma_control_reg           0x0020
46 #define fpga_dma_bufno_reg             0x0024
47 #define fpga_dma_bufaddr_lowaddr_reg   0x0028
48 #define fpga_dma_bufaddr_highaddr_reg  0x002c
49 #define fpga_buf_ctrl_reg              0x0030
50 #define fpga_buf_offset_reg            0x0034
51 #define fpga_endian_reg                0x0040
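
/*
 * A rough summary of how this driver uses the registers above, derived
 * from the code below (not a full hardware description): commands to
 * fpga_buf_ctrl_reg are encoded as bit 0 = 1 for FPGA-to-host ("wr")
 * channels, the channel number shifted left by 1, a buffer index shifted
 * left by 12, and an opcode in bits 31:24 (2 = submit buffer / offset
 * limit, 3 = flush, or send the IDT on channel 0, 4 = open channel,
 * 5 = close channel, 6 = set seek address); a write with no opcode bits
 * hands a consumed buffer back to the FPGA. fpga_msg_ctrl_reg takes
 * 0x01 (message NACK), 0x03 (message ACK) and 0x04 (reset the message
 * subsystem). fpga_buf_offset_reg holds the element offset that
 * accompanies opcodes 2 and 6.
 */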
52 
53 #define XILLYMSG_OPCODE_RELEASEBUF 1
54 #define XILLYMSG_OPCODE_QUIESCEACK 2
55 #define XILLYMSG_OPCODE_FIFOEOF 3
56 #define XILLYMSG_OPCODE_FATAL_ERROR 4
57 #define XILLYMSG_OPCODE_NONEMPTY 5
58 
59 static const char xillyname[] = "xillybus";
60 
61 static struct workqueue_struct *xillybus_wq;
62 
63 /*
64  * Locking scheme: Mutexes protect invocations of character device methods.
65  * If both locks are taken, wr_mutex is taken first, rd_mutex second.
66  *
67  * wr_spinlock protects wr_*_buf_idx, wr_empty, wr_sleepy, wr_ready and the
68  * buffers' end_offset fields against changes made by IRQ handler (and in
69  * theory, other file request handlers, but the mutex handles that). Nothing
70  * else.
71  * The spinlocks are held for short direct memory manipulations. Needless to say,
72  * no mutex locking is allowed when a spinlock is held.
73  *
74  * rd_spinlock does the same with rd_*_buf_idx, rd_empty and end_offset.
75  *
76  * register_mutex is endpoint-specific, and is held when non-atomic
77  * register operations are performed. wr_mutex and rd_mutex may be
78  * held when register_mutex is taken, but none of the spinlocks. Note that
79  * register_mutex doesn't protect against sporadic buf_ctrl_reg writes
80  * which are unrelated to buf_offset_reg, since they are harmless.
81  *
82  * Blocking on the wait queues is allowed with mutexes held, but not with
83  * spinlocks.
84  *
85  * Only interruptible blocking is allowed on mutexes and wait queues.
86  *
87  * All in all, the locking order goes (with skips allowed, of course):
88  * wr_mutex -> rd_mutex -> register_mutex -> wr_spinlock -> rd_spinlock
89  */
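
/*
 * As a hypothetical illustration (no single code path in this driver
 * actually takes all of these at once), the nesting would look like:
 *
 *	mutex_lock(&channel->wr_mutex);
 *	mutex_lock(&channel->rd_mutex);
 *	mutex_lock(&endpoint->register_mutex);
 *	spin_lock_irqsave(&channel->wr_spinlock, flags);
 *	spin_lock(&channel->rd_spinlock);
 *	... short, non-sleeping work ...
 *	(released in the reverse order)
 */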
90 
91 static void malformed_message(struct xilly_endpoint *endpoint, u32 *buf)
92 {
93 	int opcode;
94 	int msg_channel, msg_bufno, msg_data, msg_dir;
95 
96 	opcode = (buf[0] >> 24) & 0xff;
97 	msg_dir = buf[0] & 1;
98 	msg_channel = (buf[0] >> 1) & 0x7ff;
99 	msg_bufno = (buf[0] >> 12) & 0x3ff;
100 	msg_data = buf[1] & 0xfffffff;
101 
102 	dev_warn(endpoint->dev,
103 		 "Malformed message (skipping): opcode=%d, channel=%03x, dir=%d, bufno=%03x, data=%07x\n",
104 		 opcode, msg_channel, msg_dir, msg_bufno, msg_data);
105 }
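
/*
 * Interrupt messages from the FPGA arrive in the endpoint's message
 * buffer as pairs of 32-bit words, laid out as parsed above and in
 * xillybus_isr() below: in the first word, bit 0 is the direction
 * (1 = FPGA-to-host channel), bits 11:1 the channel number, bits 21:12
 * the buffer number, bit 22 the "last message" flag and bits 31:24 the
 * opcode (one of XILLYMSG_OPCODE_*). In the second word, bits 27:0 carry
 * the data and bits 31:28 a rolling counter that must match msg_counter,
 * or the ISR NACKs the whole bundle.
 */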
106 
107 /*
108  * xillybus_isr assumes the interrupt is allocated exclusively to it,
109  * which is the natural case with MSI and several other hardware-oriented
110  * interrupts. Sharing is not allowed.
111  */
112 
113 irqreturn_t xillybus_isr(int irq, void *data)
114 {
115 	struct xilly_endpoint *ep = data;
116 	u32 *buf;
117 	unsigned int buf_size;
118 	int i;
119 	int opcode;
120 	unsigned int msg_channel, msg_bufno, msg_data, msg_dir;
121 	struct xilly_channel *channel;
122 
123 	buf = ep->msgbuf_addr;
124 	buf_size = ep->msg_buf_size/sizeof(u32);
125 
126 	ep->ephw->hw_sync_sgl_for_cpu(ep,
127 				      ep->msgbuf_dma_addr,
128 				      ep->msg_buf_size,
129 				      DMA_FROM_DEVICE);
130 
131 	for (i = 0; i < buf_size; i += 2) {
132 		if (((buf[i+1] >> 28) & 0xf) != ep->msg_counter) {
133 			malformed_message(ep, &buf[i]);
134 			dev_warn(ep->dev,
135 				 "Sending a NACK on counter %x (instead of %x) on entry %d\n",
136 				 ((buf[i+1] >> 28) & 0xf),
137 				 ep->msg_counter,
138 				 i/2);
139 
140 			if (++ep->failed_messages > 10) {
141 				dev_err(ep->dev,
142 					"Lost sync with interrupt messages. Stopping.\n");
143 			} else {
144 				ep->ephw->hw_sync_sgl_for_device(
145 					ep,
146 					ep->msgbuf_dma_addr,
147 					ep->msg_buf_size,
148 					DMA_FROM_DEVICE);
149 
150 				iowrite32(0x01,  /* Message NACK */
151 					  ep->registers + fpga_msg_ctrl_reg);
152 			}
153 			return IRQ_HANDLED;
154 		} else if (buf[i] & (1 << 22)) /* Last message */
155 			break;
156 	}
157 
158 	if (i >= buf_size) {
159 		dev_err(ep->dev, "Bad interrupt message. Stopping.\n");
160 		return IRQ_HANDLED;
161 	}
162 
163 	buf_size = i + 2;
164 
165 	for (i = 0; i < buf_size; i += 2) { /* Scan through messages */
166 		opcode = (buf[i] >> 24) & 0xff;
167 
168 		msg_dir = buf[i] & 1;
169 		msg_channel = (buf[i] >> 1) & 0x7ff;
170 		msg_bufno = (buf[i] >> 12) & 0x3ff;
171 		msg_data = buf[i+1] & 0xfffffff;
172 
173 		switch (opcode) {
174 		case XILLYMSG_OPCODE_RELEASEBUF:
175 			if ((msg_channel > ep->num_channels) ||
176 			    (msg_channel == 0)) {
177 				malformed_message(ep, &buf[i]);
178 				break;
179 			}
180 
181 			channel = ep->channels[msg_channel];
182 
183 			if (msg_dir) { /* Write channel */
184 				if (msg_bufno >= channel->num_wr_buffers) {
185 					malformed_message(ep, &buf[i]);
186 					break;
187 				}
188 				spin_lock(&channel->wr_spinlock);
189 				channel->wr_buffers[msg_bufno]->end_offset =
190 					msg_data;
191 				channel->wr_fpga_buf_idx = msg_bufno;
192 				channel->wr_empty = 0;
193 				channel->wr_sleepy = 0;
194 				spin_unlock(&channel->wr_spinlock);
195 
196 				wake_up_interruptible(&channel->wr_wait);
197 
198 			} else {
199 				/* Read channel */
200 
201 				if (msg_bufno >= channel->num_rd_buffers) {
202 					malformed_message(ep, &buf[i]);
203 					break;
204 				}
205 
206 				spin_lock(&channel->rd_spinlock);
207 				channel->rd_fpga_buf_idx = msg_bufno;
208 				channel->rd_full = 0;
209 				spin_unlock(&channel->rd_spinlock);
210 
211 				wake_up_interruptible(&channel->rd_wait);
212 				if (!channel->rd_synchronous)
213 					queue_delayed_work(
214 						xillybus_wq,
215 						&channel->rd_workitem,
216 						XILLY_RX_TIMEOUT);
217 			}
218 
219 			break;
220 		case XILLYMSG_OPCODE_NONEMPTY:
221 			if ((msg_channel > ep->num_channels) ||
222 			    (msg_channel == 0) || (!msg_dir) ||
223 			    !ep->channels[msg_channel]->wr_supports_nonempty) {
224 				malformed_message(ep, &buf[i]);
225 				break;
226 			}
227 
228 			channel = ep->channels[msg_channel];
229 
230 			if (msg_bufno >= channel->num_wr_buffers) {
231 				malformed_message(ep, &buf[i]);
232 				break;
233 			}
234 			spin_lock(&channel->wr_spinlock);
235 			if (msg_bufno == channel->wr_host_buf_idx)
236 				channel->wr_ready = 1;
237 			spin_unlock(&channel->wr_spinlock);
238 
239 			wake_up_interruptible(&channel->wr_ready_wait);
240 
241 			break;
242 		case XILLYMSG_OPCODE_QUIESCEACK:
243 			ep->idtlen = msg_data;
244 			wake_up_interruptible(&ep->ep_wait);
245 
246 			break;
247 		case XILLYMSG_OPCODE_FIFOEOF:
248 			if ((msg_channel > ep->num_channels) ||
249 			    (msg_channel == 0) || (!msg_dir) ||
250 			    !ep->channels[msg_channel]->num_wr_buffers) {
251 				malformed_message(ep, &buf[i]);
252 				break;
253 			}
254 			channel = ep->channels[msg_channel];
255 			spin_lock(&channel->wr_spinlock);
256 			channel->wr_eof = msg_bufno;
257 			channel->wr_sleepy = 0;
258 
259 			channel->wr_hangup = channel->wr_empty &&
260 				(channel->wr_host_buf_idx == msg_bufno);
261 
262 			spin_unlock(&channel->wr_spinlock);
263 
264 			wake_up_interruptible(&channel->wr_wait);
265 
266 			break;
267 		case XILLYMSG_OPCODE_FATAL_ERROR:
268 			ep->fatal_error = 1;
269 			wake_up_interruptible(&ep->ep_wait); /* For select() */
270 			dev_err(ep->dev,
271 				"FPGA reported a fatal error. This means that the low-level communication with the device has failed. This hardware problem is most likely unrelated to Xillybus (neither kernel module nor FPGA core), but reports are still welcome. All I/O is aborted.\n");
272 			break;
273 		default:
274 			malformed_message(ep, &buf[i]);
275 			break;
276 		}
277 	}
278 
279 	ep->ephw->hw_sync_sgl_for_device(ep,
280 					 ep->msgbuf_dma_addr,
281 					 ep->msg_buf_size,
282 					 DMA_FROM_DEVICE);
283 
284 	ep->msg_counter = (ep->msg_counter + 1) & 0xf;
285 	ep->failed_messages = 0;
286 	iowrite32(0x03, ep->registers + fpga_msg_ctrl_reg); /* Message ACK */
287 
288 	return IRQ_HANDLED;
289 }
290 EXPORT_SYMBOL(xillybus_isr);
291 
292 /*
293  * A few trivial memory management functions.
294  * NOTE: These functions are used only on probe and remove, and therefore
295  * no locks are applied!
296  */
297 
298 static void xillybus_autoflush(struct work_struct *work);
299 
300 struct xilly_alloc_state {
301 	void *salami;
302 	int left_of_salami;
303 	int nbuffer;
304 	enum dma_data_direction direction;
305 	u32 regdirection;
306 };
307 
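/*
 * DMA buffers are carved sequentially out of larger page-order
 * allocations ("salamis"). The IDT lists the buffers of each direction in
 * descending size order, so a buffer either fits in what's left of the
 * current salami or a fresh one is allocated (see the check below). Each
 * buffer's bus address and number are then handed to the FPGA through the
 * fpga_dma_bufaddr_lowaddr/highaddr and fpga_dma_bufno registers.
 */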
308 static int xilly_get_dma_buffers(struct xilly_endpoint *ep,
309 				 struct xilly_alloc_state *s,
310 				 struct xilly_buffer **buffers,
311 				 int bufnum, int bytebufsize)
312 {
313 	int i, rc;
314 	dma_addr_t dma_addr;
315 	struct device *dev = ep->dev;
316 	struct xilly_buffer *this_buffer = NULL; /* Init to silence warning */
317 
318 	if (buffers) { /* Not the message buffer */
319 		this_buffer = devm_kcalloc(dev, bufnum,
320 					   sizeof(struct xilly_buffer),
321 					   GFP_KERNEL);
322 		if (!this_buffer)
323 			return -ENOMEM;
324 	}
325 
326 	for (i = 0; i < bufnum; i++) {
327 		/*
328 		 * Buffers are expected in descending size order, so there
329 		 * is either enough space for this buffer or none at all.
330 		 */
331 
332 		if ((s->left_of_salami < bytebufsize) &&
333 		    (s->left_of_salami > 0)) {
334 			dev_err(ep->dev,
335 				"Corrupt buffer allocation in IDT. Aborting.\n");
336 			return -ENODEV;
337 		}
338 
339 		if (s->left_of_salami == 0) {
340 			int allocorder, allocsize;
341 
342 			allocsize = PAGE_SIZE;
343 			allocorder = 0;
344 			while (bytebufsize > allocsize) {
345 				allocsize *= 2;
346 				allocorder++;
347 			}
348 
349 			s->salami = (void *) devm_get_free_pages(
350 				dev,
351 				GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO,
352 				allocorder);
353 			if (!s->salami)
354 				return -ENOMEM;
355 
356 			s->left_of_salami = allocsize;
357 		}
358 
359 		rc = ep->ephw->map_single(ep, s->salami,
360 					  bytebufsize, s->direction,
361 					  &dma_addr);
362 		if (rc)
363 			return rc;
364 
365 		iowrite32((u32) (dma_addr & 0xffffffff),
366 			  ep->registers + fpga_dma_bufaddr_lowaddr_reg);
367 		iowrite32(((u32) ((((u64) dma_addr) >> 32) & 0xffffffff)),
368 			  ep->registers + fpga_dma_bufaddr_highaddr_reg);
369 
370 		if (buffers) { /* Not the message buffer */
371 			this_buffer->addr = s->salami;
372 			this_buffer->dma_addr = dma_addr;
373 			buffers[i] = this_buffer++;
374 
375 			iowrite32(s->regdirection | s->nbuffer++,
376 				  ep->registers + fpga_dma_bufno_reg);
377 		} else {
378 			ep->msgbuf_addr = s->salami;
379 			ep->msgbuf_dma_addr = dma_addr;
380 			ep->msg_buf_size = bytebufsize;
381 
382 			iowrite32(s->regdirection,
383 				  ep->registers + fpga_dma_bufno_reg);
384 		}
385 
386 		s->left_of_salami -= bytebufsize;
387 		s->salami += bytebufsize;
388 	}
389 	return 0;
390 }
391 
392 static int xilly_setupchannels(struct xilly_endpoint *ep,
393 			       unsigned char *chandesc,
394 			       int entries)
395 {
396 	struct device *dev = ep->dev;
397 	int i, entry, rc;
398 	struct xilly_channel *channel;
399 	int channelnum, bufnum, bufsize, format, is_writebuf;
400 	int bytebufsize;
401 	int synchronous, allowpartial, exclusive_open, seekable;
402 	int supports_nonempty;
403 	int msg_buf_done = 0;
404 
405 	struct xilly_alloc_state rd_alloc = {
406 		.salami = NULL,
407 		.left_of_salami = 0,
408 		.nbuffer = 1,
409 		.direction = DMA_TO_DEVICE,
410 		.regdirection = 0,
411 	};
412 
413 	struct xilly_alloc_state wr_alloc = {
414 		.salami = NULL,
415 		.left_of_salami = 0,
416 		.nbuffer = 1,
417 		.direction = DMA_FROM_DEVICE,
418 		.regdirection = 0x80000000,
419 	};
420 
421 	channel = devm_kcalloc(dev, ep->num_channels,
422 			       sizeof(struct xilly_channel), GFP_KERNEL);
423 	if (!channel)
424 		return -ENOMEM;
425 
426 	ep->channels = devm_kcalloc(dev, ep->num_channels + 1,
427 				    sizeof(struct xilly_channel *),
428 				    GFP_KERNEL);
429 	if (!ep->channels)
430 		return -ENOMEM;
431 
432 	ep->channels[0] = NULL; /* Channel 0 is message buf. */
433 
434 	/* Initialize all channels with defaults */
435 
436 	for (i = 1; i <= ep->num_channels; i++) {
437 		channel->wr_buffers = NULL;
438 		channel->rd_buffers = NULL;
439 		channel->num_wr_buffers = 0;
440 		channel->num_rd_buffers = 0;
441 		channel->wr_fpga_buf_idx = -1;
442 		channel->wr_host_buf_idx = 0;
443 		channel->wr_host_buf_pos = 0;
444 		channel->wr_empty = 1;
445 		channel->wr_ready = 0;
446 		channel->wr_sleepy = 1;
447 		channel->rd_fpga_buf_idx = 0;
448 		channel->rd_host_buf_idx = 0;
449 		channel->rd_host_buf_pos = 0;
450 		channel->rd_full = 0;
451 		channel->wr_ref_count = 0;
452 		channel->rd_ref_count = 0;
453 
454 		spin_lock_init(&channel->wr_spinlock);
455 		spin_lock_init(&channel->rd_spinlock);
456 		mutex_init(&channel->wr_mutex);
457 		mutex_init(&channel->rd_mutex);
458 		init_waitqueue_head(&channel->rd_wait);
459 		init_waitqueue_head(&channel->wr_wait);
460 		init_waitqueue_head(&channel->wr_ready_wait);
461 
462 		INIT_DELAYED_WORK(&channel->rd_workitem, xillybus_autoflush);
463 
464 		channel->endpoint = ep;
465 		channel->chan_num = i;
466 
467 		channel->log2_element_size = 0;
468 
469 		ep->channels[i] = channel++;
470 	}
471 
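	/*
	 * Each IDT entry is a 4-byte channel descriptor, decoded below as:
	 * byte 0, bit 0: direction (1 = FPGA-to-host "wr" buffers), bits 7:1
	 * plus bits 3:0 of byte 1: channel number; byte 1, bits 5:4: format
	 * (log2 of the element size, clipped to 2), bit 6: allow partial,
	 * bit 7: synchronous; byte 2, bits 4:0: log2 of the buffer size in
	 * elements, bit 5: supports nonempty, bit 6: seekable, bit 7:
	 * exclusive open; byte 3, bits 3:0: log2 of the number of buffers.
	 */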
472 	for (entry = 0; entry < entries; entry++, chandesc += 4) {
473 		struct xilly_buffer **buffers = NULL;
474 
475 		is_writebuf = chandesc[0] & 0x01;
476 		channelnum = (chandesc[0] >> 1) | ((chandesc[1] & 0x0f) << 7);
477 		format = (chandesc[1] >> 4) & 0x03;
478 		allowpartial = (chandesc[1] >> 6) & 0x01;
479 		synchronous = (chandesc[1] >> 7) & 0x01;
480 		bufsize = 1 << (chandesc[2] & 0x1f);
481 		bufnum = 1 << (chandesc[3] & 0x0f);
482 		exclusive_open = (chandesc[2] >> 7) & 0x01;
483 		seekable = (chandesc[2] >> 6) & 0x01;
484 		supports_nonempty = (chandesc[2] >> 5) & 0x01;
485 
486 		if ((channelnum > ep->num_channels) ||
487 		    ((channelnum == 0) && !is_writebuf)) {
488 			dev_err(ep->dev,
489 				"IDT requests channel out of range. Aborting.\n");
490 			return -ENODEV;
491 		}
492 
493 		channel = ep->channels[channelnum]; /* NULL for msg channel */
494 
495 		if (!is_writebuf || channelnum > 0) {
496 			channel->log2_element_size = ((format > 2) ?
497 						      2 : format);
498 
499 			bytebufsize = bufsize *
500 				(1 << channel->log2_element_size);
501 
502 			buffers = devm_kcalloc(dev, bufnum,
503 					       sizeof(struct xilly_buffer *),
504 					       GFP_KERNEL);
505 			if (!buffers)
506 				return -ENOMEM;
507 		} else {
508 			bytebufsize = bufsize << 2;
509 		}
510 
511 		if (!is_writebuf) {
512 			channel->num_rd_buffers = bufnum;
513 			channel->rd_buf_size = bytebufsize;
514 			channel->rd_allow_partial = allowpartial;
515 			channel->rd_synchronous = synchronous;
516 			channel->rd_exclusive_open = exclusive_open;
517 			channel->seekable = seekable;
518 
519 			channel->rd_buffers = buffers;
520 			rc = xilly_get_dma_buffers(ep, &rd_alloc, buffers,
521 						   bufnum, bytebufsize);
522 		} else if (channelnum > 0) {
523 			channel->num_wr_buffers = bufnum;
524 			channel->wr_buf_size = bytebufsize;
525 
526 			channel->seekable = seekable;
527 			channel->wr_supports_nonempty = supports_nonempty;
528 
529 			channel->wr_allow_partial = allowpartial;
530 			channel->wr_synchronous = synchronous;
531 			channel->wr_exclusive_open = exclusive_open;
532 
533 			channel->wr_buffers = buffers;
534 			rc = xilly_get_dma_buffers(ep, &wr_alloc, buffers,
535 						   bufnum, bytebufsize);
536 		} else {
537 			rc = xilly_get_dma_buffers(ep, &wr_alloc, NULL,
538 						   bufnum, bytebufsize);
539 			msg_buf_done++;
540 		}
541 
542 		if (rc)
543 			return -ENOMEM;
544 	}
545 
546 	if (!msg_buf_done) {
547 		dev_err(ep->dev,
548 			"Corrupt IDT: No message buffer. Aborting.\n");
549 		return -ENODEV;
550 	}
551 	return 0;
552 }
553 
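/*
 * The IDT, as parsed here and in xilly_obtain_idt() below, consists of a
 * version byte, a list of NUL-terminated device file names ended by an
 * empty string, 4-byte channel descriptors for the rest of the blob, and
 * a trailing 32-bit CRC chosen so that crc32_le() over the whole IDT
 * (idtlen + 1 bytes) comes out zero.
 */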
554 static int xilly_scan_idt(struct xilly_endpoint *endpoint,
555 			  struct xilly_idt_handle *idt_handle)
556 {
557 	int count = 0;
558 	unsigned char *idt = endpoint->channels[1]->wr_buffers[0]->addr;
559 	unsigned char *end_of_idt = idt + endpoint->idtlen - 4;
560 	unsigned char *scan;
561 	int len;
562 
563 	scan = idt + 1;
564 	idt_handle->names = scan;
565 
566 	while ((scan <= end_of_idt) && *scan) {
567 		while ((scan <= end_of_idt) && *scan++)
568 			/* Do nothing, just scan through the string */;
569 		count++;
570 	}
571 
572 	idt_handle->names_len = scan - idt_handle->names;
573 
574 	scan++;
575 
576 	if (scan > end_of_idt) {
577 		dev_err(endpoint->dev,
578 			"IDT device name list overflow. Aborting.\n");
579 		return -ENODEV;
580 	}
581 	idt_handle->chandesc = scan;
582 
583 	len = endpoint->idtlen - (3 + ((int) (scan - idt)));
584 
585 	if (len & 0x03) {
586 		dev_err(endpoint->dev,
587 			"Corrupt IDT device name list. Aborting.\n");
588 		return -ENODEV;
589 	}
590 
591 	idt_handle->entries = len >> 2;
592 	endpoint->num_channels = count;
593 
594 	return 0;
595 }
596 
597 static int xilly_obtain_idt(struct xilly_endpoint *endpoint)
598 {
599 	struct xilly_channel *channel;
600 	unsigned char *version;
601 	long t;
602 
603 	channel = endpoint->channels[1]; /* This should be generated ad-hoc */
604 
605 	channel->wr_sleepy = 1;
606 
607 	iowrite32(1 |
608 		  (3 << 24), /* Opcode 3 for channel 0 = Send IDT */
609 		  endpoint->registers + fpga_buf_ctrl_reg);
610 
611 	t = wait_event_interruptible_timeout(channel->wr_wait,
612 					     (!channel->wr_sleepy),
613 					     XILLY_TIMEOUT);
614 
615 	if (t <= 0) {
616 		dev_err(endpoint->dev, "Failed to obtain IDT. Aborting.\n");
617 
618 		if (endpoint->fatal_error)
619 			return -EIO;
620 
621 		return -ENODEV;
622 	}
623 
624 	endpoint->ephw->hw_sync_sgl_for_cpu(
625 		channel->endpoint,
626 		channel->wr_buffers[0]->dma_addr,
627 		channel->wr_buf_size,
628 		DMA_FROM_DEVICE);
629 
630 	if (channel->wr_buffers[0]->end_offset != endpoint->idtlen) {
631 		dev_err(endpoint->dev,
632 			"IDT length mismatch (%d != %d). Aborting.\n",
633 			channel->wr_buffers[0]->end_offset, endpoint->idtlen);
634 		return -ENODEV;
635 	}
636 
637 	if (crc32_le(~0, channel->wr_buffers[0]->addr,
638 		     endpoint->idtlen+1) != 0) {
639 		dev_err(endpoint->dev, "IDT failed CRC check. Aborting.\n");
640 		return -ENODEV;
641 	}
642 
643 	version = channel->wr_buffers[0]->addr;
644 
645 	/* Check version number. Reject anything above 0x82. */
646 	if (*version > 0x82) {
647 		dev_err(endpoint->dev,
648 			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
649 			*version);
650 		return -ENODEV;
651 	}
652 
653 	return 0;
654 }
655 
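/*
 * Roughly, read() on a "wr" (FPGA-to-host) channel copies data out of the
 * current host-side buffer, returns fully consumed buffers to the FPGA
 * through fpga_buf_ctrl_reg, and when the RAM buffers run dry it asks the
 * FPGA to flush (an offset limit, or opcode 3 for "send everything") and
 * sleeps on wr_wait until the ISR reports fresh data or the deadline
 * passes.
 */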
656 static ssize_t xillybus_read(struct file *filp, char __user *userbuf,
657 			     size_t count, loff_t *f_pos)
658 {
659 	ssize_t rc;
660 	unsigned long flags;
661 	int bytes_done = 0;
662 	int no_time_left = 0;
663 	long deadline, left_to_sleep;
664 	struct xilly_channel *channel = filp->private_data;
665 
666 	int empty, reached_eof, exhausted, ready;
667 	/* Initializations are there only to silence warnings */
668 
669 	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
670 	int waiting_bufidx;
671 
672 	if (channel->endpoint->fatal_error)
673 		return -EIO;
674 
675 	deadline = jiffies + 1 + XILLY_RX_TIMEOUT;
676 
677 	rc = mutex_lock_interruptible(&channel->wr_mutex);
678 	if (rc)
679 		return rc;
680 
681 	while (1) { /* Note that we may drop mutex within this loop */
682 		int bytes_to_do = count - bytes_done;
683 
684 		spin_lock_irqsave(&channel->wr_spinlock, flags);
685 
686 		empty = channel->wr_empty;
687 		ready = !empty || channel->wr_ready;
688 
689 		if (!empty) {
690 			bufidx = channel->wr_host_buf_idx;
691 			bufpos = channel->wr_host_buf_pos;
692 			howmany = ((channel->wr_buffers[bufidx]->end_offset
693 				    + 1) << channel->log2_element_size)
694 				- bufpos;
695 
696 			/* Update wr_host_* to its post-operation state */
697 			if (howmany > bytes_to_do) {
698 				bufferdone = 0;
699 
700 				howmany = bytes_to_do;
701 				channel->wr_host_buf_pos += howmany;
702 			} else {
703 				bufferdone = 1;
704 
705 				channel->wr_host_buf_pos = 0;
706 
707 				if (bufidx == channel->wr_fpga_buf_idx) {
708 					channel->wr_empty = 1;
709 					channel->wr_sleepy = 1;
710 					channel->wr_ready = 0;
711 				}
712 
713 				if (bufidx >= (channel->num_wr_buffers - 1))
714 					channel->wr_host_buf_idx = 0;
715 				else
716 					channel->wr_host_buf_idx++;
717 			}
718 		}
719 
720 		/*
721 		 * Marking our situation after the possible changes above,
722 		 * for use after releasing the spinlock.
723 		 *
724 		 * empty = empty before change
725 		 * exhausted = empty after possible change
726 		 */
727 
728 		reached_eof = channel->wr_empty &&
729 			(channel->wr_host_buf_idx == channel->wr_eof);
730 		channel->wr_hangup = reached_eof;
731 		exhausted = channel->wr_empty;
732 		waiting_bufidx = channel->wr_host_buf_idx;
733 
734 		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
735 
736 		if (!empty) { /* Go on, now without the spinlock */
737 
738 			if (bufpos == 0) /* Position zero means it's virgin */
739 				channel->endpoint->ephw->hw_sync_sgl_for_cpu(
740 					channel->endpoint,
741 					channel->wr_buffers[bufidx]->dma_addr,
742 					channel->wr_buf_size,
743 					DMA_FROM_DEVICE);
744 
745 			if (copy_to_user(
746 				    userbuf,
747 				    channel->wr_buffers[bufidx]->addr
748 				    + bufpos, howmany))
749 				rc = -EFAULT;
750 
751 			userbuf += howmany;
752 			bytes_done += howmany;
753 
754 			if (bufferdone) {
755 				channel->endpoint->ephw->hw_sync_sgl_for_device(
756 					channel->endpoint,
757 					channel->wr_buffers[bufidx]->dma_addr,
758 					channel->wr_buf_size,
759 					DMA_FROM_DEVICE);
760 
761 				/*
762 				 * Tell FPGA the buffer is done with. It's an
763 				 * atomic operation to the FPGA, so what
764 				 * happens with other channels doesn't matter,
765 				 * and the certain channel is protected with
766 				 * the channel-specific mutex.
767 				 */
768 
769 				iowrite32(1 | (channel->chan_num << 1) |
770 					  (bufidx << 12),
771 					  channel->endpoint->registers +
772 					  fpga_buf_ctrl_reg);
773 			}
774 
775 			if (rc) {
776 				mutex_unlock(&channel->wr_mutex);
777 				return rc;
778 			}
779 		}
780 
781 		/* This includes a zero-count return = EOF */
782 		if ((bytes_done >= count) || reached_eof)
783 			break;
784 
785 		if (!exhausted)
786 			continue; /* More in RAM buffer(s)? Just go on. */
787 
788 		if ((bytes_done > 0) &&
789 		    (no_time_left ||
790 		     (channel->wr_synchronous && channel->wr_allow_partial)))
791 			break;
792 
793 		/*
794 		 * Nonblocking read: The "ready" flag tells us that the FPGA
795 		 * has data to send. In non-blocking mode, if it isn't on,
796 		 * just return. But if there is, we jump directly to the point
797 		 * where we ask for the FPGA to send all it has, and wait
798 		 * until that data arrives. So in a sense, we *do* block in
799 		 * nonblocking mode, but only for a very short time.
800 		 */
801 
802 		if (!no_time_left && (filp->f_flags & O_NONBLOCK)) {
803 			if (bytes_done > 0)
804 				break;
805 
806 			if (ready)
807 				goto desperate;
808 
809 			rc = -EAGAIN;
810 			break;
811 		}
812 
813 		if (!no_time_left || (bytes_done > 0)) {
814 			/*
815 			 * Note that in case of an element-misaligned read
816 			 * request, offsetlimit will include the last element,
817 			 * which will be partially read from.
818 			 */
819 			int offsetlimit = ((count - bytes_done) - 1) >>
820 				channel->log2_element_size;
821 			int buf_elements = channel->wr_buf_size >>
822 				channel->log2_element_size;
823 
824 			/*
825 			 * In synchronous mode, always send an offset limit.
826 			 * Just don't send a value too big.
827 			 */
828 
829 			if (channel->wr_synchronous) {
830 				/* Don't request more than one buffer */
831 				if (channel->wr_allow_partial &&
832 				    (offsetlimit >= buf_elements))
833 					offsetlimit = buf_elements - 1;
834 
835 				/* Don't request more than all buffers */
836 				if (!channel->wr_allow_partial &&
837 				    (offsetlimit >=
838 				     (buf_elements * channel->num_wr_buffers)))
839 					offsetlimit = buf_elements *
840 						channel->num_wr_buffers - 1;
841 			}
842 
843 			/*
844 			 * In asynchronous mode, force early flush of a buffer
845 			 * only if that will allow returning a full count. The
846 			 * "offsetlimit < ( ... )" rather than "<=" excludes
847 			 * requesting a full buffer, which would obviously
848 			 * cause a buffer transmission anyhow
849 			 */
850 
851 			if (channel->wr_synchronous ||
852 			    (offsetlimit < (buf_elements - 1))) {
853 				mutex_lock(&channel->endpoint->register_mutex);
854 
855 				iowrite32(offsetlimit,
856 					  channel->endpoint->registers +
857 					  fpga_buf_offset_reg);
858 
859 				iowrite32(1 | (channel->chan_num << 1) |
860 					  (2 << 24) |  /* 2 = offset limit */
861 					  (waiting_bufidx << 12),
862 					  channel->endpoint->registers +
863 					  fpga_buf_ctrl_reg);
864 
865 				mutex_unlock(&channel->endpoint->
866 					     register_mutex);
867 			}
868 		}
869 
870 		/*
871 		 * If partial completion is disallowed, there is no point in
872 		 * timeout sleeping. Neither if no_time_left is set and
873 		 * there's no data.
874 		 */
875 
876 		if (!channel->wr_allow_partial ||
877 		    (no_time_left && (bytes_done == 0))) {
878 			/*
879 			 * This do-loop will run more than once if another
880 			 * thread reasserted wr_sleepy before we got the mutex
881 			 * back, so we try again.
882 			 */
883 
884 			do {
885 				mutex_unlock(&channel->wr_mutex);
886 
887 				if (wait_event_interruptible(
888 					    channel->wr_wait,
889 					    (!channel->wr_sleepy)))
890 					goto interrupted;
891 
892 				if (mutex_lock_interruptible(
893 					    &channel->wr_mutex))
894 					goto interrupted;
895 			} while (channel->wr_sleepy);
896 
897 			continue;
898 
899 interrupted: /* Mutex is not held if got here */
900 			if (channel->endpoint->fatal_error)
901 				return -EIO;
902 			if (bytes_done)
903 				return bytes_done;
904 			if (filp->f_flags & O_NONBLOCK)
905 				return -EAGAIN; /* Don't admit snoozing */
906 			return -EINTR;
907 		}
908 
909 		left_to_sleep = deadline - ((long) jiffies);
910 
911 		/*
912 		 * If our time is out, skip the waiting. We may miss wr_sleepy
913 		 * being deasserted but hey, almost missing the train is like
914 		 * missing it.
915 		 */
916 
917 		if (left_to_sleep > 0) {
918 			left_to_sleep =
919 				wait_event_interruptible_timeout(
920 					channel->wr_wait,
921 					(!channel->wr_sleepy),
922 					left_to_sleep);
923 
924 			if (left_to_sleep > 0) /* wr_sleepy deasserted */
925 				continue;
926 
927 			if (left_to_sleep < 0) { /* Interrupt */
928 				mutex_unlock(&channel->wr_mutex);
929 				if (channel->endpoint->fatal_error)
930 					return -EIO;
931 				if (bytes_done)
932 					return bytes_done;
933 				return -EINTR;
934 			}
935 		}
936 
937 desperate:
938 		no_time_left = 1; /* We're out of sleeping time. Desperate! */
939 
940 		if (bytes_done == 0) {
941 			/*
942 			 * Reaching here means that we allow partial return,
943 			 * that we've run out of time, and that we have
944 			 * nothing to return.
945 			 * So tell the FPGA to send anything it has or gets.
946 			 */
947 
948 			iowrite32(1 | (channel->chan_num << 1) |
949 				  (3 << 24) |  /* Opcode 3, flush it all! */
950 				  (waiting_bufidx << 12),
951 				  channel->endpoint->registers +
952 				  fpga_buf_ctrl_reg);
953 		}
954 
955 		/*
956 		 * Reaching here means that we *do* have data in the buffer,
957 		 * but the "partial" flag disallows returning less than
958 		 * required. And we don't have as much. So loop again,
959 		 * which is likely to end up blocking indefinitely until
960 		 * enough data has arrived.
961 		 */
962 	}
963 
964 	mutex_unlock(&channel->wr_mutex);
965 
966 	if (channel->endpoint->fatal_error)
967 		return -EIO;
968 
969 	if (rc)
970 		return rc;
971 
972 	return bytes_done;
973 }
974 
975 /*
976  * The timeout argument takes values as follows:
977  *  >0 : Flush with timeout
978  * ==0 : Flush, and wait indefinitely for the flush to complete
979  *  <0 : Autoflush: Flush only if there's a single buffer occupied
980  */
981 
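/*
 * Within this file, xillybus_flush() calls this with HZ (a one-second
 * timeout), xillybus_autoflush() with -1, and xillybus_write() with 0
 * when draining a synchronous channel on return.
 */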
982 static int xillybus_myflush(struct xilly_channel *channel, long timeout)
983 {
984 	int rc;
985 	unsigned long flags;
986 
987 	int end_offset_plus1;
988 	int bufidx, bufidx_minus1;
989 	int i;
990 	int empty;
991 	int new_rd_host_buf_pos;
992 
993 	if (channel->endpoint->fatal_error)
994 		return -EIO;
995 	rc = mutex_lock_interruptible(&channel->rd_mutex);
996 	if (rc)
997 		return rc;
998 
999 	/*
1000 	 * Don't flush a closed channel. This can happen when the queued
1001 	 * autoflush work item fires after the file has been closed. This is not
1002 	 * an error, just something to dismiss.
1003 	 */
1004 
1005 	if (!channel->rd_ref_count)
1006 		goto done;
1007 
1008 	bufidx = channel->rd_host_buf_idx;
1009 
1010 	bufidx_minus1 = (bufidx == 0) ?
1011 		channel->num_rd_buffers - 1 :
1012 		bufidx - 1;
1013 
1014 	end_offset_plus1 = channel->rd_host_buf_pos >>
1015 		channel->log2_element_size;
1016 
1017 	new_rd_host_buf_pos = channel->rd_host_buf_pos -
1018 		(end_offset_plus1 << channel->log2_element_size);
1019 
1020 	/* Submit the current buffer if it's nonempty */
1021 	if (end_offset_plus1) {
1022 		unsigned char *tail = channel->rd_buffers[bufidx]->addr +
1023 			(end_offset_plus1 << channel->log2_element_size);
1024 
1025 		/* Copy unflushed data, so we can put it in the next buffer */
1026 		for (i = 0; i < new_rd_host_buf_pos; i++)
1027 			channel->rd_leftovers[i] = *tail++;
1028 
1029 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1030 
1031 		/* Autoflush only if a single buffer is occupied */
1032 
1033 		if ((timeout < 0) &&
1034 		    (channel->rd_full ||
1035 		     (bufidx_minus1 != channel->rd_fpga_buf_idx))) {
1036 			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1037 			/*
1038 			 * A new work item may be queued by the ISR exactly
1039 			 * now, since the execution of a work item allows the
1040 			 * queuing of a new one while it's running.
1041 			 */
1042 			goto done;
1043 		}
1044 
1045 		/* The 4th element is never needed for data, so it's a flag */
1046 		channel->rd_leftovers[3] = (new_rd_host_buf_pos != 0);
1047 
1048 		/* Set up rd_full to reflect a certain moment's state */
1049 
1050 		if (bufidx == channel->rd_fpga_buf_idx)
1051 			channel->rd_full = 1;
1052 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1053 
1054 		if (bufidx >= (channel->num_rd_buffers - 1))
1055 			channel->rd_host_buf_idx = 0;
1056 		else
1057 			channel->rd_host_buf_idx++;
1058 
1059 		channel->endpoint->ephw->hw_sync_sgl_for_device(
1060 			channel->endpoint,
1061 			channel->rd_buffers[bufidx]->dma_addr,
1062 			channel->rd_buf_size,
1063 			DMA_TO_DEVICE);
1064 
1065 		mutex_lock(&channel->endpoint->register_mutex);
1066 
1067 		iowrite32(end_offset_plus1 - 1,
1068 			  channel->endpoint->registers + fpga_buf_offset_reg);
1069 
1070 		iowrite32((channel->chan_num << 1) | /* Channel ID */
1071 			  (2 << 24) |  /* Opcode 2, submit buffer */
1072 			  (bufidx << 12),
1073 			  channel->endpoint->registers + fpga_buf_ctrl_reg);
1074 
1075 		mutex_unlock(&channel->endpoint->register_mutex);
1076 	} else if (bufidx == 0) {
1077 		bufidx = channel->num_rd_buffers - 1;
1078 	} else {
1079 		bufidx--;
1080 	}
1081 
1082 	channel->rd_host_buf_pos = new_rd_host_buf_pos;
1083 
1084 	if (timeout < 0)
1085 		goto done; /* Autoflush */
1086 
1087 	/*
1088 	 * bufidx is now the last buffer written to (or equal to
1089 	 * rd_fpga_buf_idx if the buffer was never written to), and
1090 	 * channel->rd_host_buf_idx is the one after it.
1091 	 *
1092 	 * If bufidx == channel->rd_fpga_buf_idx we're either empty or full.
1093 	 */
1094 
1095 	while (1) { /* Loop waiting for draining of buffers */
1096 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1097 
1098 		if (bufidx != channel->rd_fpga_buf_idx)
1099 			channel->rd_full = 1; /*
1100 					       * Not really full,
1101 					       * but needs waiting.
1102 					       */
1103 
1104 		empty = !channel->rd_full;
1105 
1106 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1107 
1108 		if (empty)
1109 			break;
1110 
1111 		/*
1112 		 * Indefinite sleep with mutex taken. With data waiting for
1113 		 * flushing, user should not be surprised if open() for write
1114 		 * sleeps.
1115 		 */
1116 		if (timeout == 0)
1117 			wait_event_interruptible(channel->rd_wait,
1118 						 (!channel->rd_full));
1119 
1120 		else if (wait_event_interruptible_timeout(
1121 				 channel->rd_wait,
1122 				 (!channel->rd_full),
1123 				 timeout) == 0) {
1124 			dev_warn(channel->endpoint->dev,
1125 				 "Timed out while flushing. Output data may be lost.\n");
1126 
1127 			rc = -ETIMEDOUT;
1128 			break;
1129 		}
1130 
1131 		if (channel->rd_full) {
1132 			rc = -EINTR;
1133 			break;
1134 		}
1135 	}
1136 
1137 done:
1138 	mutex_unlock(&channel->rd_mutex);
1139 
1140 	if (channel->endpoint->fatal_error)
1141 		return -EIO;
1142 
1143 	return rc;
1144 }
1145 
1146 static int xillybus_flush(struct file *filp, fl_owner_t id)
1147 {
1148 	if (!(filp->f_mode & FMODE_WRITE))
1149 		return 0;
1150 
1151 	return xillybus_myflush(filp->private_data, HZ); /* 1 second timeout */
1152 }
1153 
1154 static void xillybus_autoflush(struct work_struct *work)
1155 {
1156 	struct delayed_work *workitem = container_of(
1157 		work, struct delayed_work, work);
1158 	struct xilly_channel *channel = container_of(
1159 		workitem, struct xilly_channel, rd_workitem);
1160 	int rc;
1161 
1162 	rc = xillybus_myflush(channel, -1);
1163 	if (rc == -EINTR)
1164 		dev_warn(channel->endpoint->dev,
1165 			 "Autoflush failed because work queue thread got a signal.\n");
1166 	else if (rc)
1167 		dev_err(channel->endpoint->dev,
1168 			"Autoflush failed under weird circumstances.\n");
1169 }
1170 
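/*
 * Roughly, write() on an "rd" (host-to-FPGA) channel copies data into the
 * current host-side buffer and submits element-aligned full buffers with
 * opcode 2. A partial tail is stashed in rd_leftovers and replayed at the
 * head of the next buffer, and asynchronous channels schedule the
 * autoflush work item so stragglers eventually reach the FPGA.
 */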
1171 static ssize_t xillybus_write(struct file *filp, const char __user *userbuf,
1172 			      size_t count, loff_t *f_pos)
1173 {
1174 	ssize_t rc;
1175 	unsigned long flags;
1176 	int bytes_done = 0;
1177 	struct xilly_channel *channel = filp->private_data;
1178 
1179 	int full, exhausted;
1180 	/* Initializations are there only to silence warnings */
1181 
1182 	int howmany = 0, bufpos = 0, bufidx = 0, bufferdone = 0;
1183 	int end_offset_plus1 = 0;
1184 
1185 	if (channel->endpoint->fatal_error)
1186 		return -EIO;
1187 
1188 	rc = mutex_lock_interruptible(&channel->rd_mutex);
1189 	if (rc)
1190 		return rc;
1191 
1192 	while (1) {
1193 		int bytes_to_do = count - bytes_done;
1194 
1195 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1196 
1197 		full = channel->rd_full;
1198 
1199 		if (!full) {
1200 			bufidx = channel->rd_host_buf_idx;
1201 			bufpos = channel->rd_host_buf_pos;
1202 			howmany = channel->rd_buf_size - bufpos;
1203 
1204 			/*
1205 			 * Update rd_host_* to its state after this operation.
1206 			 * count=0 means committing the buffer immediately,
1207 			 * which is like flushing, but doesn't necessarily block.
1208 			 */
1209 
1210 			if ((howmany > bytes_to_do) &&
1211 			    (count ||
1212 			     ((bufpos >> channel->log2_element_size) == 0))) {
1213 				bufferdone = 0;
1214 
1215 				howmany = bytes_to_do;
1216 				channel->rd_host_buf_pos += howmany;
1217 			} else {
1218 				bufferdone = 1;
1219 
1220 				if (count) {
1221 					end_offset_plus1 =
1222 						channel->rd_buf_size >>
1223 						channel->log2_element_size;
1224 					channel->rd_host_buf_pos = 0;
1225 				} else {
1226 					unsigned char *tail;
1227 					int i;
1228 
1229 					howmany = 0;
1230 
1231 					end_offset_plus1 = bufpos >>
1232 						channel->log2_element_size;
1233 
1234 					channel->rd_host_buf_pos -=
1235 						end_offset_plus1 <<
1236 						channel->log2_element_size;
1237 
1238 					tail = channel->
1239 						rd_buffers[bufidx]->addr +
1240 						(end_offset_plus1 <<
1241 						 channel->log2_element_size);
1242 
1243 					for (i = 0;
1244 					     i < channel->rd_host_buf_pos;
1245 					     i++)
1246 						channel->rd_leftovers[i] =
1247 							*tail++;
1248 				}
1249 
1250 				if (bufidx == channel->rd_fpga_buf_idx)
1251 					channel->rd_full = 1;
1252 
1253 				if (bufidx >= (channel->num_rd_buffers - 1))
1254 					channel->rd_host_buf_idx = 0;
1255 				else
1256 					channel->rd_host_buf_idx++;
1257 			}
1258 		}
1259 
1260 		/*
1261 		 * Marking our situation after the possible changes above,
1262 		 * for use after releasing the spinlock.
1263 		 *
1264 		 * full = full before change
1265 		 * exhausted = full after possible change
1266 		 */
1267 
1268 		exhausted = channel->rd_full;
1269 
1270 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1271 
1272 		if (!full) { /* Go on, now without the spinlock */
1273 			unsigned char *head =
1274 				channel->rd_buffers[bufidx]->addr;
1275 			int i;
1276 
1277 			if ((bufpos == 0) || /* Zero means it's virgin */
1278 			    (channel->rd_leftovers[3] != 0)) {
1279 				channel->endpoint->ephw->hw_sync_sgl_for_cpu(
1280 					channel->endpoint,
1281 					channel->rd_buffers[bufidx]->dma_addr,
1282 					channel->rd_buf_size,
1283 					DMA_TO_DEVICE);
1284 
1285 				/* Virgin, but leftovers are due */
1286 				for (i = 0; i < bufpos; i++)
1287 					*head++ = channel->rd_leftovers[i];
1288 
1289 				channel->rd_leftovers[3] = 0; /* Clear flag */
1290 			}
1291 
1292 			if (copy_from_user(
1293 				    channel->rd_buffers[bufidx]->addr + bufpos,
1294 				    userbuf, howmany))
1295 				rc = -EFAULT;
1296 
1297 			userbuf += howmany;
1298 			bytes_done += howmany;
1299 
1300 			if (bufferdone) {
1301 				channel->endpoint->ephw->hw_sync_sgl_for_device(
1302 					channel->endpoint,
1303 					channel->rd_buffers[bufidx]->dma_addr,
1304 					channel->rd_buf_size,
1305 					DMA_TO_DEVICE);
1306 
1307 				mutex_lock(&channel->endpoint->register_mutex);
1308 
1309 				iowrite32(end_offset_plus1 - 1,
1310 					  channel->endpoint->registers +
1311 					  fpga_buf_offset_reg);
1312 
1313 				iowrite32((channel->chan_num << 1) |
1314 					  (2 << 24) |  /* 2 = submit buffer */
1315 					  (bufidx << 12),
1316 					  channel->endpoint->registers +
1317 					  fpga_buf_ctrl_reg);
1318 
1319 				mutex_unlock(&channel->endpoint->
1320 					     register_mutex);
1321 
1322 				channel->rd_leftovers[3] =
1323 					(channel->rd_host_buf_pos != 0);
1324 			}
1325 
1326 			if (rc) {
1327 				mutex_unlock(&channel->rd_mutex);
1328 
1329 				if (channel->endpoint->fatal_error)
1330 					return -EIO;
1331 
1332 				if (!channel->rd_synchronous)
1333 					queue_delayed_work(
1334 						xillybus_wq,
1335 						&channel->rd_workitem,
1336 						XILLY_RX_TIMEOUT);
1337 
1338 				return rc;
1339 			}
1340 		}
1341 
1342 		if (bytes_done >= count)
1343 			break;
1344 
1345 		if (!exhausted)
1346 			continue; /* If there's more space, just go on */
1347 
1348 		if ((bytes_done > 0) && channel->rd_allow_partial)
1349 			break;
1350 
1351 		/*
1352 		 * Indefinite sleep with mutex taken. With data waiting for
1353 		 * flushing, user should not be surprised if open() for write
1354 		 * sleeps.
1355 		 */
1356 
1357 		if (filp->f_flags & O_NONBLOCK) {
1358 			rc = -EAGAIN;
1359 			break;
1360 		}
1361 
1362 		if (wait_event_interruptible(channel->rd_wait,
1363 					     (!channel->rd_full))) {
1364 			mutex_unlock(&channel->rd_mutex);
1365 
1366 			if (channel->endpoint->fatal_error)
1367 				return -EIO;
1368 
1369 			if (bytes_done)
1370 				return bytes_done;
1371 			return -EINTR;
1372 		}
1373 	}
1374 
1375 	mutex_unlock(&channel->rd_mutex);
1376 
1377 	if (!channel->rd_synchronous)
1378 		queue_delayed_work(xillybus_wq,
1379 				   &channel->rd_workitem,
1380 				   XILLY_RX_TIMEOUT);
1381 
1382 	if (channel->endpoint->fatal_error)
1383 		return -EIO;
1384 
1385 	if (rc)
1386 		return rc;
1387 
1388 	if ((channel->rd_synchronous) && (bytes_done > 0)) {
1389 		rc = xillybus_myflush(filp->private_data, 0); /* No timeout */
1390 
1391 		if (rc && (rc != -EINTR))
1392 			return rc;
1393 	}
1394 
1395 	return bytes_done;
1396 }
1397 
1398 static int xillybus_open(struct inode *inode, struct file *filp)
1399 {
1400 	int rc;
1401 	unsigned long flags;
1402 	struct xilly_endpoint *endpoint;
1403 	struct xilly_channel *channel;
1404 	int index;
1405 
1406 	rc = xillybus_find_inode(inode, (void **)&endpoint, &index);
1407 	if (rc)
1408 		return rc;
1409 
1410 	if (endpoint->fatal_error)
1411 		return -EIO;
1412 
1413 	channel = endpoint->channels[1 + index];
1414 	filp->private_data = channel;
1415 
1416 	/*
1417 	 * It gets complicated because:
1418 	 * 1. We don't want to take a mutex we don't have to
1419 	 * 2. We don't want to open one direction if the other will fail.
1420 	 */
1421 
1422 	if ((filp->f_mode & FMODE_READ) && (!channel->num_wr_buffers))
1423 		return -ENODEV;
1424 
1425 	if ((filp->f_mode & FMODE_WRITE) && (!channel->num_rd_buffers))
1426 		return -ENODEV;
1427 
1428 	if ((filp->f_mode & FMODE_READ) && (filp->f_flags & O_NONBLOCK) &&
1429 	    (channel->wr_synchronous || !channel->wr_allow_partial ||
1430 	     !channel->wr_supports_nonempty)) {
1431 		dev_err(endpoint->dev,
1432 			"open() failed: O_NONBLOCK not allowed for read on this device\n");
1433 		return -ENODEV;
1434 	}
1435 
1436 	if ((filp->f_mode & FMODE_WRITE) && (filp->f_flags & O_NONBLOCK) &&
1437 	    (channel->rd_synchronous || !channel->rd_allow_partial)) {
1438 		dev_err(endpoint->dev,
1439 			"open() failed: O_NONBLOCK not allowed for write on this device\n");
1440 		return -ENODEV;
1441 	}
1442 
1443 	/*
1444 	 * Note: open() may block on getting mutexes despite O_NONBLOCK.
1445 	 * This shouldn't occur normally, since multiple open of the same
1446 	 * file descriptor is almost always prohibited anyhow
1447 	 * (*_exclusive_open is normally set in real-life systems).
1448 	 */
1449 
1450 	if (filp->f_mode & FMODE_READ) {
1451 		rc = mutex_lock_interruptible(&channel->wr_mutex);
1452 		if (rc)
1453 			return rc;
1454 	}
1455 
1456 	if (filp->f_mode & FMODE_WRITE) {
1457 		rc = mutex_lock_interruptible(&channel->rd_mutex);
1458 		if (rc)
1459 			goto unlock_wr;
1460 	}
1461 
1462 	if ((filp->f_mode & FMODE_READ) &&
1463 	    (channel->wr_ref_count != 0) &&
1464 	    (channel->wr_exclusive_open)) {
1465 		rc = -EBUSY;
1466 		goto unlock;
1467 	}
1468 
1469 	if ((filp->f_mode & FMODE_WRITE) &&
1470 	    (channel->rd_ref_count != 0) &&
1471 	    (channel->rd_exclusive_open)) {
1472 		rc = -EBUSY;
1473 		goto unlock;
1474 	}
1475 
1476 	if (filp->f_mode & FMODE_READ) {
1477 		if (channel->wr_ref_count == 0) { /* First open of file */
1478 			/* Move the host to first buffer */
1479 			spin_lock_irqsave(&channel->wr_spinlock, flags);
1480 			channel->wr_host_buf_idx = 0;
1481 			channel->wr_host_buf_pos = 0;
1482 			channel->wr_fpga_buf_idx = -1;
1483 			channel->wr_empty = 1;
1484 			channel->wr_ready = 0;
1485 			channel->wr_sleepy = 1;
1486 			channel->wr_eof = -1;
1487 			channel->wr_hangup = 0;
1488 
1489 			spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1490 
1491 			iowrite32(1 | (channel->chan_num << 1) |
1492 				  (4 << 24) |  /* Opcode 4, open channel */
1493 				  ((channel->wr_synchronous & 1) << 23),
1494 				  channel->endpoint->registers +
1495 				  fpga_buf_ctrl_reg);
1496 		}
1497 
1498 		channel->wr_ref_count++;
1499 	}
1500 
1501 	if (filp->f_mode & FMODE_WRITE) {
1502 		if (channel->rd_ref_count == 0) { /* First open of file */
1503 			/* Move the host to first buffer */
1504 			spin_lock_irqsave(&channel->rd_spinlock, flags);
1505 			channel->rd_host_buf_idx = 0;
1506 			channel->rd_host_buf_pos = 0;
1507 			channel->rd_leftovers[3] = 0; /* No leftovers. */
1508 			channel->rd_fpga_buf_idx = channel->num_rd_buffers - 1;
1509 			channel->rd_full = 0;
1510 
1511 			spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1512 
1513 			iowrite32((channel->chan_num << 1) |
1514 				  (4 << 24),   /* Opcode 4, open channel */
1515 				  channel->endpoint->registers +
1516 				  fpga_buf_ctrl_reg);
1517 		}
1518 
1519 		channel->rd_ref_count++;
1520 	}
1521 
1522 unlock:
1523 	if (filp->f_mode & FMODE_WRITE)
1524 		mutex_unlock(&channel->rd_mutex);
1525 unlock_wr:
1526 	if (filp->f_mode & FMODE_READ)
1527 		mutex_unlock(&channel->wr_mutex);
1528 
1529 	if (!rc && (!channel->seekable))
1530 		return nonseekable_open(inode, filp);
1531 
1532 	return rc;
1533 }
1534 
1535 static int xillybus_release(struct inode *inode, struct file *filp)
1536 {
1537 	unsigned long flags;
1538 	struct xilly_channel *channel = filp->private_data;
1539 
1540 	int buf_idx;
1541 	int eof;
1542 
1543 	if (channel->endpoint->fatal_error)
1544 		return -EIO;
1545 
1546 	if (filp->f_mode & FMODE_WRITE) {
1547 		mutex_lock(&channel->rd_mutex);
1548 
1549 		channel->rd_ref_count--;
1550 
1551 		if (channel->rd_ref_count == 0) {
1552 			/*
1553 			 * We rely on the kernel calling flush()
1554 			 * before we get here.
1555 			 */
1556 
1557 			iowrite32((channel->chan_num << 1) | /* Channel ID */
1558 				  (5 << 24),  /* Opcode 5, close channel */
1559 				  channel->endpoint->registers +
1560 				  fpga_buf_ctrl_reg);
1561 		}
1562 		mutex_unlock(&channel->rd_mutex);
1563 	}
1564 
1565 	if (filp->f_mode & FMODE_READ) {
1566 		mutex_lock(&channel->wr_mutex);
1567 
1568 		channel->wr_ref_count--;
1569 
1570 		if (channel->wr_ref_count == 0) {
1571 			iowrite32(1 | (channel->chan_num << 1) |
1572 				  (5 << 24),  /* Opcode 5, close channel */
1573 				  channel->endpoint->registers +
1574 				  fpga_buf_ctrl_reg);
1575 
1576 			/*
1577 			 * This is crazily cautious: We make sure not only
1578 			 * that we got an EOF (be it because we closed the
1579 			 * channel or because of a user's EOF), but also that
1580 			 * it's one beyond the last buffer that arrived, so we
1581 			 * have no leftover buffers pending before wrapping up
1582 			 * (which can only happen with asynchronous channels,
1583 			 * BTW).
1584 			 */
1585 
1586 			while (1) {
1587 				spin_lock_irqsave(&channel->wr_spinlock,
1588 						  flags);
1589 				buf_idx = channel->wr_fpga_buf_idx;
1590 				eof = channel->wr_eof;
1591 				channel->wr_sleepy = 1;
1592 				spin_unlock_irqrestore(&channel->wr_spinlock,
1593 						       flags);
1594 
1595 				/*
1596 				 * Check if eof points at the buffer after
1597 				 * the last one the FPGA submitted. Note that
1598 				 * no EOF is marked by negative eof.
1599 				 */
1600 
1601 				buf_idx++;
1602 				if (buf_idx == channel->num_wr_buffers)
1603 					buf_idx = 0;
1604 
1605 				if (buf_idx == eof)
1606 					break;
1607 
1608 				/*
1609 				 * Steal an extra 100 ms if awakened by an interrupt.
1610 				 * This is a simple workaround for an
1611 				 * interrupt pending when entering, which would
1612 				 * otherwise result in declaring the hardware
1613 				 * non-responsive.
1614 				 */
1615 
1616 				if (wait_event_interruptible(
1617 					    channel->wr_wait,
1618 					    (!channel->wr_sleepy)))
1619 					msleep(100);
1620 
1621 				if (channel->wr_sleepy) {
1622 					mutex_unlock(&channel->wr_mutex);
1623 					dev_warn(channel->endpoint->dev,
1624 						 "Hardware failed to respond to close command, therefore left in messy state.\n");
1625 					return -EINTR;
1626 				}
1627 			}
1628 		}
1629 
1630 		mutex_unlock(&channel->wr_mutex);
1631 	}
1632 
1633 	return 0;
1634 }
1635 
1636 static loff_t xillybus_llseek(struct file *filp, loff_t offset, int whence)
1637 {
1638 	struct xilly_channel *channel = filp->private_data;
1639 	loff_t pos = filp->f_pos;
1640 	int rc = 0;
1641 
1642 	/*
1643 	 * Take both mutexes not allowing interrupts, since it seems like
1644 	 * common applications don't expect an -EINTR here. Besides, multiple
1645 	 * access to a single file descriptor on seekable devices is a mess
1646 	 * anyhow.
1647 	 */
1648 
1649 	if (channel->endpoint->fatal_error)
1650 		return -EIO;
1651 
1652 	mutex_lock(&channel->wr_mutex);
1653 	mutex_lock(&channel->rd_mutex);
1654 
1655 	switch (whence) {
1656 	case SEEK_SET:
1657 		pos = offset;
1658 		break;
1659 	case SEEK_CUR:
1660 		pos += offset;
1661 		break;
1662 	case SEEK_END:
1663 		pos = offset; /* Going to the end => to the beginning */
1664 		break;
1665 	default:
1666 		rc = -EINVAL;
1667 		goto end;
1668 	}
1669 
1670 	/* In any case, we must finish on an element boundary */
1671 	if (pos & ((1 << channel->log2_element_size) - 1)) {
1672 		rc = -EINVAL;
1673 		goto end;
1674 	}
1675 
1676 	mutex_lock(&channel->endpoint->register_mutex);
1677 
1678 	iowrite32(pos >> channel->log2_element_size,
1679 		  channel->endpoint->registers + fpga_buf_offset_reg);
1680 
1681 	iowrite32((channel->chan_num << 1) |
1682 		  (6 << 24),  /* Opcode 6, set address */
1683 		  channel->endpoint->registers + fpga_buf_ctrl_reg);
1684 
1685 	mutex_unlock(&channel->endpoint->register_mutex);
1686 
1687 end:
1688 	mutex_unlock(&channel->rd_mutex);
1689 	mutex_unlock(&channel->wr_mutex);
1690 
1691 	if (rc) /* Return error after releasing mutexes */
1692 		return rc;
1693 
1694 	filp->f_pos = pos;
1695 
1696 	/*
1697 	 * Since seekable devices are allowed only when the channel is
1698 	 * synchronous, we assume that there is no data pending in either
1699 	 * direction (which holds true as long as no concurrent access on the
1700 	 * file descriptor takes place).
1701 	 * The only thing we may need to throw away is leftovers from partial
1702 	 * write() flush.
1703 	 */
1704 
1705 	channel->rd_leftovers[3] = 0;
1706 
1707 	return pos;
1708 }
1709 
1710 static __poll_t xillybus_poll(struct file *filp, poll_table *wait)
1711 {
1712 	struct xilly_channel *channel = filp->private_data;
1713 	__poll_t mask = 0;
1714 	unsigned long flags;
1715 
1716 	poll_wait(filp, &channel->endpoint->ep_wait, wait);
1717 
1718 	/*
1719 	 * poll() won't play ball regarding read() channels unless they
1720 	 * are asynchronous and support the nonempty message. Allowing
1721 	 * others would create situations where data has been delivered
1722 	 * at the FPGA, with users expecting select() to wake up, which
1723 	 * it might not.
1724 	 */
1725 
1726 	if (!channel->wr_synchronous && channel->wr_supports_nonempty) {
1727 		poll_wait(filp, &channel->wr_wait, wait);
1728 		poll_wait(filp, &channel->wr_ready_wait, wait);
1729 
1730 		spin_lock_irqsave(&channel->wr_spinlock, flags);
1731 		if (!channel->wr_empty || channel->wr_ready)
1732 			mask |= EPOLLIN | EPOLLRDNORM;
1733 
1734 		if (channel->wr_hangup)
1735 			/*
1736 			 * Not EPOLLHUP, because its behavior is in the
1737 			 * mist, and EPOLLIN does what we want: Wake up
1738 			 * the read file descriptor so it sees EOF.
1739 			 */
1740 			mask |= EPOLLIN | EPOLLRDNORM;
1741 		spin_unlock_irqrestore(&channel->wr_spinlock, flags);
1742 	}
1743 
1744 	/*
1745 	 * If partial data write is disallowed on a write() channel,
1746 	 * it's pointless to ever signal OK to write, because it could
1747 	 * block despite some space being available.
1748 	 */
1749 
1750 	if (channel->rd_allow_partial) {
1751 		poll_wait(filp, &channel->rd_wait, wait);
1752 
1753 		spin_lock_irqsave(&channel->rd_spinlock, flags);
1754 		if (!channel->rd_full)
1755 			mask |= EPOLLOUT | EPOLLWRNORM;
1756 		spin_unlock_irqrestore(&channel->rd_spinlock, flags);
1757 	}
1758 
1759 	if (channel->endpoint->fatal_error)
1760 		mask |= EPOLLERR;
1761 
1762 	return mask;
1763 }
1764 
1765 static const struct file_operations xillybus_fops = {
1766 	.owner      = THIS_MODULE,
1767 	.read       = xillybus_read,
1768 	.write      = xillybus_write,
1769 	.open       = xillybus_open,
1770 	.flush      = xillybus_flush,
1771 	.release    = xillybus_release,
1772 	.llseek     = xillybus_llseek,
1773 	.poll       = xillybus_poll,
1774 };
1775 
1776 struct xilly_endpoint *xillybus_init_endpoint(struct pci_dev *pdev,
1777 					      struct device *dev,
1778 					      struct xilly_endpoint_hardware
1779 					      *ephw)
1780 {
1781 	struct xilly_endpoint *endpoint;
1782 
1783 	endpoint = devm_kzalloc(dev, sizeof(*endpoint), GFP_KERNEL);
1784 	if (!endpoint)
1785 		return NULL;
1786 
1787 	endpoint->pdev = pdev;
1788 	endpoint->dev = dev;
1789 	endpoint->ephw = ephw;
1790 	endpoint->msg_counter = 0x0b;
1791 	endpoint->failed_messages = 0;
1792 	endpoint->fatal_error = 0;
1793 
1794 	init_waitqueue_head(&endpoint->ep_wait);
1795 	mutex_init(&endpoint->register_mutex);
1796 
1797 	return endpoint;
1798 }
1799 EXPORT_SYMBOL(xillybus_init_endpoint);
1800 
1801 static int xilly_quiesce(struct xilly_endpoint *endpoint)
1802 {
1803 	long t;
1804 
1805 	endpoint->idtlen = -1;
1806 
1807 	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1808 		  endpoint->registers + fpga_dma_control_reg);
1809 
1810 	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1811 					     (endpoint->idtlen >= 0),
1812 					     XILLY_TIMEOUT);
1813 	if (t <= 0) {
1814 		dev_err(endpoint->dev,
1815 			"Failed to quiesce the device on exit.\n");
1816 		return -ENODEV;
1817 	}
1818 	return 0;
1819 }
1820 
1821 int xillybus_endpoint_discovery(struct xilly_endpoint *endpoint)
1822 {
1823 	int rc;
1824 	long t;
1825 
1826 	void *bootstrap_resources;
1827 	int idtbuffersize = (1 << PAGE_SHIFT);
1828 	struct device *dev = endpoint->dev;
1829 
1830 	/*
1831 	 * The bogus IDT is used during bootstrap for allocating the initial
1832 	 * message buffer, and then the message buffer and space for the IDT
1833 	 * itself. The initial message buffer is of a single page's size, but
1834 	 * it's soon replaced with a more modest one (and memory is freed).
1835 	 */
1836 
1837 	unsigned char bogus_idt[8] = { 1, 224, (PAGE_SHIFT)-2, 0,
1838 				       3, 192, PAGE_SHIFT, 0 };
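
	/*
	 * Decoded with the descriptor format of xilly_setupchannels(), the
	 * first bogus entry requests the page-sized message buffer
	 * (channel 0, one buffer of 2^(PAGE_SHIFT-2) 32-bit words), and the
	 * second a single page-sized "wr" buffer on channel 1 for receiving
	 * the IDT; bogus_idt[6] is bumped below to double that buffer until
	 * the IDT fits.
	 */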
1839 	struct xilly_idt_handle idt_handle;
1840 
1841 	/*
1842 	 * Writing the value 0x00000001 to Endianness register signals which
1843 	 * endianness this processor is using, so the FPGA can swap words as
1844 	 * necessary.
1845 	 */
1846 
1847 	iowrite32(1, endpoint->registers + fpga_endian_reg);
1848 
1849 	/* Bootstrap phase I: Allocate temporary message buffer */
1850 
1851 	bootstrap_resources = devres_open_group(dev, NULL, GFP_KERNEL);
1852 	if (!bootstrap_resources)
1853 		return -ENOMEM;
1854 
1855 	endpoint->num_channels = 0;
1856 
1857 	rc = xilly_setupchannels(endpoint, bogus_idt, 1);
1858 	if (rc)
1859 		return rc;
1860 
1861 	/* Clear the message subsystem (and counter in particular) */
1862 	iowrite32(0x04, endpoint->registers + fpga_msg_ctrl_reg);
1863 
1864 	endpoint->idtlen = -1;
1865 
1866 	/*
1867 	 * Set DMA 32/64 bit mode, quiesce the device (?!) and get IDT
1868 	 * buffer size.
1869 	 */
1870 	iowrite32((u32) (endpoint->dma_using_dac & 0x0001),
1871 		  endpoint->registers + fpga_dma_control_reg);
1872 
1873 	t = wait_event_interruptible_timeout(endpoint->ep_wait,
1874 					     (endpoint->idtlen >= 0),
1875 					     XILLY_TIMEOUT);
1876 	if (t <= 0) {
1877 		dev_err(endpoint->dev, "No response from FPGA. Aborting.\n");
1878 		return -ENODEV;
1879 	}
1880 
1881 	/* Enable DMA */
1882 	iowrite32((u32) (0x0002 | (endpoint->dma_using_dac & 0x0001)),
1883 		  endpoint->registers + fpga_dma_control_reg);
1884 
1885 	/* Bootstrap phase II: Allocate buffer for IDT and obtain it */
1886 	while (endpoint->idtlen >= idtbuffersize) {
1887 		idtbuffersize *= 2;
1888 		bogus_idt[6]++;
1889 	}
1890 
1891 	endpoint->num_channels = 1;
1892 
1893 	rc = xilly_setupchannels(endpoint, bogus_idt, 2);
1894 	if (rc)
1895 		goto failed_idt;
1896 
1897 	rc = xilly_obtain_idt(endpoint);
1898 	if (rc)
1899 		goto failed_idt;
1900 
1901 	rc = xilly_scan_idt(endpoint, &idt_handle);
1902 	if (rc)
1903 		goto failed_idt;
1904 
1905 	devres_close_group(dev, bootstrap_resources);
1906 
1907 	/* Bootstrap phase III: Allocate buffers according to IDT */
1908 
1909 	rc = xilly_setupchannels(endpoint,
1910 				 idt_handle.chandesc,
1911 				 idt_handle.entries);
1912 	if (rc)
1913 		goto failed_idt;
1914 
1915 	rc = xillybus_init_chrdev(dev, &xillybus_fops,
1916 				  endpoint->ephw->owner, endpoint,
1917 				  idt_handle.names,
1918 				  idt_handle.names_len,
1919 				  endpoint->num_channels,
1920 				  xillyname, false);
1921 
1922 	if (rc)
1923 		goto failed_idt;
1924 
1925 	devres_release_group(dev, bootstrap_resources);
1926 
1927 	return 0;
1928 
1929 failed_idt:
1930 	xilly_quiesce(endpoint);
1931 	flush_workqueue(xillybus_wq);
1932 
1933 	return rc;
1934 }
1935 EXPORT_SYMBOL(xillybus_endpoint_discovery);
1936 
1937 void xillybus_endpoint_remove(struct xilly_endpoint *endpoint)
1938 {
1939 	xillybus_cleanup_chrdev(endpoint, endpoint->dev);
1940 
1941 	xilly_quiesce(endpoint);
1942 
1943 	/*
1944 	 * Flushing is done upon endpoint release to prevent access to memory
1945 	 * just about to be released. This makes the quiesce complete.
1946 	 */
1947 	flush_workqueue(xillybus_wq);
1948 }
1949 EXPORT_SYMBOL(xillybus_endpoint_remove);
1950 
1951 static int __init xillybus_init(void)
1952 {
1953 	xillybus_wq = alloc_workqueue(xillyname, 0, 0);
1954 	if (!xillybus_wq)
1955 		return -ENOMEM;
1956 
1957 	return 0;
1958 }
1959 
1960 static void __exit xillybus_exit(void)
1961 {
1962 	/* flush_workqueue() was called for each endpoint released */
1963 	destroy_workqueue(xillybus_wq);
1964 }
1965 
1966 module_init(xillybus_init);
1967 module_exit(xillybus_exit);
1968