/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2021, Adrian Chadd <adrian@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>

#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/rman.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

#include <machine/bus.h>
#include <machine/cpu.h>

#include <dev/gpio/gpiobusvar.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>

#include <dev/extres/clk/clk.h>
#include <dev/extres/hwreset/hwreset.h>

#include <dev/spibus/spi.h>
#include <dev/spibus/spibusvar.h>
#include "spibus_if.h"

#include <dev/qcom_qup/qcom_spi_var.h>
#include <dev/qcom_qup/qcom_spi_reg.h>
#include <dev/qcom_qup/qcom_qup_reg.h>
#include <dev/qcom_qup/qcom_spi_debug.h>

int
qcom_spi_hw_read_controller_transfer_sizes(struct qcom_spi_softc *sc)
{
	uint32_t reg, val;

	reg = QCOM_SPI_READ_4(sc, QUP_IO_M_MODES);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: QUP_IO_M_MODES=0x%08x\n", __func__, reg);

	/* Input block size */
	val = (reg >> QUP_IO_M_INPUT_BLOCK_SIZE_SHIFT)
	    & QUP_IO_M_INPUT_BLOCK_SIZE_MASK;
	if (val == 0)
		sc->config.input_block_size = 4;
	else
		sc->config.input_block_size = val * 16;

	/* Output block size */
	val = (reg >> QUP_IO_M_OUTPUT_BLOCK_SIZE_SHIFT)
	    & QUP_IO_M_OUTPUT_BLOCK_SIZE_MASK;
	if (val == 0)
		sc->config.output_block_size = 4;
	else
		sc->config.output_block_size = val * 16;

	/* Input FIFO size */
	val = (reg >> QUP_IO_M_INPUT_FIFO_SIZE_SHIFT)
	    & QUP_IO_M_INPUT_FIFO_SIZE_MASK;
	sc->config.input_fifo_size =
	    sc->config.input_block_size * (2 << val);

	/* Output FIFO size */
	val = (reg >> QUP_IO_M_OUTPUT_FIFO_SIZE_SHIFT)
	    & QUP_IO_M_OUTPUT_FIFO_SIZE_MASK;
	sc->config.output_fifo_size =
	    sc->config.output_block_size * (2 << val);

	return (0);
}
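
/*
 * A worked decode example (illustrative values, not from any
 * particular SoC): an output block size field of 1 and an output
 * FIFO size field of 2 yield
 *
 *   output_block_size = 1 * 16        = 16 bytes
 *   output_fifo_size  = 16 * (2 << 2) = 128 bytes
 *
 * whilst a block size field of 0 means a single 4 byte block.
 */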

static bool
qcom_spi_hw_qup_is_state_valid_locked(struct qcom_spi_softc *sc)
{
	uint32_t reg;

	QCOM_SPI_ASSERT_LOCKED(sc);

	reg = QCOM_SPI_READ_4(sc, QUP_STATE);
	QCOM_SPI_BARRIER_READ(sc);

	return (!!(reg & QUP_STATE_VALID));
}

static int
qcom_spi_hw_qup_wait_state_valid_locked(struct qcom_spi_softc *sc)
{
	int i;

	for (i = 0; i < 10; i++) {
		if (qcom_spi_hw_qup_is_state_valid_locked(sc))
			break;
	}
	if (i >= 10) {
		device_printf(sc->sc_dev,
		    "ERROR: timeout waiting for valid state\n");
		return (ENXIO);
	}
	return (0);
}

static bool
qcom_spi_hw_is_opmode_dma_locked(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	if (sc->state.transfer_mode == QUP_IO_M_MODE_DMOV)
		return (true);
	if (sc->state.transfer_mode == QUP_IO_M_MODE_BAM)
		return (true);
	return (false);
}

int
qcom_spi_hw_qup_set_state_locked(struct qcom_spi_softc *sc, uint32_t state)
{
	uint32_t cur_state;
	int ret;

	QCOM_SPI_ASSERT_LOCKED(sc);

	/* Wait until the state becomes valid */
	ret = qcom_spi_hw_qup_wait_state_valid_locked(sc);
	if (ret != 0) {
		return (ret);
	}

	cur_state = QCOM_SPI_READ_4(sc, QUP_STATE);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_STATE_CHANGE,
	    "%s: target state=%d, cur_state=0x%08x\n",
	    __func__, state, cur_state);

	/*
	 * According to the QUP specification, when going
	 * from PAUSE to RESET, two writes are required.
	 */
	if ((state == QUP_STATE_RESET)
	    && ((cur_state & QUP_STATE_MASK) == QUP_STATE_PAUSE)) {
		QCOM_SPI_WRITE_4(sc, QUP_STATE, QUP_STATE_CLEAR);
		QCOM_SPI_BARRIER_WRITE(sc);
		QCOM_SPI_WRITE_4(sc, QUP_STATE, QUP_STATE_CLEAR);
		QCOM_SPI_BARRIER_WRITE(sc);
	} else {
		cur_state &= ~QUP_STATE_MASK;
		cur_state |= state;
		QCOM_SPI_WRITE_4(sc, QUP_STATE, cur_state);
		QCOM_SPI_BARRIER_WRITE(sc);
	}

	/* Wait until the state becomes valid */
	ret = qcom_spi_hw_qup_wait_state_valid_locked(sc);
	if (ret != 0) {
		return (ret);
	}

	cur_state = QCOM_SPI_READ_4(sc, QUP_STATE);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_STATE_CHANGE,
	    "%s: FINISH: target state=%d, cur_state=0x%08x\n",
	    __func__, state, cur_state);

	return (0);
}
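
/*
 * A sketch of how these state transitions are typically sequenced
 * (assuming the usual QUP_STATE_RUN definition alongside
 * QUP_STATE_RESET/QUP_STATE_PAUSE; the real sequencing lives in the
 * transfer path, not in this file):
 *
 *   qcom_spi_hw_qup_set_state_locked(sc, QUP_STATE_RESET);
 *   ... program IO modes, transfer counts, SPI/QUP config ...
 *   qcom_spi_hw_qup_set_state_locked(sc, QUP_STATE_RUN);
 *   ... FIFO service happens whilst in RUN; PAUSE before refilling ...
 */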

/*
 * Do initial QUP setup.
 *
 * This is initially for the SPI driver; it would be interesting to see how
 * much of this is the same with the I2C/HSUART paths.
 */
int
qcom_spi_hw_qup_init_locked(struct qcom_spi_softc *sc)
{
	int ret;

	QCOM_SPI_ASSERT_LOCKED(sc);

	/* Full hardware reset */
	(void) qcom_spi_hw_do_full_reset(sc);

	ret = qcom_spi_hw_qup_set_state_locked(sc, QUP_STATE_RESET);
	if (ret != 0) {
		device_printf(sc->sc_dev, "ERROR: %s: couldn't reset\n",
		    __func__);
		goto error;
	}

	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, 0);
	QCOM_SPI_WRITE_4(sc, QUP_IO_M_MODES, 0);
	/* Note: no QUP_OPERATIONAL_MASK in QUP v1 */
	if (! QCOM_SPI_QUP_VERSION_V1(sc))
		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK, 0);

	/*
	 * Explicitly enable the error flags on QUP v1; note that
	 * input overrun detection is left disabled here.
	 */
	if (QCOM_SPI_QUP_VERSION_V1(sc))
		QCOM_SPI_WRITE_4(sc, QUP_ERROR_FLAGS_EN,
		    QUP_ERROR_OUTPUT_OVER_RUN
		    | QUP_ERROR_INPUT_UNDER_RUN
		    | QUP_ERROR_OUTPUT_UNDER_RUN);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
error:
	return (ret);
}

/*
 * Do initial SPI setup.
 */
int
qcom_spi_hw_spi_init_locked(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	/* Initial SPI error flags */
	QCOM_SPI_WRITE_4(sc, SPI_ERROR_FLAGS_EN,
	    QUP_ERROR_INPUT_UNDER_RUN
	    | QUP_ERROR_OUTPUT_UNDER_RUN);
	QCOM_SPI_BARRIER_WRITE(sc);

	/* Initial SPI config */
	QCOM_SPI_WRITE_4(sc, SPI_CONFIG, 0);
	QCOM_SPI_BARRIER_WRITE(sc);

	/* Initial CS/tri-state io control config */
	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL,
	    SPI_IO_C_NO_TRI_STATE
	    | SPI_IO_C_CS_SELECT(sc->config.cs_select));
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

/*
 * Force the currently selected device CS line to be active
 * or inactive.
 *
 * This forces it to be active or inactive rather than letting
 * the SPI transfer machine do its thing.  If you want to be able
 * to break up a big transaction into a handful of smaller ones,
 * without toggling /CS_n for that device, then you need it forced.
 * (If you toggle /CS_n to the device inactive and then active again,
 * NOR/NAND devices tend to stop a block transfer.)
 */
int
qcom_spi_hw_spi_cs_force(struct qcom_spi_softc *sc, int cs, bool enable)
{
	uint32_t reg;

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_CHIPSELECT,
	    "%s: called, enable=%u\n",
	    __func__, enable);

	reg = QCOM_SPI_READ_4(sc, SPI_IO_CONTROL);
	if (enable)
		reg |= SPI_IO_C_FORCE_CS;
	else
		reg &= ~SPI_IO_C_FORCE_CS;
	reg &= ~SPI_IO_C_CS_SELECT_MASK;
	reg |= SPI_IO_C_CS_SELECT(cs);
	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL, reg);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}
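
/*
 * An illustrative (hypothetical) caller of the above, splitting one
 * logical flash transaction into two transfers without deasserting
 * /CS_n in between:
 *
 *   qcom_spi_hw_spi_cs_force(sc, cs, true);
 *   ... transfer 1: command/address bytes ...
 *   ... transfer 2: data payload ...
 *   qcom_spi_hw_spi_cs_force(sc, cs, false);
 */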

/*
 * ACK/store current interrupt flag state.
 */
int
qcom_spi_hw_interrupt_handle(struct qcom_spi_softc *sc)
{
	uint32_t qup_error, spi_error, op_flags;

	QCOM_SPI_ASSERT_LOCKED(sc);

	/* Get QUP/SPI state */
	qup_error = QCOM_SPI_READ_4(sc, QUP_ERROR_FLAGS);
	spi_error = QCOM_SPI_READ_4(sc, SPI_ERROR_FLAGS);
	op_flags = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);

	/* ACK state */
	QCOM_SPI_WRITE_4(sc, QUP_ERROR_FLAGS, qup_error);
	QCOM_SPI_WRITE_4(sc, SPI_ERROR_FLAGS, spi_error);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_INTR,
	    "%s: called; qup=0x%08x, spi=0x%08x, op=0x%08x\n",
	    __func__,
	    qup_error,
	    spi_error,
	    op_flags);

	/* Handle error flags */
	if (qup_error != 0) {
		device_printf(sc->sc_dev, "ERROR: (QUP) mask=0x%08x\n",
		    qup_error);
		sc->intr.error = true;
	}
	if (spi_error != 0) {
		device_printf(sc->sc_dev, "ERROR: (SPI) mask=0x%08x\n",
		    spi_error);
		sc->intr.error = true;
	}

	/* Handle operational state */
	if (qcom_spi_hw_is_opmode_dma_locked(sc)) {
		/* ACK interrupts now */
		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, op_flags);
		if ((op_flags & QUP_OP_IN_SERVICE_FLAG)
		    && (op_flags & QUP_OP_MAX_INPUT_DONE_FLAG))
			sc->intr.rx_dma_done = true;
		if ((op_flags & QUP_OP_OUT_SERVICE_FLAG)
		    && (op_flags & QUP_OP_MAX_OUTPUT_DONE_FLAG))
			sc->intr.tx_dma_done = true;
	} else {
		/* FIFO/Block */
		if (op_flags & QUP_OP_IN_SERVICE_FLAG)
			sc->intr.do_rx = true;
		if (op_flags & QUP_OP_OUT_SERVICE_FLAG)
			sc->intr.do_tx = true;
	}

	/* Check if we've finished transfers */
	if (op_flags & QUP_OP_MAX_INPUT_DONE_FLAG)
		sc->intr.done = true;
	if (sc->intr.error)
		sc->intr.done = true;

	return (0);
}
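
/*
 * A sketch of how the flags latched above are presumably consumed by
 * the driver interrupt path (the actual logic lives outside this
 * file):
 *
 *   qcom_spi_hw_interrupt_handle(sc);
 *   if (sc->intr.do_rx)
 *           qcom_spi_hw_read_pio_fifo(sc);
 *   if (sc->intr.do_tx)
 *           qcom_spi_hw_write_pio_fifo(sc);
 *   if (sc->intr.done)
 *           ... wake up the waiting transfer ...
 */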

/*
 * Make initial transfer selections based on the transfer sizes
 * and alignment.
 *
 * For now this'll just default to FIFO until that works, and then
 * will grow to include BLOCK / DMA as appropriate.
 */
int
qcom_spi_hw_setup_transfer_selection(struct qcom_spi_softc *sc, uint32_t len)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	/*
	 * For now only support doing a single FIFO transfer.
	 * The main PIO transfer routine loop will break it up for us.
	 */
	sc->state.transfer_mode = QUP_IO_M_MODE_FIFO;
	sc->transfer.tx_offset = 0;
	sc->transfer.rx_offset = 0;
	sc->transfer.tx_len = 0;
	sc->transfer.rx_len = 0;
	sc->transfer.tx_buf = NULL;
	sc->transfer.rx_buf = NULL;

	/*
	 * If we're sending a DWORD-multiple sized block (like IO buffers)
	 * then we can just use DWORD sized transfers.
	 *
	 * This is really only valid for PIO/block modes; I'm not yet
	 * sure what we should do for DMA modes.
	 */
	if (len > 0 && len % 4 == 0)
		sc->state.transfer_word_size = 4;
	else
		sc->state.transfer_word_size = 1;

	return (0);
}
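
/*
 * For example, a 64 byte transfer is a DWORD multiple and gets
 * 4 byte transfer words (16 words total), whilst a 37 byte transfer
 * falls back to 1 byte words.
 */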

/*
 * Blank the transfer state after a full transfer is completed.
 */
int
qcom_spi_hw_complete_transfer(struct qcom_spi_softc *sc)
{
	QCOM_SPI_ASSERT_LOCKED(sc);

	sc->state.transfer_mode = QUP_IO_M_MODE_FIFO;
	sc->transfer.tx_offset = 0;
	sc->transfer.rx_offset = 0;
	sc->transfer.tx_len = 0;
	sc->transfer.rx_len = 0;
	sc->transfer.tx_buf = NULL;
	sc->transfer.rx_buf = NULL;
	sc->state.transfer_word_size = 0;
	return (0);
}

/*
 * Set up the transfer selection for the current transfer.
 *
 * This calculates how many words we can transfer in the current
 * transfer and what's left to transfer.
 */
int
qcom_spi_hw_setup_current_transfer(struct qcom_spi_softc *sc)
{
	uint32_t bytes_left;

	QCOM_SPI_ASSERT_LOCKED(sc);

	/*
	 * XXX For now, base this on the TX side buffer size, not both.
	 * Later on we'll want to configure it based on the MAX of
	 * either and just eat up the dummy values in the PIO
	 * routines.  (For DMA it's .. more annoyingly complicated
	 * if the transfer sizes are not symmetrical.)
	 */
	bytes_left = sc->transfer.tx_len - sc->transfer.tx_offset;

	if (sc->state.transfer_mode == QUP_IO_M_MODE_FIFO) {
		/*
		 * For FIFO transfers the num_words limit depends upon
		 * the word size, FIFO size and how many bytes are left.
		 * It definitely will be under SPI_MAX_XFER so don't
		 * worry about that here.
		 */
		sc->transfer.num_words = bytes_left / sc->state.transfer_word_size;
		sc->transfer.num_words = MIN(sc->transfer.num_words,
		    sc->config.input_fifo_size / sizeof(uint32_t));
	} else if (sc->state.transfer_mode == QUP_IO_M_MODE_BLOCK) {
		/*
		 * For BLOCK transfers the logic will be a little different.
		 * Instead of it being based on the maximum input_fifo_size,
		 * it'll be broken down into the "words per block" size but
		 * our maximum transfer size will ACTUALLY be capped by
		 * SPI_MAX_XFER (65536-64 bytes).  Each transfer
		 * will end up being in multiples of a block until the
		 * last transfer.
		 */
		sc->transfer.num_words = bytes_left / sc->state.transfer_word_size;
		sc->transfer.num_words = MIN(sc->transfer.num_words,
		    SPI_MAX_XFER);
	}

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: transfer.tx_len=%u,"
	    " transfer.tx_offset=%u,"
	    " transfer_word_size=%u,"
	    " bytes_left=%u, num_words=%u, fifo_word_max=%u\n",
	    __func__,
	    sc->transfer.tx_len,
	    sc->transfer.tx_offset,
	    sc->state.transfer_word_size,
	    bytes_left,
	    sc->transfer.num_words,
	    sc->config.input_fifo_size / sizeof(uint32_t));

	return (0);
}
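
/*
 * A worked FIFO-mode example: tx_len=256, tx_offset=0 and a 4 byte
 * word size give bytes_left=256 and thus 64 words, which is then
 * capped at input_fifo_size / 4 FIFO slots (16 for an illustrative
 * 64 byte FIFO); the remainder is handled by subsequent transfers.
 */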

/*
 * Setup the PIO FIFO transfer count.
 *
 * Note that we get a /single/ TX/RX phase up to these num_words
 * transfers.
 */
int
qcom_spi_hw_setup_pio_transfer_cnt(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_WRITE_4(sc, QUP_MX_READ_CNT, sc->transfer.num_words);
	QCOM_SPI_WRITE_4(sc, QUP_MX_WRITE_CNT, sc->transfer.num_words);
	QCOM_SPI_WRITE_4(sc, QUP_MX_INPUT_CNT, 0);
	QCOM_SPI_WRITE_4(sc, QUP_MX_OUTPUT_CNT, 0);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: num_words=%u\n", __func__,
	    sc->transfer.num_words);

	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

/*
 * Setup the PIO BLOCK transfer count.
 *
 * This sets up the total transfer size, in TX/RX FIFO block size
 * chunks.  We will get multiple notifications when a block sized
 * chunk of data is available or required.
 */
int
qcom_spi_hw_setup_block_transfer_cnt(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_WRITE_4(sc, QUP_MX_READ_CNT, 0);
	QCOM_SPI_WRITE_4(sc, QUP_MX_WRITE_CNT, 0);
	QCOM_SPI_WRITE_4(sc, QUP_MX_INPUT_CNT, sc->transfer.num_words);
	QCOM_SPI_WRITE_4(sc, QUP_MX_OUTPUT_CNT, sc->transfer.num_words);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

int
qcom_spi_hw_setup_io_modes(struct qcom_spi_softc *sc)
{
	uint32_t reg;

	QCOM_SPI_ASSERT_LOCKED(sc);

	reg = QCOM_SPI_READ_4(sc, QUP_IO_M_MODES);

	reg &= ~((QUP_IO_M_INPUT_MODE_MASK << QUP_IO_M_INPUT_MODE_SHIFT)
	    | (QUP_IO_M_OUTPUT_MODE_MASK << QUP_IO_M_OUTPUT_MODE_SHIFT));

	/*
	 * If it's being done using DMA then the hardware will
	 * need to pack and unpack the byte stream into the word/dword
	 * stream being expected by the SPI/QUP micro engine.
	 *
	 * For PIO modes we're doing the pack/unpack in software,
	 * see the pio/block transfer routines.
	 */
	if (qcom_spi_hw_is_opmode_dma_locked(sc))
		reg |= (QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
	else
		reg &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);

	/* Transfer mode */
	reg |= ((sc->state.transfer_mode & QUP_IO_M_INPUT_MODE_MASK)
	    << QUP_IO_M_INPUT_MODE_SHIFT);
	reg |= ((sc->state.transfer_mode & QUP_IO_M_OUTPUT_MODE_MASK)
	    << QUP_IO_M_OUTPUT_MODE_SHIFT);

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: QUP_IO_M_MODES=0x%08x\n", __func__, reg);

	QCOM_SPI_WRITE_4(sc, QUP_IO_M_MODES, reg);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

int
qcom_spi_hw_setup_spi_io_clock_polarity(struct qcom_spi_softc *sc,
    bool cpol)
{
	uint32_t reg;

	QCOM_SPI_ASSERT_LOCKED(sc);

	reg = QCOM_SPI_READ_4(sc, SPI_IO_CONTROL);

	if (cpol)
		reg |= SPI_IO_C_CLK_IDLE_HIGH;
	else
		reg &= ~SPI_IO_C_CLK_IDLE_HIGH;

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: SPI_IO_CONTROL=0x%08x\n", __func__, reg);

	QCOM_SPI_WRITE_4(sc, SPI_IO_CONTROL, reg);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

int
qcom_spi_hw_setup_spi_config(struct qcom_spi_softc *sc, uint32_t clock_val,
    bool cpha)
{
	uint32_t reg;

	/*
	 * For now we don't have a way to configure loopback SPI for
	 * testing; when we do, this is where it would go.
	 */

	QCOM_SPI_ASSERT_LOCKED(sc);

	reg = QCOM_SPI_READ_4(sc, SPI_CONFIG);
	reg &= ~SPI_CONFIG_LOOPBACK;

	if (cpha)
		reg &= ~SPI_CONFIG_INPUT_FIRST;
	else
		reg |= SPI_CONFIG_INPUT_FIRST;

	/*
	 * If the frequency is above SPI_HS_MIN_RATE then enable high speed.
	 * This apparently improves stability.
	 *
	 * Note - don't do this if SPI loopback is enabled!
	 */
	if (clock_val >= SPI_HS_MIN_RATE)
		reg |= SPI_CONFIG_HS_MODE;
	else
		reg &= ~SPI_CONFIG_HS_MODE;

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: SPI_CONFIG=0x%08x\n", __func__, reg);

	QCOM_SPI_WRITE_4(sc, SPI_CONFIG, reg);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}
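
/*
 * Taking the two routines above together, the standard SPI modes map
 * to these bits:
 *
 *   mode 0 (CPOL=0/CPHA=0): CLK_IDLE_HIGH clear, INPUT_FIRST set
 *   mode 3 (CPOL=1/CPHA=1): CLK_IDLE_HIGH set, INPUT_FIRST clear
 */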

int
qcom_spi_hw_setup_qup_config(struct qcom_spi_softc *sc, bool is_tx, bool is_rx)
{
	uint32_t reg;

	QCOM_SPI_ASSERT_LOCKED(sc);

	reg = QCOM_SPI_READ_4(sc, QUP_CONFIG);
	reg &= ~(QUP_CONFIG_NO_INPUT | QUP_CONFIG_NO_OUTPUT | QUP_CONFIG_N);

	/* SPI mode */
	reg |= QUP_CONFIG_SPI_MODE;

	/* Bitmask for number of bits per word being used in each FIFO slot */
	reg |= ((sc->state.transfer_word_size * 8) - 1) & QUP_CONFIG_N;

	/*
	 * When doing DMA we need to configure whether we are shifting
	 * data in, out, and/or both.  For PIO/block modes it must stay
	 * unset.
	 */
	if (qcom_spi_hw_is_opmode_dma_locked(sc)) {
		if (is_rx == false)
			reg |= QUP_CONFIG_NO_INPUT;
		if (is_tx == false)
			reg |= QUP_CONFIG_NO_OUTPUT;
	}

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
	    "%s: QUP_CONFIG=0x%08x\n", __func__, reg);

	QCOM_SPI_WRITE_4(sc, QUP_CONFIG, reg);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}
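
/*
 * For example, 4 byte transfer words program QUP_CONFIG_N with
 * (4 * 8) - 1 = 31, i.e. 32 bits per FIFO slot; 1 byte words
 * program it with 7.
 */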

int
qcom_spi_hw_setup_operational_mask(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	if (QCOM_SPI_QUP_VERSION_V1(sc)) {
		QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TRANSFER_SETUP,
		    "%s: skipping, qupv1\n", __func__);
		return (0);
	}

	if (qcom_spi_hw_is_opmode_dma_locked(sc))
		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK,
		    QUP_OP_IN_SERVICE_FLAG | QUP_OP_OUT_SERVICE_FLAG);
	else
		QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL_MASK, 0);

	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

/*
 * ACK that we already have serviced the output FIFO.
 */
int
qcom_spi_hw_ack_write_pio_fifo(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);
	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
	QCOM_SPI_BARRIER_WRITE(sc);
	return (0);
}

int
qcom_spi_hw_ack_opmode(struct qcom_spi_softc *sc)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_BARRIER_READ(sc);
	QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
	QCOM_SPI_BARRIER_WRITE(sc);

	return (0);
}

/*
 * Read the value from the TX buffer into the given 32 bit DWORD,
 * pre-shifting it into the place requested.
 *
 * Returns true if there was a byte available, false otherwise.
 */
static bool
qcom_spi_hw_write_from_tx_buf(struct qcom_spi_softc *sc, int shift,
    uint32_t *val)
{

	QCOM_SPI_ASSERT_LOCKED(sc);

	if (sc->transfer.tx_buf == NULL)
		return (false);

	if (sc->transfer.tx_offset < sc->transfer.tx_len) {
		*val |= (sc->transfer.tx_buf[sc->transfer.tx_offset] & 0xff)
		    << shift;
		sc->transfer.tx_offset++;
		return (true);
	}

	return (false);
}

int
qcom_spi_hw_write_pio_fifo(struct qcom_spi_softc *sc)
{
	uint32_t i;
	int num_bytes = 0;

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_OUT_SERVICE_FLAG);
	QCOM_SPI_BARRIER_WRITE(sc);

	/*
	 * Loop over the transfer num_words, complaining if the FIFO
	 * fills up early.
	 */
	for (i = 0; i < sc->transfer.num_words; i++) {
		uint32_t reg;

		/* Break if FIFO is full */
		if ((QCOM_SPI_READ_4(sc, QUP_OPERATIONAL)
		    & QUP_OP_OUT_FIFO_FULL) != 0) {
			device_printf(sc->sc_dev, "%s: FIFO full\n", __func__);
			break;
		}

		/*
		 * Handle 1, 2, 4 byte transfer packing rules.
		 *
		 * Unlike read, where the shifting is done towards the MSB
		 * for us by default, we have to do it ourselves for transmit.
		 * There's a bit that one can set to do the preshifting
		 * (and u-boot uses it!) but I'll stick with what Linux is
		 * doing to make it easier for future maintenance.
		 *
		 * The format is the same as 4 byte RX - 0xaabbccdd;
		 * the byte ordering on the wire being aa, bb, cc, dd.
		 */
		reg = 0;
		if (sc->state.transfer_word_size == 1) {
			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
				num_bytes++;
		} else if (sc->state.transfer_word_size == 2) {
			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
				num_bytes++;
			if (qcom_spi_hw_write_from_tx_buf(sc, 16, &reg))
				num_bytes++;
		} else if (sc->state.transfer_word_size == 4) {
			if (qcom_spi_hw_write_from_tx_buf(sc, 24, &reg))
				num_bytes++;
			if (qcom_spi_hw_write_from_tx_buf(sc, 16, &reg))
				num_bytes++;
			if (qcom_spi_hw_write_from_tx_buf(sc, 8, &reg))
				num_bytes++;
			if (qcom_spi_hw_write_from_tx_buf(sc, 0, &reg))
				num_bytes++;
		}

		/*
		 * Always shift out something, in case we need phantom
		 * writes to finish things up whilst we read a reply
		 * payload.
		 */
		QCOM_SPI_WRITE_4(sc, QUP_OUTPUT_FIFO, reg);
		QCOM_SPI_BARRIER_WRITE(sc);
	}

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TX_FIFO,
	    "%s: wrote %d bytes (%d fifo slots)\n",
	    __func__, num_bytes, sc->transfer.num_words);

	return (0);
}
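
/*
 * A worked example of the packing rules above: with a 2 byte
 * transfer word and TX bytes 0xaa, 0xbb, the FIFO word is built as
 * (0xaa << 24) | (0xbb << 16) = 0xaabb0000, and 0xaa is shifted out
 * on the wire first.
 */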

int
qcom_spi_hw_write_pio_block(struct qcom_spi_softc *sc)
{
	/* Not yet implemented */
	return (ENXIO);
}

/*
 * Read data into the RX buffer and increment the RX offset.
 *
 * Return true if the byte was saved into the RX buffer, else
 * return false.
 */
static bool
qcom_spi_hw_read_into_rx_buf(struct qcom_spi_softc *sc, uint8_t val)
{
	QCOM_SPI_ASSERT_LOCKED(sc);

	if (sc->transfer.rx_buf == NULL)
		return (false);

	/* Make sure we aren't overflowing the receive buffer */
	if (sc->transfer.rx_offset < sc->transfer.rx_len) {
		sc->transfer.rx_buf[sc->transfer.rx_offset] = val;
		sc->transfer.rx_offset++;
		return (true);
	}
	return (false);
}

/*
 * Read "n_words" transfers, and push those bytes into the receive buffer.
 * Make sure we have enough space, and make sure we don't overflow the
 * read buffer size too!
 */
int
qcom_spi_hw_read_pio_fifo(struct qcom_spi_softc *sc)
{
	uint32_t i;
	uint32_t reg;
	int num_bytes = 0;

	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_WRITE_4(sc, QUP_OPERATIONAL, QUP_OP_IN_SERVICE_FLAG);
	QCOM_SPI_BARRIER_WRITE(sc);

	for (i = 0; i < sc->transfer.num_words; i++) {
		/* Break if FIFO is empty */
		QCOM_SPI_BARRIER_READ(sc);
		reg = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
		if ((reg & QUP_OP_IN_FIFO_NOT_EMPTY) == 0) {
			device_printf(sc->sc_dev, "%s: FIFO empty\n", __func__);
			break;
		}

		/*
		 * Always read up to num_words while the FIFO is non-empty;
		 * that way if we have mismatched TX/RX buffer sizes for
		 * some reason we will read the needed phantom bytes.
		 */
		reg = QCOM_SPI_READ_4(sc, QUP_INPUT_FIFO);

		/*
		 * Unpack the receive buffer based on whether we are
		 * doing 1, 2, or 4 byte transfer words.
		 */
		if (sc->state.transfer_word_size == 1) {
			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
				num_bytes++;
		} else if (sc->state.transfer_word_size == 2) {
			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 8) & 0xff))
				num_bytes++;
			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
				num_bytes++;
		} else if (sc->state.transfer_word_size == 4) {
			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 24) & 0xff))
				num_bytes++;
			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 16) & 0xff))
				num_bytes++;
			if (qcom_spi_hw_read_into_rx_buf(sc, (reg >> 8) & 0xff))
				num_bytes++;
			if (qcom_spi_hw_read_into_rx_buf(sc, reg & 0xff))
				num_bytes++;
		}
	}

	QCOM_SPI_DPRINTF(sc, QCOM_SPI_DEBUG_HW_TX_FIFO,
	    "%s: read %d bytes (%d transfer words)\n",
	    __func__, num_bytes, sc->transfer.num_words);

#if 0
	/*
	 * This is a no-op for FIFO mode, it's only a thing for BLOCK
	 * transfers.
	 */
	QCOM_SPI_BARRIER_READ(sc);
	reg = QCOM_SPI_READ_4(sc, QUP_OPERATIONAL);
	if (reg & QUP_OP_MAX_INPUT_DONE_FLAG) {
		device_printf(sc->sc_dev, "%s: read complete (DONE)\n",
		    __func__);
		sc->intr.done = true;
	}
#endif

#if 0
	/*
	 * And see if we've finished the transfer and won't be getting
	 * any more.  Then treat it as done as well.
	 *
	 * In FIFO only mode we don't get a completion interrupt;
	 * we get an interrupt when the FIFO has enough data present.
	 */
	if ((sc->state.transfer_mode == QUP_IO_M_MODE_FIFO)
	    && (sc->transfer.rx_offset >= sc->transfer.rx_len)) {
		device_printf(sc->sc_dev, "%s: read complete (rxlen)\n",
		    __func__);
		sc->intr.done = true;
	}
#endif

	/*
	 * For FIFO transfers we get a /single/ result that completes
	 * the FIFO transfer.  We won't get any subsequent transfers;
	 * we'll need to schedule a new FIFO transfer.
	 */
	sc->intr.done = true;

	return (0);
}
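
/*
 * The unpacking above mirrors the TX packing rules: with a 4 byte
 * transfer word, a FIFO word of 0xaabbccdd lands in the RX buffer
 * as the bytes 0xaa, 0xbb, 0xcc, 0xdd in that order.
 */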

int
qcom_spi_hw_read_pio_block(struct qcom_spi_softc *sc)
{

	/* Not yet implemented */
	return (ENXIO);
}

int
qcom_spi_hw_do_full_reset(struct qcom_spi_softc *sc)
{
	QCOM_SPI_ASSERT_LOCKED(sc);

	QCOM_SPI_WRITE_4(sc, QUP_SW_RESET, 1);
	QCOM_SPI_BARRIER_WRITE(sc);
	DELAY(100);

	return (0);
}
984