xref: /linux/drivers/s390/cio/vfio_ccw_cp.c (revision 43b46e6bc69c2aa4331cfd7fa4e2943a894339e5)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * channel program interfaces
4  *
5  * Copyright IBM Corp. 2017
6  *
7  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
9  */
10 
11 #include <linux/ratelimit.h>
12 #include <linux/mm.h>
13 #include <linux/slab.h>
14 #include <linux/highmem.h>
15 #include <linux/iommu.h>
16 #include <linux/vfio.h>
17 #include <asm/idals.h>
18 
19 #include "vfio_ccw_cp.h"
20 #include "vfio_ccw_private.h"
21 
22 struct page_array {
23 	/* Array that stores pages need to pin. */
24 	dma_addr_t		*pa_iova;
25 	/* Array that receives the pinned pages. */
26 	struct page		**pa_page;
27 	/* Number of pages pinned from @pa_iova. */
28 	int			pa_nr;
29 };
30 
31 struct ccwchain {
32 	struct list_head	next;
33 	struct ccw1		*ch_ccw;
34 	/* Guest physical address of the current chain. */
35 	u64			ch_iova;
36 	/* Count of the valid ccws in chain. */
37 	int			ch_len;
38 	/* Pinned PAGEs for the original data. */
39 	struct page_array	*ch_pa;
40 };
41 
42 /*
43  * page_array_alloc() - alloc memory for page array
44  * @pa: page_array on which to perform the operation
45  * @iova: target guest physical address
46  * @len: number of bytes that should be pinned from @iova
47  *
48  * Attempt to allocate memory for page array.
49  *
50  * Usage of page_array:
51  * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
52  * this structure will be filled in by this function.
53  *
54  * Returns:
55  *         0 if page array is allocated
56  *   -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
57  *   -ENOMEM if alloc failed
58  */
59 static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len)
60 {
61 	int i;
62 
63 	if (pa->pa_nr || pa->pa_iova)
64 		return -EINVAL;
65 
66 	pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
67 	if (!pa->pa_nr)
68 		return -EINVAL;
69 
70 	pa->pa_iova = kcalloc(pa->pa_nr,
71 			      sizeof(*pa->pa_iova) + sizeof(*pa->pa_page),
72 			      GFP_KERNEL);
73 	if (unlikely(!pa->pa_iova)) {
74 		pa->pa_nr = 0;
75 		return -ENOMEM;
76 	}
77 	pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr];
78 
79 	pa->pa_iova[0] = iova;
80 	pa->pa_page[0] = NULL;
81 	for (i = 1; i < pa->pa_nr; i++) {
82 		pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE;
83 		pa->pa_page[i] = NULL;
84 	}
85 
86 	return 0;
87 }
88 
89 /*
90  * page_array_unpin() - Unpin user pages in memory
91  * @pa: page_array on which to perform the operation
92  * @vdev: the vfio device to perform the operation
93  * @pa_nr: number of user pages to unpin
94  *
95  * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
96  * otherwise only clear pa->pa_nr
97  */
98 static void page_array_unpin(struct page_array *pa,
99 			     struct vfio_device *vdev, int pa_nr)
100 {
101 	int unpinned = 0, npage = 1;
102 
103 	while (unpinned < pa_nr) {
104 		dma_addr_t *first = &pa->pa_iova[unpinned];
105 		dma_addr_t *last = &first[npage];
106 
107 		if (unpinned + npage < pa_nr &&
108 		    *first + npage * PAGE_SIZE == *last) {
109 			npage++;
110 			continue;
111 		}
112 
113 		vfio_unpin_pages(vdev, *first, npage);
114 		unpinned += npage;
115 		npage = 1;
116 	}
117 
118 	pa->pa_nr = 0;
119 }
120 
121 /*
122  * page_array_pin() - Pin user pages in memory
123  * @pa: page_array on which to perform the operation
124  * @mdev: the mediated device to perform pin operations
125  *
126  * Returns number of pages pinned upon success.
127  * If the pin request partially succeeds, or fails completely,
128  * all pages are left unpinned and a negative error value is returned.
129  */
130 static int page_array_pin(struct page_array *pa, struct vfio_device *vdev)
131 {
132 	int pinned = 0, npage = 1;
133 	int ret = 0;
134 
135 	while (pinned < pa->pa_nr) {
136 		dma_addr_t *first = &pa->pa_iova[pinned];
137 		dma_addr_t *last = &first[npage];
138 
139 		if (pinned + npage < pa->pa_nr &&
140 		    *first + npage * PAGE_SIZE == *last) {
141 			npage++;
142 			continue;
143 		}
144 
145 		ret = vfio_pin_pages(vdev, *first, npage,
146 				     IOMMU_READ | IOMMU_WRITE,
147 				     &pa->pa_page[pinned]);
148 		if (ret < 0) {
149 			goto err_out;
150 		} else if (ret > 0 && ret != npage) {
151 			pinned += ret;
152 			ret = -EINVAL;
153 			goto err_out;
154 		}
155 		pinned += npage;
156 		npage = 1;
157 	}
158 
159 	return ret;
160 
161 err_out:
162 	page_array_unpin(pa, vdev, pinned);
163 	return ret;
164 }
165 
166 /* Unpin the pages before releasing the memory. */
167 static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev)
168 {
169 	page_array_unpin(pa, vdev, pa->pa_nr);
170 	kfree(pa->pa_iova);
171 }
172 
173 static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
174 {
175 	u64 iova_pfn_start = iova >> PAGE_SHIFT;
176 	u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
177 	u64 pfn;
178 	int i;
179 
180 	for (i = 0; i < pa->pa_nr; i++) {
181 		pfn = pa->pa_iova[i] >> PAGE_SHIFT;
182 		if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
183 			return true;
184 	}
185 
186 	return false;
187 }
188 /* Create the list of IDAL words for a page_array. */
189 static inline void page_array_idal_create_words(struct page_array *pa,
190 						unsigned long *idaws)
191 {
192 	int i;
193 
194 	/*
195 	 * Idal words (execept the first one) rely on the memory being 4k
196 	 * aligned. If a user virtual address is 4K aligned, then it's
197 	 * corresponding kernel physical address will also be 4K aligned. Thus
198 	 * there will be no problem here to simply use the phys to create an
199 	 * idaw.
200 	 */
201 
202 	for (i = 0; i < pa->pa_nr; i++)
203 		idaws[i] = page_to_phys(pa->pa_page[i]);
204 
205 	/* Adjust the first IDAW, since it may not start on a page boundary */
206 	idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1);
207 }
208 
209 static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
210 {
211 	struct ccw0 ccw0;
212 	struct ccw1 *pccw1 = source;
213 	int i;
214 
215 	for (i = 0; i < len; i++) {
216 		ccw0 = *(struct ccw0 *)pccw1;
217 		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
218 			pccw1->cmd_code = CCW_CMD_TIC;
219 			pccw1->flags = 0;
220 			pccw1->count = 0;
221 		} else {
222 			pccw1->cmd_code = ccw0.cmd_code;
223 			pccw1->flags = ccw0.flags;
224 			pccw1->count = ccw0.count;
225 		}
226 		pccw1->cda = ccw0.cda;
227 		pccw1++;
228 	}
229 }
230 
231 /*
232  * Within the domain (@mdev), copy @n bytes from a guest physical
233  * address (@iova) to a host physical address (@to).
234  */
235 static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova,
236 			   unsigned long n)
237 {
238 	struct page_array pa = {0};
239 	int i, ret;
240 	unsigned long l, m;
241 
242 	ret = page_array_alloc(&pa, iova, n);
243 	if (ret < 0)
244 		return ret;
245 
246 	ret = page_array_pin(&pa, vdev);
247 	if (ret < 0) {
248 		page_array_unpin_free(&pa, vdev);
249 		return ret;
250 	}
251 
252 	l = n;
253 	for (i = 0; i < pa.pa_nr; i++) {
254 		void *from = kmap_local_page(pa.pa_page[i]);
255 
256 		m = PAGE_SIZE;
257 		if (i == 0) {
258 			from += iova & (PAGE_SIZE - 1);
259 			m -= iova & (PAGE_SIZE - 1);
260 		}
261 
262 		m = min(l, m);
263 		memcpy(to + (n - l), from, m);
264 		kunmap_local(from);
265 
266 		l -= m;
267 		if (l == 0)
268 			break;
269 	}
270 
271 	page_array_unpin_free(&pa, vdev);
272 
273 	return l;
274 }
275 
/*
 * Helpers to classify a single ccw1.
 */

/* Tests on the command code. */
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

/* Tests on the flags; these yield the masked bits, not 0/1. */
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
291 
292 /*
293  * ccw_does_data_transfer()
294  *
295  * Determine whether a CCW will move any data, such that the guest pages
296  * would need to be pinned before performing the I/O.
297  *
298  * Returns 1 if yes, 0 if no.
299  */
300 static inline int ccw_does_data_transfer(struct ccw1 *ccw)
301 {
302 	/* If the count field is zero, then no data will be transferred */
303 	if (ccw->count == 0)
304 		return 0;
305 
306 	/* If the command is a NOP, then no data will be transferred */
307 	if (ccw_is_noop(ccw))
308 		return 0;
309 
310 	/* If the skip flag is off, then data will be transferred */
311 	if (!ccw_is_skip(ccw))
312 		return 1;
313 
314 	/*
315 	 * If the skip flag is on, it is only meaningful if the command
316 	 * code is a read, read backward, sense, or sense ID.  In those
317 	 * cases, no data will be transferred.
318 	 */
319 	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
320 		return 0;
321 
322 	if (ccw_is_sense(ccw))
323 		return 0;
324 
325 	/* The skip flag is on, but it is ignored for this command code. */
326 	return 1;
327 }
328 
329 /*
330  * is_cpa_within_range()
331  *
332  * @cpa: channel program address being questioned
333  * @head: address of the beginning of a CCW chain
334  * @len: number of CCWs within the chain
335  *
336  * Determine whether the address of a CCW (whether a new chain,
337  * or the target of a TIC) falls within a range (including the end points).
338  *
339  * Returns 1 if yes, 0 if no.
340  */
341 static inline int is_cpa_within_range(u32 cpa, u32 head, int len)
342 {
343 	u32 tail = head + (len - 1) * sizeof(struct ccw1);
344 
345 	return (head <= cpa && cpa <= tail);
346 }
347 
348 static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
349 {
350 	if (!ccw_is_tic(ccw))
351 		return 0;
352 
353 	return is_cpa_within_range(ccw->cda, head, len);
354 }
355 
356 static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
357 {
358 	struct ccwchain *chain;
359 	void *data;
360 	size_t size;
361 
362 	/* Make ccw address aligned to 8. */
363 	size = ((sizeof(*chain) + 7L) & -8L) +
364 		sizeof(*chain->ch_ccw) * len +
365 		sizeof(*chain->ch_pa) * len;
366 	chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
367 	if (!chain)
368 		return NULL;
369 
370 	data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
371 	chain->ch_ccw = (struct ccw1 *)data;
372 
373 	data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
374 	chain->ch_pa = (struct page_array *)data;
375 
376 	chain->ch_len = len;
377 
378 	list_add_tail(&chain->next, &cp->ccwchain_list);
379 
380 	return chain;
381 }
382 
383 static void ccwchain_free(struct ccwchain *chain)
384 {
385 	list_del(&chain->next);
386 	kfree(chain);
387 }
388 
389 /* Free resource for a ccw that allocated memory for its cda. */
390 static void ccwchain_cda_free(struct ccwchain *chain, int idx)
391 {
392 	struct ccw1 *ccw = chain->ch_ccw + idx;
393 
394 	if (ccw_is_tic(ccw))
395 		return;
396 
397 	kfree((void *)(u64)ccw->cda);
398 }
399 
400 /**
401  * ccwchain_calc_length - calculate the length of the ccw chain.
402  * @iova: guest physical address of the target ccw chain
403  * @cp: channel_program on which to perform the operation
404  *
405  * This is the chain length not considering any TICs.
406  * You need to do a new round for each TIC target.
407  *
408  * The program is also validated for absence of not yet supported
409  * indirect data addressing scenarios.
410  *
411  * Returns: the length of the ccw chain or -errno.
412  */
413 static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
414 {
415 	struct ccw1 *ccw = cp->guest_cp;
416 	int cnt = 0;
417 
418 	do {
419 		cnt++;
420 
421 		/*
422 		 * As we don't want to fail direct addressing even if the
423 		 * orb specified one of the unsupported formats, we defer
424 		 * checking for IDAWs in unsupported formats to here.
425 		 */
426 		if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
427 			return -EOPNOTSUPP;
428 
429 		/*
430 		 * We want to keep counting if the current CCW has the
431 		 * command-chaining flag enabled, or if it is a TIC CCW
432 		 * that loops back into the current chain.  The latter
433 		 * is used for device orientation, where the CCW PRIOR to
434 		 * the TIC can either jump to the TIC or a CCW immediately
435 		 * after the TIC, depending on the results of its operation.
436 		 */
437 		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
438 			break;
439 
440 		ccw++;
441 	} while (cnt < CCWCHAIN_LEN_MAX + 1);
442 
443 	if (cnt == CCWCHAIN_LEN_MAX + 1)
444 		cnt = -EINVAL;
445 
446 	return cnt;
447 }
448 
449 static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
450 {
451 	struct ccwchain *chain;
452 	u32 ccw_head;
453 
454 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
455 		ccw_head = chain->ch_iova;
456 		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
457 			return 1;
458 	}
459 
460 	return 0;
461 }
462 
463 static int ccwchain_loop_tic(struct ccwchain *chain,
464 			     struct channel_program *cp);
465 
466 static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
467 {
468 	struct vfio_device *vdev =
469 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
470 	struct ccwchain *chain;
471 	int len, ret;
472 
473 	/* Copy 2K (the most we support today) of possible CCWs */
474 	len = copy_from_iova(vdev, cp->guest_cp, cda,
475 			     CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
476 	if (len)
477 		return len;
478 
479 	/* Convert any Format-0 CCWs to Format-1 */
480 	if (!cp->orb.cmd.fmt)
481 		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
482 
483 	/* Count the CCWs in the current chain */
484 	len = ccwchain_calc_length(cda, cp);
485 	if (len < 0)
486 		return len;
487 
488 	/* Need alloc a new chain for this one. */
489 	chain = ccwchain_alloc(cp, len);
490 	if (!chain)
491 		return -ENOMEM;
492 	chain->ch_iova = cda;
493 
494 	/* Copy the actual CCWs into the new chain */
495 	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
496 
497 	/* Loop for tics on this new chain. */
498 	ret = ccwchain_loop_tic(chain, cp);
499 
500 	if (ret)
501 		ccwchain_free(chain);
502 
503 	return ret;
504 }
505 
506 /* Loop for TICs. */
507 static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
508 {
509 	struct ccw1 *tic;
510 	int i, ret;
511 
512 	for (i = 0; i < chain->ch_len; i++) {
513 		tic = chain->ch_ccw + i;
514 
515 		if (!ccw_is_tic(tic))
516 			continue;
517 
518 		/* May transfer to an existing chain. */
519 		if (tic_target_chain_exists(tic, cp))
520 			continue;
521 
522 		/* Build a ccwchain for the next segment */
523 		ret = ccwchain_handle_ccw(tic->cda, cp);
524 		if (ret)
525 			return ret;
526 	}
527 
528 	return 0;
529 }
530 
531 static int ccwchain_fetch_tic(struct ccwchain *chain,
532 			      int idx,
533 			      struct channel_program *cp)
534 {
535 	struct ccw1 *ccw = chain->ch_ccw + idx;
536 	struct ccwchain *iter;
537 	u32 ccw_head;
538 
539 	list_for_each_entry(iter, &cp->ccwchain_list, next) {
540 		ccw_head = iter->ch_iova;
541 		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
542 			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
543 						     (ccw->cda - ccw_head));
544 			return 0;
545 		}
546 	}
547 
548 	return -EFAULT;
549 }
550 
551 static int ccwchain_fetch_direct(struct ccwchain *chain,
552 				 int idx,
553 				 struct channel_program *cp)
554 {
555 	struct vfio_device *vdev =
556 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
557 	struct ccw1 *ccw;
558 	struct page_array *pa;
559 	u64 iova;
560 	unsigned long *idaws;
561 	int ret;
562 	int bytes = 1;
563 	int idaw_nr, idal_len;
564 	int i;
565 
566 	ccw = chain->ch_ccw + idx;
567 
568 	if (ccw->count)
569 		bytes = ccw->count;
570 
571 	/* Calculate size of IDAL */
572 	if (ccw_is_idal(ccw)) {
573 		/* Read first IDAW to see if it's 4K-aligned or not. */
574 		/* All subsequent IDAws will be 4K-aligned. */
575 		ret = copy_from_iova(vdev, &iova, ccw->cda, sizeof(iova));
576 		if (ret)
577 			return ret;
578 	} else {
579 		iova = ccw->cda;
580 	}
581 	idaw_nr = idal_nr_words((void *)iova, bytes);
582 	idal_len = idaw_nr * sizeof(*idaws);
583 
584 	/* Allocate an IDAL from host storage */
585 	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
586 	if (!idaws) {
587 		ret = -ENOMEM;
588 		goto out_init;
589 	}
590 
591 	/*
592 	 * Allocate an array of pages to pin/translate.
593 	 * The number of pages is actually the count of the idaws
594 	 * required for the data transfer, since we only only support
595 	 * 4K IDAWs today.
596 	 */
597 	pa = chain->ch_pa + idx;
598 	ret = page_array_alloc(pa, iova, bytes);
599 	if (ret < 0)
600 		goto out_free_idaws;
601 
602 	if (ccw_is_idal(ccw)) {
603 		/* Copy guest IDAL into host IDAL */
604 		ret = copy_from_iova(vdev, idaws, ccw->cda, idal_len);
605 		if (ret)
606 			goto out_unpin;
607 
608 		/*
609 		 * Copy guest IDAWs into page_array, in case the memory they
610 		 * occupy is not contiguous.
611 		 */
612 		for (i = 0; i < idaw_nr; i++)
613 			pa->pa_iova[i] = idaws[i];
614 	} else {
615 		/*
616 		 * No action is required here; the iova addresses in page_array
617 		 * were initialized sequentially in page_array_alloc() beginning
618 		 * with the contents of ccw->cda.
619 		 */
620 	}
621 
622 	if (ccw_does_data_transfer(ccw)) {
623 		ret = page_array_pin(pa, vdev);
624 		if (ret < 0)
625 			goto out_unpin;
626 	} else {
627 		pa->pa_nr = 0;
628 	}
629 
630 	ccw->cda = (__u32) virt_to_phys(idaws);
631 	ccw->flags |= CCW_FLAG_IDA;
632 
633 	/* Populate the IDAL with pinned/translated addresses from page */
634 	page_array_idal_create_words(pa, idaws);
635 
636 	return 0;
637 
638 out_unpin:
639 	page_array_unpin_free(pa, vdev);
640 out_free_idaws:
641 	kfree(idaws);
642 out_init:
643 	ccw->cda = 0;
644 	return ret;
645 }
646 
647 /*
648  * Fetch one ccw.
649  * To reduce memory copy, we'll pin the cda page in memory,
650  * and to get rid of the cda 2G limitiaion of ccw1, we'll translate
651  * direct ccws to idal ccws.
652  */
653 static int ccwchain_fetch_one(struct ccwchain *chain,
654 			      int idx,
655 			      struct channel_program *cp)
656 {
657 	struct ccw1 *ccw = chain->ch_ccw + idx;
658 
659 	if (ccw_is_tic(ccw))
660 		return ccwchain_fetch_tic(chain, idx, cp);
661 
662 	return ccwchain_fetch_direct(chain, idx, cp);
663 }
664 
665 /**
666  * cp_init() - allocate ccwchains for a channel program.
667  * @cp: channel_program on which to perform the operation
668  * @mdev: the mediated device to perform pin/unpin operations
669  * @orb: control block for the channel program from the guest
670  *
671  * This creates one or more ccwchain(s), and copies the raw data of
672  * the target channel program from @orb->cmd.iova to the new ccwchain(s).
673  *
674  * Limitations:
675  * 1. Supports idal(c64) ccw chaining.
676  * 2. Supports 4k idaw.
677  *
678  * Returns:
679  *   %0 on success and a negative error value on failure.
680  */
681 int cp_init(struct channel_program *cp, union orb *orb)
682 {
683 	struct vfio_device *vdev =
684 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
685 	/* custom ratelimit used to avoid flood during guest IPL */
686 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
687 	int ret;
688 
689 	/* this is an error in the caller */
690 	if (cp->initialized)
691 		return -EBUSY;
692 
693 	/*
694 	 * We only support prefetching the channel program. We assume all channel
695 	 * programs executed by supported guests likewise support prefetching.
696 	 * Executing a channel program that does not specify prefetching will
697 	 * typically not cause an error, but a warning is issued to help identify
698 	 * the problem if something does break.
699 	 */
700 	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
701 		dev_warn(
702 			vdev->dev,
703 			"Prefetching channel program even though prefetch not specified in ORB");
704 
705 	INIT_LIST_HEAD(&cp->ccwchain_list);
706 	memcpy(&cp->orb, orb, sizeof(*orb));
707 
708 	/* Build a ccwchain for the first CCW segment */
709 	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
710 
711 	if (!ret) {
712 		cp->initialized = true;
713 
714 		/* It is safe to force: if it was not set but idals used
715 		 * ccwchain_calc_length would have returned an error.
716 		 */
717 		cp->orb.cmd.c64 = 1;
718 	}
719 
720 	return ret;
721 }
722 
723 
724 /**
725  * cp_free() - free resources for channel program.
726  * @cp: channel_program on which to perform the operation
727  *
728  * This unpins the memory pages and frees the memory space occupied by
729  * @cp, which must have been returned by a previous call to cp_init().
730  * Otherwise, undefined behavior occurs.
731  */
732 void cp_free(struct channel_program *cp)
733 {
734 	struct vfio_device *vdev =
735 		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
736 	struct ccwchain *chain, *temp;
737 	int i;
738 
739 	if (!cp->initialized)
740 		return;
741 
742 	cp->initialized = false;
743 	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
744 		for (i = 0; i < chain->ch_len; i++) {
745 			page_array_unpin_free(chain->ch_pa + i, vdev);
746 			ccwchain_cda_free(chain, i);
747 		}
748 		ccwchain_free(chain);
749 	}
750 }
751 
752 /**
753  * cp_prefetch() - translate a guest physical address channel program to
754  *                 a real-device runnable channel program.
755  * @cp: channel_program on which to perform the operation
756  *
757  * This function translates the guest-physical-address channel program
758  * and stores the result to ccwchain list. @cp must have been
759  * initialized by a previous call with cp_init(). Otherwise, undefined
760  * behavior occurs.
761  * For each chain composing the channel program:
762  * - On entry ch_len holds the count of CCWs to be translated.
763  * - On exit ch_len is adjusted to the count of successfully translated CCWs.
764  * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
765  *
766  * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
767  * as helpers to do ccw chain translation inside the kernel. Basically
768  * they accept a channel program issued by a virtual machine, and
769  * translate the channel program to a real-device runnable channel
770  * program.
771  *
772  * These APIs will copy the ccws into kernel-space buffers, and update
773  * the guest phsical addresses with their corresponding host physical
774  * addresses.  Then channel I/O device drivers could issue the
775  * translated channel program to real devices to perform an I/O
776  * operation.
777  *
778  * These interfaces are designed to support translation only for
779  * channel programs, which are generated and formatted by a
780  * guest. Thus this will make it possible for things like VFIO to
781  * leverage the interfaces to passthrough a channel I/O mediated
782  * device in QEMU.
783  *
784  * We support direct ccw chaining by translating them to idal ccws.
785  *
786  * Returns:
787  *   %0 on success and a negative error value on failure.
788  */
789 int cp_prefetch(struct channel_program *cp)
790 {
791 	struct ccwchain *chain;
792 	int len, idx, ret;
793 
794 	/* this is an error in the caller */
795 	if (!cp->initialized)
796 		return -EINVAL;
797 
798 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
799 		len = chain->ch_len;
800 		for (idx = 0; idx < len; idx++) {
801 			ret = ccwchain_fetch_one(chain, idx, cp);
802 			if (ret)
803 				goto out_err;
804 		}
805 	}
806 
807 	return 0;
808 out_err:
809 	/* Only cleanup the chain elements that were actually translated. */
810 	chain->ch_len = idx;
811 	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
812 		chain->ch_len = 0;
813 	}
814 	return ret;
815 }
816 
817 /**
818  * cp_get_orb() - get the orb of the channel program
819  * @cp: channel_program on which to perform the operation
820  * @intparm: new intparm for the returned orb
821  * @lpm: candidate value of the logical-path mask for the returned orb
822  *
823  * This function returns the address of the updated orb of the channel
824  * program. Channel I/O device drivers could use this orb to issue a
825  * ssch.
826  */
827 union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
828 {
829 	union orb *orb;
830 	struct ccwchain *chain;
831 	struct ccw1 *cpa;
832 
833 	/* this is an error in the caller */
834 	if (!cp->initialized)
835 		return NULL;
836 
837 	orb = &cp->orb;
838 
839 	orb->cmd.intparm = intparm;
840 	orb->cmd.fmt = 1;
841 	orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
842 
843 	if (orb->cmd.lpm == 0)
844 		orb->cmd.lpm = lpm;
845 
846 	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
847 	cpa = chain->ch_ccw;
848 	orb->cmd.cpa = (__u32) __pa(cpa);
849 
850 	return orb;
851 }
852 
853 /**
854  * cp_update_scsw() - update scsw for a channel program.
855  * @cp: channel_program on which to perform the operation
856  * @scsw: I/O results of the channel program and also the target to be
857  *        updated
858  *
859  * @scsw contains the I/O results of the channel program that pointed
860  * to by @cp. However what @scsw->cpa stores is a host physical
861  * address, which is meaningless for the guest, which is waiting for
862  * the I/O results.
863  *
864  * This function updates @scsw->cpa to its coressponding guest physical
865  * address.
866  */
867 void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
868 {
869 	struct ccwchain *chain;
870 	u32 cpa = scsw->cmd.cpa;
871 	u32 ccw_head;
872 
873 	if (!cp->initialized)
874 		return;
875 
876 	/*
877 	 * LATER:
878 	 * For now, only update the cmd.cpa part. We may need to deal with
879 	 * other portions of the schib as well, even if we don't return them
880 	 * in the ioctl directly. Path status changes etc.
881 	 */
882 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
883 		ccw_head = (u32)(u64)chain->ch_ccw;
884 		/*
885 		 * On successful execution, cpa points just beyond the end
886 		 * of the chain.
887 		 */
888 		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
889 			/*
890 			 * (cpa - ccw_head) is the offset value of the host
891 			 * physical ccw to its chain head.
892 			 * Adding this value to the guest physical ccw chain
893 			 * head gets us the guest cpa.
894 			 */
895 			cpa = chain->ch_iova + (cpa - ccw_head);
896 			break;
897 		}
898 	}
899 
900 	scsw->cmd.cpa = cpa;
901 }
902 
903 /**
904  * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
905  * @cp: channel_program on which to perform the operation
906  * @iova: the iova to check
907  * @length: the length to check from @iova
908  *
909  * If the @iova is currently pinned for the ccw chain, return true;
910  * else return false.
911  */
912 bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
913 {
914 	struct ccwchain *chain;
915 	int i;
916 
917 	if (!cp->initialized)
918 		return false;
919 
920 	list_for_each_entry(chain, &cp->ccwchain_list, next) {
921 		for (i = 0; i < chain->ch_len; i++)
922 			if (page_array_iova_pinned(chain->ch_pa + i, iova, length))
923 				return true;
924 	}
925 
926 	return false;
927 }
928