xref: /linux/drivers/misc/cxl/cxllib.c (revision b9b77222d4ff6b5bb8f5d87fca20de0910618bb9)
1 /*
2  * Copyright 2017 IBM Corp.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  */
9 
10 #include <linux/hugetlb.h>
11 #include <linux/sched/mm.h>
12 #include <asm/pnv-pci.h>
13 #include <misc/cxllib.h>
14 
15 #include "cxl.h"
16 
/* Sentinel stored in dummy_read_addr until the dummy read buffer exists */
#define CXL_INVALID_DRA                 (~0ull)
/* Size, in bytes, of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE             128
/* log2 of the dummy read buffer alignment: (1 << 8) = 256 bytes */
#define CXL_DUMMY_READ_ALIGN            8
/* Reported as bar_addr / log_bar_size by cxllib_get_xsl_config() */
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE        48
/* Reported as the config version by cxllib_get_xsl_config() */
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
23 
24 
25 bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
26 {
27 	int rc;
28 	u32 phb_index;
29 	u64 chip_id, capp_unit_id;
30 
31 	/* No flags currently supported */
32 	if (flags)
33 		return false;
34 
35 	if (!cpu_has_feature(CPU_FTR_HVMODE))
36 		return false;
37 
38 	if (!cxl_is_power9())
39 		return false;
40 
41 	if (cxl_slot_is_switched(dev))
42 		return false;
43 
44 	/* on p9, some pci slots are not connected to a CAPP unit */
45 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
46 	if (rc)
47 		return false;
48 
49 	return true;
50 }
51 EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
52 
/*
 * Taken by cxllib_get_xsl_config() around the check-and-allocate of the
 * dummy read buffer.
 */
static DEFINE_MUTEX(dra_mutex);
/*
 * Physical address of the dummy read buffer (set by
 * allocate_dummy_read_buf()), or CXL_INVALID_DRA while the buffer has
 * not been allocated yet.
 */
static u64 dummy_read_addr = CXL_INVALID_DRA;
55 
56 static int allocate_dummy_read_buf(void)
57 {
58 	u64 buf, vaddr;
59 	size_t buf_size;
60 
61 	/*
62 	 * Dummy read buffer is 128-byte long, aligned on a
63 	 * 256-byte boundary and we need the physical address.
64 	 */
65 	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
66 	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
67 	if (!buf)
68 		return -ENOMEM;
69 
70 	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
71 					(~0ull << CXL_DUMMY_READ_ALIGN);
72 
73 	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
74 		"Dummy read buffer alignment issue");
75 	dummy_read_addr = virt_to_phys((void *) vaddr);
76 	return 0;
77 }
78 
79 int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
80 {
81 	int rc;
82 	u32 phb_index;
83 	u64 chip_id, capp_unit_id;
84 
85 	if (!cpu_has_feature(CPU_FTR_HVMODE))
86 		return -EINVAL;
87 
88 	mutex_lock(&dra_mutex);
89 	if (dummy_read_addr == CXL_INVALID_DRA) {
90 		rc = allocate_dummy_read_buf();
91 		if (rc) {
92 			mutex_unlock(&dra_mutex);
93 			return rc;
94 		}
95 	}
96 	mutex_unlock(&dra_mutex);
97 
98 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
99 	if (rc)
100 		return rc;
101 
102 	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
103 	if (rc)
104 		return rc;
105 	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
106 		/* workaround for DD1 - nbwind = capiind */
107 		cfg->dsnctl |= ((u64)0x02 << (63-47));
108 	}
109 
110 	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
111 	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
112 	cfg->bar_addr = CXL_CAPI_WINDOW_START;
113 	cfg->dra = dummy_read_addr;
114 	return 0;
115 }
116 EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
117 
118 int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
119 			unsigned long flags)
120 {
121 	int rc = 0;
122 
123 	if (!cpu_has_feature(CPU_FTR_HVMODE))
124 		return -EINVAL;
125 
126 	switch (mode) {
127 	case CXL_MODE_PCI:
128 		/*
129 		 * We currently don't support going back to PCI mode
130 		 * However, we'll turn the invalidations off, so that
131 		 * the firmware doesn't have to ack them and can do
132 		 * things like reset, etc.. with no worries.
133 		 * So always return EPERM (can't go back to PCI) or
134 		 * EBUSY if we couldn't even turn off snooping
135 		 */
136 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
137 		if (rc)
138 			rc = -EBUSY;
139 		else
140 			rc = -EPERM;
141 		break;
142 	case CXL_MODE_CXL:
143 		/* DMA only supported on TVT1 for the time being */
144 		if (flags != CXL_MODE_DMA_TVT1)
145 			return -EINVAL;
146 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
147 		if (rc)
148 			return rc;
149 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
150 		break;
151 	default:
152 		rc = -EINVAL;
153 	}
154 	return rc;
155 }
156 EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
157 
158 /*
159  * When switching the PHB to capi mode, the TVT#1 entry for
160  * the Partitionable Endpoint is set in bypass mode, like
161  * in PCI mode.
162  * Configure the device dma to use TVT#1, which is done
163  * by calling dma_set_mask() with a mask large enough.
164  */
165 int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
166 {
167 	int rc;
168 
169 	if (flags)
170 		return -EINVAL;
171 
172 	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
173 	return rc;
174 }
175 EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
176 
177 int cxllib_get_PE_attributes(struct task_struct *task,
178 			     unsigned long translation_mode,
179 			     struct cxllib_pe_attributes *attr)
180 {
181 	struct mm_struct *mm = NULL;
182 
183 	if (translation_mode != CXL_TRANSLATED_MODE &&
184 		translation_mode != CXL_REAL_MODE)
185 		return -EINVAL;
186 
187 	attr->sr = cxl_calculate_sr(false,
188 				task == NULL,
189 				translation_mode == CXL_REAL_MODE,
190 				true);
191 	attr->lpid = mfspr(SPRN_LPID);
192 	if (task) {
193 		mm = get_task_mm(task);
194 		if (mm == NULL)
195 			return -EINVAL;
196 		/*
197 		 * Caller is keeping a reference on mm_users for as long
198 		 * as XSL uses the memory context
199 		 */
200 		attr->pid = mm->context.id;
201 		mmput(mm);
202 		attr->tid = task->thread.tidr;
203 	} else {
204 		attr->pid = 0;
205 		attr->tid = 0;
206 	}
207 	return 0;
208 }
209 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
210 
211 static int get_vma_info(struct mm_struct *mm, u64 addr,
212 			u64 *vma_start, u64 *vma_end,
213 			unsigned long *page_size)
214 {
215 	struct vm_area_struct *vma = NULL;
216 	int rc = 0;
217 
218 	down_read(&mm->mmap_sem);
219 
220 	vma = find_vma(mm, addr);
221 	if (!vma) {
222 		rc = -EFAULT;
223 		goto out;
224 	}
225 	*page_size = vma_kernel_pagesize(vma);
226 	*vma_start = vma->vm_start;
227 	*vma_end = vma->vm_end;
228 out:
229 	up_read(&mm->mmap_sem);
230 	return rc;
231 }
232 
233 int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
234 {
235 	int rc;
236 	u64 dar, vma_start, vma_end;
237 	unsigned long page_size;
238 
239 	if (mm == NULL)
240 		return -EFAULT;
241 
242 	/*
243 	 * The buffer we have to process can extend over several pages
244 	 * and may also cover several VMAs.
245 	 * We iterate over all the pages. The page size could vary
246 	 * between VMAs.
247 	 */
248 	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
249 	if (rc)
250 		return rc;
251 
252 	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
253 	     dar += page_size) {
254 		if (dar < vma_start || dar >= vma_end) {
255 			/*
256 			 * We don't hold the mm->mmap_sem semaphore
257 			 * while iterating, since the semaphore is
258 			 * required by one of the lower-level page
259 			 * fault processing functions and it could
260 			 * create a deadlock.
261 			 *
262 			 * It means the VMAs can be altered between 2
263 			 * loop iterations and we could theoretically
264 			 * miss a page (however unlikely). But that's
265 			 * not really a problem, as the driver will
266 			 * retry access, get another page fault on the
267 			 * missing page and call us again.
268 			 */
269 			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
270 					&page_size);
271 			if (rc)
272 				return rc;
273 		}
274 
275 		rc = cxl_handle_mm_fault(mm, flags, dar);
276 		if (rc)
277 			return -EFAULT;
278 	}
279 	return 0;
280 }
281 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
282