xref: /linux/drivers/misc/cxl/cxllib.c (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2017 IBM Corp.
4  */
5 
6 #include <linux/hugetlb.h>
7 #include <linux/sched/mm.h>
8 #include <asm/opal-api.h>
9 #include <asm/pnv-pci.h>
10 #include <misc/cxllib.h>
11 
12 #include "cxl.h"
13 
/* Sentinel held by dummy_read_addr until the dummy read buffer is allocated */
#define CXL_INVALID_DRA                 (~0ull)
/* Length, in bytes, of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE             128
/* log2 of the dummy read buffer alignment (1 << 8 = 256 bytes) */
#define CXL_DUMMY_READ_ALIGN            8
/* Value reported as bar_addr by cxllib_get_xsl_config() */
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
/* Value reported as log_bar_size by cxllib_get_xsl_config() */
#define CXL_CAPI_WINDOW_LOG_SIZE        48
/* Value reported as version by cxllib_get_xsl_config() */
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
20 
21 
22 bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
23 {
24 	int rc;
25 	u32 phb_index;
26 	u64 chip_id, capp_unit_id;
27 
28 	/* No flags currently supported */
29 	if (flags)
30 		return false;
31 
32 	if (!cpu_has_feature(CPU_FTR_HVMODE))
33 		return false;
34 
35 	if (!cxl_is_power9())
36 		return false;
37 
38 	if (cxl_slot_is_switched(dev))
39 		return false;
40 
41 	/* on p9, some pci slots are not connected to a CAPP unit */
42 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
43 	if (rc)
44 		return false;
45 
46 	return true;
47 }
48 EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
49 
50 static DEFINE_MUTEX(dra_mutex);
51 static u64 dummy_read_addr = CXL_INVALID_DRA;
52 
53 static int allocate_dummy_read_buf(void)
54 {
55 	u64 buf, vaddr;
56 	size_t buf_size;
57 
58 	/*
59 	 * Dummy read buffer is 128-byte long, aligned on a
60 	 * 256-byte boundary and we need the physical address.
61 	 */
62 	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
63 	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
64 	if (!buf)
65 		return -ENOMEM;
66 
67 	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
68 					(~0ull << CXL_DUMMY_READ_ALIGN);
69 
70 	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
71 		"Dummy read buffer alignment issue");
72 	dummy_read_addr = virt_to_phys((void *) vaddr);
73 	return 0;
74 }
75 
76 int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
77 {
78 	int rc;
79 	u32 phb_index;
80 	u64 chip_id, capp_unit_id;
81 
82 	if (!cpu_has_feature(CPU_FTR_HVMODE))
83 		return -EINVAL;
84 
85 	mutex_lock(&dra_mutex);
86 	if (dummy_read_addr == CXL_INVALID_DRA) {
87 		rc = allocate_dummy_read_buf();
88 		if (rc) {
89 			mutex_unlock(&dra_mutex);
90 			return rc;
91 		}
92 	}
93 	mutex_unlock(&dra_mutex);
94 
95 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
96 	if (rc)
97 		return rc;
98 
99 	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
100 	if (rc)
101 		return rc;
102 
103 	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
104 	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
105 	cfg->bar_addr = CXL_CAPI_WINDOW_START;
106 	cfg->dra = dummy_read_addr;
107 	return 0;
108 }
109 EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
110 
111 int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
112 			unsigned long flags)
113 {
114 	int rc = 0;
115 
116 	if (!cpu_has_feature(CPU_FTR_HVMODE))
117 		return -EINVAL;
118 
119 	switch (mode) {
120 	case CXL_MODE_PCI:
121 		/*
122 		 * We currently don't support going back to PCI mode
123 		 * However, we'll turn the invalidations off, so that
124 		 * the firmware doesn't have to ack them and can do
125 		 * things like reset, etc.. with no worries.
126 		 * So always return EPERM (can't go back to PCI) or
127 		 * EBUSY if we couldn't even turn off snooping
128 		 */
129 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
130 		if (rc)
131 			rc = -EBUSY;
132 		else
133 			rc = -EPERM;
134 		break;
135 	case CXL_MODE_CXL:
136 		/* DMA only supported on TVT1 for the time being */
137 		if (flags != CXL_MODE_DMA_TVT1)
138 			return -EINVAL;
139 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
140 		if (rc)
141 			return rc;
142 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
143 		break;
144 	default:
145 		rc = -EINVAL;
146 	}
147 	return rc;
148 }
149 EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
150 
151 /*
152  * When switching the PHB to capi mode, the TVT#1 entry for
153  * the Partitionable Endpoint is set in bypass mode, like
154  * in PCI mode.
155  * Configure the device dma to use TVT#1, which is done
156  * by calling dma_set_mask() with a mask large enough.
157  */
158 int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
159 {
160 	int rc;
161 
162 	if (flags)
163 		return -EINVAL;
164 
165 	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
166 	return rc;
167 }
168 EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
169 
170 int cxllib_get_PE_attributes(struct task_struct *task,
171 			     unsigned long translation_mode,
172 			     struct cxllib_pe_attributes *attr)
173 {
174 	if (translation_mode != CXL_TRANSLATED_MODE &&
175 		translation_mode != CXL_REAL_MODE)
176 		return -EINVAL;
177 
178 	attr->sr = cxl_calculate_sr(false,
179 				task == NULL,
180 				translation_mode == CXL_REAL_MODE,
181 				true);
182 	attr->lpid = mfspr(SPRN_LPID);
183 	if (task) {
184 		struct mm_struct *mm = get_task_mm(task);
185 		if (mm == NULL)
186 			return -EINVAL;
187 		/*
188 		 * Caller is keeping a reference on mm_users for as long
189 		 * as XSL uses the memory context
190 		 */
191 		attr->pid = mm->context.id;
192 		mmput(mm);
193 		attr->tid = task->thread.tidr;
194 	} else {
195 		attr->pid = 0;
196 		attr->tid = 0;
197 	}
198 	return 0;
199 }
200 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
201 
202 static int get_vma_info(struct mm_struct *mm, u64 addr,
203 			u64 *vma_start, u64 *vma_end,
204 			unsigned long *page_size)
205 {
206 	struct vm_area_struct *vma = NULL;
207 	int rc = 0;
208 
209 	mmap_read_lock(mm);
210 
211 	vma = find_vma(mm, addr);
212 	if (!vma) {
213 		rc = -EFAULT;
214 		goto out;
215 	}
216 	*page_size = vma_kernel_pagesize(vma);
217 	*vma_start = vma->vm_start;
218 	*vma_end = vma->vm_end;
219 out:
220 	mmap_read_unlock(mm);
221 	return rc;
222 }
223 
224 int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
225 {
226 	int rc;
227 	u64 dar, vma_start, vma_end;
228 	unsigned long page_size;
229 
230 	if (mm == NULL)
231 		return -EFAULT;
232 
233 	/*
234 	 * The buffer we have to process can extend over several pages
235 	 * and may also cover several VMAs.
236 	 * We iterate over all the pages. The page size could vary
237 	 * between VMAs.
238 	 */
239 	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
240 	if (rc)
241 		return rc;
242 
243 	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
244 	     dar += page_size) {
245 		if (dar < vma_start || dar >= vma_end) {
246 			/*
247 			 * We don't hold mm->mmap_lock while iterating, since
248 			 * the lock is required by one of the lower-level page
249 			 * fault processing functions and it could
250 			 * create a deadlock.
251 			 *
252 			 * It means the VMAs can be altered between 2
253 			 * loop iterations and we could theoretically
254 			 * miss a page (however unlikely). But that's
255 			 * not really a problem, as the driver will
256 			 * retry access, get another page fault on the
257 			 * missing page and call us again.
258 			 */
259 			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
260 					&page_size);
261 			if (rc)
262 				return rc;
263 		}
264 
265 		rc = cxl_handle_mm_fault(mm, flags, dar);
266 		if (rc)
267 			return -EFAULT;
268 	}
269 	return 0;
270 }
271 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
272