// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/opal-api.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1


bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on P9, some PCI slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * The dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need its physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
		"Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

/*
 * Return the configuration needed by the XSL on the card: the dsnctl
 * value, the CAPI window address and size, and the physical address
 * of the dummy read buffer.
 */
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. with no worries.
		 * So always return EPERM (can't go back to PCI), or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to CAPI mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

/*
 * Gather the attributes (SR, LPID, PID, TID) needed to set up a
 * Process Element for 'task'. A NULL task describes a kernel context.
 */
int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
		translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * The caller keeps a reference on mm_users for as long
		 * as the XSL uses the memory context.
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

/*
 * Look up the VMA covering 'addr' and return its boundaries and
 * page size.
 */
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

/*
 * Fault in all the pages covering the buffer [addr, addr + size), so
 * that translations are available when the adapter accesses it.
 */
int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating, since
			 * the lock is required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely).
			 * But that's not really a problem, as the
			 * driver will retry access, get another page
			 * fault on the missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);