// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
 * some use it for entirely different regions:
 */
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
	if (!dev->dma_mask) {
		dev_err_once(dev, "DMA map on device without dma_mask\n");
	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
		dev_err_once(dev,
			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
	}
	WARN_ON_ONCE(1);
}

static inline dma_addr_t phys_to_dma_direct(struct device *dev,
		phys_addr_t phys)
{
	if (force_dma_unencrypted(dev))
		return __phys_to_dma(dev, phys);
	return phys_to_dma(dev, phys);
}

u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}

static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
		u64 *phys_mask)
{
	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
		dma_mask = dev->bus_dma_mask;

	if (force_dma_unencrypted(dev))
		*phys_mask = __dma_to_phys(dev, dma_mask);
	else
		*phys_mask = dma_to_phys(dev, dma_mask);

	/*
	 * Optimistically try the zone that the physical address mask falls
	 * into first.  If that returns memory that isn't actually addressable
	 * we will fallback to the next lower zone and try again.
	 *
	 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
	 * zones.
	 */
	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return GFP_DMA;
	if (*phys_mask <= DMA_BIT_MASK(32))
		return GFP_DMA32;
	return 0;
}

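/*
 * Check that the whole range [phys, phys + size) is reachable through the
 * direct mapping under both the coherent DMA mask and, if set, the bus
 * DMA mask.
 */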
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
	return phys_to_dma_direct(dev, phys) + size - 1 <=
			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}

struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	u64 phys_mask;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, alloc_size);
		page = NULL;
	}
again:
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, size);
		page = NULL;

		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

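/*
 * Allocate a coherent buffer and set *dma_handle to its device address.
 * Usually returns the zeroed kernel virtual address of the buffer; with
 * DMA_ATTR_NO_KERNEL_MAPPING the struct page is returned as an opaque
 * cookie instead.
 */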
void *dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* remove any dirty cache lines on the kernel alias */
		if (!PageHighMem(page))
			arch_dma_prep_coherent(page, size);
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		/* return the page pointer as the opaque cookie */
		return page;
	}

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= arguments and per-arch setup
		 * dma_alloc_contiguous could return highmem pages.
		 * Without remapping there is no way to return them here,
		 * so log an error and fail.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}

	ret = page_address(page);
	if (force_dma_unencrypted(dev)) {
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	memset(ret, 0, size);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs)) {
		arch_dma_prep_coherent(page, size);
		ret = uncached_kernel_address(ret);
	}

	return ret;
}

void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
	dma_free_contiguous(dev, page, size);
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* cpu_addr is a struct page cookie, not a kernel address */
		__dma_direct_free_pages(dev, size, cpu_addr);
		return;
	}

	if (force_dma_unencrypted(dev))
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		cpu_addr = cached_kernel_address(cpu_addr);
	__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}

void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

void dma_direct_free(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
	else
		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length,
					dir, SYNC_FOR_DEVICE);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(dev, paddr, sg->length,
					dir);
	}
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

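/*
 * The "for_cpu" sync and unmap paths below hand a buffer back to the CPU:
 * architecture cache maintenance is performed first, then any swiotlb
 * bounce buffer used for the mapping is synced back and, on unmap,
 * released.
 */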
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (!dev_is_dma_coherent(dev)) {
		arch_sync_dma_for_cpu(dev, paddr, size, dir);
		arch_sync_dma_for_cpu_all(dev);
	}

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
					SYNC_FOR_CPU);
	}

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
				attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	return swiotlb_force != SWIOTLB_FORCE &&
		dma_capable(dev, dma_addr, size);
}

dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);

int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nents;

out_unmap:
	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);

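/*
 * Map a raw physical resource (e.g. MMIO) for DMA.  The address is used
 * as-is and is never bounced through swiotlb; if it is not directly
 * addressable by the device, the mapping fails.
 */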
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t dma_addr = paddr;

	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them - either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
int dma_direct_supported(struct device *dev, u64 mask)
{
	u64 min_mask;

	if (IS_ENABLED(CONFIG_ZONE_DMA))
		min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
	else
		min_mask = DMA_BIT_MASK(32);

	min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

	/*
	 * This check needs to be against the actual bit mask value, so
	 * use __phys_to_dma() here so that the SME encryption mask isn't
	 * part of the check.
	 */
	return mask >= __phys_to_dma(dev, min_mask);
}

size_t dma_direct_max_mapping_size(struct device *dev)
{
	/* If SWIOTLB is active, use its maximum mapping size */
	if (is_swiotlb_active() &&
	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
		return swiotlb_max_mapping_size(dev);
	return SIZE_MAX;
}