1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /************************************************************************** 3 * 4 * Copyright 2019 VMware, Inc., Palo Alto, CA., USA 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 #include "vmwgfx_drv.h" 28 29 /* 30 * Different methods for tracking dirty: 31 * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits 32 * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write- 33 * accesses in the VM mkwrite() callback 34 */ 35 enum vmw_bo_dirty_method { 36 VMW_BO_DIRTY_PAGETABLE, 37 VMW_BO_DIRTY_MKWRITE, 38 }; 39 40 /* 41 * No dirtied pages at scan trigger a transition to the _MKWRITE method, 42 * similarly a certain percentage of dirty pages trigger a transition to 43 * the _PAGETABLE method. How many triggers should we wait for before 44 * changing method? 45 */ 46 #define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2 47 48 /* Percentage to trigger a transition to the _PAGETABLE method */ 49 #define VMW_DIRTY_PERCENTAGE 10 50 51 /** 52 * struct vmw_bo_dirty - Dirty information for buffer objects 53 * @start: First currently dirty bit 54 * @end: Last currently dirty bit + 1 55 * @method: The currently used dirty method 56 * @change_count: Number of consecutive method change triggers 57 * @ref_count: Reference count for this structure 58 * @bitmap_size: The size of the bitmap in bits. Typically equal to the 59 * nuber of pages in the bo. 60 * @size: The accounting size for this struct. 61 * @bitmap: A bitmap where each bit represents a page. A set bit means a 62 * dirty page. 63 */ 64 struct vmw_bo_dirty { 65 unsigned long start; 66 unsigned long end; 67 enum vmw_bo_dirty_method method; 68 unsigned int change_count; 69 unsigned int ref_count; 70 unsigned long bitmap_size; 71 size_t size; 72 unsigned long bitmap[]; 73 }; 74 75 /** 76 * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits 77 * @vbo: The buffer object to scan 78 * 79 * Scans the pagetable for dirty bits. Clear those bits and modify the 80 * dirty structure with the results. This function may change the 81 * dirty-tracking method. 82 */ 83 static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo) 84 { 85 struct vmw_bo_dirty *dirty = vbo->dirty; 86 pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node); 87 struct address_space *mapping = vbo->base.bdev->dev_mapping; 88 pgoff_t num_marked; 89 90 num_marked = clean_record_shared_mapping_range 91 (mapping, 92 offset, dirty->bitmap_size, 93 offset, &dirty->bitmap[0], 94 &dirty->start, &dirty->end); 95 if (num_marked == 0) 96 dirty->change_count++; 97 else 98 dirty->change_count = 0; 99 100 if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { 101 dirty->change_count = 0; 102 dirty->method = VMW_BO_DIRTY_MKWRITE; 103 wp_shared_mapping_range(mapping, 104 offset, dirty->bitmap_size); 105 clean_record_shared_mapping_range(mapping, 106 offset, dirty->bitmap_size, 107 offset, &dirty->bitmap[0], 108 &dirty->start, &dirty->end); 109 } 110 } 111 112 /** 113 * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method 114 * @vbo: The buffer object to scan 115 * 116 * Write-protect pages written to so that consecutive write accesses will 117 * trigger a call to mkwrite. 118 * 119 * This function may change the dirty-tracking method. 120 */ 121 static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) 122 { 123 struct vmw_bo_dirty *dirty = vbo->dirty; 124 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 125 struct address_space *mapping = vbo->base.bdev->dev_mapping; 126 pgoff_t num_marked; 127 128 if (dirty->end <= dirty->start) 129 return; 130 131 num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping, 132 dirty->start + offset, 133 dirty->end - dirty->start); 134 135 if (100UL * num_marked / dirty->bitmap_size > 136 VMW_DIRTY_PERCENTAGE) { 137 dirty->change_count++; 138 } else { 139 dirty->change_count = 0; 140 } 141 142 if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { 143 pgoff_t start = 0; 144 pgoff_t end = dirty->bitmap_size; 145 146 dirty->method = VMW_BO_DIRTY_PAGETABLE; 147 clean_record_shared_mapping_range(mapping, offset, end, offset, 148 &dirty->bitmap[0], 149 &start, &end); 150 bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size); 151 if (dirty->start < dirty->end) 152 bitmap_set(&dirty->bitmap[0], dirty->start, 153 dirty->end - dirty->start); 154 dirty->change_count = 0; 155 } 156 } 157 158 /** 159 * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty 160 * tracking structure 161 * @vbo: The buffer object to scan 162 * 163 * This function may change the dirty tracking method. 164 */ 165 void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) 166 { 167 struct vmw_bo_dirty *dirty = vbo->dirty; 168 169 if (dirty->method == VMW_BO_DIRTY_PAGETABLE) 170 vmw_bo_dirty_scan_pagetable(vbo); 171 else 172 vmw_bo_dirty_scan_mkwrite(vbo); 173 } 174 175 /** 176 * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before 177 * an unmap_mapping_range operation. 178 * @vbo: The buffer object, 179 * @start: First page of the range within the buffer object. 180 * @end: Last page of the range within the buffer object + 1. 181 * 182 * If we're using the _PAGETABLE scan method, we may leak dirty pages 183 * when calling unmap_mapping_range(). This function makes sure we pick 184 * up all dirty pages. 185 */ 186 static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, 187 pgoff_t start, pgoff_t end) 188 { 189 struct vmw_bo_dirty *dirty = vbo->dirty; 190 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 191 struct address_space *mapping = vbo->base.bdev->dev_mapping; 192 193 if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) 194 return; 195 196 wp_shared_mapping_range(mapping, start + offset, end - start); 197 clean_record_shared_mapping_range(mapping, start + offset, 198 end - start, offset, 199 &dirty->bitmap[0], &dirty->start, 200 &dirty->end); 201 } 202 203 /** 204 * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo 205 * @vbo: The buffer object, 206 * @start: First page of the range within the buffer object. 207 * @end: Last page of the range within the buffer object + 1. 208 * 209 * This is similar to ttm_bo_unmap_virtual() except it takes a subrange. 210 */ 211 void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, 212 pgoff_t start, pgoff_t end) 213 { 214 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); 215 struct address_space *mapping = vbo->base.bdev->dev_mapping; 216 217 vmw_bo_dirty_pre_unmap(vbo, start, end); 218 unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, 219 (loff_t) (end - start) << PAGE_SHIFT); 220 } 221 222 /** 223 * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object 224 * @vbo: The buffer object 225 * 226 * This function registers a dirty-tracking user to a buffer object. 227 * A user can be for example a resource or a vma in a special user-space 228 * mapping. 229 * 230 * Return: Zero on success, -ENOMEM on memory allocation failure. 231 */ 232 int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) 233 { 234 struct vmw_bo_dirty *dirty = vbo->dirty; 235 pgoff_t num_pages = vbo->base.num_pages; 236 size_t size, acc_size; 237 int ret; 238 static struct ttm_operation_ctx ctx = { 239 .interruptible = false, 240 .no_wait_gpu = false 241 }; 242 243 if (dirty) { 244 dirty->ref_count++; 245 return 0; 246 } 247 248 size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long); 249 acc_size = ttm_round_pot(size); 250 ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); 251 if (ret) { 252 VMW_DEBUG_USER("Out of graphics memory for buffer object " 253 "dirty tracker.\n"); 254 return ret; 255 } 256 dirty = kvzalloc(size, GFP_KERNEL); 257 if (!dirty) { 258 ret = -ENOMEM; 259 goto out_no_dirty; 260 } 261 262 dirty->size = acc_size; 263 dirty->bitmap_size = num_pages; 264 dirty->start = dirty->bitmap_size; 265 dirty->end = 0; 266 dirty->ref_count = 1; 267 if (num_pages < PAGE_SIZE / sizeof(pte_t)) { 268 dirty->method = VMW_BO_DIRTY_PAGETABLE; 269 } else { 270 struct address_space *mapping = vbo->base.bdev->dev_mapping; 271 pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node); 272 273 dirty->method = VMW_BO_DIRTY_MKWRITE; 274 275 /* Write-protect and then pick up already dirty bits */ 276 wp_shared_mapping_range(mapping, offset, num_pages); 277 clean_record_shared_mapping_range(mapping, offset, num_pages, 278 offset, 279 &dirty->bitmap[0], 280 &dirty->start, &dirty->end); 281 } 282 283 vbo->dirty = dirty; 284 285 return 0; 286 287 out_no_dirty: 288 ttm_mem_global_free(&ttm_mem_glob, acc_size); 289 return ret; 290 } 291 292 /** 293 * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object 294 * @vbo: The buffer object 295 * 296 * This function releases a dirty-tracking user from a buffer object. 297 * If the reference count reaches zero, then the dirty-tracking object is 298 * freed and the pointer to it cleared. 299 * 300 * Return: Zero on success, -ENOMEM on memory allocation failure. 301 */ 302 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo) 303 { 304 struct vmw_bo_dirty *dirty = vbo->dirty; 305 306 if (dirty && --dirty->ref_count == 0) { 307 size_t acc_size = dirty->size; 308 309 kvfree(dirty); 310 ttm_mem_global_free(&ttm_mem_glob, acc_size); 311 vbo->dirty = NULL; 312 } 313 } 314 315 /** 316 * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from 317 * its backing mob. 318 * @res: The resource 319 * 320 * This function will pick up all dirty ranges affecting the resource from 321 * it's backup mob, and call vmw_resource_dirty_update() once for each 322 * range. The transferred ranges will be cleared from the backing mob's 323 * dirty tracking. 324 */ 325 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res) 326 { 327 struct vmw_buffer_object *vbo = res->backup; 328 struct vmw_bo_dirty *dirty = vbo->dirty; 329 pgoff_t start, cur, end; 330 unsigned long res_start = res->backup_offset; 331 unsigned long res_end = res->backup_offset + res->backup_size; 332 333 WARN_ON_ONCE(res_start & ~PAGE_MASK); 334 res_start >>= PAGE_SHIFT; 335 res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); 336 337 if (res_start >= dirty->end || res_end <= dirty->start) 338 return; 339 340 cur = max(res_start, dirty->start); 341 res_end = max(res_end, dirty->end); 342 while (cur < res_end) { 343 unsigned long num; 344 345 start = find_next_bit(&dirty->bitmap[0], res_end, cur); 346 if (start >= res_end) 347 break; 348 349 end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1); 350 cur = end + 1; 351 num = end - start; 352 bitmap_clear(&dirty->bitmap[0], start, num); 353 vmw_resource_dirty_update(res, start, end); 354 } 355 356 if (res_start <= dirty->start && res_end > dirty->start) 357 dirty->start = res_end; 358 if (res_start < dirty->end && res_end >= dirty->end) 359 dirty->end = res_start; 360 } 361 362 /** 363 * vmw_bo_dirty_clear_res - Clear a resource's dirty region from 364 * its backing mob. 365 * @res: The resource 366 * 367 * This function will clear all dirty ranges affecting the resource from 368 * it's backup mob's dirty tracking. 369 */ 370 void vmw_bo_dirty_clear_res(struct vmw_resource *res) 371 { 372 unsigned long res_start = res->backup_offset; 373 unsigned long res_end = res->backup_offset + res->backup_size; 374 struct vmw_buffer_object *vbo = res->backup; 375 struct vmw_bo_dirty *dirty = vbo->dirty; 376 377 res_start >>= PAGE_SHIFT; 378 res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); 379 380 if (res_start >= dirty->end || res_end <= dirty->start) 381 return; 382 383 res_start = max(res_start, dirty->start); 384 res_end = min(res_end, dirty->end); 385 bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start); 386 387 if (res_start <= dirty->start && res_end > dirty->start) 388 dirty->start = res_end; 389 if (res_start < dirty->end && res_end >= dirty->end) 390 dirty->end = res_start; 391 } 392 393 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf) 394 { 395 struct vm_area_struct *vma = vmf->vma; 396 struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 397 vma->vm_private_data; 398 vm_fault_t ret; 399 unsigned long page_offset; 400 unsigned int save_flags; 401 struct vmw_buffer_object *vbo = 402 container_of(bo, typeof(*vbo), base); 403 404 /* 405 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly. 406 * So make sure the TTM helpers are aware. 407 */ 408 save_flags = vmf->flags; 409 vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY; 410 ret = ttm_bo_vm_reserve(bo, vmf); 411 vmf->flags = save_flags; 412 if (ret) 413 return ret; 414 415 page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node); 416 if (unlikely(page_offset >= bo->num_pages)) { 417 ret = VM_FAULT_SIGBUS; 418 goto out_unlock; 419 } 420 421 if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE && 422 !test_bit(page_offset, &vbo->dirty->bitmap[0])) { 423 struct vmw_bo_dirty *dirty = vbo->dirty; 424 425 __set_bit(page_offset, &dirty->bitmap[0]); 426 dirty->start = min(dirty->start, page_offset); 427 dirty->end = max(dirty->end, page_offset + 1); 428 } 429 430 out_unlock: 431 dma_resv_unlock(bo->base.resv); 432 return ret; 433 } 434 435 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) 436 { 437 struct vm_area_struct *vma = vmf->vma; 438 struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 439 vma->vm_private_data; 440 struct vmw_buffer_object *vbo = 441 container_of(bo, struct vmw_buffer_object, base); 442 pgoff_t num_prefault; 443 pgprot_t prot; 444 vm_fault_t ret; 445 446 ret = ttm_bo_vm_reserve(bo, vmf); 447 if (ret) 448 return ret; 449 450 num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 : 451 TTM_BO_VM_NUM_PREFAULT; 452 453 if (vbo->dirty) { 454 pgoff_t allowed_prefault; 455 unsigned long page_offset; 456 457 page_offset = vmf->pgoff - 458 drm_vma_node_start(&bo->base.vma_node); 459 if (page_offset >= bo->num_pages || 460 vmw_resources_clean(vbo, page_offset, 461 page_offset + PAGE_SIZE, 462 &allowed_prefault)) { 463 ret = VM_FAULT_SIGBUS; 464 goto out_unlock; 465 } 466 467 num_prefault = min(num_prefault, allowed_prefault); 468 } 469 470 /* 471 * If we don't track dirty using the MKWRITE method, make sure 472 * sure the page protection is write-enabled so we don't get 473 * a lot of unnecessary write faults. 474 */ 475 if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE) 476 prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); 477 else 478 prot = vm_get_page_prot(vma->vm_flags); 479 480 ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1); 481 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 482 return ret; 483 484 out_unlock: 485 dma_resv_unlock(bo->base.resv); 486 487 return ret; 488 } 489 490 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 491 vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, 492 enum page_entry_size pe_size) 493 { 494 struct vm_area_struct *vma = vmf->vma; 495 struct ttm_buffer_object *bo = (struct ttm_buffer_object *) 496 vma->vm_private_data; 497 struct vmw_buffer_object *vbo = 498 container_of(bo, struct vmw_buffer_object, base); 499 pgprot_t prot; 500 vm_fault_t ret; 501 pgoff_t fault_page_size; 502 bool write = vmf->flags & FAULT_FLAG_WRITE; 503 bool is_cow_mapping = 504 (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; 505 506 switch (pe_size) { 507 case PE_SIZE_PMD: 508 fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; 509 break; 510 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 511 case PE_SIZE_PUD: 512 fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; 513 break; 514 #endif 515 default: 516 WARN_ON_ONCE(1); 517 return VM_FAULT_FALLBACK; 518 } 519 520 /* Always do write dirty-tracking and COW on PTE level. */ 521 if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping)) 522 return VM_FAULT_FALLBACK; 523 524 ret = ttm_bo_vm_reserve(bo, vmf); 525 if (ret) 526 return ret; 527 528 if (vbo->dirty) { 529 pgoff_t allowed_prefault; 530 unsigned long page_offset; 531 532 page_offset = vmf->pgoff - 533 drm_vma_node_start(&bo->base.vma_node); 534 if (page_offset >= bo->num_pages || 535 vmw_resources_clean(vbo, page_offset, 536 page_offset + PAGE_SIZE, 537 &allowed_prefault)) { 538 ret = VM_FAULT_SIGBUS; 539 goto out_unlock; 540 } 541 542 /* 543 * Write protect, so we get a new fault on write, and can 544 * split. 545 */ 546 prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); 547 } else { 548 prot = vm_get_page_prot(vma->vm_flags); 549 } 550 551 ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); 552 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 553 return ret; 554 555 out_unlock: 556 dma_resv_unlock(bo->base.resv); 557 558 return ret; 559 } 560 #endif 561