// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

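/*
 * Each hop table exists in two copies: a physical one in the device
 * page-table area (allocated from mmu_pgt_pool and walked by the MMU H/W),
 * and a host-side shadow copy (kzalloc'ed in alloc_hop()) that the driver
 * reads instead of issuing slow reads over PCI. The helpers below keep the
 * two copies in sync: every PTE update goes to the physical table through
 * the ASIC write_pte callback and is mirrored into the shadow table.
 * For example, if a shadow PTE holds (next shadow hop | PAGE_PRESENT_MASK),
 * the value written to the H/W PTE is the matching physical hop address
 * with the same flag bits.
 */
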
/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
	u64 mask, shift;

	mask = mmu_prop->hop_masks[hop_idx];
	shift = mmu_prop->hop_shifts[hop_idx];
	return hop_addr_arr[hop_idx] +
			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates a shadow address inside a hop to its physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

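/*
 * DRAM default page mapping: on ASICs that support it, every user context
 * starts with the DRAM VA range covered by dram_size_for_default_page_mapping
 * pointing at a single default page (mmu_dram_default_page_addr). The layout
 * built below is hop0[0] -> hop1, hop1[0] -> hop2, and hop2 holds one PTE per
 * hop3 table; every hop3 PTE is a last-level (huge page) entry that points at
 * the default page.
 * For example (illustrative numbers only, not taken from a specific ASIC):
 * with 16 GB of DRAM mapped with 2 MB pages,
 * num_of_hop3 = 16 GB / 2 MB / 512 = 16 hop3 tables are needed.
 */
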
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

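/*
 * The device page-table area starts with one hop0 table per ASID
 * (mmu_hop0_tables_total_size bytes in total), which are statically reserved
 * and never freed; only the region that follows is handed to mmu_pgt_pool
 * below, from which alloc_hop() carves the dynamically allocated hops.
 */
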
/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Allocate the shadow hop0 tables (one per ASID).
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.dr.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Free the shadow hop0 tables.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

		/* Make sure that if we arrive here again without init having
		 * been called we won't cause a kernel panic. This can happen,
		 * for example, if we fail during hard reset code at certain
		 * points.
		 */
		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
	}
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to
 * hold all the page-table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}
}

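/*
 * Unmap flow: walk the shadow hops from hop0 down to the PTE that maps
 * virt_addr (hop3 for huge pages, hop4 otherwise), clear that PTE, and then
 * release the hops bottom-up; put_pte() frees a hop once its last in-use PTE
 * is gone. When the DRAM default mapping is enabled, DRAM PTEs are rewritten
 * to point back at the default page instead of being cleared.
 */
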
static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, clear_hop3 = true;
	int hop_idx;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto not_mapped;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	is_huge = curr_pte & mmu_prop->last_mask;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop_idx = MMU_HOP4;
		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[hop_idx] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		hop_idx = MMU_HOP3;
		write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
		put_pte(ctx, hop_addr[hop_idx]);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop_addr[MMU_HOP4])
			clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
		else
			clear_pte(ctx, hop_pte_addr[MMU_HOP3]);

		if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
			clear_pte(ctx, hop_pte_addr[hop_idx]);

			if (hop_idx == MMU_HOP0)
				break;

			if (put_pte(ctx, hop_addr[hop_idx]))
				goto mapped;
		}
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

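/*
 * Map flow: walk the required hops and allocate missing ones on the way,
 * write the final PTE with the physical address plus the last and present
 * bits, and then link every newly allocated hop into its parent hop.
 * get_pte()/put_pte() keep a per-hop count of in-use PTEs so that
 * intermediate hops can be reclaimed on unmap once they become empty.
 */
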
static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all three
	 * cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;

	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] =
					get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto err;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
			if (hop_new[hop_idx]) {
				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
				rc = -EFAULT;
				goto err;
			}
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
					*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
					hop_pte_addr[hop_idx]);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);

	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
		prev_hop = hop_idx - 1;

		if (hop_new[hop_idx]) {
			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
			if (hop_idx != MMU_HOP1)
				get_pte(ctx, hop_addr[prev_hop]);
		}
	}

	get_pte(ctx, hop_addr[num_hops - 1]);

	return 0;

err:
	/* start at the last hop that could have been allocated in this call */
	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--) {
		if (hop_new[hop_idx])
			free_hop(ctx, hop_addr[hop_idx]);
	}

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

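/*
 * Unlike the map/unmap flows above, which work on the shadow tables, the
 * TLB-info query below walks the physical page tables through the ASIC
 * read_pte callback, i.e. it reports what the H/W actually sees, and fills
 * hops->hop_info[] with the hop address, PTE address and PTE value of every
 * hop used to translate virt_addr.
 */
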
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
	int i, used_hops;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (is_pmmu_addr) {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	} else if (is_pmmu_h_addr) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		return -EINVAL;
	}

	used_hops = mmu_prop->num_hops;

	/* huge pages use one less hop */
	if (is_huge)
		used_hops--;

	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
					hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);

	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						virt_addr);
		hops->hop_info[i].hop_pte_val =
				hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if we passed over all hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v1_set_funcs - set the MMU functions for MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = hl_mmu_v1_map;
	mmu->unmap = hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}

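/*
 * Illustrative usage (assumed caller, not part of this file): the common MMU
 * code is expected to pick the page-table implementation per device and
 * register these callbacks, along the lines of:
 *
 *	hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 *
 * after which the generic map/unmap/TLB-info entry points dispatch to the
 * v1 handlers above.
 */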