/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// hwloc compatibility shim: pick the spelling of the CPUSET location type that
// matches the hwloc API version. For API versions below 0x00020300 the memattr
// id enum is declared locally instead (presumably because those hwloc headers
// do not provide it -- TODO confirm against the hwloc headers).
#if KMP_USE_HWLOC
#if HWLOC_API_VERSION > 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
#elif HWLOC_API_VERSION == 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET                                         \
  hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
#else
enum hwloc_memattr_id_e {
  HWLOC_MEMATTR_ID_BANDWIDTH,
  HWLOC_MEMATTR_ID_CAPACITY
};
#endif
#endif // KMP_USE_HWLOC

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

/* Callbacks installed with bectl():
   - bget_compact_t: called by bget() as compact(size, sequence_number) when no
     free buffer fits; a nonzero return makes bget() retry the search.
   - bget_acquire_t: called with a byte count to obtain a new pool block or a
     directly acquired buffer; a NULL return is treated as failure.
   - bget_release_t: called with a block previously returned by the acquire
     callback. */
typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype: the sign of a buffer header's
   bsize field distinguishes free (positive) from allocated (negative)
   buffers. */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif // KMP_OS_WINDOWS

/* The three modes of operation are, fifo search, lifo search, and best-fit.
   The numeric values are what kmpc_set_poolmode()/kmpc_get_poolmode()
   exchange with callers. */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

/* Forward declarations of the core allocator entry points defined below. */
static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On some architectures, malloc() does not ensure 16 byte alignment,
   Solaris/sparc and x86 among them. */

#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
// In this file BufStats also gates the statistics counters in thr_data_t and
// the pool-block accounting (numpblk/last_pool) used by brel() and
// __kmp_finalize_bget().
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

/* Lower size bound of each free-list bin, in ascending order. bget_get_bin()
   searches this table to map a buffer size to a bin index. */
static bufsize bget_bin_size[] = {
    0,
    // 1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
    1 << 21, /* 2MB */
    1 << 22, /* 4MB */
    1 << 23, /* 8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

/* Number of bins, i.e. number of entries in bget_bin_size[]. */
#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links. Each bin's list head in thr_data_t::freelist is initialised to
   point at itself (see set_thr_data()), so the free lists are circular and
   doubly linked. */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool. bpool() stores
                       the pointer with its low bit set as a mark; readers
                       mask it off with "& ~1" before comparing. */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated.
                    0 marks a buffer acquired directly through acqfcn, and
                    ESent marks the dummy buffer at the end of a pool block. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
/* NOTE(review): when sizeof(bhead2_t) is already a multiple of SizeQuant the
   b_pad expression still adds one full extra SizeQuant of padding. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn). The common header is the
   last member, so the user data follows it exactly as in pooled buffers. */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers. The queue links occupy the start of what is the
   user area while the buffer is allocated (bget() hands out &b->ql). */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

/* Per-thread allocator state, stored in th->th.th_local.bget_data. */
typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS]; /* One list head (sentinel) per bin */
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: bget() rounds every request up to at least
   SizeQ so that a released buffer always has room for the queue links. */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
/* All bits set except the sign bit and the low (SizeQuant - 1) bits. */
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.
213 214 /* End sentinel: value placed in bsize field of dummy block delimiting 215 end of pool block. The most negative number which will fit in a 216 bufsize, defined in a way that the compiler will accept. */ 217 218 #define ESent \ 219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) 220 221 /* Thread Data management routines */ 222 static int bget_get_bin(bufsize size) { 223 // binary chop bins 224 int lo = 0, hi = MAX_BGET_BINS - 1; 225 226 KMP_DEBUG_ASSERT(size > 0); 227 228 while ((hi - lo) > 1) { 229 int mid = (lo + hi) >> 1; 230 if (size < bget_bin_size[mid]) 231 hi = mid - 1; 232 else 233 lo = mid; 234 } 235 236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS)); 237 238 return lo; 239 } 240 241 static void set_thr_data(kmp_info_t *th) { 242 int i; 243 thr_data_t *data; 244 245 data = (thr_data_t *)((!th->th.th_local.bget_data) 246 ? __kmp_allocate(sizeof(*data)) 247 : th->th.th_local.bget_data); 248 249 memset(data, '\0', sizeof(*data)); 250 251 for (i = 0; i < MAX_BGET_BINS; ++i) { 252 data->freelist[i].ql.flink = &data->freelist[i]; 253 data->freelist[i].ql.blink = &data->freelist[i]; 254 } 255 256 th->th.th_local.bget_data = data; 257 th->th.th_local.bget_list = 0; 258 #if !USE_CMP_XCHG_FOR_BGET 259 #ifdef USE_QUEUING_LOCK_FOR_BGET 260 __kmp_init_lock(&th->th.th_local.bget_lock); 261 #else 262 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock); 263 #endif /* USE_LOCK_FOR_BGET */ 264 #endif /* ! 
USE_CMP_XCHG_FOR_BGET */ 265 } 266 267 static thr_data_t *get_thr_data(kmp_info_t *th) { 268 thr_data_t *data; 269 270 data = (thr_data_t *)th->th.th_local.bget_data; 271 272 KMP_DEBUG_ASSERT(data != 0); 273 274 return data; 275 } 276 277 /* Walk the free list and release the enqueued buffers */ 278 static void __kmp_bget_dequeue(kmp_info_t *th) { 279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); 280 281 if (p != 0) { 282 #if USE_CMP_XCHG_FOR_BGET 283 { 284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); 285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, 286 CCAST(void *, old_value), nullptr)) { 287 KMP_CPU_PAUSE(); 288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); 289 } 290 p = CCAST(void *, old_value); 291 } 292 #else /* ! USE_CMP_XCHG_FOR_BGET */ 293 #ifdef USE_QUEUING_LOCK_FOR_BGET 294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); 295 #else 296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); 297 #endif /* USE_QUEUING_LOCK_FOR_BGET */ 298 299 p = (void *)th->th.th_local.bget_list; 300 th->th.th_local.bget_list = 0; 301 302 #ifdef USE_QUEUING_LOCK_FOR_BGET 303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); 304 #else 305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); 306 #endif 307 #endif /* USE_CMP_XCHG_FOR_BGET */ 308 309 /* Check again to make sure the list is not empty */ 310 while (p != 0) { 311 void *buf = p; 312 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t)); 313 314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); 315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == 316 (kmp_uintptr_t)th); // clear possible mark 317 KMP_DEBUG_ASSERT(b->ql.blink == 0); 318 319 p = (void *)b->ql.flink; 320 321 brel(th, buf); 322 } 323 } 324 } 325 326 /* Chain together the free buffers by using the thread owner field */ 327 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf 328 #ifdef USE_QUEUING_LOCK_FOR_BGET 329 , 330 kmp_int32 
rel_gtid 331 #endif 332 ) { 333 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t)); 334 335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); 336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == 337 (kmp_uintptr_t)th); // clear possible mark 338 339 b->ql.blink = 0; 340 341 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n", 342 __kmp_gtid_from_thread(th))); 343 344 #if USE_CMP_XCHG_FOR_BGET 345 { 346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); 347 /* the next pointer must be set before setting bget_list to buf to avoid 348 exposing a broken list to other threads, even for an instant. */ 349 b->ql.flink = BFH(CCAST(void *, old_value)); 350 351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, 352 CCAST(void *, old_value), buf)) { 353 KMP_CPU_PAUSE(); 354 old_value = TCR_PTR(th->th.th_local.bget_list); 355 /* the next pointer must be set before setting bget_list to buf to avoid 356 exposing a broken list to other threads, even for an instant. */ 357 b->ql.flink = BFH(CCAST(void *, old_value)); 358 } 359 } 360 #else /* ! 
USE_CMP_XCHG_FOR_BGET */ 361 #ifdef USE_QUEUING_LOCK_FOR_BGET 362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid); 363 #else 364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); 365 #endif 366 367 b->ql.flink = BFH(th->th.th_local.bget_list); 368 th->th.th_local.bget_list = (void *)buf; 369 370 #ifdef USE_QUEUING_LOCK_FOR_BGET 371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid); 372 #else 373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); 374 #endif 375 #endif /* USE_CMP_XCHG_FOR_BGET */ 376 } 377 378 /* insert buffer back onto a new freelist */ 379 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) { 380 int bin; 381 382 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0); 383 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0); 384 385 bin = bget_get_bin(b->bh.bb.bsize); 386 387 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink == 388 &thr->freelist[bin]); 389 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink == 390 &thr->freelist[bin]); 391 392 b->ql.flink = &thr->freelist[bin]; 393 b->ql.blink = thr->freelist[bin].ql.blink; 394 395 thr->freelist[bin].ql.blink = b; 396 b->ql.blink->ql.flink = b; 397 } 398 399 /* unlink the buffer from the old freelist */ 400 static void __kmp_bget_remove_from_freelist(bfhead_t *b) { 401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); 402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); 403 404 b->ql.blink->ql.flink = b->ql.flink; 405 b->ql.flink->ql.blink = b->ql.blink; 406 } 407 408 /* GET STATS -- check info on free list */ 409 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) { 410 thr_data_t *thr = get_thr_data(th); 411 int bin; 412 413 *total_free = *max_free = 0; 414 415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 416 bfhead_t *b, *best; 417 418 best = &thr->freelist[bin]; 419 b = best->ql.flink; 420 421 while (b != &thr->freelist[bin]) { 422 *total_free += (b->bh.bb.bsize - sizeof(bhead_t)); 423 if ((best == &thr->freelist[bin]) || 
(b->bh.bb.bsize < best->bh.bb.bsize)) 424 best = b; 425 426 /* Link to next buffer */ 427 b = b->ql.flink; 428 } 429 430 if (*max_free < best->bh.bb.bsize) 431 *max_free = best->bh.bb.bsize; 432 } 433 434 if (*max_free > (bufsize)sizeof(bhead_t)) 435 *max_free -= sizeof(bhead_t); 436 } 437 438 /* BGET -- Allocate a buffer. */ 439 static void *bget(kmp_info_t *th, bufsize requested_size) { 440 thr_data_t *thr = get_thr_data(th); 441 bufsize size = requested_size; 442 bfhead_t *b; 443 void *buf; 444 int compactseq = 0; 445 int use_blink = 0; 446 /* For BestFit */ 447 bfhead_t *best; 448 449 if (size < 0 || size + sizeof(bhead_t) > MaxSize) { 450 return NULL; 451 } 452 453 __kmp_bget_dequeue(th); /* Release any queued buffers */ 454 455 if (size < (bufsize)SizeQ) { // Need at least room for the queue links. 456 size = SizeQ; 457 } 458 #if defined(SizeQuant) && (SizeQuant > 1) 459 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); 460 #endif 461 462 size += sizeof(bhead_t); // Add overhead in allocated buffer to size required. 463 KMP_DEBUG_ASSERT(size >= 0); 464 KMP_DEBUG_ASSERT(size % SizeQuant == 0); 465 466 use_blink = (thr->mode == bget_mode_lifo); 467 468 /* If a compact function was provided in the call to bectl(), wrap 469 a loop around the allocation process to allow compaction to 470 intervene in case we don't find a suitable buffer in the chain. */ 471 472 for (;;) { 473 int bin; 474 475 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) { 476 /* Link to next buffer */ 477 b = (use_blink ? thr->freelist[bin].ql.blink 478 : thr->freelist[bin].ql.flink); 479 480 if (thr->mode == bget_mode_best) { 481 best = &thr->freelist[bin]; 482 483 /* Scan the free list searching for the first buffer big enough 484 to hold the requested size buffer. 
*/ 485 while (b != &thr->freelist[bin]) { 486 if (b->bh.bb.bsize >= (bufsize)size) { 487 if ((best == &thr->freelist[bin]) || 488 (b->bh.bb.bsize < best->bh.bb.bsize)) { 489 best = b; 490 } 491 } 492 493 /* Link to next buffer */ 494 b = (use_blink ? b->ql.blink : b->ql.flink); 495 } 496 b = best; 497 } 498 499 while (b != &thr->freelist[bin]) { 500 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) { 501 502 // Buffer is big enough to satisfy the request. Allocate it to the 503 // caller. We must decide whether the buffer is large enough to split 504 // into the part given to the caller and a free buffer that remains 505 // on the free list, or whether the entire buffer should be removed 506 // from the free list and given to the caller in its entirety. We 507 // only split the buffer if enough room remains for a header plus the 508 // minimum quantum of allocation. 509 if ((b->bh.bb.bsize - (bufsize)size) > 510 (bufsize)(SizeQ + (sizeof(bhead_t)))) { 511 bhead_t *ba, *bn; 512 513 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size)); 514 bn = BH(((char *)ba) + size); 515 516 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); 517 518 /* Subtract size from length of free block. */ 519 b->bh.bb.bsize -= (bufsize)size; 520 521 /* Link allocated buffer to the previous free buffer. */ 522 ba->bb.prevfree = b->bh.bb.bsize; 523 524 /* Plug negative size into user buffer. */ 525 ba->bb.bsize = -size; 526 527 /* Mark this buffer as owned by this thread. */ 528 TCW_PTR(ba->bb.bthr, 529 th); // not an allocated address (do not mark it) 530 /* Mark buffer after this one not preceded by free block. 
*/ 531 bn->bb.prevfree = 0; 532 533 // unlink buffer from old freelist, and reinsert into new freelist 534 __kmp_bget_remove_from_freelist(b); 535 __kmp_bget_insert_into_freelist(thr, b); 536 #if BufStats 537 thr->totalloc += (size_t)size; 538 thr->numget++; /* Increment number of bget() calls */ 539 #endif 540 buf = (void *)((((char *)ba) + sizeof(bhead_t))); 541 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 542 return buf; 543 } else { 544 bhead_t *ba; 545 546 ba = BH(((char *)b) + b->bh.bb.bsize); 547 548 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); 549 550 /* The buffer isn't big enough to split. Give the whole 551 shebang to the caller and remove it from the free list. */ 552 553 __kmp_bget_remove_from_freelist(b); 554 #if BufStats 555 thr->totalloc += (size_t)b->bh.bb.bsize; 556 thr->numget++; /* Increment number of bget() calls */ 557 #endif 558 /* Negate size to mark buffer allocated. */ 559 b->bh.bb.bsize = -(b->bh.bb.bsize); 560 561 /* Mark this buffer as owned by this thread. */ 562 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark) 563 /* Zero the back pointer in the next buffer in memory 564 to indicate that this buffer is allocated. */ 565 ba->bb.prevfree = 0; 566 567 /* Give user buffer starting at queue links. */ 568 buf = (void *)&(b->ql); 569 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 570 return buf; 571 } 572 } 573 574 /* Link to next buffer */ 575 b = (use_blink ? b->ql.blink : b->ql.flink); 576 } 577 } 578 579 /* We failed to find a buffer. If there's a compact function defined, 580 notify it of the size requested. If it returns TRUE, try the allocation 581 again. */ 582 583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { 584 break; 585 } 586 } 587 588 /* No buffer available with requested size free. */ 589 590 /* Don't give up yet -- look in the reserve supply. 
*/ 591 if (thr->acqfcn != 0) { 592 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) { 593 /* Request is too large to fit in a single expansion block. 594 Try to satisfy it by a direct buffer acquisition. */ 595 bdhead_t *bdh; 596 597 size += sizeof(bdhead_t) - sizeof(bhead_t); 598 599 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size)); 600 601 /* richryan */ 602 bdh = BDH((*thr->acqfcn)((bufsize)size)); 603 if (bdh != NULL) { 604 605 // Mark the buffer special by setting size field of its header to zero. 606 bdh->bh.bb.bsize = 0; 607 608 /* Mark this buffer as owned by this thread. */ 609 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, 610 // because direct buffer never goes to free list 611 bdh->bh.bb.prevfree = 0; 612 bdh->tsize = size; 613 #if BufStats 614 thr->totalloc += (size_t)size; 615 thr->numget++; /* Increment number of bget() calls */ 616 thr->numdget++; /* Direct bget() call count */ 617 #endif 618 buf = (void *)(bdh + 1); 619 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 620 return buf; 621 } 622 623 } else { 624 625 /* Try to obtain a new expansion block */ 626 void *newpool; 627 628 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr)); 629 630 /* richryan */ 631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr); 632 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0); 633 if (newpool != NULL) { 634 bpool(th, newpool, thr->exp_incr); 635 buf = bget( 636 th, requested_size); /* This can't, I say, can't get into a loop. */ 637 return buf; 638 } 639 } 640 } 641 642 /* Still no buffer available */ 643 644 return NULL; 645 } 646 647 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear 648 the entire contents of the buffer to zero, not just the 649 region requested by the caller. 
*/ 650 651 static void *bgetz(kmp_info_t *th, bufsize size) { 652 char *buf = (char *)bget(th, size); 653 654 if (buf != NULL) { 655 bhead_t *b; 656 bufsize rsize; 657 658 b = BH(buf - sizeof(bhead_t)); 659 rsize = -(b->bb.bsize); 660 if (rsize == 0) { 661 bdhead_t *bd; 662 663 bd = BDH(buf - sizeof(bdhead_t)); 664 rsize = bd->tsize - (bufsize)sizeof(bdhead_t); 665 } else { 666 rsize -= sizeof(bhead_t); 667 } 668 669 KMP_DEBUG_ASSERT(rsize >= size); 670 671 (void)memset(buf, 0, (bufsize)rsize); 672 } 673 return ((void *)buf); 674 } 675 676 /* BGETR -- Reallocate a buffer. This is a minimal implementation, 677 simply in terms of brel() and bget(). It could be 678 enhanced to allow the buffer to grow into adjacent free 679 blocks and to avoid moving data unnecessarily. */ 680 681 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) { 682 void *nbuf; 683 bufsize osize; /* Old size of buffer */ 684 bhead_t *b; 685 686 nbuf = bget(th, size); 687 if (nbuf == NULL) { /* Acquire new buffer */ 688 return NULL; 689 } 690 if (buf == NULL) { 691 return nbuf; 692 } 693 b = BH(((char *)buf) - sizeof(bhead_t)); 694 osize = -b->bb.bsize; 695 if (osize == 0) { 696 /* Buffer acquired directly through acqfcn. */ 697 bdhead_t *bd; 698 699 bd = BDH(((char *)buf) - sizeof(bdhead_t)); 700 osize = bd->tsize - (bufsize)sizeof(bdhead_t); 701 } else { 702 osize -= sizeof(bhead_t); 703 } 704 705 KMP_DEBUG_ASSERT(osize > 0); 706 707 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */ 708 (size_t)((size < osize) ? size : osize)); 709 brel(th, buf); 710 711 return nbuf; 712 } 713 714 /* BREL -- Release a buffer. */ 715 static void brel(kmp_info_t *th, void *buf) { 716 thr_data_t *thr = get_thr_data(th); 717 bfhead_t *b, *bn; 718 kmp_info_t *bth; 719 720 KMP_DEBUG_ASSERT(buf != NULL); 721 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); 722 723 b = BFH(((char *)buf) - sizeof(bhead_t)); 724 725 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? 
*/ 726 bdhead_t *bdh; 727 728 bdh = BDH(((char *)buf) - sizeof(bdhead_t)); 729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 730 #if BufStats 731 thr->totalloc -= (size_t)bdh->tsize; 732 thr->numdrel++; /* Number of direct releases */ 733 thr->numrel++; /* Increment number of brel() calls */ 734 #endif /* BufStats */ 735 #ifdef FreeWipe 736 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t))); 737 #endif /* FreeWipe */ 738 739 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh)); 740 741 KMP_DEBUG_ASSERT(thr->relfcn != 0); 742 (*thr->relfcn)((void *)bdh); /* Release it directly. */ 743 return; 744 } 745 746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & 747 ~1); // clear possible mark before comparison 748 if (bth != th) { 749 /* Add this buffer to be released by the owning thread later */ 750 __kmp_bget_enqueue(bth, buf 751 #ifdef USE_QUEUING_LOCK_FOR_BGET 752 , 753 __kmp_gtid_from_thread(th) 754 #endif 755 ); 756 return; 757 } 758 759 /* Buffer size must be negative, indicating that the buffer is allocated. */ 760 if (b->bh.bb.bsize >= 0) { 761 bn = NULL; 762 } 763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); 764 765 /* Back pointer in next buffer must be zero, indicating the same thing: */ 766 767 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0); 768 769 #if BufStats 770 thr->numrel++; /* Increment number of brel() calls */ 771 thr->totalloc += (size_t)b->bh.bb.bsize; 772 #endif 773 774 /* If the back link is nonzero, the previous buffer is free. */ 775 776 if (b->bh.bb.prevfree != 0) { 777 /* The previous buffer is free. Consolidate this buffer with it by adding 778 the length of this buffer to the previous free buffer. Note that we 779 subtract the size in the buffer being released, since it's negative to 780 indicate that the buffer is allocated. */ 781 bufsize size = b->bh.bb.bsize; 782 783 /* Make the previous buffer the one we're working on. 
*/ 784 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize == 785 b->bh.bb.prevfree); 786 b = BFH(((char *)b) - b->bh.bb.prevfree); 787 b->bh.bb.bsize -= size; 788 789 /* unlink the buffer from the old freelist */ 790 __kmp_bget_remove_from_freelist(b); 791 } else { 792 /* The previous buffer isn't allocated. Mark this buffer size as positive 793 (i.e. free) and fall through to place the buffer on the free list as an 794 isolated free block. */ 795 b->bh.bb.bsize = -b->bh.bb.bsize; 796 } 797 798 /* insert buffer back onto a new freelist */ 799 __kmp_bget_insert_into_freelist(thr, b); 800 801 /* Now we look at the next buffer in memory, located by advancing from 802 the start of this buffer by its size, to see if that buffer is 803 free. If it is, we combine this buffer with the next one in 804 memory, dechaining the second buffer from the free list. */ 805 bn = BFH(((char *)b) + b->bh.bb.bsize); 806 if (bn->bh.bb.bsize > 0) { 807 808 /* The buffer is free. Remove it from the free list and add 809 its size to that of our buffer. */ 810 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree == 811 bn->bh.bb.bsize); 812 813 __kmp_bget_remove_from_freelist(bn); 814 815 b->bh.bb.bsize += bn->bh.bb.bsize; 816 817 /* unlink the buffer from the old freelist, and reinsert it into the new 818 * freelist */ 819 __kmp_bget_remove_from_freelist(b); 820 __kmp_bget_insert_into_freelist(thr, b); 821 822 /* Finally, advance to the buffer that follows the newly 823 consolidated free block. We must set its backpointer to the 824 head of the consolidated free block. We know the next block 825 must be an allocated block because the process of recombination 826 guarantees that two free blocks will never be contiguous in 827 memory. 
*/ 828 bn = BFH(((char *)b) + b->bh.bb.bsize); 829 } 830 #ifdef FreeWipe 831 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, 832 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t))); 833 #endif 834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); 835 836 /* The next buffer is allocated. Set the backpointer in it to point 837 to this buffer; the previous free buffer in memory. */ 838 839 bn->bh.bb.prevfree = b->bh.bb.bsize; 840 841 /* If a block-release function is defined, and this free buffer 842 constitutes the entire block, release it. Note that pool_len 843 is defined in such a way that the test will fail unless all 844 pool blocks are the same size. */ 845 if (thr->relfcn != 0 && 846 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { 847 #if BufStats 848 if (thr->numpblk != 849 1) { /* Do not release the last buffer until finalization time */ 850 #endif 851 852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 853 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); 854 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == 855 b->bh.bb.bsize); 856 857 /* Unlink the buffer from the free list */ 858 __kmp_bget_remove_from_freelist(b); 859 860 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); 861 862 (*thr->relfcn)(b); 863 #if BufStats 864 thr->numprel++; /* Nr of expansion block releases */ 865 thr->numpblk--; /* Total number of blocks */ 866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 867 868 // avoid leaving stale last_pool pointer around if it is being dealloced 869 if (thr->last_pool == b) 870 thr->last_pool = 0; 871 } else { 872 thr->last_pool = b; 873 } 874 #endif /* BufStats */ 875 } 876 } 877 878 /* BECTL -- Establish automatic pool expansion control */ 879 static void bectl(kmp_info_t *th, bget_compact_t compact, 880 bget_acquire_t acquire, bget_release_t release, 881 bufsize pool_incr) { 882 thr_data_t *thr = get_thr_data(th); 883 884 thr->compfcn = compact; 885 thr->acqfcn = acquire; 886 thr->relfcn = release; 887 
thr->exp_incr = pool_incr; 888 } 889 890 /* BPOOL -- Add a region of memory to the buffer pool. */ 891 static void bpool(kmp_info_t *th, void *buf, bufsize len) { 892 /* int bin = 0; */ 893 thr_data_t *thr = get_thr_data(th); 894 bfhead_t *b = BFH(buf); 895 bhead_t *bn; 896 897 __kmp_bget_dequeue(th); /* Release any queued buffers */ 898 899 #ifdef SizeQuant 900 len &= ~((bufsize)(SizeQuant - 1)); 901 #endif 902 if (thr->pool_len == 0) { 903 thr->pool_len = len; 904 } else if (len != thr->pool_len) { 905 thr->pool_len = -1; 906 } 907 #if BufStats 908 thr->numpget++; /* Number of block acquisitions */ 909 thr->numpblk++; /* Number of blocks total */ 910 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 911 #endif /* BufStats */ 912 913 /* Since the block is initially occupied by a single free buffer, 914 it had better not be (much) larger than the largest buffer 915 whose size we can store in bhead.bb.bsize. */ 916 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1)); 917 918 /* Clear the backpointer at the start of the block to indicate that 919 there is no free block prior to this one. That blocks 920 recombination when the first block in memory is released. */ 921 b->bh.bb.prevfree = 0; 922 923 /* Create a dummy allocated buffer at the end of the pool. This dummy 924 buffer is seen when a buffer at the end of the pool is released and 925 blocks recombination of the last buffer with the dummy buffer at 926 the end. The length in the dummy buffer is set to the largest 927 negative number to denote the end of the pool for diagnostic 928 routines (this specific value is not counted on by the actual 929 allocation and release functions). */ 930 len -= sizeof(bhead_t); 931 b->bh.bb.bsize = (bufsize)len; 932 /* Set the owner of this buffer */ 933 TCW_PTR(b->bh.bb.bthr, 934 (kmp_info_t *)((kmp_uintptr_t)th | 935 1)); // mark the buffer as allocated address 936 937 /* Chain the new block to the free list. 
*/ 938 __kmp_bget_insert_into_freelist(thr, b); 939 940 #ifdef FreeWipe 941 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, 942 (size_t)(len - sizeof(bfhead_t))); 943 #endif 944 bn = BH(((char *)b) + len); 945 bn->bb.prevfree = (bufsize)len; 946 /* Definition of ESent assumes two's complement! */ 947 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0)); 948 949 bn->bb.bsize = ESent; 950 } 951 952 /* BFREED -- Dump the free lists for this thread. */ 953 static void bfreed(kmp_info_t *th) { 954 int bin = 0, count = 0; 955 int gtid = __kmp_gtid_from_thread(th); 956 thr_data_t *thr = get_thr_data(th); 957 958 #if BufStats 959 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC 960 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC 961 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC 962 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC 963 " drel=%" KMP_INT64_SPEC "\n", 964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget, 965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk, 966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel, 967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel); 968 #endif 969 970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 971 bfhead_t *b; 972 973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin]; 974 b = b->ql.flink) { 975 bufsize bs = b->bh.bb.bsize; 976 977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); 978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); 979 KMP_DEBUG_ASSERT(bs > 0); 980 981 count += 1; 982 983 __kmp_printf_no_lock( 984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, 985 (long)bs); 986 #ifdef FreeWipe 987 { 988 char *lerr = ((char *)b) + sizeof(bfhead_t); 989 if ((bs > sizeof(bfhead_t)) && 990 ((*lerr != 0x55) || 991 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != 992 0))) { 993 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above " 994 "free block have been overstored.)\n", 995 gtid); 996 } 997 } 998 #endif 999 } 1000 } 1001 1002 if (count == 0) 1003 
__kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid); 1004 } 1005 1006 void __kmp_initialize_bget(kmp_info_t *th) { 1007 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0)); 1008 1009 set_thr_data(th); 1010 1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free, 1012 (bufsize)__kmp_malloc_pool_incr); 1013 } 1014 1015 void __kmp_finalize_bget(kmp_info_t *th) { 1016 thr_data_t *thr; 1017 bfhead_t *b; 1018 1019 KMP_DEBUG_ASSERT(th != 0); 1020 1021 #if BufStats 1022 thr = (thr_data_t *)th->th.th_local.bget_data; 1023 KMP_DEBUG_ASSERT(thr != NULL); 1024 b = thr->last_pool; 1025 1026 /* If a block-release function is defined, and this free buffer constitutes 1027 the entire block, release it. Note that pool_len is defined in such a way 1028 that the test will fail unless all pool blocks are the same size. */ 1029 1030 // Deallocate the last pool if one exists because we no longer do it in brel() 1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && 1032 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { 1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); 1034 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); 1035 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == 1036 b->bh.bb.bsize); 1037 1038 /* Unlink the buffer from the free list */ 1039 __kmp_bget_remove_from_freelist(b); 1040 1041 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); 1042 1043 (*thr->relfcn)(b); 1044 thr->numprel++; /* Nr of expansion block releases */ 1045 thr->numpblk--; /* Total number of blocks */ 1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); 1047 } 1048 #endif /* BufStats */ 1049 1050 /* Deallocate bget_data */ 1051 if (th->th.th_local.bget_data != NULL) { 1052 __kmp_free(th->th.th_local.bget_data); 1053 th->th.th_local.bget_data = NULL; 1054 } 1055 } 1056 1057 void kmpc_set_poolsize(size_t size) { 1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc, 1059 
(bget_release_t)free, (bufsize)size); 1060 } 1061 1062 size_t kmpc_get_poolsize(void) { 1063 thr_data_t *p; 1064 1065 p = get_thr_data(__kmp_get_thread()); 1066 1067 return p->exp_incr; 1068 } 1069 1070 void kmpc_set_poolmode(int mode) { 1071 thr_data_t *p; 1072 1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo || 1074 mode == bget_mode_best) { 1075 p = get_thr_data(__kmp_get_thread()); 1076 p->mode = (bget_mode_t)mode; 1077 } 1078 } 1079 1080 int kmpc_get_poolmode(void) { 1081 thr_data_t *p; 1082 1083 p = get_thr_data(__kmp_get_thread()); 1084 1085 return p->mode; 1086 } 1087 1088 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) { 1089 kmp_info_t *th = __kmp_get_thread(); 1090 bufsize a, b; 1091 1092 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1093 1094 bcheck(th, &a, &b); 1095 1096 *maxmem = a; 1097 *allmem = b; 1098 } 1099 1100 void kmpc_poolprint(void) { 1101 kmp_info_t *th = __kmp_get_thread(); 1102 1103 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1104 1105 bfreed(th); 1106 } 1107 1108 #endif // #if KMP_USE_BGET 1109 1110 void *kmpc_malloc(size_t size) { 1111 void *ptr; 1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); 1113 if (ptr != NULL) { 1114 // save allocated pointer just before one returned to user 1115 *(void **)ptr = ptr; 1116 ptr = (void **)ptr + 1; 1117 } 1118 return ptr; 1119 } 1120 1121 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) 1122 1123 void *kmpc_aligned_malloc(size_t size, size_t alignment) { 1124 void *ptr; 1125 void *ptr_allocated; 1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big 1127 if (!IS_POWER_OF_TWO(alignment)) { 1128 // AC: do we need to issue a warning here? 
1129 errno = EINVAL; 1130 return NULL; 1131 } 1132 size = size + sizeof(void *) + alignment; 1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size); 1134 if (ptr_allocated != NULL) { 1135 // save allocated pointer just before one returned to user 1136 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) & 1137 ~(alignment - 1)); 1138 *((void **)ptr - 1) = ptr_allocated; 1139 } else { 1140 ptr = NULL; 1141 } 1142 return ptr; 1143 } 1144 1145 void *kmpc_calloc(size_t nelem, size_t elsize) { 1146 void *ptr; 1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr))); 1148 if (ptr != NULL) { 1149 // save allocated pointer just before one returned to user 1150 *(void **)ptr = ptr; 1151 ptr = (void **)ptr + 1; 1152 } 1153 return ptr; 1154 } 1155 1156 void *kmpc_realloc(void *ptr, size_t size) { 1157 void *result = NULL; 1158 if (ptr == NULL) { 1159 // If pointer is NULL, realloc behaves like malloc. 1160 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); 1161 // save allocated pointer just before one returned to user 1162 if (result != NULL) { 1163 *(void **)result = result; 1164 result = (void **)result + 1; 1165 } 1166 } else if (size == 0) { 1167 // If size is 0, realloc behaves like free. 1168 // The thread must be registered by the call to kmpc_malloc() or 1169 // kmpc_calloc() before. 1170 // So it should be safe to call __kmp_get_thread(), not 1171 // __kmp_entry_thread(). 
1172 KMP_ASSERT(*((void **)ptr - 1)); 1173 brel(__kmp_get_thread(), *((void **)ptr - 1)); 1174 } else { 1175 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1), 1176 (bufsize)(size + sizeof(ptr))); 1177 if (result != NULL) { 1178 *(void **)result = result; 1179 result = (void **)result + 1; 1180 } 1181 } 1182 return result; 1183 } 1184 1185 // NOTE: the library must have already been initialized by a previous allocate 1186 void kmpc_free(void *ptr) { 1187 if (!__kmp_init_serial) { 1188 return; 1189 } 1190 if (ptr != NULL) { 1191 kmp_info_t *th = __kmp_get_thread(); 1192 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1193 // extract allocated pointer and free it 1194 KMP_ASSERT(*((void **)ptr - 1)); 1195 brel(th, *((void **)ptr - 1)); 1196 } 1197 } 1198 1199 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) { 1200 void *ptr; 1201 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th, 1202 (int)size KMP_SRC_LOC_PARM)); 1203 ptr = bget(th, (bufsize)size); 1204 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr)); 1205 return ptr; 1206 } 1207 1208 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, 1209 size_t elsize KMP_SRC_LOC_DECL) { 1210 void *ptr; 1211 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th, 1212 (int)nelem, (int)elsize KMP_SRC_LOC_PARM)); 1213 ptr = bgetz(th, (bufsize)(nelem * elsize)); 1214 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr)); 1215 return ptr; 1216 } 1217 1218 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, 1219 size_t size KMP_SRC_LOC_DECL) { 1220 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th, 1221 ptr, (int)size KMP_SRC_LOC_PARM)); 1222 ptr = bgetr(th, ptr, (bufsize)size); 1223 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr)); 1224 return ptr; 1225 } 1226 1227 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) { 1228 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p 
) called from %s:%d\n", th, 1229 ptr KMP_SRC_LOC_PARM)); 1230 if (ptr != NULL) { 1231 __kmp_bget_dequeue(th); /* Release any queued buffers */ 1232 brel(th, ptr); 1233 } 1234 KE_TRACE(30, ("<- __kmp_thread_free()\n")); 1235 } 1236 1237 /* OMP 5.0 Memory Management support */ 1238 static const char *kmp_mk_lib_name; 1239 static void *h_memkind; 1240 /* memkind experimental API: */ 1241 // memkind_alloc 1242 static void *(*kmp_mk_alloc)(void *k, size_t sz); 1243 // memkind_free 1244 static void (*kmp_mk_free)(void *kind, void *ptr); 1245 // memkind_check_available 1246 static int (*kmp_mk_check)(void *kind); 1247 // kinds we are going to use 1248 static void **mk_default; 1249 static void **mk_interleave; 1250 static void **mk_hbw; 1251 static void **mk_hbw_interleave; 1252 static void **mk_hbw_preferred; 1253 static void **mk_hugetlb; 1254 static void **mk_hbw_hugetlb; 1255 static void **mk_hbw_preferred_hugetlb; 1256 static void **mk_dax_kmem; 1257 static void **mk_dax_kmem_all; 1258 static void **mk_dax_kmem_preferred; 1259 static void *(*kmp_target_alloc_host)(size_t size, int device); 1260 static void *(*kmp_target_alloc_shared)(size_t size, int device); 1261 static void *(*kmp_target_alloc_device)(size_t size, int device); 1262 static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device); 1263 static void *(*kmp_target_unlock_mem)(void *ptr, int device); 1264 static void *(*kmp_target_free_host)(void *ptr, int device); 1265 static void *(*kmp_target_free_shared)(void *ptr, int device); 1266 static void *(*kmp_target_free_device)(void *ptr, int device); 1267 static bool __kmp_target_mem_available; 1268 1269 #define KMP_IS_TARGET_MEM_SPACE(MS) \ 1270 (MS == llvm_omp_target_host_mem_space || \ 1271 MS == llvm_omp_target_shared_mem_space || \ 1272 MS == llvm_omp_target_device_mem_space) 1273 1274 #define KMP_IS_TARGET_MEM_ALLOC(MA) \ 1275 (MA == llvm_omp_target_host_mem_alloc || \ 1276 MA == llvm_omp_target_shared_mem_alloc || \ 1277 MA == 
llvm_omp_target_device_mem_alloc) 1278 1279 #define KMP_IS_PREDEF_MEM_SPACE(MS) \ 1280 (MS == omp_null_mem_space || MS == omp_default_mem_space || \ 1281 MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \ 1282 MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \ 1283 KMP_IS_TARGET_MEM_SPACE(MS)) 1284 1285 /// Support OMP 6.0 target memory management 1286 /// Expected offload runtime entries. 1287 /// 1288 /// Returns number of resources and list of unique resource IDs in "resouces". 1289 /// Runtime needs to invoke this twice to get the number of resources, allocate 1290 /// space for the resource IDs, and finally let offload runtime write resource 1291 /// IDs in "resources". 1292 /// int __tgt_get_mem_resources(int num_devices, const int *devices, 1293 /// int host_access, omp_memspace_handle_t memspace, 1294 /// int *resources); 1295 /// 1296 /// Redirects omp_alloc call to offload runtime. 1297 /// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator); 1298 /// 1299 /// Redirects omp_free call to offload runtime. 
1300 /// void __tgt_omp_free(void *ptr, omp_allocator_handle_t); 1301 class kmp_tgt_allocator_t { 1302 bool supported = false; 1303 using get_mem_resources_t = int (*)(int, const int *, int, 1304 omp_memspace_handle_t, int *); 1305 using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t); 1306 using omp_free_t = void (*)(void *, omp_allocator_handle_t); 1307 get_mem_resources_t tgt_get_mem_resources = nullptr; 1308 omp_alloc_t tgt_omp_alloc = nullptr; 1309 omp_free_t tgt_omp_free = nullptr; 1310 1311 public: 1312 /// Initialize interface with offload runtime 1313 void init() { 1314 tgt_get_mem_resources = 1315 (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources"); 1316 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc"); 1317 tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free"); 1318 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free; 1319 } 1320 /// Obtain resource information from offload runtime. We assume offload 1321 /// runtime backends maintain a list of unique resource IDS. 1322 int get_mem_resources(int ndevs, const int *devs, int host, 1323 omp_memspace_handle_t memspace, int *resources) { 1324 if (supported) 1325 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources); 1326 return 0; 1327 } 1328 /// Invoke offload runtime's memory allocation routine 1329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 1330 if (supported) 1331 return tgt_omp_alloc(size, allocator); 1332 return nullptr; 1333 } 1334 /// Invoke offload runtime's memory deallocation routine 1335 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 1336 if (supported) 1337 tgt_omp_free(ptr, allocator); 1338 } 1339 } __kmp_tgt_allocator; 1340 1341 extern "C" int omp_get_num_devices(void); 1342 1343 /// Maintain a list of target memory spaces that are identified with the 1344 /// requested information. There will be only one unique memory space object 1345 /// that matches the input. 
1346 class kmp_tgt_memspace_list_t { 1347 kmp_memspace_t *memspace_list = nullptr; 1348 KMP_LOCK_INIT(mtx); 1349 /// Find memory space that matches the provided input 1350 kmp_memspace_t *find(int num_resources, const int *resources, 1351 omp_memspace_handle_t memspace) { 1352 kmp_memspace_t *ms = memspace_list; 1353 while (ms) { 1354 if (ms->num_resources == num_resources && ms->memspace == memspace && 1355 !memcmp(ms->resources, resources, sizeof(int) * num_resources)) 1356 break; 1357 ms = ms->next; 1358 } 1359 return ms; 1360 } 1361 /// Return memory space for the provided input. It tries to find existing 1362 /// memory space that exactly matches the provided input or create one if 1363 /// not found. 1364 omp_memspace_handle_t get(int num_resources, const int *resources, 1365 omp_memspace_handle_t memspace) { 1366 int gtid = __kmp_entry_gtid(); 1367 __kmp_acquire_lock(&mtx, gtid); 1368 // Sort absolute IDs in the resource list 1369 int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources); 1370 KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int)); 1371 qsort(sorted_resources, (size_t)num_resources, sizeof(int), 1372 [](const void *a, const void *b) { 1373 const int val_a = *(const int *)a; 1374 const int val_b = *(const int *)b; 1375 return (val_a > val_b) ? 1 : ((val_a < val_b) ? 
-1 : 0); 1376 }); 1377 kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace); 1378 if (ms) { 1379 __kmp_free(sorted_resources); 1380 __kmp_release_lock(&mtx, gtid); 1381 return ms; 1382 } 1383 ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t)); 1384 ms->memspace = memspace; 1385 ms->num_resources = num_resources; 1386 ms->resources = sorted_resources; 1387 ms->next = memspace_list; 1388 memspace_list = ms; 1389 __kmp_release_lock(&mtx, gtid); 1390 return ms; 1391 } 1392 1393 public: 1394 /// Initialize memory space list 1395 void init() { __kmp_init_lock(&mtx); } 1396 /// Release resources for the memory space list 1397 void fini() { 1398 kmp_memspace_t *ms = memspace_list; 1399 while (ms) { 1400 if (ms->resources) 1401 __kmp_free(ms->resources); 1402 kmp_memspace_t *tmp = ms; 1403 ms = ms->next; 1404 __kmp_free(tmp); 1405 } 1406 __kmp_destroy_lock(&mtx); 1407 } 1408 /// Return memory space for the provided input 1409 omp_memspace_handle_t get_memspace(int num_devices, const int *devices, 1410 int host_access, 1411 omp_memspace_handle_t memspace) { 1412 int actual_num_devices = num_devices; 1413 int *actual_devices = const_cast<int *>(devices); 1414 if (actual_num_devices == 0) { 1415 actual_num_devices = omp_get_num_devices(); 1416 if (actual_num_devices <= 0) 1417 return omp_null_mem_space; 1418 } 1419 if (actual_devices == NULL) { 1420 // Prepare list of all devices in this case. 
1421 actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices); 1422 for (int i = 0; i < actual_num_devices; i++) 1423 actual_devices[i] = i; 1424 } 1425 // Get the number of available resources first 1426 int num_resources = __kmp_tgt_allocator.get_mem_resources( 1427 actual_num_devices, actual_devices, host_access, memspace, NULL); 1428 if (num_resources <= 0) 1429 return omp_null_mem_space; // No available resources 1430 1431 omp_memspace_handle_t ms = omp_null_mem_space; 1432 if (num_resources > 0) { 1433 int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources); 1434 // Let offload runtime write the resource IDs 1435 num_resources = __kmp_tgt_allocator.get_mem_resources( 1436 actual_num_devices, actual_devices, host_access, memspace, resources); 1437 ms = get(num_resources, resources, memspace); 1438 __kmp_free(resources); 1439 } 1440 if (!devices && actual_devices) 1441 __kmp_free(actual_devices); 1442 return ms; 1443 } 1444 /// Return sub memory space from the parent memory space 1445 omp_memspace_handle_t get_memspace(int num_resources, const int *resources, 1446 omp_memspace_handle_t parent) { 1447 kmp_memspace_t *ms = (kmp_memspace_t *)parent; 1448 return get(num_resources, resources, ms->memspace); 1449 } 1450 } __kmp_tgt_memspace_list; 1451 1452 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN 1453 static inline void chk_kind(void ***pkind) { 1454 KMP_DEBUG_ASSERT(pkind); 1455 if (*pkind) // symbol found 1456 if (kmp_mk_check(**pkind)) // kind not available or error 1457 *pkind = NULL; 1458 } 1459 #endif 1460 1461 void __kmp_init_memkind() { 1462 // as of 2018-07-31 memkind does not support Windows*, exclude it for now 1463 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN 1464 // use of statically linked memkind is problematic, as it depends on libnuma 1465 kmp_mk_lib_name = "libmemkind.so"; 1466 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY); 1467 if (h_memkind) { 1468 kmp_mk_check = (int (*)(void *))dlsym(h_memkind, 
"memkind_check_available"); 1469 kmp_mk_alloc = 1470 (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc"); 1471 kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free"); 1472 mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT"); 1473 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default && 1474 !kmp_mk_check(*mk_default)) { 1475 __kmp_memkind_available = 1; 1476 mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE"); 1477 chk_kind(&mk_interleave); 1478 mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW"); 1479 chk_kind(&mk_hbw); 1480 mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE"); 1481 chk_kind(&mk_hbw_interleave); 1482 mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED"); 1483 chk_kind(&mk_hbw_preferred); 1484 mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB"); 1485 chk_kind(&mk_hugetlb); 1486 mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB"); 1487 chk_kind(&mk_hbw_hugetlb); 1488 mk_hbw_preferred_hugetlb = 1489 (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB"); 1490 chk_kind(&mk_hbw_preferred_hugetlb); 1491 mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM"); 1492 chk_kind(&mk_dax_kmem); 1493 mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL"); 1494 chk_kind(&mk_dax_kmem_all); 1495 mk_dax_kmem_preferred = 1496 (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED"); 1497 chk_kind(&mk_dax_kmem_preferred); 1498 KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n")); 1499 return; // success 1500 } 1501 dlclose(h_memkind); // failure 1502 } 1503 #else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB) 1504 kmp_mk_lib_name = ""; 1505 #endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB) 1506 h_memkind = NULL; 1507 kmp_mk_check = NULL; 1508 kmp_mk_alloc = NULL; 1509 kmp_mk_free = NULL; 1510 mk_default = NULL; 1511 mk_interleave = NULL; 1512 mk_hbw = NULL; 1513 mk_hbw_interleave = NULL; 1514 mk_hbw_preferred = NULL; 1515 mk_hugetlb = NULL; 
1516 mk_hbw_hugetlb = NULL; 1517 mk_hbw_preferred_hugetlb = NULL; 1518 mk_dax_kmem = NULL; 1519 mk_dax_kmem_all = NULL; 1520 mk_dax_kmem_preferred = NULL; 1521 } 1522 1523 void __kmp_fini_memkind() { 1524 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 1525 if (__kmp_memkind_available) 1526 KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n")); 1527 if (h_memkind) { 1528 dlclose(h_memkind); 1529 h_memkind = NULL; 1530 } 1531 kmp_mk_check = NULL; 1532 kmp_mk_alloc = NULL; 1533 kmp_mk_free = NULL; 1534 mk_default = NULL; 1535 mk_interleave = NULL; 1536 mk_hbw = NULL; 1537 mk_hbw_interleave = NULL; 1538 mk_hbw_preferred = NULL; 1539 mk_hugetlb = NULL; 1540 mk_hbw_hugetlb = NULL; 1541 mk_hbw_preferred_hugetlb = NULL; 1542 mk_dax_kmem = NULL; 1543 mk_dax_kmem_all = NULL; 1544 mk_dax_kmem_preferred = NULL; 1545 #endif 1546 } 1547 1548 #if KMP_USE_HWLOC 1549 static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) { 1550 #if HWLOC_API_VERSION >= 0x00020300 1551 const hwloc_topology_support *support; 1552 support = hwloc_topology_get_support(__kmp_hwloc_topology); 1553 if (support) { 1554 if (policy == HWLOC_MEMBIND_BIND) 1555 return (support->membind->alloc_membind && 1556 support->membind->bind_membind); 1557 if (policy == HWLOC_MEMBIND_INTERLEAVE) 1558 return (support->membind->alloc_membind && 1559 support->membind->interleave_membind); 1560 } 1561 return false; 1562 #else 1563 return false; 1564 #endif 1565 } 1566 1567 void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size, 1568 hwloc_membind_policy_t policy) { 1569 #if HWLOC_API_VERSION >= 0x00020300 1570 void *ptr = NULL; 1571 hwloc_obj_t node; 1572 struct hwloc_location initiator; 1573 int ret; 1574 // TODO: We should make this more efficient by getting rid of the OS syscall 1575 // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead 1576 // use th_affin_mask field when it's capable of getting the underlying 1577 // mask implementation. 
1578 hwloc_cpuset_t mask = hwloc_bitmap_alloc(); 1579 ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); 1580 if (ret < 0) { 1581 hwloc_bitmap_free(mask); 1582 return ptr; 1583 } 1584 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET; 1585 initiator.location.cpuset = mask; 1586 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0, 1587 &node, NULL); 1588 if (ret < 0) { 1589 return ptr; 1590 } 1591 return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy, 1592 HWLOC_MEMBIND_BYNODESET); 1593 #else 1594 return NULL; 1595 #endif 1596 } 1597 1598 void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size, 1599 hwloc_membind_policy_t policy) { 1600 #if HWLOC_API_VERSION >= 0x00020300 1601 void *ptr = NULL; 1602 if (ms == omp_high_bw_mem_space) { 1603 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy); 1604 } else if (ms == omp_large_cap_mem_space) { 1605 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy); 1606 } else { 1607 ptr = hwloc_alloc(__kmp_hwloc_topology, size); 1608 } 1609 return ptr; 1610 #else 1611 return NULL; 1612 #endif 1613 } 1614 #endif // KMP_USE_HWLOC 1615 1616 void __kmp_init_target_mem() { 1617 *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host"); 1618 *(void **)(&kmp_target_alloc_shared) = 1619 KMP_DLSYM("llvm_omp_target_alloc_shared"); 1620 *(void **)(&kmp_target_alloc_device) = 1621 KMP_DLSYM("llvm_omp_target_alloc_device"); 1622 *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host"); 1623 *(void **)(&kmp_target_free_shared) = 1624 KMP_DLSYM("llvm_omp_target_free_shared"); 1625 *(void **)(&kmp_target_free_device) = 1626 KMP_DLSYM("llvm_omp_target_free_device"); 1627 __kmp_target_mem_available = 1628 kmp_target_alloc_host && kmp_target_alloc_shared && 1629 kmp_target_alloc_device && kmp_target_free_host && 1630 kmp_target_free_shared && kmp_target_free_device; 1631 // lock/pin and 
unlock/unpin target calls 1632 *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem"); 1633 *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem"); 1634 __kmp_tgt_allocator.init(); 1635 __kmp_tgt_memspace_list.init(); 1636 } 1637 1638 /// Finalize target memory support 1639 void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); } 1640 1641 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, 1642 int ntraits, 1643 omp_alloctrait_t traits[]) { 1644 kmp_allocator_t *al; 1645 int i; 1646 al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed 1647 al->memspace = ms; // not used currently 1648 1649 // Assign default values if applicable 1650 al->alignment = 1; 1651 al->pinned = false; 1652 al->partition = omp_atv_environment; 1653 al->pin_device = -1; 1654 al->preferred_device = -1; 1655 al->target_access = omp_atv_single; 1656 al->atomic_scope = omp_atv_device; 1657 1658 for (i = 0; i < ntraits; ++i) { 1659 switch (traits[i].key) { 1660 case omp_atk_sync_hint: 1661 case omp_atk_access: 1662 break; 1663 case omp_atk_pinned: 1664 al->pinned = true; 1665 break; 1666 case omp_atk_alignment: 1667 __kmp_type_convert(traits[i].value, &(al->alignment)); 1668 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment)); 1669 break; 1670 case omp_atk_pool_size: 1671 al->pool_size = traits[i].value; 1672 break; 1673 case omp_atk_fallback: 1674 al->fb = (omp_alloctrait_value_t)traits[i].value; 1675 KMP_DEBUG_ASSERT( 1676 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb || 1677 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb); 1678 break; 1679 case omp_atk_fb_data: 1680 al->fb_data = RCAST(kmp_allocator_t *, traits[i].value); 1681 break; 1682 case omp_atk_partition: 1683 #if KMP_USE_HWLOC 1684 al->membind = (omp_alloctrait_value_t)traits[i].value; 1685 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment || 1686 al->membind == omp_atv_nearest || 1687 al->membind == omp_atv_blocked 
|| 1688 al->membind == omp_atv_interleaved); 1689 #endif 1690 al->memkind = RCAST(void **, traits[i].value); 1691 break; 1692 case omp_atk_pin_device: 1693 __kmp_type_convert(traits[i].value, &(al->pin_device)); 1694 break; 1695 case omp_atk_preferred_device: 1696 __kmp_type_convert(traits[i].value, &(al->preferred_device)); 1697 break; 1698 case omp_atk_target_access: 1699 al->target_access = (omp_alloctrait_value_t)traits[i].value; 1700 break; 1701 case omp_atk_atomic_scope: 1702 al->atomic_scope = (omp_alloctrait_value_t)traits[i].value; 1703 break; 1704 case omp_atk_part_size: 1705 __kmp_type_convert(traits[i].value, &(al->part_size)); 1706 break; 1707 default: 1708 KMP_ASSERT2(0, "Unexpected allocator trait"); 1709 } 1710 } 1711 1712 if (al->memspace > kmp_max_mem_space) { 1713 // Memory space has been allocated for targets. 1714 return (omp_allocator_handle_t)al; 1715 } 1716 1717 KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace)); 1718 1719 if (al->fb == 0) { 1720 // set default allocator 1721 al->fb = omp_atv_default_mem_fb; 1722 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; 1723 } else if (al->fb == omp_atv_allocator_fb) { 1724 KMP_ASSERT(al->fb_data != NULL); 1725 } else if (al->fb == omp_atv_default_mem_fb) { 1726 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc; 1727 } 1728 if (__kmp_memkind_available) { 1729 // Let's use memkind library if available 1730 if (ms == omp_high_bw_mem_space) { 1731 if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) { 1732 al->memkind = mk_hbw_interleave; 1733 } else if (mk_hbw_preferred) { 1734 // AC: do not try to use MEMKIND_HBW for now, because memkind library 1735 // cannot reliably detect exhaustion of HBW memory. 1736 // It could be possible using hbw_verify_memory_region() but memkind 1737 // manual says: "Using this function in production code may result in 1738 // serious performance penalty". 
1739 al->memkind = mk_hbw_preferred; 1740 } else { 1741 // HBW is requested but not available --> return NULL allocator 1742 __kmp_free(al); 1743 return omp_null_allocator; 1744 } 1745 } else if (ms == omp_large_cap_mem_space) { 1746 if (mk_dax_kmem_all) { 1747 // All pmem nodes are visited 1748 al->memkind = mk_dax_kmem_all; 1749 } else if (mk_dax_kmem) { 1750 // Only closest pmem node is visited 1751 al->memkind = mk_dax_kmem; 1752 } else { 1753 __kmp_free(al); 1754 return omp_null_allocator; 1755 } 1756 } else { 1757 if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) { 1758 al->memkind = mk_interleave; 1759 } else { 1760 al->memkind = mk_default; 1761 } 1762 } 1763 } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) { 1764 __kmp_free(al); 1765 return omp_null_allocator; 1766 } else { 1767 if (!__kmp_hwloc_available && 1768 (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) { 1769 // cannot detect HBW memory presence without memkind library 1770 __kmp_free(al); 1771 return omp_null_allocator; 1772 } 1773 } 1774 return (omp_allocator_handle_t)al; 1775 } 1776 1777 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) { 1778 if (allocator > kmp_max_mem_alloc) 1779 __kmp_free(allocator); 1780 } 1781 1782 void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) { 1783 if (allocator == omp_null_allocator) 1784 allocator = omp_default_mem_alloc; 1785 __kmp_threads[gtid]->th.th_def_allocator = allocator; 1786 } 1787 1788 omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) { 1789 return __kmp_threads[gtid]->th.th_def_allocator; 1790 } 1791 1792 omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs, 1793 omp_memspace_handle_t memspace, 1794 int host) { 1795 if (!__kmp_init_serial) 1796 __kmp_serial_initialize(); 1797 // Only accept valid device description and predefined memory space 1798 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > 
kmp_max_mem_space) 1799 return omp_null_mem_space; 1800 1801 return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace); 1802 } 1803 1804 omp_allocator_handle_t 1805 __kmp_get_devices_allocator(int ndevs, const int *devs, 1806 omp_memspace_handle_t memspace, int host) { 1807 if (!__kmp_init_serial) 1808 __kmp_serial_initialize(); 1809 // Only accept valid device description and predefined memory space 1810 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space) 1811 return omp_null_allocator; 1812 1813 omp_memspace_handle_t mspace = 1814 __kmp_get_devices_memspace(ndevs, devs, memspace, host); 1815 if (mspace == omp_null_mem_space) 1816 return omp_null_allocator; 1817 1818 return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL); 1819 } 1820 1821 int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) { 1822 if (!__kmp_init_serial) 1823 __kmp_serial_initialize(); 1824 if (memspace == omp_null_mem_space) 1825 return 0; 1826 if (memspace < kmp_max_mem_space) 1827 return 1; // return 1 for predefined memory space 1828 kmp_memspace_t *ms = (kmp_memspace_t *)memspace; 1829 return ms->num_resources; 1830 } 1831 1832 omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace, 1833 int num_resources, int *resources) { 1834 if (!__kmp_init_serial) 1835 __kmp_serial_initialize(); 1836 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space) 1837 return memspace; // return input memory space for predefined memory space 1838 kmp_memspace_t *ms = (kmp_memspace_t *)memspace; 1839 if (num_resources == 0 || ms->num_resources < num_resources || !resources) 1840 return omp_null_mem_space; // input memory space cannot satisfy the request 1841 1842 // The stored resource ID is an absolute ID only known to the offload backend, 1843 // and the returned memory space will still keep the property. 
1844 int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources); 1845 1846 // Collect absolute resource ID from the relative ID 1847 for (int i = 0; i < num_resources; i++) 1848 resources_abs[i] = ms->resources[resources[i]]; 1849 1850 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace( 1851 num_resources, resources_abs, memspace); 1852 __kmp_free(resources_abs); 1853 1854 return submemspace; 1855 } 1856 1857 typedef struct kmp_mem_desc { // Memory block descriptor 1858 void *ptr_alloc; // Pointer returned by allocator 1859 size_t size_a; // Size of allocated memory block (initial+descriptor+align) 1860 size_t size_orig; // Original size requested 1861 void *ptr_align; // Pointer to aligned memory, returned 1862 kmp_allocator_t *allocator; // allocator 1863 } kmp_mem_desc_t; 1864 constexpr size_t alignment = SizeQuant; 1865 1866 // external interfaces are wrappers over internal implementation 1867 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) { 1868 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator)); 1869 void *ptr = __kmp_alloc(gtid, 0, size, allocator); 1870 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid)); 1871 return ptr; 1872 } 1873 1874 void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size, 1875 omp_allocator_handle_t allocator) { 1876 KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn, 1877 (int)size, allocator)); 1878 void *ptr = __kmp_alloc(gtid, algn, size, allocator); 1879 KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid)); 1880 return ptr; 1881 } 1882 1883 void *__kmpc_calloc(int gtid, size_t nmemb, size_t size, 1884 omp_allocator_handle_t allocator) { 1885 KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb, 1886 (int)size, allocator)); 1887 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator); 1888 KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid)); 1889 return ptr; 
1890 } 1891 1892 void *__kmpc_realloc(int gtid, void *ptr, size_t size, 1893 omp_allocator_handle_t allocator, 1894 omp_allocator_handle_t free_allocator) { 1895 KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size, 1896 allocator, free_allocator)); 1897 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator); 1898 KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid)); 1899 return nptr; 1900 } 1901 1902 void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) { 1903 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator)); 1904 ___kmpc_free(gtid, ptr, allocator); 1905 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator)); 1906 return; 1907 } 1908 1909 // internal implementation, called from inside the library 1910 void *__kmp_alloc(int gtid, size_t algn, size_t size, 1911 omp_allocator_handle_t allocator) { 1912 void *ptr = NULL; 1913 kmp_allocator_t *al; 1914 KMP_DEBUG_ASSERT(__kmp_init_serial); 1915 if (size == 0) 1916 return NULL; 1917 if (allocator == omp_null_allocator) 1918 allocator = __kmp_threads[gtid]->th.th_def_allocator; 1919 kmp_int32 default_device = 1920 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; 1921 1922 al = RCAST(kmp_allocator_t *, allocator); 1923 1924 int sz_desc = sizeof(kmp_mem_desc_t); 1925 kmp_mem_desc_t desc; 1926 kmp_uintptr_t addr; // address returned by allocator 1927 kmp_uintptr_t addr_align; // address to return to caller 1928 kmp_uintptr_t addr_descr; // address of memory block descriptor 1929 size_t align = alignment; // default alignment 1930 if (allocator > kmp_max_mem_alloc && al->alignment > align) 1931 align = al->alignment; // alignment required by allocator trait 1932 if (align < algn) 1933 align = algn; // max of allocator trait, parameter and sizeof(void*) 1934 desc.size_orig = size; 1935 desc.size_a = size + sz_desc + align; 1936 bool is_pinned = false; 1937 if (allocator > kmp_max_mem_alloc) 1938 
is_pinned = al->pinned; 1939 1940 // Use default allocator if hwloc and libmemkind are not available 1941 int use_default_allocator = 1942 (!__kmp_hwloc_available && !__kmp_memkind_available); 1943 1944 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) { 1945 // Memspace has been allocated for targets. 1946 return __kmp_tgt_allocator.omp_alloc(size, allocator); 1947 } 1948 1949 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) { 1950 // Use size input directly as the memory may not be accessible on host. 1951 // Use default device for now. 1952 if (__kmp_target_mem_available) { 1953 kmp_int32 device = 1954 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; 1955 if (allocator == llvm_omp_target_host_mem_alloc) 1956 ptr = kmp_target_alloc_host(size, device); 1957 else if (allocator == llvm_omp_target_shared_mem_alloc) 1958 ptr = kmp_target_alloc_shared(size, device); 1959 else // allocator == llvm_omp_target_device_mem_alloc 1960 ptr = kmp_target_alloc_device(size, device); 1961 return ptr; 1962 } else { 1963 KMP_INFORM(TargetMemNotAvailable); 1964 } 1965 } 1966 1967 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) { 1968 if (__kmp_target_mem_available) { 1969 kmp_int32 device = 1970 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; 1971 if (al->memspace == llvm_omp_target_host_mem_space) 1972 ptr = kmp_target_alloc_host(size, device); 1973 else if (al->memspace == llvm_omp_target_shared_mem_space) 1974 ptr = kmp_target_alloc_shared(size, device); 1975 else // al->memspace == llvm_omp_target_device_mem_space 1976 ptr = kmp_target_alloc_device(size, device); 1977 return ptr; 1978 } else { 1979 KMP_INFORM(TargetMemNotAvailable); 1980 } 1981 } 1982 1983 #if KMP_USE_HWLOC 1984 if (__kmp_hwloc_available) { 1985 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) { 1986 if (allocator < kmp_max_mem_alloc) { 1987 // pre-defined allocator 1988 if (allocator == omp_high_bw_mem_alloc) { 1989 ptr = 
__kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, 1990 desc.size_a, HWLOC_MEMBIND_BIND); 1991 if (ptr == NULL) 1992 use_default_allocator = true; 1993 } else if (allocator == omp_large_cap_mem_alloc) { 1994 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, 1995 desc.size_a, HWLOC_MEMBIND_BIND); 1996 if (ptr == NULL) 1997 use_default_allocator = true; 1998 } else { 1999 use_default_allocator = true; 2000 } 2001 if (use_default_allocator) { 2002 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2003 } 2004 } else if (al->pool_size > 0) { 2005 // custom allocator with pool size requested 2006 kmp_uint64 used = 2007 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); 2008 if (used + desc.size_a > al->pool_size) { 2009 // not enough space, need to go fallback path 2010 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2011 if (al->fb == omp_atv_default_mem_fb) { 2012 al = (kmp_allocator_t *)omp_default_mem_alloc; 2013 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2014 } else if (al->fb == omp_atv_abort_fb) { 2015 KMP_ASSERT(0); // abort fallback requested 2016 } else if (al->fb == omp_atv_allocator_fb) { 2017 KMP_ASSERT(al != al->fb_data); 2018 al = al->fb_data; 2019 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2020 } // else ptr == NULL; 2021 } else { 2022 // pool has enough space 2023 if (al->membind == omp_atv_interleaved) { 2024 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) { 2025 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, 2026 HWLOC_MEMBIND_INTERLEAVE); 2027 } 2028 } else if (al->membind == omp_atv_environment) { 2029 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, 2030 HWLOC_MEMBIND_DEFAULT); 2031 } else { 2032 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2033 } 2034 if (ptr == NULL) { 2035 if (al->fb == omp_atv_default_mem_fb) { 2036 al = (kmp_allocator_t *)omp_default_mem_alloc; 2037 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 
2038 } else if (al->fb == omp_atv_abort_fb) { 2039 KMP_ASSERT(0); // abort fallback requested 2040 } else if (al->fb == omp_atv_allocator_fb) { 2041 KMP_ASSERT(al != al->fb_data); 2042 al = al->fb_data; 2043 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2044 } 2045 } 2046 } 2047 } else { 2048 // custom allocator, pool size not requested 2049 if (al->membind == omp_atv_interleaved) { 2050 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) { 2051 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, 2052 HWLOC_MEMBIND_INTERLEAVE); 2053 } 2054 } else if (al->membind == omp_atv_environment) { 2055 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a, 2056 HWLOC_MEMBIND_DEFAULT); 2057 } else { 2058 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2059 } 2060 if (ptr == NULL) { 2061 if (al->fb == omp_atv_default_mem_fb) { 2062 al = (kmp_allocator_t *)omp_default_mem_alloc; 2063 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2064 } else if (al->fb == omp_atv_abort_fb) { 2065 KMP_ASSERT(0); // abort fallback requested 2066 } else if (al->fb == omp_atv_allocator_fb) { 2067 KMP_ASSERT(al != al->fb_data); 2068 al = al->fb_data; 2069 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2070 } 2071 } 2072 } 2073 } else { // alloc membind not supported, use hwloc_alloc 2074 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a); 2075 } 2076 } else { 2077 #endif 2078 if (__kmp_memkind_available) { 2079 if (allocator < kmp_max_mem_alloc) { 2080 // pre-defined allocator 2081 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) { 2082 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a); 2083 } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) { 2084 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a); 2085 } else { 2086 ptr = kmp_mk_alloc(*mk_default, desc.size_a); 2087 } 2088 } else if (al->pool_size > 0) { 2089 // custom allocator with pool size requested 2090 kmp_uint64 used = 2091 
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); 2092 if (used + desc.size_a > al->pool_size) { 2093 // not enough space, need to go fallback path 2094 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2095 if (al->fb == omp_atv_default_mem_fb) { 2096 al = (kmp_allocator_t *)omp_default_mem_alloc; 2097 ptr = kmp_mk_alloc(*mk_default, desc.size_a); 2098 } else if (al->fb == omp_atv_abort_fb) { 2099 KMP_ASSERT(0); // abort fallback requested 2100 } else if (al->fb == omp_atv_allocator_fb) { 2101 KMP_ASSERT(al != al->fb_data); 2102 al = al->fb_data; 2103 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2104 if (is_pinned && kmp_target_lock_mem) 2105 kmp_target_lock_mem(ptr, size, default_device); 2106 return ptr; 2107 } // else ptr == NULL; 2108 } else { 2109 // pool has enough space 2110 ptr = kmp_mk_alloc(*al->memkind, desc.size_a); 2111 if (ptr == NULL) { 2112 if (al->fb == omp_atv_default_mem_fb) { 2113 al = (kmp_allocator_t *)omp_default_mem_alloc; 2114 ptr = kmp_mk_alloc(*mk_default, desc.size_a); 2115 } else if (al->fb == omp_atv_abort_fb) { 2116 KMP_ASSERT(0); // abort fallback requested 2117 } else if (al->fb == omp_atv_allocator_fb) { 2118 KMP_ASSERT(al != al->fb_data); 2119 al = al->fb_data; 2120 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2121 if (is_pinned && kmp_target_lock_mem) 2122 kmp_target_lock_mem(ptr, size, default_device); 2123 return ptr; 2124 } 2125 } 2126 } 2127 } else { 2128 // custom allocator, pool size not requested 2129 ptr = kmp_mk_alloc(*al->memkind, desc.size_a); 2130 if (ptr == NULL) { 2131 if (al->fb == omp_atv_default_mem_fb) { 2132 al = (kmp_allocator_t *)omp_default_mem_alloc; 2133 ptr = kmp_mk_alloc(*mk_default, desc.size_a); 2134 } else if (al->fb == omp_atv_abort_fb) { 2135 KMP_ASSERT(0); // abort fallback requested 2136 } else if (al->fb == omp_atv_allocator_fb) { 2137 KMP_ASSERT(al != al->fb_data); 2138 al = al->fb_data; 2139 ptr = __kmp_alloc(gtid, algn, size, 
(omp_allocator_handle_t)al); 2140 if (is_pinned && kmp_target_lock_mem) 2141 kmp_target_lock_mem(ptr, size, default_device); 2142 return ptr; 2143 } 2144 } 2145 } 2146 } else if (allocator < kmp_max_mem_alloc) { 2147 // pre-defined allocator 2148 if (allocator == omp_high_bw_mem_alloc) { 2149 KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc"); 2150 } else if (allocator == omp_large_cap_mem_alloc) { 2151 KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc"); 2152 } else if (allocator == omp_const_mem_alloc) { 2153 KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc"); 2154 } else if (allocator == omp_low_lat_mem_alloc) { 2155 KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc"); 2156 } else if (allocator == omp_cgroup_mem_alloc) { 2157 KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc"); 2158 } else if (allocator == omp_pteam_mem_alloc) { 2159 KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc"); 2160 } else if (allocator == omp_thread_mem_alloc) { 2161 KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc"); 2162 } else { // default allocator requested 2163 use_default_allocator = true; 2164 } 2165 if (use_default_allocator) { 2166 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); 2167 use_default_allocator = false; 2168 } 2169 } else if (al->pool_size > 0) { 2170 // custom allocator with pool size requested 2171 kmp_uint64 used = 2172 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a); 2173 if (used + desc.size_a > al->pool_size) { 2174 // not enough space, need to go fallback path 2175 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2176 if (al->fb == omp_atv_default_mem_fb) { 2177 al = (kmp_allocator_t *)omp_default_mem_alloc; 2178 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); 2179 } else if (al->fb == omp_atv_abort_fb) { 2180 KMP_ASSERT(0); // abort fallback requested 2181 } else if (al->fb == omp_atv_allocator_fb) { 2182 KMP_ASSERT(al != al->fb_data); 2183 al = al->fb_data; 2184 ptr = 
__kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); 2185 if (is_pinned && kmp_target_lock_mem) 2186 kmp_target_lock_mem(ptr, size, default_device); 2187 return ptr; 2188 } // else ptr == NULL 2189 } else { 2190 // pool has enough space 2191 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); 2192 if (ptr == NULL && al->fb == omp_atv_abort_fb) { 2193 KMP_ASSERT(0); // abort fallback requested 2194 } // no sense to look for another fallback because of same internal 2195 // alloc 2196 } 2197 } else { 2198 // custom allocator, pool size not requested 2199 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a); 2200 if (ptr == NULL && al->fb == omp_atv_abort_fb) { 2201 KMP_ASSERT(0); // abort fallback requested 2202 } // no sense to look for another fallback because of same internal alloc 2203 } 2204 #if KMP_USE_HWLOC 2205 } 2206 #endif 2207 KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a)); 2208 if (ptr == NULL) 2209 return NULL; 2210 2211 if (is_pinned && kmp_target_lock_mem) 2212 kmp_target_lock_mem(ptr, desc.size_a, default_device); 2213 2214 addr = (kmp_uintptr_t)ptr; 2215 addr_align = (addr + sz_desc + align - 1) & ~(align - 1); 2216 addr_descr = addr_align - sz_desc; 2217 2218 desc.ptr_alloc = ptr; 2219 desc.ptr_align = (void *)addr_align; 2220 desc.allocator = al; 2221 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents 2222 KMP_MB(); 2223 2224 return desc.ptr_align; 2225 } 2226 2227 void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size, 2228 omp_allocator_handle_t allocator) { 2229 void *ptr = NULL; 2230 kmp_allocator_t *al; 2231 KMP_DEBUG_ASSERT(__kmp_init_serial); 2232 2233 if (allocator == omp_null_allocator) 2234 allocator = __kmp_threads[gtid]->th.th_def_allocator; 2235 2236 al = RCAST(kmp_allocator_t *, allocator); 2237 2238 if (nmemb == 0 || size == 0) 2239 return ptr; 2240 2241 if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) { 2242 if (al->fb == 
omp_atv_abort_fb) { 2243 KMP_ASSERT(0); 2244 } 2245 return ptr; 2246 } 2247 2248 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator); 2249 2250 if (ptr) { 2251 memset(ptr, 0x00, nmemb * size); 2252 } 2253 return ptr; 2254 } 2255 2256 void *__kmp_realloc(int gtid, void *ptr, size_t size, 2257 omp_allocator_handle_t allocator, 2258 omp_allocator_handle_t free_allocator) { 2259 void *nptr = NULL; 2260 KMP_DEBUG_ASSERT(__kmp_init_serial); 2261 2262 if (size == 0) { 2263 if (ptr != NULL) 2264 ___kmpc_free(gtid, ptr, free_allocator); 2265 return nptr; 2266 } 2267 2268 nptr = __kmp_alloc(gtid, 0, size, allocator); 2269 2270 if (nptr != NULL && ptr != NULL) { 2271 kmp_mem_desc_t desc; 2272 kmp_uintptr_t addr_align; // address to return to caller 2273 kmp_uintptr_t addr_descr; // address of memory block descriptor 2274 2275 addr_align = (kmp_uintptr_t)ptr; 2276 addr_descr = addr_align - sizeof(kmp_mem_desc_t); 2277 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor 2278 2279 KMP_DEBUG_ASSERT(desc.ptr_align == ptr); 2280 KMP_DEBUG_ASSERT(desc.size_orig > 0); 2281 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a); 2282 KMP_MEMCPY((char *)nptr, (char *)ptr, 2283 (size_t)((size < desc.size_orig) ? 
size : desc.size_orig)); 2284 } 2285 2286 if (nptr != NULL) { 2287 ___kmpc_free(gtid, ptr, free_allocator); 2288 } 2289 2290 return nptr; 2291 } 2292 2293 void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) { 2294 if (ptr == NULL) 2295 return; 2296 2297 kmp_allocator_t *al; 2298 omp_allocator_handle_t oal; 2299 al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator)); 2300 kmp_mem_desc_t desc; 2301 kmp_uintptr_t addr_align; // address to return to caller 2302 kmp_uintptr_t addr_descr; // address of memory block descriptor 2303 2304 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) { 2305 __kmp_tgt_allocator.omp_free(ptr, allocator); 2306 return; 2307 } 2308 2309 if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) || 2310 (allocator > kmp_max_mem_alloc && 2311 KMP_IS_TARGET_MEM_SPACE(al->memspace)))) { 2312 kmp_int32 device = 2313 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; 2314 if (allocator == llvm_omp_target_host_mem_alloc) { 2315 kmp_target_free_host(ptr, device); 2316 } else if (allocator == llvm_omp_target_shared_mem_alloc) { 2317 kmp_target_free_shared(ptr, device); 2318 } else if (allocator == llvm_omp_target_device_mem_alloc) { 2319 kmp_target_free_device(ptr, device); 2320 } 2321 return; 2322 } 2323 2324 addr_align = (kmp_uintptr_t)ptr; 2325 addr_descr = addr_align - sizeof(kmp_mem_desc_t); 2326 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor 2327 2328 KMP_DEBUG_ASSERT(desc.ptr_align == ptr); 2329 if (allocator) { 2330 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data); 2331 } 2332 al = desc.allocator; 2333 oal = (omp_allocator_handle_t)al; // cast to void* for comparisons 2334 KMP_DEBUG_ASSERT(al); 2335 2336 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) { 2337 kmp_int32 device = 2338 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; 2339 kmp_target_unlock_mem(desc.ptr_alloc, device); 2340 } 
2341 2342 #if KMP_USE_HWLOC 2343 if (__kmp_hwloc_available) { 2344 if (oal > kmp_max_mem_alloc && al->pool_size > 0) { 2345 kmp_uint64 used = 2346 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2347 (void)used; // to suppress compiler warning 2348 KMP_DEBUG_ASSERT(used >= desc.size_a); 2349 } 2350 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a); 2351 } else { 2352 #endif 2353 if (__kmp_memkind_available) { 2354 if (oal < kmp_max_mem_alloc) { 2355 // pre-defined allocator 2356 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) { 2357 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc); 2358 } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) { 2359 kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc); 2360 } else { 2361 kmp_mk_free(*mk_default, desc.ptr_alloc); 2362 } 2363 } else { 2364 if (al->pool_size > 0) { // custom allocator with pool size requested 2365 kmp_uint64 used = 2366 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2367 (void)used; // to suppress compiler warning 2368 KMP_DEBUG_ASSERT(used >= desc.size_a); 2369 } 2370 kmp_mk_free(*al->memkind, desc.ptr_alloc); 2371 } 2372 } else { 2373 if (oal > kmp_max_mem_alloc && al->pool_size > 0) { 2374 kmp_uint64 used = 2375 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a); 2376 (void)used; // to suppress compiler warning 2377 KMP_DEBUG_ASSERT(used >= desc.size_a); 2378 } 2379 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc); 2380 } 2381 #if KMP_USE_HWLOC 2382 } 2383 #endif 2384 } 2385 2386 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes 2387 memory leaks, but it may be useful for debugging memory corruptions, used 2388 freed pointers, etc. */ 2389 /* #define LEAK_MEMORY */ 2390 struct kmp_mem_descr { // Memory block descriptor. 2391 void *ptr_allocated; // Pointer returned by malloc(), subject for free(). 2392 size_t size_allocated; // Size of allocated memory block. 
2393 void *ptr_aligned; // Pointer to aligned memory, to be used by client code. 2394 size_t size_aligned; // Size of aligned memory block. 2395 }; 2396 typedef struct kmp_mem_descr kmp_mem_descr_t; 2397 2398 /* Allocate memory on requested boundary, fill allocated memory with 0x00. 2399 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 2400 error. Must use __kmp_free when freeing memory allocated by this routine! */ 2401 static void *___kmp_allocate_align(size_t size, 2402 size_t alignment KMP_SRC_LOC_DECL) { 2403 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than 2404 requested to return properly aligned pointer. Original pointer returned 2405 by malloc() and size of allocated block is saved in descriptor just 2406 before the aligned pointer. This information used by __kmp_free() -- it 2407 has to pass to free() original pointer, not aligned one. 2408 2409 +---------+------------+-----------------------------------+---------+ 2410 | padding | descriptor | aligned block | padding | 2411 +---------+------------+-----------------------------------+---------+ 2412 ^ ^ 2413 | | 2414 | +- Aligned pointer returned to caller 2415 +- Pointer returned by malloc() 2416 2417 Aligned block is filled with zeros, paddings are filled with 0xEF. */ 2418 2419 kmp_mem_descr_t descr; 2420 kmp_uintptr_t addr_allocated; // Address returned by malloc(). 2421 kmp_uintptr_t addr_aligned; // Aligned address to return to caller. 2422 kmp_uintptr_t addr_descr; // Address of memory block descriptor. 2423 2424 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", 2425 (int)size, (int)alignment KMP_SRC_LOC_PARM)); 2426 2427 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too 2428 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t)); 2429 // Make sure kmp_uintptr_t is enough to store addresses. 
2430 2431 descr.size_aligned = size; 2432 descr.size_allocated = 2433 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment; 2434 2435 #if KMP_DEBUG 2436 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_); 2437 #else 2438 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM); 2439 #endif 2440 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated, 2441 descr.ptr_allocated)); 2442 if (descr.ptr_allocated == NULL) { 2443 KMP_FATAL(OutOfHeapMemory); 2444 } 2445 2446 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; 2447 addr_aligned = 2448 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1); 2449 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t); 2450 2451 descr.ptr_aligned = (void *)addr_aligned; 2452 2453 KE_TRACE(26, (" ___kmp_allocate_align: " 2454 "ptr_allocated=%p, size_allocated=%d, " 2455 "ptr_aligned=%p, size_aligned=%d\n", 2456 descr.ptr_allocated, (int)descr.size_allocated, 2457 descr.ptr_aligned, (int)descr.size_aligned)); 2458 2459 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr); 2460 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned); 2461 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= 2462 addr_allocated + descr.size_allocated); 2463 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0); 2464 #ifdef KMP_DEBUG 2465 memset(descr.ptr_allocated, 0xEF, descr.size_allocated); 2466 // Fill allocated memory block with 0xEF. 2467 #endif 2468 memset(descr.ptr_aligned, 0x00, descr.size_aligned); 2469 // Fill the aligned memory block (which is intended for using by caller) with 2470 // 0x00. Do not 2471 // put this filling under KMP_DEBUG condition! Many callers expect zeroed 2472 // memory. (Padding 2473 // bytes remain filled with 0xEF in debugging library.) 
2474 *((kmp_mem_descr_t *)addr_descr) = descr; 2475 2476 KMP_MB(); 2477 2478 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned)); 2479 return descr.ptr_aligned; 2480 } // func ___kmp_allocate_align 2481 2482 /* Allocate memory on cache line boundary, fill allocated memory with 0x00. 2483 Do not call this func directly! Use __kmp_allocate macro instead. 2484 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 2485 error. Must use __kmp_free when freeing memory allocated by this routine! */ 2486 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) { 2487 void *ptr; 2488 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n", 2489 (int)size KMP_SRC_LOC_PARM)); 2490 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM); 2491 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr)); 2492 return ptr; 2493 } // func ___kmp_allocate 2494 2495 /* Allocate memory on page boundary, fill allocated memory with 0x00. 2496 Does not call this func directly! Use __kmp_page_allocate macro instead. 2497 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation 2498 error. Must use __kmp_free when freeing memory allocated by this routine! */ 2499 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) { 2500 int page_size = 8 * 1024; 2501 void *ptr; 2502 2503 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n", 2504 (int)size KMP_SRC_LOC_PARM)); 2505 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM); 2506 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr)); 2507 return ptr; 2508 } // ___kmp_page_allocate 2509 2510 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). 2511 In debug mode, fill the memory block with 0xEF before call to free(). */ 2512 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) { 2513 kmp_mem_descr_t descr; 2514 #if KMP_DEBUG 2515 kmp_uintptr_t addr_allocated; // Address returned by malloc(). 
2516 kmp_uintptr_t addr_aligned; // Aligned address passed by caller. 2517 #endif 2518 KE_TRACE(25, 2519 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM)); 2520 KMP_ASSERT(ptr != NULL); 2521 2522 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)); 2523 2524 KE_TRACE(26, (" __kmp_free: " 2525 "ptr_allocated=%p, size_allocated=%d, " 2526 "ptr_aligned=%p, size_aligned=%d\n", 2527 descr.ptr_allocated, (int)descr.size_allocated, 2528 descr.ptr_aligned, (int)descr.size_aligned)); 2529 #if KMP_DEBUG 2530 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; 2531 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned; 2532 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0); 2533 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr); 2534 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned); 2535 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated); 2536 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= 2537 addr_allocated + descr.size_allocated); 2538 memset(descr.ptr_allocated, 0xEF, descr.size_allocated); 2539 // Fill memory block with 0xEF, it helps catch using freed memory. 2540 #endif 2541 2542 #ifndef LEAK_MEMORY 2543 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated)); 2544 #ifdef KMP_DEBUG 2545 _free_src_loc(descr.ptr_allocated, _file_, _line_); 2546 #else 2547 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM); 2548 #endif 2549 #endif 2550 KMP_MB(); 2551 KE_TRACE(25, ("<- __kmp_free() returns\n")); 2552 } // func ___kmp_free 2553 2554 #if USE_FAST_MEMORY == 3 2555 // Allocate fast memory by first scanning the thread's free lists 2556 // If a chunk the right size exists, grab it off the free list. 2557 // Otherwise allocate normally using kmp_thread_malloc. 2558 2559 // AC: How to choose the limit? Just get 16 for now... 
2560 #define KMP_FREE_LIST_LIMIT 16 2561 2562 // Always use 128 bytes for determining buckets for caching memory blocks 2563 #define DCACHE_LINE 128 2564 2565 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { 2566 void *ptr; 2567 size_t num_lines, idx; 2568 int index; 2569 void *alloc_ptr; 2570 size_t alloc_size; 2571 kmp_mem_descr_t *descr; 2572 2573 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", 2574 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); 2575 2576 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; 2577 idx = num_lines - 1; 2578 KMP_DEBUG_ASSERT(idx >= 0); 2579 if (idx < 2) { 2580 index = 0; // idx is [ 0, 1 ], use first free list 2581 num_lines = 2; // 1, 2 cache lines or less than cache line 2582 } else if ((idx >>= 2) == 0) { 2583 index = 1; // idx is [ 2, 3 ], use second free list 2584 num_lines = 4; // 3, 4 cache lines 2585 } else if ((idx >>= 2) == 0) { 2586 index = 2; // idx is [ 4, 15 ], use third free list 2587 num_lines = 16; // 5, 6, ..., 16 cache lines 2588 } else if ((idx >>= 2) == 0) { 2589 index = 3; // idx is [ 16, 63 ], use fourth free list 2590 num_lines = 64; // 17, 18, ..., 64 cache lines 2591 } else { 2592 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists 2593 } 2594 2595 ptr = this_thr->th.th_free_lists[index].th_free_list_self; 2596 if (ptr != NULL) { 2597 // pop the head of no-sync free list 2598 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); 2599 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - 2600 sizeof(kmp_mem_descr_t))) 2601 ->ptr_aligned); 2602 goto end; 2603 } 2604 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); 2605 if (ptr != NULL) { 2606 // no-sync free list is empty, use sync free list (filled in by other 2607 // threads only) 2608 // pop the head of the sync free list, push NULL instead 2609 while (!KMP_COMPARE_AND_STORE_PTR( 2610 
&this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { 2611 KMP_CPU_PAUSE(); 2612 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); 2613 } 2614 // push the rest of chain into no-sync free list (can be NULL if there was 2615 // the only block) 2616 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); 2617 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - 2618 sizeof(kmp_mem_descr_t))) 2619 ->ptr_aligned); 2620 goto end; 2621 } 2622 2623 alloc_call: 2624 // haven't found block in the free lists, thus allocate it 2625 size = num_lines * DCACHE_LINE; 2626 2627 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE; 2628 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " 2629 "alloc_size %d\n", 2630 __kmp_gtid_from_thread(this_thr), alloc_size)); 2631 alloc_ptr = bget(this_thr, (bufsize)alloc_size); 2632 2633 // align ptr to DCACHE_LINE 2634 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + 2635 DCACHE_LINE) & 2636 ~(DCACHE_LINE - 1)); 2637 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); 2638 2639 descr->ptr_allocated = alloc_ptr; // remember allocated pointer 2640 // we don't need size_allocated 2641 descr->ptr_aligned = (void *)this_thr; // remember allocating thread 2642 // (it is already saved in bget buffer, 2643 // but we may want to use another allocator in future) 2644 descr->size_aligned = size; 2645 2646 end: 2647 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n", 2648 __kmp_gtid_from_thread(this_thr), ptr)); 2649 return ptr; 2650 } // func __kmp_fast_allocate 2651 2652 // Free fast memory and place it on the thread's free list if it is of 2653 // the correct size. 
2654 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) { 2655 kmp_mem_descr_t *descr; 2656 kmp_info_t *alloc_thr; 2657 size_t size; 2658 size_t idx; 2659 int index; 2660 2661 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", 2662 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM)); 2663 KMP_ASSERT(ptr != NULL); 2664 2665 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); 2666 2667 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", 2668 (int)descr->size_aligned)); 2669 2670 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines 2671 2672 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block 2673 if (idx == size) { 2674 index = 0; // 2 cache lines 2675 } else if ((idx <<= 1) == size) { 2676 index = 1; // 4 cache lines 2677 } else if ((idx <<= 2) == size) { 2678 index = 2; // 16 cache lines 2679 } else if ((idx <<= 2) == size) { 2680 index = 3; // 64 cache lines 2681 } else { 2682 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64); 2683 goto free_call; // 65 or more cache lines ( > 8KB ) 2684 } 2685 2686 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block 2687 if (alloc_thr == this_thr) { 2688 // push block to self no-sync free list, linking previous head (LIFO) 2689 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; 2690 this_thr->th.th_free_lists[index].th_free_list_self = ptr; 2691 } else { 2692 void *head = this_thr->th.th_free_lists[index].th_free_list_other; 2693 if (head == NULL) { 2694 // Create new free list 2695 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 2696 *((void **)ptr) = NULL; // mark the tail of the list 2697 descr->size_allocated = (size_t)1; // head of the list keeps its length 2698 } else { 2699 // need to check existed "other" list's owner thread and size of queue 2700 kmp_mem_descr_t *dsc = 2701 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t)); 2702 // allocating thread, same for all 
queue nodes 2703 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned); 2704 size_t q_sz = 2705 dsc->size_allocated + 1; // new size in case we add current task 2706 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) { 2707 // we can add current task to "other" list, no sync needed 2708 *((void **)ptr) = head; 2709 descr->size_allocated = q_sz; 2710 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 2711 } else { 2712 // either queue blocks owner is changing or size limit exceeded 2713 // return old queue to allocating thread (q_th) synchronously, 2714 // and start new list for alloc_thr's tasks 2715 void *old_ptr; 2716 void *tail = head; 2717 void *next = *((void **)head); 2718 while (next != NULL) { 2719 KMP_DEBUG_ASSERT( 2720 // queue size should decrease by 1 each step through the list 2721 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t))) 2722 ->size_allocated + 2723 1 == 2724 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t))) 2725 ->size_allocated); 2726 tail = next; // remember tail node 2727 next = *((void **)next); 2728 } 2729 KMP_DEBUG_ASSERT(q_th != NULL); 2730 // push block to owner's sync free list 2731 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); 2732 /* the next pointer must be set before setting free_list to ptr to avoid 2733 exposing a broken list to other threads, even for an instant. 
*/ 2734 *((void **)tail) = old_ptr; 2735 2736 while (!KMP_COMPARE_AND_STORE_PTR( 2737 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) { 2738 KMP_CPU_PAUSE(); 2739 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); 2740 *((void **)tail) = old_ptr; 2741 } 2742 2743 // start new list of not-selt tasks 2744 this_thr->th.th_free_lists[index].th_free_list_other = ptr; 2745 *((void **)ptr) = NULL; 2746 descr->size_allocated = (size_t)1; // head of queue keeps its length 2747 } 2748 } 2749 } 2750 goto end; 2751 2752 free_call: 2753 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", 2754 __kmp_gtid_from_thread(this_thr), size)); 2755 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */ 2756 brel(this_thr, descr->ptr_allocated); 2757 2758 end: 2759 KE_TRACE(25, ("<- __kmp_fast_free() returns\n")); 2760 2761 } // func __kmp_fast_free 2762 2763 // Initialize the thread free lists related to fast memory 2764 // Only do this when a thread is initially created. 2765 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) { 2766 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr)); 2767 2768 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t)); 2769 } 2770 2771 // Free the memory in the thread free lists related to fast memory 2772 // Only do this when a thread is being reaped (destroyed). 2773 void __kmp_free_fast_memory(kmp_info_t *th) { 2774 // Suppose we use BGET underlying allocator, walk through its structures... 
2775 int bin; 2776 thr_data_t *thr = get_thr_data(th); 2777 void **lst = NULL; 2778 2779 KE_TRACE( 2780 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th))); 2781 2782 __kmp_bget_dequeue(th); // Release any queued buffers 2783 2784 // Dig through free lists and extract all allocated blocks 2785 for (bin = 0; bin < MAX_BGET_BINS; ++bin) { 2786 bfhead_t *b = thr->freelist[bin].ql.flink; 2787 while (b != &thr->freelist[bin]) { 2788 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address 2789 *((void **)b) = 2790 lst; // link the list (override bthr, but keep flink yet) 2791 lst = (void **)b; // push b into lst 2792 } 2793 b = b->ql.flink; // get next buffer 2794 } 2795 } 2796 while (lst != NULL) { 2797 void *next = *lst; 2798 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", 2799 lst, next, th, __kmp_gtid_from_thread(th))); 2800 (*thr->relfcn)(lst); 2801 #if BufStats 2802 // count blocks to prevent problems in __kmp_finalize_bget() 2803 thr->numprel++; /* Nr of expansion block releases */ 2804 thr->numpblk--; /* Total number of blocks */ 2805 #endif 2806 lst = (void **)next; 2807 } 2808 2809 KE_TRACE( 2810 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th))); 2811 } 2812 2813 #endif // USE_FAST_MEMORY 2814