/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
    d->data = 0; // AC: commented out because __kmp_allocate zeroes the
                 // memory
    d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;

  for (size_t offset = 0; d != 0; d = d->next) {
    for (int i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}
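
/* Example (illustrative, not part of the runtime logic): for a threadprivate
   variable whose initial image is all zero bytes, e.g. a hypothetical
   "int tp_counter[4];", the template built by __kmp_init_common_data() keeps
   d->data == NULL and each per-thread copy is produced by memset(); for a
   variable with a non-zero initializer, e.g. "int tp_seed = 42;", the template
   holds a byte copy of the original and __kmp_copy_common_data() reproduces it
   with KMP_MEMCPY(). */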

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);
        if (d_tn == NULL)
          continue;
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
      d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
                          // zeroes the memory
      d_tn->ct.ctor = 0;
      d_tn->cct.cctor = 0;
      d_tn->dt.dtor = 0;
      d_tn->is_vec = FALSE;
      d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
      d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
                          // zeroes the memory
      d_tn->ct.ctor = 0;
      d_tn->cct.cctor = 0;
      d_tn->dt.dtor = 0;
      d_tn->is_vec = FALSE;
      d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
  /* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
     if (tn->par_addr != tn->gbl_addr)
       __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}
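
/* Summary of the initialization policy above (illustrative): if only a
   constructor was registered, the worker's copy is default-constructed in
   place at tn->par_addr; if a copy constructor was registered, the copy is
   constructed from the prototype kept in d_tn->obj_init; otherwise the copy
   is a plain byte image produced from d_tn->pod_init. The master thread's
   node aliases the original variable (par_addr == gbl_addr) and returns
   before this point, so it is never re-initialized here. */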

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
      d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
                            // zeroes the memory
      d_tn->vec_len = 0L;
      d_tn->obj_init = 0;
      d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
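
/* Illustrative sketch of how a compiler might emit the registration for a
   threadprivate C++ object (hypothetical names; the exact thunks and calling
   convention are compiler-specific):

     static MyType tp_obj; // threadprivate object
     static void *tp_obj_ctor(void *p) { return new (p) MyType(); }
     static void tp_obj_dtor(void *p) { static_cast<MyType *>(p)->~MyType(); }

     __kmpc_threadprivate_register(&loc, &tp_obj, tp_obj_ctor,
                                   NULL, // cctor must be NULL (asserted above)
                                   tp_obj_dtor);

   Each worker thread's copy is later constructed by kmp_threadprivate_insert()
   using the registered thunks. */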

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid global thread number
 @param data pointer to data to privatize
 @param size size of data to privatize
 @param cache pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
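
/* Illustrative sketch of a typical compiler-generated access through the cache
   (hypothetical variable and cache names):

     static int tp_x = 5;             // e.g. #pragma omp threadprivate(tp_x)
     static void **tp_x_cache = NULL; // one cache pointer per variable

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     int *p = (int *)__kmpc_threadprivate_cached(&loc, gtid, &tp_x,
                                                 sizeof(tp_x), &tp_x_cache);
     (*p)++; // updates this thread's private copy only

   The first call per thread falls through to __kmpc_threadprivate(); later
   calls are satisfied directly from the per-gtid slot of the cache, as in the
   (*cache)[global_tid] lookup above. */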

// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cache with a new location for the
      // cache, and that function will store the resized cache there at that
      // point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
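
/* Illustrative sketch for the vector form (hypothetical names; the doxygen
   above leaves open whether vector_length is a byte count or an element count,
   which is part of the compiler's contract with its ctorv/dtorv thunks):

     static MyType tp_arr[8]; // threadprivate array of objects

     __kmpc_threadprivate_register_vec(&loc, tp_arr, tp_arr_ctorv,
                                       NULL, // cctor must be NULL (asserted above)
                                       tp_arr_dtorv, 8);

   As used in this file, the registered ctorv/dtorv are invoked with
   (address, vec_len) and are expected to construct or destroy the whole
   vector themselves. */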

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}