1 /*
2 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "kmp.h"
14 #include "kmp_i18n.h"
15 #include "kmp_itt.h"
16
17 #define USE_CHECKS_COMMON
18
19 #define KMP_INLINE_SUBR 1
20
21 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
22 void *data_addr, size_t pc_size);
23 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
24 void *data_addr,
25 size_t pc_size);
26
27 struct shared_table __kmp_threadprivate_d_table;
28
29 static
30 #ifdef KMP_INLINE_SUBR
31 __forceinline
32 #endif
33 struct private_common *
__kmp_threadprivate_find_task_common(struct common_table * tbl,int gtid,void * pc_addr)34 __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
35 void *pc_addr)
36
37 {
38 struct private_common *tn;
39
40 #ifdef KMP_TASK_COMMON_DEBUG
41 KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
42 "address %p\n",
43 gtid, pc_addr));
44 dump_list();
45 #endif
46
47 for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
48 if (tn->gbl_addr == pc_addr) {
49 #ifdef KMP_TASK_COMMON_DEBUG
50 KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
51 "node %p on list\n",
52 gtid, pc_addr));
53 #endif
54 return tn;
55 }
56 }
57 return 0;
58 }
59
60 static
61 #ifdef KMP_INLINE_SUBR
62 __forceinline
63 #endif
64 struct shared_common *
__kmp_find_shared_task_common(struct shared_table * tbl,int gtid,void * pc_addr)65 __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
66 void *pc_addr) {
67 struct shared_common *tn;
68
69 for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
70 if (tn->gbl_addr == pc_addr) {
71 #ifdef KMP_TASK_COMMON_DEBUG
72 KC_TRACE(
73 10,
74 ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
75 gtid, pc_addr));
76 #endif
77 return tn;
78 }
79 }
80 return 0;
81 }
82
83 // Create a template for the data initialized storage. Either the template is
84 // NULL indicating zero fill, or the template is a copy of the original data.
__kmp_init_common_data(void * pc_addr,size_t pc_size)85 static struct private_data *__kmp_init_common_data(void *pc_addr,
86 size_t pc_size) {
87 struct private_data *d;
88 size_t i;
89 char *p;
90
91 d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
92 /*
93 d->data = 0; // AC: commented out because __kmp_allocate zeroes the
94 memory
95 d->next = 0;
96 */
97 d->size = pc_size;
98 d->more = 1;
99
100 p = (char *)pc_addr;
101
102 for (i = pc_size; i > 0; --i) {
103 if (*p++ != '\0') {
104 d->data = __kmp_allocate(pc_size);
105 KMP_MEMCPY(d->data, pc_addr, pc_size);
106 break;
107 }
108 }
109
110 return d;
111 }
112
113 // Initialize the data area from the template.
__kmp_copy_common_data(void * pc_addr,struct private_data * d)114 static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
115 char *addr = (char *)pc_addr;
116
117 for (size_t offset = 0; d != 0; d = d->next) {
118 for (int i = d->more; i > 0; --i) {
119 if (d->data == 0)
120 memset(&addr[offset], '\0', d->size);
121 else
122 KMP_MEMCPY(&addr[offset], d->data, d->size);
123 offset += d->size;
124 }
125 }
126 }
127
128 /* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
__kmp_common_initialize(void)129 void __kmp_common_initialize(void) {
130 if (!TCR_4(__kmp_init_common)) {
131 int q;
132 #ifdef KMP_DEBUG
133 int gtid;
134 #endif
135
136 __kmp_threadpriv_cache_list = NULL;
137
138 #ifdef KMP_DEBUG
139 /* verify the uber masters were initialized */
140 for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
141 if (__kmp_root[gtid]) {
142 KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
143 for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
144 KMP_DEBUG_ASSERT(
145 !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
146 /* __kmp_root[ gitd ]-> r.r_uber_thread ->
147 * th.th_pri_common -> data[ q ] = 0;*/
148 }
149 #endif /* KMP_DEBUG */
150
151 for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
152 __kmp_threadprivate_d_table.data[q] = 0;
153
154 TCW_4(__kmp_init_common, TRUE);
155 }
156 }
157
158 /* Call all destructors for threadprivate data belonging to all threads.
159 Currently unused! */
__kmp_common_destroy(void)160 void __kmp_common_destroy(void) {
161 if (TCR_4(__kmp_init_common)) {
162 int q;
163
164 TCW_4(__kmp_init_common, FALSE);
165
166 for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
167 int gtid;
168 struct private_common *tn;
169 struct shared_common *d_tn;
170
171 /* C++ destructors need to be called once per thread before exiting.
172 Don't call destructors for primary thread though unless we used copy
173 constructor */
174
175 for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
176 d_tn = d_tn->next) {
177 if (d_tn->is_vec) {
178 if (d_tn->dt.dtorv != 0) {
179 for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
180 if (__kmp_threads[gtid]) {
181 if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
182 : (!KMP_UBER_GTID(gtid))) {
183 tn = __kmp_threadprivate_find_task_common(
184 __kmp_threads[gtid]->th.th_pri_common, gtid,
185 d_tn->gbl_addr);
186 if (tn) {
187 (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
188 }
189 }
190 }
191 }
192 if (d_tn->obj_init != 0) {
193 (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
194 }
195 }
196 } else {
197 if (d_tn->dt.dtor != 0) {
198 for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
199 if (__kmp_threads[gtid]) {
200 if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
201 : (!KMP_UBER_GTID(gtid))) {
202 tn = __kmp_threadprivate_find_task_common(
203 __kmp_threads[gtid]->th.th_pri_common, gtid,
204 d_tn->gbl_addr);
205 if (tn) {
206 (*d_tn->dt.dtor)(tn->par_addr);
207 }
208 }
209 }
210 }
211 if (d_tn->obj_init != 0) {
212 (*d_tn->dt.dtor)(d_tn->obj_init);
213 }
214 }
215 }
216 }
217 __kmp_threadprivate_d_table.data[q] = 0;
218 }
219 }
220 }
221
222 /* Call all destructors for threadprivate data belonging to this thread */
__kmp_common_destroy_gtid(int gtid)223 void __kmp_common_destroy_gtid(int gtid) {
224 struct private_common *tn;
225 struct shared_common *d_tn;
226
227 if (!TCR_4(__kmp_init_gtid)) {
228 // This is possible when one of multiple roots initiates early library
229 // termination in a sequential region while other teams are active, and its
230 // child threads are about to end.
231 return;
232 }
233
234 KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
235 if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {
236
237 if (TCR_4(__kmp_init_common)) {
238
239 /* Cannot do this here since not all threads have destroyed their data */
240 /* TCW_4(__kmp_init_common, FALSE); */
241
242 for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {
243
244 d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
245 tn->gbl_addr);
246 if (d_tn == NULL)
247 continue;
248 if (d_tn->is_vec) {
249 if (d_tn->dt.dtorv != 0) {
250 (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
251 if (d_tn->obj_init != 0) {
252 (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
253 }
254 }
255 } else {
256 if (d_tn->dt.dtor != 0) {
257 (void)(*d_tn->dt.dtor)(tn->par_addr);
258 if (d_tn->obj_init != 0) {
259 (void)(*d_tn->dt.dtor)(d_tn->obj_init);
260 }
261 }
262 }
263 }
264 KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
265 "complete\n",
266 gtid));
267 }
268 }
269 }
270
#ifdef KMP_TASK_COMMON_DEBUG
// Debug aid: trace every (original address -> private address) pair recorded
// in the private tables of all existing threads. A header line is emitted for
// each non-empty bucket before its entries.
static void dump_list(void) {
  for (int t = 0; t < __kmp_all_nth; ++t) {
    if (!__kmp_threads[t])
      continue;

    for (int slot = 0; slot < KMP_HASH_TABLE_SIZE; ++slot) {
      struct private_common *node =
          __kmp_threads[t]->th.th_pri_common->data[slot];
      if (!node)
        continue;

      KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", t));

      for (; node; node = node->next) {
        KC_TRACE(10,
                 ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                  node->gbl_addr, node->par_addr));
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */
295
296 // NOTE: this routine is to be called only from the serial part of the program.
kmp_threadprivate_insert_private_data(int gtid,void * pc_addr,void * data_addr,size_t pc_size)297 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
298 void *data_addr, size_t pc_size) {
299 struct shared_common **lnk_tn, *d_tn;
300 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
301 __kmp_threads[gtid]->th.th_root->r.r_active == 0);
302
303 d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
304 pc_addr);
305
306 if (d_tn == 0) {
307 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
308
309 d_tn->gbl_addr = pc_addr;
310 d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
311 /*
312 d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
313 zeroes the memory
314 d_tn->ct.ctor = 0;
315 d_tn->cct.cctor = 0;;
316 d_tn->dt.dtor = 0;
317 d_tn->is_vec = FALSE;
318 d_tn->vec_len = 0L;
319 */
320 d_tn->cmn_size = pc_size;
321
322 __kmp_acquire_lock(&__kmp_global_lock, gtid);
323
324 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
325
326 d_tn->next = *lnk_tn;
327 *lnk_tn = d_tn;
328
329 __kmp_release_lock(&__kmp_global_lock, gtid);
330 }
331 }
332
kmp_threadprivate_insert(int gtid,void * pc_addr,void * data_addr,size_t pc_size)333 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
334 void *data_addr,
335 size_t pc_size) {
336 struct private_common *tn, **tt;
337 struct shared_common *d_tn;
338
339 /* +++++++++ START OF CRITICAL SECTION +++++++++ */
340 __kmp_acquire_lock(&__kmp_global_lock, gtid);
341
342 tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));
343
344 tn->gbl_addr = pc_addr;
345
346 d_tn = __kmp_find_shared_task_common(
347 &__kmp_threadprivate_d_table, gtid,
348 pc_addr); /* Only the MASTER data table exists. */
349
350 if (d_tn != 0) {
351 /* This threadprivate variable has already been seen. */
352
353 if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
354 d_tn->cmn_size = pc_size;
355
356 if (d_tn->is_vec) {
357 if (d_tn->ct.ctorv != 0) {
358 /* Construct from scratch so no prototype exists */
359 d_tn->obj_init = 0;
360 } else if (d_tn->cct.cctorv != 0) {
361 /* Now data initialize the prototype since it was previously
362 * registered */
363 d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
364 (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
365 } else {
366 d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
367 }
368 } else {
369 if (d_tn->ct.ctor != 0) {
370 /* Construct from scratch so no prototype exists */
371 d_tn->obj_init = 0;
372 } else if (d_tn->cct.cctor != 0) {
373 /* Now data initialize the prototype since it was previously
374 registered */
375 d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
376 (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
377 } else {
378 d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
379 }
380 }
381 }
382 } else {
383 struct shared_common **lnk_tn;
384
385 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
386 d_tn->gbl_addr = pc_addr;
387 d_tn->cmn_size = pc_size;
388 d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
389 /*
390 d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
391 zeroes the memory
392 d_tn->ct.ctor = 0;
393 d_tn->cct.cctor = 0;
394 d_tn->dt.dtor = 0;
395 d_tn->is_vec = FALSE;
396 d_tn->vec_len = 0L;
397 */
398 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
399
400 d_tn->next = *lnk_tn;
401 *lnk_tn = d_tn;
402 }
403
404 tn->cmn_size = d_tn->cmn_size;
405
406 if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
407 tn->par_addr = (void *)pc_addr;
408 } else {
409 tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
410 }
411
412 __kmp_release_lock(&__kmp_global_lock, gtid);
413 /* +++++++++ END OF CRITICAL SECTION +++++++++ */
414
415 #ifdef USE_CHECKS_COMMON
416 if (pc_size > d_tn->cmn_size) {
417 KC_TRACE(
418 10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
419 " ,%" KMP_UINTPTR_SPEC ")\n",
420 pc_addr, pc_size, d_tn->cmn_size));
421 KMP_FATAL(TPCommonBlocksInconsist);
422 }
423 #endif /* USE_CHECKS_COMMON */
424
425 tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);
426
427 #ifdef KMP_TASK_COMMON_DEBUG
428 if (*tt != 0) {
429 KC_TRACE(
430 10,
431 ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
432 gtid, pc_addr));
433 }
434 #endif
435 tn->next = *tt;
436 *tt = tn;
437
438 #ifdef KMP_TASK_COMMON_DEBUG
439 KC_TRACE(10,
440 ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
441 gtid, pc_addr));
442 dump_list();
443 #endif
444
445 /* Link the node into a simple list */
446
447 tn->link = __kmp_threads[gtid]->th.th_pri_head;
448 __kmp_threads[gtid]->th.th_pri_head = tn;
449
450 if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
451 return tn;
452
453 /* if C++ object with copy constructor, use it;
454 * else if C++ object with constructor, use it for the non-primary thread
455 copies only;
456 * else use pod_init and memcpy
457 *
458 * C++ constructors need to be called once for each non-primary thread on
459 * allocate
460 * C++ copy constructors need to be called once for each thread on allocate */
461
462 /* C++ object with constructors/destructors; don't call constructors for
463 primary thread though */
464 if (d_tn->is_vec) {
465 if (d_tn->ct.ctorv != 0) {
466 (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
467 } else if (d_tn->cct.cctorv != 0) {
468 (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
469 } else if (tn->par_addr != tn->gbl_addr) {
470 __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
471 }
472 } else {
473 if (d_tn->ct.ctor != 0) {
474 (void)(*d_tn->ct.ctor)(tn->par_addr);
475 } else if (d_tn->cct.cctor != 0) {
476 (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
477 } else if (tn->par_addr != tn->gbl_addr) {
478 __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
479 }
480 }
481 /* !BUILD_OPENMP_C
482 if (tn->par_addr != tn->gbl_addr)
483 __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
484
485 return tn;
486 }
487
488 /* ------------------------------------------------------------------------ */
489 /* We are currently parallel, and we know the thread id. */
490 /* ------------------------------------------------------------------------ */
491
492 /*!
493 @ingroup THREADPRIVATE
494
495 @param loc source location information
496 @param data pointer to data being privatized
497 @param ctor pointer to constructor function for data
498 @param cctor pointer to copy constructor function for data
499 @param dtor pointer to destructor function for data
500
501 Register constructors and destructors for thread private data.
502 This function is called when executing in parallel, when we know the thread id.
503 */
__kmpc_threadprivate_register(ident_t * loc,void * data,kmpc_ctor ctor,kmpc_cctor cctor,kmpc_dtor dtor)504 void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
505 kmpc_cctor cctor, kmpc_dtor dtor) {
506 struct shared_common *d_tn, **lnk_tn;
507
508 KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));
509
510 #ifdef USE_CHECKS_COMMON
511 /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
512 KMP_ASSERT(cctor == 0);
513 #endif /* USE_CHECKS_COMMON */
514
515 /* Only the global data table exists. */
516 d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);
517
518 if (d_tn == 0) {
519 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
520 d_tn->gbl_addr = data;
521
522 d_tn->ct.ctor = ctor;
523 d_tn->cct.cctor = cctor;
524 d_tn->dt.dtor = dtor;
525 /*
526 d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
527 zeroes the memory
528 d_tn->vec_len = 0L;
529 d_tn->obj_init = 0;
530 d_tn->pod_init = 0;
531 */
532 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
533
534 d_tn->next = *lnk_tn;
535 *lnk_tn = d_tn;
536 }
537 }
538
__kmpc_threadprivate(ident_t * loc,kmp_int32 global_tid,void * data,size_t size)539 void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
540 size_t size) {
541 void *ret;
542 struct private_common *tn;
543
544 KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));
545
546 #ifdef USE_CHECKS_COMMON
547 if (!__kmp_init_serial)
548 KMP_FATAL(RTLNotInitialized);
549 #endif /* USE_CHECKS_COMMON */
550
551 if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
552 /* The parallel address will NEVER overlap with the data_address */
553 /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
554 * data_address; use data_address = data */
555
556 KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
557 global_tid));
558 kmp_threadprivate_insert_private_data(global_tid, data, data, size);
559
560 ret = data;
561 } else {
562 KC_TRACE(
563 50,
564 ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
565 global_tid, data));
566 tn = __kmp_threadprivate_find_task_common(
567 __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);
568
569 if (tn) {
570 KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
571 #ifdef USE_CHECKS_COMMON
572 if ((size_t)size > tn->cmn_size) {
573 KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
574 " ,%" KMP_UINTPTR_SPEC ")\n",
575 data, size, tn->cmn_size));
576 KMP_FATAL(TPCommonBlocksInconsist);
577 }
578 #endif /* USE_CHECKS_COMMON */
579 } else {
580 /* The parallel address will NEVER overlap with the data_address */
581 /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
582 * data_address = data */
583 KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
584 tn = kmp_threadprivate_insert(global_tid, data, data, size);
585 }
586
587 ret = tn->par_addr;
588 }
589 KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
590 global_tid, ret));
591
592 return ret;
593 }
594
__kmp_find_cache(void * data)595 static kmp_cached_addr_t *__kmp_find_cache(void *data) {
596 kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
597 while (ptr && ptr->data != data)
598 ptr = ptr->next;
599 return ptr;
600 }
601
602 /*!
603 @ingroup THREADPRIVATE
604 @param loc source location information
605 @param global_tid global thread number
606 @param data pointer to data to privatize
607 @param size size of data to privatize
608 @param cache pointer to cache
609 @return pointer to private storage
610
611 Allocate private storage for threadprivate data.
612 */
613 void *
__kmpc_threadprivate_cached(ident_t * loc,kmp_int32 global_tid,void * data,size_t size,void *** cache)614 __kmpc_threadprivate_cached(ident_t *loc,
615 kmp_int32 global_tid, // gtid.
616 void *data, // Pointer to original global variable.
617 size_t size, // Size of original global variable.
618 void ***cache) {
619 KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
620 "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
621 global_tid, *cache, data, size));
622
623 if (TCR_PTR(*cache) == 0) {
624 __kmp_acquire_lock(&__kmp_global_lock, global_tid);
625
626 if (TCR_PTR(*cache) == 0) {
627 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
628 // Compiler often passes in NULL cache, even if it's already been created
629 void **my_cache;
630 kmp_cached_addr_t *tp_cache_addr;
631 // Look for an existing cache
632 tp_cache_addr = __kmp_find_cache(data);
633 if (!tp_cache_addr) { // Cache was never created; do it now
634 __kmp_tp_cached = 1;
635 KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
636 sizeof(void *) * __kmp_tp_capacity +
637 sizeof(kmp_cached_addr_t)););
638 // No need to zero the allocated memory; __kmp_allocate does that.
639 KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
640 "address %p\n",
641 global_tid, my_cache));
642 /* TODO: free all this memory in __kmp_common_destroy using
643 * __kmp_threadpriv_cache_list */
644 /* Add address of mycache to linked list for cleanup later */
645 tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
646 tp_cache_addr->addr = my_cache;
647 tp_cache_addr->data = data;
648 tp_cache_addr->compiler_cache = cache;
649 tp_cache_addr->next = __kmp_threadpriv_cache_list;
650 __kmp_threadpriv_cache_list = tp_cache_addr;
651 } else { // A cache was already created; use it
652 my_cache = tp_cache_addr->addr;
653 tp_cache_addr->compiler_cache = cache;
654 }
655 KMP_MB();
656
657 TCW_PTR(*cache, my_cache);
658 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
659
660 KMP_MB();
661 }
662 __kmp_release_lock(&__kmp_global_lock, global_tid);
663 }
664
665 void *ret;
666 if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
667 ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);
668
669 TCW_PTR((*cache)[global_tid], ret);
670 }
671 KC_TRACE(10,
672 ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
673 global_tid, ret));
674 return ret;
675 }
676
677 // This function should only be called when both __kmp_tp_cached_lock and
678 // kmp_forkjoin_lock are held.
__kmp_threadprivate_resize_cache(int newCapacity)679 void __kmp_threadprivate_resize_cache(int newCapacity) {
680 KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
681 newCapacity));
682
683 kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
684
685 while (ptr) {
686 if (ptr->data) { // this location has an active cache; resize it
687 void **my_cache;
688 KMP_ITT_IGNORE(my_cache =
689 (void **)__kmp_allocate(sizeof(void *) * newCapacity +
690 sizeof(kmp_cached_addr_t)););
691 // No need to zero the allocated memory; __kmp_allocate does that.
692 KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
693 my_cache));
694 // Now copy old cache into new cache
695 void **old_cache = ptr->addr;
696 for (int i = 0; i < __kmp_tp_capacity; ++i) {
697 my_cache[i] = old_cache[i];
698 }
699
700 // Add address of new my_cache to linked list for cleanup later
701 kmp_cached_addr_t *tp_cache_addr;
702 tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
703 tp_cache_addr->addr = my_cache;
704 tp_cache_addr->data = ptr->data;
705 tp_cache_addr->compiler_cache = ptr->compiler_cache;
706 tp_cache_addr->next = __kmp_threadpriv_cache_list;
707 __kmp_threadpriv_cache_list = tp_cache_addr;
708
709 // Copy new cache to compiler's location: We can copy directly
710 // to (*compiler_cache) if compiler guarantees it will keep
711 // using the same location for the cache. This is not yet true
712 // for some compilers, in which case we have to check if
713 // compiler_cache is still pointing at old cache, and if so, we
714 // can point it at the new cache with an atomic compare&swap
715 // operation. (Old method will always work, but we should shift
716 // to new method (commented line below) when Intel and Clang
717 // compilers use new method.)
718 (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
719 my_cache);
720 // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);
721
722 // If the store doesn't happen here, the compiler's old behavior will
723 // inevitably call __kmpc_threadprivate_cache with a new location for the
724 // cache, and that function will store the resized cache there at that
725 // point.
726
727 // Nullify old cache's data pointer so we skip it next time
728 ptr->data = NULL;
729 }
730 ptr = ptr->next;
731 }
732 // After all caches are resized, update __kmp_tp_capacity to the new size
733 *(volatile int *)&__kmp_tp_capacity = newCapacity;
734 }
735
736 /*!
737 @ingroup THREADPRIVATE
738 @param loc source location information
739 @param data pointer to data being privatized
740 @param ctor pointer to constructor function for data
741 @param cctor pointer to copy constructor function for data
742 @param dtor pointer to destructor function for data
743 @param vector_length length of the vector (bytes or elements?)
744 Register vector constructors and destructors for thread private data.
745 */
__kmpc_threadprivate_register_vec(ident_t * loc,void * data,kmpc_ctor_vec ctor,kmpc_cctor_vec cctor,kmpc_dtor_vec dtor,size_t vector_length)746 void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
747 kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
748 kmpc_dtor_vec dtor,
749 size_t vector_length) {
750 struct shared_common *d_tn, **lnk_tn;
751
752 KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));
753
754 #ifdef USE_CHECKS_COMMON
755 /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
756 KMP_ASSERT(cctor == 0);
757 #endif /* USE_CHECKS_COMMON */
758
759 d_tn = __kmp_find_shared_task_common(
760 &__kmp_threadprivate_d_table, -1,
761 data); /* Only the global data table exists. */
762
763 if (d_tn == 0) {
764 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
765 d_tn->gbl_addr = data;
766
767 d_tn->ct.ctorv = ctor;
768 d_tn->cct.cctorv = cctor;
769 d_tn->dt.dtorv = dtor;
770 d_tn->is_vec = TRUE;
771 d_tn->vec_len = (size_t)vector_length;
772 // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
773 // d_tn->pod_init = 0;
774 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
775
776 d_tn->next = *lnk_tn;
777 *lnk_tn = d_tn;
778 }
779 }
780
__kmp_cleanup_threadprivate_caches()781 void __kmp_cleanup_threadprivate_caches() {
782 kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
783
784 while (ptr) {
785 void **cache = ptr->addr;
786 __kmp_threadpriv_cache_list = ptr->next;
787 if (*ptr->compiler_cache)
788 *ptr->compiler_cache = NULL;
789 ptr->compiler_cache = NULL;
790 ptr->data = NULL;
791 ptr->addr = NULL;
792 ptr->next = NULL;
793 // Threadprivate data pointed at by cache entries are destroyed at end of
794 // __kmp_launch_thread with __kmp_common_destroy_gtid.
795 __kmp_free(cache); // implicitly frees ptr too
796 ptr = __kmp_threadpriv_cache_list;
797 }
798 }
799