xref: /freebsd/contrib/llvm-project/openmp/runtime/src/kmp_threadprivate.cpp (revision a7623790fb345e6dc986dfd31df0ace115e6f2e4)
1 /*
2  * kmp_threadprivate.cpp -- OpenMP threadprivate support library
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_i18n.h"
15 #include "kmp_itt.h"
16 
17 #define USE_CHECKS_COMMON
18 
19 #define KMP_INLINE_SUBR 1
20 
21 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
22                                            void *data_addr, size_t pc_size);
23 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
24                                                 void *data_addr,
25                                                 size_t pc_size);
26 
27 struct shared_table __kmp_threadprivate_d_table;
28 
29 static
30 #ifdef KMP_INLINE_SUBR
31     __forceinline
32 #endif
33     struct private_common *
34     __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
35                                          void *pc_addr)
36 
37 {
38   struct private_common *tn;
39 
40 #ifdef KMP_TASK_COMMON_DEBUG
41   KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
42                 "address %p\n",
43                 gtid, pc_addr));
44   dump_list();
45 #endif
46 
47   for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
48     if (tn->gbl_addr == pc_addr) {
49 #ifdef KMP_TASK_COMMON_DEBUG
50       KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
51                     "node %p on list\n",
52                     gtid, pc_addr));
53 #endif
54       return tn;
55     }
56   }
57   return 0;
58 }
59 
60 static
61 #ifdef KMP_INLINE_SUBR
62     __forceinline
63 #endif
64     struct shared_common *
65     __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
66                                   void *pc_addr) {
67   struct shared_common *tn;
68 
69   for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
70     if (tn->gbl_addr == pc_addr) {
71 #ifdef KMP_TASK_COMMON_DEBUG
72       KC_TRACE(
73           10,
74           ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
75            gtid, pc_addr));
76 #endif
77       return tn;
78     }
79   }
80   return 0;
81 }
82 
83 // Create a template for initializing the threadprivate data storage. The
84 // template is either NULL, indicating zero fill, or a copy of the original data.
85 static struct private_data *__kmp_init_common_data(void *pc_addr,
86                                                    size_t pc_size) {
87   struct private_data *d;
88   size_t i;
89   char *p;
90 
91   d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
92   /*
93       d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
94      memory
95       d->next = 0;
96   */
97   d->size = pc_size;
98   d->more = 1;
99 
100   p = (char *)pc_addr;
101 
102   for (i = pc_size; i > 0; --i) {
103     if (*p++ != '\0') {
104       d->data = __kmp_allocate(pc_size);
105       KMP_MEMCPY(d->data, pc_addr, pc_size);
106       break;
107     }
108   }
109 
110   return d;
111 }
112 
113 // Initialize the data area from the template.
114 static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
115   char *addr = (char *)pc_addr;
116   int i, offset;
117 
118   for (offset = 0; d != 0; d = d->next) {
119     for (i = d->more; i > 0; --i) {
120       if (d->data == 0)
121         memset(&addr[offset], '\0', d->size);
122       else
123         KMP_MEMCPY(&addr[offset], d->data, d->size);
124       offset += d->size;
125     }
126   }
127 }
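/* Illustration only (not part of the runtime): a minimal sketch of how the
   two helpers above cooperate, using a hypothetical 16-byte global `blob`.

     char blob[16] = {0};                              // all zero bytes
     struct private_data *d = __kmp_init_common_data(blob, sizeof(blob));
     // d->data stays NULL, so the template means "zero fill".
     char per_thread_copy[16];
     __kmp_copy_common_data(per_thread_copy, d);       // memset to '\0'

   Had `blob` contained any nonzero byte, d->data would hold a snapshot of it
   and __kmp_copy_common_data would KMP_MEMCPY that snapshot instead. */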
128 
129 /* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
130 void __kmp_common_initialize(void) {
131   if (!TCR_4(__kmp_init_common)) {
132     int q;
133 #ifdef KMP_DEBUG
134     int gtid;
135 #endif
136 
137     __kmp_threadpriv_cache_list = NULL;
138 
139 #ifdef KMP_DEBUG
140     /* verify the uber masters were initialized */
141     for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
142       if (__kmp_root[gtid]) {
143         KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
144         for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
145           KMP_DEBUG_ASSERT(
146               !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
147         /*                    __kmp_root[ gtid ]-> r.r_uber_thread ->
148          * th.th_pri_common -> data[ q ] = 0;*/
149       }
150 #endif /* KMP_DEBUG */
151 
152     for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
153       __kmp_threadprivate_d_table.data[q] = 0;
154 
155     TCW_4(__kmp_init_common, TRUE);
156   }
157 }
158 
159 /* Call all destructors for threadprivate data belonging to all threads.
160    Currently unused! */
161 void __kmp_common_destroy(void) {
162   if (TCR_4(__kmp_init_common)) {
163     int q;
164 
165     TCW_4(__kmp_init_common, FALSE);
166 
167     for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
168       int gtid;
169       struct private_common *tn;
170       struct shared_common *d_tn;
171 
172       /* C++ destructors need to be called once per thread before exiting.
173          Don't call destructors for the master thread, though, unless we used the
174          copy constructor. */
175 
176       for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
177            d_tn = d_tn->next) {
178         if (d_tn->is_vec) {
179           if (d_tn->dt.dtorv != 0) {
180             for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
181               if (__kmp_threads[gtid]) {
182                 if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
183                                        : (!KMP_UBER_GTID(gtid))) {
184                   tn = __kmp_threadprivate_find_task_common(
185                       __kmp_threads[gtid]->th.th_pri_common, gtid,
186                       d_tn->gbl_addr);
187                   if (tn) {
188                     (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
189                   }
190                 }
191               }
192             }
193             if (d_tn->obj_init != 0) {
194               (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
195             }
196           }
197         } else {
198           if (d_tn->dt.dtor != 0) {
199             for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
200               if (__kmp_threads[gtid]) {
201                 if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
202                                        : (!KMP_UBER_GTID(gtid))) {
203                   tn = __kmp_threadprivate_find_task_common(
204                       __kmp_threads[gtid]->th.th_pri_common, gtid,
205                       d_tn->gbl_addr);
206                   if (tn) {
207                     (*d_tn->dt.dtor)(tn->par_addr);
208                   }
209                 }
210               }
211             }
212             if (d_tn->obj_init != 0) {
213               (*d_tn->dt.dtor)(d_tn->obj_init);
214             }
215           }
216         }
217       }
218       __kmp_threadprivate_d_table.data[q] = 0;
219     }
220   }
221 }
222 
223 /* Call all destructors for threadprivate data belonging to this thread */
224 void __kmp_common_destroy_gtid(int gtid) {
225   struct private_common *tn;
226   struct shared_common *d_tn;
227 
228   if (!TCR_4(__kmp_init_gtid)) {
229     // This is possible when one of multiple roots initiates early library
230     // termination in a sequential region while other teams are active, and its
231     // child threads are about to end.
232     return;
233   }
234 
235   KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
236   if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {
237 
238     if (TCR_4(__kmp_init_common)) {
239 
240       /* Cannot do this here since not all threads have destroyed their data */
241       /* TCW_4(__kmp_init_common, FALSE); */
242 
243       for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {
244 
245         d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
246                                              tn->gbl_addr);
247 
248         KMP_DEBUG_ASSERT(d_tn);
249 
250         if (d_tn->is_vec) {
251           if (d_tn->dt.dtorv != 0) {
252             (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
253           }
254           if (d_tn->obj_init != 0) {
255             (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
256           }
257         } else {
258           if (d_tn->dt.dtor != 0) {
259             (void)(*d_tn->dt.dtor)(tn->par_addr);
260           }
261           if (d_tn->obj_init != 0) {
262             (void)(*d_tn->dt.dtor)(d_tn->obj_init);
263           }
264         }
265       }
266       KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
267                     "complete\n",
268                     gtid));
269     }
270   }
271 }
272 
273 #ifdef KMP_TASK_COMMON_DEBUG
274 static void dump_list(void) {
275   int p, q;
276 
277   for (p = 0; p < __kmp_all_nth; ++p) {
278     if (!__kmp_threads[p])
279       continue;
280     for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
281       if (__kmp_threads[p]->th.th_pri_common->data[q]) {
282         struct private_common *tn;
283 
284         KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));
285 
286         for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
287              tn = tn->next) {
288           KC_TRACE(10,
289                    ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
290                     tn->gbl_addr, tn->par_addr));
291         }
292       }
293     }
294   }
295 }
296 #endif /* KMP_TASK_COMMON_DEBUG */
297 
298 // NOTE: this routine is to be called only from the serial part of the program.
299 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
300                                            void *data_addr, size_t pc_size) {
301   struct shared_common **lnk_tn, *d_tn;
302   KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
303                    __kmp_threads[gtid]->th.th_root->r.r_active == 0);
304 
305   d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
306                                        pc_addr);
307 
308   if (d_tn == 0) {
309     d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
310 
311     d_tn->gbl_addr = pc_addr;
312     d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
313     /*
314             d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
315        zeroes the memory
316             d_tn->ct.ctor = 0;
317             d_tn->cct.cctor = 0;
318             d_tn->dt.dtor = 0;
319             d_tn->is_vec = FALSE;
320             d_tn->vec_len = 0L;
321     */
322     d_tn->cmn_size = pc_size;
323 
324     __kmp_acquire_lock(&__kmp_global_lock, gtid);
325 
326     lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
327 
328     d_tn->next = *lnk_tn;
329     *lnk_tn = d_tn;
330 
331     __kmp_release_lock(&__kmp_global_lock, gtid);
332   }
333 }
334 
335 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
336                                                 void *data_addr,
337                                                 size_t pc_size) {
338   struct private_common *tn, **tt;
339   struct shared_common *d_tn;
340 
341   /* +++++++++ START OF CRITICAL SECTION +++++++++ */
342   __kmp_acquire_lock(&__kmp_global_lock, gtid);
343 
344   tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));
345 
346   tn->gbl_addr = pc_addr;
347 
348   d_tn = __kmp_find_shared_task_common(
349       &__kmp_threadprivate_d_table, gtid,
350       pc_addr); /* Only the MASTER data table exists. */
351 
352   if (d_tn != 0) {
353     /* This threadprivate variable has already been seen. */
354 
355     if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
356       d_tn->cmn_size = pc_size;
357 
358       if (d_tn->is_vec) {
359         if (d_tn->ct.ctorv != 0) {
360           /* Construct from scratch so no prototype exists */
361           d_tn->obj_init = 0;
362         } else if (d_tn->cct.cctorv != 0) {
363           /* Now data initialize the prototype since it was previously
364            * registered */
365           d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
366           (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
367         } else {
368           d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
369         }
370       } else {
371         if (d_tn->ct.ctor != 0) {
372           /* Construct from scratch so no prototype exists */
373           d_tn->obj_init = 0;
374         } else if (d_tn->cct.cctor != 0) {
375           /* Now data initialize the prototype since it was previously
376              registered */
377           d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
378           (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
379         } else {
380           d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
381         }
382       }
383     }
384   } else {
385     struct shared_common **lnk_tn;
386 
387     d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
388     d_tn->gbl_addr = pc_addr;
389     d_tn->cmn_size = pc_size;
390     d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
391     /*
392             d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
393        zeroes the memory
394             d_tn->ct.ctor = 0;
395             d_tn->cct.cctor = 0;
396             d_tn->dt.dtor = 0;
397             d_tn->is_vec = FALSE;
398             d_tn->vec_len = 0L;
399     */
400     lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
401 
402     d_tn->next = *lnk_tn;
403     *lnk_tn = d_tn;
404   }
405 
406   tn->cmn_size = d_tn->cmn_size;
407 
408   if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
409     tn->par_addr = (void *)pc_addr;
410   } else {
411     tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
412   }
413 
414   __kmp_release_lock(&__kmp_global_lock, gtid);
415 /* +++++++++ END OF CRITICAL SECTION +++++++++ */
416 
417 #ifdef USE_CHECKS_COMMON
418   if (pc_size > d_tn->cmn_size) {
419     KC_TRACE(
420         10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
421              " ,%" KMP_UINTPTR_SPEC ")\n",
422              pc_addr, pc_size, d_tn->cmn_size));
423     KMP_FATAL(TPCommonBlocksInconsist);
424   }
425 #endif /* USE_CHECKS_COMMON */
426 
427   tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);
428 
429 #ifdef KMP_TASK_COMMON_DEBUG
430   if (*tt != 0) {
431     KC_TRACE(
432         10,
433         ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
434          gtid, pc_addr));
435   }
436 #endif
437   tn->next = *tt;
438   *tt = tn;
439 
440 #ifdef KMP_TASK_COMMON_DEBUG
441   KC_TRACE(10,
442            ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
443             gtid, pc_addr));
444   dump_list();
445 #endif
446 
447   /* Link the node into a simple list */
448 
449   tn->link = __kmp_threads[gtid]->th.th_pri_head;
450   __kmp_threads[gtid]->th.th_pri_head = tn;
451 
452   if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
453     return tn;
454 
455   /* if C++ object with copy constructor, use it;
456    * else if C++ object with constructor, use it for the non-master copies only;
457    * else use pod_init and memcpy
458    *
459    * C++ constructors need to be called once for each non-master thread on
460    * allocate
461    * C++ copy constructors need to be called once for each thread on allocate */
462 
463   /* C++ object with constructors/destructors; don't call constructors for
464      master thread though */
465   if (d_tn->is_vec) {
466     if (d_tn->ct.ctorv != 0) {
467       (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
468     } else if (d_tn->cct.cctorv != 0) {
469       (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
470     } else if (tn->par_addr != tn->gbl_addr) {
471       __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
472     }
473   } else {
474     if (d_tn->ct.ctor != 0) {
475       (void)(*d_tn->ct.ctor)(tn->par_addr);
476     } else if (d_tn->cct.cctor != 0) {
477       (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
478     } else if (tn->par_addr != tn->gbl_addr) {
479       __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
480     }
481   }
482   /* !BUILD_OPENMP_C
483       if (tn->par_addr != tn->gbl_addr)
484           __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
485 
486   return tn;
487 }
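/* Illustration only: for a non-master thread, the private copy allocated by
   kmp_threadprivate_insert above is initialized by exactly one of three
   mechanisms, in this order of precedence (mirroring the if/else chains at
   the end of the routine):

     if      (constructor registered)       ctor(par_addr);             // default-construct in place
     else if (copy constructor registered)  cctor(par_addr, obj_init);  // copy from the prototype
     else                                   __kmp_copy_common_data(par_addr, pod_init); // POD zero fill / memcpy

   Uber (root) threads (or, when __kmp_foreign_tp is set, only the initial
   thread) keep the original storage and return before this step runs. */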
488 
489 /* ------------------------------------------------------------------------ */
490 /* We are currently parallel, and we know the thread id.                    */
491 /* ------------------------------------------------------------------------ */
492 
493 /*!
494  @ingroup THREADPRIVATE
495 
496  @param loc source location information
497  @param data  pointer to data being privatized
498  @param ctor  pointer to constructor function for data
499  @param cctor  pointer to copy constructor function for data
500  @param dtor  pointer to destructor function for data
501 
502  Register constructors and destructors for thread private data.
503  This function is called when executing in parallel, when we know the thread id.
504 */
505 void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
506                                    kmpc_cctor cctor, kmpc_dtor dtor) {
507   struct shared_common *d_tn, **lnk_tn;
508 
509   KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));
510 
511 #ifdef USE_CHECKS_COMMON
512   /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
513   KMP_ASSERT(cctor == 0);
514 #endif /* USE_CHECKS_COMMON */
515 
516   /* Only the global data table exists. */
517   d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);
518 
519   if (d_tn == 0) {
520     d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
521     d_tn->gbl_addr = data;
522 
523     d_tn->ct.ctor = ctor;
524     d_tn->cct.cctor = cctor;
525     d_tn->dt.dtor = dtor;
526     /*
527             d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
528        zeroes the memory
529             d_tn->vec_len = 0L;
530             d_tn->obj_init = 0;
531             d_tn->pod_init = 0;
532     */
533     lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
534 
535     d_tn->next = *lnk_tn;
536     *lnk_tn = d_tn;
537   }
538 }
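/* Illustration only: a hedged sketch of the registration call a compiler
   might emit for a C++ threadprivate object.  The names `tp_obj`, `tp_ctor`
   and `tp_dtor` are hypothetical, and the copy-constructor slot is passed as
   NULL, matching the USE_CHECKS_COMMON assertion above.

     static MyType tp_obj;                        // #pragma omp threadprivate(tp_obj)
     void *tp_ctor(void *p) { return new (p) MyType(); }
     void  tp_dtor(void *p) { static_cast<MyType *>(p)->~MyType(); }
     // Emitted once, before the first parallel use of tp_obj:
     __kmpc_threadprivate_register(&loc, &tp_obj, tp_ctor, NULL, tp_dtor);
*/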
539 
540 void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
541                            size_t size) {
542   void *ret;
543   struct private_common *tn;
544 
545   KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));
546 
547 #ifdef USE_CHECKS_COMMON
548   if (!__kmp_init_serial)
549     KMP_FATAL(RTLNotInitialized);
550 #endif /* USE_CHECKS_COMMON */
551 
552   if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
553     /* The parallel address will NEVER overlap with the data_address */
554     /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
555      * data_address; use data_address = data */
556 
557     KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
558                   global_tid));
559     kmp_threadprivate_insert_private_data(global_tid, data, data, size);
560 
561     ret = data;
562   } else {
563     KC_TRACE(
564         50,
565         ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
566          global_tid, data));
567     tn = __kmp_threadprivate_find_task_common(
568         __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);
569 
570     if (tn) {
571       KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
572 #ifdef USE_CHECKS_COMMON
573       if ((size_t)size > tn->cmn_size) {
574         KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
575                       " ,%" KMP_UINTPTR_SPEC ")\n",
576                       data, size, tn->cmn_size));
577         KMP_FATAL(TPCommonBlocksInconsist);
578       }
579 #endif /* USE_CHECKS_COMMON */
580     } else {
581       /* The parallel address will NEVER overlap with the data_address */
582       /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
583        * data_address = data */
584       KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
585       tn = kmp_threadprivate_insert(global_tid, data, data, size);
586     }
587 
588     ret = tn->par_addr;
589   }
590   KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
591                 global_tid, ret));
592 
593   return ret;
594 }
595 
596 static kmp_cached_addr_t *__kmp_find_cache(void *data) {
597   kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
598   while (ptr && ptr->data != data)
599     ptr = ptr->next;
600   return ptr;
601 }
602 
603 /*!
604  @ingroup THREADPRIVATE
605  @param loc source location information
606  @param global_tid  global thread number
607  @param data  pointer to data to privatize
608  @param size  size of data to privatize
609  @param cache  pointer to cache
610  @return pointer to private storage
611 
612  Allocate private storage for threadprivate data.
613 */
614 void *
615 __kmpc_threadprivate_cached(ident_t *loc,
616                             kmp_int32 global_tid, // gtid.
617                             void *data, // Pointer to original global variable.
618                             size_t size, // Size of original global variable.
619                             void ***cache) {
620   KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
621                 "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
622                 global_tid, *cache, data, size));
623 
624   if (TCR_PTR(*cache) == 0) {
625     __kmp_acquire_lock(&__kmp_global_lock, global_tid);
626 
627     if (TCR_PTR(*cache) == 0) {
628       __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
629       // Compiler often passes in NULL cache, even if it's already been created
630       void **my_cache;
631       kmp_cached_addr_t *tp_cache_addr;
632       // Look for an existing cache
633       tp_cache_addr = __kmp_find_cache(data);
634       if (!tp_cache_addr) { // Cache was never created; do it now
635         __kmp_tp_cached = 1;
636         KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
637                            sizeof(void *) * __kmp_tp_capacity +
638                            sizeof(kmp_cached_addr_t)););
639         // No need to zero the allocated memory; __kmp_allocate does that.
640         KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
641                       "address %p\n",
642                       global_tid, my_cache));
643         /* TODO: free all this memory in __kmp_common_destroy using
644          * __kmp_threadpriv_cache_list */
645         /* Add address of my_cache to linked list for cleanup later */
646         tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
647         tp_cache_addr->addr = my_cache;
648         tp_cache_addr->data = data;
649         tp_cache_addr->compiler_cache = cache;
650         tp_cache_addr->next = __kmp_threadpriv_cache_list;
651         __kmp_threadpriv_cache_list = tp_cache_addr;
652       } else { // A cache was already created; use it
653         my_cache = tp_cache_addr->addr;
654         tp_cache_addr->compiler_cache = cache;
655       }
656       KMP_MB();
657 
658       TCW_PTR(*cache, my_cache);
659       __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
660 
661       KMP_MB();
662     }
663     __kmp_release_lock(&__kmp_global_lock, global_tid);
664   }
665 
666   void *ret;
667   if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
668     ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);
669 
670     TCW_PTR((*cache)[global_tid], ret);
671   }
672   KC_TRACE(10,
673            ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
674             global_tid, ret));
675   return ret;
676 }
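/* Illustration only: a hedged sketch of how compiler-generated code typically
   reaches a threadprivate variable through the entry point above.  `tp_var`
   and `tp_cache` are hypothetical names; the cache pointer starts out NULL
   and is filled in lazily on first use.

     static int tp_var;                 // #pragma omp threadprivate(tp_var)
     static void **tp_cache = NULL;     // one cache per threadprivate variable
     ...
     int *p = (int *)__kmpc_threadprivate_cached(
         &loc, __kmpc_global_thread_num(&loc), &tp_var, sizeof(tp_var),
         &tp_cache);
     *p += 1;                           // operates on this thread's copy
*/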
677 
678 // This function should only be called when both __kmp_tp_cached_lock and
679 // __kmp_forkjoin_lock are held.
680 void __kmp_threadprivate_resize_cache(int newCapacity) {
681   KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
682                 newCapacity));
683 
684   kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
685 
686   while (ptr) {
687     if (ptr->data) { // this location has an active cache; resize it
688       void **my_cache;
689       KMP_ITT_IGNORE(my_cache =
690                          (void **)__kmp_allocate(sizeof(void *) * newCapacity +
691                                                  sizeof(kmp_cached_addr_t)););
692       // No need to zero the allocated memory; __kmp_allocate does that.
693       KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
694                     my_cache));
695       // Now copy old cache into new cache
696       void **old_cache = ptr->addr;
697       for (int i = 0; i < __kmp_tp_capacity; ++i) {
698         my_cache[i] = old_cache[i];
699       }
700 
701       // Add address of new my_cache to linked list for cleanup later
702       kmp_cached_addr_t *tp_cache_addr;
703       tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
704       tp_cache_addr->addr = my_cache;
705       tp_cache_addr->data = ptr->data;
706       tp_cache_addr->compiler_cache = ptr->compiler_cache;
707       tp_cache_addr->next = __kmp_threadpriv_cache_list;
708       __kmp_threadpriv_cache_list = tp_cache_addr;
709 
710       // Copy new cache to compiler's location: We can copy directly
711       // to (*compiler_cache) if compiler guarantees it will keep
712       // using the same location for the cache. This is not yet true
713       // for some compilers, in which case we have to check if
714       // compiler_cache is still pointing at old cache, and if so, we
715       // can point it at the new cache with an atomic compare&swap
716       // operation. (Old method will always work, but we should shift
717       // to new method (commented line below) when Intel and Clang
718       // compilers use new method.)
719       (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
720                                       my_cache);
721       // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);
722 
723       // If the store doesn't happen here, the compiler's old behavior will
724       // inevitably call __kmpc_threadprivate_cached with a new location for the
725       // cache, and that function will store the resized cache there at that
726       // point.
727 
728       // Nullify old cache's data pointer so we skip it next time
729       ptr->data = NULL;
730     }
731     ptr = ptr->next;
732   }
733   // After all caches are resized, update __kmp_tp_capacity to the new size
734   *(volatile int *)&__kmp_tp_capacity = newCapacity;
735 }
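/* Illustration only: the compare-and-swap used above is equivalent to this
   guarded store (performed atomically), so the compiler's cache pointer is
   redirected only while it still refers to the cache we just copied from:

     if (*tp_cache_addr->compiler_cache == old_cache)
       *tp_cache_addr->compiler_cache = my_cache;
*/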
736 
737 /*!
738  @ingroup THREADPRIVATE
739  @param loc source location information
740  @param data  pointer to data being privatized
741  @param ctor  pointer to constructor function for data
742  @param cctor  pointer to copy constructor function for data
743  @param dtor  pointer to destructor function for data
744  @param vector_length length of the vector (bytes or elements?)
745  Register vector constructors and destructors for thread private data.
746 */
747 void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
748                                        kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
749                                        kmpc_dtor_vec dtor,
750                                        size_t vector_length) {
751   struct shared_common *d_tn, **lnk_tn;
752 
753   KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));
754 
755 #ifdef USE_CHECKS_COMMON
756   /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
757   KMP_ASSERT(cctor == 0);
758 #endif /* USE_CHECKS_COMMON */
759 
760   d_tn = __kmp_find_shared_task_common(
761       &__kmp_threadprivate_d_table, -1,
762       data); /* Only the global data table exists. */
763 
764   if (d_tn == 0) {
765     d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
766     d_tn->gbl_addr = data;
767 
768     d_tn->ct.ctorv = ctor;
769     d_tn->cct.cctorv = cctor;
770     d_tn->dt.dtorv = dtor;
771     d_tn->is_vec = TRUE;
772     d_tn->vec_len = (size_t)vector_length;
773     // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
774     // d_tn->pod_init = 0;
775     lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
776 
777     d_tn->next = *lnk_tn;
778     *lnk_tn = d_tn;
779   }
780 }
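/* Illustration only: a hedged sketch of the vector registration a compiler
   might emit for a threadprivate array of C++ objects.  All names are
   hypothetical, the copy-constructor slot is again NULL, and the last
   argument follows the "number of elements" reading of the vector_length
   parameter documented above, which the doc comment itself leaves open.

     static MyType tp_arr[8];               // #pragma omp threadprivate(tp_arr)
     void *tp_ctor_vec(void *p, size_t n);  // constructs n elements at p
     void  tp_dtor_vec(void *p, size_t n);  // destroys n elements at p
     __kmpc_threadprivate_register_vec(&loc, tp_arr, tp_ctor_vec, NULL,
                                       tp_dtor_vec, 8);
*/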
781 
782 void __kmp_cleanup_threadprivate_caches() {
783   kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
784 
785   while (ptr) {
786     void **cache = ptr->addr;
787     __kmp_threadpriv_cache_list = ptr->next;
788     if (*ptr->compiler_cache)
789       *ptr->compiler_cache = NULL;
790     ptr->compiler_cache = NULL;
791     ptr->data = NULL;
792     ptr->addr = NULL;
793     ptr->next = NULL;
794     // Threadprivate data pointed at by cache entries are destroyed at end of
795     // __kmp_launch_thread with __kmp_common_destroy_gtid.
796     __kmp_free(cache); // implicitly frees ptr too
797     ptr = __kmp_threadpriv_cache_list;
798   }
799 }
800