1 /* 2 * Copyright 2011-2015 Samy Al Bahra. 3 * Copyright 2011 David Joseph. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <ck_barrier.h> 29 #include <ck_cc.h> 30 #include <ck_pr.h> 31 #include <ck_spinlock.h> 32 33 struct ck_barrier_combining_queue { 34 struct ck_barrier_combining_group *head; 35 struct ck_barrier_combining_group *tail; 36 }; 37 38 CK_CC_INLINE static struct ck_barrier_combining_group * 39 ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue) 40 { 41 struct ck_barrier_combining_group *front = NULL; 42 43 if (queue->head != NULL) { 44 front = queue->head; 45 queue->head = queue->head->next; 46 } 47 48 return front; 49 } 50 51 CK_CC_INLINE static void 52 ck_barrier_combining_insert(struct ck_barrier_combining_group *parent, 53 struct ck_barrier_combining_group *tnode, 54 struct ck_barrier_combining_group **child) 55 { 56 57 *child = tnode; 58 tnode->parent = parent; 59 60 /* 61 * After inserting, we must increment the parent group's count for 62 * number of threads expected to reach it; otherwise, the 63 * barrier may end prematurely. 64 */ 65 parent->k++; 66 return; 67 } 68 69 /* 70 * This implementation of software combining tree barriers 71 * uses level order traversal to insert new thread groups 72 * into the barrier's tree. We use a queue to implement this 73 * traversal. 74 */ 75 CK_CC_INLINE static void 76 ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue, 77 struct ck_barrier_combining_group *node_value) 78 { 79 80 node_value->next = NULL; 81 if (queue->head == NULL) { 82 queue->head = queue->tail = node_value; 83 return; 84 } 85 86 queue->tail->next = node_value; 87 queue->tail = node_value; 88 89 return; 90 } 91 92 93 void 94 ck_barrier_combining_group_init(struct ck_barrier_combining *root, 95 struct ck_barrier_combining_group *tnode, 96 unsigned int nthr) 97 { 98 struct ck_barrier_combining_group *node; 99 struct ck_barrier_combining_queue queue; 100 101 queue.head = queue.tail = NULL; 102 103 tnode->k = nthr; 104 tnode->count = 0; 105 tnode->sense = 0; 106 tnode->left = tnode->right = NULL; 107 108 /* 109 * Finds the first available node for linkage into the combining 110 * tree. The use of a spinlock is excusable as this is a one-time 111 * initialization cost. 112 */ 113 ck_spinlock_fas_lock(&root->mutex); 114 ck_barrier_combining_queue_enqueue(&queue, root->root); 115 while (queue.head != NULL) { 116 node = ck_barrier_combining_queue_dequeue(&queue); 117 118 /* If the left child is free, link the group there. */ 119 if (node->left == NULL) { 120 ck_barrier_combining_insert(node, tnode, &node->left); 121 goto leave; 122 } 123 124 /* If the right child is free, link the group there. */ 125 if (node->right == NULL) { 126 ck_barrier_combining_insert(node, tnode, &node->right); 127 goto leave; 128 } 129 130 /* 131 * If unsuccessful, try inserting as a child of the children of the 132 * current node. 133 */ 134 ck_barrier_combining_queue_enqueue(&queue, node->left); 135 ck_barrier_combining_queue_enqueue(&queue, node->right); 136 } 137 138 leave: 139 ck_spinlock_fas_unlock(&root->mutex); 140 return; 141 } 142 143 void 144 ck_barrier_combining_init(struct ck_barrier_combining *root, 145 struct ck_barrier_combining_group *init_root) 146 { 147 148 init_root->k = 0; 149 init_root->count = 0; 150 init_root->sense = 0; 151 init_root->parent = init_root->left = init_root->right = NULL; 152 ck_spinlock_fas_init(&root->mutex); 153 root->root = init_root; 154 return; 155 } 156 157 static void 158 ck_barrier_combining_aux(struct ck_barrier_combining *barrier, 159 struct ck_barrier_combining_group *tnode, 160 unsigned int sense) 161 { 162 163 /* 164 * If this is the last thread in the group, it moves on to the parent group. 165 * Otherwise, it spins on this group's sense. 166 */ 167 if (ck_pr_faa_uint(&tnode->count, 1) == tnode->k - 1) { 168 /* 169 * If we are and will be the last thread entering the barrier for the 170 * current group then signal the parent group if one exists. 171 */ 172 if (tnode->parent != NULL) 173 ck_barrier_combining_aux(barrier, tnode->parent, sense); 174 175 /* 176 * Once the thread returns from its parent(s), it reinitializes the group's 177 * arrival count and signals other threads to continue by flipping the group 178 * sense. Order of these operations is not important since we assume a static 179 * number of threads are members of a barrier for the lifetime of the barrier. 180 * Since count is explicitly reinitialized, it is guaranteed that at any point 181 * tnode->count is equivalent to tnode->k if and only if that many threads 182 * are at the barrier. 183 */ 184 ck_pr_store_uint(&tnode->count, 0); 185 ck_pr_fence_store(); 186 ck_pr_store_uint(&tnode->sense, ~tnode->sense); 187 } else { 188 ck_pr_fence_memory(); 189 while (sense != ck_pr_load_uint(&tnode->sense)) 190 ck_pr_stall(); 191 } 192 193 return; 194 } 195 196 void 197 ck_barrier_combining(struct ck_barrier_combining *barrier, 198 struct ck_barrier_combining_group *tnode, 199 struct ck_barrier_combining_state *state) 200 { 201 202 ck_barrier_combining_aux(barrier, tnode, state->sense); 203 204 /* Reverse the execution context's sense for the next barrier. */ 205 state->sense = ~state->sense; 206 return; 207 } 208