1 /* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/slab.h> 26 27 #include "i915_syncmap.h" 28 29 #include "i915_gem.h" /* GEM_BUG_ON() */ 30 #include "i915_selftest.h" 31 32 #define SHIFT ilog2(KSYNCMAP) 33 #define MASK (KSYNCMAP - 1) 34 35 /* 36 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence 37 * context id to the last u32 fence seqno waited upon from that context. 38 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to 39 * the root. This allows us to access the whole tree via a single pointer 40 * to the most recently used layer. We expect fence contexts to be dense 41 * and most reuse to be on the same i915_gem_context but on neighbouring 42 * engines (i.e. on adjacent contexts) and reuse the same leaf, a very 43 * effective lookup cache. If the new lookup is not on the same leaf, we 44 * expect it to be on the neighbouring branch. 45 * 46 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field 47 * allows us to store whether a particular seqno is valid (i.e. allows us 48 * to distinguish unset from 0). 49 * 50 * A branch holds an array of layer pointers, and has height > 0, and always 51 * has at least 2 layers (either branches or leaves) below it. 52 * 53 * For example, 54 * for x in 55 * 0 1 2 0x10 0x11 0x200 0x201 56 * 0x500000 0x500001 0x503000 0x503001 57 * 0xE<<60: 58 * i915_syncmap_set(&sync, x, lower_32_bits(x)); 59 * will build a tree like: 60 * 0xXXXXXXXXXXXXXXXX 61 * 0-> 0x0000000000XXXXXX 62 * | 0-> 0x0000000000000XXX 63 * | | 0-> 0x00000000000000XX 64 * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 65 * | | | 1-> 0x000000000000001X 0:10, 1:11 66 * | | 2-> 0x000000000000020X 0:200, 1:201 67 * | 5-> 0x000000000050XXXX 68 * | 0-> 0x000000000050000X 0:500000, 1:500001 69 * | 3-> 0x000000000050300X 0:503000, 1:503001 70 * e-> 0xe00000000000000X e:e 71 */ 72 73 struct i915_syncmap { 74 u64 prefix; 75 unsigned int height; 76 unsigned int bitmap; 77 struct i915_syncmap *parent; 78 union { 79 DECLARE_FLEX_ARRAY(u32, seqno); 80 DECLARE_FLEX_ARRAY(struct i915_syncmap *, child); 81 }; 82 }; 83 84 /** 85 * i915_syncmap_init -- initialise the #i915_syncmap 86 * @root: pointer to the #i915_syncmap 87 */ 88 void i915_syncmap_init(struct i915_syncmap **root) 89 { 90 BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); 91 BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); 92 BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap)); 93 *root = NULL; 94 } 95 96 static inline u32 *__sync_seqno(struct i915_syncmap *p) 97 { 98 GEM_BUG_ON(p->height); 99 return p->seqno; 100 } 101 102 static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) 103 { 104 GEM_BUG_ON(!p->height); 105 return p->child; 106 } 107 108 static inline unsigned int 109 __sync_branch_idx(const struct i915_syncmap *p, u64 id) 110 { 111 return (id >> p->height) & MASK; 112 } 113 114 static inline unsigned int 115 __sync_leaf_idx(const struct i915_syncmap *p, u64 id) 116 { 117 GEM_BUG_ON(p->height); 118 return id & MASK; 119 } 120 121 static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) 122 { 123 return id >> p->height >> SHIFT; 124 } 125 126 static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) 127 { 128 GEM_BUG_ON(p->height); 129 return id >> SHIFT; 130 } 131 132 static inline bool seqno_later(u32 a, u32 b) 133 { 134 return (s32)(a - b) >= 0; 135 } 136 137 /** 138 * i915_syncmap_is_later -- compare against the last know sync point 139 * @root: pointer to the #i915_syncmap 140 * @id: the context id (other timeline) we are synchronising to 141 * @seqno: the sequence number along the other timeline 142 * 143 * If we have already synchronised this @root timeline with another (@id) then 144 * we can omit any repeated or earlier synchronisation requests. If the two 145 * timelines are already coupled, we can also omit the dependency between the 146 * two as that is already known via the timeline. 147 * 148 * Returns true if the two timelines are already synchronised wrt to @seqno, 149 * false if not and the synchronisation must be emitted. 150 */ 151 bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) 152 { 153 struct i915_syncmap *p; 154 unsigned int idx; 155 156 p = *root; 157 if (!p) 158 return false; 159 160 if (likely(__sync_leaf_prefix(p, id) == p->prefix)) 161 goto found; 162 163 /* First climb the tree back to a parent branch */ 164 do { 165 p = p->parent; 166 if (!p) 167 return false; 168 169 if (__sync_branch_prefix(p, id) == p->prefix) 170 break; 171 } while (1); 172 173 /* And then descend again until we find our leaf */ 174 do { 175 if (!p->height) 176 break; 177 178 p = __sync_child(p)[__sync_branch_idx(p, id)]; 179 if (!p) 180 return false; 181 182 if (__sync_branch_prefix(p, id) != p->prefix) 183 return false; 184 } while (1); 185 186 *root = p; 187 found: 188 idx = __sync_leaf_idx(p, id); 189 if (!(p->bitmap & BIT(idx))) 190 return false; 191 192 return seqno_later(__sync_seqno(p)[idx], seqno); 193 } 194 195 static struct i915_syncmap * 196 __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) 197 { 198 struct i915_syncmap *p; 199 200 p = kmalloc(struct_size(p, seqno, KSYNCMAP), GFP_KERNEL); 201 if (unlikely(!p)) 202 return NULL; 203 204 p->parent = parent; 205 p->height = 0; 206 p->bitmap = 0; 207 p->prefix = __sync_leaf_prefix(p, id); 208 return p; 209 } 210 211 static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) 212 { 213 unsigned int idx = __sync_leaf_idx(p, id); 214 215 p->bitmap |= BIT(idx); 216 __sync_seqno(p)[idx] = seqno; 217 } 218 219 static inline void __sync_set_child(struct i915_syncmap *p, 220 unsigned int idx, 221 struct i915_syncmap *child) 222 { 223 p->bitmap |= BIT(idx); 224 __sync_child(p)[idx] = child; 225 } 226 227 static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) 228 { 229 struct i915_syncmap *p = *root; 230 unsigned int idx; 231 232 if (!p) { 233 p = __sync_alloc_leaf(NULL, id); 234 if (unlikely(!p)) 235 return -ENOMEM; 236 237 goto found; 238 } 239 240 /* Caller handled the likely cached case */ 241 GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); 242 243 /* Climb back up the tree until we find a common prefix */ 244 do { 245 if (!p->parent) 246 break; 247 248 p = p->parent; 249 250 if (__sync_branch_prefix(p, id) == p->prefix) 251 break; 252 } while (1); 253 254 /* 255 * No shortcut, we have to descend the tree to find the right layer 256 * containing this fence. 257 * 258 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences 259 * or lower layers. Leaf nodes (height = 0) contain the fences, all 260 * other nodes (height > 0) are internal layers that point to a lower 261 * node. Each internal layer has at least 2 descendents. 262 * 263 * Starting at the top, we check whether the current prefix matches. If 264 * it doesn't, we have gone past our target and need to insert a join 265 * into the tree, and a new leaf node for the target as a descendent 266 * of the join, as well as the original layer. 267 * 268 * The matching prefix means we are still following the right branch 269 * of the tree. If it has height 0, we have found our leaf and just 270 * need to replace the fence slot with ourselves. If the height is 271 * not zero, our slot contains the next layer in the tree (unless 272 * it is empty, in which case we can add ourselves as a new leaf). 273 * As descend the tree the prefix grows (and height decreases). 274 */ 275 do { 276 struct i915_syncmap *next; 277 278 if (__sync_branch_prefix(p, id) != p->prefix) { 279 unsigned int above; 280 281 /* Insert a join above the current layer */ 282 next = kzalloc(struct_size(next, child, KSYNCMAP), 283 GFP_KERNEL); 284 if (unlikely(!next)) 285 return -ENOMEM; 286 287 /* Compute the height at which these two diverge */ 288 above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); 289 above = round_up(above, SHIFT); 290 next->height = above + p->height; 291 next->prefix = __sync_branch_prefix(next, id); 292 293 /* Insert the join into the parent */ 294 if (p->parent) { 295 idx = __sync_branch_idx(p->parent, id); 296 __sync_child(p->parent)[idx] = next; 297 GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); 298 } 299 next->parent = p->parent; 300 301 /* Compute the idx of the other branch, not our id! */ 302 idx = p->prefix >> (above - SHIFT) & MASK; 303 __sync_set_child(next, idx, p); 304 p->parent = next; 305 306 /* Ascend to the join */ 307 p = next; 308 } else { 309 if (!p->height) 310 break; 311 } 312 313 /* Descend into the next layer */ 314 GEM_BUG_ON(!p->height); 315 idx = __sync_branch_idx(p, id); 316 next = __sync_child(p)[idx]; 317 if (!next) { 318 next = __sync_alloc_leaf(p, id); 319 if (unlikely(!next)) 320 return -ENOMEM; 321 322 __sync_set_child(p, idx, next); 323 p = next; 324 break; 325 } 326 327 p = next; 328 } while (1); 329 330 found: 331 GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); 332 __sync_set_seqno(p, id, seqno); 333 *root = p; 334 return 0; 335 } 336 337 /** 338 * i915_syncmap_set -- mark the most recent syncpoint between contexts 339 * @root: pointer to the #i915_syncmap 340 * @id: the context id (other timeline) we have synchronised to 341 * @seqno: the sequence number along the other timeline 342 * 343 * When we synchronise this @root timeline with another (@id), we also know 344 * that we have synchronized with all previous seqno along that timeline. If 345 * we then have a request to synchronise with the same seqno or older, we can 346 * omit it, see i915_syncmap_is_later() 347 * 348 * Returns 0 on success, or a negative error code. 349 */ 350 int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) 351 { 352 struct i915_syncmap *p = *root; 353 354 /* 355 * We expect to be called in sequence following is_later(id), which 356 * should have preloaded the root for us. 357 */ 358 if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { 359 __sync_set_seqno(p, id, seqno); 360 return 0; 361 } 362 363 return __sync_set(root, id, seqno); 364 } 365 366 static void __sync_free(struct i915_syncmap *p) 367 { 368 if (p->height) { 369 unsigned int i; 370 371 while ((i = ffs(p->bitmap))) { 372 p->bitmap &= ~0u << i; 373 __sync_free(__sync_child(p)[i - 1]); 374 } 375 } 376 377 kfree(p); 378 } 379 380 /** 381 * i915_syncmap_free -- free all memory associated with the syncmap 382 * @root: pointer to the #i915_syncmap 383 * 384 * Either when the timeline is to be freed and we no longer need the sync 385 * point tracking, or when the fences are all known to be signaled and the 386 * sync point tracking is redundant, we can free the #i915_syncmap to recover 387 * its allocations. 388 * 389 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for 390 * reuse. 391 */ 392 void i915_syncmap_free(struct i915_syncmap **root) 393 { 394 struct i915_syncmap *p; 395 396 p = *root; 397 if (!p) 398 return; 399 400 while (p->parent) 401 p = p->parent; 402 403 __sync_free(p); 404 *root = NULL; 405 } 406 407 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 408 #include "selftests/i915_syncmap.c" 409 #endif 410