1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Asynchronous RAID-6 recovery calculations ASYNC_TX API. 4 * Copyright(c) 2009 Intel Corporation 5 * 6 * based on raid6recov.c: 7 * Copyright 2002 H. Peter Anvin 8 */ 9 #include <linux/kernel.h> 10 #include <linux/interrupt.h> 11 #include <linux/module.h> 12 #include <linux/dma-mapping.h> 13 #include <linux/raid/pq.h> 14 #include <linux/raid/pq_tables.h> 15 #include <linux/async_tx.h> 16 #include <linux/dmaengine.h> 17 18 static struct dma_async_tx_descriptor * 19 async_sum_product(struct page *dest, unsigned int d_off, 20 struct page **srcs, unsigned int *src_offs, unsigned char *coef, 21 size_t len, struct async_submit_ctl *submit) 22 { 23 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 24 &dest, 1, srcs, 2, len); 25 struct dma_device *dma = chan ? chan->device : NULL; 26 struct dmaengine_unmap_data *unmap = NULL; 27 const u8 *amul, *bmul; 28 u8 ax, bx; 29 u8 *a, *b, *c; 30 31 if (dma) 32 unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT); 33 34 if (unmap) { 35 struct device *dev = dma->dev; 36 dma_addr_t pq[2]; 37 struct dma_async_tx_descriptor *tx; 38 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 39 40 if (submit->flags & ASYNC_TX_FENCE) 41 dma_flags |= DMA_PREP_FENCE; 42 unmap->addr[0] = dma_map_page(dev, srcs[0], src_offs[0], 43 len, DMA_TO_DEVICE); 44 unmap->addr[1] = dma_map_page(dev, srcs[1], src_offs[1], 45 len, DMA_TO_DEVICE); 46 unmap->to_cnt = 2; 47 48 unmap->addr[2] = dma_map_page(dev, dest, d_off, 49 len, DMA_BIDIRECTIONAL); 50 unmap->bidi_cnt = 1; 51 /* engine only looks at Q, but expects it to follow P */ 52 pq[1] = unmap->addr[2]; 53 54 unmap->len = len; 55 tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef, 56 len, dma_flags); 57 if (tx) { 58 dma_set_unmap(tx, unmap); 59 async_tx_submit(chan, tx, submit); 60 dmaengine_unmap_put(unmap); 61 return tx; 62 } 63 64 /* could not get a descriptor, unmap and fall through to 65 * the synchronous path 66 */ 67 dmaengine_unmap_put(unmap); 68 } 69 70 /* run the operation synchronously */ 71 async_tx_quiesce(&submit->depend_tx); 72 amul = raid6_gfmul[coef[0]]; 73 bmul = raid6_gfmul[coef[1]]; 74 a = page_address(srcs[0]) + src_offs[0]; 75 b = page_address(srcs[1]) + src_offs[1]; 76 c = page_address(dest) + d_off; 77 78 while (len--) { 79 ax = amul[*a++]; 80 bx = bmul[*b++]; 81 *c++ = ax ^ bx; 82 } 83 84 return NULL; 85 } 86 87 static struct dma_async_tx_descriptor * 88 async_mult(struct page *dest, unsigned int d_off, struct page *src, 89 unsigned int s_off, u8 coef, size_t len, 90 struct async_submit_ctl *submit) 91 { 92 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 93 &dest, 1, &src, 1, len); 94 struct dma_device *dma = chan ? chan->device : NULL; 95 struct dmaengine_unmap_data *unmap = NULL; 96 const u8 *qmul; /* Q multiplier table */ 97 u8 *d, *s; 98 99 if (dma) 100 unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT); 101 102 if (unmap) { 103 dma_addr_t dma_dest[2]; 104 struct device *dev = dma->dev; 105 struct dma_async_tx_descriptor *tx; 106 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 107 108 if (submit->flags & ASYNC_TX_FENCE) 109 dma_flags |= DMA_PREP_FENCE; 110 unmap->addr[0] = dma_map_page(dev, src, s_off, 111 len, DMA_TO_DEVICE); 112 unmap->to_cnt++; 113 unmap->addr[1] = dma_map_page(dev, dest, d_off, 114 len, DMA_BIDIRECTIONAL); 115 dma_dest[1] = unmap->addr[1]; 116 unmap->bidi_cnt++; 117 unmap->len = len; 118 119 /* this looks funny, but the engine looks for Q at 120 * dma_dest[1] and ignores dma_dest[0] as a dest 121 * due to DMA_PREP_PQ_DISABLE_P 122 */ 123 tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr, 124 1, &coef, len, dma_flags); 125 126 if (tx) { 127 dma_set_unmap(tx, unmap); 128 dmaengine_unmap_put(unmap); 129 async_tx_submit(chan, tx, submit); 130 return tx; 131 } 132 133 /* could not get a descriptor, unmap and fall through to 134 * the synchronous path 135 */ 136 dmaengine_unmap_put(unmap); 137 } 138 139 /* no channel available, or failed to allocate a descriptor, so 140 * perform the operation synchronously 141 */ 142 async_tx_quiesce(&submit->depend_tx); 143 qmul = raid6_gfmul[coef]; 144 d = page_address(dest) + d_off; 145 s = page_address(src) + s_off; 146 147 while (len--) 148 *d++ = qmul[*s++]; 149 150 return NULL; 151 } 152 153 static struct dma_async_tx_descriptor * 154 __2data_recov_4(int disks, size_t bytes, int faila, int failb, 155 struct page **blocks, unsigned int *offs, 156 struct async_submit_ctl *submit) 157 { 158 struct dma_async_tx_descriptor *tx = NULL; 159 struct page *p, *q, *a, *b; 160 unsigned int p_off, q_off, a_off, b_off; 161 struct page *srcs[2]; 162 unsigned int src_offs[2]; 163 unsigned char coef[2]; 164 enum async_tx_flags flags = submit->flags; 165 dma_async_tx_callback cb_fn = submit->cb_fn; 166 void *cb_param = submit->cb_param; 167 void *scribble = submit->scribble; 168 169 p = blocks[disks-2]; 170 p_off = offs[disks-2]; 171 q = blocks[disks-1]; 172 q_off = offs[disks-1]; 173 174 a = blocks[faila]; 175 a_off = offs[faila]; 176 b = blocks[failb]; 177 b_off = offs[failb]; 178 179 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ 180 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 181 srcs[0] = p; 182 src_offs[0] = p_off; 183 srcs[1] = q; 184 src_offs[1] = q_off; 185 coef[0] = raid6_gfexi[failb-faila]; 186 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 187 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 188 tx = async_sum_product(b, b_off, srcs, src_offs, coef, bytes, submit); 189 190 /* Dy = P+Pxy+Dx */ 191 srcs[0] = p; 192 src_offs[0] = p_off; 193 srcs[1] = b; 194 src_offs[1] = b_off; 195 init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, 196 cb_param, scribble); 197 tx = async_xor_offs(a, a_off, srcs, src_offs, 2, bytes, submit); 198 199 return tx; 200 201 } 202 203 static struct dma_async_tx_descriptor * 204 __2data_recov_5(int disks, size_t bytes, int faila, int failb, 205 struct page **blocks, unsigned int *offs, 206 struct async_submit_ctl *submit) 207 { 208 struct dma_async_tx_descriptor *tx = NULL; 209 struct page *p, *q, *g, *dp, *dq; 210 unsigned int p_off, q_off, g_off, dp_off, dq_off; 211 struct page *srcs[2]; 212 unsigned int src_offs[2]; 213 unsigned char coef[2]; 214 enum async_tx_flags flags = submit->flags; 215 dma_async_tx_callback cb_fn = submit->cb_fn; 216 void *cb_param = submit->cb_param; 217 void *scribble = submit->scribble; 218 int good_srcs, good, i; 219 220 good_srcs = 0; 221 good = -1; 222 for (i = 0; i < disks-2; i++) { 223 if (blocks[i] == NULL) 224 continue; 225 if (i == faila || i == failb) 226 continue; 227 good = i; 228 good_srcs++; 229 } 230 BUG_ON(good_srcs > 1); 231 232 p = blocks[disks-2]; 233 p_off = offs[disks-2]; 234 q = blocks[disks-1]; 235 q_off = offs[disks-1]; 236 g = blocks[good]; 237 g_off = offs[good]; 238 239 /* Compute syndrome with zero for the missing data pages 240 * Use the dead data pages as temporary storage for delta p and 241 * delta q 242 */ 243 dp = blocks[faila]; 244 dp_off = offs[faila]; 245 dq = blocks[failb]; 246 dq_off = offs[failb]; 247 248 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 249 tx = async_memcpy(dp, g, dp_off, g_off, bytes, submit); 250 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 251 tx = async_mult(dq, dq_off, g, g_off, 252 raid6_gfexp[good], bytes, submit); 253 254 /* compute P + Pxy */ 255 srcs[0] = dp; 256 src_offs[0] = dp_off; 257 srcs[1] = p; 258 src_offs[1] = p_off; 259 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 260 NULL, NULL, scribble); 261 tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); 262 263 /* compute Q + Qxy */ 264 srcs[0] = dq; 265 src_offs[0] = dq_off; 266 srcs[1] = q; 267 src_offs[1] = q_off; 268 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 269 NULL, NULL, scribble); 270 tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); 271 272 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 273 srcs[0] = dp; 274 src_offs[0] = dp_off; 275 srcs[1] = dq; 276 src_offs[1] = dq_off; 277 coef[0] = raid6_gfexi[failb-faila]; 278 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 279 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 280 tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit); 281 282 /* Dy = P+Pxy+Dx */ 283 srcs[0] = dp; 284 src_offs[0] = dp_off; 285 srcs[1] = dq; 286 src_offs[1] = dq_off; 287 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 288 cb_param, scribble); 289 tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); 290 291 return tx; 292 } 293 294 static struct dma_async_tx_descriptor * 295 __2data_recov_n(int disks, size_t bytes, int faila, int failb, 296 struct page **blocks, unsigned int *offs, 297 struct async_submit_ctl *submit) 298 { 299 struct dma_async_tx_descriptor *tx = NULL; 300 struct page *p, *q, *dp, *dq; 301 unsigned int p_off, q_off, dp_off, dq_off; 302 struct page *srcs[2]; 303 unsigned int src_offs[2]; 304 unsigned char coef[2]; 305 enum async_tx_flags flags = submit->flags; 306 dma_async_tx_callback cb_fn = submit->cb_fn; 307 void *cb_param = submit->cb_param; 308 void *scribble = submit->scribble; 309 310 p = blocks[disks-2]; 311 p_off = offs[disks-2]; 312 q = blocks[disks-1]; 313 q_off = offs[disks-1]; 314 315 /* Compute syndrome with zero for the missing data pages 316 * Use the dead data pages as temporary storage for 317 * delta p and delta q 318 */ 319 dp = blocks[faila]; 320 dp_off = offs[faila]; 321 blocks[faila] = NULL; 322 blocks[disks-2] = dp; 323 offs[disks-2] = dp_off; 324 dq = blocks[failb]; 325 dq_off = offs[failb]; 326 blocks[failb] = NULL; 327 blocks[disks-1] = dq; 328 offs[disks-1] = dq_off; 329 330 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 331 tx = async_gen_syndrome(blocks, offs, disks, bytes, submit); 332 333 /* Restore pointer table */ 334 blocks[faila] = dp; 335 offs[faila] = dp_off; 336 blocks[failb] = dq; 337 offs[failb] = dq_off; 338 blocks[disks-2] = p; 339 offs[disks-2] = p_off; 340 blocks[disks-1] = q; 341 offs[disks-1] = q_off; 342 343 /* compute P + Pxy */ 344 srcs[0] = dp; 345 src_offs[0] = dp_off; 346 srcs[1] = p; 347 src_offs[1] = p_off; 348 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 349 NULL, NULL, scribble); 350 tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); 351 352 /* compute Q + Qxy */ 353 srcs[0] = dq; 354 src_offs[0] = dq_off; 355 srcs[1] = q; 356 src_offs[1] = q_off; 357 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 358 NULL, NULL, scribble); 359 tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); 360 361 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 362 srcs[0] = dp; 363 src_offs[0] = dp_off; 364 srcs[1] = dq; 365 src_offs[1] = dq_off; 366 coef[0] = raid6_gfexi[failb-faila]; 367 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 368 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 369 tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit); 370 371 /* Dy = P+Pxy+Dx */ 372 srcs[0] = dp; 373 src_offs[0] = dp_off; 374 srcs[1] = dq; 375 src_offs[1] = dq_off; 376 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 377 cb_param, scribble); 378 tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); 379 380 return tx; 381 } 382 383 /** 384 * async_raid6_2data_recov - asynchronously calculate two missing data blocks 385 * @disks: number of disks in the RAID-6 array 386 * @bytes: block size 387 * @faila: first failed drive index 388 * @failb: second failed drive index 389 * @blocks: array of source pointers where the last two entries are p and q 390 * @offs: array of offset for pages in blocks 391 * @submit: submission/completion modifiers 392 */ 393 struct dma_async_tx_descriptor * 394 async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, 395 struct page **blocks, unsigned int *offs, 396 struct async_submit_ctl *submit) 397 { 398 void *scribble = submit->scribble; 399 int non_zero_srcs, i; 400 401 BUG_ON(faila == failb); 402 if (failb < faila) 403 swap(faila, failb); 404 405 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); 406 407 /* if a dma resource is not available or a scribble buffer is not 408 * available punt to the synchronous path. In the 'dma not 409 * available' case be sure to use the scribble buffer to 410 * preserve the content of 'blocks' as the caller intended. 411 */ 412 if (!async_dma_find_channel(DMA_PQ) || !scribble) { 413 void **ptrs = scribble ? scribble : (void **) blocks; 414 415 async_tx_quiesce(&submit->depend_tx); 416 for (i = 0; i < disks; i++) 417 if (blocks[i] == NULL) 418 ptrs[i] = page_address(ZERO_PAGE(0)); 419 else 420 ptrs[i] = page_address(blocks[i]) + offs[i]; 421 422 raid6_recov_2data(disks, bytes, faila, failb, ptrs); 423 424 async_tx_sync_epilog(submit); 425 426 return NULL; 427 } 428 429 non_zero_srcs = 0; 430 for (i = 0; i < disks-2 && non_zero_srcs < 4; i++) 431 if (blocks[i]) 432 non_zero_srcs++; 433 switch (non_zero_srcs) { 434 case 0: 435 case 1: 436 /* There must be at least 2 sources - the failed devices. */ 437 BUG(); 438 439 case 2: 440 /* dma devices do not uniformly understand a zero source pq 441 * operation (in contrast to the synchronous case), so 442 * explicitly handle the special case of a 4 disk array with 443 * both data disks missing. 444 */ 445 return __2data_recov_4(disks, bytes, faila, failb, 446 blocks, offs, submit); 447 case 3: 448 /* dma devices do not uniformly understand a single 449 * source pq operation (in contrast to the synchronous 450 * case), so explicitly handle the special case of a 5 disk 451 * array with 2 of 3 data disks missing. 452 */ 453 return __2data_recov_5(disks, bytes, faila, failb, 454 blocks, offs, submit); 455 default: 456 return __2data_recov_n(disks, bytes, faila, failb, 457 blocks, offs, submit); 458 } 459 } 460 EXPORT_SYMBOL_GPL(async_raid6_2data_recov); 461 462 /** 463 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block 464 * @disks: number of disks in the RAID-6 array 465 * @bytes: block size 466 * @faila: failed drive index 467 * @blocks: array of source pointers where the last two entries are p and q 468 * @offs: array of offset for pages in blocks 469 * @submit: submission/completion modifiers 470 */ 471 struct dma_async_tx_descriptor * 472 async_raid6_datap_recov(int disks, size_t bytes, int faila, 473 struct page **blocks, unsigned int *offs, 474 struct async_submit_ctl *submit) 475 { 476 struct dma_async_tx_descriptor *tx = NULL; 477 struct page *p, *q, *dq; 478 unsigned int p_off, q_off, dq_off; 479 u8 coef; 480 enum async_tx_flags flags = submit->flags; 481 dma_async_tx_callback cb_fn = submit->cb_fn; 482 void *cb_param = submit->cb_param; 483 void *scribble = submit->scribble; 484 int good_srcs, good, i; 485 struct page *srcs[2]; 486 unsigned int src_offs[2]; 487 488 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); 489 490 /* if a dma resource is not available or a scribble buffer is not 491 * available punt to the synchronous path. In the 'dma not 492 * available' case be sure to use the scribble buffer to 493 * preserve the content of 'blocks' as the caller intended. 494 */ 495 if (!async_dma_find_channel(DMA_PQ) || !scribble) { 496 void **ptrs = scribble ? scribble : (void **) blocks; 497 498 async_tx_quiesce(&submit->depend_tx); 499 for (i = 0; i < disks; i++) 500 if (blocks[i] == NULL) 501 ptrs[i] = page_address(ZERO_PAGE(0)); 502 else 503 ptrs[i] = page_address(blocks[i]) + offs[i]; 504 505 raid6_recov_datap(disks, bytes, faila, ptrs); 506 507 async_tx_sync_epilog(submit); 508 509 return NULL; 510 } 511 512 good_srcs = 0; 513 good = -1; 514 for (i = 0; i < disks-2; i++) { 515 if (i == faila) 516 continue; 517 if (blocks[i]) { 518 good = i; 519 good_srcs++; 520 if (good_srcs > 1) 521 break; 522 } 523 } 524 BUG_ON(good_srcs == 0); 525 526 p = blocks[disks-2]; 527 p_off = offs[disks-2]; 528 q = blocks[disks-1]; 529 q_off = offs[disks-1]; 530 531 /* Compute syndrome with zero for the missing data page 532 * Use the dead data page as temporary storage for delta q 533 */ 534 dq = blocks[faila]; 535 dq_off = offs[faila]; 536 blocks[faila] = NULL; 537 blocks[disks-1] = dq; 538 offs[disks-1] = dq_off; 539 540 /* in the 4-disk case we only need to perform a single source 541 * multiplication with the one good data block. 542 */ 543 if (good_srcs == 1) { 544 struct page *g = blocks[good]; 545 unsigned int g_off = offs[good]; 546 547 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, 548 scribble); 549 tx = async_memcpy(p, g, p_off, g_off, bytes, submit); 550 551 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, 552 scribble); 553 tx = async_mult(dq, dq_off, g, g_off, 554 raid6_gfexp[good], bytes, submit); 555 } else { 556 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, 557 scribble); 558 tx = async_gen_syndrome(blocks, offs, disks, bytes, submit); 559 } 560 561 /* Restore pointer table */ 562 blocks[faila] = dq; 563 offs[faila] = dq_off; 564 blocks[disks-1] = q; 565 offs[disks-1] = q_off; 566 567 /* calculate g^{-faila} */ 568 coef = raid6_gfinv[raid6_gfexp[faila]]; 569 570 srcs[0] = dq; 571 src_offs[0] = dq_off; 572 srcs[1] = q; 573 src_offs[1] = q_off; 574 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 575 NULL, NULL, scribble); 576 tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); 577 578 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); 579 tx = async_mult(dq, dq_off, dq, dq_off, coef, bytes, submit); 580 581 srcs[0] = p; 582 src_offs[0] = p_off; 583 srcs[1] = dq; 584 src_offs[1] = dq_off; 585 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 586 cb_param, scribble); 587 tx = async_xor_offs(p, p_off, srcs, src_offs, 2, bytes, submit); 588 589 return tx; 590 } 591 EXPORT_SYMBOL_GPL(async_raid6_datap_recov); 592 593 MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); 594 MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); 595 MODULE_LICENSE("GPL"); 596