/*
 *  fuc microcode for g98 psec engine
 *  Copyright (C) 2010  Marcin Kościelnicki
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

.section #g98_psec_data

// ---------------------------------------------------------------------
// Per-channel context.
//
// On a context switch the interrupt handler transfers a block starting
// at data offset 0 (the handler's own comment calls it a "128-byte
// context"), so everything that has to survive a channel switch lives
// here, in front of the scratch buffer.
// ---------------------------------------------------------------------

// DMA object slots. The DMA_* methods store into these, and a context
// load replays slot i to I[(0x180 + i) << 8].
ctx_dma:
ctx_dma_query:		.b32 0
ctx_dma_src:		.b32 0
ctx_dma_dst:		.b32 0
.equ #dma_count 3

// Plain state words, written directly by "setctx" table entries below.
ctx_query_address_high:	.b32 0
ctx_query_address_low:	.b32 0
ctx_query_counter:	.b32 0
ctx_cond_address_high:	.b32 0
ctx_cond_address_low:	.b32 0
// Written by cmd_cond_mode. While this is 1, table entries whose flags
// word is 1 are skipped by the dispatcher.
ctx_cond_off:		.b32 0
ctx_src_address_high:	.b32 0
ctx_src_address_low:	.b32 0
ctx_dst_address_high:	.b32 0
ctx_dst_address_low:	.b32 0
// Index into sec_dtable, validated by sec_cmd_mode.
ctx_mode:		.b32 0
.align 16
ctx_key:		.skip 16
ctx_iv:			.skip 16

// 32-byte scratch buffer used as the local end of the DMA transfers
// (query structures, data blocks).
.align 0x80
swap:
.skip 32

// ---------------------------------------------------------------------
// Method dispatch tables.
//
// Each entry is 8 bytes:
//   +0  16-bit target: a context variable or a handler routine
//   +2  16-bit flags:  2 = store the method data into the target
//                      1 = call the target, unless ctx_cond_off is 1
//                      0 = always call the target
//   +4  32-bit mask of data bits that must be zero; a violation takes
//       the invalid_bitfield path
//
// The dispatcher indexes the tables with the value extracted from the
// method word (mthd 0x140 corresponds to index 0x50 in the code below).
// ---------------------------------------------------------------------

// Indices 0x80 .. #common_cmd_max - 1.
.align 8
common_cmd_dtable:
.b32 #ctx_query_address_high + 0x20000 ~0xff
.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
.b32 #ctx_query_counter + 0x20000 ~0xffffffff
.b32 #cmd_query_get + 0x00000 ~1
.b32 #ctx_cond_address_high + 0x20000 ~0xff
.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
.b32 #cmd_cond_mode + 0x00000 ~7
.b32 #cmd_wrcache_flush + 0x00000 ~0
.equ #common_cmd_max 0x88


// Indices 0xc0 .. #engine_cmd_max - 1.
.align 8
engine_cmd_dtable:
.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
.b32 #ctx_src_address_high + 0x20000 ~0xff
.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
.b32 #ctx_dst_address_high + 0x20000 ~0xff
.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
.b32 #sec_cmd_mode + 0x00000 ~0xf
.b32 #sec_cmd_length + 0x10000 ~0x0ffffff0
.equ #engine_cmd_max 0xce

// Per-mode routine pairs, indexed by ctx_mode * 4:
//   +0  prep routine (sets up the crypto script for the mode)
//   +2  transfer routine (moves the data through it)
// There are 15 entries, matching the "mode < 0xf" check in sec_cmd_mode.
.align 4
sec_dtable:
.b16 #sec_copy_prep #sec_do_inout
.b16 #sec_store_prep #sec_do_out
.b16 #sec_ecb_e_prep #sec_do_inout
.b16 #sec_ecb_d_prep #sec_do_inout
.b16 #sec_cbc_e_prep #sec_do_inout
.b16 #sec_cbc_d_prep #sec_do_inout
.b16 #sec_pcbc_e_prep #sec_do_inout
.b16 #sec_pcbc_d_prep #sec_do_inout
.b16 #sec_cfb_e_prep #sec_do_inout
.b16 #sec_cfb_d_prep #sec_do_inout
.b16 #sec_ofb_prep #sec_do_inout
.b16 #sec_ctr_prep #sec_do_inout
.b16 #sec_cbc_mac_prep #sec_do_in
.b16 #sec_cmac_finish_complete_prep #sec_do_in
.b16 #sec_cmac_finish_partial_prep #sec_do_in

.align 0x100

.section #g98_psec_code

// ---------------------------------------------------------------------
// Entry point: one-time setup, then idle. All real work happens in the
// interrupt handler.
// ---------------------------------------------------------------------

	// Convention for the whole file: $r0 holds 0 at all times, which
	// saves space wherever a zero or an absolute address is needed.
	clear b32 $r0

	// install the interrupt handler
	mov $r1 #ih
	mov $iv0 $r1

	// stack pointer starts at 0
	mov $sp $r0

	// interrupt dispatch: timer, fifo and ctxswitch go to i0,
	// everything else goes to the host
	movw $r1 0xfff0
	sethi $r1 0
	mov $r2 0x400
	iowr I[$r2 + 0x300] $r1

	// enable the interrupts
	or $r1 0xc
	iowr I[$r2] $r1

	// enable fifo access and context switching
	mov $r1 3
	mov $r2 0x1200
	iowr I[$r2] $r1

	// enable i0 delivery
	bset $flags ie0

	// sleep forever, waking only for interrupts
	bset $flags $p0
	spin:
	sleep $p0
	bra #spin

// ---------------------------------------------------------------------
// ih - i0 interrupt handler.
//
// Reads the pending-interrupt word from I[0x200] into $r1 and services:
//   bit 3 (0x8): context switch request
//   bit 2 (0x4): incoming fifo command
// Before returning it writes ($r1 & 0xc) to I[0x100] to acknowledge
// what it handled.
//
// Registers are used freely; nothing is saved or restored here.
// ---------------------------------------------------------------------
ih:
	// which interrupts are pending?
	iord $r1 I[$r0 + 0x200]

	and $r2 $r1 0x8
	cmpu b32 $r2 0
	bra e #noctx

		// context switch: set up the transfer registers
		mov $r2 0x7700
		mov $xtargets $r2
		mov $xdbase $r0
		// transfer descriptor for the 128-byte context at data offset 0
		mov $r2 0
		sethi $r2 0x50000

		// read the current channel
		mov $r3 0x1400
		iord $r4 I[$r3]
		// bit 30 set means a channel is active and must be unloaded
		// first (the shift moves bit 30 into the sign position)
		shl b32 $r5 $r4 1
		cmps b32 $r5 0
		bra nc #ctxload

			// unload: save the context
			xdst $r0 $r2
			xdwait
			// clear bit 30 and write the channel word back
			bclr $r4 0x1e
			iowr I[$r3] $r4
			// tell PFIFO the unload is done
			mov $r4 1
			iowr I[$r3 + 0x200] $r4

		bra #noctx

		ctxload:
			// nothing loaded - we may be asked to load a channel
			iord $r4 I[$r3 + 0x100]
			shl b32 $r15 $r4 1
			cmps b32 $r15 0
			// if bit 30 of the next channel is clear, PFIFO is
			// probably just killing a context: do a faux load,
			// without the active bit
			bra nc #dummyload

				// real context load
				xdld $r0 $r2
				xdwait
				// replay the saved DMA slots, highest index
				// first; the loop branches back while the
				// decrement leaves carry clear
				mov $r5 #ctx_dma
				mov $r6 #dma_count - 1
				ctxload_dma_loop:
					ld b32 $r7 D[$r5 + $r6 * 4]
					add b32 $r8 $r6 0x180
					shl b32 $r8 8
					iowr I[$r8] $r7
					sub b32 $r6 1
					bra nc #ctxload_dma_loop

			dummyload:
			// tell PFIFO the load is done
			mov $r5 2
			iowr I[$r3 + 0x200] $r5

	noctx:
	and $r2 $r1 0x4
	cmpu b32 $r2 0
	bra e #nocmd

		// Incoming fifo command. From here on:
		//   $r2 = method word from I[0x1a00], later shifted left by
		//         16 so it is already in place if an error has to be
		//         reported
		//   $r3 = method data from I[0x1900]
		//   $r4 = method index (low 11 bits of the method word)
		mov $r3 0x1900
		iord $r2 I[$r3 + 0x100]
		iord $r3 I[$r3]
		// extract the method
		and $r4 $r2 0x7ff
		// move the addr into position in case we interrupt later
		shl b32 $r2 0x10

		// mthd 0 and 0x100 [NAME, NOP]: ignore
		and $r5 $r4 0x7bf
		cmpu b32 $r5 0
		bra e #cmddone

		// Pick the handler by range, highest range first. For the
		// table ranges $r5 gets the table base biased by the first
		// index of the range and $r6 the first index past the range.
		mov $r5 #engine_cmd_dtable - 0xc0 * 8
		mov $r6 #engine_cmd_max
		cmpu b32 $r4 0xc0
		bra nc #dtable_cmd
		mov $r5 #common_cmd_dtable - 0x80 * 8
		mov $r6 #common_cmd_max
		cmpu b32 $r4 0x80
		bra nc #dtable_cmd
		cmpu b32 $r4 0x60
		bra nc #dma_cmd
		cmpu b32 $r4 0x50
		bra ne #illegal_mthd

			// mthd 0x140: PM_TRIGGER
			mov $r2 0x2200
			clear b32 $r3
			sethi $r3 0x20000
			iowr I[$r2] $r3
			bra #cmddone

		dma_cmd:
			// mthd 0x180...: DMA_*
			cmpu b32 $r4 0x60+#dma_count
			bra nc #illegal_mthd
			// $r5 = address of the ctx_dma slot for this method
			shl b32 $r5 $r4 2
			add b32 $r5 ((#ctx_dma - 0x60 * 4) & 0xffff)
			// set bit 30 in the data, remember it in the context
			// and program it into I[(0x180 + slot) << 8]
			bset $r3 0x1e
			st b32 D[$r5] $r3
			add b32 $r4 0x180 - 0x60
			shl b32 $r4 8
			iowr I[$r4] $r3
			bra #cmddone

		dtable_cmd:
			// range check against the end of the selected table
			cmpu b32 $r4 $r6
			bra nc #illegal_mthd
			// $r4 = address of the 8-byte table entry
			shl b32 $r4 3
			add b32 $r4 $r5
			// reject data with any bit of the entry's mask set
			ld b32 $r5 D[$r4 + 4]
			and $r5 $r3
			cmpu b32 $r5 0
			bra ne #invalid_bitfield
			// $r5 = target, $r6 = flags
			ld b16 $r5 D[$r4]
			ld b16 $r6 D[$r4 + 2]
			cmpu b32 $r6 2
			bra e #cmd_setctx
			// flags 1 and ctx_cond_off 1: drop the command
			ld b32 $r7 D[$r0 + #ctx_cond_off]
			and $r6 $r7
			cmpu b32 $r6 1
			bra e #cmddone
			// run the handler; it signals an error by returning
			// with $p1 set (and an error code ORed into $r2)
			call $r5
			bra $p1 #dispatch_error
			bra #cmddone

		cmd_setctx:
			// plain state word: store the data and finish
			st b32 D[$r5] $r3
			bra #cmddone


		invalid_bitfield:
			or $r2 1
		dispatch_error:
		illegal_mthd:
			// report the failing method word and data through
			// I[0x1000] / I[0x1100], then write 0x40 to I[0]
			mov $r4 0x1000
			iowr I[$r4] $r2
			iowr I[$r4 + 0x100] $r3
			mov $r4 0x40
			iowr I[$r0] $r4

			// spin until bit 0x40 of I[0x200] reads back clear
			im_loop:
				iord $r4 I[$r0 + 0x200]
				and $r4 0x40
				cmpu b32 $r4 0
				bra ne #im_loop

		cmddone:
		// remove the command from the FIFO
		mov $r3 0x1d00
		mov $r4 1
		iowr I[$r3] $r4

	nocmd:
	// ack the processed interrupts
	and $r1 $r1 0xc
	iowr I[$r0 + 0x100] $r1
iret

// ---------------------------------------------------------------------
// cmd_query_get - write a 16-byte query structure via DMA_QUERY.
//
// In:  $r3 = method data; bit 0 set requests an interrupt afterwards
//      $r2 = shifted method word
// Out: $p1 = bit 0 of $r3, $r2 |= 3
// Uses $r4-$r7, $xtargets, $xdbase and the first 16 bytes of swap.
//
// Structure written: { ctx_query_counter, 0, PTIMER low, PTIMER high }.
// ---------------------------------------------------------------------
cmd_query_get:
	// if bit 0 of the param is set, trigger an interrupt afterwards
	setp $p1 $r3
	or $r2 3

	// read PTIMER; re-read the high word and retry if it changed
	// while the low word was being read
	mov $r4 0xb00
	ptimer_retry:
		iord $r6 I[$r4 + 0x100]
		iord $r5 I[$r4]
		iord $r7 I[$r4 + 0x100]
		cmpu b32 $r6 $r7
		bra ne #ptimer_retry

	// build the query structure in swap
	ld b32 $r4 D[$r0 + #ctx_query_counter]
	st b32 D[$r0 + #swap + 0x0] $r4
	st b32 D[$r0 + #swap + 0x4] $r0
	st b32 D[$r0 + #swap + 0x8] $r5
	st b32 D[$r0 + #swap + 0xc] $r6

	// use target 0, DMA_QUERY
	mov $xtargets $r0

	ld b32 $r4 D[$r0 + #ctx_query_address_high]
	shl b32 $r4 0x18
	mov $xdbase $r4

	// store swap at the low query address and wait for completion
	ld b32 $r4 D[$r0 + #ctx_query_address_low]
	mov $r5 #swap
	sethi $r5 0x20000
	xdst $r4 $r5
	xdwait

	ret

// ---------------------------------------------------------------------
// cmd_cond_mode - set ctx_cond_off from the requested condition mode.
//
// In:  $r3 = mode
//        0, 1 : no QUERY involved, ctx_cond_off = mode ^ 1
//        2    : one QUERY, ctx_cond_off = (word at +4 == 0)
//        3, 4 : two consecutive QUERYs are compared; ctx_cond_off =
//               (both the +0 and the +4 words match) ^ (mode == 3)
//        >= 5 : INVALID_ENUM
// Out: $p1 set and $r2 |= 2 on an invalid mode; $p1 clear otherwise
//      ($r2 is ORed with 2 in every case).
// Uses $r4-$r6, $xtargets, $xdbase and swap.
// ---------------------------------------------------------------------
cmd_cond_mode:
	// assume INVALID_ENUM until the range check passes
	bset $flags $p1
	or $r2 2
	cmpu b32 $r3 5
	bra nc #return

	// valid mode: no error
	bclr $flags $p1

	// modes below 2 do not involve a QUERY object
	cmpu b32 $r3 2
	bra nc #cmd_cond_mode_queryful

	xor $r3 1
	st b32 D[$r0 + #ctx_cond_off] $r3
	return:
	ret

	cmd_cond_mode_queryful:
	// a QUERY object has to be pulled: split the address into a
	// transfer base ($r4) and the low 8 bits as offset ($r6)
	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
	and $r6 $r5 0xff
	shr b32 $r5 8
	shl b32 $r4 0x18
	or $r4 $r5
	mov $xdbase $r4
	mov $xtargets $r0

	// pull the first QUERY into swap
	mov $r5 #swap
	sethi $r5 0x20000
	xdld $r6 $r5

	// mode 2 involves only a single QUERY
	cmpu b32 $r3 2
	bra ne #cmd_cond_mode_double

	xdwait
	ld b32 $r4 D[$r0 + #swap + 4]
	cmpu b32 $r4 0
	xbit $r4 $flags z
	st b32 D[$r0 + #ctx_cond_off] $r4
	ret

	// otherwise pull the second QUERY as well, into swap + 0x10
	cmd_cond_mode_double:
	add b32 $r6 0x10
	add b32 $r5 0x10
	xdld $r6 $r5
	xdwait

	// compare COUNTERs
	ld b32 $r5 D[$r0 + #swap + 0x00]
	ld b32 $r6 D[$r0 + #swap + 0x10]
	cmpu b32 $r5 $r6
	xbit $r4 $flags z

	// compare RESen
	ld b32 $r5 D[$r0 + #swap + 0x04]
	ld b32 $r6 D[$r0 + #swap + 0x14]
	cmpu b32 $r5 $r6
	xbit $r5 $flags z
	and $r4 $r5

	// negate or not, depending on the mode
	cmpu b32 $r3 3
	xbit $r5 $flags z
	xor $r4 $r5
	st b32 D[$r0 + #ctx_cond_off] $r4
	ret

// ---------------------------------------------------------------------
// cmd_wrcache_flush - write 0x10000 to I[0x2200].
//
// Out: $p1 clear. Overwrites $r2 and $r3.
// ---------------------------------------------------------------------
cmd_wrcache_flush:
	bclr $flags $p1
	mov $r2 0x2200
	clear b32 $r3
	sethi $r3 0x10000
	iowr I[$r2] $r3
	ret

// ---------------------------------------------------------------------
// sec_cmd_mode - select the operation performed by sec_cmd_length.
//
// In:  $r3 = mode, an index into sec_dtable
// Out: mode < 0xf : ctx_mode = mode, $p1 clear
//      otherwise  : INVALID_ENUM, $p1 set
//      ($r2 is ORed with 2 in both cases)
// ---------------------------------------------------------------------
sec_cmd_mode:
	// assume INVALID_ENUM until the range check passes
	bset $flags $p1
	or $r2 2
	cmpu b32 $r3 0xf
	bra nc #sec_cmd_mode_return

	bclr $flags $p1
	st b32 D[$r0 + #ctx_mode] $r3

	sec_cmd_mode_return:
	ret

// ---------------------------------------------------------------------
// sec_cmd_length - run the selected operation over $r3 bytes.
//
// In:  $r3 = length in bytes; 0 is a no-op. The dispatch table mask
//      only lets multiples of 16 through.
//
// Register roles while the prep and transfer routines run:
//   $r4 = source transfer base        $r5 = source offset (low 8 bits)
//   $r6 = destination transfer base   $r7 = destination offset
//   $r8 = ctx_mode * 4                $r9 = scratch
//
// Afterwards the advanced source/destination addresses and the updated
// IV are written back to the context.
//
// NOTE(review): $p1 is not written on any path here, although the
// dispatcher tests it after the call - confirm it is reliably clear.
// ---------------------------------------------------------------------
sec_cmd_length:
	// nop if length == 0
	cmpu b32 $r3 0
	bra e #sec_cmd_mode_return

	// init key, IV
	cxset 3
	mov $r4 #ctx_key
	sethi $r4 0x70000
	xdst $r0 $r4
	mov $r4 #ctx_iv
	sethi $r4 0x60000
	xdst $r0 $r4
	xdwait
	ckeyreg $c7

	// prepare the targets
	mov $r4 0x2100
	mov $xtargets $r4

	// prepare src address: base in $r4, low 8 bits in $r5
	ld b32 $r4 D[$r0 + #ctx_src_address_high]
	ld b32 $r5 D[$r0 + #ctx_src_address_low]
	shr b32 $r8 $r5 8
	shl b32 $r4 0x18
	or $r4 $r8
	and $r5 $r5 0xff

	// prepare dst address: base in $r6, low 8 bits in $r7
	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
	shr b32 $r8 $r7 8
	shl b32 $r6 0x18
	or $r6 $r8
	and $r7 $r7 0xff

	// find the proper prep & do functions
	ld b32 $r8 D[$r0 + #ctx_mode]
	shl b32 $r8 2

	// run prep
	ld b16 $r9 D[$r8 + #sec_dtable]
	call $r9

	// do it
	ld b16 $r9 D[$r8 + #sec_dtable + 2]
	call $r9
	cxset 1
	xdwait
	cxset 0x61
	xdwait
	xdwait

	// update src address: rebuild high/low from base and offset
	shr b32 $r8 $r4 0x18
	shl b32 $r9 $r4 8
	add b32 $r9 $r5
	adc b32 $r8 0
	st b32 D[$r0 + #ctx_src_address_high] $r8
	st b32 D[$r0 + #ctx_src_address_low] $r9

	// update dst address the same way
	shr b32 $r8 $r6 0x18
	shl b32 $r9 $r6 8
	add b32 $r9 $r7
	adc b32 $r8 0
	st b32 D[$r0 + #ctx_dst_address_high] $r8
	st b32 D[$r0 + #ctx_dst_address_low] $r9

	// pull updated IV
	cxset 2
	mov $r4 #ctx_iv
	sethi $r4 0x60000
	xdld $r0 $r4
	xdwait

	ret


// ---------------------------------------------------------------------
// Prep routines, one per mode (see sec_dtable).
//
// Each one issues "cs0begin N" followed by exactly N crypto
// instructions; the transfer routines later issue "cs0exec 1" once per
// 16-byte block. Presumably cs0begin records the sequence as a script
// that cs0exec replays - confirm against the crypto coprocessor docs.
//
// sec_cmd_length sends ctx_iv to $c6 and ctx_key to $c7 beforehand and
// reads $c6 back into ctx_iv afterwards. The decrypt variants of
// ECB/CBC/PCBC run "ckexp $c7 $c7" before the script starts.
// ---------------------------------------------------------------------

sec_copy_prep:
	cs0begin 2
		cxsin $c0
		cxsout $c0
	ret

sec_store_prep:
	cs0begin 1
		cxsout $c6
	ret

sec_ecb_e_prep:
	cs0begin 3
		cxsin $c0
		cenc $c0 $c0
		cxsout $c0
	ret

sec_ecb_d_prep:
	ckexp $c7 $c7
	cs0begin 3
		cxsin $c0
		cdec $c0 $c0
		cxsout $c0
	ret

sec_cbc_e_prep:
	cs0begin 4
		cxsin $c0
		cxor $c6 $c0
		cenc $c6 $c6
		cxsout $c6
	ret

sec_cbc_d_prep:
	ckexp $c7 $c7
	cs0begin 5
		cmov $c2 $c6
		cxsin $c6
		cdec $c0 $c6
		cxor $c0 $c2
		cxsout $c0
	ret

sec_pcbc_e_prep:
	cs0begin 5
		cxsin $c0
		cxor $c6 $c0
		cenc $c6 $c6
		cxsout $c6
		cxor $c6 $c0
	ret

sec_pcbc_d_prep:
	ckexp $c7 $c7
	cs0begin 5
		cxsin $c0
		cdec $c1 $c0
		cxor $c6 $c1
		cxsout $c6
		cxor $c6 $c0
	ret

sec_cfb_e_prep:
	cs0begin 4
		cenc $c6 $c6
		cxsin $c0
		cxor $c6 $c0
		cxsout $c6
	ret

sec_cfb_d_prep:
	cs0begin 4
		cenc $c0 $c6
		cxsin $c6
		cxor $c0 $c6
		cxsout $c0
	ret

sec_ofb_prep:
	cs0begin 4
		cenc $c6 $c6
		cxsin $c0
		cxor $c0 $c6
		cxsout $c0
	ret

sec_ctr_prep:
	cs0begin 5
		cenc $c1 $c6
		cadd $c6 1
		cxsin $c0
		cxor $c0 $c1
		cxsout $c0
	ret

sec_cbc_mac_prep:
	cs0begin 3
		cxsin $c0
		cxor $c6 $c0
		cenc $c6 $c6
	ret

sec_cmac_finish_complete_prep:
	cs0begin 7
		cxsin $c0
		cxor $c6 $c0
		cxor $c0 $c0
		cenc $c0 $c0
		cprecmac $c0 $c0
		cxor $c6 $c0
		cenc $c6 $c6
	ret

sec_cmac_finish_partial_prep:
	cs0begin 8
		cxsin $c0
		cxor $c6 $c0
		cxor $c0 $c0
		cenc $c0 $c0
		cprecmac $c0 $c0
		cprecmac $c0 $c0
		cxor $c6 $c0
		cenc $c6 $c6
	ret

// ---------------------------------------------------------------------
// Transfer routines (second column of sec_dtable).
//
// All three process 16 bytes per iteration through swap and loop until
// the running offset equals the end offset, so they rely on $r3 being a
// non-zero multiple of 16 (sec_cmd_length returns early on 0 and the
// dispatch table mask rejects other values).
//
// In:  $r3 = length, $r4/$r5 = source base/offset,
//      $r6/$r7 = destination base/offset
// Out: the offsets that were used are advanced past the data; $r3 is
//      overwritten with the end offset, $r9 with the swap descriptor.
// ---------------------------------------------------------------------

// TODO (carried over from the original source, which does not say what
// remains to be done)

// Input only: feeds source blocks to the script, writes nothing back.
sec_do_in:
	add b32 $r3 $r5
	mov $xdbase $r4
	mov $r9 #swap
	sethi $r9 0x20000
	sec_do_in_loop:
		xdld $r5 $r9
		xdwait
		cxset 0x22
		xdst $r0 $r9
		cs0exec 1
		xdwait
		add b32 $r5 0x10
		cmpu b32 $r5 $r3
		bra ne #sec_do_in_loop
	cxset 1
	xdwait
	ret

// Output only: runs the script and stores each block to the destination.
sec_do_out:
	add b32 $r3 $r7
	mov $xdbase $r6
	mov $r9 #swap
	sethi $r9 0x20000
	sec_do_out_loop:
		cs0exec 1
		cxset 0x61
		xdld $r7 $r9
		xdst $r7 $r9
		cxset 1
		xdwait
		add b32 $r7 0x10
		cmpu b32 $r7 $r3
		bra ne #sec_do_out_loop
	ret

// Input and output: $xdbase is switched between the source and the
// destination base within every iteration. The end test uses the
// source offset only.
sec_do_inout:
	add b32 $r3 $r5
	mov $r9 #swap
	sethi $r9 0x20000
	sec_do_inout_loop:
		mov $xdbase $r4
		xdld $r5 $r9
		xdwait
		cxset 0x21
		xdst $r0 $r9
		cs0exec 1
		cxset 0x61
		mov $xdbase $r6
		xdld $r7 $r9
		xdst $r7 $r9
		cxset 1
		xdwait
		add b32 $r5 0x10
		add b32 $r7 0x10
		cmpu b32 $r5 $r3
		bra ne #sec_do_inout_loop
	ret

.align 0x100