//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//
//
// This file was generated by utils/unicode-case-fold.py from the Unicode
// case folding database at
//    http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt
//
// To regenerate this file, run:
//   utils/unicode-case-fold.py \
//     "http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt" \
//     > lib/Support/UnicodeCaseFold.cpp
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Unicode.h"

/// Fold the code point \p C to its case-folded counterpart.
///
/// The rules below are the ones emitted by utils/unicode-case-fold.py from
/// the Unicode 9.0.0 CaseFolding.txt named in the file header. Do not edit
/// them by hand; regenerate the file instead.
///
/// The rules are tested in ascending code point order, and every test relies
/// on the earlier ones having already returned for all smaller values:
///  - `if (C < lo) return C;` ends the search for a value that lies in the
///    gap before the next rule, so such values come back unchanged.
///  - `if (C <= hi) return C + N;` shifts a contiguous range by a constant.
///  - `if (C <= hi) return C | 1;` covers runs in which each even code point
///    maps to the odd one right after it; odd inputs come back unchanged.
///  - `if (C <= hi && C % 2 == 1) return C + 1;` covers runs in which each
///    odd code point maps to the even one right after it; even inputs fall
///    through and are returned unchanged by a later guard.
///  - `if (C == X) return Y;` maps a single code point.
///
/// \param C the code point to fold.
/// \returns the mapped code point, or \p C itself when no rule matches
///          (including every value below 0x0041 and above 0x1e921).
int llvm::sys::unicode::foldCharSimple(int C) {
  if (C < 0x0041)
    return C;
  // 26 characters
  if (C <= 0x005a)
    return C + 32;
  // MICRO SIGN
  if (C == 0x00b5)
    return 0x03bc;
  if (C < 0x00c0)
    return C;
  // 23 characters
  if (C <= 0x00d6)
    return C + 32;
  if (C < 0x00d8)
    return C;
  // 7 characters
  if (C <= 0x00de)
    return C + 32;
  if (C < 0x0100)
    return C;
  // 24 characters
  // Setting the low bit maps each even code point to the following odd one
  // and leaves odd code points as they are.
  if (C <= 0x012e)
    return C | 1;
  if (C < 0x0132)
    return C;
  // 3 characters
  if (C <= 0x0136)
    return C | 1;
  if (C < 0x0139)
    return C;
  // 8 characters
  // In this run the odd code points are the ones that map (to C + 1); even
  // values fail the parity test and are returned by the next guard.
  if (C <= 0x0147 && C % 2 == 1)
    return C + 1;
  if (C < 0x014a)
    return C;
  // 23 characters
  if (C <= 0x0176)
    return C | 1;
  // LATIN CAPITAL LETTER Y WITH DIAERESIS
  if (C == 0x0178)
    return 0x00ff;
  if (C < 0x0179)
    return C;
  // 3 characters
  if (C <= 0x017d && C % 2 == 1)
    return C + 1;
  // LATIN SMALL LETTER LONG S
  if (C == 0x017f)
    return 0x0073;
  // LATIN CAPITAL LETTER B WITH HOOK
  if (C == 0x0181)
    return 0x0253;
  if (C < 0x0182)
    return C;
  // 2 characters
  if (C <= 0x0184)
    return C | 1;
  // LATIN CAPITAL LETTER OPEN O
  if (C == 0x0186)
    return 0x0254;
  // LATIN CAPITAL LETTER C WITH HOOK
  if (C == 0x0187)
    return 0x0188;
  if (C < 0x0189)
    return C;
  // 2 characters
  if (C <= 0x018a)
    return C + 205;
  // LATIN CAPITAL LETTER D WITH TOPBAR
  if (C == 0x018b)
    return 0x018c;
  // LATIN CAPITAL LETTER REVERSED E
  if (C == 0x018e)
    return 0x01dd;
  // LATIN CAPITAL LETTER SCHWA
  if (C == 0x018f)
    return 0x0259;
  // LATIN CAPITAL LETTER OPEN E
  if (C == 0x0190)
    return 0x025b;
  // LATIN CAPITAL LETTER F WITH HOOK
  if (C == 0x0191)
    return 0x0192;
  // LATIN CAPITAL LETTER G WITH HOOK
  if (C == 0x0193)
    return 0x0260;
  // LATIN CAPITAL LETTER GAMMA
  if (C == 0x0194)
    return 0x0263;
  // LATIN CAPITAL LETTER IOTA
  if (C == 0x0196)
    return 0x0269;
  // LATIN CAPITAL LETTER I WITH STROKE
  if (C == 0x0197)
    return 0x0268;
  // LATIN CAPITAL LETTER K WITH HOOK
  if (C == 0x0198)
    return 0x0199;
  // LATIN CAPITAL LETTER TURNED M
  if (C == 0x019c)
    return 0x026f;
  // LATIN CAPITAL LETTER N WITH LEFT HOOK
  if (C == 0x019d)
    return 0x0272;
  // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
  if (C == 0x019f)
    return 0x0275;
  if (C < 0x01a0)
    return C;
  // 3 characters
  if (C <= 0x01a4)
    return C | 1;
  // LATIN LETTER YR
  if (C == 0x01a6)
    return 0x0280;
  // LATIN CAPITAL LETTER TONE TWO
  if (C == 0x01a7)
    return 0x01a8;
  // LATIN CAPITAL LETTER ESH
  if (C == 0x01a9)
    return 0x0283;
  // LATIN CAPITAL LETTER T WITH HOOK
  if (C == 0x01ac)
    return 0x01ad;
  // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
  if (C == 0x01ae)
    return 0x0288;
  // LATIN CAPITAL LETTER U WITH HORN
  if (C == 0x01af)
    return 0x01b0;
  if (C < 0x01b1)
    return C;
  // 2 characters
  if (C <= 0x01b2)
    return C + 217;
  if (C < 0x01b3)
    return C;
  // 2 characters
  if (C <= 0x01b5 && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER EZH
  if (C == 0x01b7)
    return 0x0292;
  if (C < 0x01b8)
    return C;
  // 2 characters
  // Within 0x01b8..0x01bc only 0x01b8 and 0x01bc are multiples of 4.
  if (C <= 0x01bc && C % 4 == 0)
    return C + 1;
  // LATIN CAPITAL LETTER DZ WITH CARON
  if (C == 0x01c4)
    return 0x01c6;
  // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
  if (C == 0x01c5)
    return 0x01c6;
  // LATIN CAPITAL LETTER LJ
  if (C == 0x01c7)
    return 0x01c9;
  // LATIN CAPITAL LETTER L WITH SMALL LETTER J
  if (C == 0x01c8)
    return 0x01c9;
  // LATIN CAPITAL LETTER NJ
  if (C == 0x01ca)
    return 0x01cc;
  if (C < 0x01cb)
    return C;
  // 9 characters
  if (C <= 0x01db && C % 2 == 1)
    return C + 1;
  if (C < 0x01de)
    return C;
  // 9 characters
  if (C <= 0x01ee)
    return C | 1;
  // LATIN CAPITAL LETTER DZ
  if (C == 0x01f1)
    return 0x01f3;
  if (C < 0x01f2)
    return C;
  // 2 characters
  if (C <= 0x01f4)
    return C | 1;
  // LATIN CAPITAL LETTER HWAIR
  if (C == 0x01f6)
    return 0x0195;
  // LATIN CAPITAL LETTER WYNN
  if (C == 0x01f7)
    return 0x01bf;
  if (C < 0x01f8)
    return C;
  // 20 characters
  if (C <= 0x021e)
    return C | 1;
  // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
  if (C == 0x0220)
    return 0x019e;
  if (C < 0x0222)
    return C;
  // 9 characters
  if (C <= 0x0232)
    return C | 1;
  // LATIN CAPITAL LETTER A WITH STROKE
  if (C == 0x023a)
    return 0x2c65;
  // LATIN CAPITAL LETTER C WITH STROKE
  if (C == 0x023b)
    return 0x023c;
  // LATIN CAPITAL LETTER L WITH BAR
  if (C == 0x023d)
    return 0x019a;
  // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
  if (C == 0x023e)
    return 0x2c66;
  // LATIN CAPITAL LETTER GLOTTAL STOP
  if (C == 0x0241)
    return 0x0242;
  // LATIN CAPITAL LETTER B WITH STROKE
  if (C == 0x0243)
    return 0x0180;
  // LATIN CAPITAL LETTER U BAR
  if (C == 0x0244)
    return 0x0289;
  // LATIN CAPITAL LETTER TURNED V
  if (C == 0x0245)
    return 0x028c;
  if (C < 0x0246)
    return C;
  // 5 characters
  if (C <= 0x024e)
    return C | 1;
  // COMBINING GREEK YPOGEGRAMMENI
  if (C == 0x0345)
    return 0x03b9;
  if (C < 0x0370)
    return C;
  // 2 characters
  if (C <= 0x0372)
    return C | 1;
  // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
  if (C == 0x0376)
    return 0x0377;
  // GREEK CAPITAL LETTER YOT
  if (C == 0x037f)
    return 0x03f3;
  // GREEK CAPITAL LETTER ALPHA WITH TONOS
  if (C == 0x0386)
    return 0x03ac;
  if (C < 0x0388)
    return C;
  // 3 characters
  if (C <= 0x038a)
    return C + 37;
  // GREEK CAPITAL LETTER OMICRON WITH TONOS
  if (C == 0x038c)
    return 0x03cc;
  if (C < 0x038e)
    return C;
  // 2 characters
  if (C <= 0x038f)
    return C + 63;
  if (C < 0x0391)
    return C;
  // 17 characters
  if (C <= 0x03a1)
    return C + 32;
  if (C < 0x03a3)
    return C;
  // 9 characters
  if (C <= 0x03ab)
    return C + 32;
  // GREEK SMALL LETTER FINAL SIGMA
  if (C == 0x03c2)
    return 0x03c3;
  // GREEK CAPITAL KAI SYMBOL
  if (C == 0x03cf)
    return 0x03d7;
  // GREEK BETA SYMBOL
  if (C == 0x03d0)
    return 0x03b2;
  // GREEK THETA SYMBOL
  if (C == 0x03d1)
    return 0x03b8;
  // GREEK PHI SYMBOL
  if (C == 0x03d5)
    return 0x03c6;
  // GREEK PI SYMBOL
  if (C == 0x03d6)
    return 0x03c0;
  if (C < 0x03d8)
    return C;
  // 12 characters
  if (C <= 0x03ee)
    return C | 1;
  // GREEK KAPPA SYMBOL
  if (C == 0x03f0)
    return 0x03ba;
  // GREEK RHO SYMBOL
  if (C == 0x03f1)
    return 0x03c1;
  // GREEK CAPITAL THETA SYMBOL
  if (C == 0x03f4)
    return 0x03b8;
  // GREEK LUNATE EPSILON SYMBOL
  if (C == 0x03f5)
    return 0x03b5;
  // GREEK CAPITAL LETTER SHO
  if (C == 0x03f7)
    return 0x03f8;
  // GREEK CAPITAL LUNATE SIGMA SYMBOL
  if (C == 0x03f9)
    return 0x03f2;
  // GREEK CAPITAL LETTER SAN
  if (C == 0x03fa)
    return 0x03fb;
  if (C < 0x03fd)
    return C;
  // 3 characters
  if (C <= 0x03ff)
    return C + -130;
  if (C < 0x0400)
    return C;
  // 16 characters
  if (C <= 0x040f)
    return C + 80;
  if (C < 0x0410)
    return C;
  // 32 characters
  if (C <= 0x042f)
    return C + 32;
  if (C < 0x0460)
    return C;
  // 17 characters
  if (C <= 0x0480)
    return C | 1;
  if (C < 0x048a)
    return C;
  // 27 characters
  if (C <= 0x04be)
    return C | 1;
  // CYRILLIC LETTER PALOCHKA
  if (C == 0x04c0)
    return 0x04cf;
  if (C < 0x04c1)
    return C;
  // 7 characters
  if (C <= 0x04cd && C % 2 == 1)
    return C + 1;
  if (C < 0x04d0)
    return C;
  // 48 characters
  if (C <= 0x052e)
    return C | 1;
  if (C < 0x0531)
    return C;
  // 38 characters
  if (C <= 0x0556)
    return C + 48;
  if (C < 0x10a0)
    return C;
  // 38 characters
  if (C <= 0x10c5)
    return C + 7264;
  if (C < 0x10c7)
    return C;
  // 2 characters
  // Within 0x10c7..0x10cd only the two end points leave remainder 5 when
  // divided by 6.
  if (C <= 0x10cd && C % 6 == 5)
    return C + 7264;
  if (C < 0x13f8)
    return C;
  // 6 characters
  if (C <= 0x13fd)
    return C + -8;
  // CYRILLIC SMALL LETTER ROUNDED VE
  if (C == 0x1c80)
    return 0x0432;
  // CYRILLIC SMALL LETTER LONG-LEGGED DE
  if (C == 0x1c81)
    return 0x0434;
  // CYRILLIC SMALL LETTER NARROW O
  if (C == 0x1c82)
    return 0x043e;
  if (C < 0x1c83)
    return C;
  // 2 characters
  if (C <= 0x1c84)
    return C + -6210;
  // CYRILLIC SMALL LETTER THREE-LEGGED TE
  if (C == 0x1c85)
    return 0x0442;
  // CYRILLIC SMALL LETTER TALL HARD SIGN
  if (C == 0x1c86)
    return 0x044a;
  // CYRILLIC SMALL LETTER TALL YAT
  if (C == 0x1c87)
    return 0x0463;
  // CYRILLIC SMALL LETTER UNBLENDED UK
  if (C == 0x1c88)
    return 0xa64b;
  if (C < 0x1e00)
    return C;
  // 75 characters
  if (C <= 0x1e94)
    return C | 1;
  // LATIN SMALL LETTER LONG S WITH DOT ABOVE
  if (C == 0x1e9b)
    return 0x1e61;
  // LATIN CAPITAL LETTER SHARP S
  if (C == 0x1e9e)
    return 0x00df;
  if (C < 0x1ea0)
    return C;
  // 48 characters
  if (C <= 0x1efe)
    return C | 1;
  if (C < 0x1f08)
    return C;
  // 8 characters
  if (C <= 0x1f0f)
    return C + -8;
  if (C < 0x1f18)
    return C;
  // 6 characters
  if (C <= 0x1f1d)
    return C + -8;
  if (C < 0x1f28)
    return C;
  // 8 characters
  if (C <= 0x1f2f)
    return C + -8;
  if (C < 0x1f38)
    return C;
  // 8 characters
  if (C <= 0x1f3f)
    return C + -8;
  if (C < 0x1f48)
    return C;
  // 6 characters
  if (C <= 0x1f4d)
    return C + -8;
  if (C < 0x1f59)
    return C;
  // 4 characters
  if (C <= 0x1f5f && C % 2 == 1)
    return C + -8;
  if (C < 0x1f68)
    return C;
  // 8 characters
  if (C <= 0x1f6f)
    return C + -8;
  if (C < 0x1f88)
    return C;
  // 8 characters
  if (C <= 0x1f8f)
    return C + -8;
  if (C < 0x1f98)
    return C;
  // 8 characters
  if (C <= 0x1f9f)
    return C + -8;
  if (C < 0x1fa8)
    return C;
  // 8 characters
  if (C <= 0x1faf)
    return C + -8;
  if (C < 0x1fb8)
    return C;
  // 2 characters
  if (C <= 0x1fb9)
    return C + -8;
  if (C < 0x1fba)
    return C;
  // 2 characters
  if (C <= 0x1fbb)
    return C + -74;
  // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
  if (C == 0x1fbc)
    return 0x1fb3;
  // GREEK PROSGEGRAMMENI
  if (C == 0x1fbe)
    return 0x03b9;
  if (C < 0x1fc8)
    return C;
  // 4 characters
  if (C <= 0x1fcb)
    return C + -86;
  // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
  if (C == 0x1fcc)
    return 0x1fc3;
  if (C < 0x1fd8)
    return C;
  // 2 characters
  if (C <= 0x1fd9)
    return C + -8;
  if (C < 0x1fda)
    return C;
  // 2 characters
  if (C <= 0x1fdb)
    return C + -100;
  if (C < 0x1fe8)
    return C;
  // 2 characters
  if (C <= 0x1fe9)
    return C + -8;
  if (C < 0x1fea)
    return C;
  // 2 characters
  if (C <= 0x1feb)
    return C + -112;
  // GREEK CAPITAL LETTER RHO WITH DASIA
  if (C == 0x1fec)
    return 0x1fe5;
  if (C < 0x1ff8)
    return C;
  // 2 characters
  if (C <= 0x1ff9)
    return C + -128;
  if (C < 0x1ffa)
    return C;
  // 2 characters
  if (C <= 0x1ffb)
    return C + -126;
  // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
  if (C == 0x1ffc)
    return 0x1ff3;
  // OHM SIGN
  if (C == 0x2126)
    return 0x03c9;
  // KELVIN SIGN
  if (C == 0x212a)
    return 0x006b;
  // ANGSTROM SIGN
  if (C == 0x212b)
    return 0x00e5;
  // TURNED CAPITAL F
  if (C == 0x2132)
    return 0x214e;
  if (C < 0x2160)
    return C;
  // 16 characters
  if (C <= 0x216f)
    return C + 16;
  // ROMAN NUMERAL REVERSED ONE HUNDRED
  if (C == 0x2183)
    return 0x2184;
  if (C < 0x24b6)
    return C;
  // 26 characters
  if (C <= 0x24cf)
    return C + 26;
  if (C < 0x2c00)
    return C;
  // 47 characters
  if (C <= 0x2c2e)
    return C + 48;
  // LATIN CAPITAL LETTER L WITH DOUBLE BAR
  if (C == 0x2c60)
    return 0x2c61;
  // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
  if (C == 0x2c62)
    return 0x026b;
  // LATIN CAPITAL LETTER P WITH STROKE
  if (C == 0x2c63)
    return 0x1d7d;
  // LATIN CAPITAL LETTER R WITH TAIL
  if (C == 0x2c64)
    return 0x027d;
  if (C < 0x2c67)
    return C;
  // 3 characters
  if (C <= 0x2c6b && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER ALPHA
  if (C == 0x2c6d)
    return 0x0251;
  // LATIN CAPITAL LETTER M WITH HOOK
  if (C == 0x2c6e)
    return 0x0271;
  // LATIN CAPITAL LETTER TURNED A
  if (C == 0x2c6f)
    return 0x0250;
  // LATIN CAPITAL LETTER TURNED ALPHA
  if (C == 0x2c70)
    return 0x0252;
  if (C < 0x2c72)
    return C;
  // 2 characters
  // Within 0x2c72..0x2c75 only the two end points leave remainder 2 when
  // divided by 3.
  if (C <= 0x2c75 && C % 3 == 2)
    return C + 1;
  if (C < 0x2c7e)
    return C;
  // 2 characters
  if (C <= 0x2c7f)
    return C + -10815;
  if (C < 0x2c80)
    return C;
  // 50 characters
  if (C <= 0x2ce2)
    return C | 1;
  if (C < 0x2ceb)
    return C;
  // 2 characters
  if (C <= 0x2ced && C % 2 == 1)
    return C + 1;
  if (C < 0x2cf2)
    return C;
  // 2 characters
  // 0xa640 - 0x2cf2 == 31054 and 0x2cf2 == 11506, so within 0x2cf2..0xa640
  // this test matches exactly the two end points.
  if (C <= 0xa640 && C % 31054 == 11506)
    return C + 1;
  if (C < 0xa642)
    return C;
  // 22 characters
  if (C <= 0xa66c)
    return C | 1;
  if (C < 0xa680)
    return C;
  // 14 characters
  if (C <= 0xa69a)
    return C | 1;
  if (C < 0xa722)
    return C;
  // 7 characters
  if (C <= 0xa72e)
    return C | 1;
  if (C < 0xa732)
    return C;
  // 31 characters
  if (C <= 0xa76e)
    return C | 1;
  if (C < 0xa779)
    return C;
  // 2 characters
  if (C <= 0xa77b && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER INSULAR G
  if (C == 0xa77d)
    return 0x1d79;
  if (C < 0xa77e)
    return C;
  // 5 characters
  if (C <= 0xa786)
    return C | 1;
  // LATIN CAPITAL LETTER SALTILLO
  if (C == 0xa78b)
    return 0xa78c;
  // LATIN CAPITAL LETTER TURNED H
  if (C == 0xa78d)
    return 0x0265;
  if (C < 0xa790)
    return C;
  // 2 characters
  if (C <= 0xa792)
    return C | 1;
  if (C < 0xa796)
    return C;
  // 10 characters
  if (C <= 0xa7a8)
    return C | 1;
  // LATIN CAPITAL LETTER H WITH HOOK
  if (C == 0xa7aa)
    return 0x0266;
  // LATIN CAPITAL LETTER REVERSED OPEN E
  if (C == 0xa7ab)
    return 0x025c;
  // LATIN CAPITAL LETTER SCRIPT G
  if (C == 0xa7ac)
    return 0x0261;
  // LATIN CAPITAL LETTER L WITH BELT
  if (C == 0xa7ad)
    return 0x026c;
  // LATIN CAPITAL LETTER SMALL CAPITAL I
  if (C == 0xa7ae)
    return 0x026a;
  // LATIN CAPITAL LETTER TURNED K
  if (C == 0xa7b0)
    return 0x029e;
  // LATIN CAPITAL LETTER TURNED T
  if (C == 0xa7b1)
    return 0x0287;
  // LATIN CAPITAL LETTER J WITH CROSSED-TAIL
  if (C == 0xa7b2)
    return 0x029d;
  // LATIN CAPITAL LETTER CHI
  if (C == 0xa7b3)
    return 0xab53;
  if (C < 0xa7b4)
    return C;
  // 2 characters
  if (C <= 0xa7b6)
    return C | 1;
  if (C < 0xab70)
    return C;
  // 80 characters
  if (C <= 0xabbf)
    return C + -38864;
  if (C < 0xff21)
    return C;
  // 26 characters
  if (C <= 0xff3a)
    return C + 32;
  if (C < 0x10400)
    return C;
  // 40 characters
  if (C <= 0x10427)
    return C + 40;
  if (C < 0x104b0)
    return C;
  // 36 characters
  if (C <= 0x104d3)
    return C + 40;
  if (C < 0x10c80)
    return C;
  // 51 characters
  if (C <= 0x10cb2)
    return C + 64;
  if (C < 0x118a0)
    return C;
  // 32 characters
  if (C <= 0x118bf)
    return C + 32;
  if (C < 0x1e900)
    return C;
  // 34 characters
  if (C <= 0x1e921)
    return C + 34;

  // No rule matched: the code point is returned unchanged.
  return C;
}