xref: /freebsd/contrib/llvm-project/llvm/lib/Support/Unicode.cpp (revision dddf29712f38ba1d804c02bcfd02d24098ae48b0)
1  //===- llvm/Support/Unicode.cpp - Unicode character properties  -*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements functions that allow querying certain properties of
10  // Unicode characters.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/Support/Unicode.h"
15  #include "llvm/Support/ConvertUTF.h"
16  #include "llvm/Support/UnicodeCharRanges.h"
17  
18  namespace llvm {
19  namespace sys {
20  namespace unicode {
21  
22  /// Unicode code points of the categories L, M, N, P, S and Zs are considered
23  /// printable.
24  /// In addition, U+00AD SOFT HYPHEN is also considered printable, as
25  /// it's actually displayed on most terminals. \return true if the character is
26  /// considered printable.
27  bool isPrintable(int UCS) {
28    // https://unicode.org/Public/15.1.0/ucdxml/
29    static const UnicodeCharRange PrintableRanges[] = {
30        {0x0020, 0x007E},   {0x00A0, 0x00AC},   {0x00AE, 0x0377},
31        {0x037A, 0x037F},   {0x0384, 0x038A},   {0x038C, 0x038C},
32        {0x038E, 0x03A1},   {0x03A3, 0x052F},   {0x0531, 0x0556},
33        {0x0559, 0x058A},   {0x058D, 0x058F},   {0x0591, 0x05C7},
34        {0x05D0, 0x05EA},   {0x05EF, 0x05F4},   {0x0606, 0x061B},
35        {0x061D, 0x06DC},   {0x06DE, 0x070D},   {0x0710, 0x074A},
36        {0x074D, 0x07B1},   {0x07C0, 0x07FA},   {0x07FD, 0x082D},
37        {0x0830, 0x083E},   {0x0840, 0x085B},   {0x085E, 0x085E},
38        {0x0860, 0x086A},   {0x0870, 0x088E},   {0x0898, 0x08E1},
39        {0x08E3, 0x0983},   {0x0985, 0x098C},   {0x098F, 0x0990},
40        {0x0993, 0x09A8},   {0x09AA, 0x09B0},   {0x09B2, 0x09B2},
41        {0x09B6, 0x09B9},   {0x09BC, 0x09C4},   {0x09C7, 0x09C8},
42        {0x09CB, 0x09CE},   {0x09D7, 0x09D7},   {0x09DC, 0x09DD},
43        {0x09DF, 0x09E3},   {0x09E6, 0x09FE},   {0x0A01, 0x0A03},
44        {0x0A05, 0x0A0A},   {0x0A0F, 0x0A10},   {0x0A13, 0x0A28},
45        {0x0A2A, 0x0A30},   {0x0A32, 0x0A33},   {0x0A35, 0x0A36},
46        {0x0A38, 0x0A39},   {0x0A3C, 0x0A3C},   {0x0A3E, 0x0A42},
47        {0x0A47, 0x0A48},   {0x0A4B, 0x0A4D},   {0x0A51, 0x0A51},
48        {0x0A59, 0x0A5C},   {0x0A5E, 0x0A5E},   {0x0A66, 0x0A76},
49        {0x0A81, 0x0A83},   {0x0A85, 0x0A8D},   {0x0A8F, 0x0A91},
50        {0x0A93, 0x0AA8},   {0x0AAA, 0x0AB0},   {0x0AB2, 0x0AB3},
51        {0x0AB5, 0x0AB9},   {0x0ABC, 0x0AC5},   {0x0AC7, 0x0AC9},
52        {0x0ACB, 0x0ACD},   {0x0AD0, 0x0AD0},   {0x0AE0, 0x0AE3},
53        {0x0AE6, 0x0AF1},   {0x0AF9, 0x0AFF},   {0x0B01, 0x0B03},
54        {0x0B05, 0x0B0C},   {0x0B0F, 0x0B10},   {0x0B13, 0x0B28},
55        {0x0B2A, 0x0B30},   {0x0B32, 0x0B33},   {0x0B35, 0x0B39},
56        {0x0B3C, 0x0B44},   {0x0B47, 0x0B48},   {0x0B4B, 0x0B4D},
57        {0x0B55, 0x0B57},   {0x0B5C, 0x0B5D},   {0x0B5F, 0x0B63},
58        {0x0B66, 0x0B77},   {0x0B82, 0x0B83},   {0x0B85, 0x0B8A},
59        {0x0B8E, 0x0B90},   {0x0B92, 0x0B95},   {0x0B99, 0x0B9A},
60        {0x0B9C, 0x0B9C},   {0x0B9E, 0x0B9F},   {0x0BA3, 0x0BA4},
61        {0x0BA8, 0x0BAA},   {0x0BAE, 0x0BB9},   {0x0BBE, 0x0BC2},
62        {0x0BC6, 0x0BC8},   {0x0BCA, 0x0BCD},   {0x0BD0, 0x0BD0},
63        {0x0BD7, 0x0BD7},   {0x0BE6, 0x0BFA},   {0x0C00, 0x0C0C},
64        {0x0C0E, 0x0C10},   {0x0C12, 0x0C28},   {0x0C2A, 0x0C39},
65        {0x0C3C, 0x0C44},   {0x0C46, 0x0C48},   {0x0C4A, 0x0C4D},
66        {0x0C55, 0x0C56},   {0x0C58, 0x0C5A},   {0x0C5D, 0x0C5D},
67        {0x0C60, 0x0C63},   {0x0C66, 0x0C6F},   {0x0C77, 0x0C8C},
68        {0x0C8E, 0x0C90},   {0x0C92, 0x0CA8},   {0x0CAA, 0x0CB3},
69        {0x0CB5, 0x0CB9},   {0x0CBC, 0x0CC4},   {0x0CC6, 0x0CC8},
70        {0x0CCA, 0x0CCD},   {0x0CD5, 0x0CD6},   {0x0CDD, 0x0CDE},
71        {0x0CE0, 0x0CE3},   {0x0CE6, 0x0CEF},   {0x0CF1, 0x0CF3},
72        {0x0D00, 0x0D0C},   {0x0D0E, 0x0D10},   {0x0D12, 0x0D44},
73        {0x0D46, 0x0D48},   {0x0D4A, 0x0D4F},   {0x0D54, 0x0D63},
74        {0x0D66, 0x0D7F},   {0x0D81, 0x0D83},   {0x0D85, 0x0D96},
75        {0x0D9A, 0x0DB1},   {0x0DB3, 0x0DBB},   {0x0DBD, 0x0DBD},
76        {0x0DC0, 0x0DC6},   {0x0DCA, 0x0DCA},   {0x0DCF, 0x0DD4},
77        {0x0DD6, 0x0DD6},   {0x0DD8, 0x0DDF},   {0x0DE6, 0x0DEF},
78        {0x0DF2, 0x0DF4},   {0x0E01, 0x0E3A},   {0x0E3F, 0x0E5B},
79        {0x0E81, 0x0E82},   {0x0E84, 0x0E84},   {0x0E86, 0x0E8A},
80        {0x0E8C, 0x0EA3},   {0x0EA5, 0x0EA5},   {0x0EA7, 0x0EBD},
81        {0x0EC0, 0x0EC4},   {0x0EC6, 0x0EC6},   {0x0EC8, 0x0ECE},
82        {0x0ED0, 0x0ED9},   {0x0EDC, 0x0EDF},   {0x0F00, 0x0F47},
83        {0x0F49, 0x0F6C},   {0x0F71, 0x0F97},   {0x0F99, 0x0FBC},
84        {0x0FBE, 0x0FCC},   {0x0FCE, 0x0FDA},   {0x1000, 0x10C5},
85        {0x10C7, 0x10C7},   {0x10CD, 0x10CD},   {0x10D0, 0x1248},
86        {0x124A, 0x124D},   {0x1250, 0x1256},   {0x1258, 0x1258},
87        {0x125A, 0x125D},   {0x1260, 0x1288},   {0x128A, 0x128D},
88        {0x1290, 0x12B0},   {0x12B2, 0x12B5},   {0x12B8, 0x12BE},
89        {0x12C0, 0x12C0},   {0x12C2, 0x12C5},   {0x12C8, 0x12D6},
90        {0x12D8, 0x1310},   {0x1312, 0x1315},   {0x1318, 0x135A},
91        {0x135D, 0x137C},   {0x1380, 0x1399},   {0x13A0, 0x13F5},
92        {0x13F8, 0x13FD},   {0x1400, 0x169C},   {0x16A0, 0x16F8},
93        {0x1700, 0x1715},   {0x171F, 0x1736},   {0x1740, 0x1753},
94        {0x1760, 0x176C},   {0x176E, 0x1770},   {0x1772, 0x1773},
95        {0x1780, 0x17DD},   {0x17E0, 0x17E9},   {0x17F0, 0x17F9},
96        {0x1800, 0x180D},   {0x180F, 0x1819},   {0x1820, 0x1878},
97        {0x1880, 0x18AA},   {0x18B0, 0x18F5},   {0x1900, 0x191E},
98        {0x1920, 0x192B},   {0x1930, 0x193B},   {0x1940, 0x1940},
99        {0x1944, 0x196D},   {0x1970, 0x1974},   {0x1980, 0x19AB},
100        {0x19B0, 0x19C9},   {0x19D0, 0x19DA},   {0x19DE, 0x1A1B},
101        {0x1A1E, 0x1A5E},   {0x1A60, 0x1A7C},   {0x1A7F, 0x1A89},
102        {0x1A90, 0x1A99},   {0x1AA0, 0x1AAD},   {0x1AB0, 0x1ACE},
103        {0x1B00, 0x1B4C},   {0x1B50, 0x1B7E},   {0x1B80, 0x1BF3},
104        {0x1BFC, 0x1C37},   {0x1C3B, 0x1C49},   {0x1C4D, 0x1C88},
105        {0x1C90, 0x1CBA},   {0x1CBD, 0x1CC7},   {0x1CD0, 0x1CFA},
106        {0x1D00, 0x1F15},   {0x1F18, 0x1F1D},   {0x1F20, 0x1F45},
107        {0x1F48, 0x1F4D},   {0x1F50, 0x1F57},   {0x1F59, 0x1F59},
108        {0x1F5B, 0x1F5B},   {0x1F5D, 0x1F5D},   {0x1F5F, 0x1F7D},
109        {0x1F80, 0x1FB4},   {0x1FB6, 0x1FC4},   {0x1FC6, 0x1FD3},
110        {0x1FD6, 0x1FDB},   {0x1FDD, 0x1FEF},   {0x1FF2, 0x1FF4},
111        {0x1FF6, 0x1FFE},   {0x2000, 0x200A},   {0x2010, 0x2027},
112        {0x202F, 0x205F},   {0x2070, 0x2071},   {0x2074, 0x208E},
113        {0x2090, 0x209C},   {0x20A0, 0x20C0},   {0x20D0, 0x20F0},
114        {0x2100, 0x218B},   {0x2190, 0x2426},   {0x2440, 0x244A},
115        {0x2460, 0x2B73},   {0x2B76, 0x2B95},   {0x2B97, 0x2CF3},
116        {0x2CF9, 0x2D25},   {0x2D27, 0x2D27},   {0x2D2D, 0x2D2D},
117        {0x2D30, 0x2D67},   {0x2D6F, 0x2D70},   {0x2D7F, 0x2D96},
118        {0x2DA0, 0x2DA6},   {0x2DA8, 0x2DAE},   {0x2DB0, 0x2DB6},
119        {0x2DB8, 0x2DBE},   {0x2DC0, 0x2DC6},   {0x2DC8, 0x2DCE},
120        {0x2DD0, 0x2DD6},   {0x2DD8, 0x2DDE},   {0x2DE0, 0x2E5D},
121        {0x2E80, 0x2E99},   {0x2E9B, 0x2EF3},   {0x2F00, 0x2FD5},
122        {0x2FF0, 0x303F},   {0x3041, 0x3096},   {0x3099, 0x30FF},
123        {0x3105, 0x312F},   {0x3131, 0x318E},   {0x3190, 0x31E3},
124        {0x31EF, 0x321E},   {0x3220, 0xA48C},   {0xA490, 0xA4C6},
125        {0xA4D0, 0xA62B},   {0xA640, 0xA6F7},   {0xA700, 0xA7CA},
126        {0xA7D0, 0xA7D1},   {0xA7D3, 0xA7D3},   {0xA7D5, 0xA7D9},
127        {0xA7F2, 0xA82C},   {0xA830, 0xA839},   {0xA840, 0xA877},
128        {0xA880, 0xA8C5},   {0xA8CE, 0xA8D9},   {0xA8E0, 0xA953},
129        {0xA95F, 0xA97C},   {0xA980, 0xA9CD},   {0xA9CF, 0xA9D9},
130        {0xA9DE, 0xA9FE},   {0xAA00, 0xAA36},   {0xAA40, 0xAA4D},
131        {0xAA50, 0xAA59},   {0xAA5C, 0xAAC2},   {0xAADB, 0xAAF6},
132        {0xAB01, 0xAB06},   {0xAB09, 0xAB0E},   {0xAB11, 0xAB16},
133        {0xAB20, 0xAB26},   {0xAB28, 0xAB2E},   {0xAB30, 0xAB6B},
134        {0xAB70, 0xABED},   {0xABF0, 0xABF9},   {0xAC00, 0xD7A3},
135        {0xD7B0, 0xD7C6},   {0xD7CB, 0xD7FB},   {0xF900, 0xFA6D},
136        {0xFA70, 0xFAD9},   {0xFB00, 0xFB06},   {0xFB13, 0xFB17},
137        {0xFB1D, 0xFB36},   {0xFB38, 0xFB3C},   {0xFB3E, 0xFB3E},
138        {0xFB40, 0xFB41},   {0xFB43, 0xFB44},   {0xFB46, 0xFBC2},
139        {0xFBD3, 0xFD8F},   {0xFD92, 0xFDC7},   {0xFDCF, 0xFDCF},
140        {0xFDF0, 0xFE19},   {0xFE20, 0xFE52},   {0xFE54, 0xFE66},
141        {0xFE68, 0xFE6B},   {0xFE70, 0xFE74},   {0xFE76, 0xFEFC},
142        {0xFF01, 0xFFBE},   {0xFFC2, 0xFFC7},   {0xFFCA, 0xFFCF},
143        {0xFFD2, 0xFFD7},   {0xFFDA, 0xFFDC},   {0xFFE0, 0xFFE6},
144        {0xFFE8, 0xFFEE},   {0xFFFC, 0xFFFD},   {0x10000, 0x1000B},
145        {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D},
146        {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
147        {0x10100, 0x10102}, {0x10107, 0x10133}, {0x10137, 0x1018E},
148        {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FD},
149        {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x102E0, 0x102FB},
150        {0x10300, 0x10323}, {0x1032D, 0x1034A}, {0x10350, 0x1037A},
151        {0x10380, 0x1039D}, {0x1039F, 0x103C3}, {0x103C8, 0x103D5},
152        {0x10400, 0x1049D}, {0x104A0, 0x104A9}, {0x104B0, 0x104D3},
153        {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563},
154        {0x1056F, 0x1057A}, {0x1057C, 0x1058A}, {0x1058C, 0x10592},
155        {0x10594, 0x10595}, {0x10597, 0x105A1}, {0x105A3, 0x105B1},
156        {0x105B3, 0x105B9}, {0x105BB, 0x105BC}, {0x10600, 0x10736},
157        {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10780, 0x10785},
158        {0x10787, 0x107B0}, {0x107B2, 0x107BA}, {0x10800, 0x10805},
159        {0x10808, 0x10808}, {0x1080A, 0x10835}, {0x10837, 0x10838},
160        {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10857, 0x1089E},
161        {0x108A7, 0x108AF}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5},
162        {0x108FB, 0x1091B}, {0x1091F, 0x10939}, {0x1093F, 0x1093F},
163        {0x10980, 0x109B7}, {0x109BC, 0x109CF}, {0x109D2, 0x10A03},
164        {0x10A05, 0x10A06}, {0x10A0C, 0x10A13}, {0x10A15, 0x10A17},
165        {0x10A19, 0x10A35}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A48},
166        {0x10A50, 0x10A58}, {0x10A60, 0x10A9F}, {0x10AC0, 0x10AE6},
167        {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35}, {0x10B39, 0x10B55},
168        {0x10B58, 0x10B72}, {0x10B78, 0x10B91}, {0x10B99, 0x10B9C},
169        {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2},
170        {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27}, {0x10D30, 0x10D39},
171        {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9}, {0x10EAB, 0x10EAD},
172        {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27}, {0x10F30, 0x10F59},
173        {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB}, {0x10FE0, 0x10FF6},
174        {0x11000, 0x1104D}, {0x11052, 0x11075}, {0x1107F, 0x110BC},
175        {0x110BE, 0x110C2}, {0x110D0, 0x110E8}, {0x110F0, 0x110F9},
176        {0x11100, 0x11134}, {0x11136, 0x11147}, {0x11150, 0x11176},
177        {0x11180, 0x111DF}, {0x111E1, 0x111F4}, {0x11200, 0x11211},
178        {0x11213, 0x11241}, {0x11280, 0x11286}, {0x11288, 0x11288},
179        {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A9},
180        {0x112B0, 0x112EA}, {0x112F0, 0x112F9}, {0x11300, 0x11303},
181        {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328},
182        {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
183        {0x1133B, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D},
184        {0x11350, 0x11350}, {0x11357, 0x11357}, {0x1135D, 0x11363},
185        {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11400, 0x1145B},
186        {0x1145D, 0x11461}, {0x11480, 0x114C7}, {0x114D0, 0x114D9},
187        {0x11580, 0x115B5}, {0x115B8, 0x115DD}, {0x11600, 0x11644},
188        {0x11650, 0x11659}, {0x11660, 0x1166C}, {0x11680, 0x116B9},
189        {0x116C0, 0x116C9}, {0x11700, 0x1171A}, {0x1171D, 0x1172B},
190        {0x11730, 0x11746}, {0x11800, 0x1183B}, {0x118A0, 0x118F2},
191        {0x118FF, 0x11906}, {0x11909, 0x11909}, {0x1190C, 0x11913},
192        {0x11915, 0x11916}, {0x11918, 0x11935}, {0x11937, 0x11938},
193        {0x1193B, 0x11946}, {0x11950, 0x11959}, {0x119A0, 0x119A7},
194        {0x119AA, 0x119D7}, {0x119DA, 0x119E4}, {0x11A00, 0x11A47},
195        {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8}, {0x11B00, 0x11B09},
196        {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
197        {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
198        {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
199        {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
200        {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
201        {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
202        {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
203        {0x11F00, 0x11F10}, {0x11F12, 0x11F3A}, {0x11F3E, 0x11F59},
204        {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
205        {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
206        {0x12F90, 0x12FF2}, {0x13000, 0x1342F}, {0x13440, 0x13455},
207        {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E},
208        {0x16A60, 0x16A69}, {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9},
209        {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45},
210        {0x16B50, 0x16B59}, {0x16B5B, 0x16B61}, {0x16B63, 0x16B77},
211        {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A},
212        {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F}, {0x16FE0, 0x16FE4},
213        {0x16FF0, 0x16FF1}, {0x17000, 0x187F7}, {0x18800, 0x18CD5},
214        {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB},
215        {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122}, {0x1B132, 0x1B132},
216        {0x1B150, 0x1B152}, {0x1B155, 0x1B155}, {0x1B164, 0x1B167},
217        {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
218        {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BC9F},
219        {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3},
220        {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D172},
221        {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245}, {0x1D2C0, 0x1D2D3},
222        {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356}, {0x1D360, 0x1D378},
223        {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F},
224        {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC},
225        {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3},
226        {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
227        {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E},
228        {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550},
229        {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B},
230        {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E},
231        {0x1DF25, 0x1DF2A}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},
232        {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
233        {0x1E030, 0x1E06D}, {0x1E08F, 0x1E08F}, {0x1E100, 0x1E12C},
234        {0x1E130, 0x1E13D}, {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F},
235        {0x1E290, 0x1E2AE}, {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF},
236        {0x1E4D0, 0x1E4F9}, {0x1E7E0, 0x1E7E6}, {0x1E7E8, 0x1E7EB},
237        {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE}, {0x1E800, 0x1E8C4},
238        {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B}, {0x1E950, 0x1E959},
239        {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4}, {0x1ED01, 0x1ED3D},
240        {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22},
241        {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32},
242        {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B},
243        {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49},
244        {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
245        {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59},
246        {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F},
247        {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A},
248        {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C},
249        {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B},
250        {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB},
251        {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, {0x1F030, 0x1F093},
252        {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CF},
253        {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD}, {0x1F1E6, 0x1F202},
254        {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
255        {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, {0x1F6DC, 0x1F6EC},
256        {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F776}, {0x1F77B, 0x1F7D9},
257        {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0}, {0x1F800, 0x1F80B},
258        {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887},
259        {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1FA53},
260        {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA88},
261        {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5}, {0x1FACE, 0x1FADB},
262        {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8}, {0x1FB00, 0x1FB92},
263        {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9}, {0x20000, 0x2A6DF},
264        {0x2A700, 0x2B739}, {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1},
265        {0x2CEB0, 0x2EBE0}, {0x2EBF0, 0x2EE5D}, {0x2F800, 0x2FA1D},
266        {0x30000, 0x3134A}, {0x31350, 0x323AF}, {0xE0100, 0xE01EF},
267    };
268  
269    static const UnicodeCharSet Printables(PrintableRanges);
270    // Clang special cases 0x00AD (SOFT HYPHEN) which is rendered as an actual
271    // hyphen in most terminals.
272    return UCS == 0x00AD || Printables.contains(UCS);
273  }
274  
275  /// Unicode code points of the Cf category are considered
276  /// formatting characters.
277  bool isFormatting(int UCS) {
278  
279    // https://unicode.org/Public/15.1.0/ucdxml/
280    static const UnicodeCharRange Cf[] = {
281        {0x00AD, 0x00AD},   {0x0600, 0x0605},   {0x061C, 0x061C},
282        {0x06DD, 0x06DD},   {0x070F, 0x070F},   {0x0890, 0x0891},
283        {0x08E2, 0x08E2},   {0x180E, 0x180E},   {0x200B, 0x200F},
284        {0x202A, 0x202E},   {0x2060, 0x2064},   {0x2066, 0x206F},
285        {0xFEFF, 0xFEFF},   {0xFFF9, 0xFFFB},   {0x110BD, 0x110BD},
286        {0x110CD, 0x110CD}, {0x13430, 0x1343F}, {0x1BCA0, 0x1BCA3},
287        {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}};
288  
289    static const UnicodeCharSet Format(Cf);
290    return Format.contains(UCS);
291  }
292  
293  /// Gets the number of positions a character is likely to occupy when output
294  /// on a terminal ("character width"). This depends on the implementation of the
295  /// terminal, and there's no standard definition of character width.
296  /// The implementation defines it in a way that is expected to be compatible
297  /// with a generic Unicode-capable terminal.
298  /// \return Character width:
299  ///   * ErrorNonPrintableCharacter (-1) for non-printable characters (as
300  ///     identified by isPrintable);
301  ///   * 0 for non-spacing and enclosing combining marks;
302  ///   * 2 for CJK characters excluding halfwidth forms;
303  ///   * 1 for all remaining characters.
304  static inline int charWidth(int UCS) {
305    if (!isPrintable(UCS))
306      return ErrorNonPrintableCharacter;
307  
308    // Sorted list of non-spacing and enclosing combining mark intervals as
309    // defined in "3.6 Combination" of
310    // https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
311    static const UnicodeCharRange CombiningCharacterRanges[] = {
312        {0x0300, 0x036F},   {0x0483, 0x0489},   {0x0591, 0x05BD},
313        {0x05BF, 0x05BF},   {0x05C1, 0x05C2},   {0x05C4, 0x05C5},
314        {0x05C7, 0x05C7},   {0x0610, 0x061A},   {0x064B, 0x065F},
315        {0x0670, 0x0670},   {0x06D6, 0x06DC},   {0x06DF, 0x06E4},
316        {0x06E7, 0x06E8},   {0x06EA, 0x06ED},   {0x0711, 0x0711},
317        {0x0730, 0x074A},   {0x07A6, 0x07B0},   {0x07EB, 0x07F3},
318        {0x07FD, 0x07FD},   {0x0816, 0x0819},   {0x081B, 0x0823},
319        {0x0825, 0x0827},   {0x0829, 0x082D},   {0x0859, 0x085B},
320        {0x0898, 0x089F},   {0x08CA, 0x08E1},   {0x08E3, 0x0902},
321        {0x093A, 0x093A},   {0x093C, 0x093C},   {0x0941, 0x0948},
322        {0x094D, 0x094D},   {0x0951, 0x0957},   {0x0962, 0x0963},
323        {0x0981, 0x0981},   {0x09BC, 0x09BC},   {0x09C1, 0x09C4},
324        {0x09CD, 0x09CD},   {0x09E2, 0x09E3},   {0x09FE, 0x09FE},
325        {0x0A01, 0x0A02},   {0x0A3C, 0x0A3C},   {0x0A41, 0x0A42},
326        {0x0A47, 0x0A48},   {0x0A4B, 0x0A4D},   {0x0A51, 0x0A51},
327        {0x0A70, 0x0A71},   {0x0A75, 0x0A75},   {0x0A81, 0x0A82},
328        {0x0ABC, 0x0ABC},   {0x0AC1, 0x0AC5},   {0x0AC7, 0x0AC8},
329        {0x0ACD, 0x0ACD},   {0x0AE2, 0x0AE3},   {0x0AFA, 0x0AFF},
330        {0x0B01, 0x0B01},   {0x0B3C, 0x0B3C},   {0x0B3F, 0x0B3F},
331        {0x0B41, 0x0B44},   {0x0B4D, 0x0B4D},   {0x0B55, 0x0B56},
332        {0x0B62, 0x0B63},   {0x0B82, 0x0B82},   {0x0BC0, 0x0BC0},
333        {0x0BCD, 0x0BCD},   {0x0C00, 0x0C00},   {0x0C04, 0x0C04},
334        {0x0C3C, 0x0C3C},   {0x0C3E, 0x0C40},   {0x0C46, 0x0C48},
335        {0x0C4A, 0x0C4D},   {0x0C55, 0x0C56},   {0x0C62, 0x0C63},
336        {0x0C81, 0x0C81},   {0x0CBC, 0x0CBC},   {0x0CBF, 0x0CBF},
337        {0x0CC6, 0x0CC6},   {0x0CCC, 0x0CCD},   {0x0CE2, 0x0CE3},
338        {0x0D00, 0x0D01},   {0x0D3B, 0x0D3C},   {0x0D41, 0x0D44},
339        {0x0D4D, 0x0D4D},   {0x0D62, 0x0D63},   {0x0D81, 0x0D81},
340        {0x0DCA, 0x0DCA},   {0x0DD2, 0x0DD4},   {0x0DD6, 0x0DD6},
341        {0x0E31, 0x0E31},   {0x0E34, 0x0E3A},   {0x0E47, 0x0E4E},
342        {0x0EB1, 0x0EB1},   {0x0EB4, 0x0EBC},   {0x0EC8, 0x0ECE},
343        {0x0F18, 0x0F19},   {0x0F35, 0x0F35},   {0x0F37, 0x0F37},
344        {0x0F39, 0x0F39},   {0x0F71, 0x0F7E},   {0x0F80, 0x0F84},
345        {0x0F86, 0x0F87},   {0x0F8D, 0x0F97},   {0x0F99, 0x0FBC},
346        {0x0FC6, 0x0FC6},   {0x102D, 0x1030},   {0x1032, 0x1037},
347        {0x1039, 0x103A},   {0x103D, 0x103E},   {0x1058, 0x1059},
348        {0x105E, 0x1060},   {0x1071, 0x1074},   {0x1082, 0x1082},
349        {0x1085, 0x1086},   {0x108D, 0x108D},   {0x109D, 0x109D},
350        {0x135D, 0x135F},   {0x1712, 0x1714},   {0x1732, 0x1733},
351        {0x1752, 0x1753},   {0x1772, 0x1773},   {0x17B4, 0x17B5},
352        {0x17B7, 0x17BD},   {0x17C6, 0x17C6},   {0x17C9, 0x17D3},
353        {0x17DD, 0x17DD},   {0x180B, 0x180D},   {0x180F, 0x180F},
354        {0x1885, 0x1886},   {0x18A9, 0x18A9},   {0x1920, 0x1922},
355        {0x1927, 0x1928},   {0x1932, 0x1932},   {0x1939, 0x193B},
356        {0x1A17, 0x1A18},   {0x1A1B, 0x1A1B},   {0x1A56, 0x1A56},
357        {0x1A58, 0x1A5E},   {0x1A60, 0x1A60},   {0x1A62, 0x1A62},
358        {0x1A65, 0x1A6C},   {0x1A73, 0x1A7C},   {0x1A7F, 0x1A7F},
359        {0x1AB0, 0x1ACE},   {0x1B00, 0x1B03},   {0x1B34, 0x1B34},
360        {0x1B36, 0x1B3A},   {0x1B3C, 0x1B3C},   {0x1B42, 0x1B42},
361        {0x1B6B, 0x1B73},   {0x1B80, 0x1B81},   {0x1BA2, 0x1BA5},
362        {0x1BA8, 0x1BA9},   {0x1BAB, 0x1BAD},   {0x1BE6, 0x1BE6},
363        {0x1BE8, 0x1BE9},   {0x1BED, 0x1BED},   {0x1BEF, 0x1BF1},
364        {0x1C2C, 0x1C33},   {0x1C36, 0x1C37},   {0x1CD0, 0x1CD2},
365        {0x1CD4, 0x1CE0},   {0x1CE2, 0x1CE8},   {0x1CED, 0x1CED},
366        {0x1CF4, 0x1CF4},   {0x1CF8, 0x1CF9},   {0x1DC0, 0x1DFF},
367        {0x20D0, 0x20F0},   {0x2CEF, 0x2CF1},   {0x2D7F, 0x2D7F},
368        {0x2DE0, 0x2DFF},   {0x302A, 0x302D},   {0x3099, 0x309A},
369        {0xA66F, 0xA672},   {0xA674, 0xA67D},   {0xA69E, 0xA69F},
370        {0xA6F0, 0xA6F1},   {0xA802, 0xA802},   {0xA806, 0xA806},
371        {0xA80B, 0xA80B},   {0xA825, 0xA826},   {0xA82C, 0xA82C},
372        {0xA8C4, 0xA8C5},   {0xA8E0, 0xA8F1},   {0xA8FF, 0xA8FF},
373        {0xA926, 0xA92D},   {0xA947, 0xA951},   {0xA980, 0xA982},
374        {0xA9B3, 0xA9B3},   {0xA9B6, 0xA9B9},   {0xA9BC, 0xA9BD},
375        {0xA9E5, 0xA9E5},   {0xAA29, 0xAA2E},   {0xAA31, 0xAA32},
376        {0xAA35, 0xAA36},   {0xAA43, 0xAA43},   {0xAA4C, 0xAA4C},
377        {0xAA7C, 0xAA7C},   {0xAAB0, 0xAAB0},   {0xAAB2, 0xAAB4},
378        {0xAAB7, 0xAAB8},   {0xAABE, 0xAABF},   {0xAAC1, 0xAAC1},
379        {0xAAEC, 0xAAED},   {0xAAF6, 0xAAF6},   {0xABE5, 0xABE5},
380        {0xABE8, 0xABE8},   {0xABED, 0xABED},   {0xFB1E, 0xFB1E},
381        {0xFE00, 0xFE0F},   {0xFE20, 0xFE2F},   {0x101FD, 0x101FD},
382        {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03},
383        {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
384        {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27},
385        {0x10EAB, 0x10EAC}, {0x10EFD, 0x10EFF}, {0x10F46, 0x10F50},
386        {0x10F82, 0x10F85}, {0x11001, 0x11001}, {0x11038, 0x11046},
387        {0x11070, 0x11070}, {0x11073, 0x11074}, {0x1107F, 0x11081},
388        {0x110B3, 0x110B6}, {0x110B9, 0x110BA}, {0x110C2, 0x110C2},
389        {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112D, 0x11134},
390        {0x11173, 0x11173}, {0x11180, 0x11181}, {0x111B6, 0x111BE},
391        {0x111C9, 0x111CC}, {0x111CF, 0x111CF}, {0x1122F, 0x11231},
392        {0x11234, 0x11234}, {0x11236, 0x11237}, {0x1123E, 0x1123E},
393        {0x11241, 0x11241}, {0x112DF, 0x112DF}, {0x112E3, 0x112EA},
394        {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11340, 0x11340},
395        {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11438, 0x1143F},
396        {0x11442, 0x11444}, {0x11446, 0x11446}, {0x1145E, 0x1145E},
397        {0x114B3, 0x114B8}, {0x114BA, 0x114BA}, {0x114BF, 0x114C0},
398        {0x114C2, 0x114C3}, {0x115B2, 0x115B5}, {0x115BC, 0x115BD},
399        {0x115BF, 0x115C0}, {0x115DC, 0x115DD}, {0x11633, 0x1163A},
400        {0x1163D, 0x1163D}, {0x1163F, 0x11640}, {0x116AB, 0x116AB},
401        {0x116AD, 0x116AD}, {0x116B0, 0x116B5}, {0x116B7, 0x116B7},
402        {0x1171D, 0x1171F}, {0x11722, 0x11725}, {0x11727, 0x1172B},
403        {0x1182F, 0x11837}, {0x11839, 0x1183A}, {0x1193B, 0x1193C},
404        {0x1193E, 0x1193E}, {0x11943, 0x11943}, {0x119D4, 0x119D7},
405        {0x119DA, 0x119DB}, {0x119E0, 0x119E0}, {0x11A01, 0x11A0A},
406        {0x11A33, 0x11A38}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
407        {0x11A51, 0x11A56}, {0x11A59, 0x11A5B}, {0x11A8A, 0x11A96},
408        {0x11A98, 0x11A99}, {0x11C30, 0x11C36}, {0x11C38, 0x11C3D},
409        {0x11C3F, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CAA, 0x11CB0},
410        {0x11CB2, 0x11CB3}, {0x11CB5, 0x11CB6}, {0x11D31, 0x11D36},
411        {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
412        {0x11D47, 0x11D47}, {0x11D90, 0x11D91}, {0x11D95, 0x11D95},
413        {0x11D97, 0x11D97}, {0x11EF3, 0x11EF4}, {0x11F00, 0x11F01},
414        {0x11F36, 0x11F3A}, {0x11F40, 0x11F40}, {0x11F42, 0x11F42},
415        {0x13440, 0x13440}, {0x13447, 0x13455}, {0x16AF0, 0x16AF4},
416        {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F8F, 0x16F92},
417        {0x16FE4, 0x16FE4}, {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D},
418        {0x1CF30, 0x1CF46}, {0x1D167, 0x1D169}, {0x1D17B, 0x1D182},
419        {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
420        {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75},
421        {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF},
422        {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
423        {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F},
424        {0x1E130, 0x1E136}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2EF},
425        {0x1E4EC, 0x1E4EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
426        {0xE0100, 0xE01EF},
427    };
428    static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges);
429  
430    if (CombiningCharacters.contains(UCS))
431      return 0;
432  
433    // We consider double width codepoints any codepoint with
434    // the property East_Asian_Width=F|W
435    // + Misc Symbols and Pictographs (U+1F300...U+1F5FF)
436    // + Supplemental Symbols and Pictographs (U+1F900...U+1F9FF)
437    static const UnicodeCharRange DoubleWidthCharacterRanges[] = {
438        {0x1100, 0x115F},   {0x231A, 0x231B},   {0x2329, 0x232A},
439        {0x23E9, 0x23EC},   {0x23F0, 0x23F0},   {0x23F3, 0x23F3},
440        {0x25FD, 0x25FE},   {0x2614, 0x2615},   {0x2648, 0x2653},
441        {0x267F, 0x267F},   {0x2693, 0x2693},   {0x26A1, 0x26A1},
442        {0x26AA, 0x26AB},   {0x26BD, 0x26BE},   {0x26C4, 0x26C5},
443        {0x26CE, 0x26CE},   {0x26D4, 0x26D4},   {0x26EA, 0x26EA},
444        {0x26F2, 0x26F3},   {0x26F5, 0x26F5},   {0x26FA, 0x26FA},
445        {0x26FD, 0x26FD},   {0x2705, 0x2705},   {0x270A, 0x270B},
446        {0x2728, 0x2728},   {0x274C, 0x274C},   {0x274E, 0x274E},
447        {0x2753, 0x2755},   {0x2757, 0x2757},   {0x2795, 0x2797},
448        {0x27B0, 0x27B0},   {0x27BF, 0x27BF},   {0x2B1B, 0x2B1C},
449        {0x2B50, 0x2B50},   {0x2B55, 0x2B55},   {0x2E80, 0x2E99},
450        {0x2E9B, 0x2EF3},   {0x2F00, 0x2FD5},   {0x2FF0, 0x303E},
451        {0x3041, 0x3096},   {0x3099, 0x30FF},   {0x3105, 0x312F},
452        {0x3131, 0x318E},   {0x3190, 0x31E3},   {0x31EF, 0x321E},
453        {0x3220, 0x3247},   {0x3250, 0xA48C},   {0xA490, 0xA4C6},
454        {0xA960, 0xA97C},   {0xAC00, 0xD7A3},   {0xF900, 0xFAFF},
455        {0xFE10, 0xFE19},   {0xFE30, 0xFE52},   {0xFE54, 0xFE66},
456        {0xFE68, 0xFE6B},   {0xFF01, 0xFF60},   {0xFFE0, 0xFFE6},
457        {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x17000, 0x187F7},
458        {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3},
459        {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122},
460        {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155},
461        {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004},
462        {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A},
463        {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
464        {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F64F},
465        {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2},
466        {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF}, {0x1F6EB, 0x1F6EC},
467        {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
468        {0x1F900, 0x1F9FF}, {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA88},
469        {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5}, {0x1FACE, 0x1FADB},
470        {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8}, {0x20000, 0x2FFFD},
471        {0x30000, 0x3FFFD}};
472    static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges);
473  
474    if (DoubleWidthCharacters.contains(UCS))
475      return 2;
476    return 1;
477  }
478  
/// \return true if \p c lies in the printable ASCII range, i.e. from
/// 0x20 (space) up to and including 0x7E ('~').
static bool isprintableascii(char c) {
  return c >= 0x20 && c <= 0x7E;
}
480  
481  int columnWidthUTF8(StringRef Text) {
482    unsigned ColumnWidth = 0;
483    unsigned Length;
484    for (size_t i = 0, e = Text.size(); i < e; i += Length) {
485      Length = getNumBytesForUTF8(Text[i]);
486  
487      // fast path for ASCII characters
488      if (Length == 1) {
489        if (!isprintableascii(Text[i]))
490          return ErrorNonPrintableCharacter;
491        ColumnWidth += 1;
492        continue;
493      }
494  
495      if (Length <= 0 || i + Length > Text.size())
496        return ErrorInvalidUTF8;
497      UTF32 buf[1];
498      const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
499      UTF32 *Target = &buf[0];
500      if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
501                                             Target + 1, strictConversion))
502        return ErrorInvalidUTF8;
503      int Width = charWidth(buf[0]);
504      if (Width < 0)
505        return ErrorNonPrintableCharacter;
506      ColumnWidth += Width;
507    }
508    return ColumnWidth;
509  }
510  
511  } // namespace unicode
512  } // namespace sys
513  } // namespace llvm
514