gen_ucs_width_table.py - OpenGrok cross reference for /linux/drivers/tty/vt/gen_ucs_width_table.py

Lines Matching +full:range +full:- +full:double
2 # SPDX-License-Identifier: GPL-2.0
17 # --- Global Constants for Width Assignments ---
19 # Known zero-width characters
22     0x200C,  # ZERO WIDTH NON-JOINER
25     0xFEFF   # ZERO WIDTH NO-BREAK SPACE (BOM)
28 # Zero-width emoji modifiers and components
29 # NOTE: Some of these characters would normally be single-width according to
31 # zero-width because they function as modifiers in emoji sequences.
37     (0xFE00, 0xFE0F),    # Variation Selectors 1-16
40     # These would be single-width by Unicode properties, but are zero-width
52 REGIONAL_INDICATORS = (0x1F1E6, 0x1F1FF)  # Regional indicator symbols A-Z
54 # Double-width emoji ranges
56 # Many emoji characters are classified as single-width according to Unicode
58 # deliberately override them to be double-width. References:
62 #    (https://drafts.csswg.org/css-text-3/#character-properties)
64 #    universally render emoji as double-width characters regardless of their
66 # 4. W3C Work Item: Requirements for Japanese Text Layout - Section 3.8.1
69     (0x1F000, 0x1F02F),  # Mahjong Tiles (EAW: N, but displayed as double-width)
70     (0x1F0A0, 0x1F0FF),  # Playing Cards (EAW: N, but displayed as double-width)
76     (0x1F800, 0x1F8FF),  # Supplemental Arrows-C
79     (0x1FA70, 0x1FAFF),  # Symbols and Pictographs Extended-A
93     # Mark emoji modifiers as zero-width
95         for cp in range(start, end + 1):
98     # Mark all regional indicators as single-width as they are usually paired
101     for cp in range(start, end + 1):
105     # Supplementary Planes) Range 0x0 to 0x10FFFF (the full Unicode range)
106     for block_start in range(0, 0x110000, 0x1000):
108         for cp in range(block_start, block_end):
128                 # in a non-bidirectional text environment.
133                 # Known zero-width characters
145                     # Default to single-width for unknown
152     # Process Emoji - generally double-width
154         for cp in range(start, end + 1):
155             if cp not in width_map or width_map[cp] != 0:  # Don't override zero-width
162     # Optimize to create range tables
179         # Add the last range
194         zero_width_ranges: List of (start, end) ranges for zero-width characters
195         double_width_ranges: List of (start, end) ranges for double-width characters
199     # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit)
210                 # Split the range at 0xFFFF
216     # Split ranges into BMP and non-BMP
228                 return f"/* {start_char_desc} - {end_char_desc} */"
233                 return f"/* U+{start:04X} - U+{end:04X} */"
238 /* SPDX-License-Identifier: GPL-2.0 */
240  * {out_file} - Unicode character width
242  * Auto-generated by {this_file}
247 /* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
258 /* Zero-width character ranges (non-BMP, U+10000 and above) */
269 /* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
280 /* Double-width character ranges (non-BMP, U+10000 and above) */
293     parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE,
302     zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges)
303     double_width_count = sum(end - start + 1 for start, end in double_width_ranges)
305     print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points")
306     print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points")
307     print(f"- Unicode Version: {unicodedata.unidata_version}")