Lines Matching +full:range +full:- +full:double
2 # SPDX-License-Identifier: GPL-2.0
17 # --- Global Constants for Width Assignments ---
19 # Known zero-width characters
22 0x200C, # ZERO WIDTH NON-JOINER
25 0xFEFF # ZERO WIDTH NO-BREAK SPACE (BOM)
28 # Zero-width emoji modifiers and components
29 # NOTE: Some of these characters would normally be single-width according to
31 # zero-width because they function as modifiers in emoji sequences.
37 (0xFE00, 0xFE0F), # Variation Selectors 1-16
40 # These would be single-width by Unicode properties, but are zero-width
52 REGIONAL_INDICATORS = (0x1F1E6, 0x1F1FF) # Regional indicator symbols A-Z
54 # Double-width emoji ranges
56 # Many emoji characters are classified as single-width according to Unicode
58 # deliberately override them to be double-width. References:
62 # (https://drafts.csswg.org/css-text-3/#character-properties)
64 # universally render emoji as double-width characters regardless of their
66 # 4. W3C Work Item: Requirements for Japanese Text Layout - Section 3.8.1
69 (0x1F000, 0x1F02F), # Mahjong Tiles (EAW: N, but displayed as double-width)
70 (0x1F0A0, 0x1F0FF), # Playing Cards (EAW: N, but displayed as double-width)
76 (0x1F800, 0x1F8FF), # Supplemental Arrows-C
79 (0x1FA70, 0x1FAFF), # Symbols and Pictographs Extended-A
93 # Mark emoji modifiers as zero-width
95 for cp in range(start, end + 1):
98 # Mark all regional indicators as single-width as they are usually paired
101 for cp in range(start, end + 1):
105 # Supplementary Planes) Range 0x0 to 0x10FFFF (the full Unicode range)
106 for block_start in range(0, 0x110000, 0x1000):
108 for cp in range(block_start, block_end):
128 # in a non-bidirectional text environment.
133 # Known zero-width characters
145 # Default to single-width for unknown
152 # Process Emoji - generally double-width
154 for cp in range(start, end + 1):
155 if cp not in width_map or width_map[cp] != 0: # Don't override zero-width
162 # Optimize to create range tables
179 # Add the last range
194 zero_width_ranges: List of (start, end) ranges for zero-width characters
195 double_width_ranges: List of (start, end) ranges for double-width characters
199 # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit)
210 # Split the range at 0xFFFF
216 # Split ranges into BMP and non-BMP
228 return f"/* {start_char_desc} - {end_char_desc} */"
233 return f"/* U+{start:04X} - U+{end:04X} */"
238 /* SPDX-License-Identifier: GPL-2.0 */
240 * {out_file} - Unicode character width
242 * Auto-generated by {this_file}
247 /* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
258 /* Zero-width character ranges (non-BMP, U+10000 and above) */
269 /* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
280 /* Double-width character ranges (non-BMP, U+10000 and above) */
293 parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE,
302 zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges)
303 double_width_count = sum(end - start + 1 for start, end in double_width_ranges)
305 print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points")
306 …print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code point…
307 print(f"- Unicode Version: {unicodedata.unidata_version}")