gen_ucs_width_table.py - OpenGrok cross reference for /linux/drivers/tty/vt/gen_ucs_width_table.py

Lines Matching full:width
17 # --- Global Constants for Width Assignments ---
19 # Known zero-width characters
21     0x200B,  # ZERO WIDTH SPACE
22     0x200C,  # ZERO WIDTH NON-JOINER
23     0x200D,  # ZERO WIDTH JOINER
25     0xFEFF   # ZERO WIDTH NO-BREAK SPACE (BOM)
28 # Zero-width emoji modifiers and components
29 # NOTE: Some of these characters would normally be single-width according to
30 # East Asian Width properties, but we deliberately override them to be
31 # zero-width because they function as modifiers in emoji sequences.
40     # These would be single-width by Unicode properties, but are zero-width
54 # Double-width emoji ranges
56 # Many emoji characters are classified as single-width according to Unicode
57 # Standard Annex #11 East Asian Width property (N or Neutral), but we
58 # deliberately override them to be double-width. References:
64 #    universally render emoji as double-width characters regardless of their
67 #    Emoji width (https://www.w3.org/TR/jlreq/)
69     (0x1F000, 0x1F02F),  # Mahjong Tiles (EAW: N, but displayed as double-width)
70     (0x1F0A0, 0x1F0FF),  # Playing Cards (EAW: N, but displayed as double-width)
84     Creates Unicode character width tables and returns the data structures.
90     # Width data mapping
91     width_map = {}  # Maps code points to width (0, 1, 2)
93     # Mark emoji modifiers as zero-width
98     # Mark all regional indicators as single-width as they are usually paired
99     # providing a combined width of 2 when displayed together.
126                 # characters (category Cf) can be treated with width 0 (zero)
133                 # Known zero-width characters
138                 # Use East Asian Width property
145                     # Default to single-width for unknown
152     # Process Emoji - generally double-width
155             if cp not in width_map or width_map[cp] != 0:  # Don't override zero-width
164         points = sorted([cp for cp, width in width_data.items() if width == target_width])
183     # Extract ranges for each width
194         zero_width_ranges: List of (start, end) ranges for zero-width characters
195         double_width_ranges: List of (start, end) ranges for double-width characters
240  * {out_file} - Unicode character width
247 /* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
258 /* Zero-width character ranges (non-BMP, U+10000 and above) */
269 /* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
280 /* Double-width character ranges (non-BMP, U+10000 and above) */
292     parser = argparse.ArgumentParser(description="Generate Unicode width tables")
305     print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points")
306     print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points")