xref: /linux/drivers/tty/vt/gen_ucs_recompose_table.py (revision 378ec25aec5a8444879f8696d580c94950a1f1df)
103c6de01SNicolas Pitre#!/usr/bin/env python3
203c6de01SNicolas Pitre# SPDX-License-Identifier: GPL-2.0
303c6de01SNicolas Pitre#
403c6de01SNicolas Pitre# Leverage Python's unicodedata module to generate ucs_recompose_table.h
503c6de01SNicolas Pitre#
603c6de01SNicolas Pitre# The generated table maps base character + combining mark pairs to their
703c6de01SNicolas Pitre# precomposed equivalents.
803c6de01SNicolas Pitre#
903c6de01SNicolas Pitre# Usage:
1003c6de01SNicolas Pitre#   python3 gen_ucs_recompose_table.py         # Generate with common recomposition pairs
1103c6de01SNicolas Pitre#   python3 gen_ucs_recompose_table.py --full  # Generate with all recomposition pairs
1203c6de01SNicolas Pitre
1303c6de01SNicolas Pitreimport unicodedata
1403c6de01SNicolas Pitreimport sys
1503c6de01SNicolas Pitreimport argparse
1603c6de01SNicolas Pitreimport textwrap
1703c6de01SNicolas Pitre
# Base name of this script; it is embedded in the generated header's
# "Auto-generated by" line and in its regeneration hint.
from pathlib import Path
this_file = Path(__file__).name

# Output file name used when -o/--output is not given
DEFAULT_OUT_FILE = "ucs_recompose_table.h"
2403c6de01SNicolas Pitre
# Human-readable description of the default (non --full) table; it is
# interpolated into the comments of the generated header.
common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only"

# Hand-picked recomposition pairs used when --full is not given.
# Each entry is (base code point, combining mark code point, precomposed
# code point). Entries do not have to be sorted here: the generator sorts
# this list before emitting it and checks every entry against the pairs
# derived from unicodedata.
COMMON_RECOMPOSITION_PAIRS = [
    # Latin letters with accents - uppercase
    (0x0041, 0x0300, 0x00C0),  # A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE
    (0x0041, 0x0301, 0x00C1),  # A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE
    (0x0041, 0x0302, 0x00C2),  # A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    (0x0041, 0x0303, 0x00C3),  # A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE
    (0x0041, 0x0308, 0x00C4),  # A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS
    (0x0041, 0x030A, 0x00C5),  # A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE
    (0x0043, 0x0327, 0x00C7),  # C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA
    (0x0045, 0x0300, 0x00C8),  # E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE
    (0x0045, 0x0301, 0x00C9),  # E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE
    (0x0045, 0x0302, 0x00CA),  # E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    (0x0045, 0x0308, 0x00CB),  # E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS
    (0x0049, 0x0300, 0x00CC),  # I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE
    (0x0049, 0x0301, 0x00CD),  # I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE
    (0x0049, 0x0302, 0x00CE),  # I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    (0x0049, 0x0308, 0x00CF),  # I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS
    (0x004E, 0x0303, 0x00D1),  # N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE
    (0x004F, 0x0300, 0x00D2),  # O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE
    (0x004F, 0x0301, 0x00D3),  # O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE
    (0x004F, 0x0302, 0x00D4),  # O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    (0x004F, 0x0303, 0x00D5),  # O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE
    (0x004F, 0x0308, 0x00D6),  # O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS
    (0x0055, 0x0300, 0x00D9),  # U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE
    (0x0055, 0x0301, 0x00DA),  # U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE
    (0x0055, 0x0302, 0x00DB),  # U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    (0x0055, 0x0308, 0x00DC),  # U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS
    (0x0059, 0x0301, 0x00DD),  # Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE

    # Latin letters with accents - lowercase
    (0x0061, 0x0300, 0x00E0),  # a + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE
    (0x0061, 0x0301, 0x00E1),  # a + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE
    (0x0061, 0x0302, 0x00E2),  # a + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX
    (0x0061, 0x0303, 0x00E3),  # a + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE
    (0x0061, 0x0308, 0x00E4),  # a + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS
    (0x0061, 0x030A, 0x00E5),  # a + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE
    (0x0063, 0x0327, 0x00E7),  # c + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA
    (0x0065, 0x0300, 0x00E8),  # e + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE
    (0x0065, 0x0301, 0x00E9),  # e + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE
    (0x0065, 0x0302, 0x00EA),  # e + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX
    (0x0065, 0x0308, 0x00EB),  # e + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS
    (0x0069, 0x0300, 0x00EC),  # i + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE
    (0x0069, 0x0301, 0x00ED),  # i + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE
    (0x0069, 0x0302, 0x00EE),  # i + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX
    (0x0069, 0x0308, 0x00EF),  # i + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS
    (0x006E, 0x0303, 0x00F1),  # n + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE
    (0x006F, 0x0300, 0x00F2),  # o + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE
    (0x006F, 0x0301, 0x00F3),  # o + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE
    (0x006F, 0x0302, 0x00F4),  # o + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX
    (0x006F, 0x0303, 0x00F5),  # o + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE
    (0x006F, 0x0308, 0x00F6),  # o + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS
    (0x0075, 0x0300, 0x00F9),  # u + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE
    (0x0075, 0x0301, 0x00FA),  # u + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE
    (0x0075, 0x0302, 0x00FB),  # u + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX
    (0x0075, 0x0308, 0x00FC),  # u + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS
    (0x0079, 0x0301, 0x00FD),  # y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE
    (0x0079, 0x0308, 0x00FF),  # y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS

    # Common Greek characters
    (0x0391, 0x0301, 0x0386),  # Α + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS
    (0x0395, 0x0301, 0x0388),  # Ε + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS
    (0x0397, 0x0301, 0x0389),  # Η + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS
    (0x0399, 0x0301, 0x038A),  # Ι + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS
    (0x039F, 0x0301, 0x038C),  # Ο + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS
    (0x03A5, 0x0301, 0x038E),  # Υ + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS
    (0x03A9, 0x0301, 0x038F),  # Ω + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS
    (0x03B1, 0x0301, 0x03AC),  # α + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS
    (0x03B5, 0x0301, 0x03AD),  # ε + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS
    (0x03B7, 0x0301, 0x03AE),  # η + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS
    (0x03B9, 0x0301, 0x03AF),  # ι + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS
    (0x03BF, 0x0301, 0x03CC),  # ο + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS
    (0x03C5, 0x0301, 0x03CD),  # υ + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS
    (0x03C9, 0x0301, 0x03CE),  # ω + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS

    # Common Cyrillic characters
    (0x0418, 0x0306, 0x0419),  # И + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I
    (0x0438, 0x0306, 0x0439),  # и + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I
    (0x0423, 0x0306, 0x040E),  # У + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U
    (0x0443, 0x0306, 0x045E),  # у + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U
]

# Human-readable description of the --full table; it is interpolated into
# the comments of the generated header.
full_recompose_description = "all possible recomposition pairs from the Unicode BMP"
def collect_all_recomposition_pairs():
    """Collect all two-code-point recomposition pairs from the Unicode data.

    Walks every code point in the BMP and, for each named character whose
    decomposition (as reported by unicodedata) is a non-compatibility
    mapping made of exactly two code points, records the reverse mapping
    from that pair to the composed character.

    Returns:
        A list of (base, combining, recomposed) integer tuples, sorted by
        base and then by combining code point so it can be binary searched.
    """
    # Map to store recomposition pairs: (base, combining) -> recomposed
    recompose_map = {}

    # We limit to BMP (0x0000-0xFFFF) to keep our table smaller with uint16_t
    for cp in range(0x10000):
        char = chr(cp)

        # Skip code points that have no name (name() yields the '' default)
        if not unicodedata.name(char, ''):
            continue

        # Skip characters without a decomposition; a '<tag>' prefix marks
        # a compatibility decomposition, which is not wanted here.
        decomp = unicodedata.decomposition(char)
        if not decomp or '<' in decomp:
            continue

        # Only simple "base + combining mark" decompositions are of interest
        parts = decomp.split()
        if len(parts) != 2:
            continue

        # No blanket try/except here: the fields are hex code points, and a
        # parse failure would indicate a real bug that must not be hidden.
        base, combining = (int(part, 16) for part in parts)

        # Only store if both are in BMP
        if base < 0x10000 and combining < 0x10000:
            recompose_map[(base, combining)] = cp

    # Tuple ordering gives "by base, then by combining mark"
    return sorted((base, combining, recomposed)
                  for (base, combining), recomposed in recompose_map.items())
14703c6de01SNicolas Pitre
def validate_common_pairs(full_list, common_pairs=None):
    """Validate that all common pairs are in the full list.

    Args:
        full_list: Iterable of (base, combining, recomposed) tuples that
            serves as the reference data.
        common_pairs: Iterable of (base, combining, recomposed) tuples to
            check. Defaults to COMMON_RECOMPOSITION_PAIRS when omitted.

    Raises:
        ValueError: If any common pair is missing or has a different recomposition
        value than what's in the full table. Checking stops at the first
        offending pair; the same message is written to stderr first.
    """
    if common_pairs is None:
        common_pairs = COMMON_RECOMPOSITION_PAIRS

    full_pairs = {(base, combining): recomposed for base, combining, recomposed in full_list}
    for base, combining, recomposed in common_pairs:
        full_recomposed = full_pairs.get((base, combining))
        if full_recomposed is None:
            error_msg = f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) not found in full data"
        elif full_recomposed != recomposed:
            error_msg = (f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) has different recomposition: "
                         f"0x{recomposed:04X} vs 0x{full_recomposed:04X}")
        else:
            continue

        # Diagnostics belong on stderr so they never mix with regular output
        print(error_msg, file=sys.stderr)
        raise ValueError(error_msg)
16703c6de01SNicolas Pitre
def _recomposition_comment(base, combining, recomposed):
    """Return the C comment for one table entry, using character names when available."""
    try:
        names = [unicodedata.name(chr(cp)) for cp in (base, combining, recomposed)]
    except ValueError:
        # unicodedata.name() raises ValueError for a code point without a name
        return f"/* U+{base:04X} + U+{combining:04X} = U+{recomposed:04X} */"
    return f"/* {names[0]} + {names[1]} = {names[2]} */"


def generate_recomposition_table(use_full_list=False, out_file=DEFAULT_OUT_FILE):
    """Generate the recomposition C table.

    Args:
        use_full_list: When true, emit every pair returned by
            collect_all_recomposition_pairs(); otherwise emit the sorted
            COMMON_RECOMPOSITION_PAIRS after validating them against it.
        out_file: Path of the C header to write. Only its base name is
            mentioned inside the generated file.

    Raises:
        ValueError: If the common pairs fail validation (non-full mode).
    """
    # The full list is always collected: it is either the table itself or
    # the reference used to validate the common pairs and to report the
    # size of the alternative table in the header comment.
    full_recompose_list = collect_all_recomposition_pairs()

    # Decide which list to use
    if use_full_list:
        print("Using full recomposition list...")
        recompose_list = full_recompose_list
        table_description = full_recompose_description
        alt_list = COMMON_RECOMPOSITION_PAIRS
        alt_description = common_recompose_description
        generation_mode, alternative_mode = " --full", ""
    else:
        print("Using common recomposition list...")
        # Validate that all common pairs are in the full list
        validate_common_pairs(full_recompose_list)
        recompose_list = sorted(COMMON_RECOMPOSITION_PAIRS)
        table_description = common_recompose_description
        alt_list = full_recompose_list
        alt_description = full_recompose_description
        generation_mode, alternative_mode = "", " --full"

    table_description_detail = f"{table_description} ({len(recompose_list)} entries)"
    alt_description_detail = f"{alt_description} ({len(alt_list)} entries)"

    # Pre-wrapped paragraph for the header comment
    intro = textwrap.fill(
        f"This file contains a table with {table_description_detail}. "
        f"To generate a table with {alt_description_detail} instead, run:",
        width=75, initial_indent=" * ", subsequent_indent=" * ")

    # Calculate min/max values for boundary checks
    bases = [base for base, _, _ in recompose_list]
    marks = [combining for _, combining, _ in recompose_list]
    min_base, max_base = min(bases), max(bases)
    min_combining, max_combining = min(marks), max(marks)

    # Use the base name so the generated content does not depend on the
    # directory the file is written to.
    header_name = Path(out_file).name

    # Explicit encoding keeps the output independent of the locale
    with open(out_file, 'w', encoding='utf-8') as f:
        f.write(f"""\
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * {header_name} - Unicode character recomposition
 *
 * Auto-generated by {this_file}{generation_mode}
 *
 * Unicode Version: {unicodedata.unidata_version}
 *
{intro}
 *
 *   python3 {this_file}{alternative_mode}
 */

/*
 * Table of {table_description}
 * Sorted by base character and then combining mark for binary search
 */
static const struct ucs_recomposition ucs_recomposition_table[] = {{
""")

        f.writelines(
            f"\t{{ 0x{base:04X}, 0x{combining:04X}, 0x{recomposed:04X} }}, "
            f"{_recomposition_comment(base, combining, recomposed)}\n"
            for base, combining, recomposed in recompose_list)

        f.write(f"""\
}};

/*
 * Boundary values for quick rejection
 * These are calculated by analyzing the table during generation
 */
#define UCS_RECOMPOSE_MIN_BASE  0x{min_base:04X}
#define UCS_RECOMPOSE_MAX_BASE  0x{max_base:04X}
#define UCS_RECOMPOSE_MIN_MARK  0x{min_combining:04X}
#define UCS_RECOMPOSE_MAX_MARK  0x{max_combining:04X}
""")
24803c6de01SNicolas Pitre
if __name__ == "__main__":
    # Command-line entry point: read the options, then write the header.
    cli = argparse.ArgumentParser(description="Generate Unicode recomposition table")
    cli.add_argument(
        "--full",
        action="store_true",
        help="Generate a full recomposition table (default: common pairs only)",
    )
    cli.add_argument(
        "-o", "--output",
        dest="output_file",
        default=DEFAULT_OUT_FILE,
        help=f"Output file name (default: {DEFAULT_OUT_FILE})",
    )
    options = cli.parse_args()

    generate_recomposition_table(use_full_list=options.full,
                                 out_file=options.output_file)
258