#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Leverage Python's unicodedata module to generate ucs_recompose_table.h
#
# The generated table maps base character + combining mark pairs to their
# precomposed equivalents.
#
# Usage:
#   python3 gen_ucs_recompose_table.py          # Generate with common recomposition pairs
#   python3 gen_ucs_recompose_table.py --full   # Generate with all recomposition pairs
#   python3 gen_ucs_recompose_table.py -o FILE  # Write the table to FILE

import unicodedata
import sys
import argparse
import textwrap
from pathlib import Path

# This script's file name (embedded in the generated header's comments)
this_file = Path(__file__).name

# Default output file name
DEFAULT_OUT_FILE = "ucs_recompose_table.h"

# Human-readable description of the curated table below; it is written into
# the generated header.
common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only"

# Curated subset of recomposition pairs.
# Each entry is (base code point, combining mark code point, precomposed
# code point); the trailing comment spells the same relation out by name.
COMMON_RECOMPOSITION_PAIRS = [
    # Latin letters with accents - uppercase
    (0x0041, 0x0300, 0x00C0),  # A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE
    (0x0041, 0x0301, 0x00C1),  # A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE
    (0x0041, 0x0302, 0x00C2),  # A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    (0x0041, 0x0303, 0x00C3),  # A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE
    (0x0041, 0x0308, 0x00C4),  # A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS
    (0x0041, 0x030A, 0x00C5),  # A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE
    (0x0043, 0x0327, 0x00C7),  # C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA
    (0x0045, 0x0300, 0x00C8),  # E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE
    (0x0045, 0x0301, 0x00C9),  # E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE
    (0x0045, 0x0302, 0x00CA),  # E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    (0x0045, 0x0308, 0x00CB),  # E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS
    (0x0049, 0x0300, 0x00CC),  # I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE
    (0x0049, 0x0301, 0x00CD),  # I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE
    (0x0049, 0x0302, 0x00CE),  # I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    (0x0049, 0x0308, 0x00CF),  # I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS
    (0x004E, 0x0303, 0x00D1),  # N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE
    (0x004F, 0x0300, 0x00D2),  # O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE
    (0x004F, 0x0301, 0x00D3),  # O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE
    (0x004F, 0x0302, 0x00D4),  # O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    (0x004F, 0x0303, 0x00D5),  # O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE
    (0x004F, 0x0308, 0x00D6),  # O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS
    (0x0055, 0x0300, 0x00D9),  # U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE
    (0x0055, 0x0301, 0x00DA),  # U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE
    (0x0055, 0x0302, 0x00DB),  # U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    (0x0055, 0x0308, 0x00DC),  # U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS
    (0x0059, 0x0301, 0x00DD),  # Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE

    # Latin letters with accents - lowercase
    (0x0061, 0x0300, 0x00E0),  # a + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE
    (0x0061, 0x0301, 0x00E1),  # a + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE
    (0x0061, 0x0302, 0x00E2),  # a + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX
    (0x0061, 0x0303, 0x00E3),  # a + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE
    (0x0061, 0x0308, 0x00E4),  # a + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS
    (0x0061, 0x030A, 0x00E5),  # a + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE
    (0x0063, 0x0327, 0x00E7),  # c + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA
    (0x0065, 0x0300, 0x00E8),  # e + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE
    (0x0065, 0x0301, 0x00E9),  # e + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE
    (0x0065, 0x0302, 0x00EA),  # e + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX
    (0x0065, 0x0308, 0x00EB),  # e + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS
    (0x0069, 0x0300, 0x00EC),  # i + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE
    (0x0069, 0x0301, 0x00ED),  # i + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE
    (0x0069, 0x0302, 0x00EE),  # i + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX
    (0x0069, 0x0308, 0x00EF),  # i + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS
    (0x006E, 0x0303, 0x00F1),  # n + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE
    (0x006F, 0x0300, 0x00F2),  # o + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE
    (0x006F, 0x0301, 0x00F3),  # o + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE
    (0x006F, 0x0302, 0x00F4),  # o + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX
    (0x006F, 0x0303, 0x00F5),  # o + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE
    (0x006F, 0x0308, 0x00F6),  # o + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS
    (0x0075, 0x0300, 0x00F9),  # u + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE
    (0x0075, 0x0301, 0x00FA),  # u + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE
    (0x0075, 0x0302, 0x00FB),  # u + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX
    (0x0075, 0x0308, 0x00FC),  # u + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS
    (0x0079, 0x0301, 0x00FD),  # y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE
    (0x0079, 0x0308, 0x00FF),  # y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS

    # Common Greek characters
    (0x0391, 0x0301, 0x0386),  # Α + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS
    (0x0395, 0x0301, 0x0388),  # Ε + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS
    (0x0397, 0x0301, 0x0389),  # Η + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS
    (0x0399, 0x0301, 0x038A),  # Ι + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS
    (0x039F, 0x0301, 0x038C),  # Ο + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS
    (0x03A5, 0x0301, 0x038E),  # Υ + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS
    (0x03A9, 0x0301, 0x038F),  # Ω + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS
    (0x03B1, 0x0301, 0x03AC),  # α + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS
    (0x03B5, 0x0301, 0x03AD),  # ε + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS
    (0x03B7, 0x0301, 0x03AE),  # η + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS
    (0x03B9, 0x0301, 0x03AF),  # ι + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS
    (0x03BF, 0x0301, 0x03CC),  # ο + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS
    (0x03C5, 0x0301, 0x03CD),  # υ + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS
    (0x03C9, 0x0301, 0x03CE),  # ω + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS

    # Common Cyrillic characters
    (0x0418, 0x0306, 0x0419),  # И + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I
    (0x0438, 0x0306, 0x0439),  # и + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I
    (0x0423, 0x0306, 0x040E),  # У + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U
    (0x0443, 0x0306, 0x045E),  # у + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U
]

# Human-readable description of the table built by
# collect_all_recomposition_pairs(); it is written into the generated header.
full_recompose_description = "all possible recomposition pairs from the Unicode BMP"


def collect_all_recomposition_pairs():
    """Collect all possible recomposition pairs from the Unicode data.

    Walks every code point in the BMP (0x0000-0xFFFF) and keeps those whose
    canonical decomposition consists of exactly two code points, recording
    the reverse mapping (base, combining) -> precomposed code point.

    Returns:
        A list of (base, combining, recomposed) tuples sorted by base and
        then by combining mark, ready for binary search.
    """
    # Map to store recomposition pairs: (base, combining) -> recomposed
    recompose_map = {}

    # We limit to BMP (0x0000-0xFFFF) to keep our table smaller with uint16_t
    for cp in range(0x10000):
        char = chr(cp)

        # Skip code points without a name (unassigned or control characters)
        if not unicodedata.name(char, ''):
            continue

        # A '<' marks a tagged (compatibility) decomposition; skip those and
        # characters with no decomposition at all.
        decomp = unicodedata.decomposition(char)
        if not decomp or '<' in decomp:
            continue

        # Only simple "base + combining mark" decompositions are of interest
        parts = decomp.split()
        if len(parts) != 2:
            continue

        try:
            base, combining = (int(part, 16) for part in parts)
        except ValueError:
            # Best effort: ignore an entry whose fields are not hexadecimal
            continue

        # Only store if both are in BMP
        if base < 0x10000 and combining < 0x10000:
            recompose_map[(base, combining)] = cp

    # Convert to a list of tuples and sort for binary search
    return sorted((base, combining, recomposed)
                  for (base, combining), recomposed in recompose_map.items())


def validate_common_pairs(full_list):
    """Validate that all common pairs are in the full list.

    Args:
        full_list: Iterable of (base, combining, recomposed) tuples, as
            returned by collect_all_recomposition_pairs().

    Raises:
        ValueError: If any common pair is missing or has a different recomposition
                    value than what's in the full table. The message is also
                    printed before raising.
    """
    full_pairs = {(base, combining): recomposed for base, combining, recomposed in full_list}
    for base, combining, recomposed in COMMON_RECOMPOSITION_PAIRS:
        full_recomposed = full_pairs.get((base, combining))
        if full_recomposed is None:
            error_msg = f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) not found in full data"
        elif full_recomposed != recomposed:
            error_msg = (f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) has different recomposition: "
                         f"0x{recomposed:04X} vs 0x{full_recomposed:04X}")
        else:
            continue
        print(error_msg)
        raise ValueError(error_msg)


def generate_recomposition_table(use_full_list=False, out_file=DEFAULT_OUT_FILE):
    """Generate the recomposition C table.

    Args:
        use_full_list: When true, emit every pair found by
            collect_all_recomposition_pairs(); otherwise emit the curated
            COMMON_RECOMPOSITION_PAIRS after validating them against the
            full list.
        out_file: Path of the C header to write (overwritten if it exists).

    Raises:
        ValueError: Propagated from validate_common_pairs() when the common
            list is selected and disagrees with the Unicode data.
    """

    # Collect all recomposition pairs for validation
    full_recompose_list = collect_all_recomposition_pairs()

    # Decide which list to use; the "alt" values describe the table that was
    # NOT selected so the header can tell the reader how to generate it.
    if use_full_list:
        print("Using full recomposition list...")
        recompose_list = full_recompose_list
        table_description = full_recompose_description
        alt_list = COMMON_RECOMPOSITION_PAIRS
        alt_description = common_recompose_description
    else:
        print("Using common recomposition list...")
        # Validate that all common pairs are in the full list
        validate_common_pairs(full_recompose_list)
        recompose_list = sorted(COMMON_RECOMPOSITION_PAIRS)
        table_description = common_recompose_description
        alt_list = full_recompose_list
        alt_description = full_recompose_description
    generation_mode = " --full" if use_full_list else ""
    alternative_mode = " --full" if not use_full_list else ""
    table_description_detail = f"{table_description} ({len(recompose_list)} entries)"
    alt_description_detail = f"{alt_description} ({len(alt_list)} entries)"

    # Calculate min/max values for boundary checks
    min_base = min(base for base, _, _ in recompose_list)
    max_base = max(base for base, _, _ in recompose_list)
    min_combining = min(combining for _, combining, _ in recompose_list)
    max_combining = max(combining for _, combining, _ in recompose_list)

    # Wrapped explanatory paragraph for the header comment, built up front so
    # the template below stays readable.
    header_blurb = textwrap.fill(
        f"This file contains a table with {table_description_detail}. " +
        f"To generate a table with {alt_description_detail} instead, run:",
        width=75, initial_indent=" * ", subsequent_indent=" * ")

    # Generate implementation file; the encoding is pinned so the result
    # does not depend on the locale of the build environment.
    with open(out_file, 'w', encoding="utf-8") as f:
        f.write(f"""\
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * {out_file} - Unicode character recomposition
 *
 * Auto-generated by {this_file}{generation_mode}
 *
 * Unicode Version: {unicodedata.unidata_version}
 *
{header_blurb}
 *
 * python3 {this_file}{alternative_mode}
 */

/*
 * Table of {table_description}
 * Sorted by base character and then combining mark for binary search
 */
static const struct ucs_recomposition ucs_recomposition_table[] = {{
""")

        for base, combining, recomposed in recompose_list:
            try:
                base_name = unicodedata.name(chr(base))
                combining_name = unicodedata.name(chr(combining))
                recomposed_name = unicodedata.name(chr(recomposed))
                comment = f"/* {base_name} + {combining_name} = {recomposed_name} */"
            except ValueError:
                # unicodedata.name() raises for unnamed code points; fall
                # back to plain U+XXXX notation.
                comment = f"/* U+{base:04X} + U+{combining:04X} = U+{recomposed:04X} */"
            f.write(f"\t{{ 0x{base:04X}, 0x{combining:04X}, 0x{recomposed:04X} }}, {comment}\n")

        f.write(f"""\
}};

/*
 * Boundary values for quick rejection
 * These are calculated by analyzing the table during generation
 */
#define UCS_RECOMPOSE_MIN_BASE 0x{min_base:04X}
#define UCS_RECOMPOSE_MAX_BASE 0x{max_base:04X}
#define UCS_RECOMPOSE_MIN_MARK 0x{min_combining:04X}
#define UCS_RECOMPOSE_MAX_MARK 0x{max_combining:04X}
""")


def main():
    """Parse the command line and generate the requested table."""
    parser = argparse.ArgumentParser(description="Generate Unicode recomposition table")
    parser.add_argument("--full", action="store_true",
                        help="Generate a full recomposition table (default: common pairs only)")
    parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE,
                        help=f"Output file name (default: {DEFAULT_OUT_FILE})")
    args = parser.parse_args()

    generate_recomposition_table(use_full_list=args.full, out_file=args.output_file)


if __name__ == "__main__":
    main()