xref: /linux/tools/docs/lib/parse_data_structs.py (revision 4f38da1f027ea2c9f01bb71daa7a299c191b6940)
1cde49466SMauro Carvalho Chehab#!/usr/bin/env python3
2cde49466SMauro Carvalho Chehab# SPDX-License-Identifier: GPL-2.0
3cde49466SMauro Carvalho Chehab# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
4cde49466SMauro Carvalho Chehab# pylint: disable=R0912,R0915
5cde49466SMauro Carvalho Chehab
"""
Parse a source file or header, creating ReStructured Text cross references.

It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.

It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols, and of creating cross-references for all of them.
It is also capable of distinguishing the #defines used to specify Linux
ioctls.

The optional rules file contains a set of rules like:

    ignore ioctl VIDIOC_ENUM_FMT
    replace ioctl VIDIOC_DQBUF vidioc_qbuf
    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""
23cde49466SMauro Carvalho Chehab
24cde49466SMauro Carvalho Chehabimport os
25cde49466SMauro Carvalho Chehabimport re
26cde49466SMauro Carvalho Chehabimport sys
27cde49466SMauro Carvalho Chehab
28cde49466SMauro Carvalho Chehab
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols, and of creating cross-references for all of
    them. It is also capable of distinguishing the #defines used to specify
    Linux ioctls.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces how old_symbol is referenced. The new_reference can be:
        - A simple symbol name;
        - A full Sphinx reference.

    On both cases, <type> can be:
        - ioctl: for defines that end with _IO*, e.g. ioctl definitions
        - define: for other defines
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # Per-statement matchers used by parse_file(). They are compiled once
    # here instead of being re-evaluated for every input line.
    RE_ENUM_SYMBOL = re.compile(r"^\s*([_\w][\w\d_]+)\s*[\,=]?")
    RE_IOCTL = re.compile(r"^\s*#\s*define\s+([\w_]+)\s+_IO")
    RE_DEFINE = re.compile(r"^\s*#\s*define\s+([\w_]+)(\s+|$)")
    RE_TYPEDEF = re.compile(
        r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);")

    # Start of either kind of C comment; the leftmost one on a line wins
    RE_COMMENT_START = re.compile(r"//|/\*")

    # FIXME: the original code was written a long time before Sphinx C
    # domain to have multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(); a value greater than 1 additionally
        makes parse_file() print every logical line it analyzes.
        """
        self.debug = debug
        self.data = ""

        # One dictionary (symbol -> reference markup) per known symbol type
        self.symbols = {symbol_type: {} for symbol_type in self.DEF_SYMBOL_TYPES}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is the reference markup for the symbol, wrapped
        with the prefix/suffix defined for symbol_type at DEF_SYMBOL_TYPES.
        When ref_name is not given, the lower-cased symbol is used as the
        reference target.

        By default, underscores are replaced by "-" on ":ref" targets.

        Raises KeyError if symbol_type is not a key of DEF_SYMBOL_TYPES.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if not ref_type:
            ref_link = symbol
        elif symbol_type == "enum":
            # Enums are referenced by their own name, without a target
            ref_link = f"{ref_type}:`{symbol}`"
        else:
            if not ref_name:
                ref_name = symbol.lower()

            # c-type references don't support hash
            if ref_type == ":ref" and replace_underscores:
                ref_name = ref_name.replace("_", "-")

            ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """
        Stores a line at self.data, properly indented.

        Tabs are expanded, the line is indented by four spaces and trailing
        whitespace is dropped. The stored line always ends with a newline.
        """
        # The line read from the file still carries its "\n", so the
        # trailing blanks have to be stripped together with it.
        self.data += ("    " + line.expandtabs()).rstrip() + "\n"

    @classmethod
    def _strip_comments(cls, line: str):
        """
        Remove C comments from a logical line.

        Returns a ``(text, unterminated)`` tuple. When a ``/*`` comment is
        still open at the end of the line, ``unterminated`` is True and
        ``text`` is the untouched input, so the caller can append the next
        line and try again. Otherwise ``text`` is the line with ``/* */``
        and ``//`` comments removed.

        Comment markers are handled left to right, so a ``/*`` inside a
        ``//`` comment doesn't open a block comment. String and character
        literals are not parsed.
        """
        code = []
        pos = 0

        while True:
            match = cls.RE_COMMENT_START.search(line, pos)
            if not match:
                code.append(line[pos:])
                break

            code.append(line[pos:match.start()])

            # C99-style comment: the rest of the line is discarded
            if match.group(0) == "//":
                break

            end = line.find("*/", match.end())
            if end < 0:
                return line, True

            pos = end + 2

        return "".join(code), False

    def parse_file(self, file_in: str):
        """
        Reads a C source file and get identifiers.

        The indented file content is stored at self.data, replacing any
        previous content. Detected identifiers are added to self.symbols,
        which is not cleared by this method.
        """
        self.data = ""
        is_enum = False
        pending = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, raw_line in enumerate(f, start=1):
                self.store_line(raw_line)
                line = raw_line.strip("\n")

                # Handle continuation lines: drop the trailing backslash
                # and glue the next physical line to this one
                if line.endswith("\\"):
                    pending += line[:-1]
                    continue

                if pending:
                    line = pending + line
                    pending = ""

                # Handle comments. They can be multilined: keep gathering
                # lines until the comment is closed
                line, unterminated = self._strip_comments(line)
                if unterminated:
                    pending = line
                    continue

                # At this point, line variable may be a multilined statement,
                # if lines end with \ or if they have multi-line comments,
                # and all comments were already removed from it.
                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.

                if is_enum:
                    match = self.RE_ENUM_SYMBOL.match(line)
                    if match:
                        self.store_type("symbol", match.group(1))
                    if "}" in line:
                        is_enum = False
                    continue

                match = self.RE_IOCTL.match(line)
                if match:
                    self.store_type("ioctl", match.group(1),
                                    replace_underscores=False)
                    continue

                match = self.RE_DEFINE.match(line)
                if match:
                    self.store_type("define", match.group(1))
                    continue

                match = self.RE_TYPEDEF.match(line)
                if match:
                    name = match.group(2).strip()
                    symbol = match.group(3)
                    self.store_type("typedef", symbol, ref_name=name)
                    continue

                for re_enum in self.RE_ENUMS:
                    match = re_enum.match(line)
                    if match:
                        self.store_type("enum", match.group(1))
                        # An enum closed on its own line has no body left
                        # to scan; entering enum mode would misparse the
                        # following statements as enum symbols.
                        is_enum = "}" not in line
                        break

                for re_struct in self.RE_STRUCTS:
                    match = re_struct.match(line)
                    if match:
                        self.store_type("struct", match.group(1))
                        break

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Empty lines and lines starting with "#" are skipped. Does nothing
        when fname is empty. Calls sys.exit() with a "file:line" message on
        a malformed rule or on an unknown symbol type. A replace rule for a
        symbol that was not parsed only prints a warning.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type, symbol = match.groups()

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol that was never parsed is not an error
                    self.symbols[c_type].pop(symbol, None)
                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified

                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype = match.group(1)
                        new = match.group(2)

                # If the replacement rule doesn't have a type, get default
                if not reftype:
                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
                    if not reftype:
                        # NOTE(review): no entry of DEF_SYMBOL_TYPES defines
                        # "real_type" here, so this lookup yields None
                        reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")

                new_ref = f"{reftype}:`{old} <{new}>`"

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.

        Does nothing unless self.debug is set.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f"  {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return the content stored at self.data formatted as the body of a
        parsed-literal block, with each known symbol replaced by its
        cross-reference.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was escaped above, so the symbol has to be
                # escaped the same way before searching for it
                escaped = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])",
                                           r"\\\1", symbol))

                # Use a callable so the reference is inserted verbatim,
                # instead of being parsed as an re.sub() template
                text = re.sub(
                    f"{start_delim}{escaped}{end_delim}",
                    lambda m, ref=replacement: f"{m.group(1)}{ref}{m.group(2)}",
                    text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the ReST text: a local "contents" directive followed by
        one section per non-empty symbol type, ordered by description,
        each one listing its symbols in alphabetical order.
        """
        text = [
            ".. contents:: Table of Contents",
            "   :depth: 2",
            "   :local:",
            "",
        ]

        # Sort symbol types per description
        symbol_descriptions = sorted(
            (defs["description"], c_type)
            for c_type, defs in self.DEF_SYMBOL_TYPES.items()
        )

        # Process each category
        for description, c_type in symbol_descriptions:
            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the generated ReST document to file_out.

        The title is the base name of file_in. When toc is true, the body
        is the table produced by gen_toc(); otherwise it is the output of
        gen_output() inside a parsed-literal block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)
453