xref: /linux/scripts/lib/abi/abi_parser.py (revision 6b48bea16848dd7c771411db3dcc01b3bc4dd4c2)
1#!/usr/bin/env python3
2# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Parse ABI documentation and produce results from it.
8"""
9
10from argparse import Namespace
11import logging
12import os
13import re
14
15from glob import glob
16from pprint import pformat
17from random import randrange, seed
18
19# Import Python modules
20
21from helpers import AbiDebug, ABI_DIR
22
23
24class AbiParser:
25    """Main class to parse ABI files"""
26
27    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
28    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
29
30    def __init__(self, directory, logger=None,
31                 enable_lineno=False, show_warnings=True, debug=0):
32        """Stores arguments for the class and initialize class vars"""
33
34        self.directory = directory
35        self.enable_lineno = enable_lineno
36        self.show_warnings = show_warnings
37        self.debug = debug
38
39        if not logger:
40            self.log = logging.getLogger("get_abi")
41        else:
42            self.log = logger
43
44        self.data = {}
45        self.what_symbols = {}
46        self.file_refs = {}
47        self.what_refs = {}
48
49        # Regular expressions used on parser
50        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
51        self.re_valid = re.compile(self.TAGS)
52        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
53        self.re_whitespace = re.compile(r"^\s+")
54
55        # Regular used on print
56        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
57        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
58        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
59        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
60        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
61        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
62        self.re_xref_node = re.compile(self.XREF)
63
64    def warn(self, fdata, msg, extra=None):
65        """Displays a parse error if warning is enabled"""
66
67        if not self.show_warnings:
68            return
69
70        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
71        if extra:
72            msg += "\n\t\t" + extra
73
74        self.log.warning(msg)
75
76    def add_symbol(self, what, fname, ln=None, xref=None):
77        """Create a reference table describing where each 'what' is located"""
78
79        if what not in self.what_symbols:
80            self.what_symbols[what] = {"file": {}}
81
82        if fname not in self.what_symbols[what]["file"]:
83            self.what_symbols[what]["file"][fname] = []
84
85        if ln and ln not in self.what_symbols[what]["file"][fname]:
86            self.what_symbols[what]["file"][fname].append(ln)
87
88        if xref:
89            self.what_symbols[what]["xref"] = xref
90
91    def _parse_line(self, fdata, line):
92        """Parse a single line of an ABI file"""
93
94        new_what = False
95        new_tag = False
96        content = None
97
98        match = self.re_tag.match(line)
99        if match:
100            new = match.group(1).lower()
101            sep = match.group(2)
102            content = match.group(3)
103
104            match = self.re_valid.search(new)
105            if match:
106                new_tag = match.group(1)
107            else:
108                if fdata.tag == "description":
109                    # New "tag" is actually part of description.
110                    # Don't consider it a tag
111                    new_tag = False
112                elif fdata.tag != "":
113                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
114
115        if new_tag:
116            # "where" is Invalid, but was a common mistake. Warn if found
117            if new_tag == "where":
118                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
119                new_tag = "what"
120
121            if new_tag == "what":
122                fdata.space = None
123
124                if content not in self.what_symbols:
125                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
126
127                if fdata.tag == "what":
128                    fdata.what.append(content.strip("\n"))
129                else:
130                    if fdata.key:
131                        if "description" not in self.data.get(fdata.key, {}):
132                            self.warn(fdata, f"{fdata.key} doesn't have a description")
133
134                        for w in fdata.what:
135                            self.add_symbol(what=w, fname=fdata.fname,
136                                            ln=fdata.what_ln, xref=fdata.key)
137
138                    fdata.label = content
139                    new_what = True
140
141                    key = "abi_" + content.lower()
142                    fdata.key = self.re_unprintable.sub("_", key).strip("_")
143
144                    # Avoid duplicated keys but using a defined seed, to make
145                    # the namespace identical if there aren't changes at the
146                    # ABI symbols
147                    seed(42)
148
149                    while fdata.key in self.data:
150                        char = randrange(0, 51) + ord("A")
151                        if char > ord("Z"):
152                            char += ord("a") - ord("Z") - 1
153
154                        fdata.key += chr(char)
155
156                    if fdata.key and fdata.key not in self.data:
157                        self.data[fdata.key] = {
158                            "what": [content],
159                            "file": [fdata.file_ref],
160                            "line_no": fdata.ln,
161                        }
162
163                    fdata.what = self.data[fdata.key]["what"]
164
165                self.what_refs[content] = fdata.key
166                fdata.tag = new_tag
167                fdata.what_ln = fdata.ln
168
169                if fdata.nametag["what"]:
170                    t = (content, fdata.key)
171                    if t not in fdata.nametag["symbols"]:
172                        fdata.nametag["symbols"].append(t)
173
174                return
175
176            if fdata.tag and new_tag:
177                fdata.tag = new_tag
178
179                if new_what:
180                    fdata.label = ""
181
182                    self.data[fdata.key]["type"] = fdata.ftype
183
184                    if "description" in self.data[fdata.key]:
185                        self.data[fdata.key]["description"] += "\n\n"
186
187                    if fdata.file_ref not in self.data[fdata.key]["file"]:
188                        self.data[fdata.key]["file"].append(fdata.file_ref)
189
190                    if self.debug == AbiDebug.WHAT_PARSING:
191                        self.log.debug("what: %s", fdata.what)
192
193                if not fdata.what:
194                    self.warn(fdata, "'What:' should come first:", line)
195                    return
196
197                if new_tag == "description":
198                    fdata.space = None
199
200                    if content:
201                        sep = sep.replace(":", " ")
202
203                        c = " " * len(new_tag) + sep + content
204                        c = c.expandtabs()
205
206                        match = self.re_start_spc.match(c)
207                        if match:
208                            # Preserve initial spaces for the first line
209                            fdata.space = match.group(1)
210                            content = match.group(2) + "\n"
211
212                self.data[fdata.key][fdata.tag] = content
213
214            return
215
216        # Store any contents before tags at the database
217        if not fdata.tag and "what" in fdata.nametag:
218            fdata.nametag["description"] += line
219            return
220
221        if fdata.tag == "description":
222            content = line.expandtabs()
223
224            if self.re_whitespace.sub("", content) == "":
225                self.data[fdata.key][fdata.tag] += "\n"
226                return
227
228            if fdata.space is None:
229                match = self.re_start_spc.match(content)
230                if match:
231                    # Preserve initial spaces for the first line
232                    fdata.space = match.group(1)
233
234                    content = match.group(2) + "\n"
235            else:
236                if content.startswith(fdata.space):
237                    content = content[len(fdata.space):]
238
239                else:
240                    fdata.space = ""
241
242            if fdata.tag == "what":
243                w = content.strip("\n")
244                if w:
245                    self.data[fdata.key][fdata.tag].append(w)
246            else:
247                self.data[fdata.key][fdata.tag] += content
248            return
249
250        content = line.strip()
251        if fdata.tag:
252            if fdata.tag == "what":
253                w = content.strip("\n")
254                if w:
255                    self.data[fdata.key][fdata.tag].append(w)
256            else:
257                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
258            return
259
260        # Everything else is error
261        if content:
262            self.warn(fdata, "Unexpected content", line)
263
264    def parse_file(self, fname, path, basename):
265        """Parse a single file"""
266
267        ref = f"abi_file_{path}_{basename}"
268        ref = self.re_unprintable.sub("_", ref).strip("_")
269
270        # Store per-file state into a namespace variable. This will be used
271        # by the per-line parser state machine and by the warning function.
272        fdata = Namespace
273
274        fdata.fname = fname
275        fdata.name = basename
276
277        pos = fname.find(ABI_DIR)
278        if pos > 0:
279            f = fname[pos:]
280        else:
281            f = fname
282
283        fdata.file_ref = (f, ref)
284        self.file_refs[f] = ref
285
286        fdata.ln = 0
287        fdata.what_ln = 0
288        fdata.tag = ""
289        fdata.label = ""
290        fdata.what = []
291        fdata.key = None
292        fdata.xrefs = None
293        fdata.space = None
294        fdata.ftype = path.split("/")[0]
295
296        fdata.nametag = {}
297        fdata.nametag["what"] = [f"File {path}/{basename}"]
298        fdata.nametag["type"] = "File"
299        fdata.nametag["file"] = [fdata.file_ref]
300        fdata.nametag["line_no"] = 1
301        fdata.nametag["description"] = ""
302        fdata.nametag["symbols"] = []
303
304        self.data[ref] = fdata.nametag
305
306        if self.debug & AbiDebug.WHAT_OPEN:
307            self.log.debug("Opening file %s", fname)
308
309        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
310            for line in fp:
311                fdata.ln += 1
312
313                self._parse_line(fdata, line)
314
315            if "description" in fdata.nametag:
316                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
317
318            if fdata.key:
319                if "description" not in self.data.get(fdata.key, {}):
320                    self.warn(fdata, f"{fdata.key} doesn't have a description")
321
322                for w in fdata.what:
323                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
324
325    def parse_abi(self):
326        """Parse documentation ABI"""
327
328        ignore_suffixes = ("rej", "org", "orig", "bak", "~")
329        re_abi = re.compile(r".*" + ABI_DIR)
330
331        for fname in glob(os.path.join(self.directory, "**"), recursive=True):
332            if os.path.isdir(fname):
333                continue
334
335            basename = os.path.basename(fname)
336
337            if basename == "README":
338                continue
339            if basename.startswith(".") or basename.endswith(ignore_suffixes):
340                continue
341
342            path = re_abi.sub("", os.path.dirname(fname))
343
344            self.parse_file(fname, path, basename)
345
346        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
347            self.log.debug(pformat(self.data))
348
349    def print_desc_txt(self, desc):
350        """Print description as found inside ABI files"""
351
352        desc = desc.strip(" \t\n")
353
354        print(desc + "\n")
355
356    def print_desc_rst(self, desc):
357        """Enrich ReST output by creating cross-references"""
358
359        # Remove title markups from the description
360        # Having titles inside ABI files will only work if extra
361        # care would be taken in order to strictly follow the same
362        # level order for each markup.
363        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
364        desc = desc.rstrip(" \t\n").lstrip("\n")
365
366        # Python's regex performance for non-compiled expressions is a lot
367        # than Perl, as Perl automatically caches them at their
368        # first usage. Here, we'll need to do the same, as otherwise the
369        # performance penalty is be high
370
371        new_desc = ""
372        for d in desc.split("\n"):
373            if d == "":
374                new_desc += "\n"
375                continue
376
377            # Use cross-references for doc files where needed
378            d = self.re_doc.sub(r":doc:`/\1`", d)
379
380            # Use cross-references for ABI generated docs where needed
381            matches = self.re_abi.findall(d)
382            for m in matches:
383                abi = m[0] + m[1]
384
385                xref = self.file_refs.get(abi)
386                if not xref:
387                    # This may happen if ABI is on a separate directory,
388                    # like parsing ABI testing and symbol is at stable.
389                    # The proper solution is to move this part of the code
390                    # for it to be inside sphinx/kernel_abi.py
391                    self.log.info("Didn't find ABI reference for '%s'", abi)
392                else:
393                    new = self.re_escape.sub(r"\\\1", m[1])
394                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
395
396            # Seek for cross reference symbols like /sys/...
397            # Need to be careful to avoid doing it on a code block
398            if d[0] not in [" ", "\t"]:
399                matches = self.re_xref_node.findall(d)
400                for m in matches:
401                    # Finding ABI here is more complex due to wildcards
402                    xref = self.what_refs.get(m)
403                    if xref:
404                        new = self.re_escape.sub(r"\\\1", m)
405                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
406
407            new_desc += d + "\n"
408
409        print(new_desc + "\n")
410
411    def print_data(self, enable_lineno, output_in_txt, show_file=False):
412        """Print ABI at stdout"""
413
414        part = None
415        for key, v in sorted(self.data.items(),
416                             key=lambda x: (x[1].get("type", ""),
417                                            x[1].get("what"))):
418
419            wtype = v.get("type", "Var")
420            file_ref = v.get("file")
421            names = v.get("what", [""])
422
423            if not show_file and wtype == "File":
424                continue
425
426            if enable_lineno:
427                ln = v.get("line_no", 1)
428                print(f".. LINENO {file_ref[0][0]}#{ln}\n")
429
430            if wtype != "File":
431                cur_part = names[0]
432                if cur_part.find("/") >= 0:
433                    match = self.re_what.match(cur_part)
434                    if match:
435                        symbol = match.group(1).rstrip("/")
436                        cur_part = "Symbols under " + symbol
437
438                if cur_part and cur_part != part:
439                    part = cur_part
440                    print(f"{part}\n{"-" * len(part)}\n")
441
442                print(f".. _{key}:\n")
443
444                max_len = 0
445                for i in range(0, len(names)):           # pylint: disable=C0200
446                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
447
448                    max_len = max(max_len, len(names[i]))
449
450                print("+-" + "-" * max_len + "-+")
451                for name in names:
452                    print(f"| {name}" + " " * (max_len - len(name)) + " |")
453                    print("+-" + "-" * max_len + "-+")
454                print()
455
456            for ref in file_ref:
457                if wtype == "File":
458                    print(f".. _{ref[1]}:\n")
459                else:
460                    base = os.path.basename(ref[0])
461                    print(f"Defined on file :ref:`{base} <{ref[1]}>`\n")
462
463            if wtype == "File":
464                print(f"{names[0]}\n{"-" * len(names[0])}\n")
465
466            desc = v.get("description")
467            if not desc and wtype != "File":
468                print(f"DESCRIPTION MISSING for {names[0]}\n")
469
470            if desc:
471                if output_in_txt:
472                    self.print_desc_txt(desc)
473                else:
474                    self.print_desc_rst(desc)
475
476            symbols = v.get("symbols")
477            if symbols:
478                print("Has the following ABI:\n")
479
480                for w, label in symbols:
481                    # Escape special chars from content
482                    content = self.re_escape.sub(r"\\\1", w)
483
484                    print(f"- :ref:`{content} <{label}>`\n")
485
486            users = v.get("users")
487            if users and users.strip(" \t\n"):
488                print(f"Users:\n\t{users.strip("\n").replace('\n', '\n\t')}\n")
489
490    def check_issues(self):
491        """Warn about duplicated ABI entries"""
492
493        for what, v in self.what_symbols.items():
494            files = v.get("file")
495            if not files:
496                # Should never happen if the parser works properly
497                self.log.warning("%s doesn't have a file associated", what)
498                continue
499
500            if len(files) == 1:
501                continue
502
503            f = []
504            for fname, lines in sorted(files.items()):
505                if not lines:
506                    f.append(f"{fname}")
507                elif len(lines) == 1:
508                    f.append(f"{fname}:{lines[0]}")
509                else:
510                    f.append(f"{fname} lines {", ".join(str(x) for x in lines)}")
511
512            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
513
514    def search_symbols(self, expr):
515        """ Searches for ABI symbols """
516
517        regex = re.compile(expr, re.I)
518
519        found_keys = 0
520        for t in sorted(self.data.items(), key=lambda x: [0]):
521            v = t[1]
522
523            wtype = v.get("type", "")
524            if wtype == "File":
525                continue
526
527            for what in v.get("what", [""]):
528                if regex.search(what):
529                    found_keys += 1
530
531                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
532                    date = v.get("date", "").strip(" \t\n")
533                    contact = v.get("contact", "").strip(" \t\n")
534                    users = v.get("users", "").strip(" \t\n")
535                    desc = v.get("description", "").strip(" \t\n")
536
537                    files = []
538                    for f in v.get("file", ()):
539                        files.append(f[0])
540
541                    what = str(found_keys) + ". " + what
542                    title_tag = "-" * len(what)
543
544                    print(f"\n{what}\n{title_tag}\n")
545
546                    if kernelversion:
547                        print(f"Kernel version:\t\t{kernelversion}")
548
549                    if date:
550                        print(f"Date:\t\t\t{date}")
551
552                    if contact:
553                        print(f"Contact:\t\t{contact}")
554
555                    if users:
556                        print(f"Users:\t\t\t{users}")
557
558                    print(f"Defined on file{'s'[:len(files) ^ 1]}:\t{", ".join(files)}")
559
560                    if desc:
561                        print(f"\n{desc.strip("\n")}\n")
562
563        if not found_keys:
564            print(f"Regular expression /{expr}/ not found.")
565