xref: /linux/tools/lib/python/abi/abi_parser.py (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1#!/usr/bin/env python3
2# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Parse ABI documentation and produce results from it.
8"""
9
10from argparse import Namespace
11import logging
12import os
13import re
14
15from pprint import pformat
16from random import randrange, seed
17
18# Import Python modules
19
20from abi.helpers import AbiDebug, ABI_DIR
21
22
class AbiParser:
    """
    Main class to parse ABI files.

    It walks a directory tree with ABI description files, stores every
    parsed entry at ``self.data`` and provides helpers to render the
    entries, to search for symbols and to warn about duplicated ones.
    """

    #: Valid tags at Documentation/ABI. "where" is listed only to let the
    #: parser recognize it and warn that "What:" should be used instead.
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"

    #: ABI elements that will auto-generate cross-references: a path under
    #: /sys, /config, /proc, /dev or /kvd, preceded by the start of the text,
    #: a whitespace or "(" and followed by one of ",.:;)", a whitespace or
    #: the end of the text.
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
31
32    def __init__(self, directory, logger=None,
33                 enable_lineno=False, show_warnings=True, debug=0):
34        """Stores arguments for the class and initialize class vars."""
35
36        self.directory = directory
37        self.enable_lineno = enable_lineno
38        self.show_warnings = show_warnings
39        self.debug = debug
40
41        if not logger:
42            self.log = logging.getLogger("get_abi")
43        else:
44            self.log = logger
45
46        self.data = {}
47        self.what_symbols = {}
48        self.file_refs = {}
49        self.what_refs = {}
50
51        # Ignore files that contain such suffixes
52        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
53
54        # Regular expressions used on parser
55        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
56        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
57        self.re_valid = re.compile(self.TAGS)
58        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
59        self.re_whitespace = re.compile(r"^\s+")
60
61        # Regular used on print
62        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
63        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
64        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
65        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
66        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
67        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
68        self.re_xref_node = re.compile(self.XREF)
69
70    def warn(self, fdata, msg, extra=None):
71        """Displays a parse error if warning is enabled."""
72
73        if not self.show_warnings:
74            return
75
76        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
77        if extra:
78            msg += "\n\t\t" + extra
79
80        self.log.warning(msg)
81
82    def add_symbol(self, what, fname, ln=None, xref=None):
83        """Create a reference table describing where each 'what' is located."""
84
85        if what not in self.what_symbols:
86            self.what_symbols[what] = {"file": {}}
87
88        if fname not in self.what_symbols[what]["file"]:
89            self.what_symbols[what]["file"][fname] = []
90
91        if ln and ln not in self.what_symbols[what]["file"][fname]:
92            self.what_symbols[what]["file"][fname].append(ln)
93
94        if xref:
95            self.what_symbols[what]["xref"] = xref
96
    def _parse_line(self, fdata, line):
        """
        Parse a single line of an ABI file.

        This is one step of a per-file state machine: ``fdata`` carries the
        state between calls (current tag, current key, the list of "What"
        names, the indentation of the description, ...), while the results
        are stored at ``self.data``, ``self.what_symbols`` and
        ``self.what_refs``.

        :param fdata: per-file parsing state, as initialized by parse_file().
        :param line: the raw line, as read from the file.
        """

        new_what = False
        new_tag = False
        content = None

        # A tag candidate has the form "<non-blanks>:<content>"
        match = self.re_tag.match(line)
        if match:
            new = match.group(1).lower()
            sep = match.group(2)
            content = match.group(3)

            # search() is not anchored: it is enough for the candidate to
            # contain one of the valid tag names
            match = self.re_valid.search(new)
            if match:
                new_tag = match.group(1)
            else:
                if fdata.tag == "description":
                    # New "tag" is actually part of description.
                    # Don't consider it a tag
                    new_tag = False
                elif fdata.tag != "":
                    # NOTE(review): the message shows the current tag
                    # (fdata.tag), not the unrecognized one (new) - confirm
                    # if this is the intended text
                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)

        if new_tag:
            # "where" is Invalid, but was a common mistake. Warn if found
            if new_tag == "where":
                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
                new_tag = "what"

            if new_tag == "what":
                # A new symbol restarts the description indentation tracking
                fdata.space = None

                if content not in self.what_symbols:
                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)

                if fdata.tag == "what":
                    # Consecutive "What:" lines: another name for the
                    # entry that is currently being parsed
                    fdata.what.append(content.strip("\n"))
                else:
                    # First "What:" of a new entry: close the previous one
                    if fdata.key:
                        if "description" not in self.data.get(fdata.key, {}):
                            self.warn(fdata, f"{fdata.key} doesn't have a description")

                        for w in fdata.what:
                            self.add_symbol(what=w, fname=fdata.fname,
                                            ln=fdata.what_ln, xref=fdata.key)

                    fdata.label = content
                    new_what = True

                    # The key is "abi_" + lowercase name, with every run of
                    # non-alphanumeric chars replaced by a single "_"
                    key = "abi_" + content.lower()
                    fdata.key = self.re_unprintable.sub("_", key).strip("_")

                    # Avoid duplicated keys but using a defined seed, to make
                    # the namespace identical if there aren't changes at the
                    # ABI symbols
                    seed(42)

                    # Append pseudo-random ASCII letters until the key is
                    # unique
                    while fdata.key in self.data:
                        char = randrange(0, 51) + ord("A")
                        if char > ord("Z"):
                            char += ord("a") - ord("Z") - 1

                        fdata.key += chr(char)

                    if fdata.key and fdata.key not in self.data:
                        self.data[fdata.key] = {
                            "what": [content],
                            "file": [fdata.file_ref],
                            "path": fdata.ftype,
                            "line_no": fdata.ln,
                        }

                    # fdata.what and the entry share the very same list, so
                    # names appended later are visible on both
                    fdata.what = self.data[fdata.key]["what"]

                self.what_refs[content] = fdata.key
                fdata.tag = new_tag
                fdata.what_ln = fdata.ln

                # List the symbol at the per-file entry as well
                if fdata.nametag["what"]:
                    t = (content, fdata.key)
                    if t not in fdata.nametag["symbols"]:
                        fdata.nametag["symbols"].append(t)

                return

            # Any other valid tag is only accepted after a "What:"
            if fdata.tag and new_tag:
                fdata.tag = new_tag

                # NOTE(review): new_what is a local variable which is only
                # set to True at the "what" branch above, and that branch
                # returns. So, as written, this block is never executed.
                if new_what:
                    fdata.label = ""

                    if "description" in self.data[fdata.key]:
                        self.data[fdata.key]["description"] += "\n\n"

                    if fdata.file_ref not in self.data[fdata.key]["file"]:
                        self.data[fdata.key]["file"].append(fdata.file_ref)

                    if self.debug == AbiDebug.WHAT_PARSING:
                        self.log.debug("what: %s", fdata.what)

                if not fdata.what:
                    self.warn(fdata, "'What:' should come first:", line)
                    return

                if new_tag == "description":
                    fdata.space = None

                    if content:
                        # Replace "Description:" by the same amount of
                        # spaces, so that the text found at the tag line
                        # keeps the column it has at the file
                        sep = sep.replace(":", " ")

                        c = " " * len(new_tag) + sep + content
                        c = c.expandtabs()

                        match = self.re_start_spc.match(c)
                        if match:
                            # Preserve initial spaces for the first line
                            fdata.space = match.group(1)
                            content = match.group(2) + "\n"

                self.data[fdata.key][fdata.tag] = content

            return

        # Store any contents before tags at the database
        if not fdata.tag and "what" in fdata.nametag:
            fdata.nametag["description"] += line
            return

        # Continuation of a description: keep its relative indentation
        if fdata.tag == "description":
            content = line.expandtabs()

            # Blank lines are kept as paragraph separators
            if self.re_whitespace.sub("", content) == "":
                self.data[fdata.key][fdata.tag] += "\n"
                return

            if fdata.space is None:
                match = self.re_start_spc.match(content)
                if match:
                    # Preserve initial spaces for the first line
                    fdata.space = match.group(1)

                    content = match.group(2) + "\n"
            else:
                # Strip the indentation of the first description line. If
                # this line is less indented than that, stop stripping
                if content.startswith(fdata.space):
                    content = content[len(fdata.space):]

                else:
                    fdata.space = ""

            # NOTE(review): this code only runs when fdata.tag is
            # "description", so the "what" case below can't be taken
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += content
            return

        # Continuation line of any other tag
        content = line.strip()
        if fdata.tag:
            if fdata.tag == "what":
                w = content.strip("\n")
                if w:
                    self.data[fdata.key][fdata.tag].append(w)
            else:
                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
            return

        # Everything else is error
        if content:
            self.warn(fdata, "Unexpected content", line)
268
269    def parse_readme(self, nametag, fname):
270        """Parse ABI README file."""
271
272        nametag["what"] = ["Introduction"]
273        nametag["path"] = "README"
274        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
275            for line in fp:
276                match = self.re_tag.match(line)
277                if match:
278                    new = match.group(1).lower()
279
280                    match = self.re_valid.search(new)
281                    if match:
282                        nametag["description"] += "\n:" + line
283                        continue
284
285                nametag["description"] += line
286
287    def parse_file(self, fname, path, basename):
288        """Parse a single file."""
289
290        ref = f"abi_file_{path}_{basename}"
291        ref = self.re_unprintable.sub("_", ref).strip("_")
292
293        # Store per-file state into a namespace variable. This will be used
294        # by the per-line parser state machine and by the warning function.
295        fdata = Namespace
296
297        fdata.fname = fname
298        fdata.name = basename
299
300        pos = fname.find(ABI_DIR)
301        if pos > 0:
302            f = fname[pos:]
303        else:
304            f = fname
305
306        fdata.file_ref = (f, ref)
307        self.file_refs[f] = ref
308
309        fdata.ln = 0
310        fdata.what_ln = 0
311        fdata.tag = ""
312        fdata.label = ""
313        fdata.what = []
314        fdata.key = None
315        fdata.xrefs = None
316        fdata.space = None
317        fdata.ftype = path.split("/")[0]
318
319        fdata.nametag = {}
320        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
321        fdata.nametag["type"] = "File"
322        fdata.nametag["path"] = fdata.ftype
323        fdata.nametag["file"] = [fdata.file_ref]
324        fdata.nametag["line_no"] = 1
325        fdata.nametag["description"] = ""
326        fdata.nametag["symbols"] = []
327
328        self.data[ref] = fdata.nametag
329
330        if self.debug & AbiDebug.WHAT_OPEN:
331            self.log.debug("Opening file %s", fname)
332
333        if basename == "README":
334            self.parse_readme(fdata.nametag, fname)
335            return
336
337        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
338            for line in fp:
339                fdata.ln += 1
340
341                self._parse_line(fdata, line)
342
343            if "description" in fdata.nametag:
344                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
345
346            if fdata.key:
347                if "description" not in self.data.get(fdata.key, {}):
348                    self.warn(fdata, f"{fdata.key} doesn't have a description")
349
350                for w in fdata.what:
351                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
352
353    def _parse_abi(self, root=None):
354        """Internal function to parse documentation ABI recursively."""
355
356        if not root:
357            root = self.directory
358
359        with os.scandir(root) as obj:
360            for entry in obj:
361                name = os.path.join(root, entry.name)
362
363                if entry.is_dir():
364                    self._parse_abi(name)
365                    continue
366
367                if not entry.is_file():
368                    continue
369
370                basename = os.path.basename(name)
371
372                if basename.startswith("."):
373                    continue
374
375                if basename.endswith(self.ignore_suffixes):
376                    continue
377
378                path = self.re_abi_dir.sub("", os.path.dirname(name))
379
380                self.parse_file(name, path, basename)
381
382    def parse_abi(self, root=None):
383        """Parse documentation ABI."""
384
385        self._parse_abi(root)
386
387        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
388            self.log.debug(pformat(self.data))
389
390    def desc_txt(self, desc):
391        """Print description as found inside ABI files."""
392
393        desc = desc.strip(" \t\n")
394
395        return desc + "\n\n"
396
397    def xref(self, fname):
398        """
399        Converts a Documentation/ABI + basename into a ReST cross-reference.
400        """
401
402        xref = self.file_refs.get(fname)
403        if not xref:
404            return None
405        else:
406            return xref
407
408    def desc_rst(self, desc):
409        """Enrich ReST output by creating cross-references."""
410
411        # Remove title markups from the description
412        # Having titles inside ABI files will only work if extra
413        # care would be taken in order to strictly follow the same
414        # level order for each markup.
415        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
416        desc = desc.rstrip(" \t\n").lstrip("\n")
417
418        # Python's regex performance for non-compiled expressions is a lot
419        # than Perl, as Perl automatically caches them at their
420        # first usage. Here, we'll need to do the same, as otherwise the
421        # performance penalty is be high
422
423        new_desc = ""
424        for d in desc.split("\n"):
425            if d == "":
426                new_desc += "\n"
427                continue
428
429            # Use cross-references for doc files where needed
430            d = self.re_doc.sub(r":doc:`/\1`", d)
431
432            # Use cross-references for ABI generated docs where needed
433            matches = self.re_abi.findall(d)
434            for m in matches:
435                abi = m[0] + m[1]
436
437                xref = self.file_refs.get(abi)
438                if not xref:
439                    # This may happen if ABI is on a separate directory,
440                    # like parsing ABI testing and symbol is at stable.
441                    # The proper solution is to move this part of the code
442                    # for it to be inside sphinx/kernel_abi.py
443                    self.log.info("Didn't find ABI reference for '%s'", abi)
444                else:
445                    new = self.re_escape.sub(r"\\\1", m[1])
446                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
447
448            # Seek for cross reference symbols like /sys/...
449            # Need to be careful to avoid doing it on a code block
450            if d[0] not in [" ", "\t"]:
451                matches = self.re_xref_node.findall(d)
452                for m in matches:
453                    # Finding ABI here is more complex due to wildcards
454                    xref = self.what_refs.get(m)
455                    if xref:
456                        new = self.re_escape.sub(r"\\\1", m)
457                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
458
459            new_desc += d + "\n"
460
461        return new_desc + "\n\n"
462
463    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
464            filter_path=None):
465        """Print ABI at stdout."""
466
467        part = None
468        for key, v in sorted(self.data.items(),
469                             key=lambda x: (x[1].get("type", ""),
470                                            x[1].get("what"))):
471
472            wtype = v.get("type", "Symbol")
473            file_ref = v.get("file")
474            names = v.get("what", [""])
475
476            if wtype == "File":
477                if not show_file:
478                    continue
479            else:
480                if not show_symbols:
481                    continue
482
483            if filter_path:
484                if v.get("path") != filter_path:
485                    continue
486
487            msg = ""
488
489            if wtype != "File":
490                cur_part = names[0]
491                if cur_part.find("/") >= 0:
492                    match = self.re_what.match(cur_part)
493                    if match:
494                        symbol = match.group(1).rstrip("/")
495                        cur_part = "Symbols under " + symbol
496
497                if cur_part and cur_part != part:
498                    part = cur_part
499                    msg += part + "\n"+ "-" * len(part) +"\n\n"
500
501                msg += f".. _{key}:\n\n"
502
503                max_len = 0
504                for i in range(0, len(names)):           # pylint: disable=C0200
505                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
506
507                    max_len = max(max_len, len(names[i]))
508
509                msg += "+-" + "-" * max_len + "-+\n"
510                for name in names:
511                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
512                    msg += "+-" + "-" * max_len + "-+\n"
513                msg += "\n"
514
515            for ref in file_ref:
516                if wtype == "File":
517                    msg += f".. _{ref[1]}:\n\n"
518                else:
519                    base = os.path.basename(ref[0])
520                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
521
522            if wtype == "File":
523                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
524
525            desc = v.get("description")
526            if not desc and wtype != "File":
527                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
528
529            if desc:
530                if output_in_txt:
531                    msg += self.desc_txt(desc)
532                else:
533                    msg += self.desc_rst(desc)
534
535            symbols = v.get("symbols")
536            if symbols:
537                msg += "Has the following ABI:\n\n"
538
539                for w, label in symbols:
540                    # Escape special chars from content
541                    content = self.re_escape.sub(r"\\\1", w)
542
543                    msg += f"- :ref:`{content} <{label}>`\n\n"
544
545            users = v.get("users")
546            if users and users.strip(" \t\n"):
547                users = users.strip("\n").replace('\n', '\n\t')
548                msg += f"Users:\n\t{users}\n\n"
549
550            ln = v.get("line_no", 1)
551
552            yield (msg, file_ref[0][0], ln)
553
554    def check_issues(self):
555        """Warn about duplicated ABI entries."""
556
557        for what, v in self.what_symbols.items():
558            files = v.get("file")
559            if not files:
560                # Should never happen if the parser works properly
561                self.log.warning("%s doesn't have a file associated", what)
562                continue
563
564            if len(files) == 1:
565                continue
566
567            f = []
568            for fname, lines in sorted(files.items()):
569                if not lines:
570                    f.append(f"{fname}")
571                elif len(lines) == 1:
572                    f.append(f"{fname}:{lines[0]}")
573                else:
574                    m = fname + "lines "
575                    m += ", ".join(str(x) for x in lines)
576                    f.append(m)
577
578            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
579
580    def search_symbols(self, expr):
581        """ Searches for ABI symbols."""
582
583        regex = re.compile(expr, re.I)
584
585        found_keys = 0
586        for t in sorted(self.data.items(), key=lambda x: [0]):
587            v = t[1]
588
589            wtype = v.get("type", "")
590            if wtype == "File":
591                continue
592
593            for what in v.get("what", [""]):
594                if regex.search(what):
595                    found_keys += 1
596
597                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
598                    date = v.get("date", "").strip(" \t\n")
599                    contact = v.get("contact", "").strip(" \t\n")
600                    users = v.get("users", "").strip(" \t\n")
601                    desc = v.get("description", "").strip(" \t\n")
602
603                    files = []
604                    for f in v.get("file", ()):
605                        files.append(f[0])
606
607                    what = str(found_keys) + ". " + what
608                    title_tag = "-" * len(what)
609
610                    print(f"\n{what}\n{title_tag}\n")
611
612                    if kernelversion:
613                        print(f"Kernel version:\t\t{kernelversion}")
614
615                    if date:
616                        print(f"Date:\t\t\t{date}")
617
618                    if contact:
619                        print(f"Contact:\t\t{contact}")
620
621                    if users:
622                        print(f"Users:\t\t\t{users}")
623
624                    print("Defined on file(s):\t" + ", ".join(files))
625
626                    if desc:
627                        desc = desc.strip("\n")
628                        print(f"\n{desc}\n")
629
630        if not found_keys:
631            print(f"Regular expression /{expr}/ not found.")
632