xref: /linux/scripts/lib/abi/abi_parser.py (revision 5d7871d77f6d62406b3d459a58810c1ddb8904c2)
1#!/usr/bin/env python3
2# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
3# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
4# SPDX-License-Identifier: GPL-2.0
5
6"""
7Parse ABI documentation and produce results from it.
8"""
9
10from argparse import Namespace
11import logging
12import os
13import re
14
15from pprint import pformat
16from random import randrange, seed
17
18# Import Python modules
19
20from helpers import AbiDebug, ABI_DIR
21
22
23class AbiParser:
24    """Main class to parse ABI files"""
25
    # Alternation of the tag names the parser recognizes; __init__ compiles
    # it into re_valid. "where" is listed so that _parse_line can spot it,
    # warn, and treat it as "what".
    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
    # Matches an absolute path under /sys, /config, /proc, /dev or /kvd that
    # is preceded by start-of-string, whitespace or "(" and followed by
    # punctuation, whitespace or end-of-string. The path is the only
    # capturing group.
    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
28
29    def __init__(self, directory, logger=None,
30                 enable_lineno=False, show_warnings=True, debug=0):
31        """Stores arguments for the class and initialize class vars"""
32
33        self.directory = directory
34        self.enable_lineno = enable_lineno
35        self.show_warnings = show_warnings
36        self.debug = debug
37
38        if not logger:
39            self.log = logging.getLogger("get_abi")
40        else:
41            self.log = logger
42
43        self.data = {}
44        self.what_symbols = {}
45        self.file_refs = {}
46        self.what_refs = {}
47
48        # Ignore files that contain such suffixes
49        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
50
51        # Regular expressions used on parser
52        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
53        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
54        self.re_valid = re.compile(self.TAGS)
55        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
56        self.re_whitespace = re.compile(r"^\s+")
57
58        # Regular used on print
59        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
60        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
61        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
62        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
63        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
64        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
65        self.re_xref_node = re.compile(self.XREF)
66
67    def warn(self, fdata, msg, extra=None):
68        """Displays a parse error if warning is enabled"""
69
70        if not self.show_warnings:
71            return
72
73        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
74        if extra:
75            msg += "\n\t\t" + extra
76
77        self.log.warning(msg)
78
79    def add_symbol(self, what, fname, ln=None, xref=None):
80        """Create a reference table describing where each 'what' is located"""
81
82        if what not in self.what_symbols:
83            self.what_symbols[what] = {"file": {}}
84
85        if fname not in self.what_symbols[what]["file"]:
86            self.what_symbols[what]["file"][fname] = []
87
88        if ln and ln not in self.what_symbols[what]["file"][fname]:
89            self.what_symbols[what]["file"][fname].append(ln)
90
91        if xref:
92            self.what_symbols[what]["xref"] = xref
93
94    def _parse_line(self, fdata, line):
95        """Parse a single line of an ABI file"""
96
97        new_what = False
98        new_tag = False
99        content = None
100
101        match = self.re_tag.match(line)
102        if match:
103            new = match.group(1).lower()
104            sep = match.group(2)
105            content = match.group(3)
106
107            match = self.re_valid.search(new)
108            if match:
109                new_tag = match.group(1)
110            else:
111                if fdata.tag == "description":
112                    # New "tag" is actually part of description.
113                    # Don't consider it a tag
114                    new_tag = False
115                elif fdata.tag != "":
116                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
117
118        if new_tag:
119            # "where" is Invalid, but was a common mistake. Warn if found
120            if new_tag == "where":
121                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
122                new_tag = "what"
123
124            if new_tag == "what":
125                fdata.space = None
126
127                if content not in self.what_symbols:
128                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
129
130                if fdata.tag == "what":
131                    fdata.what.append(content.strip("\n"))
132                else:
133                    if fdata.key:
134                        if "description" not in self.data.get(fdata.key, {}):
135                            self.warn(fdata, f"{fdata.key} doesn't have a description")
136
137                        for w in fdata.what:
138                            self.add_symbol(what=w, fname=fdata.fname,
139                                            ln=fdata.what_ln, xref=fdata.key)
140
141                    fdata.label = content
142                    new_what = True
143
144                    key = "abi_" + content.lower()
145                    fdata.key = self.re_unprintable.sub("_", key).strip("_")
146
147                    # Avoid duplicated keys but using a defined seed, to make
148                    # the namespace identical if there aren't changes at the
149                    # ABI symbols
150                    seed(42)
151
152                    while fdata.key in self.data:
153                        char = randrange(0, 51) + ord("A")
154                        if char > ord("Z"):
155                            char += ord("a") - ord("Z") - 1
156
157                        fdata.key += chr(char)
158
159                    if fdata.key and fdata.key not in self.data:
160                        self.data[fdata.key] = {
161                            "what": [content],
162                            "file": [fdata.file_ref],
163                            "path": fdata.ftype,
164                            "line_no": fdata.ln,
165                        }
166
167                    fdata.what = self.data[fdata.key]["what"]
168
169                self.what_refs[content] = fdata.key
170                fdata.tag = new_tag
171                fdata.what_ln = fdata.ln
172
173                if fdata.nametag["what"]:
174                    t = (content, fdata.key)
175                    if t not in fdata.nametag["symbols"]:
176                        fdata.nametag["symbols"].append(t)
177
178                return
179
180            if fdata.tag and new_tag:
181                fdata.tag = new_tag
182
183                if new_what:
184                    fdata.label = ""
185
186                    if "description" in self.data[fdata.key]:
187                        self.data[fdata.key]["description"] += "\n\n"
188
189                    if fdata.file_ref not in self.data[fdata.key]["file"]:
190                        self.data[fdata.key]["file"].append(fdata.file_ref)
191
192                    if self.debug == AbiDebug.WHAT_PARSING:
193                        self.log.debug("what: %s", fdata.what)
194
195                if not fdata.what:
196                    self.warn(fdata, "'What:' should come first:", line)
197                    return
198
199                if new_tag == "description":
200                    fdata.space = None
201
202                    if content:
203                        sep = sep.replace(":", " ")
204
205                        c = " " * len(new_tag) + sep + content
206                        c = c.expandtabs()
207
208                        match = self.re_start_spc.match(c)
209                        if match:
210                            # Preserve initial spaces for the first line
211                            fdata.space = match.group(1)
212                            content = match.group(2) + "\n"
213
214                self.data[fdata.key][fdata.tag] = content
215
216            return
217
218        # Store any contents before tags at the database
219        if not fdata.tag and "what" in fdata.nametag:
220            fdata.nametag["description"] += line
221            return
222
223        if fdata.tag == "description":
224            content = line.expandtabs()
225
226            if self.re_whitespace.sub("", content) == "":
227                self.data[fdata.key][fdata.tag] += "\n"
228                return
229
230            if fdata.space is None:
231                match = self.re_start_spc.match(content)
232                if match:
233                    # Preserve initial spaces for the first line
234                    fdata.space = match.group(1)
235
236                    content = match.group(2) + "\n"
237            else:
238                if content.startswith(fdata.space):
239                    content = content[len(fdata.space):]
240
241                else:
242                    fdata.space = ""
243
244            if fdata.tag == "what":
245                w = content.strip("\n")
246                if w:
247                    self.data[fdata.key][fdata.tag].append(w)
248            else:
249                self.data[fdata.key][fdata.tag] += content
250            return
251
252        content = line.strip()
253        if fdata.tag:
254            if fdata.tag == "what":
255                w = content.strip("\n")
256                if w:
257                    self.data[fdata.key][fdata.tag].append(w)
258            else:
259                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
260            return
261
262        # Everything else is error
263        if content:
264            self.warn(fdata, "Unexpected content", line)
265
266    def parse_readme(self, nametag, fname):
267        """Parse ABI README file"""
268
269        nametag["what"] = ["ABI file contents"]
270        nametag["path"] = "README"
271        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
272            for line in fp:
273                match = self.re_tag.match(line)
274                if match:
275                    new = match.group(1).lower()
276
277                    match = self.re_valid.search(new)
278                    if match:
279                        nametag["description"] += "\n:" + line
280                        continue
281
282                nametag["description"] += line
283
284    def parse_file(self, fname, path, basename):
285        """Parse a single file"""
286
287        ref = f"abi_file_{path}_{basename}"
288        ref = self.re_unprintable.sub("_", ref).strip("_")
289
290        # Store per-file state into a namespace variable. This will be used
291        # by the per-line parser state machine and by the warning function.
292        fdata = Namespace
293
294        fdata.fname = fname
295        fdata.name = basename
296
297        pos = fname.find(ABI_DIR)
298        if pos > 0:
299            f = fname[pos:]
300        else:
301            f = fname
302
303        fdata.file_ref = (f, ref)
304        self.file_refs[f] = ref
305
306        fdata.ln = 0
307        fdata.what_ln = 0
308        fdata.tag = ""
309        fdata.label = ""
310        fdata.what = []
311        fdata.key = None
312        fdata.xrefs = None
313        fdata.space = None
314        fdata.ftype = path.split("/")[0]
315
316        fdata.nametag = {}
317        fdata.nametag["what"] = [f"File {path}/{basename}"]
318        fdata.nametag["type"] = "File"
319        fdata.nametag["path"] = fdata.ftype
320        fdata.nametag["file"] = [fdata.file_ref]
321        fdata.nametag["line_no"] = 1
322        fdata.nametag["description"] = ""
323        fdata.nametag["symbols"] = []
324
325        self.data[ref] = fdata.nametag
326
327        if self.debug & AbiDebug.WHAT_OPEN:
328            self.log.debug("Opening file %s", fname)
329
330        if basename == "README":
331            self.parse_readme(fdata.nametag, fname)
332            return
333
334        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
335            for line in fp:
336                fdata.ln += 1
337
338                self._parse_line(fdata, line)
339
340            if "description" in fdata.nametag:
341                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
342
343            if fdata.key:
344                if "description" not in self.data.get(fdata.key, {}):
345                    self.warn(fdata, f"{fdata.key} doesn't have a description")
346
347                for w in fdata.what:
348                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
349
350    def _parse_abi(self, root=None):
351        """Internal function to parse documentation ABI recursively"""
352
353        if not root:
354            root = self.directory
355
356        with os.scandir(root) as obj:
357            for entry in obj:
358                name = os.path.join(root, entry.name)
359
360                if entry.is_dir():
361                    self._parse_abi(name)
362                    continue
363
364                if not entry.is_file():
365                    continue
366
367                basename = os.path.basename(name)
368
369                if basename.startswith("."):
370                    continue
371
372                if basename.endswith(self.ignore_suffixes):
373                    continue
374
375                path = self.re_abi_dir.sub("", os.path.dirname(name))
376
377                self.parse_file(name, path, basename)
378
379    def parse_abi(self, root=None):
380        """Parse documentation ABI"""
381
382        self._parse_abi(root)
383
384        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
385            self.log.debug(pformat(self.data))
386
387    def desc_txt(self, desc):
388        """Print description as found inside ABI files"""
389
390        desc = desc.strip(" \t\n")
391
392        return desc + "\n\n"
393
394    def desc_rst(self, desc):
395        """Enrich ReST output by creating cross-references"""
396
397        # Remove title markups from the description
398        # Having titles inside ABI files will only work if extra
399        # care would be taken in order to strictly follow the same
400        # level order for each markup.
401        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
402        desc = desc.rstrip(" \t\n").lstrip("\n")
403
404        # Python's regex performance for non-compiled expressions is a lot
405        # than Perl, as Perl automatically caches them at their
406        # first usage. Here, we'll need to do the same, as otherwise the
407        # performance penalty is be high
408
409        new_desc = ""
410        for d in desc.split("\n"):
411            if d == "":
412                new_desc += "\n"
413                continue
414
415            # Use cross-references for doc files where needed
416            d = self.re_doc.sub(r":doc:`/\1`", d)
417
418            # Use cross-references for ABI generated docs where needed
419            matches = self.re_abi.findall(d)
420            for m in matches:
421                abi = m[0] + m[1]
422
423                xref = self.file_refs.get(abi)
424                if not xref:
425                    # This may happen if ABI is on a separate directory,
426                    # like parsing ABI testing and symbol is at stable.
427                    # The proper solution is to move this part of the code
428                    # for it to be inside sphinx/kernel_abi.py
429                    self.log.info("Didn't find ABI reference for '%s'", abi)
430                else:
431                    new = self.re_escape.sub(r"\\\1", m[1])
432                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
433
434            # Seek for cross reference symbols like /sys/...
435            # Need to be careful to avoid doing it on a code block
436            if d[0] not in [" ", "\t"]:
437                matches = self.re_xref_node.findall(d)
438                for m in matches:
439                    # Finding ABI here is more complex due to wildcards
440                    xref = self.what_refs.get(m)
441                    if xref:
442                        new = self.re_escape.sub(r"\\\1", m)
443                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
444
445            new_desc += d + "\n"
446
447        return new_desc + "\n\n"
448
449    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
450            filter_path=None):
451        """Print ABI at stdout"""
452
453        part = None
454        for key, v in sorted(self.data.items(),
455                             key=lambda x: (x[1].get("type", ""),
456                                            x[1].get("what"))):
457
458            wtype = v.get("type", "Symbol")
459            file_ref = v.get("file")
460            names = v.get("what", [""])
461
462            if wtype == "File":
463                if not show_file:
464                    continue
465            else:
466                if not show_symbols:
467                    continue
468
469            if filter_path:
470                if v.get("path") != filter_path:
471                    continue
472
473            msg = ""
474
475            if wtype != "File":
476                cur_part = names[0]
477                if cur_part.find("/") >= 0:
478                    match = self.re_what.match(cur_part)
479                    if match:
480                        symbol = match.group(1).rstrip("/")
481                        cur_part = "Symbols under " + symbol
482
483                if cur_part and cur_part != part:
484                    part = cur_part
485                    msg += f"{part}\n{"-" * len(part)}\n\n"
486
487                msg += f".. _{key}:\n\n"
488
489                max_len = 0
490                for i in range(0, len(names)):           # pylint: disable=C0200
491                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
492
493                    max_len = max(max_len, len(names[i]))
494
495                msg += "+-" + "-" * max_len + "-+\n"
496                for name in names:
497                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
498                    msg += "+-" + "-" * max_len + "-+\n"
499                msg += "\n"
500
501            for ref in file_ref:
502                if wtype == "File":
503                    msg += f".. _{ref[1]}:\n\n"
504                else:
505                    base = os.path.basename(ref[0])
506                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
507
508            if wtype == "File":
509                msg += f"{names[0]}\n{"-" * len(names[0])}\n\n"
510
511            desc = v.get("description")
512            if not desc and wtype != "File":
513                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
514
515            if desc:
516                if output_in_txt:
517                    msg += self.desc_txt(desc)
518                else:
519                    msg += self.desc_rst(desc)
520
521            symbols = v.get("symbols")
522            if symbols:
523                msg += "Has the following ABI:\n\n"
524
525                for w, label in symbols:
526                    # Escape special chars from content
527                    content = self.re_escape.sub(r"\\\1", w)
528
529                    msg += f"- :ref:`{content} <{label}>`\n\n"
530
531            users = v.get("users")
532            if users and users.strip(" \t\n"):
533                msg += f"Users:\n\t{users.strip("\n").replace('\n', '\n\t')}\n\n"
534
535            ln = v.get("line_no", 1)
536
537            yield (msg, file_ref[0][0], ln)
538
539    def check_issues(self):
540        """Warn about duplicated ABI entries"""
541
542        for what, v in self.what_symbols.items():
543            files = v.get("file")
544            if not files:
545                # Should never happen if the parser works properly
546                self.log.warning("%s doesn't have a file associated", what)
547                continue
548
549            if len(files) == 1:
550                continue
551
552            f = []
553            for fname, lines in sorted(files.items()):
554                if not lines:
555                    f.append(f"{fname}")
556                elif len(lines) == 1:
557                    f.append(f"{fname}:{lines[0]}")
558                else:
559                    f.append(f"{fname} lines {", ".join(str(x) for x in lines)}")
560
561            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
562
563    def search_symbols(self, expr):
564        """ Searches for ABI symbols """
565
566        regex = re.compile(expr, re.I)
567
568        found_keys = 0
569        for t in sorted(self.data.items(), key=lambda x: [0]):
570            v = t[1]
571
572            wtype = v.get("type", "")
573            if wtype == "File":
574                continue
575
576            for what in v.get("what", [""]):
577                if regex.search(what):
578                    found_keys += 1
579
580                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
581                    date = v.get("date", "").strip(" \t\n")
582                    contact = v.get("contact", "").strip(" \t\n")
583                    users = v.get("users", "").strip(" \t\n")
584                    desc = v.get("description", "").strip(" \t\n")
585
586                    files = []
587                    for f in v.get("file", ()):
588                        files.append(f[0])
589
590                    what = str(found_keys) + ". " + what
591                    title_tag = "-" * len(what)
592
593                    print(f"\n{what}\n{title_tag}\n")
594
595                    if kernelversion:
596                        print(f"Kernel version:\t\t{kernelversion}")
597
598                    if date:
599                        print(f"Date:\t\t\t{date}")
600
601                    if contact:
602                        print(f"Contact:\t\t{contact}")
603
604                    if users:
605                        print(f"Users:\t\t\t{users}")
606
607                    print(f"Defined on file{'s'[:len(files) ^ 1]}:\t{", ".join(files)}")
608
609                    if desc:
610                        print(f"\n{desc.strip("\n")}\n")
611
612        if not found_keys:
613            print(f"Regular expression /{expr}/ not found.")
614