xref: /freebsd/contrib/pkgconf/doc/extract.py (revision a3cefe7f2b4df0f70ff92d4570ce18e517af43ec)
1# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
2# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
3
4
class Comment:
    """A comment pulled out of a source file.

    Attributes:
        comment: Raw comment text as stored by the caller.
        line: Line number associated with the comment.
        multiline: True for a multi-line comment, False for a single-line one.
    """

    def __init__(self, comment, line, multiline):
        self.comment, self.line, self.multiline = comment, line, multiline

    def __repr__(self):
        fields = (self.comment, self.line, self.multiline)
        return "Comment(comment=%r, line=%r, multiline=%r)" % fields

    @property
    def clean_text(self):
        """Comment text with the comment decoration removed.

        A single-line comment is returned with surrounding whitespace
        stripped. For a multi-line comment, every line starting with ' * '
        is kept without that prefix, every line that is exactly two
        characters long becomes an empty line, and all other lines are
        dropped.
        """
        if self.multiline:
            body = (
                raw[3:] if raw.startswith(' * ') else ''
                for raw in self.comment.splitlines()
                if raw.startswith(' * ') or len(raw) == 2
            )
            return '\n'.join(body)
        return self.comment.strip()

    @property
    def doc_text(self):
        """Documentation payload of a comment marked with '!doc'.

        Returns the cleaned text from index 5 onward (the four-character
        marker plus the one character that follows it) when the cleaned
        text begins with '!doc'; otherwise returns None.
        """
        cleaned = self.clean_text
        return cleaned[5:] if cleaned.startswith('!doc') else None
34
35
class FileError(Exception):
    """Raised when a source file cannot be opened or read."""
38
39
class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""
42
43
def extract_comments(filename):
    """Extracts a list of comments from the given C family source file.

    C family comments come in two forms, single and multi-line comments.
        - Single-line comments begin with '//' and continue to the end of line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
            multiple lines of code. If a multi-line comment does not terminate
            before EOF is reached, then an exception is raised.
    Comment openers that appear inside string literals ("...") or character
    literals ('...') are ignored, including literals that directly follow a
    '/' operator. A character literal is assumed to end at the end of its
    line at the latest, so a stray apostrophe cannot hide later comments.
    Note that this doesn't take language-specific preprocessor directives into
    consideration.
    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of Comment objects in the order that they appear in the
        file. The stored text excludes the comment delimiters. A single-line
        comment records the line it is on; a multi-line comment records the
        line on which it opens.
    Raises:
        FileError: File was unable to be open or read.
        UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    # Only the file access is guarded, so parsing errors are never mistaken
    # for I/O failures.
    try:
        with open(filename, 'r') as source_file:
            source = source_file.read()
    except OSError as exception:
        raise FileError(str(exception))

    # Scanner states.
    CODE = 0                # outside any comment or literal
    SLASH = 1               # saw '/', which may open a comment
    LINE_COMMENT = 2        # inside '// ...'
    BLOCK_COMMENT = 3       # inside '/* ...'
    BLOCK_COMMENT_STAR = 4  # inside '/* ...' and the last char was '*'
    STRING = 5              # inside "..."
    STRING_ESCAPE = 6       # inside "..." right after a backslash
    CHAR = 7                # inside '...'
    CHAR_ESCAPE = 8         # inside '...' right after a backslash

    # State entered when a given character is seen in plain code.
    code_transitions = {'/': SLASH, '"': STRING, "'": CHAR}

    state = CODE
    comments = []
    pieces = []  # characters of the comment currently being collected
    line_counter = 1
    comment_start = 1

    for char in source:
        if state == CODE:
            state = code_transitions.get(char, CODE)
        elif state == SLASH:
            if char == '/':
                state = LINE_COMMENT
            elif char == '*':
                comment_start = line_counter
                state = BLOCK_COMMENT
            else:
                # The slash was an operator; this character is ordinary code
                # and may itself open a string or character literal.
                state = code_transitions.get(char, CODE)
        elif state == LINE_COMMENT:
            if char == '\n':
                # line_counter is bumped below, so it still names the line
                # the comment is on.
                comments.append(Comment(''.join(pieces), line_counter, False))
                pieces = []
                state = CODE
            else:
                pieces.append(char)
        elif state == BLOCK_COMMENT:
            if char == '*':
                state = BLOCK_COMMENT_STAR
            else:
                pieces.append(char)
        elif state == BLOCK_COMMENT_STAR:
            if char == '/':
                comments.append(Comment(''.join(pieces), comment_start, True))
                pieces = []
                state = CODE
            else:
                # The pending '*' was part of the comment text after all.
                pieces.append('*')
                # On another '*', stay here: it may be the one before '/'.
                if char != '*':
                    pieces.append(char)
                    state = BLOCK_COMMENT
        elif state == STRING:
            if char == '"':
                state = CODE
            elif char == '\\':
                state = STRING_ESCAPE
        elif state == STRING_ESCAPE:
            state = STRING
        elif state == CHAR:
            if char == "'" or char == '\n':
                state = CODE
            elif char == '\\':
                state = CHAR_ESCAPE
        elif state == CHAR_ESCAPE:
            state = CHAR

        if char == '\n':
            line_counter += 1

    if state == BLOCK_COMMENT or state == BLOCK_COMMENT_STAR:
        raise UnterminatedCommentError()
    if state == LINE_COMMENT:
        # File ended without a trailing newline while in a '//' comment.
        comments.append(Comment(''.join(pieces), line_counter, False))
    return comments
141
142
if __name__ == '__main__':
    import sys

    # Script entry point: print the text of every '!doc' comment found in
    # the source file named by the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s <source-file>' % sys.argv[0])

    for comment in extract_comments(sys.argv[1]):
        # Evaluate the property once; it re-parses the comment on each access.
        doc_text = comment.doc_text
        if doc_text:
            print(doc_text)
150