# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE


class Comment:
    """A single comment extracted from a source file.

    Attributes:
        comment: Raw comment text, without the comment delimiters.
        line: Line number (1-based) the comment was found on. For multi-line
            comments this is the line the comment starts on.
        multiline: True for a '/* ... */' comment, False for a '//' comment.
    """

    def __init__(self, comment, line, multiline):
        self.comment = comment
        self.line = line
        self.multiline = multiline

    def __repr__(self):
        return "Comment(comment=%r, line=%r, multiline=%r)" % (
            self.comment, self.line, self.multiline)

    @property
    def clean_text(self):
        """Return the comment text with comment decoration removed.

        Single-line comments are stripped of surrounding whitespace.
        For multi-line comments only lines that start with ' * ' are kept
        (with that prefix removed); lines of exactly two characters (such
        as a bare ' *') become empty lines. Every other line, including
        the text on the opening line, is dropped.
        """
        if not self.multiline:
            return self.comment.strip()

        cleanlines = []
        for line in self.comment.splitlines():
            if line.startswith(' * '):
                cleanlines.append(line[3:])
            elif len(line) == 2:
                cleanlines.append('')
        return '\n'.join(cleanlines)

    @property
    def doc_text(self):
        """Return the documentation payload of a '!doc' comment, or None.

        A comment is a doc comment when its cleaned text starts with
        '!doc'. The marker and the single character following it are
        skipped; the remainder is returned.
        """
        text = self.clean_text
        if text.startswith('!doc'):
            return text[5:]
        return None


class FileError(Exception):
    """Raised when the source file cannot be opened or read."""


class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""


# States of the comment scanner.
_CODE = 0           # Outside of any comment or string literal.
_SLASH = 1          # Saw '/', next character decides what it starts.
_LINE_COMMENT = 2   # Inside a '//' comment.
_BLOCK_COMMENT = 3  # Inside a '/* ... */' comment.
_BLOCK_STAR = 4     # Inside a block comment, previous character was '*'.
_STRING = 5         # Inside a double-quoted string literal.
_STRING_ESCAPE = 6  # Inside a string literal, right after a backslash.


def _scan_comments(text):
    """Scan C family source text and return the Comment objects found in it."""
    state = _CODE
    parts = []          # Characters of the comment currently being read.
    comments = []
    line_counter = 1
    comment_start = 1   # Line on which the current block comment began.

    for char in text:
        if state == _CODE:
            if char == '/':
                state = _SLASH
            elif char == '"':
                state = _STRING
        elif state == _SLASH:
            if char == '/':
                state = _LINE_COMMENT
            elif char == '*':
                comment_start = line_counter
                state = _BLOCK_COMMENT
            elif char == '"':
                # A lone '/' (e.g. division) directly followed by a string
                # literal: the string must still be tracked, otherwise its
                # contents would be scanned for comment markers.
                state = _STRING
            else:
                state = _CODE
        elif state == _LINE_COMMENT:
            if char == '\n':
                # line_counter is only advanced below, so it still refers
                # to the line the comment is on.
                comments.append(Comment(''.join(parts), line_counter, False))
                parts = []
                state = _CODE
            else:
                parts.append(char)
        elif state == _BLOCK_COMMENT:
            if char == '*':
                state = _BLOCK_STAR
            else:
                parts.append(char)
        elif state == _BLOCK_STAR:
            if char == '/':
                comments.append(Comment(''.join(parts), comment_start, True))
                parts = []
                state = _CODE
            else:
                # The pending '*' did not close the comment, so it is text.
                parts.append('*')
                # On another '*' stay in this state: that new asterisk may
                # still be the one that closes the comment ('**/').
                if char != '*':
                    parts.append(char)
                    state = _BLOCK_COMMENT
        elif state == _STRING:
            if char == '"':
                state = _CODE
            elif char == '\\':
                state = _STRING_ESCAPE
        elif state == _STRING_ESCAPE:
            # The escaped character is consumed whatever it is.
            state = _STRING

        if char == '\n':
            line_counter += 1

    if state in (_BLOCK_COMMENT, _BLOCK_STAR):
        raise UnterminatedCommentError()
    if state == _LINE_COMMENT:
        # File ended without a trailing newline while in a '//' comment.
        comments.append(Comment(''.join(parts), line_counter, False))
    return comments


def extract_comments(filename):
    """Extracts a list of comments from the given C family source file.

    Comments are represented with the Comment class defined in this module.
    C family comments come in two forms, single and multi-line comments.
        - Single-line comments begin with '//' and continue to the end of
          line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
          multiple lines of code. If a multi-line comment does not terminate
          before EOF is reached, then an exception is raised.
    Comment markers inside double-quoted string literals are ignored.
    Note that this doesn't take language-specific preprocessor directives
    into consideration.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of Comment objects in the order that they appear in the
        file.
    Raises:
        FileError: File was unable to be open or read.
        UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    try:
        with open(filename, 'r') as source_file:
            text = source_file.read()
    except OSError as exception:
        raise FileError(str(exception)) from exception
    return _scan_comments(text)


if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s SOURCE_FILE' % sys.argv[0])

    # Print the payload of every '!doc' comment that has non-empty text.
    for comment in extract_comments(sys.argv[1]):
        doc = comment.doc_text
        if doc:
            print(doc)