#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0

"""
This script helps track the translation status of the documentation
in different locales, e.g., zh_CN. More specifically, it uses `git log`
to find the latest English commit as of the translation commit
(ordered by author date) and the latest English commit from HEAD. If
they differ, it reports the file and the commits that need to be resolved.

The usage is as follows:
- tools/docs/checktransupdate.py -l zh_CN
This will print all the files that need to be updated or translated in the zh_CN locale.
- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
This will only print the status of the specified file.
- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools
This will print the status of all files under the directory.

The output is something like:
Documentation/dev-tools/kfence.rst
No translation in the locale of zh_CN

Documentation/translations/zh_CN/dev-tools/testing-overview.rst
commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
1 commits needs resolving in total
"""

import os
import re
import subprocess
import time
import logging
from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
from datetime import datetime

# Layout of the dates printed by git's %aD / %cD placeholders.
_GIT_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %z"

# Translation commit messages may name the origin commit they were synced to
# with a 12-character abbreviated hash. The patterns are tried in this order
# on every message line.
_ORIGIN_HASH_PATTERNS = (
    # pattern 1: contains "update to commit HASH"
    re.compile(r"update to commit ([0-9a-f]{12})"),
    # pattern 2: contains "Update the translation through commit HASH"
    re.compile(r"Update the translation through commit ([0-9a-f]{12})"),
)


def _run_git(args):
    """Run ``git`` with the argument list *args* and return its stdout as text.

    The command is executed without a shell, so file names and revisions are
    passed through verbatim (no quoting or word-splitting issues). A failing
    git command is not treated as an error here: its stderr goes to the
    terminal and whatever it printed on stdout (usually nothing) is returned.
    """
    command = ["git", *args]
    logging.debug(" ".join(command))
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        encoding="utf-8",
        # Never abort on an oddly encoded commit message.
        errors="replace",
        check=False,
    )
    return completed.stdout


def _is_merge_tag_summary(summary):
    """Return True if the one-line commit summary belongs to a tag merge."""
    return "Merge tag" in summary


def get_origin_path(file_path):
    """Get the origin path from the translation path.

    The "translations" component and the locale component following it are
    dropped, e.g. ``Documentation/translations/zh_CN/a.rst`` becomes
    ``Documentation/a.rst``.

    Raises ValueError if *file_path* has no "translations" component.
    """
    paths = file_path.split("/")
    tidx = paths.index("translations")
    # Skip both "translations" and the locale directory right after it.
    return "/".join(paths[:tidx] + paths[tidx + 2 :])


def get_latest_commit_from(file_path, commit):
    """Get the latest commit touching *file_path* that is reachable from *commit*.

    Returns a dict with the keys "hash", "author_date", "commit_date"
    (timezone-aware datetimes) and "message" (list of message lines), or
    None when git reports no such commit.
    """
    output = _run_git(
        ["log", "--pretty=format:%H%n%aD%n%cD%n%n%B", commit, "-1", "--", file_path]
    )
    lines = output.split("\n")
    # A real record has at least: hash, author date, commit date.
    if len(lines) < 3:
        return None

    logging.debug("Result: %s", lines[0])

    return {
        "hash": lines[0],
        "author_date": datetime.strptime(lines[1], _GIT_DATE_FORMAT),
        "commit_date": datetime.strptime(lines[2], _GIT_DATE_FORMAT),
        # lines[3] is the blank separator emitted by the extra %n.
        "message": lines[4:],
    }


def get_origin_from_trans(origin_path, t_from_head):
    """Get the latest origin commit from the translation commit.

    Starting at the translation commit, walk back through the history of
    *origin_path* until a commit is found whose author date is not later
    than the author date of the translation commit. Returns that commit
    dict, or None if the history is exhausted.
    """
    o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
    while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
        # Too new: continue the search from the parent of this commit.
        o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t


def get_origin_from_trans_smartly(origin_path, t_from_head):
    """Get the latest origin commit from the formatted translation commit:
    (1) update to commit HASH (TITLE)
    (2) Update the translation through commit HASH (TITLE)

    The first message line matching either form decides the hash. Returns
    the commit dict for *origin_path* at that hash, or None when the message
    names no hash or git cannot resolve it.
    """
    origin_commit_hash = None
    for line in t_from_head["message"]:
        for pattern in _ORIGIN_HASH_PATTERNS:
            match = pattern.search(line)
            if match:
                origin_commit_hash = match.group(1)
                break
        if origin_commit_hash is not None:
            break

    if origin_commit_hash is None:
        return None

    o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
    if o_from_t is not None:
        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
    return o_from_t


def get_commits_count_between(opath, commit1, commit2):
    """Get the commits between two commits for the specified file.

    Despite the name, this returns the list of full commit hashes printed by
    ``git log commit1...commit2 -- opath`` (git's three-dot range), not a
    number. Empty lines are dropped.
    """
    output = _run_git(
        ["log", "--pretty=format:%H", f"{commit1}...{commit2}", "--", opath]
    )
    return [line for line in output.split("\n") if line != ""]


def pretty_output(commit):
    """Return the one-line summary of *commit*: abbreviated hash ("subject")."""
    return _run_git(["log", '--pretty=format:%h ("%s")', "-1", commit])


def valid_commit(commit):
    """Check if the commit is valid or not (tag merges are not counted)."""
    return not _is_merge_tag_summary(pretty_output(commit))


def check_per_file(file_path):
    """Check the translation status for the specified file.

    Logs the origin commits the translation has not caught up with, followed
    by their count. Problems (missing origin file, no history) are logged as
    errors and end the check for this file.
    """
    try:
        opath = get_origin_path(file_path)
    except ValueError:
        # The path has no "translations" component, so there is no origin.
        opath = None

    if opath is None or not os.path.isfile(opath):
        logging.error("Cannot find the origin path for %s", file_path)
        return

    o_from_head = get_latest_commit_from(opath, "HEAD")
    t_from_head = get_latest_commit_from(file_path, "HEAD")

    if o_from_head is None or t_from_head is None:
        logging.error("Cannot find the latest commit for %s", file_path)
        return

    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
    if o_from_t is None:
        o_from_t = get_origin_from_trans(opath, t_from_head)

    if o_from_t is None:
        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
        return

    if o_from_head["hash"] == o_from_t["hash"]:
        logging.debug("No update needed for %s", file_path)
        return

    logging.info(file_path)
    commits = get_commits_count_between(opath, o_from_t["hash"], o_from_head["hash"])
    count = 0
    for commit in commits:
        # Query git once per commit and reuse the summary for both the
        # merge filter and the report line.
        summary = pretty_output(commit)
        if _is_merge_tag_summary(summary):
            continue
        logging.info("commit %s", summary)
        count += 1
    logging.info("%d commits needs resolving in total\n", count)


def valid_locales(locale):
    """Check if the locale is valid or not.

    A locale is valid when Documentation/translations/<locale> exists in the
    tree two directories above this script. Returns *locale* unchanged;
    raises ArgumentTypeError otherwise (argparse turns that into a usage
    error).
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")
    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
        raise ArgumentTypeError(f"Invalid locale: {locale}")
    return locale


def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
    """List all files with the specified suffix in the folder and its subfolders.

    Directories whose base name is in *exclude_folders* are not descended
    into (this also applies to *folder* itself). *include_suffix* is matched
    with str.endswith(), so pass ".rst" rather than "rst" to match an
    extension exactly.
    """
    files = []
    stack = [folder]

    while stack:
        pwd = stack.pop()
        # filter out the exclude folders
        if os.path.basename(pwd) in exclude_folders:
            continue
        # list all files and folders
        for item in os.listdir(pwd):
            ab_item = os.path.join(pwd, item)
            if os.path.isdir(ab_item):
                stack.append(ab_item)
            elif ab_item.endswith(include_suffix):
                files.append(ab_item)

    return files


class DmesgFormatter(logging.Formatter):
    """Custom dmesg logging formatter: "[<timestamp>] <message>"."""

    def format(self, record):
        # The timestamp is the wall-clock time at formatting, in seconds
        # since the epoch with microsecond precision.
        timestamp = time.time()
        formatted_time = f"[{timestamp:>10.6f}]"
        return f"{formatted_time} {record.getMessage()}"


def config_logging(log_level, log_file="checktransupdate.log"):
    """Configure logging based on the log level.

    Attaches a console handler and a file handler (writing to *log_file*) to
    the root logger, both using DmesgFormatter and *log_level*.
    """
    # set up the root logger
    logger = logging.getLogger()
    logger.setLevel(log_level)

    # Create console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)

    # Create file handler
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(log_level)

    # Create formatter and add it to the handlers
    formatter = DmesgFormatter()
    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    # Add the handler to the logger
    logger.addHandler(console_handler)
    logger.addHandler(file_handler)


def main():
    """Main function of the script.

    Parses the command line, configures logging, collects the translation
    files to inspect (either every translation of the selected locale or the
    files/directories given as arguments) and checks each of them from the
    root of the kernel tree.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    linux_path = os.path.join(script_path, "../..")

    parser = ArgumentParser(description="Check the translation update")
    parser.add_argument(
        "-l",
        "--locale",
        default="zh_CN",
        type=valid_locales,
        help="Locale to check when files are not specified",
    )

    parser.add_argument(
        "--print-missing-translations",
        action=BooleanOptionalAction,
        default=True,
        help="Print files that do not have translations",
    )

    parser.add_argument(
        '--log',
        default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help='Set the logging level')

    parser.add_argument(
        '--logfile',
        default='checktransupdate.log',
        help='Set the logging file (default: checktransupdate.log)')

    parser.add_argument(
        "files", nargs="*", help="Files or directories to check, if not specified, check all files"
    )
    args = parser.parse_args()

    # Configure logging based on the --log and --logfile arguments
    log_level = getattr(logging, args.log.upper(), logging.INFO)
    config_logging(log_level, args.logfile)

    if not args.files:
        # No explicit targets: derive the expected translation path of every
        # official document and keep the ones that exist.
        files = []
        doc_root = os.path.join(linux_path, "Documentation")
        official_files = list_files_with_excluding_folders(
            doc_root, ["translations", "output"], ".rst"
        )

        for file in official_files:
            # Documentation/<rel> -> Documentation/translations/<locale>/<rel>
            rel_to_doc_root = os.path.relpath(file, doc_root)
            new_file = os.path.join(doc_root, "translations", args.locale, rel_to_doc_root)
            if os.path.isfile(new_file):
                files.append(new_file)
            elif args.print_missing_translations:
                logging.info(os.path.relpath(os.path.abspath(file), linux_path))
                logging.info("No translation in the locale of %s\n", args.locale)
    else:
        # check if the files are directories or files
        files = []
        for file in args.files:
            if os.path.isfile(file):
                files.append(file)
            elif os.path.isdir(file):
                # for directories, list all files in the directory and its subfolders
                files.extend(list_files_with_excluding_folders(file, [], ".rst"))
            else:
                # Do not drop a mistyped path without telling the user.
                logging.warning("Skipping %s: not a file or directory", file)

    # Paths must be relative to the kernel root because the checks run there.
    files = [os.path.relpath(os.path.abspath(path), linux_path) for path in files]

    # cd to linux root directory
    os.chdir(linux_path)

    for file in files:
        check_per_file(file)


if __name__ == "__main__":
    main()