xref: /linux/scripts/get_maintainer.pl (revision 27258e448eb301cf89e351df87aa8cb916653bf2)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainers.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainers.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.17';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git = 1;
27my $email_git_penguin_chiefs = 0;
28my $email_git_min_signatures = 1;
29my $email_git_max_maintainers = 5;
30my $email_git_min_percent = 5;
31my $email_git_since = "1-year-ago";
32my $output_multiline = 1;
33my $output_separator = ", ";
34my $scm = 0;
35my $web = 0;
36my $subsystem = 0;
37my $status = 0;
38my $from_filename = 0;
39my $version = 0;
40my $help = 0;
41
42my $exit = 0;
43
44my @penguin_chief = ();
45push(@penguin_chief,"Linus Torvalds:torvalds\@linux-foundation.org");
46#Andrew wants in on most everything - 2009/01/14
47#push(@penguin_chief,"Andrew Morton:akpm\@linux-foundation.org");
48
49my @penguin_chief_names = ();
50foreach my $chief (@penguin_chief) {
51    if ($chief =~ m/^(.*):(.*)/) {
52	my $chief_name = $1;
53	my $chief_addr = $2;
54	push(@penguin_chief_names, $chief_name);
55    }
56}
57my $penguin_chiefs = "\(" . join("|",@penguin_chief_names) . "\)";
58
59# rfc822 email address - preloaded methods go here.
60my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
61my $rfc822_char = '[\\000-\\377]';
62
63if (!GetOptions(
64		'email!' => \$email,
65		'git!' => \$email_git,
66		'git-chief-penguins!' => \$email_git_penguin_chiefs,
67		'git-min-signatures=i' => \$email_git_min_signatures,
68		'git-max-maintainers=i' => \$email_git_max_maintainers,
69		'git-min-percent=i' => \$email_git_min_percent,
70		'git-since=s' => \$email_git_since,
71		'm!' => \$email_maintainer,
72		'n!' => \$email_usename,
73		'l!' => \$email_list,
74		's!' => \$email_subscriber_list,
75		'multiline!' => \$output_multiline,
76		'separator=s' => \$output_separator,
77		'subsystem!' => \$subsystem,
78		'status!' => \$status,
79		'scm!' => \$scm,
80		'web!' => \$web,
81		'f|file' => \$from_filename,
82		'v|version' => \$version,
83		'h|help' => \$help,
84		)) {
85    usage();
86    die "$P: invalid argument\n";
87}
88
89if ($help != 0) {
90    usage();
91    exit 0;
92}
93
94if ($version != 0) {
95    print("${P} ${V}\n");
96    exit 0;
97}
98
99if ($#ARGV < 0) {
100    usage();
101    die "$P: argument missing: patchfile or -f file please\n";
102}
103
104my $selections = $email + $scm + $status + $subsystem + $web;
105if ($selections == 0) {
106    usage();
107    die "$P:  Missing required option: email, scm, status, subsystem or web\n";
108}
109
110if ($email && ($email_maintainer + $email_list + $email_subscriber_list
111	       + $email_git + $email_git_penguin_chiefs) == 0) {
112    usage();
113    die "$P: Please select at least 1 email option\n";
114}
115
116if (!top_of_kernel_tree($lk_path)) {
117    die "$P: The current directory does not appear to be "
118	. "a linux kernel source tree.\n";
119}
120
121## Read MAINTAINERS for type/value pairs
122
123my @typevalue = ();
124open(MAINT, "<${lk_path}MAINTAINERS") || die "$P: Can't open MAINTAINERS\n";
125while (<MAINT>) {
126    my $line = $_;
127
128    if ($line =~ m/^(\C):\s*(.*)/) {
129	my $type = $1;
130	my $value = $2;
131
132	##Filename pattern matching
133	if ($type eq "F" || $type eq "X") {
134	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
135	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
136	    $value =~ s/\?/\./g;         ##Convert ? to .
137	    ##if pattern is a directory and it lacks a trailing slash, add one
138	    if ((-d $value)) {
139		$value =~ s@([^/])$@$1/@;
140	    }
141	}
142	push(@typevalue, "$type:$value");
143    } elsif (!/^(\s)*$/) {
144	$line =~ s/\n$//g;
145	push(@typevalue, $line);
146    }
147}
148close(MAINT);
149
150## use the filenames on the command line or find the filenames in the patchfiles
151
152my @files = ();
153
154foreach my $file (@ARGV) {
155    ##if $file is a directory and it lacks a trailing slash, add one
156    if ((-d $file)) {
157	$file =~ s@([^/])$@$1/@;
158    } elsif (!(-f $file)) {
159	die "$P: file '${file}' not found\n";
160    }
161    if ($from_filename) {
162	push(@files, $file);
163    } else {
164	my $file_cnt = @files;
165	open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
166	while (<PATCH>) {
167	    if (m/^\+\+\+\s+(\S+)/) {
168		my $filename = $1;
169		$filename =~ s@^[^/]*/@@;
170		$filename =~ s@\n@@;
171		push(@files, $filename);
172	    }
173	}
174	close(PATCH);
175	if ($file_cnt == @files) {
176	    warn "$P: file '${file}' doesn't appear to be a patch.  "
177		. "Add -f to options?\n";
178	}
179	@files = sort_and_uniq(@files);
180    }
181}
182
183my @email_to = ();
184my @list_to = ();
185my @scm = ();
186my @web = ();
187my @subsystem = ();
188my @status = ();
189
190# Find responsible parties
191
192foreach my $file (@files) {
193
194#Do not match excluded file patterns
195
196    my $exclude = 0;
197    foreach my $line (@typevalue) {
198	if ($line =~ m/^(\C):\s*(.*)/) {
199	    my $type = $1;
200	    my $value = $2;
201	    if ($type eq 'X') {
202		if (file_match_pattern($file, $value)) {
203		    $exclude = 1;
204		}
205	    }
206	}
207    }
208
209    if (!$exclude) {
210	my $tvi = 0;
211	foreach my $line (@typevalue) {
212	    if ($line =~ m/^(\C):\s*(.*)/) {
213		my $type = $1;
214		my $value = $2;
215		if ($type eq 'F') {
216		    if (file_match_pattern($file, $value)) {
217			add_categories($tvi);
218		    }
219		}
220	    }
221	    $tvi++;
222	}
223    }
224
225    if ($email && $email_git) {
226	recent_git_signoffs($file);
227    }
228
229}
230
231if ($email) {
232    foreach my $chief (@penguin_chief) {
233	if ($chief =~ m/^(.*):(.*)/) {
234	    my $email_address;
235	    if ($email_usename) {
236		$email_address = format_email($1, $2);
237	    } else {
238		$email_address = $2;
239	    }
240	    if ($email_git_penguin_chiefs) {
241		push(@email_to, $email_address);
242	    } else {
243		@email_to = grep(!/${email_address}/, @email_to);
244	    }
245	}
246    }
247}
248
249if ($email || $email_list) {
250    my @to = ();
251    if ($email) {
252	@to = (@to, @email_to);
253    }
254    if ($email_list) {
255	@to = (@to, @list_to);
256    }
257    output(uniq(@to));
258}
259
260if ($scm) {
261    @scm = sort_and_uniq(@scm);
262    output(@scm);
263}
264
265if ($status) {
266    @status = sort_and_uniq(@status);
267    output(@status);
268}
269
270if ($subsystem) {
271    @subsystem = sort_and_uniq(@subsystem);
272    output(@subsystem);
273}
274
275if ($web) {
276    @web = sort_and_uniq(@web);
277    output(@web);
278}
279
280exit($exit);
281
282sub file_match_pattern {
283    my ($file, $pattern) = @_;
284    if (substr($pattern, -1) eq "/") {
285	if ($file =~ m@^$pattern@) {
286	    return 1;
287	}
288    } else {
289	if ($file =~ m@^$pattern@) {
290	    my $s1 = ($file =~ tr@/@@);
291	    my $s2 = ($pattern =~ tr@/@@);
292	    if ($s1 == $s2) {
293		return 1;
294	    }
295	}
296    }
297    return 0;
298}
299
300sub usage {
301    print <<EOT;
302usage: $P [options] patchfile
303       $P [options] -f file|directory
304version: $V
305
306MAINTAINER field selection options:
307  --email => print email address(es) if any
308    --git => include recent git \*-by: signers
309    --git-chief-penguins => include ${penguin_chiefs}
310    --git-min-signatures => number of signatures required (default: 1)
311    --git-max-maintainers => maximum maintainers to add (default: 5)
312    --git-min-percent => minimum percentage of commits required (default: 5)
313    --git-since => git history to use (default: 1-year-ago)
314    --m => include maintainer(s) if any
315    --n => include name 'Full Name <addr\@domain.tld>'
316    --l => include list(s) if any
317    --s => include subscriber only list(s) if any
318  --scm => print SCM tree(s) if any
319  --status => print status if any
320  --subsystem => print subsystem name if any
321  --web => print website(s) if any
322
323Output type options:
324  --separator [, ] => separator for multiple entries on 1 line
325  --multiline => print 1 entry per line
326
327Default options:
328  [--email --git --m --n --l --multiline]
329
330Other options:
331  --version => show version
332  --help => show this help information
333
334Notes:
335  Using "-f directory" may give unexpected results:
336
337  Used with "--git", git signators for _all_ files in and below
338     directory are examined as git recurses directories.
339     Any specified X: (exclude) pattern matches are _not_ ignored.
340  Used with "--nogit", directory is used as a pattern match,
341     no individual file within the directory or subdirectory
342     is matched.
343EOT
344}
345
346sub top_of_kernel_tree {
347	my ($lk_path) = @_;
348
349	if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
350	    $lk_path .= "/";
351	}
352	if (   (-f "${lk_path}COPYING")
353	    && (-f "${lk_path}CREDITS")
354	    && (-f "${lk_path}Kbuild")
355	    && (-f "${lk_path}MAINTAINERS")
356	    && (-f "${lk_path}Makefile")
357	    && (-f "${lk_path}README")
358	    && (-d "${lk_path}Documentation")
359	    && (-d "${lk_path}arch")
360	    && (-d "${lk_path}include")
361	    && (-d "${lk_path}drivers")
362	    && (-d "${lk_path}fs")
363	    && (-d "${lk_path}init")
364	    && (-d "${lk_path}ipc")
365	    && (-d "${lk_path}kernel")
366	    && (-d "${lk_path}lib")
367	    && (-d "${lk_path}scripts")) {
368		return 1;
369	}
370	return 0;
371}
372
373sub format_email {
374    my ($name, $email) = @_;
375
376    $name =~ s/^\s+|\s+$//g;
377    $name =~ s/^\"|\"$//g;
378    $email =~ s/^\s+|\s+$//g;
379
380    my $formatted_email = "";
381
382    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
383	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
384	$formatted_email = "\"${name}\"\ \<${email}\>";
385    } else {
386	$formatted_email = "${name} \<${email}\>";
387    }
388    return $formatted_email;
389}
390
391sub add_categories {
392    my ($index) = @_;
393
394    $index = $index - 1;
395    while ($index >= 0) {
396	my $tv = $typevalue[$index];
397	if ($tv =~ m/^(\C):\s*(.*)/) {
398	    my $ptype = $1;
399	    my $pvalue = $2;
400	    if ($ptype eq "L") {
401		my $list_address = $pvalue;
402		my $list_additional = "";
403		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
404		    $list_address = $1;
405		    $list_additional = $2;
406		}
407		if ($list_additional =~ m/subscribers-only/) {
408		    if ($email_subscriber_list) {
409			push(@list_to, $list_address);
410		    }
411		} else {
412		    if ($email_list) {
413			push(@list_to, $list_address);
414		    }
415		}
416	    } elsif ($ptype eq "M") {
417		my $p_used = 0;
418		if ($index >= 0) {
419		    my $tv = $typevalue[$index - 1];
420		    if ($tv =~ m/^(\C):\s*(.*)/) {
421			if ($1 eq "P") {
422			    if ($email_usename) {
423				push_email_address(format_email($2, $pvalue));
424				$p_used = 1;
425			    }
426			}
427		    }
428		}
429		if (!$p_used) {
430		    push_email_addresses($pvalue);
431		}
432	    } elsif ($ptype eq "T") {
433		push(@scm, $pvalue);
434	    } elsif ($ptype eq "W") {
435		push(@web, $pvalue);
436	    } elsif ($ptype eq "S") {
437		push(@status, $pvalue);
438	    }
439
440	    $index--;
441	} else {
442	    push(@subsystem,$tv);
443	    $index = -1;
444	}
445    }
446}
447
448sub push_email_address {
449    my ($email_address) = @_;
450
451    my $email_name = "";
452    if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) {
453	$email_name = $1;
454	$email_address = $2;
455    }
456
457    if ($email_maintainer) {
458	if ($email_usename && $email_name) {
459	    push(@email_to, format_email($email_name, $email_address));
460	} else {
461	    push(@email_to, $email_address);
462	}
463    }
464}
465
466sub push_email_addresses {
467    my ($address) = @_;
468
469    my @address_list = ();
470
471    if (rfc822_valid($address)) {
472	push_email_address($address);
473    } elsif (@address_list = rfc822_validlist($address)) {
474	my $array_count = shift(@address_list);
475	while (my $entry = shift(@address_list)) {
476	    push_email_address($entry);
477	}
478    } else {
479	warn("Invalid MAINTAINERS address: '" . $address . "'\n");
480    }
481}
482
483sub which {
484    my ($bin) = @_;
485
486    foreach my $path (split(/:/, $ENV{PATH})) {
487	if (-e "$path/$bin") {
488	    return "$path/$bin";
489	}
490    }
491
492    return "";
493}
494
495sub recent_git_signoffs {
496    my ($file) = @_;
497
498    my $sign_offs = "";
499    my $cmd = "";
500    my $output = "";
501    my $count = 0;
502    my @lines = ();
503    my $total_sign_offs;
504
505    if (which("git") eq "") {
506	warn("$P: git not found.  Add --nogit to options?\n");
507	return;
508    }
509    if (!(-d ".git")) {
510	warn("$P: .git directory not found.  Use a git repository for better results.\n");
511	warn("$P: perhaps 'git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git'\n");
512	return;
513    }
514
515    $cmd = "git log --since=${email_git_since} -- ${file}";
516    $cmd .= " | grep -Ei \"^[-_ 	a-z]+by:.*\\\@.*\$\"";
517    if (!$email_git_penguin_chiefs) {
518	$cmd .= " | grep -Ev \"${penguin_chiefs}\"";
519    }
520    $cmd .= " | cut -f2- -d\":\"";
521    $cmd .= " | sort | uniq -c | sort -rn";
522
523    $output = `${cmd}`;
524    $output =~ s/^\s*//gm;
525
526    @lines = split("\n", $output);
527
528    $total_sign_offs = 0;
529    foreach my $line (@lines) {
530	if ($line =~ m/([0-9]+)\s+(.*)/) {
531	    $total_sign_offs += $1;
532	} else {
533	    die("$P: Unexpected git output: ${line}\n");
534	}
535    }
536
537    foreach my $line (@lines) {
538	if ($line =~ m/([0-9]+)\s+(.*)/) {
539	    my $sign_offs = $1;
540	    $line = $2;
541	    $count++;
542	    if ($sign_offs < $email_git_min_signatures ||
543	        $count > $email_git_max_maintainers ||
544		$sign_offs * 100 / $total_sign_offs < $email_git_min_percent) {
545		last;
546	    }
547	}
548	if ($line =~ m/(.+)<(.+)>/) {
549	    my $git_name = $1;
550	    my $git_addr = $2;
551	    if ($email_usename) {
552		push(@email_to, format_email($git_name, $git_addr));
553	    } else {
554		push(@email_to, $git_addr);
555	    }
556	} elsif ($line =~ m/<(.+)>/) {
557	    my $git_addr = $1;
558	    push(@email_to, $git_addr);
559	} else {
560	    push(@email_to, $line);
561	}
562    }
563}
564
565sub uniq {
566    my @parms = @_;
567
568    my %saw;
569    @parms = grep(!$saw{$_}++, @parms);
570    return @parms;
571}
572
573sub sort_and_uniq {
574    my @parms = @_;
575
576    my %saw;
577    @parms = sort @parms;
578    @parms = grep(!$saw{$_}++, @parms);
579    return @parms;
580}
581
582sub output {
583    my @parms = @_;
584
585    if ($output_multiline) {
586	foreach my $line (@parms) {
587	    print("${line}\n");
588	}
589    } else {
590	print(join($output_separator, @parms));
591	print("\n");
592    }
593}
594
595my $rfc822re;
596
597sub make_rfc822re {
598#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
599#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
600#   This regexp will only work on addresses which have had comments stripped
601#   and replaced with rfc822_lwsp.
602
603    my $specials = '()<>@,;:\\\\".\\[\\]';
604    my $controls = '\\000-\\037\\177';
605
606    my $dtext = "[^\\[\\]\\r\\\\]";
607    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
608
609    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
610
611#   Use zero-width assertion to spot the limit of an atom.  A simple
612#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
613    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
614    my $word = "(?:$atom|$quoted_string)";
615    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
616
617    my $sub_domain = "(?:$atom|$domain_literal)";
618    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
619
620    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
621
622    my $phrase = "$word*";
623    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
624    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
625    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
626
627    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
628    my $address = "(?:$mailbox|$group)";
629
630    return "$rfc822_lwsp*$address";
631}
632
633sub rfc822_strip_comments {
634    my $s = shift;
635#   Recursively remove comments, and replace with a single space.  The simpler
636#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
637#   chars in atoms, for example.
638
639    while ($s =~ s/^((?:[^"\\]|\\.)*
640                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
641                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
642    return $s;
643}
644
645#   valid: returns true if the parameter is an RFC822 valid address
646#
647sub rfc822_valid ($) {
648    my $s = rfc822_strip_comments(shift);
649
650    if (!$rfc822re) {
651        $rfc822re = make_rfc822re();
652    }
653
654    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
655}
656
657#   validlist: In scalar context, returns true if the parameter is an RFC822
658#              valid list of addresses.
659#
660#              In list context, returns an empty list on failure (an invalid
661#              address was found); otherwise a list whose first element is the
662#              number of addresses found and whose remaining elements are the
663#              addresses.  This is needed to disambiguate failure (invalid)
664#              from success with no addresses found, because an empty string is
665#              a valid list.
666
667sub rfc822_validlist ($) {
668    my $s = rfc822_strip_comments(shift);
669
670    if (!$rfc822re) {
671        $rfc822re = make_rfc822re();
672    }
673    # * null list items are valid according to the RFC
674    # * the '1' business is to aid in distinguishing failure from no results
675
676    my @r;
677    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
678	$s =~ m/^$rfc822_char*$/) {
679        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
680            push @r, $1;
681        }
682        return wantarray ? (scalar(@r), @r) : 1;
683    }
684    else {
685        return wantarray ? () : 0;
686    }
687}
688