xref: /linux/scripts/get_maintainer.pl (revision e20043b4765cdf7ec8e963d706bb91469cba8cb8)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22use File::Spec::Functions;
23use open qw(:std :encoding(UTF-8));
24
25my $cur_path = fastgetcwd() . '/';
26my $lk_path = "./";
27my $email = 1;
28my $email_usename = 1;
29my $email_maintainer = 1;
30my $email_reviewer = 1;
31my $email_fixes = 1;
32my $email_list = 1;
33my $email_moderated_list = 1;
34my $email_subscriber_list = 0;
35my $email_git_penguin_chiefs = 0;
36my $email_git = 0;
37my $email_git_all_signature_types = 0;
38my $email_git_blame = 0;
39my $email_git_blame_signatures = 1;
40my $email_git_fallback = 1;
41my $email_git_min_signatures = 1;
42my $email_git_max_maintainers = 5;
43my $email_git_min_percent = 5;
44my $email_git_since = "1-year-ago";
45my $email_hg_since = "-365";
46my $interactive = 0;
47my $email_remove_duplicates = 1;
48my $email_use_mailmap = 1;
49my $output_multiline = 1;
50my $output_separator = ", ";
51my $output_roles = 0;
52my $output_rolestats = 1;
53my $output_substatus = undef;
54my $output_section_maxlen = 50;
55my $scm = 0;
56my $tree = 1;
57my $web = 0;
58my $bug = 0;
59my $subsystem = 0;
60my $status = 0;
61my $letters = "";
62my $keywords = 1;
63my $keywords_in_file = 0;
64my $sections = 0;
65my $email_file_emails = 0;
66my $from_filename = 0;
67my $pattern_depth = 0;
68my $self_test = undef;
69my $version = 0;
70my $help = 0;
71my $find_maintainer_files = 0;
72my $maintainer_path;
73my $vcs_used = 0;
74
75my $exit = 0;
76
77my @files = ();
78my @fixes = ();			# If a patch description includes Fixes: lines
79my @range = ();
80my @keyword_tvi = ();
81my @file_emails = ();
82
83my %commit_author_hash;
84my %commit_signer_hash;
85
86my @penguin_chief = ();
87push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
88#Andrew wants in on most everything - 2009/01/14
89#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
90
91my @penguin_chief_names = ();
92foreach my $chief (@penguin_chief) {
93    if ($chief =~ m/^(.*):(.*)/) {
94	my $chief_name = $1;
95	my $chief_addr = $2;
96	push(@penguin_chief_names, $chief_name);
97    }
98}
99my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
100
101# Signature types of people who are either
102# 	a) responsible for the code in question, or
103# 	b) familiar enough with it to give relevant feedback
104my @signature_tags = ();
105push(@signature_tags, "Signed-off-by:");
106push(@signature_tags, "Reviewed-by:");
107push(@signature_tags, "Acked-by:");
108
109my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
110
111# rfc822 email address - preloaded methods go here.
112my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
113my $rfc822_char = '[\\000-\\377]';
114
115# VCS command support: class-like functions and strings
116
117my %VCS_cmds;
118
119my %VCS_cmds_git = (
120    "execute_cmd" => \&git_execute_cmd,
121    "available" => '(which("git") ne "") && (-e ".git")',
122    "find_signers_cmd" =>
123	"git log --no-color --follow --since=\$email_git_since " .
124	    '--numstat --no-merges ' .
125	    '--format="GitCommit: %H%n' .
126		      'GitAuthor: %an <%ae>%n' .
127		      'GitDate: %aD%n' .
128		      'GitSubject: %s%n' .
129		      '%b%n"' .
130	    " -- \$file",
131    "find_commit_signers_cmd" =>
132	"git log --no-color " .
133	    '--numstat ' .
134	    '--format="GitCommit: %H%n' .
135		      'GitAuthor: %an <%ae>%n' .
136		      'GitDate: %aD%n' .
137		      'GitSubject: %s%n' .
138		      '%b%n"' .
139	    " -1 \$commit",
140    "find_commit_author_cmd" =>
141	"git log --no-color " .
142	    '--numstat ' .
143	    '--format="GitCommit: %H%n' .
144		      'GitAuthor: %an <%ae>%n' .
145		      'GitDate: %aD%n' .
146		      'GitSubject: %s%n"' .
147	    " -1 \$commit",
148    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
149    "blame_file_cmd" => "git blame -l \$file",
150    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
151    "blame_commit_pattern" => "^([0-9a-f]+) ",
152    "author_pattern" => "^GitAuthor: (.*)",
153    "subject_pattern" => "^GitSubject: (.*)",
154    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
155    "file_exists_cmd" => "git ls-files \$file",
156    "list_files_cmd" => "git ls-files \$file",
157);
158
159my %VCS_cmds_hg = (
160    "execute_cmd" => \&hg_execute_cmd,
161    "available" => '(which("hg") ne "") && (-d ".hg")',
162    "find_signers_cmd" =>
163	"hg log --date=\$email_hg_since " .
164	    "--template='HgCommit: {node}\\n" .
165	                "HgAuthor: {author}\\n" .
166			"HgSubject: {desc}\\n'" .
167	    " -- \$file",
168    "find_commit_signers_cmd" =>
169	"hg log " .
170	    "--template='HgSubject: {desc}\\n'" .
171	    " -r \$commit",
172    "find_commit_author_cmd" =>
173	"hg log " .
174	    "--template='HgCommit: {node}\\n" .
175		        "HgAuthor: {author}\\n" .
176			"HgSubject: {desc|firstline}\\n'" .
177	    " -r \$commit",
178    "blame_range_cmd" => "",		# not supported
179    "blame_file_cmd" => "hg blame -n \$file",
180    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
181    "blame_commit_pattern" => "^([ 0-9a-f]+):",
182    "author_pattern" => "^HgAuthor: (.*)",
183    "subject_pattern" => "^HgSubject: (.*)",
184    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
185    "file_exists_cmd" => "hg files \$file",
186    "list_files_cmd" => "hg manifest -R \$file",
187);
188
189my $conf = which_conf(".get_maintainer.conf");
190if (-f $conf) {
191    my @conf_args;
192    open(my $conffile, '<', "$conf")
193	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
194
195    while (<$conffile>) {
196	my $line = $_;
197
198	$line =~ s/\s*\n?$//g;
199	$line =~ s/^\s*//g;
200	$line =~ s/\s+/ /g;
201
202	next if ($line =~ m/^\s*#/);
203	next if ($line =~ m/^\s*$/);
204
205	my @words = split(" ", $line);
206	foreach my $word (@words) {
207	    last if ($word =~ m/^#/);
208	    push (@conf_args, $word);
209	}
210    }
211    close($conffile);
212    unshift(@ARGV, @conf_args) if @conf_args;
213}
214
215my @ignore_emails = ();
216my $ignore_file = which_conf(".get_maintainer.ignore");
217if (-f $ignore_file) {
218    open(my $ignore, '<', "$ignore_file")
219	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
220    while (<$ignore>) {
221	my $line = $_;
222
223	$line =~ s/\s*\n?$//;
224	$line =~ s/^\s*//;
225	$line =~ s/\s+$//;
226	$line =~ s/#.*$//;
227
228	next if ($line =~ m/^\s*$/);
229	if (rfc822_valid($line)) {
230	    push(@ignore_emails, $line);
231	}
232    }
233    close($ignore);
234}
235
236if ($#ARGV > 0) {
237    foreach (@ARGV) {
238        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
239            die "$P: using --self-test does not allow any other option or argument\n";
240        }
241    }
242}
243
244if (!GetOptions(
245		'email!' => \$email,
246		'git!' => \$email_git,
247		'git-all-signature-types!' => \$email_git_all_signature_types,
248		'git-blame!' => \$email_git_blame,
249		'git-blame-signatures!' => \$email_git_blame_signatures,
250		'git-fallback!' => \$email_git_fallback,
251		'git-chief-penguins!' => \$email_git_penguin_chiefs,
252		'git-min-signatures=i' => \$email_git_min_signatures,
253		'git-max-maintainers=i' => \$email_git_max_maintainers,
254		'git-min-percent=i' => \$email_git_min_percent,
255		'git-since=s' => \$email_git_since,
256		'hg-since=s' => \$email_hg_since,
257		'i|interactive!' => \$interactive,
258		'remove-duplicates!' => \$email_remove_duplicates,
259		'mailmap!' => \$email_use_mailmap,
260		'm!' => \$email_maintainer,
261		'r!' => \$email_reviewer,
262		'n!' => \$email_usename,
263		'l!' => \$email_list,
264		'fixes!' => \$email_fixes,
265		'moderated!' => \$email_moderated_list,
266		's!' => \$email_subscriber_list,
267		'multiline!' => \$output_multiline,
268		'roles!' => \$output_roles,
269		'rolestats!' => \$output_rolestats,
270		'separator=s' => \$output_separator,
271		'subsystem!' => \$subsystem,
272		'status!' => \$status,
273		'substatus!' => \$output_substatus,
274		'scm!' => \$scm,
275		'tree!' => \$tree,
276		'web!' => \$web,
277		'bug!' => \$bug,
278		'letters=s' => \$letters,
279		'pattern-depth=i' => \$pattern_depth,
280		'k|keywords!' => \$keywords,
281		'kf|keywords-in-file!' => \$keywords_in_file,
282		'sections!' => \$sections,
283		'fe|file-emails!' => \$email_file_emails,
284		'f|file' => \$from_filename,
285		'find-maintainer-files' => \$find_maintainer_files,
286		'mpath|maintainer-path=s' => \$maintainer_path,
287		'self-test:s' => \$self_test,
288		'v|version' => \$version,
289		'h|help|usage' => \$help,
290		)) {
291    die "$P: invalid argument - use --help if necessary\n";
292}
293
294if ($help != 0) {
295    usage();
296    exit 0;
297}
298
299if ($version != 0) {
300    print("${P} ${V}\n");
301    exit 0;
302}
303
304if (defined $self_test) {
305    read_all_maintainer_files();
306    self_test();
307    exit 0;
308}
309
310if (-t STDIN && !@ARGV) {
311    # We're talking to a terminal, but have no command line arguments.
312    die "$P: missing patchfile or -f file - use --help if necessary\n";
313}
314
315$output_multiline = 0 if ($output_separator ne ", ");
316$output_rolestats = 1 if ($interactive);
317$output_roles = 1 if ($output_rolestats);
318
319if (!defined $output_substatus) {
320    $output_substatus = $email && $output_roles && -t STDOUT;
321}
322
323if ($sections || $letters ne "") {
324    $sections = 1;
325    $email = 0;
326    $email_list = 0;
327    $scm = 0;
328    $status = 0;
329    $subsystem = 0;
330    $web = 0;
331    $bug = 0;
332    $keywords = 0;
333    $keywords_in_file = 0;
334    $interactive = 0;
335} else {
336    my $selections = $email + $scm + $status + $subsystem + $web + $bug;
337    if ($selections == 0) {
338	die "$P:  Missing required option: email, scm, status, subsystem, web or bug\n";
339    }
340}
341
342if ($email &&
343    ($email_maintainer + $email_reviewer +
344     $email_list + $email_subscriber_list +
345     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
346    die "$P: Please select at least 1 email option\n";
347}
348
349if ($tree && !top_of_kernel_tree($lk_path)) {
350    die "$P: The current directory does not appear to be "
351	. "a linux kernel source tree.\n";
352}
353
354## Read MAINTAINERS for type/value pairs
355
356my @typevalue = ();
357my %keyword_hash;
358my @mfiles = ();
359my @self_test_info = ();
360
361sub read_maintainer_file {
362    my ($file) = @_;
363
364    open (my $maint, '<', "$file")
365	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
366    my $i = 1;
367    while (<$maint>) {
368	my $line = $_;
369	chomp $line;
370
371	if ($line =~ m/^([A-Z]):\s*(.*)/) {
372	    my $type = $1;
373	    my $value = $2;
374
375	    ##Filename pattern matching
376	    if ($type eq "F" || $type eq "X") {
377		$value =~ s@\.@\\\.@g;       ##Convert . to \.
378		$value =~ s/\*\*/\x00/g;     ##Convert ** to placeholder
379		$value =~ s/\*/\.\*/g;       ##Convert * to .*
380		$value =~ s/\?/\./g;         ##Convert ? to .
381		$value =~ s/\x00/(?:.*)/g;   ##Convert placeholder to (?:.*)
382		##if pattern is a directory and it lacks a trailing slash, add one
383		if ((-d $value)) {
384		    $value =~ s@([^/])$@$1/@;
385		}
386	    } elsif ($type eq "K") {
387		$keyword_hash{@typevalue} = $value;
388	    }
389	    push(@typevalue, "$type:$value");
390	} elsif (!(/^\s*$/ || /^\s*\#/)) {
391	    push(@typevalue, $line);
392	}
393	if (defined $self_test) {
394	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
395	}
396	$i++;
397    }
398    close($maint);
399}
400
401sub find_is_maintainer_file {
402    my ($file) = $_;
403    return if ($file !~ m@/MAINTAINERS$@);
404    $file = $File::Find::name;
405    return if (! -f $file);
406    push(@mfiles, $file);
407}
408
409sub find_ignore_git {
410    return grep { $_ !~ /^\.git$/; } @_;
411}
412
413read_all_maintainer_files();
414
415sub read_all_maintainer_files {
416    my $path = "${lk_path}MAINTAINERS";
417    if (defined $maintainer_path) {
418	$path = $maintainer_path;
419	# Perl Cookbook tilde expansion if necessary
420	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
421    }
422
423    if (-d $path) {
424	$path .= '/' if ($path !~ m@/$@);
425	if ($find_maintainer_files) {
426	    find( { wanted => \&find_is_maintainer_file,
427		    preprocess => \&find_ignore_git,
428		    no_chdir => 1,
429		}, "$path");
430	} else {
431	    opendir(DIR, "$path") or die $!;
432	    my @files = readdir(DIR);
433	    closedir(DIR);
434	    foreach my $file (@files) {
435		push(@mfiles, "$path$file") if ($file !~ /^\./);
436	    }
437	}
438    } elsif (-f "$path") {
439	push(@mfiles, "$path");
440    } else {
441	die "$P: MAINTAINER file not found '$path'\n";
442    }
443    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
444    foreach my $file (@mfiles) {
445	read_maintainer_file("$file");
446    }
447}
448
449sub maintainers_in_file {
450    my ($file) = @_;
451
452    return if ($file =~ m@\bMAINTAINERS$@);
453
454    if (-f $file && ($email_file_emails || $file =~ /\.yaml$/)) {
455	open(my $f, '<', $file)
456	    or die "$P: Can't open $file: $!\n";
457	my $text = do { local($/) ; <$f> };
458	close($f);
459
460	my @poss_addr = $text =~ m$[\p{L}\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
461	push(@file_emails, clean_file_emails(@poss_addr));
462    }
463}
464
465#
466# Read mail address map
467#
468
469my $mailmap;
470
471read_mailmap();
472
473sub read_mailmap {
474    $mailmap = {
475	names => {},
476	addresses => {}
477    };
478
479    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
480
481    open(my $mailmap_file, '<', "${lk_path}.mailmap")
482	or warn "$P: Can't open .mailmap: $!\n";
483
484    while (<$mailmap_file>) {
485	s/#.*$//; #strip comments
486	s/^\s+|\s+$//g; #trim
487
488	next if (/^\s*$/); #skip empty lines
489	#entries have one of the following formats:
490	# name1 <mail1>
491	# <mail1> <mail2>
492	# name1 <mail1> <mail2>
493	# name1 <mail1> name2 <mail2>
494	# (see man git-shortlog)
495
496	if (/^([^<]+)<([^>]+)>$/) {
497	    my $real_name = $1;
498	    my $address = $2;
499
500	    $real_name =~ s/\s+$//;
501	    ($real_name, $address) = parse_email("$real_name <$address>");
502	    $mailmap->{names}->{$address} = $real_name;
503
504	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
505	    my $real_address = $1;
506	    my $wrong_address = $2;
507
508	    $mailmap->{addresses}->{$wrong_address} = $real_address;
509
510	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
511	    my $real_name = $1;
512	    my $real_address = $2;
513	    my $wrong_address = $3;
514
515	    $real_name =~ s/\s+$//;
516	    ($real_name, $real_address) =
517		parse_email("$real_name <$real_address>");
518	    $mailmap->{names}->{$wrong_address} = $real_name;
519	    $mailmap->{addresses}->{$wrong_address} = $real_address;
520
521	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
522	    my $real_name = $1;
523	    my $real_address = $2;
524	    my $wrong_name = $3;
525	    my $wrong_address = $4;
526
527	    $real_name =~ s/\s+$//;
528	    ($real_name, $real_address) =
529		parse_email("$real_name <$real_address>");
530
531	    $wrong_name =~ s/\s+$//;
532	    ($wrong_name, $wrong_address) =
533		parse_email("$wrong_name <$wrong_address>");
534
535	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
536	    $mailmap->{names}->{$wrong_email} = $real_name;
537	    $mailmap->{addresses}->{$wrong_email} = $real_address;
538	}
539    }
540    close($mailmap_file);
541}
542
543## use the filenames on the command line or find the filenames in the patchfiles
544
545if (!@ARGV) {
546    push(@ARGV, "&STDIN");
547}
548
549foreach my $file (@ARGV) {
550    if ($file ne "&STDIN") {
551	$file = canonpath($file);
552	##if $file is a directory and it lacks a trailing slash, add one
553	if ((-d $file)) {
554	    $file =~ s@([^/])$@$1/@;
555	} elsif (!(-f $file)) {
556	    die "$P: file '${file}' not found\n";
557	}
558    }
559    if ($from_filename && (vcs_exists() && !vcs_file_exists($file))) {
560	warn "$P: file '$file' not found in version control $!\n";
561    }
562    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
563	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
564	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
565	push(@files, $file);
566	if ($file ne "MAINTAINERS" && -f $file && $keywords && $keywords_in_file) {
567	    open(my $f, '<', $file)
568		or die "$P: Can't open $file: $!\n";
569	    my $text = do { local($/) ; <$f> };
570	    close($f);
571	    foreach my $line (keys %keyword_hash) {
572		if ($text =~ m/$keyword_hash{$line}/x) {
573		    push(@keyword_tvi, $line);
574		}
575	    }
576	}
577    } else {
578	my $file_cnt = @files;
579	my $lastfile;
580
581	open(my $patch, "< $file")
582	    or die "$P: Can't open $file: $!\n";
583
584	# We can check arbitrary information before the patch
585	# like the commit message, mail headers, etc...
586	# This allows us to match arbitrary keywords against any part
587	# of a git format-patch generated file (subject tags, etc...)
588
589	my $patch_prefix = "";			#Parsing the intro
590
591	while (<$patch>) {
592	    my $patch_line = $_;
593	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
594		my $filename = $1;
595		push(@files, $filename);
596	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
597		my $filename = $1;
598		push(@files, $filename);
599	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
600		my $filename1 = $1;
601		my $filename2 = $2;
602		push(@files, $filename1);
603		push(@files, $filename2);
604	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
605		push(@fixes, $1) if ($email_fixes);
606	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
607		my $filename = $1;
608		$filename =~ s@^[^/]*/@@;
609		$filename =~ s@\n@@;
610		$lastfile = $filename;
611		push(@files, $filename);
612		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
613	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
614		if ($email_git_blame) {
615		    push(@range, "$lastfile:$1:$2");
616		}
617	    } elsif ($keywords) {
618		foreach my $line (keys %keyword_hash) {
619		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
620			push(@keyword_tvi, $line);
621		    }
622		}
623	    }
624	}
625	close($patch);
626
627	if ($file_cnt == @files) {
628	    warn "$P: file '${file}' doesn't appear to be a patch.  "
629		. "Add -f to options?\n";
630	}
631	@files = sort_and_uniq(@files);
632    }
633}
634
635@file_emails = uniq(@file_emails);
636@fixes = uniq(@fixes);
637
638my %email_hash_name;
639my %email_hash_address;
640my @email_to = ();
641my %hash_list_to;
642my @list_to = ();
643my @scm = ();
644my @web = ();
645my @bug = ();
646my @subsystem = ();
647my @status = ();
648my @substatus = ();
649my %deduplicate_name_hash = ();
650my %deduplicate_address_hash = ();
651
652my @maintainers = get_maintainers();
653if (@maintainers) {
654    @maintainers = merge_email(@maintainers);
655    output(@maintainers);
656}
657
658if ($scm) {
659    @scm = uniq(@scm);
660    output(@scm);
661}
662
663if ($output_substatus) {
664    @substatus = uniq(@substatus);
665    output(@substatus);
666}
667
668if ($status) {
669    @status = uniq(@status);
670    output(@status);
671}
672
673if ($subsystem) {
674    @subsystem = uniq(@subsystem);
675    output(@subsystem);
676}
677
678if ($web) {
679    @web = uniq(@web);
680    output(@web);
681}
682
683if ($bug) {
684    @bug = uniq(@bug);
685    output(@bug);
686}
687
688exit($exit);
689
690sub self_test {
691    my @lsfiles = ();
692    my @good_links = ();
693    my @bad_links = ();
694    my @section_headers = ();
695    my $index = 0;
696
697    @lsfiles = vcs_list_files($lk_path);
698
699    for my $x (@self_test_info) {
700	$index++;
701
702	## Section header duplication and missing section content
703	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
704	    $x->{line} =~ /^\S[^:]/ &&
705	    defined $self_test_info[$index] &&
706	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
707	    my $has_S = 0;
708	    my $has_F = 0;
709	    my $has_ML = 0;
710	    my $status = "";
711	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
712		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
713	    } else {
714		push(@section_headers, $x->{line});
715	    }
716	    my $nextline = $index;
717	    while (defined $self_test_info[$nextline] &&
718		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
719		my $type = $1;
720		my $value = $2;
721		if ($type eq "S") {
722		    $has_S = 1;
723		    $status = $value;
724		} elsif ($type eq "F" || $type eq "N") {
725		    $has_F = 1;
726		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
727		    $has_ML = 1;
728		}
729		$nextline++;
730	    }
731	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
732		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
733	    }
734	    if (!$has_S) {
735		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
736	    }
737	    if (!$has_F) {
738		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
739	    }
740	}
741
742	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
743
744	my $type = $1;
745	my $value = $2;
746
747	## Filename pattern matching
748	if (($type eq "F" || $type eq "X") &&
749	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
750	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
751	    $value =~ s/\*\*/\x00/g;     ##Convert ** to placeholder
752	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
753	    $value =~ s/\?/\./g;         ##Convert ? to .
754	    $value =~ s/\x00/(?:.*)/g;   ##Convert placeholder to (?:.*)
755	    ##if pattern is a directory and it lacks a trailing slash, add one
756	    if ((-d $value)) {
757		$value =~ s@([^/])$@$1/@;
758	    }
759	    if (!grep(m@^$value@, @lsfiles)) {
760		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
761	    }
762
763	## Link reachability
764	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
765		 $value =~ /^https?:/ &&
766		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
767	    next if (grep(m@^\Q$value\E$@, @good_links));
768	    my $isbad = 0;
769	    if (grep(m@^\Q$value\E$@, @bad_links)) {
770	        $isbad = 1;
771	    } else {
772		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
773		if ($? == 0) {
774		    push(@good_links, $value);
775		} else {
776		    push(@bad_links, $value);
777		    $isbad = 1;
778		}
779	    }
780	    if ($isbad) {
781	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
782	    }
783
784	## SCM reachability
785	} elsif ($type eq "T" &&
786		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
787	    next if (grep(m@^\Q$value\E$@, @good_links));
788	    my $isbad = 0;
789	    if (grep(m@^\Q$value\E$@, @bad_links)) {
790	        $isbad = 1;
791            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
792		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
793	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
794		my $url = $1;
795		my $branch = "";
796		$branch = $3 if $3;
797		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
798		if ($? == 0) {
799		    push(@good_links, $value);
800		} else {
801		    push(@bad_links, $value);
802		    $isbad = 1;
803		}
804	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
805		my $url = $1;
806		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
807		if ($? == 0) {
808		    push(@good_links, $value);
809		} else {
810		    push(@bad_links, $value);
811		    $isbad = 1;
812		}
813	    }
814	    if ($isbad) {
815		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
816	    }
817	}
818    }
819}
820
821sub ignore_email_address {
822    my ($address) = @_;
823
824    foreach my $ignore (@ignore_emails) {
825	return 1 if ($ignore eq $address);
826    }
827
828    return 0;
829}
830
831sub range_is_maintained {
832    my ($start, $end) = @_;
833
834    for (my $i = $start; $i < $end; $i++) {
835	my $line = $typevalue[$i];
836	if ($line =~ m/^([A-Z]):\s*(.*)/) {
837	    my $type = $1;
838	    my $value = $2;
839	    if ($type eq 'S') {
840		if ($value =~ /(maintain|support)/i) {
841		    return 1;
842		}
843	    }
844	}
845    }
846    return 0;
847}
848
849sub range_has_maintainer {
850    my ($start, $end) = @_;
851
852    for (my $i = $start; $i < $end; $i++) {
853	my $line = $typevalue[$i];
854	if ($line =~ m/^([A-Z]):\s*(.*)/) {
855	    my $type = $1;
856	    my $value = $2;
857	    if ($type eq 'M') {
858		return 1;
859	    }
860	}
861    }
862    return 0;
863}
864
865sub get_maintainers {
866    %email_hash_name = ();
867    %email_hash_address = ();
868    %commit_author_hash = ();
869    %commit_signer_hash = ();
870    @email_to = ();
871    %hash_list_to = ();
872    @list_to = ();
873    @scm = ();
874    @web = ();
875    @bug = ();
876    @subsystem = ();
877    @status = ();
878    @substatus = ();
879    %deduplicate_name_hash = ();
880    %deduplicate_address_hash = ();
881    if ($email_git_all_signature_types) {
882	$signature_pattern = "(.+?)[Bb][Yy]:";
883    } else {
884	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
885    }
886
887    # Find responsible parties
888
889    my %exact_pattern_match_hash = ();
890
891    foreach my $file (@files) {
892
893	my %hash;
894	my $tvi = find_first_section();
895	while ($tvi < @typevalue) {
896	    my $start = find_starting_index($tvi);
897	    my $end = find_ending_index($tvi);
898	    my $exclude = 0;
899	    my $i;
900
901	    #Do not match excluded file patterns
902
903	    for ($i = $start; $i < $end; $i++) {
904		my $line = $typevalue[$i];
905		if ($line =~ m/^([A-Z]):\s*(.*)/) {
906		    my $type = $1;
907		    my $value = $2;
908		    if ($type eq 'X') {
909			if (file_match_pattern($file, $value)) {
910			    $exclude = 1;
911			    last;
912			}
913		    }
914		}
915	    }
916
917	    if (!$exclude) {
918		for ($i = $start; $i < $end; $i++) {
919		    my $line = $typevalue[$i];
920		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
921			my $type = $1;
922			my $value = $2;
923			if ($type eq 'F') {
924			    if (file_match_pattern($file, $value)) {
925				my $value_pd = ($value =~ tr@/@@);
926				my $file_pd = ($file  =~ tr@/@@);
927				$value_pd++ if (substr($value,-1,1) ne "/");
928				$value_pd = -1 if ($value =~ /^(\.\*|\(\?:\.\*\))/);
929				if ($value_pd >= $file_pd &&
930				    range_is_maintained($start, $end) &&
931				    range_has_maintainer($start, $end)) {
932				    $exact_pattern_match_hash{$file} = 1;
933				}
934				if ($pattern_depth == 0 ||
935				    (($file_pd - $value_pd) < $pattern_depth)) {
936				    $hash{$tvi} = $value_pd;
937				}
938			    }
939			} elsif ($type eq 'N') {
940			    if ($file =~ m/$value/x) {
941				$hash{$tvi} = 0;
942			    }
943			}
944		    }
945		}
946	    }
947	    $tvi = $end + 1;
948	}
949
950	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
951	    add_categories($line, "");
952	    if ($sections) {
953		my $i;
954		my $start = find_starting_index($line);
955		my $end = find_ending_index($line);
956		for ($i = $start; $i < $end; $i++) {
957		    my $line = $typevalue[$i];
958		    if ($line =~ /^[FX]:/) {		##Restore file patterns
959			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
960			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
961			$line =~ s/\\\./\./g;       	##Convert \. to .
962			$line =~ s/\(\?:\.\*\)/\*\*/g;	##Convert (?:.*) to **
963			$line =~ s/\.\*/\*/g;       	##Convert .* to *
964		    }
965		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
966		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
967			print("$line\n");
968		    }
969		}
970		print("\n");
971	    }
972	}
973
974	maintainers_in_file($file);
975    }
976
977    if ($keywords) {
978	@keyword_tvi = sort_and_uniq(@keyword_tvi);
979	foreach my $line (@keyword_tvi) {
980	    add_categories($line, ":Keyword:$keyword_hash{$line}");
981	}
982    }
983
984    foreach my $email (@email_to, @list_to) {
985	$email->[0] = deduplicate_email($email->[0]);
986    }
987
988    foreach my $file (@files) {
989	if ($email &&
990	    ($email_git ||
991	     ($email_git_fallback &&
992	      $file !~ /MAINTAINERS$/ &&
993	      !$exact_pattern_match_hash{$file}))) {
994	    vcs_file_signoffs($file);
995	}
996	if ($email && $email_git_blame) {
997	    vcs_file_blame($file);
998	}
999    }
1000
1001    if ($email) {
1002	foreach my $chief (@penguin_chief) {
1003	    if ($chief =~ m/^(.*):(.*)/) {
1004		my $email_address;
1005
1006		$email_address = format_email($1, $2, $email_usename);
1007		if ($email_git_penguin_chiefs) {
1008		    push(@email_to, [$email_address, 'chief penguin']);
1009		} else {
1010		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
1011		}
1012	    }
1013	}
1014
1015	foreach my $email (@file_emails) {
1016	    $email = mailmap_email($email);
1017	    my ($name, $address) = parse_email($email);
1018
1019	    my $tmp_email = format_email($name, $address, $email_usename);
1020	    push_email_address($tmp_email, '');
1021	    add_role($tmp_email, 'in file');
1022	}
1023    }
1024
1025    foreach my $fix (@fixes) {
1026	vcs_add_commit_signers($fix, "blamed_fixes");
1027    }
1028
1029    my @to = ();
1030    if ($email || $email_list) {
1031	if ($email) {
1032	    @to = (@to, @email_to);
1033	}
1034	if ($email_list) {
1035	    @to = (@to, @list_to);
1036	}
1037    }
1038
1039    if ($interactive) {
1040	@to = interactive_get_maintainers(\@to);
1041    }
1042
1043    return @to;
1044}
1045
1046sub file_match_pattern {
1047    my ($file, $pattern) = @_;
1048    if (substr($pattern, -1) eq "/") {
1049	if ($file =~ m@^$pattern@) {
1050	    return 1;
1051	}
1052    } else {
1053	if ($file =~ m@^$pattern@) {
1054	    my $s1 = ($file =~ tr@/@@);
1055	    my $s2 = ($pattern =~ tr@/@@);
1056	    if ($s1 == $s2 || $pattern =~ /\(\?:/) {
1057		return 1;
1058	    }
1059	}
1060    }
1061    return 0;
1062}
1063
1064sub usage {
1065    print <<EOT;
1066usage: $P [options] patchfile
1067       $P [options] -f file|directory
1068version: $V
1069
1070MAINTAINER field selection options:
1071  --email => print email address(es) if any
1072    --git => include recent git \*-by: signers
1073    --git-all-signature-types => include signers regardless of signature type
1074        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1075    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1076    --git-chief-penguins => include ${penguin_chiefs}
1077    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1078    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1079    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1080    --git-blame => use git blame to find modified commits for patch or file
1081    --git-blame-signatures => when used with --git-blame, also include all commit signers
1082    --git-since => git history to use (default: $email_git_since)
1083    --hg-since => hg history to use (default: $email_hg_since)
1084    --interactive => display a menu (mostly useful if used with the --git option)
1085    --m => include maintainer(s) if any
1086    --r => include reviewer(s) if any
1087    --n => include name 'Full Name <addr\@domain.tld>'
1088    --l => include list(s) if any
1089    --moderated => include moderated lists(s) if any (default: true)
1090    --s => include subscriber only list(s) if any (default: false)
1091    --remove-duplicates => minimize duplicate email names/addresses
1092    --roles => show roles (role:subsystem, git-signer, list, etc...)
1093    --rolestats => show roles and statistics (commits/total_commits, %)
1094    --substatus => show subsystem status if not Maintained (default: match --roles when output is tty)"
1095    --file-emails => add email addresses found in -f file (default: 0 (off))
1096    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1097  --scm => print SCM tree(s) if any
1098  --status => print status if any
1099  --subsystem => print subsystem name if any
1100  --web => print website(s) if any
1101  --bug => print bug reporting info if any
1102
1103Output type options:
1104  --separator [, ] => separator for multiple entries on 1 line
1105    using --separator also sets --nomultiline if --separator is not [, ]
1106  --multiline => print 1 entry per line
1107
1108Other options:
1109  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1110  --keywords => scan patch for keywords (default: $keywords)
1111  --keywords-in-file => scan file for keywords (default: $keywords_in_file)
1112  --sections => print all of the subsystem sections with pattern matches
1113  --letters => print all matching 'letter' types from all matching sections
1114  --mailmap => use .mailmap file (default: $email_use_mailmap)
1115  --no-tree => run without a kernel tree
1116  --self-test => show potential issues with MAINTAINERS file content
1117  --version => show version
1118  --help => show this help information
1119
1120Default options:
1121  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1122   --pattern-depth=0 --remove-duplicates --rolestats --keywords]
1123
1124Notes:
1125  Using "-f directory" may give unexpected results:
1126      Used with "--git", git signators for _all_ files in and below
1127          directory are examined as git recurses directories.
1128          Any specified X: (exclude) pattern matches are _not_ ignored.
1129      Used with "--nogit", directory is used as a pattern match,
1130          no individual file within the directory or subdirectory
1131          is matched.
1132      Used with "--git-blame", does not iterate all files in directory
1133  Using "--git-blame" is slow and may add old committers and authors
1134      that are no longer active maintainers to the output.
1135  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1136      other automated tools that expect only ["name"] <email address>
1137      may not work because of additional output after <email address>.
1138  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1139      not the percentage of the entire file authored.  # of commits is
1140      not a good measure of amount of code authored.  1 major commit may
1141      contain a thousand lines, 5 trivial commits may modify a single line.
1142  If git is not installed, but mercurial (hg) is installed and an .hg
1143      repository exists, the following options apply to mercurial:
1144          --git,
1145          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1146          --git-blame
1147      Use --hg-since not --git-since to control date selection
1148  File ".get_maintainer.conf", if it exists in the linux kernel source root
1149      directory, can change whatever get_maintainer defaults are desired.
1150      Entries in this file can be any command line argument.
1151      This file is prepended to any additional command line arguments.
1152      Multiple lines and # comments are allowed.
1153  Most options have both positive and negative forms.
1154      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1155
1156EOT
1157}
1158
1159sub top_of_kernel_tree {
1160    my ($lk_path) = @_;
1161
1162    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1163	$lk_path .= "/";
1164    }
1165    if (   (-f "${lk_path}COPYING")
1166	&& (-f "${lk_path}CREDITS")
1167	&& (-f "${lk_path}Kbuild")
1168	&& (-e "${lk_path}MAINTAINERS")
1169	&& (-f "${lk_path}Makefile")
1170	&& (-f "${lk_path}README")
1171	&& (-d "${lk_path}Documentation")
1172	&& (-d "${lk_path}arch")
1173	&& (-d "${lk_path}include")
1174	&& (-d "${lk_path}drivers")
1175	&& (-d "${lk_path}fs")
1176	&& (-d "${lk_path}init")
1177	&& (-d "${lk_path}ipc")
1178	&& (-d "${lk_path}kernel")
1179	&& (-d "${lk_path}lib")
1180	&& (-d "${lk_path}scripts")) {
1181	return 1;
1182    }
1183    return 0;
1184}
1185
1186sub escape_name {
1187    my ($name) = @_;
1188
1189    if ($name =~ /[^\w \-]/ai) {  	 ##has "must quote" chars
1190	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1191	$name = "\"$name\"";
1192    }
1193
1194    return $name;
1195}
1196
1197sub parse_email {
1198    my ($formatted_email) = @_;
1199
1200    my $name = "";
1201    my $address = "";
1202
1203    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1204	$name = $1;
1205	$address = $2;
1206    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1207	$address = $1;
1208    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1209	$address = $1;
1210    }
1211
1212    $name =~ s/^\s+|\s+$//g;
1213    $name =~ s/^\"|\"$//g;
1214    $name = escape_name($name);
1215    $address =~ s/^\s+|\s+$//g;
1216
1217    return ($name, $address);
1218}
1219
1220sub format_email {
1221    my ($name, $address, $usename) = @_;
1222
1223    my $formatted_email;
1224
1225    $name =~ s/^\s+|\s+$//g;
1226    $name =~ s/^\"|\"$//g;
1227    $name = escape_name($name);
1228    $address =~ s/^\s+|\s+$//g;
1229
1230    if ($usename) {
1231	if ("$name" eq "") {
1232	    $formatted_email = "$address";
1233	} else {
1234	    $formatted_email = "$name <$address>";
1235	}
1236    } else {
1237	$formatted_email = $address;
1238    }
1239
1240    return $formatted_email;
1241}
1242
1243sub find_first_section {
1244    my $index = 0;
1245
1246    while ($index < @typevalue) {
1247	my $tv = $typevalue[$index];
1248	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1249	    last;
1250	}
1251	$index++;
1252    }
1253
1254    return $index;
1255}
1256
1257sub find_starting_index {
1258    my ($index) = @_;
1259
1260    while ($index > 0) {
1261	my $tv = $typevalue[$index];
1262	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1263	    last;
1264	}
1265	$index--;
1266    }
1267
1268    return $index;
1269}
1270
1271sub find_ending_index {
1272    my ($index) = @_;
1273
1274    while ($index < @typevalue) {
1275	my $tv = $typevalue[$index];
1276	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1277	    last;
1278	}
1279	$index++;
1280    }
1281
1282    return $index;
1283}
1284
1285sub get_subsystem_name {
1286    my ($index) = @_;
1287
1288    my $start = find_starting_index($index);
1289
1290    my $subsystem = $typevalue[$start];
1291    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1292	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1293	$subsystem =~ s/\s*$//;
1294	$subsystem = $subsystem . "...";
1295    }
1296    return $subsystem;
1297}
1298
1299sub get_maintainer_role {
1300    my ($index) = @_;
1301
1302    my $i;
1303    my $start = find_starting_index($index);
1304    my $end = find_ending_index($index);
1305
1306    my $role = "maintainer";
1307    my $subsystem = get_subsystem_name($index);
1308    my $status = "unknown";
1309
1310    for ($i = $start + 1; $i < $end; $i++) {
1311	my $tv = $typevalue[$i];
1312	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1313	    my $ptype = $1;
1314	    my $pvalue = $2;
1315	    if ($ptype eq "S") {
1316		$status = $pvalue;
1317	    }
1318	}
1319    }
1320
1321    $status = lc($status);
1322    if ($status eq "buried alive in reporters") {
1323	$role = "chief penguin";
1324    }
1325
1326    return $role . ":" . $subsystem;
1327}
1328
1329sub get_list_role {
1330    my ($index) = @_;
1331
1332    my $subsystem = get_subsystem_name($index);
1333
1334    if ($subsystem eq "THE REST") {
1335	$subsystem = "";
1336    }
1337
1338    return $subsystem;
1339}
1340
1341sub add_categories {
1342    my ($index, $suffix) = @_;
1343
1344    my $i;
1345    my $start = find_starting_index($index);
1346    my $end = find_ending_index($index);
1347
1348    my $subsystem = $typevalue[$start];
1349    push(@subsystem, $subsystem);
1350    my $status = "Unknown";
1351
1352    for ($i = $start + 1; $i < $end; $i++) {
1353	my $tv = $typevalue[$i];
1354	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1355	    my $ptype = $1;
1356	    my $pvalue = $2;
1357	    if ($ptype eq "L") {
1358		my $list_address = $pvalue;
1359		my $list_additional = "";
1360		my $list_role = get_list_role($i);
1361
1362		if ($list_role ne "") {
1363		    $list_role = ":" . $list_role;
1364		}
1365		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1366		    $list_address = $1;
1367		    $list_additional = $2;
1368		}
1369		if ($list_additional =~ m/subscribers-only/) {
1370		    if ($email_subscriber_list) {
1371			if (!$hash_list_to{lc($list_address)}) {
1372			    $hash_list_to{lc($list_address)} = 1;
1373			    push(@list_to, [$list_address,
1374					    "subscriber list${list_role}" . $suffix]);
1375			}
1376		    }
1377		} else {
1378		    if ($email_list) {
1379			if (!$hash_list_to{lc($list_address)}) {
1380			    if ($list_additional =~ m/moderated/) {
1381				if ($email_moderated_list) {
1382				    $hash_list_to{lc($list_address)} = 1;
1383				    push(@list_to, [$list_address,
1384						    "moderated list${list_role}" . $suffix]);
1385				}
1386			    } else {
1387				$hash_list_to{lc($list_address)} = 1;
1388				push(@list_to, [$list_address,
1389						"open list${list_role}" . $suffix]);
1390			    }
1391			}
1392		    }
1393		}
1394	    } elsif ($ptype eq "M") {
1395		if ($email_maintainer) {
1396		    my $role = get_maintainer_role($i);
1397		    push_email_addresses($pvalue, $role . $suffix);
1398		}
1399	    } elsif ($ptype eq "R") {
1400		if ($email_reviewer) {
1401		    my $subs = get_subsystem_name($i);
1402		    push_email_addresses($pvalue, "reviewer:$subs" . $suffix);
1403		}
1404	    } elsif ($ptype eq "T") {
1405		push(@scm, $pvalue . $suffix);
1406	    } elsif ($ptype eq "W") {
1407		push(@web, $pvalue . $suffix);
1408	    } elsif ($ptype eq "B") {
1409		push(@bug, $pvalue . $suffix);
1410	    } elsif ($ptype eq "S") {
1411		push(@status, $pvalue . $suffix);
1412		$status = $pvalue;
1413	    }
1414	}
1415    }
1416
1417    if ($subsystem ne "THE REST" and $status ne "Maintained") {
1418	push(@substatus, $subsystem . " status: " . $status . $suffix)
1419    }
1420}
1421
1422sub email_inuse {
1423    my ($name, $address) = @_;
1424
1425    return 1 if (($name eq "") && ($address eq ""));
1426    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1427    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1428
1429    return 0;
1430}
1431
1432sub push_email_address {
1433    my ($line, $role) = @_;
1434
1435    my ($name, $address) = parse_email($line);
1436
1437    if ($address eq "") {
1438	return 0;
1439    }
1440
1441    if (!$email_remove_duplicates) {
1442	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1443    } elsif (!email_inuse($name, $address)) {
1444	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1445	$email_hash_name{lc($name)}++ if ($name ne "");
1446	$email_hash_address{lc($address)}++;
1447    }
1448
1449    return 1;
1450}
1451
1452sub push_email_addresses {
1453    my ($address, $role) = @_;
1454
1455    my @address_list = ();
1456
1457    if (rfc822_valid($address)) {
1458	push_email_address($address, $role);
1459    } elsif (@address_list = rfc822_validlist($address)) {
1460	my $array_count = shift(@address_list);
1461	while (my $entry = shift(@address_list)) {
1462	    push_email_address($entry, $role);
1463	}
1464    } else {
1465	if (!push_email_address($address, $role)) {
1466	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1467	}
1468    }
1469}
1470
1471sub add_role {
1472    my ($line, $role) = @_;
1473
1474    my ($name, $address) = parse_email($line);
1475    my $email = format_email($name, $address, $email_usename);
1476
1477    foreach my $entry (@email_to) {
1478	if ($email_remove_duplicates) {
1479	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1480	    if (($name eq $entry_name || $address eq $entry_address)
1481		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1482	    ) {
1483		if ($entry->[1] eq "") {
1484		    $entry->[1] = "$role";
1485		} else {
1486		    $entry->[1] = "$entry->[1],$role";
1487		}
1488	    }
1489	} else {
1490	    if ($email eq $entry->[0]
1491		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1492	    ) {
1493		if ($entry->[1] eq "") {
1494		    $entry->[1] = "$role";
1495		} else {
1496		    $entry->[1] = "$entry->[1],$role";
1497		}
1498	    }
1499	}
1500    }
1501}
1502
1503sub which {
1504    my ($bin) = @_;
1505
1506    foreach my $path (split(/:/, $ENV{PATH})) {
1507	if (-e "$path/$bin") {
1508	    return "$path/$bin";
1509	}
1510    }
1511
1512    return "";
1513}
1514
1515sub which_conf {
1516    my ($conf) = @_;
1517
1518    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1519	if (-e "$path/$conf") {
1520	    return "$path/$conf";
1521	}
1522    }
1523
1524    return "";
1525}
1526
1527sub mailmap_email {
1528    my ($line) = @_;
1529
1530    my ($name, $address) = parse_email($line);
1531    my $email = format_email($name, $address, 1);
1532    my $real_name = $name;
1533    my $real_address = $address;
1534
1535    if (exists $mailmap->{names}->{$email} ||
1536	exists $mailmap->{addresses}->{$email}) {
1537	if (exists $mailmap->{names}->{$email}) {
1538	    $real_name = $mailmap->{names}->{$email};
1539	}
1540	if (exists $mailmap->{addresses}->{$email}) {
1541	    $real_address = $mailmap->{addresses}->{$email};
1542	}
1543    } else {
1544	if (exists $mailmap->{names}->{$address}) {
1545	    $real_name = $mailmap->{names}->{$address};
1546	}
1547	if (exists $mailmap->{addresses}->{$address}) {
1548	    $real_address = $mailmap->{addresses}->{$address};
1549	}
1550    }
1551    return format_email($real_name, $real_address, 1);
1552}
1553
1554sub mailmap {
1555    my (@addresses) = @_;
1556
1557    my @mapped_emails = ();
1558    foreach my $line (@addresses) {
1559	push(@mapped_emails, mailmap_email($line));
1560    }
1561    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1562    return @mapped_emails;
1563}
1564
1565sub merge_by_realname {
1566    my %address_map;
1567    my (@emails) = @_;
1568
1569    foreach my $email (@emails) {
1570	my ($name, $address) = parse_email($email);
1571	if (exists $address_map{$name}) {
1572	    $address = $address_map{$name};
1573	    $email = format_email($name, $address, 1);
1574	} else {
1575	    $address_map{$name} = $address;
1576	}
1577    }
1578}
1579
1580sub git_execute_cmd {
1581    my ($cmd) = @_;
1582    my @lines = ();
1583
1584    my $output = `$cmd`;
1585    $output =~ s/^\s*//gm;
1586    @lines = split("\n", $output);
1587
1588    return @lines;
1589}
1590
1591sub hg_execute_cmd {
1592    my ($cmd) = @_;
1593    my @lines = ();
1594
1595    my $output = `$cmd`;
1596    @lines = split("\n", $output);
1597
1598    return @lines;
1599}
1600
1601sub extract_formatted_signatures {
1602    my (@signature_lines) = @_;
1603
1604    my @type = @signature_lines;
1605
1606    s/\s*(.*):.*/$1/ for (@type);
1607
1608    # cut -f2- -d":"
1609    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1610
1611## Reformat email addresses (with names) to avoid badly written signatures
1612
1613    foreach my $signer (@signature_lines) {
1614	$signer = deduplicate_email($signer);
1615    }
1616
1617    return (\@type, \@signature_lines);
1618}
1619
1620sub vcs_find_signers {
1621    my ($cmd, $file) = @_;
1622    my $commits;
1623    my @lines = ();
1624    my @signatures = ();
1625    my @authors = ();
1626    my @stats = ();
1627
1628    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1629
1630    my $pattern = $VCS_cmds{"commit_pattern"};
1631    my $author_pattern = $VCS_cmds{"author_pattern"};
1632    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1633
1634    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1635
1636    $commits = grep(/$pattern/, @lines);	# of commits
1637
1638    @authors = grep(/$author_pattern/, @lines);
1639    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1640    @stats = grep(/$stat_pattern/, @lines);
1641
1642#    print("stats: <@stats>\n");
1643
1644    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1645
1646    save_commits_by_author(@lines) if ($interactive);
1647    save_commits_by_signer(@lines) if ($interactive);
1648
1649    if (!$email_git_penguin_chiefs) {
1650	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1651    }
1652
1653    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1654    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1655
1656    return ($commits, $signers_ref, $authors_ref, \@stats);
1657}
1658
1659sub vcs_find_author {
1660    my ($cmd) = @_;
1661    my @lines = ();
1662
1663    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1664
1665    if (!$email_git_penguin_chiefs) {
1666	@lines = grep(!/${penguin_chiefs}/i, @lines);
1667    }
1668
1669    return @lines if !@lines;
1670
1671    my @authors = ();
1672    foreach my $line (@lines) {
1673	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1674	    my $author = $1;
1675	    my ($name, $address) = parse_email($author);
1676	    $author = format_email($name, $address, 1);
1677	    push(@authors, $author);
1678	}
1679    }
1680
1681    save_commits_by_author(@lines) if ($interactive);
1682    save_commits_by_signer(@lines) if ($interactive);
1683
1684    return @authors;
1685}
1686
1687sub vcs_save_commits {
1688    my ($cmd) = @_;
1689    my @lines = ();
1690    my @commits = ();
1691
1692    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1693
1694    foreach my $line (@lines) {
1695	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1696	    push(@commits, $1);
1697	}
1698    }
1699
1700    return @commits;
1701}
1702
1703sub vcs_blame {
1704    my ($file) = @_;
1705    my $cmd;
1706    my @commits = ();
1707
1708    return @commits if (!(-f $file));
1709
1710    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1711	my @all_commits = ();
1712
1713	$cmd = $VCS_cmds{"blame_file_cmd"};
1714	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1715	@all_commits = vcs_save_commits($cmd);
1716
1717	foreach my $file_range_diff (@range) {
1718	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1719	    my $diff_file = $1;
1720	    my $diff_start = $2;
1721	    my $diff_length = $3;
1722	    next if ("$file" ne "$diff_file");
1723	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1724		push(@commits, $all_commits[$i]);
1725	    }
1726	}
1727    } elsif (@range) {
1728	foreach my $file_range_diff (@range) {
1729	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1730	    my $diff_file = $1;
1731	    my $diff_start = $2;
1732	    my $diff_length = $3;
1733	    next if ("$file" ne "$diff_file");
1734	    $cmd = $VCS_cmds{"blame_range_cmd"};
1735	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1736	    push(@commits, vcs_save_commits($cmd));
1737	}
1738    } else {
1739	$cmd = $VCS_cmds{"blame_file_cmd"};
1740	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1741	@commits = vcs_save_commits($cmd);
1742    }
1743
1744    foreach my $commit (@commits) {
1745	$commit =~ s/^\^//g;
1746    }
1747
1748    return @commits;
1749}
1750
1751my $printed_novcs = 0;
1752sub vcs_exists {
1753    %VCS_cmds = %VCS_cmds_git;
1754    return 1 if eval $VCS_cmds{"available"};
1755    %VCS_cmds = %VCS_cmds_hg;
1756    return 2 if eval $VCS_cmds{"available"};
1757    %VCS_cmds = ();
1758    if (!$printed_novcs && $email_git) {
1759	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1760	warn("Using a git repository produces better results.\n");
1761	warn("Try Linus Torvalds' latest git repository using:\n");
1762	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1763	$printed_novcs = 1;
1764    }
1765    return 0;
1766}
1767
1768sub vcs_is_git {
1769    vcs_exists();
1770    return $vcs_used == 1;
1771}
1772
1773sub vcs_is_hg {
1774    return $vcs_used == 2;
1775}
1776
1777sub vcs_add_commit_signers {
1778    return if (!vcs_exists());
1779
1780    my ($commit, $desc) = @_;
1781    my $commit_count = 0;
1782    my $commit_authors_ref;
1783    my $commit_signers_ref;
1784    my $stats_ref;
1785    my @commit_authors = ();
1786    my @commit_signers = ();
1787    my $cmd;
1788
1789    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1790    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1791
1792    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1793    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1794    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1795
1796    foreach my $signer (@commit_signers) {
1797	$signer = deduplicate_email($signer);
1798    }
1799
1800    vcs_assign($desc, 1, @commit_signers);
1801}
1802
1803sub interactive_get_maintainers {
1804    my ($list_ref) = @_;
1805    my @list = @$list_ref;
1806
1807    vcs_exists();
1808
1809    my %selected;
1810    my %authored;
1811    my %signed;
1812    my $count = 0;
1813    my $maintained = 0;
1814    foreach my $entry (@list) {
1815	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1816	$selected{$count} = 1;
1817	$authored{$count} = 0;
1818	$signed{$count} = 0;
1819	$count++;
1820    }
1821
1822    #menu loop
1823    my $done = 0;
1824    my $print_options = 0;
1825    my $redraw = 1;
1826    while (!$done) {
1827	$count = 0;
1828	if ($redraw) {
1829	    printf STDERR "\n%1s %2s %-65s",
1830			  "*", "#", "email/list and role:stats";
1831	    if ($email_git ||
1832		($email_git_fallback && !$maintained) ||
1833		$email_git_blame) {
1834		print STDERR "auth sign";
1835	    }
1836	    print STDERR "\n";
1837	    foreach my $entry (@list) {
1838		my $email = $entry->[0];
1839		my $role = $entry->[1];
1840		my $sel = "";
1841		$sel = "*" if ($selected{$count});
1842		my $commit_author = $commit_author_hash{$email};
1843		my $commit_signer = $commit_signer_hash{$email};
1844		my $authored = 0;
1845		my $signed = 0;
1846		$authored++ for (@{$commit_author});
1847		$signed++ for (@{$commit_signer});
1848		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1849		printf STDERR "%4d %4d", $authored, $signed
1850		    if ($authored > 0 || $signed > 0);
1851		printf STDERR "\n     %s\n", $role;
1852		if ($authored{$count}) {
1853		    my $commit_author = $commit_author_hash{$email};
1854		    foreach my $ref (@{$commit_author}) {
1855			print STDERR "     Author: @{$ref}[1]\n";
1856		    }
1857		}
1858		if ($signed{$count}) {
1859		    my $commit_signer = $commit_signer_hash{$email};
1860		    foreach my $ref (@{$commit_signer}) {
1861			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1862		    }
1863		}
1864
1865		$count++;
1866	    }
1867	}
1868	my $date_ref = \$email_git_since;
1869	$date_ref = \$email_hg_since if (vcs_is_hg());
1870	if ($print_options) {
1871	    $print_options = 0;
1872	    if (vcs_exists()) {
1873		print STDERR <<EOT
1874
1875Version Control options:
1876g  use git history      [$email_git]
1877gf use git-fallback     [$email_git_fallback]
1878b  use git blame        [$email_git_blame]
1879bs use blame signatures [$email_git_blame_signatures]
1880c# minimum commits      [$email_git_min_signatures]
1881%# min percent          [$email_git_min_percent]
1882d# history to use       [$$date_ref]
1883x# max maintainers      [$email_git_max_maintainers]
1884t  all signature types  [$email_git_all_signature_types]
1885m  use .mailmap         [$email_use_mailmap]
1886EOT
1887	    }
1888	    print STDERR <<EOT
1889
1890Additional options:
18910  toggle all
1892tm toggle maintainers
1893tg toggle git entries
1894tl toggle open list entries
1895ts toggle subscriber list entries
1896f  emails in file       [$email_file_emails]
1897k  keywords in file     [$keywords]
1898r  remove duplicates    [$email_remove_duplicates]
1899p# pattern match depth  [$pattern_depth]
1900EOT
1901	}
1902	print STDERR
1903"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1904
1905	my $input = <STDIN>;
1906	chomp($input);
1907
1908	$redraw = 1;
1909	my $rerun = 0;
1910	my @wish = split(/[, ]+/, $input);
1911	foreach my $nr (@wish) {
1912	    $nr = lc($nr);
1913	    my $sel = substr($nr, 0, 1);
1914	    my $str = substr($nr, 1);
1915	    my $val = 0;
1916	    $val = $1 if $str =~ /^(\d+)$/;
1917
1918	    if ($sel eq "y") {
1919		$interactive = 0;
1920		$done = 1;
1921		$output_rolestats = 0;
1922		$output_roles = 0;
1923		$output_substatus = 0;
1924		last;
1925	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1926		$selected{$nr - 1} = !$selected{$nr - 1};
1927	    } elsif ($sel eq "*" || $sel eq '^') {
1928		my $toggle = 0;
1929		$toggle = 1 if ($sel eq '*');
1930		for (my $i = 0; $i < $count; $i++) {
1931		    $selected{$i} = $toggle;
1932		}
1933	    } elsif ($sel eq "0") {
1934		for (my $i = 0; $i < $count; $i++) {
1935		    $selected{$i} = !$selected{$i};
1936		}
1937	    } elsif ($sel eq "t") {
1938		if (lc($str) eq "m") {
1939		    for (my $i = 0; $i < $count; $i++) {
1940			$selected{$i} = !$selected{$i}
1941			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1942		    }
1943		} elsif (lc($str) eq "g") {
1944		    for (my $i = 0; $i < $count; $i++) {
1945			$selected{$i} = !$selected{$i}
1946			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1947		    }
1948		} elsif (lc($str) eq "l") {
1949		    for (my $i = 0; $i < $count; $i++) {
1950			$selected{$i} = !$selected{$i}
1951			    if ($list[$i]->[1] =~ /^(open list)/i);
1952		    }
1953		} elsif (lc($str) eq "s") {
1954		    for (my $i = 0; $i < $count; $i++) {
1955			$selected{$i} = !$selected{$i}
1956			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1957		    }
1958		}
1959	    } elsif ($sel eq "a") {
1960		if ($val > 0 && $val <= $count) {
1961		    $authored{$val - 1} = !$authored{$val - 1};
1962		} elsif ($str eq '*' || $str eq '^') {
1963		    my $toggle = 0;
1964		    $toggle = 1 if ($str eq '*');
1965		    for (my $i = 0; $i < $count; $i++) {
1966			$authored{$i} = $toggle;
1967		    }
1968		}
1969	    } elsif ($sel eq "s") {
1970		if ($val > 0 && $val <= $count) {
1971		    $signed{$val - 1} = !$signed{$val - 1};
1972		} elsif ($str eq '*' || $str eq '^') {
1973		    my $toggle = 0;
1974		    $toggle = 1 if ($str eq '*');
1975		    for (my $i = 0; $i < $count; $i++) {
1976			$signed{$i} = $toggle;
1977		    }
1978		}
1979	    } elsif ($sel eq "o") {
1980		$print_options = 1;
1981		$redraw = 1;
1982	    } elsif ($sel eq "g") {
1983		if ($str eq "f") {
1984		    bool_invert(\$email_git_fallback);
1985		} else {
1986		    bool_invert(\$email_git);
1987		}
1988		$rerun = 1;
1989	    } elsif ($sel eq "b") {
1990		if ($str eq "s") {
1991		    bool_invert(\$email_git_blame_signatures);
1992		} else {
1993		    bool_invert(\$email_git_blame);
1994		}
1995		$rerun = 1;
1996	    } elsif ($sel eq "c") {
1997		if ($val > 0) {
1998		    $email_git_min_signatures = $val;
1999		    $rerun = 1;
2000		}
2001	    } elsif ($sel eq "x") {
2002		if ($val > 0) {
2003		    $email_git_max_maintainers = $val;
2004		    $rerun = 1;
2005		}
2006	    } elsif ($sel eq "%") {
2007		if ($str ne "" && $val >= 0) {
2008		    $email_git_min_percent = $val;
2009		    $rerun = 1;
2010		}
2011	    } elsif ($sel eq "d") {
2012		if (vcs_is_git()) {
2013		    $email_git_since = $str;
2014		} elsif (vcs_is_hg()) {
2015		    $email_hg_since = $str;
2016		}
2017		$rerun = 1;
2018	    } elsif ($sel eq "t") {
2019		bool_invert(\$email_git_all_signature_types);
2020		$rerun = 1;
2021	    } elsif ($sel eq "f") {
2022		bool_invert(\$email_file_emails);
2023		$rerun = 1;
2024	    } elsif ($sel eq "r") {
2025		bool_invert(\$email_remove_duplicates);
2026		$rerun = 1;
2027	    } elsif ($sel eq "m") {
2028		bool_invert(\$email_use_mailmap);
2029		read_mailmap();
2030		$rerun = 1;
2031	    } elsif ($sel eq "k") {
2032		bool_invert(\$keywords);
2033		$rerun = 1;
2034	    } elsif ($sel eq "p") {
2035		if ($str ne "" && $val >= 0) {
2036		    $pattern_depth = $val;
2037		    $rerun = 1;
2038		}
2039	    } elsif ($sel eq "h" || $sel eq "?") {
2040		print STDERR <<EOT
2041
2042Interactive mode allows you to select the various maintainers, submitters,
2043commit signers and mailing lists that could be CC'd on a patch.
2044
2045Any *'d entry is selected.
2046
2047If you have git or hg installed, you can choose to summarize the commit
2048history of files in the patch.  Also, each line of the current file can
2049be matched to its commit author and that commits signers with blame.
2050
2051Various knobs exist to control the length of time for active commit
2052tracking, the maximum number of commit authors and signers to add,
2053and such.
2054
2055Enter selections at the prompt until you are satisfied that the selected
2056maintainers are appropriate.  You may enter multiple selections separated
2057by either commas or spaces.
2058
2059EOT
2060	    } else {
2061		print STDERR "invalid option: '$nr'\n";
2062		$redraw = 0;
2063	    }
2064	}
2065	if ($rerun) {
2066	    print STDERR "git-blame can be very slow, please have patience..."
2067		if ($email_git_blame);
2068	    goto &get_maintainers;
2069	}
2070    }
2071
2072    #drop not selected entries
2073    $count = 0;
2074    my @new_emailto = ();
2075    foreach my $entry (@list) {
2076	if ($selected{$count}) {
2077	    push(@new_emailto, $list[$count]);
2078	}
2079	$count++;
2080    }
2081    return @new_emailto;
2082}
2083
2084sub bool_invert {
2085    my ($bool_ref) = @_;
2086
2087    if ($$bool_ref) {
2088	$$bool_ref = 0;
2089    } else {
2090	$$bool_ref = 1;
2091    }
2092}
2093
2094sub deduplicate_email {
2095    my ($email) = @_;
2096
2097    my $matched = 0;
2098    my ($name, $address) = parse_email($email);
2099    $email = format_email($name, $address, 1);
2100    $email = mailmap_email($email);
2101
2102    return $email if (!$email_remove_duplicates);
2103
2104    ($name, $address) = parse_email($email);
2105
2106    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2107	$name = $deduplicate_name_hash{lc($name)}->[0];
2108	$address = $deduplicate_name_hash{lc($name)}->[1];
2109	$matched = 1;
2110    } elsif ($deduplicate_address_hash{lc($address)}) {
2111	$name = $deduplicate_address_hash{lc($address)}->[0];
2112	$address = $deduplicate_address_hash{lc($address)}->[1];
2113	$matched = 1;
2114    }
2115    if (!$matched) {
2116	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2117	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2118    }
2119    $email = format_email($name, $address, 1);
2120    $email = mailmap_email($email);
2121    return $email;
2122}
2123
2124sub save_commits_by_author {
2125    my (@lines) = @_;
2126
2127    my @authors = ();
2128    my @commits = ();
2129    my @subjects = ();
2130
2131    foreach my $line (@lines) {
2132	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2133	    my $author = $1;
2134	    $author = deduplicate_email($author);
2135	    push(@authors, $author);
2136	}
2137	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2138	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2139    }
2140
2141    for (my $i = 0; $i < @authors; $i++) {
2142	my $exists = 0;
2143	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2144	    if (@{$ref}[0] eq $commits[$i] &&
2145		@{$ref}[1] eq $subjects[$i]) {
2146		$exists = 1;
2147		last;
2148	    }
2149	}
2150	if (!$exists) {
2151	    push(@{$commit_author_hash{$authors[$i]}},
2152		 [ ($commits[$i], $subjects[$i]) ]);
2153	}
2154    }
2155}
2156
2157sub save_commits_by_signer {
2158    my (@lines) = @_;
2159
2160    my $commit = "";
2161    my $subject = "";
2162
2163    foreach my $line (@lines) {
2164	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2165	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2166	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2167	    my @signatures = ($line);
2168	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2169	    my @types = @$types_ref;
2170	    my @signers = @$signers_ref;
2171
2172	    my $type = $types[0];
2173	    my $signer = $signers[0];
2174
2175	    $signer = deduplicate_email($signer);
2176
2177	    my $exists = 0;
2178	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2179		if (@{$ref}[0] eq $commit &&
2180		    @{$ref}[1] eq $subject &&
2181		    @{$ref}[2] eq $type) {
2182		    $exists = 1;
2183		    last;
2184		}
2185	    }
2186	    if (!$exists) {
2187		push(@{$commit_signer_hash{$signer}},
2188		     [ ($commit, $subject, $type) ]);
2189	    }
2190	}
2191    }
2192}
2193
2194sub vcs_assign {
2195    my ($role, $divisor, @lines) = @_;
2196
2197    my %hash;
2198    my $count = 0;
2199
2200    return if (@lines <= 0);
2201
2202    if ($divisor <= 0) {
2203	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2204	$divisor = 1;
2205    }
2206
2207    @lines = mailmap(@lines);
2208
2209    return if (@lines <= 0);
2210
2211    @lines = sort(@lines);
2212
2213    # uniq -c
2214    $hash{$_}++ for @lines;
2215
2216    # sort -rn
2217    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2218	my $sign_offs = $hash{$line};
2219	my $percent = $sign_offs * 100 / $divisor;
2220
2221	$percent = 100 if ($percent > 100);
2222	next if (ignore_email_address($line));
2223	$count++;
2224	last if ($sign_offs < $email_git_min_signatures ||
2225		 $count > $email_git_max_maintainers ||
2226		 $percent < $email_git_min_percent);
2227	push_email_address($line, '');
2228	if ($output_rolestats) {
2229	    my $fmt_percent = sprintf("%.0f", $percent);
2230	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2231	} else {
2232	    add_role($line, $role);
2233	}
2234    }
2235}
2236
2237sub vcs_file_signoffs {
2238    my ($file) = @_;
2239
2240    my $authors_ref;
2241    my $signers_ref;
2242    my $stats_ref;
2243    my @authors = ();
2244    my @signers = ();
2245    my @stats = ();
2246    my $commits;
2247
2248    $vcs_used = vcs_exists();
2249    return if (!$vcs_used);
2250
2251    my $cmd = $VCS_cmds{"find_signers_cmd"};
2252    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2253
2254    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2255
2256    @signers = @{$signers_ref} if defined $signers_ref;
2257    @authors = @{$authors_ref} if defined $authors_ref;
2258    @stats = @{$stats_ref} if defined $stats_ref;
2259
2260#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2261
2262    foreach my $signer (@signers) {
2263	$signer = deduplicate_email($signer);
2264    }
2265
2266    vcs_assign("commit_signer", $commits, @signers);
2267    vcs_assign("authored", $commits, @authors);
2268    if ($#authors == $#stats) {
2269	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2270	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2271
2272	my $added = 0;
2273	my $deleted = 0;
2274	for (my $i = 0; $i <= $#stats; $i++) {
2275	    if ($stats[$i] =~ /$stat_pattern/) {
2276		$added += $1;
2277		$deleted += $2;
2278	    }
2279	}
2280	my @tmp_authors = uniq(@authors);
2281	foreach my $author (@tmp_authors) {
2282	    $author = deduplicate_email($author);
2283	}
2284	@tmp_authors = uniq(@tmp_authors);
2285	my @list_added = ();
2286	my @list_deleted = ();
2287	foreach my $author (@tmp_authors) {
2288	    my $auth_added = 0;
2289	    my $auth_deleted = 0;
2290	    for (my $i = 0; $i <= $#stats; $i++) {
2291		if ($author eq deduplicate_email($authors[$i]) &&
2292		    $stats[$i] =~ /$stat_pattern/) {
2293		    $auth_added += $1;
2294		    $auth_deleted += $2;
2295		}
2296	    }
2297	    for (my $i = 0; $i < $auth_added; $i++) {
2298		push(@list_added, $author);
2299	    }
2300	    for (my $i = 0; $i < $auth_deleted; $i++) {
2301		push(@list_deleted, $author);
2302	    }
2303	}
2304	vcs_assign("added_lines", $added, @list_added);
2305	vcs_assign("removed_lines", $deleted, @list_deleted);
2306    }
2307}
2308
2309sub vcs_file_blame {
2310    my ($file) = @_;
2311
2312    my @signers = ();
2313    my @all_commits = ();
2314    my @commits = ();
2315    my $total_commits;
2316    my $total_lines;
2317
2318    $vcs_used = vcs_exists();
2319    return if (!$vcs_used);
2320
2321    @all_commits = vcs_blame($file);
2322    @commits = uniq(@all_commits);
2323    $total_commits = @commits;
2324    $total_lines = @all_commits;
2325
2326    if ($email_git_blame_signatures) {
2327	if (vcs_is_hg()) {
2328	    my $commit_count;
2329	    my $commit_authors_ref;
2330	    my $commit_signers_ref;
2331	    my $stats_ref;
2332	    my @commit_authors = ();
2333	    my @commit_signers = ();
2334	    my $commit = join(" -r ", @commits);
2335	    my $cmd;
2336
2337	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2338	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2339
2340	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2341	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2342	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2343
2344	    push(@signers, @commit_signers);
2345	} else {
2346	    foreach my $commit (@commits) {
2347		my $commit_count;
2348		my $commit_authors_ref;
2349		my $commit_signers_ref;
2350		my $stats_ref;
2351		my @commit_authors = ();
2352		my @commit_signers = ();
2353		my $cmd;
2354
2355		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2356		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2357
2358		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2359		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2360		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2361
2362		push(@signers, @commit_signers);
2363	    }
2364	}
2365    }
2366
2367    if ($from_filename) {
2368	if ($output_rolestats) {
2369	    my @blame_signers;
2370	    if (vcs_is_hg()) {{		# Double brace for last exit
2371		my $commit_count;
2372		my @commit_signers = ();
2373		@commits = uniq(@commits);
2374		@commits = sort(@commits);
2375		my $commit = join(" -r ", @commits);
2376		my $cmd;
2377
2378		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2379		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2380
2381		my @lines = ();
2382
2383		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2384
2385		if (!$email_git_penguin_chiefs) {
2386		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2387		}
2388
2389		last if !@lines;
2390
2391		my @authors = ();
2392		foreach my $line (@lines) {
2393		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2394			my $author = $1;
2395			$author = deduplicate_email($author);
2396			push(@authors, $author);
2397		    }
2398		}
2399
2400		save_commits_by_author(@lines) if ($interactive);
2401		save_commits_by_signer(@lines) if ($interactive);
2402
2403		push(@signers, @authors);
2404	    }}
2405	    else {
2406		foreach my $commit (@commits) {
2407		    my $i;
2408		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2409		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2410		    my @author = vcs_find_author($cmd);
2411		    next if !@author;
2412
2413		    my $formatted_author = deduplicate_email($author[0]);
2414
2415		    my $count = grep(/$commit/, @all_commits);
2416		    for ($i = 0; $i < $count ; $i++) {
2417			push(@blame_signers, $formatted_author);
2418		    }
2419		}
2420	    }
2421	    if (@blame_signers) {
2422		vcs_assign("authored lines", $total_lines, @blame_signers);
2423	    }
2424	}
2425	foreach my $signer (@signers) {
2426	    $signer = deduplicate_email($signer);
2427	}
2428	vcs_assign("commits", $total_commits, @signers);
2429    } else {
2430	foreach my $signer (@signers) {
2431	    $signer = deduplicate_email($signer);
2432	}
2433	vcs_assign("modified commits", $total_commits, @signers);
2434    }
2435}
2436
2437sub vcs_file_exists {
2438    my ($file) = @_;
2439
2440    my $exists;
2441
2442    my $vcs_used = vcs_exists();
2443    return 0 if (!$vcs_used);
2444
2445    my $cmd = $VCS_cmds{"file_exists_cmd"};
2446    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2447    $cmd .= " 2>&1";
2448    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2449
2450    return 0 if ($? != 0);
2451
2452    return $exists;
2453}
2454
2455sub vcs_list_files {
2456    my ($file) = @_;
2457
2458    my @lsfiles = ();
2459
2460    my $vcs_used = vcs_exists();
2461    return 0 if (!$vcs_used);
2462
2463    my $cmd = $VCS_cmds{"list_files_cmd"};
2464    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2465    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2466
2467    return () if ($? != 0);
2468
2469    return @lsfiles;
2470}
2471
2472sub uniq {
2473    my (@parms) = @_;
2474
2475    my %saw;
2476    @parms = grep(!$saw{$_}++, @parms);
2477    return @parms;
2478}
2479
2480sub sort_and_uniq {
2481    my (@parms) = @_;
2482
2483    my %saw;
2484    @parms = sort @parms;
2485    @parms = grep(!$saw{$_}++, @parms);
2486    return @parms;
2487}
2488
2489sub clean_file_emails {
2490    my (@file_emails) = @_;
2491    my @fmt_emails = ();
2492
2493    foreach my $email (@file_emails) {
2494	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2495	my ($name, $address) = parse_email($email);
2496
2497	# Strip quotes for easier processing, format_email will add them back
2498	$name =~ s/^"(.*)"$/$1/;
2499
2500	# Split into name-like parts and remove stray punctuation particles
2501	my @nw = split(/[^\p{L}\'\,\.\+-]/, $name);
2502	@nw = grep(!/^[\'\,\.\+-]$/, @nw);
2503
2504	# Make a best effort to extract the name, and only the name, by taking
2505	# only the last two names, or in the case of obvious initials, the last
2506	# three names.
2507	if (@nw > 2) {
2508	    my $first = $nw[@nw - 3];
2509	    my $middle = $nw[@nw - 2];
2510	    my $last = $nw[@nw - 1];
2511
2512	    if (((length($first) == 1 && $first =~ m/\p{L}/) ||
2513		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2514		(length($middle) == 1 ||
2515		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2516		$name = "$first $middle $last";
2517	    } else {
2518		$name = "$middle $last";
2519	    }
2520	} else {
2521	    $name = "@nw";
2522	}
2523
2524	if (substr($name, -1) =~ /[,\.]/) {
2525	    $name = substr($name, 0, length($name) - 1);
2526	}
2527
2528	if (substr($name, 0, 1) =~ /[,\.]/) {
2529	    $name = substr($name, 1, length($name) - 1);
2530	}
2531
2532	my $fmt_email = format_email($name, $address, $email_usename);
2533	push(@fmt_emails, $fmt_email);
2534    }
2535    return @fmt_emails;
2536}
2537
2538sub merge_email {
2539    my @lines;
2540    my %saw;
2541
2542    for (@_) {
2543	my ($address, $role) = @$_;
2544	if (!$saw{$address}) {
2545	    if ($output_roles) {
2546		push(@lines, "$address ($role)");
2547	    } else {
2548		push(@lines, $address);
2549	    }
2550	    $saw{$address} = 1;
2551	}
2552    }
2553
2554    return @lines;
2555}
2556
2557sub output {
2558    my (@parms) = @_;
2559
2560    if ($output_multiline) {
2561	foreach my $line (@parms) {
2562	    print("${line}\n");
2563	}
2564    } else {
2565	print(join($output_separator, @parms));
2566	print("\n");
2567    }
2568}
2569
2570my $rfc822re;
2571
2572sub make_rfc822re {
2573#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2574#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2575#   This regexp will only work on addresses which have had comments stripped
2576#   and replaced with rfc822_lwsp.
2577
2578    my $specials = '()<>@,;:\\\\".\\[\\]';
2579    my $controls = '\\000-\\037\\177';
2580
2581    my $dtext = "[^\\[\\]\\r\\\\]";
2582    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2583
2584    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2585
2586#   Use zero-width assertion to spot the limit of an atom.  A simple
2587#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2588    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2589    my $word = "(?:$atom|$quoted_string)";
2590    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2591
2592    my $sub_domain = "(?:$atom|$domain_literal)";
2593    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2594
2595    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2596
2597    my $phrase = "$word*";
2598    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2599    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2600    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2601
2602    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2603    my $address = "(?:$mailbox|$group)";
2604
2605    return "$rfc822_lwsp*$address";
2606}
2607
2608sub rfc822_strip_comments {
2609    my $s = shift;
2610#   Recursively remove comments, and replace with a single space.  The simpler
2611#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2612#   chars in atoms, for example.
2613
2614    while ($s =~ s/^((?:[^"\\]|\\.)*
2615                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2616                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2617    return $s;
2618}
2619
2620#   valid: returns true if the parameter is an RFC822 valid address
2621#
2622sub rfc822_valid {
2623    my $s = rfc822_strip_comments(shift);
2624
2625    if (!$rfc822re) {
2626        $rfc822re = make_rfc822re();
2627    }
2628
2629    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2630}
2631
2632#   validlist: In scalar context, returns true if the parameter is an RFC822
2633#              valid list of addresses.
2634#
2635#              In list context, returns an empty list on failure (an invalid
2636#              address was found); otherwise a list whose first element is the
2637#              number of addresses found and whose remaining elements are the
2638#              addresses.  This is needed to disambiguate failure (invalid)
2639#              from success with no addresses found, because an empty string is
2640#              a valid list.
2641
2642sub rfc822_validlist {
2643    my $s = rfc822_strip_comments(shift);
2644
2645    if (!$rfc822re) {
2646        $rfc822re = make_rfc822re();
2647    }
2648    # * null list items are valid according to the RFC
2649    # * the '1' business is to aid in distinguishing failure from no results
2650
2651    my @r;
2652    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2653	$s =~ m/^$rfc822_char*$/) {
2654        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2655            push(@r, $1);
2656        }
2657        return wantarray ? (scalar(@r), @r) : 1;
2658    }
2659    return wantarray ? () : 0;
2660}
2661