xref: /linux/scripts/get_maintainer.pl (revision 273b281fa22c293963ee3e6eec418f5dda2dbc83)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.21';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git = 1;
27my $email_git_penguin_chiefs = 0;
28my $email_git_min_signatures = 1;
29my $email_git_max_maintainers = 5;
30my $email_git_min_percent = 5;
31my $email_git_since = "1-year-ago";
32my $email_git_blame = 0;
33my $email_remove_duplicates = 1;
34my $output_multiline = 1;
35my $output_separator = ", ";
36my $scm = 0;
37my $web = 0;
38my $subsystem = 0;
39my $status = 0;
40my $keywords = 1;
41my $from_filename = 0;
42my $pattern_depth = 0;
43my $version = 0;
44my $help = 0;
45
46my $exit = 0;
47
48my @penguin_chief = ();
49push(@penguin_chief,"Linus Torvalds:torvalds\@linux-foundation.org");
50#Andrew wants in on most everything - 2009/01/14
51#push(@penguin_chief,"Andrew Morton:akpm\@linux-foundation.org");
52
53my @penguin_chief_names = ();
54foreach my $chief (@penguin_chief) {
55    if ($chief =~ m/^(.*):(.*)/) {
56	my $chief_name = $1;
57	my $chief_addr = $2;
58	push(@penguin_chief_names, $chief_name);
59    }
60}
61my $penguin_chiefs = "\(" . join("|",@penguin_chief_names) . "\)";
62
63# rfc822 email address - preloaded methods go here.
64my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
65my $rfc822_char = '[\\000-\\377]';
66
67if (!GetOptions(
68		'email!' => \$email,
69		'git!' => \$email_git,
70		'git-chief-penguins!' => \$email_git_penguin_chiefs,
71		'git-min-signatures=i' => \$email_git_min_signatures,
72		'git-max-maintainers=i' => \$email_git_max_maintainers,
73		'git-min-percent=i' => \$email_git_min_percent,
74		'git-since=s' => \$email_git_since,
75		'git-blame!' => \$email_git_blame,
76		'remove-duplicates!' => \$email_remove_duplicates,
77		'm!' => \$email_maintainer,
78		'n!' => \$email_usename,
79		'l!' => \$email_list,
80		's!' => \$email_subscriber_list,
81		'multiline!' => \$output_multiline,
82		'separator=s' => \$output_separator,
83		'subsystem!' => \$subsystem,
84		'status!' => \$status,
85		'scm!' => \$scm,
86		'web!' => \$web,
87		'pattern-depth=i' => \$pattern_depth,
88		'k|keywords!' => \$keywords,
89		'f|file' => \$from_filename,
90		'v|version' => \$version,
91		'h|help' => \$help,
92		)) {
93    usage();
94    die "$P: invalid argument\n";
95}
96
97if ($help != 0) {
98    usage();
99    exit 0;
100}
101
102if ($version != 0) {
103    print("${P} ${V}\n");
104    exit 0;
105}
106
107if ($#ARGV < 0) {
108    usage();
109    die "$P: argument missing: patchfile or -f file please\n";
110}
111
112if ($output_separator ne ", ") {
113    $output_multiline = 0;
114}
115
116my $selections = $email + $scm + $status + $subsystem + $web;
117if ($selections == 0) {
118    usage();
119    die "$P:  Missing required option: email, scm, status, subsystem or web\n";
120}
121
122if ($email &&
123    ($email_maintainer + $email_list + $email_subscriber_list +
124     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
125    usage();
126    die "$P: Please select at least 1 email option\n";
127}
128
129if (!top_of_kernel_tree($lk_path)) {
130    die "$P: The current directory does not appear to be "
131	. "a linux kernel source tree.\n";
132}
133
134## Read MAINTAINERS for type/value pairs
135
136my @typevalue = ();
137my %keyword_hash;
138
139open(MAINT, "<${lk_path}MAINTAINERS") || die "$P: Can't open MAINTAINERS\n";
140while (<MAINT>) {
141    my $line = $_;
142
143    if ($line =~ m/^(\C):\s*(.*)/) {
144	my $type = $1;
145	my $value = $2;
146
147	##Filename pattern matching
148	if ($type eq "F" || $type eq "X") {
149	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
150	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
151	    $value =~ s/\?/\./g;         ##Convert ? to .
152	    ##if pattern is a directory and it lacks a trailing slash, add one
153	    if ((-d $value)) {
154		$value =~ s@([^/])$@$1/@;
155	    }
156	} elsif ($type eq "K") {
157	    $keyword_hash{@typevalue} = $value;
158	}
159	push(@typevalue, "$type:$value");
160    } elsif (!/^(\s)*$/) {
161	$line =~ s/\n$//g;
162	push(@typevalue, $line);
163    }
164}
165close(MAINT);
166
167my %mailmap;
168
169if ($email_remove_duplicates) {
170    open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
171    while (<MAILMAP>) {
172	my $line = $_;
173
174	next if ($line =~ m/^\s*#/);
175	next if ($line =~ m/^\s*$/);
176
177	my ($name, $address) = parse_email($line);
178	$line = format_email($name, $address);
179
180	next if ($line =~ m/^\s*$/);
181
182	if (exists($mailmap{$name})) {
183	    my $obj = $mailmap{$name};
184	    push(@$obj, $address);
185	} else {
186	    my @arr = ($address);
187	    $mailmap{$name} = \@arr;
188	}
189    }
190    close(MAILMAP);
191}
192
193## use the filenames on the command line or find the filenames in the patchfiles
194
195my @files = ();
196my @range = ();
197my @keyword_tvi = ();
198
199foreach my $file (@ARGV) {
200    ##if $file is a directory and it lacks a trailing slash, add one
201    if ((-d $file)) {
202	$file =~ s@([^/])$@$1/@;
203    } elsif (!(-f $file)) {
204	die "$P: file '${file}' not found\n";
205    }
206    if ($from_filename) {
207	push(@files, $file);
208	if (-f $file && $keywords) {
209	    open(FILE, "<$file") or die "$P: Can't open ${file}\n";
210	    while (<FILE>) {
211		my $patch_line = $_;
212		foreach my $line (keys %keyword_hash) {
213		    if ($patch_line =~ m/^.*$keyword_hash{$line}/x) {
214			push(@keyword_tvi, $line);
215		    }
216		}
217	    }
218	    close(FILE);
219	}
220    } else {
221	my $file_cnt = @files;
222	my $lastfile;
223	open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
224	while (<PATCH>) {
225	    my $patch_line = $_;
226	    if (m/^\+\+\+\s+(\S+)/) {
227		my $filename = $1;
228		$filename =~ s@^[^/]*/@@;
229		$filename =~ s@\n@@;
230		$lastfile = $filename;
231		push(@files, $filename);
232	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
233		if ($email_git_blame) {
234		    push(@range, "$lastfile:$1:$2");
235		}
236	    } elsif ($keywords) {
237		foreach my $line (keys %keyword_hash) {
238		    if ($patch_line =~ m/^[+-].*$keyword_hash{$line}/x) {
239			push(@keyword_tvi, $line);
240		    }
241		}
242	    }
243	}
244	close(PATCH);
245	if ($file_cnt == @files) {
246	    warn "$P: file '${file}' doesn't appear to be a patch.  "
247		. "Add -f to options?\n";
248	}
249	@files = sort_and_uniq(@files);
250    }
251}
252
253my @email_to = ();
254my @list_to = ();
255my @scm = ();
256my @web = ();
257my @subsystem = ();
258my @status = ();
259
260# Find responsible parties
261
262foreach my $file (@files) {
263
264#Do not match excluded file patterns
265
266    my $exclude = 0;
267    foreach my $line (@typevalue) {
268	if ($line =~ m/^(\C):\s*(.*)/) {
269	    my $type = $1;
270	    my $value = $2;
271	    if ($type eq 'X') {
272		if (file_match_pattern($file, $value)) {
273		    $exclude = 1;
274		    last;
275		}
276	    }
277	}
278    }
279
280    if (!$exclude) {
281	my $tvi = 0;
282	my %hash;
283	foreach my $line (@typevalue) {
284	    if ($line =~ m/^(\C):\s*(.*)/) {
285		my $type = $1;
286		my $value = $2;
287		if ($type eq 'F') {
288		    if (file_match_pattern($file, $value)) {
289			my $value_pd = ($value =~ tr@/@@);
290			my $file_pd = ($file  =~ tr@/@@);
291			$value_pd++ if (substr($value,-1,1) ne "/");
292			if ($pattern_depth == 0 ||
293			    (($file_pd - $value_pd) < $pattern_depth)) {
294			    $hash{$tvi} = $value_pd;
295			}
296		    }
297		}
298	    }
299	    $tvi++;
300	}
301	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
302	    add_categories($line);
303	}
304    }
305
306    if ($email && $email_git) {
307	recent_git_signoffs($file);
308    }
309
310    if ($email && $email_git_blame) {
311	git_assign_blame($file);
312    }
313}
314
315if ($keywords) {
316    @keyword_tvi = sort_and_uniq(@keyword_tvi);
317    foreach my $line (@keyword_tvi) {
318	add_categories($line);
319    }
320}
321
322if ($email) {
323    foreach my $chief (@penguin_chief) {
324	if ($chief =~ m/^(.*):(.*)/) {
325	    my $email_address;
326
327	    $email_address = format_email($1, $2);
328	    if ($email_git_penguin_chiefs) {
329		push(@email_to, $email_address);
330	    } else {
331		@email_to = grep(!/${email_address}/, @email_to);
332	    }
333	}
334    }
335}
336
337if ($email || $email_list) {
338    my @to = ();
339    if ($email) {
340	@to = (@to, @email_to);
341    }
342    if ($email_list) {
343	@to = (@to, @list_to);
344    }
345    output(uniq(@to));
346}
347
348if ($scm) {
349    @scm = uniq(@scm);
350    output(@scm);
351}
352
353if ($status) {
354    @status = uniq(@status);
355    output(@status);
356}
357
358if ($subsystem) {
359    @subsystem = uniq(@subsystem);
360    output(@subsystem);
361}
362
363if ($web) {
364    @web = uniq(@web);
365    output(@web);
366}
367
368exit($exit);
369
370sub file_match_pattern {
371    my ($file, $pattern) = @_;
372    if (substr($pattern, -1) eq "/") {
373	if ($file =~ m@^$pattern@) {
374	    return 1;
375	}
376    } else {
377	if ($file =~ m@^$pattern@) {
378	    my $s1 = ($file =~ tr@/@@);
379	    my $s2 = ($pattern =~ tr@/@@);
380	    if ($s1 == $s2) {
381		return 1;
382	    }
383	}
384    }
385    return 0;
386}
387
388sub usage {
389    print <<EOT;
390usage: $P [options] patchfile
391       $P [options] -f file|directory
392version: $V
393
394MAINTAINER field selection options:
395  --email => print email address(es) if any
396    --git => include recent git \*-by: signers
397    --git-chief-penguins => include ${penguin_chiefs}
398    --git-min-signatures => number of signatures required (default: 1)
399    --git-max-maintainers => maximum maintainers to add (default: 5)
400    --git-min-percent => minimum percentage of commits required (default: 5)
401    --git-since => git history to use (default: 1-year-ago)
402    --git-blame => use git blame to find modified commits for patch or file
403    --m => include maintainer(s) if any
404    --n => include name 'Full Name <addr\@domain.tld>'
405    --l => include list(s) if any
406    --s => include subscriber only list(s) if any
407    --remove-duplicates => minimize duplicate email names/addresses
408  --scm => print SCM tree(s) if any
409  --status => print status if any
410  --subsystem => print subsystem name if any
411  --web => print website(s) if any
412
413Output type options:
414  --separator [, ] => separator for multiple entries on 1 line
415    using --separator also sets --nomultiline if --separator is not [, ]
416  --multiline => print 1 entry per line
417
418Other options:
419  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
420  --keywords => scan patch for keywords (default: 1 (on))
421  --version => show version
422  --help => show this help information
423
424Default options:
425  [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
426
427Notes:
428  Using "-f directory" may give unexpected results:
429      Used with "--git", git signators for _all_ files in and below
430          directory are examined as git recurses directories.
431          Any specified X: (exclude) pattern matches are _not_ ignored.
432      Used with "--nogit", directory is used as a pattern match,
433         no individual file within the directory or subdirectory
434         is matched.
435      Used with "--git-blame", does not iterate all files in directory
436  Using "--git-blame" is slow and may add old committers and authors
437      that are no longer active maintainers to the output.
438EOT
439}
440
441sub top_of_kernel_tree {
442	my ($lk_path) = @_;
443
444	if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
445	    $lk_path .= "/";
446	}
447	if (   (-f "${lk_path}COPYING")
448	    && (-f "${lk_path}CREDITS")
449	    && (-f "${lk_path}Kbuild")
450	    && (-f "${lk_path}MAINTAINERS")
451	    && (-f "${lk_path}Makefile")
452	    && (-f "${lk_path}README")
453	    && (-d "${lk_path}Documentation")
454	    && (-d "${lk_path}arch")
455	    && (-d "${lk_path}include")
456	    && (-d "${lk_path}drivers")
457	    && (-d "${lk_path}fs")
458	    && (-d "${lk_path}init")
459	    && (-d "${lk_path}ipc")
460	    && (-d "${lk_path}kernel")
461	    && (-d "${lk_path}lib")
462	    && (-d "${lk_path}scripts")) {
463		return 1;
464	}
465	return 0;
466}
467
468sub parse_email {
469    my ($formatted_email) = @_;
470
471    my $name = "";
472    my $address = "";
473
474    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
475	$name = $1;
476	$address = $2;
477    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
478	$address = $1;
479    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
480	$address = $1;
481    }
482
483    $name =~ s/^\s+|\s+$//g;
484    $name =~ s/^\"|\"$//g;
485    $address =~ s/^\s+|\s+$//g;
486
487    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
488	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
489	$name = "\"$name\"";
490    }
491
492    return ($name, $address);
493}
494
495sub format_email {
496    my ($name, $address) = @_;
497
498    my $formatted_email;
499
500    $name =~ s/^\s+|\s+$//g;
501    $name =~ s/^\"|\"$//g;
502    $address =~ s/^\s+|\s+$//g;
503
504    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
505	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
506	$name = "\"$name\"";
507    }
508
509    if ($email_usename) {
510	if ("$name" eq "") {
511	    $formatted_email = "$address";
512	} else {
513	    $formatted_email = "$name <${address}>";
514	}
515    } else {
516	$formatted_email = $address;
517    }
518
519    return $formatted_email;
520}
521
522sub find_starting_index {
523    my ($index) = @_;
524
525    while ($index > 0) {
526	my $tv = $typevalue[$index];
527	if (!($tv =~ m/^(\C):\s*(.*)/)) {
528	    last;
529	}
530	$index--;
531    }
532
533    return $index;
534}
535
536sub find_ending_index {
537    my ($index) = @_;
538
539    while ($index < @typevalue) {
540	my $tv = $typevalue[$index];
541	if (!($tv =~ m/^(\C):\s*(.*)/)) {
542	    last;
543	}
544	$index++;
545    }
546
547    return $index;
548}
549
550sub add_categories {
551    my ($index) = @_;
552
553    my $i;
554    my $start = find_starting_index($index);
555    my $end = find_ending_index($index);
556
557    push(@subsystem, $typevalue[$start]);
558
559    for ($i = $start + 1; $i < $end; $i++) {
560	my $tv = $typevalue[$i];
561	if ($tv =~ m/^(\C):\s*(.*)/) {
562	    my $ptype = $1;
563	    my $pvalue = $2;
564	    if ($ptype eq "L") {
565		my $list_address = $pvalue;
566		my $list_additional = "";
567		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
568		    $list_address = $1;
569		    $list_additional = $2;
570		}
571		if ($list_additional =~ m/subscribers-only/) {
572		    if ($email_subscriber_list) {
573			push(@list_to, $list_address);
574		    }
575		} else {
576		    if ($email_list) {
577			push(@list_to, $list_address);
578		    }
579		}
580	    } elsif ($ptype eq "M") {
581		my ($name, $address) = parse_email($pvalue);
582		if ($name eq "") {
583		    if ($i > 0) {
584			my $tv = $typevalue[$i - 1];
585			if ($tv =~ m/^(\C):\s*(.*)/) {
586			    if ($1 eq "P") {
587				$name = $2;
588				$pvalue = format_email($name, $address);
589			    }
590			}
591		    }
592		}
593		if ($email_maintainer) {
594		    push_email_addresses($pvalue);
595		}
596	    } elsif ($ptype eq "T") {
597		push(@scm, $pvalue);
598	    } elsif ($ptype eq "W") {
599		push(@web, $pvalue);
600	    } elsif ($ptype eq "S") {
601		push(@status, $pvalue);
602	    }
603	}
604    }
605}
606
607my %email_hash_name;
608my %email_hash_address;
609
610sub email_inuse {
611    my ($name, $address) = @_;
612
613    return 1 if (($name eq "") && ($address eq ""));
614    return 1 if (($name ne "") && exists($email_hash_name{$name}));
615    return 1 if (($address ne "") && exists($email_hash_address{$address}));
616
617    return 0;
618}
619
620sub push_email_address {
621    my ($line) = @_;
622
623    my ($name, $address) = parse_email($line);
624
625    if ($address eq "") {
626	return 0;
627    }
628
629    if (!$email_remove_duplicates) {
630	push(@email_to, format_email($name, $address));
631    } elsif (!email_inuse($name, $address)) {
632	push(@email_to, format_email($name, $address));
633	$email_hash_name{$name}++;
634	$email_hash_address{$address}++;
635    }
636
637    return 1;
638}
639
640sub push_email_addresses {
641    my ($address) = @_;
642
643    my @address_list = ();
644
645    if (rfc822_valid($address)) {
646	push_email_address($address);
647    } elsif (@address_list = rfc822_validlist($address)) {
648	my $array_count = shift(@address_list);
649	while (my $entry = shift(@address_list)) {
650	    push_email_address($entry);
651	}
652    } else {
653	if (!push_email_address($address)) {
654	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
655	}
656    }
657}
658
659sub which {
660    my ($bin) = @_;
661
662    foreach my $path (split(/:/, $ENV{PATH})) {
663	if (-e "$path/$bin") {
664	    return "$path/$bin";
665	}
666    }
667
668    return "";
669}
670
671sub mailmap {
672    my @lines = @_;
673    my %hash;
674
675    foreach my $line (@lines) {
676	my ($name, $address) = parse_email($line);
677	if (!exists($hash{$name})) {
678	    $hash{$name} = $address;
679	} elsif ($address ne $hash{$name}) {
680	    $address = $hash{$name};
681	    $line = format_email($name, $address);
682	}
683	if (exists($mailmap{$name})) {
684	    my $obj = $mailmap{$name};
685	    foreach my $map_address (@$obj) {
686		if (($map_address eq $address) &&
687		    ($map_address ne $hash{$name})) {
688		    $line = format_email($name, $hash{$name});
689		}
690	    }
691	}
692    }
693
694    return @lines;
695}
696
697sub recent_git_signoffs {
698    my ($file) = @_;
699
700    my $sign_offs = "";
701    my $cmd = "";
702    my $output = "";
703    my $count = 0;
704    my @lines = ();
705    my %hash;
706    my $total_sign_offs;
707
708    if (which("git") eq "") {
709	warn("$P: git not found.  Add --nogit to options?\n");
710	return;
711    }
712    if (!(-d ".git")) {
713	warn("$P: .git directory not found.  Use a git repository for better results.\n");
714	warn("$P: perhaps 'git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git'\n");
715	return;
716    }
717
718    $cmd = "git log --since=${email_git_since} -- ${file}";
719
720    $output = `${cmd}`;
721    $output =~ s/^\s*//gm;
722
723    @lines = split("\n", $output);
724
725    @lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
726    if (!$email_git_penguin_chiefs) {
727	@lines = grep(!/${penguin_chiefs}/i, @lines);
728    }
729    # cut -f2- -d":"
730    s/.*:\s*(.+)\s*/$1/ for (@lines);
731
732    $total_sign_offs = @lines;
733
734    if ($email_remove_duplicates) {
735	@lines = mailmap(@lines);
736    }
737
738    @lines = sort(@lines);
739
740    # uniq -c
741    $hash{$_}++ for @lines;
742
743    # sort -rn
744    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
745	my $sign_offs = $hash{$line};
746	$count++;
747	last if ($sign_offs < $email_git_min_signatures ||
748		 $count > $email_git_max_maintainers ||
749		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
750	push_email_address($line);
751    }
752}
753
754sub save_commits {
755    my ($cmd, @commits) = @_;
756    my $output;
757    my @lines = ();
758
759    $output = `${cmd}`;
760
761    @lines = split("\n", $output);
762    foreach my $line (@lines) {
763	if ($line =~ m/^(\w+) /) {
764	    push (@commits, $1);
765	}
766    }
767    return @commits;
768}
769
770sub git_assign_blame {
771    my ($file) = @_;
772
773    my @lines = ();
774    my @commits = ();
775    my $cmd;
776    my $output;
777    my %hash;
778    my $total_sign_offs;
779    my $count;
780
781    if (@range) {
782	foreach my $file_range_diff (@range) {
783	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
784	    my $diff_file = $1;
785	    my $diff_start = $2;
786	    my $diff_length = $3;
787	    next if (!("$file" eq "$diff_file"));
788	    $cmd = "git blame -l -L $diff_start,+$diff_length $file";
789	    @commits = save_commits($cmd, @commits);
790	}
791    } else {
792	if (-f $file) {
793	    $cmd = "git blame -l $file";
794	    @commits = save_commits($cmd, @commits);
795	}
796    }
797
798    $total_sign_offs = 0;
799    @commits = uniq(@commits);
800    foreach my $commit (@commits) {
801	$cmd = "git log -1 ${commit}";
802
803	$output = `${cmd}`;
804	$output =~ s/^\s*//gm;
805	@lines = split("\n", $output);
806
807	@lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
808	if (!$email_git_penguin_chiefs) {
809	    @lines = grep(!/${penguin_chiefs}/i, @lines);
810	}
811
812	# cut -f2- -d":"
813	s/.*:\s*(.+)\s*/$1/ for (@lines);
814
815	$total_sign_offs += @lines;
816
817	if ($email_remove_duplicates) {
818	    @lines = mailmap(@lines);
819	}
820
821	$hash{$_}++ for @lines;
822    }
823
824    $count = 0;
825    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
826	my $sign_offs = $hash{$line};
827	$count++;
828	last if ($sign_offs < $email_git_min_signatures ||
829		 $count > $email_git_max_maintainers ||
830		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
831	push_email_address($line);
832    }
833}
834
835sub uniq {
836    my @parms = @_;
837
838    my %saw;
839    @parms = grep(!$saw{$_}++, @parms);
840    return @parms;
841}
842
843sub sort_and_uniq {
844    my @parms = @_;
845
846    my %saw;
847    @parms = sort @parms;
848    @parms = grep(!$saw{$_}++, @parms);
849    return @parms;
850}
851
852sub output {
853    my @parms = @_;
854
855    if ($output_multiline) {
856	foreach my $line (@parms) {
857	    print("${line}\n");
858	}
859    } else {
860	print(join($output_separator, @parms));
861	print("\n");
862    }
863}
864
865my $rfc822re;
866
867sub make_rfc822re {
868#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
869#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
870#   This regexp will only work on addresses which have had comments stripped
871#   and replaced with rfc822_lwsp.
872
873    my $specials = '()<>@,;:\\\\".\\[\\]';
874    my $controls = '\\000-\\037\\177';
875
876    my $dtext = "[^\\[\\]\\r\\\\]";
877    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
878
879    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
880
881#   Use zero-width assertion to spot the limit of an atom.  A simple
882#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
883    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
884    my $word = "(?:$atom|$quoted_string)";
885    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
886
887    my $sub_domain = "(?:$atom|$domain_literal)";
888    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
889
890    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
891
892    my $phrase = "$word*";
893    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
894    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
895    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
896
897    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
898    my $address = "(?:$mailbox|$group)";
899
900    return "$rfc822_lwsp*$address";
901}
902
903sub rfc822_strip_comments {
904    my $s = shift;
905#   Recursively remove comments, and replace with a single space.  The simpler
906#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
907#   chars in atoms, for example.
908
909    while ($s =~ s/^((?:[^"\\]|\\.)*
910                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
911                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
912    return $s;
913}
914
915#   valid: returns true if the parameter is an RFC822 valid address
916#
917sub rfc822_valid ($) {
918    my $s = rfc822_strip_comments(shift);
919
920    if (!$rfc822re) {
921        $rfc822re = make_rfc822re();
922    }
923
924    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
925}
926
927#   validlist: In scalar context, returns true if the parameter is an RFC822
928#              valid list of addresses.
929#
930#              In list context, returns an empty list on failure (an invalid
931#              address was found); otherwise a list whose first element is the
932#              number of addresses found and whose remaining elements are the
933#              addresses.  This is needed to disambiguate failure (invalid)
934#              from success with no addresses found, because an empty string is
935#              a valid list.
936
937sub rfc822_validlist ($) {
938    my $s = rfc822_strip_comments(shift);
939
940    if (!$rfc822re) {
941        $rfc822re = make_rfc822re();
942    }
943    # * null list items are valid according to the RFC
944    # * the '1' business is to aid in distinguishing failure from no results
945
946    my @r;
947    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
948	$s =~ m/^$rfc822_char*$/) {
949        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
950            push @r, $1;
951        }
952        return wantarray ? (scalar(@r), @r) : 1;
953    }
954    else {
955        return wantarray ? () : 0;
956    }
957}
958