xref: /freebsd/usr.bin/whois/whois.c (revision 1c4ee7dfb8affed302171232b0f612e6bcba3c10)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/socket.h>
34 #include <sys/poll.h>
35 #include <netinet/in.h>
36 #include <arpa/inet.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <netdb.h>
40 #include <stdarg.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sysexits.h>
45 #include <unistd.h>
46 #include <fcntl.h>
47 #include <errno.h>
48 
/*
 * Whois servers that can be selected from the command line or that
 * need server-specific query handling.
 */
#define ABUSEHOST	"whois.abuse.net"
#define ANICHOST	"whois.arin.net"
#define DENICHOST	"whois.denic.de"
#define DKNICHOST	"whois.dk-hostmaster.dk"
#define FNICHOST	"whois.afrinic.net"
#define GNICHOST	"whois.nic.gov"
#define IANAHOST	"whois.iana.org"
#define INICHOST	"whois.internic.net"
#define KNICHOST	"whois.krnic.net"
#define LNICHOST	"whois.lacnic.net"
#define MNICHOST	"whois.ra.net"
#define PDBHOST		"whois.peeringdb.com"
#define PNICHOST	"whois.apnic.net"
#define RNICHOST	"whois.ripe.net"
#define VNICHOST	"whois.verisign-grs.com"

/* Appended to a country code or zone name to form a server host name. */
#define QNICHOST_TAIL	".whois-servers.net"

/* Service name used when no port is given with -p. */
#define DEFAULT_PORT	"whois"

/* Bits of the "flags" argument passed to whois(). */
#define WHOIS_RECURSE	(1 << 0)	/* follow referrals */
#define WHOIS_QUICK	(1 << 1)	/* do not enable recursion by default */
#define WHOIS_SPAM_ME	(1 << 2)	/* no query rewriting or output trimming */

/* Output is cut off at a line starting with this (with or without ">>> "). */
#define CHOPSPAM	">>> Last update of WHOIS database:"
73 
/*
 * Nonzero when character h may be part of a host name: an alphanumeric
 * character, '.' or '-'.  The argument is fully parenthesized so that an
 * expression argument is classified as a whole; it is evaluated more than
 * once, so it must not have side effects.
 */
#define ishost(h)						\
	(isalnum((unsigned char)(h)) || (h) == '.' || (h) == '-')

/*
 * Advance pointer p for as long as it is below end and `check' holds.
 * `check' normally inspects *p and is re-evaluated for every position.
 * Expands to a single statement; invoke it as `SCAN(p, end, cond);'.
 */
#define SCAN(p, end, check)					\
	do {							\
		while ((p) < (end) && (check))			\
			++(p);					\
	} while (0)
80 
81 static struct {
82 	const char *suffix, *server;
83 } whoiswhere[] = {
84 	/* Various handles */
85 	{ "-ARIN", ANICHOST },
86 	{ "-NICAT", "at" QNICHOST_TAIL },
87 	{ "-NORID", "no" QNICHOST_TAIL },
88 	{ "-RIPE", RNICHOST },
89 	/* Nominet's whois server doesn't return referrals to JANET */
90 	{ ".ac.uk", "ac.uk" QNICHOST_TAIL },
91 	{ ".gov.uk", "ac.uk" QNICHOST_TAIL },
92 	{ "", IANAHOST }, /* default */
93 	{ NULL, NULL } /* safety belt */
94 };
95 
/*
 * Line prefixes that introduce the name of another whois server in a
 * reply.  Each entry carries the prefix text and its length without the
 * terminating NUL; the table ends with a NULL prefix.
 */
#define WHOIS_REFERRAL(s) { .prefix = (s), .len = sizeof(s) - 1 }
static struct {
	const char	*prefix;
	size_t		 len;
} whois_referral[] = {
	/* IANA */
	WHOIS_REFERRAL("whois:"),
	WHOIS_REFERRAL("Whois Server:"),
	/* corporatedomains.com */
	WHOIS_REFERRAL("Registrar WHOIS Server:"),
	/* ARIN */
	WHOIS_REFERRAL("ReferralServer:  whois://"),
	WHOIS_REFERRAL("ReferralServer:  rwhois://"),
	/* AfriNIC */
	WHOIS_REFERRAL("descr:          region. Please query"),
	{ .prefix = NULL, .len = 0 }
};
109 
/*
 * Some RIRs do not hand out a referral for address space they do not
 * manage; they answer with a placeholder record instead.  The lines below
 * identify such records.  When one is seen we guess that ARIN knows
 * better and ask there next.  Before a referral to an RIR is followed we
 * check whether that RIR has been asked already, and if so another RIR
 * is picked instead.
 */
#define ARIN_NETNAME(name)	"netname:        " name "\n"
static const char *actually_arin[] = {
	ARIN_NETNAME("ERX-NETBLOCK"),	/* APNIC */
	ARIN_NETNAME("NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK"),
	NULL
};
121 
122 static struct {
123 	int loop;
124 	const char *host;
125 } try_rir[] = {
126 	{ 0, ANICHOST },
127 	{ 0, RNICHOST },
128 	{ 0, PNICHOST },
129 	{ 0, FNICHOST },
130 	{ 0, LNICHOST },
131 	{ 0, NULL }
132 };
133 
134 static void
135 reset_rir(void) {
136 	int i;
137 
138 	for (i = 0; try_rir[i].host != NULL; i++)
139 		try_rir[i].loop = 0;
140 }
141 
142 static const char *port = DEFAULT_PORT;
143 
144 static const char *choose_server(char *);
145 static struct addrinfo *gethostinfo(const char *, const char *, int);
146 static void s_asprintf(char **ret, const char *format, ...) __printflike(2, 3);
147 static void usage(void) __dead2;
148 static void whois(const char *, const char *, const char *, int);
149 
150 int
151 main(int argc, char *argv[])
152 {
153 	const char *country, *host;
154 	int ch, flags;
155 
156 #ifdef	SOCKS
157 	SOCKSinit(argv[0]);
158 #endif
159 
160 	country = host = NULL;
161 	flags = 0;
162 	while ((ch = getopt(argc, argv, "aAbc:fgh:iIklmp:PQrRS")) != -1) {
163 		switch (ch) {
164 		case 'a':
165 			host = ANICHOST;
166 			break;
167 		case 'A':
168 			host = PNICHOST;
169 			break;
170 		case 'b':
171 			host = ABUSEHOST;
172 			break;
173 		case 'c':
174 			country = optarg;
175 			break;
176 		case 'f':
177 			host = FNICHOST;
178 			break;
179 		case 'g':
180 			host = GNICHOST;
181 			break;
182 		case 'h':
183 			host = optarg;
184 			break;
185 		case 'i':
186 			host = INICHOST;
187 			break;
188 		case 'I':
189 			host = IANAHOST;
190 			break;
191 		case 'k':
192 			host = KNICHOST;
193 			break;
194 		case 'l':
195 			host = LNICHOST;
196 			break;
197 		case 'm':
198 			host = MNICHOST;
199 			break;
200 		case 'p':
201 			port = optarg;
202 			break;
203 		case 'P':
204 			host = PDBHOST;
205 			break;
206 		case 'Q':
207 			flags |= WHOIS_QUICK;
208 			break;
209 		case 'r':
210 			host = RNICHOST;
211 			break;
212 		case 'R':
213 			flags |= WHOIS_RECURSE;
214 			break;
215 		case 'S':
216 			flags |= WHOIS_SPAM_ME;
217 			break;
218 		case '?':
219 		default:
220 			usage();
221 			/* NOTREACHED */
222 		}
223 	}
224 	argc -= optind;
225 	argv += optind;
226 
227 	if (!argc || (country != NULL && host != NULL))
228 		usage();
229 
230 	/*
231 	 * If no host or country is specified, rely on referrals from IANA.
232 	 */
233 	if (host == NULL && country == NULL) {
234 		if ((host = getenv("WHOIS_SERVER")) == NULL &&
235 		    (host = getenv("RA_SERVER")) == NULL) {
236 			if (!(flags & WHOIS_QUICK))
237 				flags |= WHOIS_RECURSE;
238 		}
239 	}
240 	while (argc-- > 0) {
241 		if (country != NULL) {
242 			char *qnichost;
243 			s_asprintf(&qnichost, "%s%s", country, QNICHOST_TAIL);
244 			whois(*argv, qnichost, port, flags);
245 			free(qnichost);
246 		} else
247 			whois(*argv, host != NULL ? host :
248 			      choose_server(*argv), port, flags);
249 		reset_rir();
250 		argv++;
251 	}
252 	exit(0);
253 }
254 
255 static const char *
256 choose_server(char *domain)
257 {
258 	size_t len = strlen(domain);
259 	int i;
260 
261 	for (i = 0; whoiswhere[i].suffix != NULL; i++) {
262 		size_t suffix_len = strlen(whoiswhere[i].suffix);
263 		if (len > suffix_len &&
264 		    strcasecmp(domain + len - suffix_len,
265 			       whoiswhere[i].suffix) == 0)
266 			return (whoiswhere[i].server);
267 	}
268 	errx(EX_SOFTWARE, "no default whois server");
269 }
270 
/*
 * Resolve `host' and service `hport' into a list of stream-socket
 * addresses, asking for the canonical name as well.
 *
 * Returns the getaddrinfo(3) result list, which the caller releases with
 * freeaddrinfo(3).  If the lookup fails with EAI_NONAME and
 * exit_on_noname is zero, NULL is returned; any other failure terminates
 * the program with EX_NOHOST.
 */
static struct addrinfo *
gethostinfo(const char *host, const char *hport, int exit_on_noname)
{
	struct addrinfo hints, *res;
	int error;

	memset(&hints, 0, sizeof(hints));
	hints.ai_flags = AI_CANONNAME;
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	res = NULL;
	error = getaddrinfo(host, hport, &hints, &res);
	if (error != 0 && (exit_on_noname || error != EAI_NONAME)) {
		/*
		 * getaddrinfo() reports failures through its return value;
		 * errno only carries information for EAI_SYSTEM.  Use
		 * errx() otherwise so that no unrelated strerror(errno)
		 * text is appended to the message.
		 */
		if (error == EAI_SYSTEM)
			err(EX_NOHOST, "%s: %s", host, gai_strerror(error));
		errx(EX_NOHOST, "%s: %s", host, gai_strerror(error));
	}
	return (res);
}
287 
/*
 * Format into a freshly allocated string stored in *ret, like
 * asprintf(3), but terminate the program if the string cannot be
 * produced.  The caller frees *ret.
 */
static void
s_asprintf(char **ret, const char *format, ...)
{
	va_list args;
	int rc;

	va_start(args, format);
	rc = vasprintf(ret, format, args);
	va_end(args);

	if (rc == -1)
		err(EX_OSERR, "vasprintf()");
}
303 
304 static int
305 connect_to_any_host(struct addrinfo *hostres)
306 {
307 	struct addrinfo *res;
308 	nfds_t i, j;
309 	size_t count;
310 	struct pollfd *fds;
311 	int timeout = 180, s = -1;
312 
313 	for (res = hostres, count = 0; res; res = res->ai_next)
314 		count++;
315 	fds = calloc(count, sizeof(*fds));
316 	if (fds == NULL)
317 		err(EX_OSERR, "calloc()");
318 
319 	/*
320 	 * Traverse the result list elements and make non-block
321 	 * connection attempts.
322 	 */
323 	count = i = 0;
324 	for (res = hostres; res != NULL; res = res->ai_next) {
325 		s = socket(res->ai_family, res->ai_socktype | SOCK_NONBLOCK,
326 		    res->ai_protocol);
327 		if (s < 0)
328 			continue;
329 		if (connect(s, res->ai_addr, res->ai_addrlen) < 0) {
330 			if (errno == EINPROGRESS) {
331 				/* Add the socket to poll list */
332 				fds[i].fd = s;
333 				fds[i].events = POLLERR | POLLHUP |
334 						POLLIN | POLLOUT;
335 				/*
336 				 * From here until a socket connects, the
337 				 * socket fd is owned by the fds[] poll array.
338 				 */
339 				s = -1;
340 				count++;
341 				i++;
342 			} else {
343 				close(s);
344 				s = -1;
345 
346 				/*
347 				 * Poll only if we have something to poll,
348 				 * otherwise just go ahead and try next
349 				 * address
350 				 */
351 				if (count == 0)
352 					continue;
353 			}
354 		} else
355 			goto done;
356 
357 		/*
358 		 * If we are at the last address, poll until a connection is
359 		 * established or we failed all connection attempts.
360 		 */
361 		if (res->ai_next == NULL)
362 			timeout = INFTIM;
363 
364 		/*
365 		 * Poll the watched descriptors for successful connections:
366 		 * if we still have more untried resolved addresses, poll only
367 		 * once; otherwise, poll until all descriptors have errors,
368 		 * which will be considered as ETIMEDOUT later.
369 		 */
370 		do {
371 			int n;
372 
373 			n = poll(fds, i, timeout);
374 			if (n == 0) {
375 				/*
376 				 * No event reported in time.  Try with a
377 				 * smaller timeout (but cap at 2-3ms)
378 				 * after a new host have been added.
379 				 */
380 				if (timeout >= 3)
381 					timeout >>= 1;
382 
383 				break;
384 			} else if (n < 0) {
385 				/*
386 				 * errno here can only be EINTR which we would
387 				 * want to clean up and bail out.
388 				 */
389 				s = -1;
390 				goto done;
391 			}
392 
393 			/*
394 			 * Check for the event(s) we have seen.
395 			 */
396 			for (j = 0; j < i; j++) {
397 				if (fds[j].fd == -1 || fds[j].events == 0 ||
398 				    fds[j].revents == 0)
399 					continue;
400 				if (fds[j].revents & ~(POLLIN | POLLOUT)) {
401 					close(fds[j].fd);
402 					fds[j].fd = -1;
403 					fds[j].events = 0;
404 					count--;
405 					continue;
406 				} else if (fds[j].revents & (POLLIN | POLLOUT)) {
407 					/* Connect succeeded. */
408 					s = fds[j].fd;
409 					fds[j].fd = -1;
410 
411 					goto done;
412 				}
413 
414 			}
415 		} while (timeout == INFTIM && count != 0);
416 	}
417 
418 	/* All attempts were failed */
419 	s = -1;
420 	if (count == 0)
421 		errno = ETIMEDOUT;
422 
423 done:
424 	/* Close all watched fds except the succeeded one */
425 	for (j = 0; j < i; j++)
426 		if (fds[j].fd != -1)
427 			close(fds[j].fd);
428 	free(fds);
429 	return (s);
430 }
431 
432 static void
433 whois(const char *query, const char *hostname, const char *hostport, int flags)
434 {
435 	FILE *fp;
436 	struct addrinfo *hostres;
437 	char *buf, *host, *nhost, *nport, *p;
438 	int comment, s, f;
439 	size_t len, i;
440 
441 	hostres = gethostinfo(hostname, hostport, 1);
442 	s = connect_to_any_host(hostres);
443 	if (s == -1)
444 		err(EX_OSERR, "connect()");
445 
446 	/* Restore default blocking behavior.  */
447 	if ((f = fcntl(s, F_GETFL)) == -1)
448 		err(EX_OSERR, "fcntl()");
449 	f &= ~O_NONBLOCK;
450 	if (fcntl(s, F_SETFL, f) == -1)
451 		err(EX_OSERR, "fcntl()");
452 
453 	fp = fdopen(s, "r+");
454 	if (fp == NULL)
455 		err(EX_OSERR, "fdopen()");
456 
457 	if (!(flags & WHOIS_SPAM_ME) &&
458 	    (strcasecmp(hostname, DENICHOST) == 0 ||
459 	     strcasecmp(hostname, "de" QNICHOST_TAIL) == 0)) {
460 		const char *q;
461 		int idn = 0;
462 		for (q = query; *q != '\0'; q++)
463 			if (!isascii(*q))
464 				idn = 1;
465 		fprintf(fp, "-T dn%s %s\r\n", idn ? "" : ",ace", query);
466 	} else if (!(flags & WHOIS_SPAM_ME) &&
467 		   (strcasecmp(hostname, DKNICHOST) == 0 ||
468 		    strcasecmp(hostname, "dk" QNICHOST_TAIL) == 0))
469 		fprintf(fp, "--show-handles %s\r\n", query);
470 	else if ((flags & WHOIS_SPAM_ME) ||
471 		 strchr(query, ' ') != NULL)
472 		fprintf(fp, "%s\r\n", query);
473 	else if (strcasecmp(hostname, ANICHOST) == 0) {
474 		if (strncasecmp(query, "AS", 2) == 0 &&
475 		    strspn(query+2, "0123456789") == strlen(query+2))
476 			fprintf(fp, "+ a %s\r\n", query+2);
477 		else
478 			fprintf(fp, "+ %s\r\n", query);
479 	} else if (strcasecmp(hostres->ai_canonname, VNICHOST) == 0)
480 		fprintf(fp, "domain %s\r\n", query);
481 	else
482 		fprintf(fp, "%s\r\n", query);
483 	fflush(fp);
484 
485 	comment = 0;
486 	if (!(flags & WHOIS_SPAM_ME) &&
487 	    (strcasecmp(hostname, ANICHOST) == 0 ||
488 	     strcasecmp(hostname, RNICHOST) == 0)) {
489 		comment = 2;
490 	}
491 
492 	nhost = NULL;
493 	while ((buf = fgetln(fp, &len)) != NULL) {
494 		/* Nominet */
495 		if (!(flags & WHOIS_SPAM_ME) &&
496 		    len == 5 && strncmp(buf, "-- \r\n", 5) == 0)
497 			break;
498 		/* RIRs */
499 		if (comment == 1 && buf[0] == '#')
500 			break;
501 		else if (comment == 2) {
502 			if (strchr("#%\r\n", buf[0]) != NULL)
503 				continue;
504 			else
505 				comment = 1;
506 		}
507 
508 		printf("%.*s", (int)len, buf);
509 
510 		if ((flags & WHOIS_RECURSE) && nhost == NULL) {
511 			for (i = 0; whois_referral[i].prefix != NULL; i++) {
512 				p = buf;
513 				SCAN(p, buf+len, *p == ' ');
514 				if (strncasecmp(p, whois_referral[i].prefix,
515 					           whois_referral[i].len) != 0)
516 					continue;
517 				p += whois_referral[i].len;
518 				SCAN(p, buf+len, *p == ' ');
519 				host = p;
520 				SCAN(p, buf+len, ishost(*p));
521 				if (p > host) {
522 					char *pstr;
523 
524 					s_asprintf(&nhost, "%.*s",
525 						   (int)(p - host), host);
526 
527 					if (*p != ':') {
528 						s_asprintf(&nport, "%s", port);
529 						break;
530 					}
531 
532 					pstr = ++p;
533 					SCAN(p, buf+len, isdigit(*p));
534 					if (p > pstr && (p - pstr) < 6) {
535 						s_asprintf(&nport, "%.*s",
536 						    (int)(p - pstr), pstr);
537 						break;
538 					}
539 
540 					/* Invalid port; don't recurse */
541 					free(nhost);
542 					nhost = NULL;
543 				}
544 				break;
545 			}
546 			for (i = 0; actually_arin[i] != NULL; i++) {
547 				if (strncmp(buf, actually_arin[i], len) == 0) {
548 					s_asprintf(&nhost, "%s", ANICHOST);
549 					s_asprintf(&nport, "%s", port);
550 					break;
551 				}
552 			}
553 		}
554 		/* Verisign etc. */
555 		if (!(flags & WHOIS_SPAM_ME) &&
556 		    len >= sizeof(CHOPSPAM)-1 &&
557 		    (strncasecmp(buf, CHOPSPAM, sizeof(CHOPSPAM)-1) == 0 ||
558 		     strncasecmp(buf, CHOPSPAM+4, sizeof(CHOPSPAM)-5) == 0)) {
559 			printf("\n");
560 			break;
561 		}
562 	}
563 	fclose(fp);
564 	freeaddrinfo(hostres);
565 
566 	f = 0;
567 	for (i = 0; try_rir[i].host != NULL; i++) {
568 		/* Remember visits to RIRs */
569 		if (try_rir[i].loop == 0 &&
570 		    strcasecmp(try_rir[i].host, hostname) == 0)
571 			try_rir[i].loop = 1;
572 		/* Do we need to find an alternative RIR? */
573 		if (try_rir[i].loop != 0 && nhost != NULL &&
574 		    strcasecmp(try_rir[i].host, nhost) == 0) {
575 			free(nhost);
576 			nhost = NULL;
577 			free(nport);
578 			nport = NULL;
579 			f = 1;
580 		}
581 	}
582 	if (f) {
583 		/* Find a replacement RIR */
584 		for (i = 0; try_rir[i].host != NULL; i++) {
585 			if (try_rir[i].loop == 0) {
586 				s_asprintf(&nhost, "%s", try_rir[i].host);
587 				s_asprintf(&nport, "%s", port);
588 				break;
589 			}
590 		}
591 	}
592 	if (nhost != NULL) {
593 		/* Ignore self-referrals */
594 		if (strcasecmp(hostname, nhost) != 0) {
595 			printf("# %s\n\n", nhost);
596 			whois(query, nhost, nport, flags);
597 		}
598 		free(nhost);
599 		free(nport);
600 	}
601 }
602 
/*
 * Write the command synopsis to standard error and exit with EX_USAGE.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: whois [-aAbfgiIklmPQrRS] [-c country-code | -h hostname] "
	    "[-p port] name ...\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
611