xref: /illumos-gate/usr/src/stand/lib/inet/ethernet.c (revision 4eaa471005973e11a6110b69fe990530b3b95a38)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Ethernet routines. Includes ARP and Reverse ARP. Used for ethernet-like
31  * media also - so be sure NOT to use ETHERMTU as a mtu limit. macinit()
32  * will set this appropriately.
33  */
34 
35 #include <sys/types.h>
36 #include <socket_impl.h>
37 #include <socket_inet.h>
38 #include <sys/time.h>
39 #include <sys/socket.h>
40 #include <net/if.h>
41 #include <net/if_arp.h>
42 #include <netinet/in_systm.h>
43 #include <netinet/in.h>
44 #include <netinet/ip.h>
45 #include <netinet/if_ether.h>
46 #include <sys/promif.h>
47 #include <sys/prom_plat.h>
48 #include <sys/salib.h>
49 #include <sys/bootdebug.h>
50 
51 #include "ipv4.h"
52 #include "ipv4_impl.h"
53 #include "mac.h"
54 #include "mac_impl.h"
55 #include "ethernet_inet.h"
56 
57 ether_addr_t etherbroadcastaddr = {
58 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
59 };
60 
61 struct arp_packet {
62 	struct ether_header	arp_eh;
63 	struct ether_arp	arp_ea;
64 #define	USED_SIZE (sizeof (struct ether_header) + sizeof (struct ether_arp))
65 	char	filler[ETHERMIN - sizeof (struct ether_arp)];
66 };
67 
68 static char *
69 ether_print(ether_addr_t ea)
70 {
71 	static char eprintbuf[20];
72 
73 	(void) sprintf(eprintbuf, "%x:%x:%x:%x:%x:%x", ea[0], ea[1], ea[2],
74 	    ea[3], ea[4], ea[5]);
75 	return (eprintbuf);
76 }
77 
78 /*
79  * Common ARP code. Broadcast the packet and wait for the right response.
80  *
81  * If rarp is called for, caller expects an IPv4 address in the target
82  * protocol address (tpa) field of the "out" argument.
83  *
84  * If arp is called for, caller expects a hardware address in the
85  * source hardware address (sha) field of the "out" argument.
86  *
87  * Returns TRUE if transaction succeeded, FALSE otherwise.
88  *
89  * The timeout argument is the number of milliseconds to wait for a
90  * response. An infinite timeout can be specified as 0xffffffff.
91  */
92 static int
93 ether_comarp(struct arp_packet *out, uint32_t timeout)
94 {
95 	struct arp_packet *in = (struct arp_packet *)mac_state.mac_buf;
96 	int count, time, feedback, len, delay = 2;
97 	char    *ind = "-\\|/";
98 	struct in_addr tmp_ia;
99 	uint32_t wait_time;
100 
101 	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)&out->arp_eh.ether_dhost,
102 	    sizeof (ether_addr_t));
103 	bcopy((caddr_t)mac_state.mac_addr_buf,
104 	    (caddr_t)&out->arp_eh.ether_shost, sizeof (ether_addr_t));
105 
106 	out->arp_ea.arp_hrd =  htons(ARPHRD_ETHER);
107 	out->arp_ea.arp_pro = htons(ETHERTYPE_IP);
108 	out->arp_ea.arp_hln = sizeof (ether_addr_t);
109 	out->arp_ea.arp_pln = sizeof (struct in_addr);
110 	bcopy(mac_state.mac_addr_buf, (caddr_t)&out->arp_ea.arp_sha,
111 	    sizeof (ether_addr_t));
112 	ipv4_getipaddr(&tmp_ia);
113 	tmp_ia.s_addr = htonl(tmp_ia.s_addr);
114 	bcopy((caddr_t)&tmp_ia, (caddr_t)out->arp_ea.arp_spa,
115 	    sizeof (struct in_addr));
116 	feedback = 0;
117 
118 	wait_time = prom_gettime() + timeout;
119 	for (count = 0; timeout == ~0U || prom_gettime() < wait_time; count++) {
120 		if (count == ETHER_WAITCNT) {
121 			if (out->arp_ea.arp_op == ARPOP_REQUEST) {
122 				bcopy((caddr_t)out->arp_ea.arp_tpa,
123 				    (caddr_t)&tmp_ia, sizeof (struct in_addr));
124 				printf(
125 				    "\nRequesting Ethernet address for: %s\n",
126 				    inet_ntoa(tmp_ia));
127 			} else {
128 				printf("\nRequesting Internet address for %s\n",
129 				    ether_print(out->arp_ea.arp_tha));
130 			}
131 		}
132 
133 		(void) prom_write(mac_state.mac_dev, (caddr_t)out,
134 		    sizeof (*out), 0, NETWORK);
135 
136 		if (count >= ETHER_WAITCNT)
137 			printf("%c\b", ind[feedback++ % 4]); /* activity */
138 
139 		time = prom_gettime() + (delay * 1000);	/* broadcast delay */
140 		while (prom_gettime() <= time) {
141 			len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
142 			    mac_state.mac_mtu, 0, NETWORK);
143 			if (len < USED_SIZE)
144 				continue;
145 			if (in->arp_ea.arp_pro != ntohs(ETHERTYPE_IP))
146 				continue;
147 			if (out->arp_ea.arp_op == ntohs(ARPOP_REQUEST)) {
148 				if (in->arp_eh.ether_type !=
149 				    ntohs(ETHERTYPE_ARP))
150 					continue;
151 				if (in->arp_ea.arp_op != ntohs(ARPOP_REPLY))
152 					continue;
153 				if (bcmp((caddr_t)in->arp_ea.arp_spa,
154 				    (caddr_t)out->arp_ea.arp_tpa,
155 				    sizeof (struct in_addr)) != 0)
156 					continue;
157 				if (boothowto & RB_VERBOSE) {
158 					bcopy((caddr_t)in->arp_ea.arp_spa,
159 					    (caddr_t)&tmp_ia,
160 					    sizeof (struct in_addr));
161 					printf("Found %s @ %s\n",
162 					    inet_ntoa(tmp_ia),
163 					    ether_print(in->arp_ea.arp_sha));
164 				}
165 				/* copy hardware addr into "out" for caller */
166 				bcopy((caddr_t)&in->arp_ea.arp_sha,
167 				    (caddr_t)&out->arp_ea.arp_sha,
168 				    sizeof (ether_addr_t));
169 				return (TRUE);
170 			} else {		/* Reverse ARP */
171 				if (in->arp_eh.ether_type !=
172 				    ntohs(ETHERTYPE_REVARP))
173 					continue;
174 				if (in->arp_ea.arp_op != ntohs(REVARP_REPLY))
175 					continue;
176 				if (bcmp((caddr_t)in->arp_ea.arp_tha,
177 				    (caddr_t)out->arp_ea.arp_tha,
178 				    sizeof (ether_addr_t)) != 0)
179 					continue;
180 				if (boothowto & RB_VERBOSE) {
181 					bcopy((caddr_t)in->arp_ea.arp_tpa,
182 					    (caddr_t)&tmp_ia,
183 					    sizeof (struct in_addr));
184 					printf("Internet address is: %s\n",
185 					    inet_ntoa(tmp_ia));
186 				}
187 				/* copy IP address into "out" for caller */
188 				bcopy((caddr_t)in->arp_ea.arp_tpa,
189 				    (caddr_t)out->arp_ea.arp_tpa,
190 				    sizeof (struct in_addr));
191 				return (TRUE);
192 			}
193 		}
194 
195 		delay = delay * 2;	/* Double the request delay */
196 		if (delay > 64)		/* maximum delay is 64 seconds */
197 			delay = 64;
198 	}
199 	return (FALSE);
200 }
201 
202 /*
203  * ARP client side
204  * Broadcasts to determine MAC address given network order IP address.
205  * See RFC 826
206  *
207  * Returns TRUE if successful, FALSE otherwise.
208  */
209 int
210 ether_arp(struct in_addr *ip, void *hap, uint32_t timeout)
211 {
212 	ether_addr_t *ep = (ether_addr_t *)hap;
213 	struct arp_packet out;
214 	int result;
215 
216 	if (!initialized)
217 		prom_panic("Ethernet device is not initialized.");
218 
219 	bzero((char *)&out, sizeof (struct arp_packet));
220 
221 	out.arp_eh.ether_type = htons(ETHERTYPE_ARP);
222 	out.arp_ea.arp_op = htons(ARPOP_REQUEST);
223 	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)&out.arp_ea.arp_tha,
224 	    sizeof (ether_addr_t));
225 	bcopy((caddr_t)ip, (caddr_t)out.arp_ea.arp_tpa,
226 	    sizeof (struct in_addr));
227 
228 	result = ether_comarp(&out, timeout);
229 
230 	if (result && (ep != NULL)) {
231 		bcopy((caddr_t)&out.arp_ea.arp_sha, (caddr_t)ep,
232 		    sizeof (ether_addr_t));
233 	}
234 	return (result);
235 }
236 
237 /*
238  * Reverse ARP client side
239  * Determine our Internet address given our MAC address
240  * See RFC 903
241  */
242 void
243 ether_revarp(void)
244 {
245 	struct in_addr	ip;
246 	struct arp_packet out;
247 
248 	if (!initialized)
249 		prom_panic("Ethernet device is not initialized.");
250 
251 	bzero((char *)&out, sizeof (struct arp_packet));
252 
253 	out.arp_eh.ether_type = htons(ETHERTYPE_REVARP);
254 	out.arp_ea.arp_op = htons(REVARP_REQUEST);
255 	bcopy(mac_state.mac_addr_buf, (caddr_t)&out.arp_ea.arp_tha,
256 	    sizeof (ether_addr_t));
257 
258 	/* Wait forever */
259 	(void) ether_comarp(&out, 0xffffffff);
260 
261 	bcopy((caddr_t)&out.arp_ea.arp_tpa, (caddr_t)&ip,
262 	    sizeof (struct in_addr));
263 
264 	ip.s_addr = ntohl(ip.s_addr);
265 	ipv4_setipaddr(&ip);
266 }
267 
268 /* ARGSUSED */
269 int
270 ether_header_len(struct inetgram *igm)
271 {
272 	return (sizeof (struct ether_header));
273 }
274 
275 /*
276  * Handle a IP datagram addressed to our ethernet address or to the
277  * ethernet broadcast address. Also respond to ARP requests. Generates
278  * inetgrams as long as there's data and the mac level IP timeout timer
279  * hasn't expired. As soon as there is no data, we try for
280  * ETHER_INPUT_ATTEMPTS for more, then exit the loop, even if there is time
281  * left, since we expect to have data waiting for us when we're called, we just
282  * don't know how much.
283  *
284  * We workaround slow proms (some proms have hard sleeps for as much as 3msec)
285  * even though there are is data waiting.
286  *
287  * Returns the total number of MEDIA_LVL frames placed on the socket.
288  * Caller is expected to free up the inetgram resources.
289  */
290 int
291 ether_input(int index)
292 {
293 	struct inetgram		*inp;
294 	struct ether_header	*eh;
295 	int		frames = 0;	/* successful frames */
296 	int		attempts = 0;	/* failed attempts after success */
297 	int16_t		len = 0, data_len;
298 	uint32_t	timeout, reltime;
299 	uint32_t	pre_pr, post_pr; /* prom_read interval */
300 
301 #ifdef	DEBUG
302 	int		failures = 0;		/* total failures */
303 	int		total_attempts = 0;	/* total prom_read */
304 	int		no_data = 0;		/* no data in prom */
305 	int		arps = 0;		/* arp requests processed */
306 	uint32_t	tot_pr = 0;		/* prom_read time */
307 	uint32_t	tot_pc = 0;		/* inetgram creation time */
308 	uint32_t	pre_pc;
309 	uint32_t	now;
310 #endif	/* DEBUG */
311 
312 	if (!initialized)
313 		prom_panic("Ethernet device is not initialized.");
314 
315 	if ((reltime = sockets[index].in_timeout) == 0)
316 		reltime = mac_state.mac_in_timeout;
317 	timeout = prom_gettime() + reltime;
318 
319 	do {
320 		if (frames > ETHER_MAX_FRAMES) {
321 			/* someone is trying a denial of service attack */
322 			break;
323 		}
324 
325 		/*
326 		 * The following is a workaround for a calvin prom (V2) bug
327 		 * where prom_read() returns a nonzero length, even when it's
328 		 * not read a packet. So we zero out the header to compensate.
329 		 */
330 		bzero(mac_state.mac_buf, sizeof (struct ether_header));
331 
332 		/*
333 		 * Prom_read() will return 0 or -2 if no data is present. A
334 		 * return value of -1 means an error has occurred. We adjust
335 		 * the timeout by calling the time spent in prom_read() "free".
336 		 * prom_read() returns the number of bytes actually read, but
337 		 * will only copy "len" bytes into our buffer. Adjust in
338 		 * case the MTU is wrong.
339 		 */
340 		pre_pr = prom_gettime();
341 		len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
342 		    mac_state.mac_mtu, 0, NETWORK);
343 		post_pr = prom_gettime();
344 		timeout += (post_pr - pre_pr);
345 #ifdef	DEBUG
346 		tot_pr += (post_pr - pre_pr);
347 		total_attempts++;
348 #endif	/* DEBUG */
349 
350 		if (len > mac_state.mac_mtu) {
351 			dprintf("ether_input: adjusting MTU %d -> %d\n",
352 			    mac_state.mac_mtu, len);
353 			bkmem_free(mac_state.mac_buf, mac_state.mac_mtu);
354 			mac_state.mac_mtu = len;
355 			mac_state.mac_buf = bkmem_alloc(mac_state.mac_mtu);
356 			if (mac_state.mac_buf == NULL) {
357 				prom_panic("ether_input: Cannot reallocate "
358 				    "netbuf memory.");
359 			}
360 			len = 0; /* pretend there was no data */
361 		}
362 
363 		if (len == -1) {
364 #ifdef	DEBUG
365 			failures++;
366 #endif	/* DEBUG */
367 			break;
368 		}
369 		if (len == 0 || len == -2) {
370 			if (frames != 0)
371 				attempts++;
372 #ifdef	DEBUG
373 			no_data++;
374 #endif	/* DEBUG */
375 			continue;
376 		}
377 
378 		eh = (struct ether_header *)mac_state.mac_buf;
379 		if (eh->ether_type == ntohs(ETHERTYPE_IP) &&
380 		    len >= (sizeof (struct ether_header) +
381 		    sizeof (struct ip))) {
382 
383 			int offset;
384 #ifdef	DEBUG
385 			pre_pc = prom_gettime();
386 #endif	/* DEBUG */
387 
388 			inp = (struct inetgram *)bkmem_zalloc(
389 			    sizeof (struct inetgram));
390 			if (inp == NULL) {
391 				errno = ENOMEM;
392 				return (frames == 0 ? -1 : frames);
393 			}
394 			offset = sizeof (struct ether_header);
395 			data_len = len - offset;
396 			inp->igm_mp = allocb(data_len, 0);
397 			if (inp->igm_mp == NULL) {
398 				errno = ENOMEM;
399 				bkmem_free((caddr_t)inp,
400 				    sizeof (struct inetgram));
401 				return (frames == 0 ? -1 : frames);
402 			}
403 			bcopy((caddr_t)(mac_state.mac_buf + offset),
404 			    inp->igm_mp->b_rptr, data_len);
405 			inp->igm_mp->b_wptr += data_len;
406 			inp->igm_level = NETWORK_LVL;
407 			add_grams(&sockets[index].inq, inp);
408 			frames++;
409 			attempts = 0;
410 #ifdef	DEBUG
411 			tot_pc += prom_gettime() - pre_pc;
412 #endif	/* DEBUG */
413 			continue;
414 		}
415 
416 		if (eh->ether_type == ntohs(ETHERTYPE_ARP) &&
417 		    len >= (sizeof (struct ether_header) +
418 		    sizeof (struct ether_arp))) {
419 
420 			struct in_addr		ip;
421 			struct ether_arp	*ea;
422 
423 #ifdef	DEBUG
424 			printf("ether_input: ARP message received\n");
425 			arps++;
426 #endif	/* DEBUG */
427 
428 			ea = (struct ether_arp *)(mac_state.mac_buf +
429 			    sizeof (struct ether_header));
430 			if (ea->arp_pro != ntohs(ETHERTYPE_IP))
431 				continue;
432 
433 			ipv4_getipaddr(&ip);
434 			ip.s_addr = ntohl(ip.s_addr);
435 
436 			if (ea->arp_op == ntohs(ARPOP_REQUEST) &&
437 			    ip.s_addr != INADDR_ANY &&
438 			    (bcmp((caddr_t)ea->arp_tpa, (caddr_t)&ip,
439 			    sizeof (struct in_addr)) == 0)) {
440 				ea->arp_op = htons(ARPOP_REPLY);
441 				bcopy((caddr_t)ea->arp_sha,
442 				    (caddr_t)&eh->ether_dhost,
443 				    sizeof (ether_addr_t));
444 				bcopy(mac_state.mac_addr_buf,
445 				    (caddr_t)&eh->ether_shost,
446 				    mac_state.mac_addr_len);
447 				bcopy((caddr_t)ea->arp_sha,
448 				    (caddr_t)ea->arp_tha,
449 				    sizeof (ether_addr_t));
450 				bcopy((caddr_t)ea->arp_spa,
451 				    (caddr_t)ea->arp_tpa,
452 				    sizeof (struct in_addr));
453 				bcopy(mac_state.mac_addr_buf,
454 				    (caddr_t)ea->arp_sha,
455 				    mac_state.mac_addr_len);
456 				bcopy((caddr_t)&ip, (caddr_t)ea->arp_spa,
457 				    sizeof (struct in_addr));
458 				(void) prom_write(mac_state.mac_dev,
459 				    mac_state.mac_buf,
460 				    sizeof (struct arp_packet),
461 				    0, NETWORK);
462 				/* don't charge for ARP replies */
463 				timeout += reltime;
464 			}
465 		}
466 	} while (attempts < ETHER_INPUT_ATTEMPTS &&
467 #ifdef	DEBUG
468 		(now = prom_gettime()) < timeout);
469 #else
470 		prom_gettime() < timeout);
471 #endif	/* DEBUG */
472 
473 #ifdef	DEBUG
474 	printf("ether_input(%d): T/S/N/A/F/P/M: %d/%d/%d/%d/%d/%d/%d "
475 	    "T/O: %d < %d = %s\n", index, total_attempts, frames, no_data,
476 	    arps, failures, tot_pr, tot_pc, now, timeout,
477 	    (now < timeout) ? "TRUE" : "FALSE");
478 #endif	/* DEBUG */
479 	return (frames);
480 }
481 
482 /*
483  * Send out an ethernet datagram. We expect a IP frame appropriately fragmented
484  * at this level.
485  *
486  * Errno is set and -1 is returned if an error occurs. Number of bytes sent
487  * is returned on success.
488  */
489 /* ARGSUSED */
490 int
491 ether_output(int index, struct inetgram *ogp)
492 {
493 	int			header_len, result;
494 	struct ether_header	eh;
495 	struct ip		*ip;
496 	struct in_addr		tmpip, ipdst, netid;
497 	int			broadcast = FALSE;
498 	int			size;
499 	mblk_t			*mp;
500 
501 
502 #ifdef DEBUG
503 	printf("ether_output (%d): size %d\n", index,
504 	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
505 #endif
506 	if (!initialized)
507 		prom_panic("Ethernet device is not initialized.");
508 
509 	if (ogp->igm_level != MEDIA_LVL) {
510 		dprintf("ether_output: frame type wrong: socket: %d\n",
511 		    index * SOCKETTYPE);
512 		errno = EINVAL;
513 		return (-1);
514 	}
515 
516 	header_len = sizeof (struct ether_header);
517 	mp = ogp->igm_mp;
518 	size = mp->b_wptr - mp->b_rptr;
519 	if (size > mac_state.mac_mtu) {
520 		dprintf("ether_output: frame size too big: %d\n", size);
521 		errno = E2BIG;
522 		return (-1);
523 	}
524 
525 	size += header_len;
526 	ip = (struct ip *)(mp->b_rptr);
527 
528 	eh.ether_type = htons(ETHERTYPE_IP);
529 	bcopy(mac_state.mac_addr_buf, (caddr_t)&eh.ether_shost,
530 	    mac_state.mac_addr_len);
531 	bcopy((caddr_t)&ip->ip_dst, (caddr_t)&ipdst, sizeof (ipdst));
532 
533 	if (ipdst.s_addr == htonl(INADDR_BROADCAST))
534 		broadcast = TRUE; /* limited broadcast */
535 
536 	if (!broadcast) {
537 		struct in_addr mask;
538 
539 		ipv4_getnetid(&netid);
540 		ipv4_getnetmask(&mask);
541 		mask.s_addr = htonl(mask.s_addr);
542 		netid.s_addr = htonl(netid.s_addr);
543 
544 		/*
545 		 * check for all-hosts directed broadcast for
546 		 * to its own subnet.
547 		 */
548 		if (mask.s_addr != htonl(INADDR_BROADCAST) &&
549 		    (ipdst.s_addr & ~mask.s_addr) == 0 &&
550 		    (ipdst.s_addr & mask.s_addr) ==  netid.s_addr) {
551 			broadcast = TRUE; /* directed broadcast */
552 		} else {
553 			if (ogp->igm_router.s_addr != htonl(INADDR_ANY))
554 				tmpip.s_addr = ogp->igm_router.s_addr;
555 			else
556 				tmpip.s_addr = ipdst.s_addr;
557 
558 			result = mac_get_arp(&tmpip, (void *)&eh.ether_dhost,
559 			    sizeof (ether_addr_t), mac_state.mac_arp_timeout);
560 			if (!result) {
561 				errno = ETIMEDOUT;
562 				dprintf("ether_output: ARP request for %s "
563 				    "timed out.\n", inet_ntoa(tmpip));
564 				return (-1);
565 			}
566 		}
567 	}
568 
569 	if (broadcast) {
570 		bcopy((caddr_t)etherbroadcastaddr,
571 		    (caddr_t)&eh.ether_dhost, sizeof (ether_addr_t));
572 	}
573 
574 	/* add the ethernet header */
575 	mp->b_rptr -= sizeof (eh);
576 	bcopy((caddr_t)&eh, mp->b_rptr, sizeof (eh));
577 #ifdef	DEBUG
578 	printf("ether_output(%d): level(%d) frame(0x%x) len(%d)\n",
579 	    index, ogp->igm_level, mp->b_rptr, size);
580 #if DEBUG > 1
581 	printf("Dump ethernet (%d): \n", size);
582 	hexdump((char *)mp->b_rptr, size);
583 	printf("\n");
584 #endif /* DEBUG > 1 */
585 #endif	/* DEBUG */
586 	return (prom_write(mac_state.mac_dev, (char *)mp->b_rptr, size,
587 	    0, NETWORK));
588 }
589