xref: /illumos-gate/usr/src/uts/common/io/mac/mac_protect.c (revision ce68629858b847885cda69bf09057ab27980fe8d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 #include <sys/strsun.h>
31 #include <sys/sdt.h>
32 #include <sys/mac.h>
33 #include <sys/mac_impl.h>
34 #include <sys/mac_client_impl.h>
35 #include <sys/mac_client_priv.h>
36 #include <sys/ethernet.h>
37 #include <sys/vlan.h>
38 #include <sys/dlpi.h>
39 #include <sys/avl.h>
40 #include <inet/ip.h>
41 #include <inet/ip6.h>
42 #include <inet/arp.h>
43 #include <netinet/arp.h>
44 #include <netinet/udp.h>
45 #include <netinet/dhcp.h>
46 #include <netinet/dhcp6.h>
47 
48 /*
49  * Implementation overview for DHCP address detection
50  *
51  * The purpose of DHCP address detection is to relieve the user of having to
52  * manually configure static IP addresses when ip-nospoof protection is turned
53  * on. To achieve this, the mac layer needs to intercept DHCP packets to
54  * determine the assigned IP addresses.
55  *
56  * A DHCP handshake between client and server typically requires at least
57  * 4 messages:
58  *
59  * 1. DISCOVER - client attempts to locate DHCP servers via a
60  *               broadcast message to its subnet.
61  * 2. OFFER    - server responds to client with an IP address and
62  *               other parameters.
63  * 3. REQUEST  - client requests the offered address.
64  * 4. ACK      - server verifies that the requested address matches
65  *               the one it offered.
66  *
67  * DHCPv6 behaves pretty much the same way aside from different message names.
68  *
69  * Address information is embedded in either the OFFER or REQUEST message.
70  * We chose to intercept REQUEST because this is at the last part of the
71  * handshake and it indicates that the client intends to keep the address.
72  * Intercepting OFFERs is unreliable because the client may receive multiple
73  * offers from different servers, and we can't tell which address the client
74  * will keep.
75  *
76  * Each DHCP message has a transaction ID. We use this transaction ID to match
77  * REQUESTs with ACKs received from servers.
78  *
79  * For IPv4, the process to acquire a DHCP-assigned address is as follows:
80  *
81  * 1. Client sends REQUEST. A new dhcpv4_txn_t object is created and inserted
82  *    in the mci_v4_pending_txn table (keyed by xid). This object represents
83  *    a new transaction. It contains the xid, the client ID and requested IP
84  *    address.
85  *
86  * 2. Server responds with an ACK. The xid from this ACK is used to lookup the
87  *    pending transaction from the mci_v4_pending_txn table. Once the object is
88  *    found, it is removed from the pending table and inserted into the
89  *    completed table (mci_v4_completed_txn, keyed by client ID) and the dynamic
90  *    IP table (mci_v4_dyn_ip, keyed by IP address).
91  *
92  * 3. An outgoing packet that goes through the ip-nospoof path will be checked
93  *    against the dynamic IP table. Packets that have the assigned DHCP address
94  *    as the source IP address will pass the check and be admitted onto the
95  *    network.
96  *
97  * IPv4 notes:
98  *
99  * If the server never responds with an ACK, there is a timer that is set after
100  * the insertion of the transaction into the pending table. When the timer
101  * fires, it will check whether the transaction is old (by comparing current
102  * time and the txn's timestamp); if so, the transaction will be freed. Along
103  * with this, any transaction in the completed/dyn-ip tables matching the client
104  * ID of this stale transaction will also be freed. If the client fails to
105  * extend a lease, we want to stop the client from using any IP addresses that
106  * were granted previously.
107  *
108  * A RELEASE message from the client will not cause a transaction to be created.
109  * The client ID in the RELEASE message will be used for finding and removing
110  * transactions in the completed and dyn-ip tables.
111  *
112  *
113  * For IPv6, the process to acquire a DHCPv6-assigned address is as follows:
114  *
115  * 1. Client sends REQUEST. The DUID is extracted and stored into a dhcpv6_cid_t
116  *    structure. A new transaction structure (dhcpv6_txn_t) is also created and
117  *    it will point to the dhcpv6_cid_t. If an existing transaction with a
118  *    matching xid is not found, this dhcpv6_txn_t will be inserted into the
119  *    mci_v6_pending_txn table (keyed by xid).
120  *
121  * 2. Server responds with a REPLY. If a pending transaction is found, the
122  *    addresses in the reply will be placed into the dhcpv6_cid_t pointed to by
123  *    the transaction. The dhcpv6_cid_t will then be moved to the mci_v6_cid
124  *    table (keyed by cid). The associated addresses will be added to the
125  *    mci_v6_dyn_ip table (while still being pointed to by the dhcpv6_cid_t).
126  *
127  * 3. IPv6 ip-nospoof will now check mci_v6_dyn_ip for matching packets.
128  *    Packets with a source address matching one of the DHCPv6-assigned
129  *    addresses will be allowed through.
130  *
131  * IPv6 notes:
132  *
133  * The v6 code shares the same timer as v4 for scrubbing stale transactions.
134  * Just like v4, as part of removing an expired transaction, a RELEASE will be
135  * triggered on the cid associated with the expired transaction.
136  *
137  * The data structures used for v6 are slightly different because a v6 client
138  * may have multiple addresses associated with it.
139  */
140 
141 /*
142  * These are just arbitrary limits meant for preventing abuse (e.g. a user
143  * flooding the network with bogus transactions). They are not meant to be
144  * user-modifiable so they are not exposed as linkprops.
145  */
146 static ulong_t	dhcp_max_pending_txn = 512;
147 static ulong_t	dhcp_max_completed_txn = 512;
148 static hrtime_t	txn_cleanup_interval = 60 * NANOSEC;
149 
150 /*
151  * DHCPv4 transaction. It may be added to three different tables
152  * (keyed by different fields).
153  */
154 typedef struct dhcpv4_txn {
155 	uint32_t		dt_xid;
156 	hrtime_t		dt_timestamp;
157 	uint8_t			dt_cid[DHCP_MAX_OPT_SIZE];
158 	uint8_t			dt_cid_len;
159 	ipaddr_t		dt_ipaddr;
160 	avl_node_t		dt_node;
161 	avl_node_t		dt_ipnode;
162 	struct dhcpv4_txn	*dt_next;
163 } dhcpv4_txn_t;
164 
165 /*
166  * DHCPv6 address. May be added to mci_v6_dyn_ip.
167  * It is always pointed to by its parent dhcpv6_cid_t structure.
168  */
169 typedef struct dhcpv6_addr {
170 	in6_addr_t		da_addr;
171 	avl_node_t		da_node;
172 	struct dhcpv6_addr	*da_next;
173 } dhcpv6_addr_t;
174 
175 /*
176  * DHCPv6 client ID. May be added to mci_v6_cid.
177  * No dhcpv6_txn_t should be pointing to it after it is added to mci_v6_cid.
178  */
179 typedef struct dhcpv6_cid {
180 	uchar_t			*dc_cid;
181 	uint_t			dc_cid_len;
182 	dhcpv6_addr_t		*dc_addr;
183 	uint_t			dc_addrcnt;
184 	avl_node_t		dc_node;
185 } dhcpv6_cid_t;
186 
187 /*
188  * DHCPv6 transaction. Unlike its v4 counterpart, this object gets freed up
189  * as soon as the transaction completes or expires.
190  */
191 typedef struct dhcpv6_txn {
192 	uint32_t		dt_xid;
193 	hrtime_t		dt_timestamp;
194 	dhcpv6_cid_t		*dt_cid;
195 	avl_node_t		dt_node;
196 	struct dhcpv6_txn	*dt_next;
197 } dhcpv6_txn_t;
198 
199 static void	start_txn_cleanup_timer(mac_client_impl_t *);
200 static boolean_t allowed_ips_set(mac_resource_props_t *, uint32_t);
201 
/*
 * Post-increment the mms_<s> counter in the misc stats of mac client 'm'.
 * The expression evaluates to the counter's previous value.
 */
#define	BUMP_STAT(m, s)	(((m)->mci_misc_stat.mms_##s)++)
203 
204 /*
205  * Comparison functions for the 3 AVL trees used:
206  * mci_v4_pending_txn, mci_v4_completed_txn, mci_v4_dyn_ip
207  */
208 static int
209 compare_dhcpv4_xid(const void *arg1, const void *arg2)
210 {
211 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
212 
213 	if (txn1->dt_xid < txn2->dt_xid)
214 		return (-1);
215 	else if (txn1->dt_xid > txn2->dt_xid)
216 		return (1);
217 	else
218 		return (0);
219 }
220 
221 static int
222 compare_dhcpv4_cid(const void *arg1, const void *arg2)
223 {
224 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
225 	int			ret;
226 
227 	if (txn1->dt_cid_len < txn2->dt_cid_len)
228 		return (-1);
229 	else if (txn1->dt_cid_len > txn2->dt_cid_len)
230 		return (1);
231 
232 	if (txn1->dt_cid_len == 0)
233 		return (0);
234 
235 	ret = memcmp(txn1->dt_cid, txn2->dt_cid, txn1->dt_cid_len);
236 	if (ret < 0)
237 		return (-1);
238 	else if (ret > 0)
239 		return (1);
240 	else
241 		return (0);
242 }
243 
244 static int
245 compare_dhcpv4_ip(const void *arg1, const void *arg2)
246 {
247 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
248 
249 	if (txn1->dt_ipaddr < txn2->dt_ipaddr)
250 		return (-1);
251 	else if (txn1->dt_ipaddr > txn2->dt_ipaddr)
252 		return (1);
253 	else
254 		return (0);
255 }
256 
257 /*
258  * Find the specified DHCPv4 option.
259  */
260 static int
261 get_dhcpv4_option(struct dhcp *dh4, uchar_t *end, uint8_t type,
262     uchar_t **opt, uint8_t *opt_len)
263 {
264 	uchar_t		*start = (uchar_t *)dh4->options;
265 	uint8_t		otype, olen;
266 
267 	while (start < end) {
268 		if (*start == CD_PAD) {
269 			start++;
270 			continue;
271 		}
272 		if (*start == CD_END)
273 			break;
274 
275 		otype = *start++;
276 		olen = *start++;
277 		if (otype == type && olen > 0) {
278 			*opt = start;
279 			*opt_len = olen;
280 			return (0);
281 		}
282 		start += olen;
283 	}
284 	return (ENOENT);
285 }
286 
287 /*
288  * Locate the start of a DHCPv4 header.
289  * The possible return values and associated meanings are:
290  * 0      - packet is DHCP and has a DHCP header.
291  * EINVAL - packet is not DHCP. the recommended action is to let it pass.
292  * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable.
293  *          the recommended action is to drop it.
294  */
295 static int
296 get_dhcpv4_info(ipha_t *ipha, uchar_t *end, struct dhcp **dh4)
297 {
298 	uint16_t	offset_and_flags, client, server;
299 	boolean_t	first_frag = B_FALSE;
300 	struct udphdr	*udph;
301 	uchar_t		*dh;
302 
303 	if (ipha->ipha_protocol != IPPROTO_UDP)
304 		return (EINVAL);
305 
306 	offset_and_flags = ntohs(ipha->ipha_fragment_offset_and_flags);
307 	if ((offset_and_flags & (IPH_MF | IPH_OFFSET)) != 0) {
308 		/*
309 		 * All non-initial fragments may pass because we cannot
310 		 * identify their type. It's safe to let them through
311 		 * because reassembly will fail if we decide to drop the
312 		 * initial fragment.
313 		 */
314 		if (((offset_and_flags << 3) & 0xffff) != 0)
315 			return (EINVAL);
316 		first_frag = B_TRUE;
317 	}
318 	/* drop packets without a udp header */
319 	udph = (struct udphdr *)((uchar_t *)ipha + IPH_HDR_LENGTH(ipha));
320 	if ((uchar_t *)&udph[1] > end)
321 		return (ENOSPC);
322 
323 	client = htons(IPPORT_BOOTPC);
324 	server = htons(IPPORT_BOOTPS);
325 	if (udph->uh_sport != client && udph->uh_sport != server &&
326 	    udph->uh_dport != client && udph->uh_dport != server)
327 		return (EINVAL);
328 
329 	/* drop dhcp fragments */
330 	if (first_frag)
331 		return (ENOSPC);
332 
333 	dh = (uchar_t *)&udph[1];
334 	if (dh + BASE_PKT_SIZE > end)
335 		return (EINVAL);
336 
337 	*dh4 = (struct dhcp *)dh;
338 	return (0);
339 }
340 
341 /*
342  * Wrappers for accesses to avl trees to improve readability.
343  * Their purposes are fairly self-explanatory.
344  */
345 static dhcpv4_txn_t *
346 find_dhcpv4_pending_txn(mac_client_impl_t *mcip, uint32_t xid)
347 {
348 	dhcpv4_txn_t	tmp_txn;
349 
350 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
351 	tmp_txn.dt_xid = xid;
352 	return (avl_find(&mcip->mci_v4_pending_txn, &tmp_txn, NULL));
353 }
354 
355 static int
356 insert_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
357 {
358 	avl_index_t	where;
359 
360 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
361 	if (avl_find(&mcip->mci_v4_pending_txn, txn, &where) != NULL)
362 		return (EEXIST);
363 
364 	if (avl_numnodes(&mcip->mci_v4_pending_txn) >= dhcp_max_pending_txn) {
365 		BUMP_STAT(mcip, dhcpdropped);
366 		return (EAGAIN);
367 	}
368 	avl_insert(&mcip->mci_v4_pending_txn, txn, where);
369 	return (0);
370 }
371 
372 static void
373 remove_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
374 {
375 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
376 	avl_remove(&mcip->mci_v4_pending_txn, txn);
377 }
378 
379 static dhcpv4_txn_t *
380 find_dhcpv4_completed_txn(mac_client_impl_t *mcip, uint8_t *cid,
381     uint8_t cid_len)
382 {
383 	dhcpv4_txn_t	tmp_txn;
384 
385 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
386 	if (cid_len > 0)
387 		bcopy(cid, tmp_txn.dt_cid, cid_len);
388 	tmp_txn.dt_cid_len = cid_len;
389 	return (avl_find(&mcip->mci_v4_completed_txn, &tmp_txn, NULL));
390 }
391 
392 /*
393  * After a pending txn is removed from the pending table, it is inserted
394  * into both the completed and dyn-ip tables. These two insertions are
395  * done together because a client ID must have 1:1 correspondence with
396  * an IP address and IP addresses must be unique in the dyn-ip table.
397  */
398 static int
399 insert_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
400 {
401 	avl_index_t	where;
402 
403 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
404 	if (avl_find(&mcip->mci_v4_completed_txn, txn, &where) != NULL)
405 		return (EEXIST);
406 
407 	if (avl_numnodes(&mcip->mci_v4_completed_txn) >=
408 	    dhcp_max_completed_txn) {
409 		BUMP_STAT(mcip, dhcpdropped);
410 		return (EAGAIN);
411 	}
412 
413 	avl_insert(&mcip->mci_v4_completed_txn, txn, where);
414 	if (avl_find(&mcip->mci_v4_dyn_ip, txn, &where) != NULL) {
415 		avl_remove(&mcip->mci_v4_completed_txn, txn);
416 		return (EEXIST);
417 	}
418 	avl_insert(&mcip->mci_v4_dyn_ip, txn, where);
419 	return (0);
420 }
421 
422 static void
423 remove_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
424 {
425 	dhcpv4_txn_t	*ctxn;
426 
427 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
428 	if ((ctxn = avl_find(&mcip->mci_v4_dyn_ip, txn, NULL)) != NULL &&
429 	    ctxn == txn)
430 		avl_remove(&mcip->mci_v4_dyn_ip, txn);
431 
432 	avl_remove(&mcip->mci_v4_completed_txn, txn);
433 }
434 
435 /*
436  * Check whether an IP address is in the dyn-ip table.
437  */
438 static boolean_t
439 check_dhcpv4_dyn_ip(mac_client_impl_t *mcip, ipaddr_t ipaddr)
440 {
441 	dhcpv4_txn_t	tmp_txn, *txn;
442 
443 	mutex_enter(&mcip->mci_protect_lock);
444 	tmp_txn.dt_ipaddr = ipaddr;
445 	txn = avl_find(&mcip->mci_v4_dyn_ip, &tmp_txn, NULL);
446 	mutex_exit(&mcip->mci_protect_lock);
447 	return (txn != NULL);
448 }
449 
450 /*
451  * Create/destroy a DHCPv4 transaction.
452  */
453 static dhcpv4_txn_t *
454 create_dhcpv4_txn(uint32_t xid, uint8_t *cid, uint8_t cid_len, ipaddr_t ipaddr)
455 {
456 	dhcpv4_txn_t	*txn;
457 
458 	if ((txn = kmem_zalloc(sizeof (*txn), KM_NOSLEEP)) == NULL)
459 		return (NULL);
460 
461 	txn->dt_xid = xid;
462 	txn->dt_timestamp = gethrtime();
463 	if (cid_len > 0)
464 		bcopy(cid, &txn->dt_cid, cid_len);
465 	txn->dt_cid_len = cid_len;
466 	txn->dt_ipaddr = ipaddr;
467 	return (txn);
468 }
469 
470 static void
471 free_dhcpv4_txn(dhcpv4_txn_t *txn)
472 {
473 	kmem_free(txn, sizeof (*txn));
474 }
475 
476 /*
477  * Clean up all v4 tables.
478  */
479 static void
480 flush_dhcpv4(mac_client_impl_t *mcip)
481 {
482 	void		*cookie = NULL;
483 	dhcpv4_txn_t	*txn;
484 
485 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
486 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_dyn_ip,
487 	    &cookie)) != NULL) {
488 		/*
489 		 * No freeing needed here because the same txn exists
490 		 * in the mci_v4_completed_txn table as well.
491 		 */
492 	}
493 	cookie = NULL;
494 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_completed_txn,
495 	    &cookie)) != NULL) {
496 		free_dhcpv4_txn(txn);
497 	}
498 	cookie = NULL;
499 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_pending_txn,
500 	    &cookie)) != NULL) {
501 		free_dhcpv4_txn(txn);
502 	}
503 }
504 
505 /*
506  * Cleanup stale DHCPv4 transactions.
507  */
508 static void
509 txn_cleanup_v4(mac_client_impl_t *mcip)
510 {
511 	dhcpv4_txn_t		*txn, *ctxn, *next, *txn_list = NULL;
512 
513 	/*
514 	 * Find stale pending transactions and place them on a list
515 	 * to be removed.
516 	 */
517 	for (txn = avl_first(&mcip->mci_v4_pending_txn); txn != NULL;
518 	    txn = avl_walk(&mcip->mci_v4_pending_txn, txn, AVL_AFTER)) {
519 		if (gethrtime() - txn->dt_timestamp > txn_cleanup_interval) {
520 			DTRACE_PROBE2(found__expired__txn,
521 			    mac_client_impl_t *, mcip,
522 			    dhcpv4_txn_t *, txn);
523 
524 			txn->dt_next = txn_list;
525 			txn_list = txn;
526 		}
527 	}
528 
529 	/*
530 	 * Remove and free stale pending transactions and completed
531 	 * transactions with the same client IDs as the stale transactions.
532 	 */
533 	for (txn = txn_list; txn != NULL; txn = next) {
534 		avl_remove(&mcip->mci_v4_pending_txn, txn);
535 
536 		ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid,
537 		    txn->dt_cid_len);
538 		if (ctxn != NULL) {
539 			DTRACE_PROBE2(removing__completed__txn,
540 			    mac_client_impl_t *, mcip,
541 			    dhcpv4_txn_t *, ctxn);
542 
543 			remove_dhcpv4_completed_txn(mcip, ctxn);
544 			free_dhcpv4_txn(ctxn);
545 		}
546 		next = txn->dt_next;
547 		txn->dt_next = NULL;
548 
549 		DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip,
550 		    dhcpv4_txn_t *, txn);
551 		free_dhcpv4_txn(txn);
552 	}
553 }
554 
555 /*
556  * Core logic for intercepting outbound DHCPv4 packets.
557  */
558 static boolean_t
559 intercept_dhcpv4_outbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end)
560 {
561 	struct dhcp		*dh4;
562 	uchar_t			*opt;
563 	dhcpv4_txn_t		*txn, *ctxn;
564 	ipaddr_t		ipaddr;
565 	uint8_t			opt_len, mtype, cid[DHCP_MAX_OPT_SIZE], cid_len;
566 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
567 
568 	if (get_dhcpv4_info(ipha, end, &dh4) != 0)
569 		return (B_TRUE);
570 
571 	/* ip_nospoof/allowed-ips and DHCP are mutually exclusive by default */
572 	if (allowed_ips_set(mrp, IPV4_VERSION))
573 		return (B_FALSE);
574 
575 	if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 ||
576 	    opt_len != 1) {
577 		DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip,
578 		    struct dhcp *, dh4);
579 		return (B_TRUE);
580 	}
581 	mtype = *opt;
582 	if (mtype != REQUEST && mtype != RELEASE) {
583 		DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip,
584 		    struct dhcp *, dh4, uint8_t, mtype);
585 		return (B_TRUE);
586 	}
587 
588 	/* client ID is optional for IPv4 */
589 	if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &opt, &opt_len) == 0 &&
590 	    opt_len >= 2) {
591 		bcopy(opt, cid, opt_len);
592 		cid_len = opt_len;
593 	} else {
594 		bzero(cid, DHCP_MAX_OPT_SIZE);
595 		cid_len = 0;
596 	}
597 
598 	mutex_enter(&mcip->mci_protect_lock);
599 	if (mtype == RELEASE) {
600 		DTRACE_PROBE2(release, mac_client_impl_t *, mcip,
601 		    struct dhcp *, dh4);
602 
603 		/* flush any completed txn with this cid */
604 		ctxn = find_dhcpv4_completed_txn(mcip, cid, cid_len);
605 		if (ctxn != NULL) {
606 			DTRACE_PROBE2(release__successful, mac_client_impl_t *,
607 			    mcip, struct dhcp *, dh4);
608 
609 			remove_dhcpv4_completed_txn(mcip, ctxn);
610 			free_dhcpv4_txn(ctxn);
611 		}
612 		goto done;
613 	}
614 
615 	/*
616 	 * If a pending txn already exists, we'll update its timestamp so
617 	 * it won't get flushed by the timer. We don't need to create new
618 	 * txns for retransmissions.
619 	 */
620 	if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) != NULL) {
621 		DTRACE_PROBE2(update, mac_client_impl_t *, mcip,
622 		    dhcpv4_txn_t *, txn);
623 		txn->dt_timestamp = gethrtime();
624 		goto done;
625 	}
626 
627 	if (get_dhcpv4_option(dh4, end, CD_REQUESTED_IP_ADDR,
628 	    &opt, &opt_len) != 0 || opt_len != sizeof (ipaddr)) {
629 		DTRACE_PROBE2(ipaddr__not__found, mac_client_impl_t *, mcip,
630 		    struct dhcp *, dh4);
631 		goto done;
632 	}
633 	bcopy(opt, &ipaddr, sizeof (ipaddr));
634 	if ((txn = create_dhcpv4_txn(dh4->xid, cid, cid_len, ipaddr)) == NULL)
635 		goto done;
636 
637 	if (insert_dhcpv4_pending_txn(mcip, txn) != 0) {
638 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
639 		    dhcpv4_txn_t *, txn);
640 		free_dhcpv4_txn(txn);
641 		goto done;
642 	}
643 	start_txn_cleanup_timer(mcip);
644 
645 	DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip,
646 	    dhcpv4_txn_t *, txn);
647 
648 done:
649 	mutex_exit(&mcip->mci_protect_lock);
650 	return (B_TRUE);
651 }
652 
653 /*
654  * Core logic for intercepting inbound DHCPv4 packets.
655  */
656 static void
657 intercept_dhcpv4_inbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end)
658 {
659 	uchar_t		*opt;
660 	struct dhcp	*dh4;
661 	dhcpv4_txn_t	*txn, *ctxn;
662 	uint8_t		opt_len, mtype;
663 
664 	if (get_dhcpv4_info(ipha, end, &dh4) != 0)
665 		return;
666 
667 	if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 ||
668 	    opt_len != 1) {
669 		DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip,
670 		    struct dhcp *, dh4);
671 		return;
672 	}
673 	mtype = *opt;
674 	if (mtype != ACK && mtype != NAK) {
675 		DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip,
676 		    struct dhcp *, dh4, uint8_t, mtype);
677 		return;
678 	}
679 
680 	mutex_enter(&mcip->mci_protect_lock);
681 	if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) == NULL) {
682 		DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip,
683 		    struct dhcp *, dh4);
684 		goto done;
685 	}
686 	remove_dhcpv4_pending_txn(mcip, txn);
687 
688 	/*
689 	 * We're about to move a txn from the pending table to the completed/
690 	 * dyn-ip tables. If there is an existing completed txn with the
691 	 * same cid as our txn, we need to remove and free it.
692 	 */
693 	ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, txn->dt_cid_len);
694 	if (ctxn != NULL) {
695 		DTRACE_PROBE2(replacing__old__txn, mac_client_impl_t *, mcip,
696 		    dhcpv4_txn_t *, ctxn);
697 		remove_dhcpv4_completed_txn(mcip, ctxn);
698 		free_dhcpv4_txn(ctxn);
699 	}
700 	if (mtype == NAK) {
701 		DTRACE_PROBE2(nak__received, mac_client_impl_t *, mcip,
702 		    dhcpv4_txn_t *, txn);
703 		free_dhcpv4_txn(txn);
704 		goto done;
705 	}
706 	if (insert_dhcpv4_completed_txn(mcip, txn) != 0) {
707 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
708 		    dhcpv4_txn_t *, txn);
709 		free_dhcpv4_txn(txn);
710 		goto done;
711 	}
712 	DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip,
713 	    dhcpv4_txn_t *, txn);
714 
715 done:
716 	mutex_exit(&mcip->mci_protect_lock);
717 }
718 
719 
720 /*
721  * Comparison functions for the DHCPv6 AVL trees.
722  */
723 static int
724 compare_dhcpv6_xid(const void *arg1, const void *arg2)
725 {
726 	const dhcpv6_txn_t	*txn1 = arg1, *txn2 = arg2;
727 
728 	if (txn1->dt_xid < txn2->dt_xid)
729 		return (-1);
730 	else if (txn1->dt_xid > txn2->dt_xid)
731 		return (1);
732 	else
733 		return (0);
734 }
735 
736 static int
737 compare_dhcpv6_ip(const void *arg1, const void *arg2)
738 {
739 	const dhcpv6_addr_t	*ip1 = arg1, *ip2 = arg2;
740 	int			ret;
741 
742 	ret = memcmp(&ip1->da_addr, &ip2->da_addr, sizeof (in6_addr_t));
743 	if (ret < 0)
744 		return (-1);
745 	else if (ret > 0)
746 		return (1);
747 	else
748 		return (0);
749 }
750 
751 static int
752 compare_dhcpv6_cid(const void *arg1, const void *arg2)
753 {
754 	const dhcpv6_cid_t	*cid1 = arg1, *cid2 = arg2;
755 	int			ret;
756 
757 	if (cid1->dc_cid_len < cid2->dc_cid_len)
758 		return (-1);
759 	else if (cid1->dc_cid_len > cid2->dc_cid_len)
760 		return (1);
761 
762 	if (cid1->dc_cid_len == 0)
763 		return (0);
764 
765 	ret = memcmp(cid1->dc_cid, cid2->dc_cid, cid1->dc_cid_len);
766 	if (ret < 0)
767 		return (-1);
768 	else if (ret > 0)
769 		return (1);
770 	else
771 		return (0);
772 }
773 
774 /*
775  * Locate the start of a DHCPv6 header.
776  * The possible return values and associated meanings are:
777  * 0      - packet is DHCP and has a DHCP header.
778  * EINVAL - packet is not DHCP. the recommended action is to let it pass.
779  * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable.
780  *          the recommended action is to drop it.
781  */
782 static int
783 get_dhcpv6_info(ip6_t *ip6h, uchar_t *end, dhcpv6_message_t **dh6)
784 {
785 	uint16_t	hdrlen, client, server;
786 	boolean_t	first_frag = B_FALSE;
787 	ip6_frag_t	*frag = NULL;
788 	uint8_t		proto;
789 	struct udphdr	*udph;
790 	uchar_t		*dh;
791 
792 	if (!mac_ip_hdr_length_v6(ip6h, end, &hdrlen, &proto, &frag))
793 		return (ENOSPC);
794 
795 	if (proto != IPPROTO_UDP)
796 		return (EINVAL);
797 
798 	if (frag != NULL) {
799 		/*
800 		 * All non-initial fragments may pass because we cannot
801 		 * identify their type. It's safe to let them through
802 		 * because reassembly will fail if we decide to drop the
803 		 * initial fragment.
804 		 */
805 		if ((ntohs(frag->ip6f_offlg) & ~7) != 0)
806 			return (EINVAL);
807 		first_frag = B_TRUE;
808 	}
809 	/* drop packets without a udp header */
810 	udph = (struct udphdr *)((uchar_t *)ip6h + hdrlen);
811 	if ((uchar_t *)&udph[1] > end)
812 		return (ENOSPC);
813 
814 	client = htons(IPPORT_DHCPV6C);
815 	server = htons(IPPORT_DHCPV6S);
816 	if (udph->uh_sport != client && udph->uh_sport != server &&
817 	    udph->uh_dport != client && udph->uh_dport != server)
818 		return (EINVAL);
819 
820 	/* drop dhcp fragments */
821 	if (first_frag)
822 		return (ENOSPC);
823 
824 	dh = (uchar_t *)&udph[1];
825 	if (dh + sizeof (dhcpv6_message_t) > end)
826 		return (EINVAL);
827 
828 	*dh6 = (dhcpv6_message_t *)dh;
829 	return (0);
830 }
831 
832 /*
833  * Find the specified DHCPv6 option.
834  */
835 static dhcpv6_option_t *
836 get_dhcpv6_option(void *buf, size_t buflen, dhcpv6_option_t *oldopt,
837     uint16_t codenum, uint_t *retlenp)
838 {
839 	uchar_t		*bp;
840 	dhcpv6_option_t	d6o;
841 	uint_t		olen;
842 
843 	codenum = htons(codenum);
844 	bp = buf;
845 	while (buflen >= sizeof (dhcpv6_option_t)) {
846 		bcopy(bp, &d6o, sizeof (d6o));
847 		olen = ntohs(d6o.d6o_len) + sizeof (d6o);
848 		if (olen > buflen)
849 			break;
850 		if (d6o.d6o_code != codenum || d6o.d6o_len == 0 ||
851 		    (oldopt != NULL && bp <= (uchar_t *)oldopt)) {
852 			bp += olen;
853 			buflen -= olen;
854 			continue;
855 		}
856 		if (retlenp != NULL)
857 			*retlenp = olen;
858 		/* LINTED : alignment */
859 		return ((dhcpv6_option_t *)bp);
860 	}
861 	return (NULL);
862 }
863 
864 /*
865  * Get the status code from a reply message.
866  */
867 static int
868 get_dhcpv6_status(dhcpv6_message_t *dh6, uchar_t *end, uint16_t *status)
869 {
870 	dhcpv6_option_t	*d6o;
871 	uint_t		olen;
872 	uint16_t	s;
873 
874 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
875 	    DHCPV6_OPT_STATUS_CODE, &olen);
876 
877 	/* Success is implied if status code is missing */
878 	if (d6o == NULL) {
879 		*status = DHCPV6_STAT_SUCCESS;
880 		return (0);
881 	}
882 	if ((uchar_t *)d6o + olen > end)
883 		return (EINVAL);
884 
885 	olen -= sizeof (*d6o);
886 	if (olen < sizeof (s))
887 		return (EINVAL);
888 
889 	bcopy(&d6o[1], &s, sizeof (s));
890 	*status = ntohs(s);
891 	return (0);
892 }
893 
894 /*
895  * Get the addresses from a reply message.
896  */
897 static int
898 get_dhcpv6_addrs(dhcpv6_message_t *dh6, uchar_t *end, dhcpv6_cid_t *cid)
899 {
900 	dhcpv6_option_t		*d6o;
901 	dhcpv6_addr_t		*next;
902 	uint_t			olen;
903 
904 	d6o = NULL;
905 	while ((d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1],
906 	    d6o, DHCPV6_OPT_IA_NA, &olen)) != NULL) {
907 		dhcpv6_option_t		*d6so;
908 		dhcpv6_iaaddr_t		d6ia;
909 		dhcpv6_addr_t		**addrp;
910 		uchar_t			*obase;
911 		uint_t			solen;
912 
913 		if (olen < sizeof (dhcpv6_ia_na_t) ||
914 		    (uchar_t *)d6o + olen > end)
915 			goto fail;
916 
917 		obase = (uchar_t *)d6o + sizeof (dhcpv6_ia_na_t);
918 		olen -= sizeof (dhcpv6_ia_na_t);
919 		d6so = NULL;
920 		while ((d6so = get_dhcpv6_option(obase, olen, d6so,
921 		    DHCPV6_OPT_IAADDR, &solen)) != NULL) {
922 			if (solen < sizeof (dhcpv6_iaaddr_t) ||
923 			    (uchar_t *)d6so + solen > end)
924 				goto fail;
925 
926 			bcopy(d6so, &d6ia, sizeof (d6ia));
927 			for (addrp = &cid->dc_addr; *addrp != NULL;
928 			    addrp = &(*addrp)->da_next) {
929 				if (bcmp(&(*addrp)->da_addr, &d6ia.d6ia_addr,
930 				    sizeof (in6_addr_t)) == 0)
931 					goto fail;
932 			}
933 			if ((*addrp = kmem_zalloc(sizeof (dhcpv6_addr_t),
934 			    KM_NOSLEEP)) == NULL)
935 				goto fail;
936 
937 			bcopy(&d6ia.d6ia_addr, &(*addrp)->da_addr,
938 			    sizeof (in6_addr_t));
939 			cid->dc_addrcnt++;
940 		}
941 	}
942 	if (cid->dc_addrcnt == 0)
943 		return (ENOENT);
944 
945 	return (0);
946 
947 fail:
948 	for (; cid->dc_addr != NULL; cid->dc_addr = next) {
949 		next = cid->dc_addr->da_next;
950 		kmem_free(cid->dc_addr, sizeof (dhcpv6_addr_t));
951 		cid->dc_addrcnt--;
952 	}
953 	ASSERT(cid->dc_addrcnt == 0);
954 	return (EINVAL);
955 }
956 
957 /*
958  * Free a cid.
959  * Before this gets called the caller must ensure that all the
960  * addresses are removed from the mci_v6_dyn_ip table.
961  */
962 static void
963 free_dhcpv6_cid(dhcpv6_cid_t *cid)
964 {
965 	dhcpv6_addr_t	*addr, *next;
966 	uint_t		cnt = 0;
967 
968 	kmem_free(cid->dc_cid, cid->dc_cid_len);
969 	for (addr = cid->dc_addr; addr != NULL; addr = next) {
970 		next = addr->da_next;
971 		kmem_free(addr, sizeof (*addr));
972 		cnt++;
973 	}
974 	ASSERT(cnt == cid->dc_addrcnt);
975 	kmem_free(cid, sizeof (*cid));
976 }
977 
978 /*
979  * Extract the DUID from a message. The associated addresses will be
980  * extracted later from the reply message.
981  */
982 static dhcpv6_cid_t *
983 create_dhcpv6_cid(dhcpv6_message_t *dh6, uchar_t *end)
984 {
985 	dhcpv6_option_t		*d6o;
986 	dhcpv6_cid_t		*cid;
987 	uchar_t			*rawcid;
988 	uint_t			olen, rawcidlen;
989 
990 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
991 	    DHCPV6_OPT_CLIENTID, &olen);
992 	if (d6o == NULL || (uchar_t *)d6o + olen > end)
993 		return (NULL);
994 
995 	rawcidlen = olen - sizeof (*d6o);
996 	if ((rawcid = kmem_zalloc(rawcidlen, KM_NOSLEEP)) == NULL)
997 		return (NULL);
998 	bcopy(d6o + 1, rawcid, rawcidlen);
999 
1000 	if ((cid = kmem_zalloc(sizeof (*cid), KM_NOSLEEP)) == NULL) {
1001 		kmem_free(rawcid, rawcidlen);
1002 		return (NULL);
1003 	}
1004 	cid->dc_cid = rawcid;
1005 	cid->dc_cid_len = rawcidlen;
1006 	return (cid);
1007 }
1008 
1009 /*
1010  * Remove a cid from mci_v6_cid. The addresses owned by the cid
1011  * are also removed from mci_v6_dyn_ip.
1012  */
1013 static void
1014 remove_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1015 {
1016 	dhcpv6_addr_t	*addr, *tmp_addr;
1017 
1018 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1019 	avl_remove(&mcip->mci_v6_cid, cid);
1020 	for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) {
1021 		tmp_addr = avl_find(&mcip->mci_v6_dyn_ip, addr, NULL);
1022 		if (tmp_addr == addr)
1023 			avl_remove(&mcip->mci_v6_dyn_ip, addr);
1024 	}
1025 }
1026 
1027 /*
1028  * Find and remove a matching cid and associated addresses from
1029  * their respective tables.
1030  */
1031 static void
1032 release_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1033 {
1034 	dhcpv6_cid_t	*oldcid;
1035 
1036 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1037 	if ((oldcid = avl_find(&mcip->mci_v6_cid, cid, NULL)) == NULL)
1038 		return;
1039 
1040 	/*
1041 	 * Since cid belongs to a pending txn, it can't possibly be in
1042 	 * mci_v6_cid. Anything that's found must be an existing cid.
1043 	 */
1044 	ASSERT(oldcid != cid);
1045 	remove_dhcpv6_cid(mcip, oldcid);
1046 	free_dhcpv6_cid(oldcid);
1047 }
1048 
1049 /*
1050  * Insert cid into mci_v6_cid.
1051  */
1052 static int
1053 insert_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1054 {
1055 	avl_index_t	where;
1056 	dhcpv6_addr_t	*addr;
1057 
1058 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1059 	if (avl_find(&mcip->mci_v6_cid, cid, &where) != NULL)
1060 		return (EEXIST);
1061 
1062 	if (avl_numnodes(&mcip->mci_v6_cid) >= dhcp_max_completed_txn) {
1063 		BUMP_STAT(mcip, dhcpdropped);
1064 		return (EAGAIN);
1065 	}
1066 	avl_insert(&mcip->mci_v6_cid, cid, where);
1067 	for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) {
1068 		if (avl_find(&mcip->mci_v6_dyn_ip, addr, &where) != NULL)
1069 			goto fail;
1070 
1071 		avl_insert(&mcip->mci_v6_dyn_ip, addr, where);
1072 	}
1073 	return (0);
1074 
1075 fail:
1076 	remove_dhcpv6_cid(mcip, cid);
1077 	return (EEXIST);
1078 }
1079 
1080 /*
1081  * Check whether an IP address is in the dyn-ip table.
1082  */
1083 static boolean_t
1084 check_dhcpv6_dyn_ip(mac_client_impl_t *mcip, in6_addr_t *addr)
1085 {
1086 	dhcpv6_addr_t	tmp_addr, *a;
1087 
1088 	mutex_enter(&mcip->mci_protect_lock);
1089 	bcopy(addr, &tmp_addr.da_addr, sizeof (in6_addr_t));
1090 	a = avl_find(&mcip->mci_v6_dyn_ip, &tmp_addr, NULL);
1091 	mutex_exit(&mcip->mci_protect_lock);
1092 	return (a != NULL);
1093 }
1094 
1095 static dhcpv6_txn_t *
1096 find_dhcpv6_pending_txn(mac_client_impl_t *mcip, uint32_t xid)
1097 {
1098 	dhcpv6_txn_t	tmp_txn;
1099 
1100 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1101 	tmp_txn.dt_xid = xid;
1102 	return (avl_find(&mcip->mci_v6_pending_txn, &tmp_txn, NULL));
1103 }
1104 
/*
 * Unlink a transaction from the pending-transaction table.
 * The transaction is not freed; the caller must hold mci_protect_lock.
 */
static void
remove_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn)
{
	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
	avl_remove(&mcip->mci_v6_pending_txn, txn);
}
1111 
1112 static dhcpv6_txn_t *
1113 create_dhcpv6_txn(uint32_t xid, dhcpv6_cid_t *cid)
1114 {
1115 	dhcpv6_txn_t	*txn;
1116 
1117 	if ((txn = kmem_zalloc(sizeof (dhcpv6_txn_t), KM_NOSLEEP)) == NULL)
1118 		return (NULL);
1119 
1120 	txn->dt_xid = xid;
1121 	txn->dt_cid = cid;
1122 	txn->dt_timestamp = gethrtime();
1123 	return (txn);
1124 }
1125 
1126 static void
1127 free_dhcpv6_txn(dhcpv6_txn_t *txn)
1128 {
1129 	if (txn->dt_cid != NULL)
1130 		free_dhcpv6_cid(txn->dt_cid);
1131 	kmem_free(txn, sizeof (dhcpv6_txn_t));
1132 }
1133 
1134 static int
1135 insert_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn)
1136 {
1137 	avl_index_t	where;
1138 
1139 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1140 	if (avl_find(&mcip->mci_v6_pending_txn, txn, &where) != NULL)
1141 		return (EEXIST);
1142 
1143 	if (avl_numnodes(&mcip->mci_v6_pending_txn) >= dhcp_max_pending_txn) {
1144 		BUMP_STAT(mcip, dhcpdropped);
1145 		return (EAGAIN);
1146 	}
1147 	avl_insert(&mcip->mci_v6_pending_txn, txn, where);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Clean up all v6 tables.
1153  */
1154 static void
1155 flush_dhcpv6(mac_client_impl_t *mcip)
1156 {
1157 	void		*cookie = NULL;
1158 	dhcpv6_cid_t	*cid;
1159 	dhcpv6_txn_t	*txn;
1160 
1161 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1162 	while (avl_destroy_nodes(&mcip->mci_v6_dyn_ip, &cookie) != NULL) {
1163 	}
1164 	cookie = NULL;
1165 	while ((cid = avl_destroy_nodes(&mcip->mci_v6_cid, &cookie)) != NULL) {
1166 		free_dhcpv6_cid(cid);
1167 	}
1168 	cookie = NULL;
1169 	while ((txn = avl_destroy_nodes(&mcip->mci_v6_pending_txn,
1170 	    &cookie)) != NULL) {
1171 		free_dhcpv6_txn(txn);
1172 	}
1173 }
1174 
1175 /*
1176  * Cleanup stale DHCPv6 transactions.
1177  */
1178 static void
1179 txn_cleanup_v6(mac_client_impl_t *mcip)
1180 {
1181 	dhcpv6_txn_t		*txn, *next, *txn_list = NULL;
1182 
1183 	/*
1184 	 * Find stale pending transactions and place them on a list
1185 	 * to be removed.
1186 	 */
1187 	for (txn = avl_first(&mcip->mci_v6_pending_txn); txn != NULL;
1188 	    txn = avl_walk(&mcip->mci_v6_pending_txn, txn, AVL_AFTER)) {
1189 		if (gethrtime() - txn->dt_timestamp > txn_cleanup_interval) {
1190 			DTRACE_PROBE2(found__expired__txn,
1191 			    mac_client_impl_t *, mcip,
1192 			    dhcpv6_txn_t *, txn);
1193 
1194 			txn->dt_next = txn_list;
1195 			txn_list = txn;
1196 		}
1197 	}
1198 
1199 	/*
1200 	 * Remove and free stale pending transactions.
1201 	 * Release any existing cids matching the stale transactions.
1202 	 */
1203 	for (txn = txn_list; txn != NULL; txn = next) {
1204 		avl_remove(&mcip->mci_v6_pending_txn, txn);
1205 		release_dhcpv6_cid(mcip, txn->dt_cid);
1206 		next = txn->dt_next;
1207 		txn->dt_next = NULL;
1208 
1209 		DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip,
1210 		    dhcpv6_txn_t *, txn);
1211 		free_dhcpv6_txn(txn);
1212 	}
1213 
1214 }
1215 
1216 /*
1217  * Core logic for intercepting outbound DHCPv6 packets.
1218  */
1219 static boolean_t
1220 intercept_dhcpv6_outbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end)
1221 {
1222 	dhcpv6_message_t	*dh6;
1223 	dhcpv6_txn_t		*txn;
1224 	dhcpv6_cid_t		*cid = NULL;
1225 	uint32_t		xid;
1226 	uint8_t			mtype;
1227 	mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
1228 
1229 	if (get_dhcpv6_info(ip6h, end, &dh6) != 0)
1230 		return (B_TRUE);
1231 
1232 	/* ip_nospoof/allowed-ips and DHCP are mutually exclusive by default */
1233 	if (allowed_ips_set(mrp, IPV6_VERSION))
1234 		return (B_FALSE);
1235 
1236 	/*
1237 	 * We want to act on packets that result in DHCPv6 Reply messages, or
1238 	 * on packets that give up an IPv6 address. For example, a Request or
1239 	 * Solicit (w/ the Rapid Commit option) will cause the server to send a
1240 	 * Reply, ending the transaction.
1241 	 */
1242 	mtype = dh6->d6m_msg_type;
1243 	if (mtype != DHCPV6_MSG_SOLICIT && mtype != DHCPV6_MSG_REQUEST &&
1244 	    mtype != DHCPV6_MSG_RENEW && mtype != DHCPV6_MSG_REBIND &&
1245 	    mtype != DHCPV6_MSG_RELEASE)
1246 		return (B_TRUE);
1247 
1248 	if ((cid = create_dhcpv6_cid(dh6, end)) == NULL)
1249 		return (B_TRUE);
1250 
1251 	mutex_enter(&mcip->mci_protect_lock);
1252 	if (mtype == DHCPV6_MSG_RELEASE) {
1253 		release_dhcpv6_cid(mcip, cid);
1254 		goto done;
1255 	}
1256 	xid = DHCPV6_GET_TRANSID(dh6);
1257 	if ((txn = find_dhcpv6_pending_txn(mcip, xid)) != NULL) {
1258 		DTRACE_PROBE2(update, mac_client_impl_t *, mcip,
1259 		    dhcpv6_txn_t *, txn);
1260 		txn->dt_timestamp = gethrtime();
1261 		goto done;
1262 	}
1263 	if ((txn = create_dhcpv6_txn(xid, cid)) == NULL)
1264 		goto done;
1265 
1266 	cid = NULL;
1267 	if (insert_dhcpv6_pending_txn(mcip, txn) != 0) {
1268 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
1269 		    dhcpv6_txn_t *, txn);
1270 		free_dhcpv6_txn(txn);
1271 		goto done;
1272 	}
1273 	start_txn_cleanup_timer(mcip);
1274 
1275 	DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip,
1276 	    dhcpv6_txn_t *, txn);
1277 
1278 done:
1279 	if (cid != NULL)
1280 		free_dhcpv6_cid(cid);
1281 
1282 	mutex_exit(&mcip->mci_protect_lock);
1283 	return (B_TRUE);
1284 }
1285 
1286 /*
1287  * Core logic for intercepting inbound DHCPv6 packets.
1288  */
1289 static void
1290 intercept_dhcpv6_inbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end)
1291 {
1292 	dhcpv6_message_t	*dh6;
1293 	dhcpv6_txn_t		*txn;
1294 	uint32_t		xid;
1295 	uint8_t			mtype;
1296 	uint16_t		status;
1297 
1298 	if (get_dhcpv6_info(ip6h, end, &dh6) != 0)
1299 		return;
1300 
1301 	mtype = dh6->d6m_msg_type;
1302 	if (mtype != DHCPV6_MSG_REPLY)
1303 		return;
1304 
1305 	mutex_enter(&mcip->mci_protect_lock);
1306 	xid = DHCPV6_GET_TRANSID(dh6);
1307 	if ((txn = find_dhcpv6_pending_txn(mcip, xid)) == NULL) {
1308 		DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip,
1309 		    dhcpv6_message_t *, dh6);
1310 		goto done;
1311 	}
1312 	remove_dhcpv6_pending_txn(mcip, txn);
1313 	release_dhcpv6_cid(mcip, txn->dt_cid);
1314 
1315 	if (get_dhcpv6_status(dh6, end, &status) != 0 ||
1316 	    status != DHCPV6_STAT_SUCCESS) {
1317 		DTRACE_PROBE2(error__status, mac_client_impl_t *, mcip,
1318 		    dhcpv6_txn_t *, txn);
1319 		goto done;
1320 	}
1321 	if (get_dhcpv6_addrs(dh6, end, txn->dt_cid) != 0) {
1322 		DTRACE_PROBE2(no__addrs, mac_client_impl_t *, mcip,
1323 		    dhcpv6_txn_t *, txn);
1324 		goto done;
1325 	}
1326 	if (insert_dhcpv6_cid(mcip, txn->dt_cid) != 0) {
1327 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
1328 		    dhcpv6_txn_t *, txn);
1329 		goto done;
1330 	}
1331 	DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip,
1332 	    dhcpv6_txn_t *, txn);
1333 
1334 	txn->dt_cid = NULL;
1335 
1336 done:
1337 	if (txn != NULL)
1338 		free_dhcpv6_txn(txn);
1339 	mutex_exit(&mcip->mci_protect_lock);
1340 }
1341 
1342 /*
1343  * Timer for cleaning up stale transactions.
1344  */
1345 static void
1346 txn_cleanup_timer(void *arg)
1347 {
1348 	mac_client_impl_t	*mcip = arg;
1349 
1350 	mutex_enter(&mcip->mci_protect_lock);
1351 	if (mcip->mci_txn_cleanup_tid == 0) {
1352 		/* do nothing if timer got cancelled */
1353 		mutex_exit(&mcip->mci_protect_lock);
1354 		return;
1355 	}
1356 	mcip->mci_txn_cleanup_tid = 0;
1357 
1358 	txn_cleanup_v4(mcip);
1359 	txn_cleanup_v6(mcip);
1360 
1361 	/*
1362 	 * Restart timer if pending transactions still exist.
1363 	 */
1364 	if (!avl_is_empty(&mcip->mci_v4_pending_txn) ||
1365 	    !avl_is_empty(&mcip->mci_v6_pending_txn)) {
1366 		DTRACE_PROBE1(restarting__timer, mac_client_impl_t *, mcip);
1367 
1368 		mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip,
1369 		    drv_usectohz(txn_cleanup_interval / (NANOSEC / MICROSEC)));
1370 	}
1371 	mutex_exit(&mcip->mci_protect_lock);
1372 }
1373 
1374 static void
1375 start_txn_cleanup_timer(mac_client_impl_t *mcip)
1376 {
1377 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1378 	if (mcip->mci_txn_cleanup_tid == 0) {
1379 		mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip,
1380 		    drv_usectohz(txn_cleanup_interval / (NANOSEC / MICROSEC)));
1381 	}
1382 }
1383 
/*
 * Cancel the transaction cleanup timer.
 * Called with mci_protect_lock held; the lock is dropped and retaken
 * internally, and is held again on return.
 */
static void
cancel_txn_cleanup_timer(mac_client_impl_t *mcip)
{
	timeout_id_t	tid;

	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));

	/*
	 * This needs to be a while loop because the timer could get
	 * rearmed during untimeout().
	 */
	while ((tid = mcip->mci_txn_cleanup_tid) != 0) {
		/*
		 * Clear the id first: txn_cleanup_timer() treats a zero id
		 * as "cancelled" and returns without doing anything.
		 */
		mcip->mci_txn_cleanup_tid = 0;
		/*
		 * txn_cleanup_timer() takes mci_protect_lock itself, so the
		 * lock is not held across untimeout(). Presumably
		 * untimeout() can wait for a running handler -- see
		 * untimeout(9F).
		 */
		mutex_exit(&mcip->mci_protect_lock);
		(void) untimeout(tid);
		mutex_enter(&mcip->mci_protect_lock);
	}
}
1402 
1403 /*
1404  * Get the start/end pointers of an L3 packet and also do pullup if needed.
1405  * pulled-up packet needs to be freed by the caller.
1406  */
1407 static int
1408 get_l3_info(mblk_t *mp, size_t hdrsize, uchar_t **start, uchar_t **end,
1409     mblk_t **nmp)
1410 {
1411 	uchar_t	*s, *e;
1412 	mblk_t	*newmp = NULL;
1413 
1414 	/*
1415 	 * Pullup if necessary but reject packets that do not have
1416 	 * a proper mac header.
1417 	 */
1418 	s = mp->b_rptr + hdrsize;
1419 	e = mp->b_wptr;
1420 
1421 	if (s > mp->b_wptr)
1422 		return (EINVAL);
1423 
1424 	if (!OK_32PTR(s) || mp->b_cont != NULL) {
1425 		/*
1426 		 * Temporarily adjust mp->b_rptr to ensure proper
1427 		 * alignment of IP header in newmp.
1428 		 */
1429 		DTRACE_PROBE1(pullup__needed, mblk_t *, mp);
1430 
1431 		mp->b_rptr += hdrsize;
1432 		newmp = msgpullup(mp, -1);
1433 		mp->b_rptr -= hdrsize;
1434 
1435 		if (newmp == NULL)
1436 			return (ENOMEM);
1437 
1438 		s = newmp->b_rptr;
1439 		e = newmp->b_wptr;
1440 	}
1441 
1442 	*start = s;
1443 	*end = e;
1444 	*nmp = newmp;
1445 	return (0);
1446 }
1447 
1448 void
1449 mac_protect_intercept_dhcp_one(mac_client_impl_t *mcip, mblk_t *mp)
1450 {
1451 	mac_impl_t		*mip = mcip->mci_mip;
1452 	uchar_t			*start, *end;
1453 	mblk_t			*nmp = NULL;
1454 	mac_header_info_t	mhi;
1455 	int			err;
1456 
1457 	err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi);
1458 	if (err != 0) {
1459 		DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip,
1460 		    mblk_t *, mp);
1461 		return;
1462 	}
1463 
1464 	err = get_l3_info(mp, mhi.mhi_hdrsize, &start, &end, &nmp);
1465 	if (err != 0) {
1466 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1467 		    mblk_t *, mp);
1468 		return;
1469 	}
1470 
1471 	switch (mhi.mhi_bindsap) {
1472 	case ETHERTYPE_IP: {
1473 		ipha_t	*ipha = (ipha_t *)start;
1474 
1475 		if (start + sizeof (ipha_t) > end)
1476 			return;
1477 
1478 		intercept_dhcpv4_inbound(mcip, ipha, end);
1479 		break;
1480 	}
1481 	case ETHERTYPE_IPV6: {
1482 		ip6_t		*ip6h = (ip6_t *)start;
1483 
1484 		if (start + sizeof (ip6_t) > end)
1485 			return;
1486 
1487 		intercept_dhcpv6_inbound(mcip, ip6h, end);
1488 		break;
1489 	}
1490 	}
1491 	freemsg(nmp);
1492 }
1493 
1494 void
1495 mac_protect_intercept_dhcp(mac_client_impl_t *mcip, mblk_t *mp)
1496 {
1497 	/*
1498 	 * Skip checks if we are part of an aggr.
1499 	 */
1500 	if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0)
1501 		return;
1502 
1503 	for (; mp != NULL; mp = mp->b_next)
1504 		mac_protect_intercept_dhcp_one(mcip, mp);
1505 }
1506 
/*
 * Flush the DHCPv4 and DHCPv6 tables of a mac client, taking
 * mci_protect_lock around both flushes.
 */
void
mac_protect_flush_dhcp(mac_client_impl_t *mcip)
{
	mutex_enter(&mcip->mci_protect_lock);
	flush_dhcpv4(mcip);
	flush_dhcpv6(mcip);
	mutex_exit(&mcip->mci_protect_lock);
}
1515 
/*
 * Cancel a mac client's transaction cleanup timer. This is the
 * lock-taking wrapper around cancel_txn_cleanup_timer().
 */
void
mac_protect_cancel_timer(mac_client_impl_t *mcip)
{
	mutex_enter(&mcip->mci_protect_lock);
	cancel_txn_cleanup_timer(mcip);
	mutex_exit(&mcip->mci_protect_lock);
}
1523 
1524 /*
1525  * Check if addr is in the 'allowed-ips' list.
1526  */
1527 
1528 /* ARGSUSED */
1529 static boolean_t
1530 ipnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *protect,
1531     ipaddr_t *addr)
1532 {
1533 	uint_t	i;
1534 
1535 	/*
1536 	 * The unspecified address is allowed.
1537 	 */
1538 	if (*addr == INADDR_ANY)
1539 		return (B_TRUE);
1540 
1541 	for (i = 0; i < protect->mp_ipaddrcnt; i++) {
1542 		mac_ipaddr_t	*v4addr = &protect->mp_ipaddrs[i];
1543 
1544 		if (v4addr->ip_version == IPV4_VERSION) {
1545 			uint32_t mask;
1546 
1547 			/* LINTED E_SUSPICIOUS_COMPARISON */
1548 			ASSERT(v4addr->ip_netmask >= 0 &&
1549 			    v4addr->ip_netmask <= 32);
1550 			mask = 0xFFFFFFFFu << (32 - v4addr->ip_netmask);
1551 			/*
1552 			 * Since we have a netmask we know this entry
1553 			 * signifies the entire subnet. Check if the
1554 			 * given address is on the subnet.
1555 			 */
1556 			if (htonl(V4_PART_OF_V6(v4addr->ip_addr)) ==
1557 			    (htonl(*addr) & mask))
1558 				return (B_TRUE);
1559 		}
1560 	}
1561 	return (protect->mp_ipaddrcnt == 0 ?
1562 	    check_dhcpv4_dyn_ip(mcip, *addr) : B_FALSE);
1563 }
1564 
1565 static boolean_t
1566 ipnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *protect,
1567     in6_addr_t *addr)
1568 {
1569 	uint_t	i;
1570 
1571 	/*
1572 	 * The unspecified address and the v6 link local address are allowed.
1573 	 */
1574 	if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
1575 	    ((mcip->mci_protect_flags & MPT_FLAG_V6_LOCAL_ADDR_SET) != 0 &&
1576 	    IN6_ARE_ADDR_EQUAL(&mcip->mci_v6_local_addr, addr)))
1577 		return (B_TRUE);
1578 
1579 
1580 	for (i = 0; i < protect->mp_ipaddrcnt; i++) {
1581 		mac_ipaddr_t	*v6addr = &protect->mp_ipaddrs[i];
1582 
1583 		if (v6addr->ip_version == IPV6_VERSION &&
1584 		    /* LINTED E_SUSPICIOUS_COMPARISON */
1585 		    IN6_ARE_PREFIXEDADDR_EQUAL(&v6addr->ip_addr, addr,
1586 		    v6addr->ip_netmask))
1587 			return (B_TRUE);
1588 	}
1589 	return (protect->mp_ipaddrcnt == 0 ?
1590 	    check_dhcpv6_dyn_ip(mcip, addr) : B_FALSE);
1591 }
1592 
1593 /*
1594  * Checks various fields within an IPv6 NDP packet.
1595  */
1596 static boolean_t
1597 ipnospoof_check_ndp(mac_client_impl_t *mcip, mac_protect_t *protect,
1598     ip6_t *ip6h, uchar_t *end)
1599 {
1600 	icmp6_t			*icmp_nd = (icmp6_t *)&ip6h[1];
1601 	int			hdrlen, optlen, opttype, len;
1602 	uint_t			addrlen, maclen;
1603 	uint8_t			type;
1604 	nd_opt_hdr_t		*opt;
1605 	struct nd_opt_lla	*lla = NULL;
1606 
1607 	/*
1608 	 * NDP packets do not have extension headers so the ICMPv6 header
1609 	 * must immediately follow the IPv6 header.
1610 	 */
1611 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
1612 		return (B_TRUE);
1613 
1614 	/* ICMPv6 header missing */
1615 	if ((uchar_t *)&icmp_nd[1] > end)
1616 		return (B_FALSE);
1617 
1618 	len = end - (uchar_t *)icmp_nd;
1619 	type = icmp_nd->icmp6_type;
1620 
1621 	switch (type) {
1622 	case ND_ROUTER_SOLICIT:
1623 		hdrlen = sizeof (nd_router_solicit_t);
1624 		break;
1625 	case ND_ROUTER_ADVERT:
1626 		hdrlen = sizeof (nd_router_advert_t);
1627 		break;
1628 	case ND_NEIGHBOR_SOLICIT:
1629 		hdrlen = sizeof (nd_neighbor_solicit_t);
1630 		break;
1631 	case ND_NEIGHBOR_ADVERT:
1632 		hdrlen = sizeof (nd_neighbor_advert_t);
1633 		break;
1634 	case ND_REDIRECT:
1635 		hdrlen = sizeof (nd_redirect_t);
1636 		break;
1637 	default:
1638 		return (B_TRUE);
1639 	}
1640 
1641 	if (len < hdrlen)
1642 		return (B_FALSE);
1643 
1644 	/* SLLA option checking is needed for RS/RA/NS */
1645 	opttype = ND_OPT_SOURCE_LINKADDR;
1646 
1647 	switch (type) {
1648 	case ND_NEIGHBOR_ADVERT: {
1649 		nd_neighbor_advert_t	*na = (nd_neighbor_advert_t *)icmp_nd;
1650 
1651 		if (!ipnospoof_check_v6(mcip, protect, &na->nd_na_target)) {
1652 			DTRACE_PROBE2(ndp__na__fail,
1653 			    mac_client_impl_t *, mcip, ip6_t *, ip6h);
1654 			return (B_FALSE);
1655 		}
1656 
1657 		/* TLLA option for NA */
1658 		opttype = ND_OPT_TARGET_LINKADDR;
1659 		break;
1660 	}
1661 	case ND_REDIRECT: {
1662 		/* option checking not needed for RD */
1663 		return (B_TRUE);
1664 	}
1665 	default:
1666 		break;
1667 	}
1668 
1669 	if (len == hdrlen) {
1670 		/* no options, we're done */
1671 		return (B_TRUE);
1672 	}
1673 	opt = (nd_opt_hdr_t *)((uchar_t *)icmp_nd + hdrlen);
1674 	optlen = len - hdrlen;
1675 
1676 	/* find the option header we need */
1677 	while (optlen > sizeof (nd_opt_hdr_t)) {
1678 		if (opt->nd_opt_type == opttype) {
1679 			lla = (struct nd_opt_lla *)opt;
1680 			break;
1681 		}
1682 		optlen -= 8 * opt->nd_opt_len;
1683 		opt = (nd_opt_hdr_t *)
1684 		    ((uchar_t *)opt + 8 * opt->nd_opt_len);
1685 	}
1686 	if (lla == NULL)
1687 		return (B_TRUE);
1688 
1689 	addrlen = lla->nd_opt_lla_len * 8 - sizeof (nd_opt_hdr_t);
1690 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1691 
1692 	if (addrlen != maclen ||
1693 	    bcmp(mcip->mci_unicast->ma_addr,
1694 	    lla->nd_opt_lla_hdw_addr, maclen) != 0) {
1695 		DTRACE_PROBE2(ndp__lla__fail,
1696 		    mac_client_impl_t *, mcip, ip6_t *, ip6h);
1697 		return (B_FALSE);
1698 	}
1699 
1700 	DTRACE_PROBE2(ndp__lla__ok, mac_client_impl_t *, mcip, ip6_t *, ip6h);
1701 	return (B_TRUE);
1702 }
1703 
1704 /*
1705  * Enforce ip-nospoof protection.
1706  */
1707 static int
1708 ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect,
1709     mblk_t *mp, mac_header_info_t *mhip)
1710 {
1711 	size_t		hdrsize = mhip->mhi_hdrsize;
1712 	uint32_t	sap = mhip->mhi_bindsap;
1713 	uchar_t		*start, *end;
1714 	mblk_t		*nmp = NULL;
1715 	int		err;
1716 
1717 	err = get_l3_info(mp, hdrsize, &start, &end, &nmp);
1718 	if (err != 0) {
1719 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1720 		    mblk_t *, mp);
1721 		return (err);
1722 	}
1723 	err = EINVAL;
1724 
1725 	switch (sap) {
1726 	case ETHERTYPE_IP: {
1727 		ipha_t	*ipha = (ipha_t *)start;
1728 
1729 		if (start + sizeof (ipha_t) > end)
1730 			goto fail;
1731 
1732 		if (!ipnospoof_check_v4(mcip, protect, &ipha->ipha_src))
1733 			goto fail;
1734 
1735 		if (!intercept_dhcpv4_outbound(mcip, ipha, end))
1736 			goto fail;
1737 		break;
1738 	}
1739 	case ETHERTYPE_ARP: {
1740 		arh_t		*arh = (arh_t *)start;
1741 		uint32_t	maclen, hlen, plen, arplen;
1742 		ipaddr_t	spaddr;
1743 		uchar_t		*shaddr;
1744 
1745 		if (start + sizeof (arh_t) > end)
1746 			goto fail;
1747 
1748 		maclen = mcip->mci_mip->mi_info.mi_addr_length;
1749 		hlen = arh->arh_hlen;
1750 		plen = arh->arh_plen;
1751 		if ((hlen != 0 && hlen != maclen) ||
1752 		    plen != sizeof (ipaddr_t))
1753 			goto fail;
1754 
1755 		arplen = sizeof (arh_t) + 2 * hlen + 2 * plen;
1756 		if (start + arplen > end)
1757 			goto fail;
1758 
1759 		shaddr = start + sizeof (arh_t);
1760 		if (hlen != 0 &&
1761 		    bcmp(mcip->mci_unicast->ma_addr, shaddr, maclen) != 0)
1762 			goto fail;
1763 
1764 		bcopy(shaddr + hlen, &spaddr, sizeof (spaddr));
1765 		if (!ipnospoof_check_v4(mcip, protect, &spaddr))
1766 			goto fail;
1767 		break;
1768 	}
1769 	case ETHERTYPE_IPV6: {
1770 		ip6_t		*ip6h = (ip6_t *)start;
1771 
1772 		if (start + sizeof (ip6_t) > end)
1773 			goto fail;
1774 
1775 		if (!ipnospoof_check_v6(mcip, protect, &ip6h->ip6_src))
1776 			goto fail;
1777 
1778 		if (!ipnospoof_check_ndp(mcip, protect, ip6h, end))
1779 			goto fail;
1780 
1781 		if (!intercept_dhcpv6_outbound(mcip, ip6h, end))
1782 			goto fail;
1783 		break;
1784 	}
1785 	}
1786 	freemsg(nmp);
1787 	return (0);
1788 
1789 fail:
1790 	freemsg(nmp);
1791 	return (err);
1792 }
1793 
1794 static boolean_t
1795 dhcpnospoof_check_cid(mac_protect_t *p, uchar_t *cid, uint_t cidlen)
1796 {
1797 	int	i;
1798 
1799 	for (i = 0; i < p->mp_cidcnt; i++) {
1800 		mac_dhcpcid_t	*dcid = &p->mp_cids[i];
1801 
1802 		if (dcid->dc_len == cidlen &&
1803 		    bcmp(dcid->dc_id, cid, cidlen) == 0)
1804 			return (B_TRUE);
1805 	}
1806 	return (B_FALSE);
1807 }
1808 
1809 static boolean_t
1810 dhcpnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *p,
1811     ipha_t *ipha, uchar_t *end)
1812 {
1813 	struct dhcp	*dh4;
1814 	uchar_t		*cid;
1815 	uint_t		maclen, cidlen = 0;
1816 	uint8_t		optlen;
1817 	int		err;
1818 
1819 	if ((err = get_dhcpv4_info(ipha, end, &dh4)) != 0)
1820 		return (err == EINVAL);
1821 
1822 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1823 	if (dh4->hlen == maclen &&
1824 	    bcmp(mcip->mci_unicast->ma_addr, dh4->chaddr, maclen) != 0) {
1825 		return (B_FALSE);
1826 	}
1827 	if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &cid, &optlen) == 0)
1828 		cidlen = optlen;
1829 
1830 	if (cidlen == 0)
1831 		return (B_TRUE);
1832 
1833 	if (*cid == ARPHRD_ETHER && cidlen - 1 == maclen &&
1834 	    bcmp(mcip->mci_unicast->ma_addr, cid + 1, maclen) == 0)
1835 		return (B_TRUE);
1836 
1837 	return (dhcpnospoof_check_cid(p, cid, cidlen));
1838 }
1839 
1840 static boolean_t
1841 dhcpnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *p,
1842     ip6_t *ip6h, uchar_t *end)
1843 {
1844 	dhcpv6_message_t	*dh6;
1845 	dhcpv6_option_t		*d6o;
1846 	uint8_t			mtype;
1847 	uchar_t			*cid, *lladdr = NULL;
1848 	uint_t			cidlen, maclen, addrlen = 0;
1849 	uint16_t		cidtype;
1850 	int			err;
1851 
1852 	if ((err = get_dhcpv6_info(ip6h, end, &dh6)) != 0)
1853 		return (err == EINVAL);
1854 
1855 	/*
1856 	 * We only check client-generated messages.
1857 	 */
1858 	mtype = dh6->d6m_msg_type;
1859 	if (mtype == DHCPV6_MSG_ADVERTISE || mtype == DHCPV6_MSG_REPLY ||
1860 	    mtype == DHCPV6_MSG_RECONFIGURE)
1861 		return (B_TRUE);
1862 
1863 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
1864 	    DHCPV6_OPT_CLIENTID, &cidlen);
1865 	if (d6o == NULL || (uchar_t *)d6o + cidlen > end)
1866 		return (B_TRUE);
1867 
1868 	cid = (uchar_t *)&d6o[1];
1869 	cidlen -= sizeof (*d6o);
1870 	if (cidlen < sizeof (cidtype))
1871 		return (B_TRUE);
1872 
1873 	bcopy(cid, &cidtype, sizeof (cidtype));
1874 	cidtype = ntohs(cidtype);
1875 	if (cidtype == DHCPV6_DUID_LLT && cidlen >= sizeof (duid_llt_t)) {
1876 		lladdr = cid + sizeof (duid_llt_t);
1877 		addrlen = cidlen - sizeof (duid_llt_t);
1878 	}
1879 	if (cidtype == DHCPV6_DUID_LL && cidlen >= sizeof (duid_ll_t)) {
1880 		lladdr = cid + sizeof (duid_ll_t);
1881 		addrlen = cidlen - sizeof (duid_ll_t);
1882 	}
1883 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1884 	if (lladdr != NULL && addrlen == maclen &&
1885 	    bcmp(mcip->mci_unicast->ma_addr, lladdr, maclen) == 0) {
1886 		return (B_TRUE);
1887 	}
1888 	return (dhcpnospoof_check_cid(p, cid, cidlen));
1889 }
1890 
1891 /*
1892  * Enforce dhcp-nospoof protection.
1893  */
1894 static int
1895 dhcpnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect,
1896     mblk_t *mp, mac_header_info_t *mhip)
1897 {
1898 	size_t		hdrsize = mhip->mhi_hdrsize;
1899 	uint32_t	sap = mhip->mhi_bindsap;
1900 	uchar_t		*start, *end;
1901 	mblk_t		*nmp = NULL;
1902 	int		err;
1903 
1904 	err = get_l3_info(mp, hdrsize, &start, &end, &nmp);
1905 	if (err != 0) {
1906 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1907 		    mblk_t *, mp);
1908 		return (err);
1909 	}
1910 	err = EINVAL;
1911 
1912 	switch (sap) {
1913 	case ETHERTYPE_IP: {
1914 		ipha_t	*ipha = (ipha_t *)start;
1915 
1916 		if (start + sizeof (ipha_t) > end)
1917 			goto fail;
1918 
1919 		if (!dhcpnospoof_check_v4(mcip, protect, ipha, end))
1920 			goto fail;
1921 
1922 		break;
1923 	}
1924 	case ETHERTYPE_IPV6: {
1925 		ip6_t		*ip6h = (ip6_t *)start;
1926 
1927 		if (start + sizeof (ip6_t) > end)
1928 			goto fail;
1929 
1930 		if (!dhcpnospoof_check_v6(mcip, protect, ip6h, end))
1931 			goto fail;
1932 
1933 		break;
1934 	}
1935 	}
1936 	freemsg(nmp);
1937 	return (0);
1938 
1939 fail:
1940 	/* increment dhcpnospoof stat here */
1941 	freemsg(nmp);
1942 	return (err);
1943 }
1944 
/*
 * This needs to be called whenever the mac client's mac address changes.
 *
 * Recomputes mci_v6_local_addr, the IPv6 link-local address derived
 * from the client's unicast mac address, and sets
 * MPT_FLAG_V6_LOCAL_ADDR_SET. For media other than DL_ETHER and DL_IB
 * nothing is changed.
 */
void
mac_protect_update_v6_local_addr(mac_client_impl_t *mcip)
{
	uint8_t		*p, *macaddr = mcip->mci_unicast->ma_addr;
	uint_t		i, media = mcip->mci_mip->mi_info.mi_media;
	in6_addr_t	token, *v6addr = &mcip->mci_v6_local_addr;
	in6_addr_t	ll_template = {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0};


	/* the interface token occupies the low 8 bytes of the address */
	bzero(&token, sizeof (token));
	p = (uint8_t *)&token.s6_addr32[2];

	switch (media) {
	case DL_ETHER:
		/*
		 * First three mac bytes (with the 0x2 bit of the first one
		 * flipped), then ff:fe, then the last three mac bytes.
		 */
		bcopy(macaddr, p, 3);
		p[0] ^= 0x2;
		p[3] = 0xff;
		p[4] = 0xfe;
		bcopy(macaddr + 3, p + 5, 3);
		break;
	case DL_IB:
		/*
		 * The token is the last 8 bytes of the 20-byte address,
		 * with the 0x2 bit of the first byte set.
		 */
		ASSERT(mcip->mci_mip->mi_info.mi_addr_length == 20);
		bcopy(macaddr + 12, p, 8);
		p[0] |= 2;
		break;
	default:
		/*
		 * We do not need to generate the local address for link types
		 * that do not support link protection. Wifi pretends to be
		 * ethernet so it is covered by the DL_ETHER case (note the
		 * use of mi_media instead of mi_nativemedia).
		 */
		return;
	}

	/* combine the link-local prefix template with the token */
	for (i = 0; i < 4; i++) {
		v6addr->s6_addr32[i] = token.s6_addr32[i] |
		    ll_template.s6_addr32[i];
	}
	mcip->mci_protect_flags |= MPT_FLAG_V6_LOCAL_ADDR_SET;
}
1989 
1990 /*
1991  * Enforce link protection on one packet.
1992  */
1993 static int
1994 mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp)
1995 {
1996 	mac_impl_t		*mip = mcip->mci_mip;
1997 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
1998 	mac_protect_t		*protect;
1999 	mac_header_info_t	mhi;
2000 	uint32_t		types;
2001 	int			err;
2002 
2003 	ASSERT(mp->b_next == NULL);
2004 	ASSERT(mrp != NULL);
2005 
2006 	err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi);
2007 	if (err != 0) {
2008 		DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip,
2009 		    mblk_t *, mp);
2010 		return (err);
2011 	}
2012 	protect = &mrp->mrp_protect;
2013 	types = protect->mp_types;
2014 
2015 	if ((types & MPT_MACNOSPOOF) != 0) {
2016 		if (mhi.mhi_saddr != NULL &&
2017 		    bcmp(mcip->mci_unicast->ma_addr, mhi.mhi_saddr,
2018 		    mip->mi_info.mi_addr_length) != 0) {
2019 			BUMP_STAT(mcip, macspoofed);
2020 			DTRACE_PROBE2(mac__nospoof__fail,
2021 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2022 			return (EINVAL);
2023 		}
2024 	}
2025 	if ((types & MPT_RESTRICTED) != 0) {
2026 		uint32_t	vid = VLAN_ID(mhi.mhi_tci);
2027 		uint32_t	sap = mhi.mhi_bindsap;
2028 
2029 		/*
2030 		 * ETHERTYPE_VLAN packets are allowed through, provided that
2031 		 * the vid is not spoofed.
2032 		 */
2033 		if (vid != 0 && !mac_client_check_flow_vid(mcip, vid)) {
2034 			BUMP_STAT(mcip, restricted);
2035 			DTRACE_PROBE2(restricted__vid__invalid,
2036 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2037 			return (EINVAL);
2038 		}
2039 
2040 		if (sap != ETHERTYPE_IP && sap != ETHERTYPE_IPV6 &&
2041 		    sap != ETHERTYPE_ARP) {
2042 			BUMP_STAT(mcip, restricted);
2043 			DTRACE_PROBE2(restricted__fail,
2044 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2045 			return (EINVAL);
2046 		}
2047 	}
2048 	if ((types & MPT_IPNOSPOOF) != 0) {
2049 		if ((err = ipnospoof_check(mcip, protect, mp, &mhi)) != 0) {
2050 			BUMP_STAT(mcip, ipspoofed);
2051 			DTRACE_PROBE2(ip__nospoof__fail,
2052 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2053 			return (err);
2054 		}
2055 	}
2056 	if ((types & MPT_DHCPNOSPOOF) != 0) {
2057 		if ((err = dhcpnospoof_check(mcip, protect, mp, &mhi)) != 0) {
2058 			BUMP_STAT(mcip, dhcpspoofed);
2059 			DTRACE_PROBE2(dhcp__nospoof__fail,
2060 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2061 			return (err);
2062 		}
2063 	}
2064 	return (0);
2065 }
2066 
2067 /*
2068  * Enforce link protection on a packet chain.
2069  * Packets that pass the checks are returned back to the caller.
2070  */
2071 mblk_t *
2072 mac_protect_check(mac_client_handle_t mch, mblk_t *mp)
2073 {
2074 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
2075 	mblk_t			*ret_mp = NULL, **tailp = &ret_mp, *next;
2076 
2077 	/*
2078 	 * Skip checks if we are part of an aggr.
2079 	 */
2080 	if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0)
2081 		return (mp);
2082 
2083 	for (; mp != NULL; mp = next) {
2084 		next = mp->b_next;
2085 		mp->b_next = NULL;
2086 
2087 		if (mac_protect_check_one(mcip, mp) == 0) {
2088 			*tailp = mp;
2089 			tailp = &mp->b_next;
2090 		} else {
2091 			freemsg(mp);
2092 		}
2093 	}
2094 	return (ret_mp);
2095 }
2096 
/*
 * Check if a particular protection type is enabled.
 * Function form of the MAC_PROTECT_ENABLED() macro that takes an
 * opaque mac client handle.
 */
boolean_t
mac_protect_enabled(mac_client_handle_t mch, uint32_t type)
{
	return (MAC_PROTECT_ENABLED((mac_client_impl_t *)mch, type));
}
2105 
2106 static int
2107 validate_ips(mac_protect_t *p)
2108 {
2109 	uint_t		i, j;
2110 
2111 	if (p->mp_ipaddrcnt == MPT_RESET)
2112 		return (0);
2113 
2114 	if (p->mp_ipaddrcnt > MPT_MAXIPADDR)
2115 		return (EINVAL);
2116 
2117 	for (i = 0; i < p->mp_ipaddrcnt; i++) {
2118 		mac_ipaddr_t	*addr = &p->mp_ipaddrs[i];
2119 
2120 		/*
2121 		 * The unspecified address is implicitly allowed so there's no
2122 		 * need to add it to the list. Also, validate that the netmask,
2123 		 * if any, is sane for the specific version of IP. A mask of
2124 		 * some kind is always required.
2125 		 */
2126 		if (addr->ip_netmask == 0)
2127 			return (EINVAL);
2128 
2129 		if (addr->ip_version == IPV4_VERSION) {
2130 			if (V4_PART_OF_V6(addr->ip_addr) == INADDR_ANY)
2131 				return (EINVAL);
2132 			if (addr->ip_netmask > 32)
2133 				return (EINVAL);
2134 		} else if (addr->ip_version == IPV6_VERSION) {
2135 			if (IN6_IS_ADDR_UNSPECIFIED(&addr->ip_addr))
2136 				return (EINVAL);
2137 
2138 			if (IN6_IS_ADDR_V4MAPPED_ANY(&addr->ip_addr))
2139 				return (EINVAL);
2140 
2141 			if (addr->ip_netmask > 128)
2142 				return (EINVAL);
2143 		} else {
2144 			/* invalid ip version */
2145 			return (EINVAL);
2146 		}
2147 
2148 		for (j = 0; j < p->mp_ipaddrcnt; j++) {
2149 			mac_ipaddr_t	*addr1 = &p->mp_ipaddrs[j];
2150 
2151 			if (i == j || addr->ip_version != addr1->ip_version)
2152 				continue;
2153 
2154 			/* found a duplicate */
2155 			if ((addr->ip_version == IPV4_VERSION &&
2156 			    V4_PART_OF_V6(addr->ip_addr) ==
2157 			    V4_PART_OF_V6(addr1->ip_addr)) ||
2158 			    IN6_ARE_ADDR_EQUAL(&addr->ip_addr,
2159 			    &addr1->ip_addr))
2160 				return (EINVAL);
2161 		}
2162 	}
2163 	return (0);
2164 }
2165 
2166 /* ARGSUSED */
2167 static int
2168 validate_cids(mac_protect_t *p)
2169 {
2170 	uint_t		i, j;
2171 
2172 	if (p->mp_cidcnt == MPT_RESET)
2173 		return (0);
2174 
2175 	if (p->mp_cidcnt > MPT_MAXCID)
2176 		return (EINVAL);
2177 
2178 	for (i = 0; i < p->mp_cidcnt; i++) {
2179 		mac_dhcpcid_t	*cid = &p->mp_cids[i];
2180 
2181 		if (cid->dc_len > MPT_MAXCIDLEN ||
2182 		    (cid->dc_form != CIDFORM_TYPED &&
2183 		    cid->dc_form != CIDFORM_HEX &&
2184 		    cid->dc_form != CIDFORM_STR))
2185 			return (EINVAL);
2186 
2187 		for (j = 0; j < p->mp_cidcnt; j++) {
2188 			mac_dhcpcid_t	*cid1 = &p->mp_cids[j];
2189 
2190 			if (i == j || cid->dc_len != cid1->dc_len)
2191 				continue;
2192 
2193 			/* found a duplicate */
2194 			if (bcmp(cid->dc_id, cid1->dc_id, cid->dc_len) == 0)
2195 				return (EINVAL);
2196 		}
2197 	}
2198 	return (0);
2199 }
2200 
2201 /*
2202  * Sanity-checks parameters given by userland.
2203  */
2204 int
2205 mac_protect_validate(mac_resource_props_t *mrp)
2206 {
2207 	mac_protect_t	*p = &mrp->mrp_protect;
2208 	int		err;
2209 
2210 	/* check for invalid types */
2211 	if (p->mp_types != MPT_RESET && (p->mp_types & ~MPT_ALL) != 0)
2212 		return (EINVAL);
2213 
2214 	if ((err = validate_ips(p)) != 0)
2215 		return (err);
2216 
2217 	if ((err = validate_cids(p)) != 0)
2218 		return (err);
2219 
2220 	return (0);
2221 }
2222 
2223 /*
2224  * Enable/disable link protection.
2225  */
2226 int
2227 mac_protect_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
2228 {
2229 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
2230 	mac_impl_t		*mip = mcip->mci_mip;
2231 	uint_t			media = mip->mi_info.mi_nativemedia;
2232 	int			err;
2233 
2234 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2235 
2236 	/* tunnels are not supported */
2237 	if (media == DL_IPV4 || media == DL_IPV6 || media == DL_6TO4)
2238 		return (ENOTSUP);
2239 
2240 	if ((err = mac_protect_validate(mrp)) != 0)
2241 		return (err);
2242 
2243 	if (err != 0)
2244 		return (err);
2245 
2246 	mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
2247 	i_mac_notify(((mcip->mci_state_flags & MCIS_IS_VNIC) != 0 ?
2248 	    mcip->mci_upper_mip : mip), MAC_NOTE_ALLOWED_IPS);
2249 	return (0);
2250 }
2251 
2252 void
2253 mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr)
2254 {
2255 	mac_protect_t	*np = &new->mrp_protect;
2256 	mac_protect_t	*cp = &curr->mrp_protect;
2257 	uint32_t	types = np->mp_types;
2258 
2259 	if (types == MPT_RESET) {
2260 		cp->mp_types = 0;
2261 		curr->mrp_mask &= ~MRP_PROTECT;
2262 	} else {
2263 		if (types != 0) {
2264 			cp->mp_types = types;
2265 			curr->mrp_mask |= MRP_PROTECT;
2266 		}
2267 	}
2268 	if (np->mp_ipaddrcnt != 0) {
2269 		if (np->mp_ipaddrcnt <= MPT_MAXIPADDR) {
2270 			bcopy(np->mp_ipaddrs, cp->mp_ipaddrs,
2271 			    sizeof (cp->mp_ipaddrs));
2272 			cp->mp_ipaddrcnt = np->mp_ipaddrcnt;
2273 		} else if (np->mp_ipaddrcnt == MPT_RESET) {
2274 			bzero(cp->mp_ipaddrs, sizeof (cp->mp_ipaddrs));
2275 			cp->mp_ipaddrcnt = 0;
2276 		}
2277 	}
2278 	if (np->mp_cidcnt != 0) {
2279 		if (np->mp_cidcnt <= MPT_MAXCID) {
2280 			bcopy(np->mp_cids, cp->mp_cids, sizeof (cp->mp_cids));
2281 			cp->mp_cidcnt = np->mp_cidcnt;
2282 		} else if (np->mp_cidcnt == MPT_RESET) {
2283 			bzero(cp->mp_cids, sizeof (cp->mp_cids));
2284 			cp->mp_cidcnt = 0;
2285 		}
2286 	}
2287 }
2288 
2289 void
2290 mac_protect_init(mac_client_impl_t *mcip)
2291 {
2292 	mutex_init(&mcip->mci_protect_lock, NULL, MUTEX_DRIVER, NULL);
2293 	mcip->mci_protect_flags = 0;
2294 	mcip->mci_txn_cleanup_tid = 0;
2295 	avl_create(&mcip->mci_v4_pending_txn, compare_dhcpv4_xid,
2296 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node));
2297 	avl_create(&mcip->mci_v4_completed_txn, compare_dhcpv4_cid,
2298 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node));
2299 	avl_create(&mcip->mci_v4_dyn_ip, compare_dhcpv4_ip,
2300 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_ipnode));
2301 	avl_create(&mcip->mci_v6_pending_txn, compare_dhcpv6_xid,
2302 	    sizeof (dhcpv6_txn_t), offsetof(dhcpv6_txn_t, dt_node));
2303 	avl_create(&mcip->mci_v6_cid, compare_dhcpv6_cid,
2304 	    sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node));
2305 	avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip,
2306 	    sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node));
2307 }
2308 
2309 void
2310 mac_protect_fini(mac_client_impl_t *mcip)
2311 {
2312 	avl_destroy(&mcip->mci_v6_dyn_ip);
2313 	avl_destroy(&mcip->mci_v6_cid);
2314 	avl_destroy(&mcip->mci_v6_pending_txn);
2315 	avl_destroy(&mcip->mci_v4_dyn_ip);
2316 	avl_destroy(&mcip->mci_v4_completed_txn);
2317 	avl_destroy(&mcip->mci_v4_pending_txn);
2318 	mcip->mci_txn_cleanup_tid = 0;
2319 	mcip->mci_protect_flags = 0;
2320 	mutex_destroy(&mcip->mci_protect_lock);
2321 }
2322 
2323 static boolean_t
2324 allowed_ips_set(mac_resource_props_t *mrp, uint32_t af)
2325 {
2326 	int i;
2327 
2328 	for (i = 0; i < mrp->mrp_protect.mp_ipaddrcnt; i++) {
2329 		if (mrp->mrp_protect.mp_ipaddrs[i].ip_version == af)
2330 			return (B_TRUE);
2331 	}
2332 	return (B_FALSE);
2333 }
2334 
2335 mac_protect_t *
2336 mac_protect_get(mac_handle_t mh)
2337 {
2338 	mac_impl_t *mip = (mac_impl_t *)mh;
2339 
2340 	return (&mip->mi_resource_props.mrp_protect);
2341 }
2342