xref: /titanic_50/usr/src/uts/common/io/mac/mac_protect.c (revision 10d63b7db37a83b39c7f511cf9426c9d03ea0760)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 #include <sys/strsun.h>
31 #include <sys/sdt.h>
32 #include <sys/mac.h>
33 #include <sys/mac_impl.h>
34 #include <sys/mac_client_impl.h>
35 #include <sys/mac_client_priv.h>
36 #include <sys/ethernet.h>
37 #include <sys/vlan.h>
38 #include <sys/dlpi.h>
39 #include <sys/avl.h>
40 #include <inet/ip.h>
41 #include <inet/ip6.h>
42 #include <inet/arp.h>
43 #include <netinet/arp.h>
44 #include <netinet/udp.h>
45 #include <netinet/dhcp.h>
46 #include <netinet/dhcp6.h>
47 
48 /*
49  * Implementation overview for DHCP address detection
50  *
51  * The purpose of DHCP address detection is to relieve the user of having to
52  * manually configure static IP addresses when ip-nospoof protection is turned
53  * on. To achieve this, the mac layer needs to intercept DHCP packets to
54  * determine the assigned IP addresses.
55  *
56  * A DHCP handshake between client and server typically requires at least
57  * 4 messages:
58  *
59  * 1. DISCOVER - client attempts to locate DHCP servers via a
60  *               broadcast message to its subnet.
61  * 2. OFFER    - server responds to client with an IP address and
62  *               other parameters.
63  * 3. REQUEST  - client requests the offered address.
64  * 4. ACK      - server verifies that the requested address matches
65  *               the one it offered.
66  *
67  * DHCPv6 behaves pretty much the same way aside from different message names.
68  *
69  * Address information is embedded in either the OFFER or REQUEST message.
70  * We chose to intercept REQUEST because this is at the last part of the
71  * handshake and it indicates that the client intends to keep the address.
72  * Intercepting OFFERs is unreliable because the client may receive multiple
73  * offers from different servers, and we can't tell which address the client
74  * will keep.
75  *
76  * Each DHCP message has a transaction ID. We use this transaction ID to match
77  * REQUESTs with ACKs received from servers.
78  *
79  * For IPv4, the process to acquire a DHCP-assigned address is as follows:
80  *
 * 1. Client sends REQUEST. A new dhcpv4_txn_t object is created and inserted
 *    in the mci_v4_pending_txn table (keyed by xid). This object represents
 *    a new transaction. It contains the xid, the client ID and requested IP
 *    address.
85  *
86  * 2. Server responds with an ACK. The xid from this ACK is used to lookup the
87  *    pending transaction from the mci_v4_pending_txn table. Once the object is
88  *    found, it is removed from the pending table and inserted into the
89  *    completed table (mci_v4_completed_txn, keyed by client ID) and the dynamic
90  *    IP table (mci_v4_dyn_ip, keyed by IP address).
91  *
92  * 3. An outgoing packet that goes through the ip-nospoof path will be checked
93  *    against the dynamic IP table. Packets that have the assigned DHCP address
94  *    as the source IP address will pass the check and be admitted onto the
95  *    network.
96  *
97  * IPv4 notes:
98  *
99  * If the server never responds with an ACK, there is a timer that is set after
100  * the insertion of the transaction into the pending table. When the timer
101  * fires, it will check whether the transaction is old (by comparing current
 * time and the txn's timestamp), if so the transaction will be freed. Along
103  * with this, any transaction in the completed/dyn-ip tables matching the client
104  * ID of this stale transaction will also be freed. If the client fails to
105  * extend a lease, we want to stop the client from using any IP addresses that
106  * were granted previously.
107  *
108  * A RELEASE message from the client will not cause a transaction to be created.
109  * The client ID in the RELEASE message will be used for finding and removing
110  * transactions in the completed and dyn-ip tables.
111  *
112  *
113  * For IPv6, the process to acquire a DHCPv6-assigned address is as follows:
114  *
115  * 1. Client sends REQUEST. The DUID is extracted and stored into a dhcpv6_cid_t
116  *    structure. A new transaction structure (dhcpv6_txn_t) is also created and
117  *    it will point to the dhcpv6_cid_t. If an existing transaction with a
118  *    matching xid is not found, this dhcpv6_txn_t will be inserted into the
119  *    mci_v6_pending_txn table (keyed by xid).
120  *
121  * 2. Server responds with a REPLY. If a pending transaction is found, the
122  *    addresses in the reply will be placed into the dhcpv6_cid_t pointed to by
123  *    the transaction. The dhcpv6_cid_t will then be moved to the mci_v6_cid
124  *    table (keyed by cid). The associated addresses will be added to the
125  *    mci_v6_dyn_ip table (while still being pointed to by the dhcpv6_cid_t).
126  *
127  * 3. IPv6 ip-nospoof will now check mci_v6_dyn_ip for matching packets.
128  *    Packets with a source address matching one of the DHCPv6-assigned
129  *    addresses will be allowed through.
130  *
131  * IPv6 notes:
132  *
133  * The v6 code shares the same timer as v4 for scrubbing stale transactions.
134  * Just like v4, as part of removing an expired transaction, a RELEASE will be
 * triggered on the cid associated with the expired transaction.
136  *
137  * The data structures used for v6 are slightly different because a v6 client
138  * may have multiple addresses associated with it.
139  */
140 
141 /*
142  * These are just arbitrary limits meant for preventing abuse (e.g. a user
143  * flooding the network with bogus transactions). They are not meant to be
144  * user-modifiable so they are not exposed as linkprops.
145  */
146 static ulong_t	dhcp_max_pending_txn = 512;
147 static ulong_t	dhcp_max_completed_txn = 512;
148 static hrtime_t	txn_cleanup_interval = 60 * NANOSEC;
149 
150 /*
151  * DHCPv4 transaction. It may be added to three different tables
152  * (keyed by different fields).
153  */
154 typedef struct dhcpv4_txn {
155 	uint32_t		dt_xid;
156 	hrtime_t		dt_timestamp;
157 	uint8_t			dt_cid[DHCP_MAX_OPT_SIZE];
158 	uint8_t			dt_cid_len;
159 	ipaddr_t		dt_ipaddr;
160 	avl_node_t		dt_node;
161 	avl_node_t		dt_ipnode;
162 	struct dhcpv4_txn	*dt_next;
163 } dhcpv4_txn_t;
164 
165 /*
166  * DHCPv6 address. May be added to mci_v6_dyn_ip.
167  * It is always pointed to by its parent dhcpv6_cid_t structure.
168  */
169 typedef struct dhcpv6_addr {
170 	in6_addr_t		da_addr;
171 	avl_node_t		da_node;
172 	struct dhcpv6_addr	*da_next;
173 } dhcpv6_addr_t;
174 
175 /*
176  * DHCPv6 client ID. May be added to mci_v6_cid.
177  * No dhcpv6_txn_t should be pointing to it after it is added to mci_v6_cid.
178  */
179 typedef struct dhcpv6_cid {
180 	uchar_t			*dc_cid;
181 	uint_t			dc_cid_len;
182 	dhcpv6_addr_t		*dc_addr;
183 	uint_t			dc_addrcnt;
184 	avl_node_t		dc_node;
185 } dhcpv6_cid_t;
186 
187 /*
188  * DHCPv6 transaction. Unlike its v4 counterpart, this object gets freed up
189  * as soon as the transaction completes or expires.
190  */
191 typedef struct dhcpv6_txn {
192 	uint32_t		dt_xid;
193 	hrtime_t		dt_timestamp;
194 	dhcpv6_cid_t		*dt_cid;
195 	avl_node_t		dt_node;
196 	struct dhcpv6_txn	*dt_next;
197 } dhcpv6_txn_t;
198 
199 static void	start_txn_cleanup_timer(mac_client_impl_t *);
200 static boolean_t allowed_ips_set(mac_resource_props_t *, uint32_t);
201 
202 #define	BUMP_STAT(m, s)	(m)->mci_misc_stat.mms_##s++
203 
204 /*
205  * Comparison functions for the 3 AVL trees used:
206  * mci_v4_pending_txn, mci_v4_completed_txn, mci_v4_dyn_ip
207  */
208 static int
209 compare_dhcpv4_xid(const void *arg1, const void *arg2)
210 {
211 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
212 
213 	if (txn1->dt_xid < txn2->dt_xid)
214 		return (-1);
215 	else if (txn1->dt_xid > txn2->dt_xid)
216 		return (1);
217 	else
218 		return (0);
219 }
220 
221 static int
222 compare_dhcpv4_cid(const void *arg1, const void *arg2)
223 {
224 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
225 	int			ret;
226 
227 	if (txn1->dt_cid_len < txn2->dt_cid_len)
228 		return (-1);
229 	else if (txn1->dt_cid_len > txn2->dt_cid_len)
230 		return (1);
231 
232 	if (txn1->dt_cid_len == 0)
233 		return (0);
234 
235 	ret = memcmp(txn1->dt_cid, txn2->dt_cid, txn1->dt_cid_len);
236 	if (ret < 0)
237 		return (-1);
238 	else if (ret > 0)
239 		return (1);
240 	else
241 		return (0);
242 }
243 
244 static int
245 compare_dhcpv4_ip(const void *arg1, const void *arg2)
246 {
247 	const dhcpv4_txn_t	*txn1 = arg1, *txn2 = arg2;
248 
249 	if (txn1->dt_ipaddr < txn2->dt_ipaddr)
250 		return (-1);
251 	else if (txn1->dt_ipaddr > txn2->dt_ipaddr)
252 		return (1);
253 	else
254 		return (0);
255 }
256 
257 /*
258  * Find the specified DHCPv4 option.
259  */
260 static int
261 get_dhcpv4_option(struct dhcp *dh4, uchar_t *end, uint8_t type,
262     uchar_t **opt, uint8_t *opt_len)
263 {
264 	uchar_t		*start = (uchar_t *)dh4->options;
265 	uint8_t		otype, olen;
266 
267 	while (start < end) {
268 		if (*start == CD_PAD) {
269 			start++;
270 			continue;
271 		}
272 		if (*start == CD_END)
273 			break;
274 
275 		otype = *start++;
276 		olen = *start++;
277 		if (otype == type && olen > 0) {
278 			*opt = start;
279 			*opt_len = olen;
280 			return (0);
281 		}
282 		start += olen;
283 	}
284 	return (ENOENT);
285 }
286 
287 /*
288  * Locate the start of a DHCPv4 header.
289  * The possible return values and associated meanings are:
290  * 0      - packet is DHCP and has a DHCP header.
291  * EINVAL - packet is not DHCP. the recommended action is to let it pass.
292  * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable.
293  *          the recommended action is to drop it.
294  */
295 static int
296 get_dhcpv4_info(ipha_t *ipha, uchar_t *end, struct dhcp **dh4)
297 {
298 	uint16_t	offset_and_flags, client, server;
299 	boolean_t	first_frag = B_FALSE;
300 	struct udphdr	*udph;
301 	uchar_t		*dh;
302 
303 	if (ipha->ipha_protocol != IPPROTO_UDP)
304 		return (EINVAL);
305 
306 	offset_and_flags = ntohs(ipha->ipha_fragment_offset_and_flags);
307 	if ((offset_and_flags & (IPH_MF | IPH_OFFSET)) != 0) {
308 		/*
309 		 * All non-initial fragments may pass because we cannot
310 		 * identify their type. It's safe to let them through
311 		 * because reassembly will fail if we decide to drop the
312 		 * initial fragment.
313 		 */
314 		if (((offset_and_flags << 3) & 0xffff) != 0)
315 			return (EINVAL);
316 		first_frag = B_TRUE;
317 	}
318 	/* drop packets without a udp header */
319 	udph = (struct udphdr *)((uchar_t *)ipha + IPH_HDR_LENGTH(ipha));
320 	if ((uchar_t *)&udph[1] > end)
321 		return (ENOSPC);
322 
323 	client = htons(IPPORT_BOOTPC);
324 	server = htons(IPPORT_BOOTPS);
325 	if (udph->uh_sport != client && udph->uh_sport != server &&
326 	    udph->uh_dport != client && udph->uh_dport != server)
327 		return (EINVAL);
328 
329 	/* drop dhcp fragments */
330 	if (first_frag)
331 		return (ENOSPC);
332 
333 	dh = (uchar_t *)&udph[1];
334 	if (dh + BASE_PKT_SIZE > end)
335 		return (EINVAL);
336 
337 	*dh4 = (struct dhcp *)dh;
338 	return (0);
339 }
340 
341 /*
342  * Wrappers for accesses to avl trees to improve readability.
343  * Their purposes are fairly self-explanatory.
344  */
345 static dhcpv4_txn_t *
346 find_dhcpv4_pending_txn(mac_client_impl_t *mcip, uint32_t xid)
347 {
348 	dhcpv4_txn_t	tmp_txn;
349 
350 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
351 	tmp_txn.dt_xid = xid;
352 	return (avl_find(&mcip->mci_v4_pending_txn, &tmp_txn, NULL));
353 }
354 
355 static int
356 insert_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
357 {
358 	avl_index_t	where;
359 
360 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
361 	if (avl_find(&mcip->mci_v4_pending_txn, txn, &where) != NULL)
362 		return (EEXIST);
363 
364 	if (avl_numnodes(&mcip->mci_v4_pending_txn) >= dhcp_max_pending_txn) {
365 		BUMP_STAT(mcip, dhcpdropped);
366 		return (EAGAIN);
367 	}
368 	avl_insert(&mcip->mci_v4_pending_txn, txn, where);
369 	return (0);
370 }
371 
372 static void
373 remove_dhcpv4_pending_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
374 {
375 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
376 	avl_remove(&mcip->mci_v4_pending_txn, txn);
377 }
378 
379 static dhcpv4_txn_t *
380 find_dhcpv4_completed_txn(mac_client_impl_t *mcip, uint8_t *cid,
381     uint8_t cid_len)
382 {
383 	dhcpv4_txn_t	tmp_txn;
384 
385 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
386 	if (cid_len > 0)
387 		bcopy(cid, tmp_txn.dt_cid, cid_len);
388 	tmp_txn.dt_cid_len = cid_len;
389 	return (avl_find(&mcip->mci_v4_completed_txn, &tmp_txn, NULL));
390 }
391 
392 /*
393  * After a pending txn is removed from the pending table, it is inserted
394  * into both the completed and dyn-ip tables. These two insertions are
395  * done together because a client ID must have 1:1 correspondence with
396  * an IP address and IP addresses must be unique in the dyn-ip table.
397  */
398 static int
399 insert_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
400 {
401 	avl_index_t	where;
402 
403 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
404 	if (avl_find(&mcip->mci_v4_completed_txn, txn, &where) != NULL)
405 		return (EEXIST);
406 
407 	if (avl_numnodes(&mcip->mci_v4_completed_txn) >=
408 	    dhcp_max_completed_txn) {
409 		BUMP_STAT(mcip, dhcpdropped);
410 		return (EAGAIN);
411 	}
412 
413 	avl_insert(&mcip->mci_v4_completed_txn, txn, where);
414 	if (avl_find(&mcip->mci_v4_dyn_ip, txn, &where) != NULL) {
415 		avl_remove(&mcip->mci_v4_completed_txn, txn);
416 		return (EEXIST);
417 	}
418 	avl_insert(&mcip->mci_v4_dyn_ip, txn, where);
419 	return (0);
420 }
421 
422 static void
423 remove_dhcpv4_completed_txn(mac_client_impl_t *mcip, dhcpv4_txn_t *txn)
424 {
425 	dhcpv4_txn_t	*ctxn;
426 
427 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
428 	if ((ctxn = avl_find(&mcip->mci_v4_dyn_ip, txn, NULL)) != NULL &&
429 	    ctxn == txn)
430 		avl_remove(&mcip->mci_v4_dyn_ip, txn);
431 
432 	avl_remove(&mcip->mci_v4_completed_txn, txn);
433 }
434 
435 /*
436  * Check whether an IP address is in the dyn-ip table.
437  */
438 static boolean_t
439 check_dhcpv4_dyn_ip(mac_client_impl_t *mcip, ipaddr_t ipaddr)
440 {
441 	dhcpv4_txn_t	tmp_txn, *txn;
442 
443 	mutex_enter(&mcip->mci_protect_lock);
444 	tmp_txn.dt_ipaddr = ipaddr;
445 	txn = avl_find(&mcip->mci_v4_dyn_ip, &tmp_txn, NULL);
446 	mutex_exit(&mcip->mci_protect_lock);
447 	return (txn != NULL);
448 }
449 
450 /*
451  * Create/destroy a DHCPv4 transaction.
452  */
453 static dhcpv4_txn_t *
454 create_dhcpv4_txn(uint32_t xid, uint8_t *cid, uint8_t cid_len, ipaddr_t ipaddr)
455 {
456 	dhcpv4_txn_t	*txn;
457 
458 	if ((txn = kmem_zalloc(sizeof (*txn), KM_NOSLEEP)) == NULL)
459 		return (NULL);
460 
461 	txn->dt_xid = xid;
462 	txn->dt_timestamp = gethrtime();
463 	if (cid_len > 0)
464 		bcopy(cid, &txn->dt_cid, cid_len);
465 	txn->dt_cid_len = cid_len;
466 	txn->dt_ipaddr = ipaddr;
467 	return (txn);
468 }
469 
470 static void
471 free_dhcpv4_txn(dhcpv4_txn_t *txn)
472 {
473 	kmem_free(txn, sizeof (*txn));
474 }
475 
476 /*
477  * Clean up all v4 tables.
478  */
479 static void
480 flush_dhcpv4(mac_client_impl_t *mcip)
481 {
482 	void		*cookie = NULL;
483 	dhcpv4_txn_t	*txn;
484 
485 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
486 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_dyn_ip,
487 	    &cookie)) != NULL) {
488 		/*
489 		 * No freeing needed here because the same txn exists
490 		 * in the mci_v4_completed_txn table as well.
491 		 */
492 	}
493 	cookie = NULL;
494 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_completed_txn,
495 	    &cookie)) != NULL) {
496 		free_dhcpv4_txn(txn);
497 	}
498 	cookie = NULL;
499 	while ((txn = avl_destroy_nodes(&mcip->mci_v4_pending_txn,
500 	    &cookie)) != NULL) {
501 		free_dhcpv4_txn(txn);
502 	}
503 }
504 
505 /*
506  * Cleanup stale DHCPv4 transactions.
507  */
508 static void
509 txn_cleanup_v4(mac_client_impl_t *mcip)
510 {
511 	dhcpv4_txn_t		*txn, *ctxn, *next, *txn_list = NULL;
512 
513 	/*
514 	 * Find stale pending transactions and place them on a list
515 	 * to be removed.
516 	 */
517 	for (txn = avl_first(&mcip->mci_v4_pending_txn); txn != NULL;
518 	    txn = avl_walk(&mcip->mci_v4_pending_txn, txn, AVL_AFTER)) {
519 		if (gethrtime() - txn->dt_timestamp > txn_cleanup_interval) {
520 			DTRACE_PROBE2(found__expired__txn,
521 			    mac_client_impl_t *, mcip,
522 			    dhcpv4_txn_t *, txn);
523 
524 			txn->dt_next = txn_list;
525 			txn_list = txn;
526 		}
527 	}
528 
529 	/*
530 	 * Remove and free stale pending transactions and completed
531 	 * transactions with the same client IDs as the stale transactions.
532 	 */
533 	for (txn = txn_list; txn != NULL; txn = next) {
534 		avl_remove(&mcip->mci_v4_pending_txn, txn);
535 
536 		ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid,
537 		    txn->dt_cid_len);
538 		if (ctxn != NULL) {
539 			DTRACE_PROBE2(removing__completed__txn,
540 			    mac_client_impl_t *, mcip,
541 			    dhcpv4_txn_t *, ctxn);
542 
543 			remove_dhcpv4_completed_txn(mcip, ctxn);
544 			free_dhcpv4_txn(ctxn);
545 		}
546 		next = txn->dt_next;
547 		txn->dt_next = NULL;
548 
549 		DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip,
550 		    dhcpv4_txn_t *, txn);
551 		free_dhcpv4_txn(txn);
552 	}
553 }
554 
555 /*
556  * Core logic for intercepting outbound DHCPv4 packets.
557  */
558 static boolean_t
559 intercept_dhcpv4_outbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end)
560 {
561 	struct dhcp		*dh4;
562 	uchar_t			*opt;
563 	dhcpv4_txn_t		*txn, *ctxn;
564 	ipaddr_t		ipaddr;
565 	uint8_t			opt_len, mtype, cid[DHCP_MAX_OPT_SIZE], cid_len;
566 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
567 
568 	if (get_dhcpv4_info(ipha, end, &dh4) != 0)
569 		return (B_TRUE);
570 
571 	/* ip_nospoof/allowed-ips and DHCP are mutually exclusive by default */
572 	if (allowed_ips_set(mrp, IPV4_VERSION))
573 		return (B_FALSE);
574 
575 	if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 ||
576 	    opt_len != 1) {
577 		DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip,
578 		    struct dhcp *, dh4);
579 		return (B_TRUE);
580 	}
581 	mtype = *opt;
582 	if (mtype != REQUEST && mtype != RELEASE) {
583 		DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip,
584 		    struct dhcp *, dh4, uint8_t, mtype);
585 		return (B_TRUE);
586 	}
587 
588 	/* client ID is optional for IPv4 */
589 	if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &opt, &opt_len) == 0 &&
590 	    opt_len >= 2) {
591 		bcopy(opt, cid, opt_len);
592 		cid_len = opt_len;
593 	} else {
594 		bzero(cid, DHCP_MAX_OPT_SIZE);
595 		cid_len = 0;
596 	}
597 
598 	mutex_enter(&mcip->mci_protect_lock);
599 	if (mtype == RELEASE) {
600 		DTRACE_PROBE2(release, mac_client_impl_t *, mcip,
601 		    struct dhcp *, dh4);
602 
603 		/* flush any completed txn with this cid */
604 		ctxn = find_dhcpv4_completed_txn(mcip, cid, cid_len);
605 		if (ctxn != NULL) {
606 			DTRACE_PROBE2(release__successful, mac_client_impl_t *,
607 			    mcip, struct dhcp *, dh4);
608 
609 			remove_dhcpv4_completed_txn(mcip, ctxn);
610 			free_dhcpv4_txn(ctxn);
611 		}
612 		goto done;
613 	}
614 
615 	/*
616 	 * If a pending txn already exists, we'll update its timestamp so
617 	 * it won't get flushed by the timer. We don't need to create new
618 	 * txns for retransmissions.
619 	 */
620 	if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) != NULL) {
621 		DTRACE_PROBE2(update, mac_client_impl_t *, mcip,
622 		    dhcpv4_txn_t *, txn);
623 		txn->dt_timestamp = gethrtime();
624 		goto done;
625 	}
626 
627 	if (get_dhcpv4_option(dh4, end, CD_REQUESTED_IP_ADDR,
628 	    &opt, &opt_len) != 0 || opt_len != sizeof (ipaddr)) {
629 		DTRACE_PROBE2(ipaddr__not__found, mac_client_impl_t *, mcip,
630 		    struct dhcp *, dh4);
631 		goto done;
632 	}
633 	bcopy(opt, &ipaddr, sizeof (ipaddr));
634 	if ((txn = create_dhcpv4_txn(dh4->xid, cid, cid_len, ipaddr)) == NULL)
635 		goto done;
636 
637 	if (insert_dhcpv4_pending_txn(mcip, txn) != 0) {
638 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
639 		    dhcpv4_txn_t *, txn);
640 		free_dhcpv4_txn(txn);
641 		goto done;
642 	}
643 	start_txn_cleanup_timer(mcip);
644 
645 	DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip,
646 	    dhcpv4_txn_t *, txn);
647 
648 done:
649 	mutex_exit(&mcip->mci_protect_lock);
650 	return (B_TRUE);
651 }
652 
653 /*
654  * Core logic for intercepting inbound DHCPv4 packets.
655  */
656 static void
657 intercept_dhcpv4_inbound(mac_client_impl_t *mcip, ipha_t *ipha, uchar_t *end)
658 {
659 	uchar_t		*opt;
660 	struct dhcp	*dh4;
661 	dhcpv4_txn_t	*txn, *ctxn;
662 	uint8_t		opt_len, mtype;
663 
664 	if (get_dhcpv4_info(ipha, end, &dh4) != 0)
665 		return;
666 
667 	if (get_dhcpv4_option(dh4, end, CD_DHCP_TYPE, &opt, &opt_len) != 0 ||
668 	    opt_len != 1) {
669 		DTRACE_PROBE2(mtype__not__found, mac_client_impl_t *, mcip,
670 		    struct dhcp *, dh4);
671 		return;
672 	}
673 	mtype = *opt;
674 	if (mtype != ACK && mtype != NAK) {
675 		DTRACE_PROBE3(ignored__mtype, mac_client_impl_t *, mcip,
676 		    struct dhcp *, dh4, uint8_t, mtype);
677 		return;
678 	}
679 
680 	mutex_enter(&mcip->mci_protect_lock);
681 	if ((txn = find_dhcpv4_pending_txn(mcip, dh4->xid)) == NULL) {
682 		DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip,
683 		    struct dhcp *, dh4);
684 		goto done;
685 	}
686 	remove_dhcpv4_pending_txn(mcip, txn);
687 
688 	/*
689 	 * We're about to move a txn from the pending table to the completed/
690 	 * dyn-ip tables. If there is an existing completed txn with the
691 	 * same cid as our txn, we need to remove and free it.
692 	 */
693 	ctxn = find_dhcpv4_completed_txn(mcip, txn->dt_cid, txn->dt_cid_len);
694 	if (ctxn != NULL) {
695 		DTRACE_PROBE2(replacing__old__txn, mac_client_impl_t *, mcip,
696 		    dhcpv4_txn_t *, ctxn);
697 		remove_dhcpv4_completed_txn(mcip, ctxn);
698 		free_dhcpv4_txn(ctxn);
699 	}
700 	if (mtype == NAK) {
701 		DTRACE_PROBE2(nak__received, mac_client_impl_t *, mcip,
702 		    dhcpv4_txn_t *, txn);
703 		free_dhcpv4_txn(txn);
704 		goto done;
705 	}
706 	if (insert_dhcpv4_completed_txn(mcip, txn) != 0) {
707 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
708 		    dhcpv4_txn_t *, txn);
709 		free_dhcpv4_txn(txn);
710 		goto done;
711 	}
712 	DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip,
713 	    dhcpv4_txn_t *, txn);
714 
715 done:
716 	mutex_exit(&mcip->mci_protect_lock);
717 }
718 
719 
720 /*
721  * Comparison functions for the DHCPv6 AVL trees.
722  */
723 static int
724 compare_dhcpv6_xid(const void *arg1, const void *arg2)
725 {
726 	const dhcpv6_txn_t	*txn1 = arg1, *txn2 = arg2;
727 
728 	if (txn1->dt_xid < txn2->dt_xid)
729 		return (-1);
730 	else if (txn1->dt_xid > txn2->dt_xid)
731 		return (1);
732 	else
733 		return (0);
734 }
735 
736 static int
737 compare_dhcpv6_ip(const void *arg1, const void *arg2)
738 {
739 	const dhcpv6_addr_t	*ip1 = arg1, *ip2 = arg2;
740 	int			ret;
741 
742 	ret = memcmp(&ip1->da_addr, &ip2->da_addr, sizeof (in6_addr_t));
743 	if (ret < 0)
744 		return (-1);
745 	else if (ret > 0)
746 		return (1);
747 	else
748 		return (0);
749 }
750 
751 static int
752 compare_dhcpv6_cid(const void *arg1, const void *arg2)
753 {
754 	const dhcpv6_cid_t	*cid1 = arg1, *cid2 = arg2;
755 	int			ret;
756 
757 	if (cid1->dc_cid_len < cid2->dc_cid_len)
758 		return (-1);
759 	else if (cid1->dc_cid_len > cid2->dc_cid_len)
760 		return (1);
761 
762 	if (cid1->dc_cid_len == 0)
763 		return (0);
764 
765 	ret = memcmp(cid1->dc_cid, cid2->dc_cid, cid1->dc_cid_len);
766 	if (ret < 0)
767 		return (-1);
768 	else if (ret > 0)
769 		return (1);
770 	else
771 		return (0);
772 }
773 
774 /*
775  * Locate the start of a DHCPv6 header.
776  * The possible return values and associated meanings are:
777  * 0      - packet is DHCP and has a DHCP header.
778  * EINVAL - packet is not DHCP. the recommended action is to let it pass.
779  * ENOSPC - packet is a initial fragment that is DHCP or is unidentifiable.
780  *          the recommended action is to drop it.
781  */
782 static int
783 get_dhcpv6_info(ip6_t *ip6h, uchar_t *end, dhcpv6_message_t **dh6)
784 {
785 	uint16_t	hdrlen, client, server;
786 	boolean_t	first_frag = B_FALSE;
787 	ip6_frag_t	*frag = NULL;
788 	uint8_t		proto;
789 	struct udphdr	*udph;
790 	uchar_t		*dh;
791 
792 	if (!mac_ip_hdr_length_v6(ip6h, end, &hdrlen, &proto, &frag))
793 		return (ENOSPC);
794 
795 	if (proto != IPPROTO_UDP)
796 		return (EINVAL);
797 
798 	if (frag != NULL) {
799 		/*
800 		 * All non-initial fragments may pass because we cannot
801 		 * identify their type. It's safe to let them through
802 		 * because reassembly will fail if we decide to drop the
803 		 * initial fragment.
804 		 */
805 		if ((ntohs(frag->ip6f_offlg) & ~7) != 0)
806 			return (EINVAL);
807 		first_frag = B_TRUE;
808 	}
809 	/* drop packets without a udp header */
810 	udph = (struct udphdr *)((uchar_t *)ip6h + hdrlen);
811 	if ((uchar_t *)&udph[1] > end)
812 		return (ENOSPC);
813 
814 	client = htons(IPPORT_DHCPV6C);
815 	server = htons(IPPORT_DHCPV6S);
816 	if (udph->uh_sport != client && udph->uh_sport != server &&
817 	    udph->uh_dport != client && udph->uh_dport != server)
818 		return (EINVAL);
819 
820 	/* drop dhcp fragments */
821 	if (first_frag)
822 		return (ENOSPC);
823 
824 	dh = (uchar_t *)&udph[1];
825 	if (dh + sizeof (dhcpv6_message_t) > end)
826 		return (EINVAL);
827 
828 	*dh6 = (dhcpv6_message_t *)dh;
829 	return (0);
830 }
831 
832 /*
833  * Find the specified DHCPv6 option.
834  */
835 static dhcpv6_option_t *
836 get_dhcpv6_option(void *buf, size_t buflen, dhcpv6_option_t *oldopt,
837     uint16_t codenum, uint_t *retlenp)
838 {
839 	uchar_t		*bp;
840 	dhcpv6_option_t	d6o;
841 	uint_t		olen;
842 
843 	codenum = htons(codenum);
844 	bp = buf;
845 	while (buflen >= sizeof (dhcpv6_option_t)) {
846 		bcopy(bp, &d6o, sizeof (d6o));
847 		olen = ntohs(d6o.d6o_len) + sizeof (d6o);
848 		if (olen > buflen)
849 			break;
850 		if (d6o.d6o_code != codenum || d6o.d6o_len == 0 ||
851 		    (oldopt != NULL && bp <= (uchar_t *)oldopt)) {
852 			bp += olen;
853 			buflen -= olen;
854 			continue;
855 		}
856 		if (retlenp != NULL)
857 			*retlenp = olen;
858 		/* LINTED : alignment */
859 		return ((dhcpv6_option_t *)bp);
860 	}
861 	return (NULL);
862 }
863 
864 /*
865  * Get the status code from a reply message.
866  */
867 static int
868 get_dhcpv6_status(dhcpv6_message_t *dh6, uchar_t *end, uint16_t *status)
869 {
870 	dhcpv6_option_t	*d6o;
871 	uint_t		olen;
872 	uint16_t	s;
873 
874 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
875 	    DHCPV6_OPT_STATUS_CODE, &olen);
876 
877 	/* Success is implied if status code is missing */
878 	if (d6o == NULL) {
879 		*status = DHCPV6_STAT_SUCCESS;
880 		return (0);
881 	}
882 	if ((uchar_t *)d6o + olen > end)
883 		return (EINVAL);
884 
885 	olen -= sizeof (*d6o);
886 	if (olen < sizeof (s))
887 		return (EINVAL);
888 
889 	bcopy(&d6o[1], &s, sizeof (s));
890 	*status = ntohs(s);
891 	return (0);
892 }
893 
894 /*
895  * Get the addresses from a reply message.
896  */
897 static int
898 get_dhcpv6_addrs(dhcpv6_message_t *dh6, uchar_t *end, dhcpv6_cid_t *cid)
899 {
900 	dhcpv6_option_t		*d6o;
901 	dhcpv6_addr_t		*next;
902 	uint_t			olen;
903 
904 	d6o = NULL;
905 	while ((d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1],
906 	    d6o, DHCPV6_OPT_IA_NA, &olen)) != NULL) {
907 		dhcpv6_option_t		*d6so;
908 		dhcpv6_iaaddr_t		d6ia;
909 		dhcpv6_addr_t		**addrp;
910 		uchar_t			*obase;
911 		uint_t			solen;
912 
913 		if (olen < sizeof (dhcpv6_ia_na_t) ||
914 		    (uchar_t *)d6o + olen > end)
915 			goto fail;
916 
917 		obase = (uchar_t *)d6o + sizeof (dhcpv6_ia_na_t);
918 		olen -= sizeof (dhcpv6_ia_na_t);
919 		d6so = NULL;
920 		while ((d6so = get_dhcpv6_option(obase, olen, d6so,
921 		    DHCPV6_OPT_IAADDR, &solen)) != NULL) {
922 			if (solen < sizeof (dhcpv6_iaaddr_t) ||
923 			    (uchar_t *)d6so + solen > end)
924 				goto fail;
925 
926 			bcopy(d6so, &d6ia, sizeof (d6ia));
927 			for (addrp = &cid->dc_addr; *addrp != NULL;
928 			    addrp = &(*addrp)->da_next) {
929 				if (bcmp(&(*addrp)->da_addr, &d6ia.d6ia_addr,
930 				    sizeof (in6_addr_t)) == 0)
931 					goto fail;
932 			}
933 			if ((*addrp = kmem_zalloc(sizeof (dhcpv6_addr_t),
934 			    KM_NOSLEEP)) == NULL)
935 				goto fail;
936 
937 			bcopy(&d6ia.d6ia_addr, &(*addrp)->da_addr,
938 			    sizeof (in6_addr_t));
939 			cid->dc_addrcnt++;
940 		}
941 	}
942 	if (cid->dc_addrcnt == 0)
943 		return (ENOENT);
944 
945 	return (0);
946 
947 fail:
948 	for (; cid->dc_addr != NULL; cid->dc_addr = next) {
949 		next = cid->dc_addr->da_next;
950 		kmem_free(cid->dc_addr, sizeof (dhcpv6_addr_t));
951 		cid->dc_addrcnt--;
952 	}
953 	ASSERT(cid->dc_addrcnt == 0);
954 	return (EINVAL);
955 }
956 
957 /*
958  * Free a cid.
959  * Before this gets called the caller must ensure that all the
960  * addresses are removed from the mci_v6_dyn_ip table.
961  */
962 static void
963 free_dhcpv6_cid(dhcpv6_cid_t *cid)
964 {
965 	dhcpv6_addr_t	*addr, *next;
966 	uint_t		cnt = 0;
967 
968 	kmem_free(cid->dc_cid, cid->dc_cid_len);
969 	for (addr = cid->dc_addr; addr != NULL; addr = next) {
970 		next = addr->da_next;
971 		kmem_free(addr, sizeof (*addr));
972 		cnt++;
973 	}
974 	ASSERT(cnt == cid->dc_addrcnt);
975 	kmem_free(cid, sizeof (*cid));
976 }
977 
978 /*
979  * Extract the DUID from a message. The associated addresses will be
980  * extracted later from the reply message.
981  */
982 static dhcpv6_cid_t *
983 create_dhcpv6_cid(dhcpv6_message_t *dh6, uchar_t *end)
984 {
985 	dhcpv6_option_t		*d6o;
986 	dhcpv6_cid_t		*cid;
987 	uchar_t			*rawcid;
988 	uint_t			olen, rawcidlen;
989 
990 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
991 	    DHCPV6_OPT_CLIENTID, &olen);
992 	if (d6o == NULL || (uchar_t *)d6o + olen > end)
993 		return (NULL);
994 
995 	rawcidlen = olen - sizeof (*d6o);
996 	if ((rawcid = kmem_zalloc(rawcidlen, KM_NOSLEEP)) == NULL)
997 		return (NULL);
998 	bcopy(d6o + 1, rawcid, rawcidlen);
999 
1000 	if ((cid = kmem_zalloc(sizeof (*cid), KM_NOSLEEP)) == NULL) {
1001 		kmem_free(rawcid, rawcidlen);
1002 		return (NULL);
1003 	}
1004 	cid->dc_cid = rawcid;
1005 	cid->dc_cid_len = rawcidlen;
1006 	return (cid);
1007 }
1008 
1009 /*
1010  * Remove a cid from mci_v6_cid. The addresses owned by the cid
1011  * are also removed from mci_v6_dyn_ip.
1012  */
1013 static void
1014 remove_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1015 {
1016 	dhcpv6_addr_t	*addr, *tmp_addr;
1017 
1018 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1019 	avl_remove(&mcip->mci_v6_cid, cid);
1020 	for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) {
1021 		tmp_addr = avl_find(&mcip->mci_v6_dyn_ip, addr, NULL);
1022 		if (tmp_addr == addr)
1023 			avl_remove(&mcip->mci_v6_dyn_ip, addr);
1024 	}
1025 }
1026 
1027 /*
1028  * Find and remove a matching cid and associated addresses from
1029  * their respective tables.
1030  */
1031 static void
1032 release_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1033 {
1034 	dhcpv6_cid_t	*oldcid;
1035 
1036 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1037 	if ((oldcid = avl_find(&mcip->mci_v6_cid, cid, NULL)) == NULL)
1038 		return;
1039 
1040 	/*
1041 	 * Since cid belongs to a pending txn, it can't possibly be in
1042 	 * mci_v6_cid. Anything that's found must be an existing cid.
1043 	 */
1044 	ASSERT(oldcid != cid);
1045 	remove_dhcpv6_cid(mcip, oldcid);
1046 	free_dhcpv6_cid(oldcid);
1047 }
1048 
1049 /*
1050  * Insert cid into mci_v6_cid.
1051  */
1052 static int
1053 insert_dhcpv6_cid(mac_client_impl_t *mcip, dhcpv6_cid_t *cid)
1054 {
1055 	avl_index_t	where;
1056 	dhcpv6_addr_t	*addr;
1057 
1058 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1059 	if (avl_find(&mcip->mci_v6_cid, cid, &where) != NULL)
1060 		return (EEXIST);
1061 
1062 	if (avl_numnodes(&mcip->mci_v6_cid) >= dhcp_max_completed_txn) {
1063 		BUMP_STAT(mcip, dhcpdropped);
1064 		return (EAGAIN);
1065 	}
1066 	avl_insert(&mcip->mci_v6_cid, cid, where);
1067 	for (addr = cid->dc_addr; addr != NULL; addr = addr->da_next) {
1068 		if (avl_find(&mcip->mci_v6_dyn_ip, addr, &where) != NULL)
1069 			goto fail;
1070 
1071 		avl_insert(&mcip->mci_v6_dyn_ip, addr, where);
1072 	}
1073 	return (0);
1074 
1075 fail:
1076 	remove_dhcpv6_cid(mcip, cid);
1077 	return (EEXIST);
1078 }
1079 
1080 /*
1081  * Check whether an IP address is in the dyn-ip table.
1082  */
1083 static boolean_t
1084 check_dhcpv6_dyn_ip(mac_client_impl_t *mcip, in6_addr_t *addr)
1085 {
1086 	dhcpv6_addr_t	tmp_addr, *a;
1087 
1088 	mutex_enter(&mcip->mci_protect_lock);
1089 	bcopy(addr, &tmp_addr.da_addr, sizeof (in6_addr_t));
1090 	a = avl_find(&mcip->mci_v6_dyn_ip, &tmp_addr, NULL);
1091 	mutex_exit(&mcip->mci_protect_lock);
1092 	return (a != NULL);
1093 }
1094 
1095 static dhcpv6_txn_t *
1096 find_dhcpv6_pending_txn(mac_client_impl_t *mcip, uint32_t xid)
1097 {
1098 	dhcpv6_txn_t	tmp_txn;
1099 
1100 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1101 	tmp_txn.dt_xid = xid;
1102 	return (avl_find(&mcip->mci_v6_pending_txn, &tmp_txn, NULL));
1103 }
1104 
1105 static void
1106 remove_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn)
1107 {
1108 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1109 	avl_remove(&mcip->mci_v6_pending_txn, txn);
1110 }
1111 
1112 static dhcpv6_txn_t *
1113 create_dhcpv6_txn(uint32_t xid, dhcpv6_cid_t *cid)
1114 {
1115 	dhcpv6_txn_t	*txn;
1116 
1117 	if ((txn = kmem_zalloc(sizeof (dhcpv6_txn_t), KM_NOSLEEP)) == NULL)
1118 		return (NULL);
1119 
1120 	txn->dt_xid = xid;
1121 	txn->dt_cid = cid;
1122 	txn->dt_timestamp = gethrtime();
1123 	return (txn);
1124 }
1125 
1126 static void
1127 free_dhcpv6_txn(dhcpv6_txn_t *txn)
1128 {
1129 	if (txn->dt_cid != NULL)
1130 		free_dhcpv6_cid(txn->dt_cid);
1131 	kmem_free(txn, sizeof (dhcpv6_txn_t));
1132 }
1133 
1134 static int
1135 insert_dhcpv6_pending_txn(mac_client_impl_t *mcip, dhcpv6_txn_t *txn)
1136 {
1137 	avl_index_t	where;
1138 
1139 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1140 	if (avl_find(&mcip->mci_v6_pending_txn, txn, &where) != NULL)
1141 		return (EEXIST);
1142 
1143 	if (avl_numnodes(&mcip->mci_v6_pending_txn) >= dhcp_max_pending_txn) {
1144 		BUMP_STAT(mcip, dhcpdropped);
1145 		return (EAGAIN);
1146 	}
1147 	avl_insert(&mcip->mci_v6_pending_txn, txn, where);
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Clean up all v6 tables.
1153  */
1154 static void
1155 flush_dhcpv6(mac_client_impl_t *mcip)
1156 {
1157 	void		*cookie = NULL;
1158 	dhcpv6_cid_t	*cid;
1159 	dhcpv6_txn_t	*txn;
1160 
1161 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1162 	while (avl_destroy_nodes(&mcip->mci_v6_dyn_ip, &cookie) != NULL) {
1163 	}
1164 	cookie = NULL;
1165 	while ((cid = avl_destroy_nodes(&mcip->mci_v6_cid, &cookie)) != NULL) {
1166 		free_dhcpv6_cid(cid);
1167 	}
1168 	cookie = NULL;
1169 	while ((txn = avl_destroy_nodes(&mcip->mci_v6_pending_txn,
1170 	    &cookie)) != NULL) {
1171 		free_dhcpv6_txn(txn);
1172 	}
1173 }
1174 
1175 /*
1176  * Cleanup stale DHCPv6 transactions.
1177  */
1178 static void
1179 txn_cleanup_v6(mac_client_impl_t *mcip)
1180 {
1181 	dhcpv6_txn_t		*txn, *next, *txn_list = NULL;
1182 
1183 	/*
1184 	 * Find stale pending transactions and place them on a list
1185 	 * to be removed.
1186 	 */
1187 	for (txn = avl_first(&mcip->mci_v6_pending_txn); txn != NULL;
1188 	    txn = avl_walk(&mcip->mci_v6_pending_txn, txn, AVL_AFTER)) {
1189 		if (gethrtime() - txn->dt_timestamp > txn_cleanup_interval) {
1190 			DTRACE_PROBE2(found__expired__txn,
1191 			    mac_client_impl_t *, mcip,
1192 			    dhcpv6_txn_t *, txn);
1193 
1194 			txn->dt_next = txn_list;
1195 			txn_list = txn;
1196 		}
1197 	}
1198 
1199 	/*
1200 	 * Remove and free stale pending transactions.
1201 	 * Release any existing cids matching the stale transactions.
1202 	 */
1203 	for (txn = txn_list; txn != NULL; txn = next) {
1204 		avl_remove(&mcip->mci_v6_pending_txn, txn);
1205 		release_dhcpv6_cid(mcip, txn->dt_cid);
1206 		next = txn->dt_next;
1207 		txn->dt_next = NULL;
1208 
1209 		DTRACE_PROBE2(freeing__txn, mac_client_impl_t *, mcip,
1210 		    dhcpv6_txn_t *, txn);
1211 		free_dhcpv6_txn(txn);
1212 	}
1213 
1214 }
1215 
1216 /*
1217  * Core logic for intercepting outbound DHCPv6 packets.
1218  */
1219 static boolean_t
1220 intercept_dhcpv6_outbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end)
1221 {
1222 	dhcpv6_message_t	*dh6;
1223 	dhcpv6_txn_t		*txn;
1224 	dhcpv6_cid_t		*cid = NULL;
1225 	uint32_t		xid;
1226 	uint8_t			mtype;
1227 	mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
1228 
1229 	if (get_dhcpv6_info(ip6h, end, &dh6) != 0)
1230 		return (B_TRUE);
1231 
1232 	/* ip_nospoof/allowed-ips and DHCP are mutually exclusive by default */
1233 	if (allowed_ips_set(mrp, IPV6_VERSION))
1234 		return (B_FALSE);
1235 
1236 	mtype = dh6->d6m_msg_type;
1237 	if (mtype != DHCPV6_MSG_REQUEST && mtype != DHCPV6_MSG_RENEW &&
1238 	    mtype != DHCPV6_MSG_REBIND && mtype != DHCPV6_MSG_RELEASE)
1239 		return (B_TRUE);
1240 
1241 	if ((cid = create_dhcpv6_cid(dh6, end)) == NULL)
1242 		return (B_TRUE);
1243 
1244 	mutex_enter(&mcip->mci_protect_lock);
1245 	if (mtype == DHCPV6_MSG_RELEASE) {
1246 		release_dhcpv6_cid(mcip, cid);
1247 		goto done;
1248 	}
1249 	xid = DHCPV6_GET_TRANSID(dh6);
1250 	if ((txn = find_dhcpv6_pending_txn(mcip, xid)) != NULL) {
1251 		DTRACE_PROBE2(update, mac_client_impl_t *, mcip,
1252 		    dhcpv6_txn_t *, txn);
1253 		txn->dt_timestamp = gethrtime();
1254 		goto done;
1255 	}
1256 	if ((txn = create_dhcpv6_txn(xid, cid)) == NULL)
1257 		goto done;
1258 
1259 	cid = NULL;
1260 	if (insert_dhcpv6_pending_txn(mcip, txn) != 0) {
1261 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
1262 		    dhcpv6_txn_t *, txn);
1263 		free_dhcpv6_txn(txn);
1264 		goto done;
1265 	}
1266 	start_txn_cleanup_timer(mcip);
1267 
1268 	DTRACE_PROBE2(txn__pending, mac_client_impl_t *, mcip,
1269 	    dhcpv6_txn_t *, txn);
1270 
1271 done:
1272 	if (cid != NULL)
1273 		free_dhcpv6_cid(cid);
1274 
1275 	mutex_exit(&mcip->mci_protect_lock);
1276 	return (B_TRUE);
1277 }
1278 
1279 /*
1280  * Core logic for intercepting inbound DHCPv6 packets.
1281  */
1282 static void
1283 intercept_dhcpv6_inbound(mac_client_impl_t *mcip, ip6_t *ip6h, uchar_t *end)
1284 {
1285 	dhcpv6_message_t	*dh6;
1286 	dhcpv6_txn_t		*txn;
1287 	uint32_t		xid;
1288 	uint8_t			mtype;
1289 	uint16_t		status;
1290 
1291 	if (get_dhcpv6_info(ip6h, end, &dh6) != 0)
1292 		return;
1293 
1294 	mtype = dh6->d6m_msg_type;
1295 	if (mtype != DHCPV6_MSG_REPLY)
1296 		return;
1297 
1298 	mutex_enter(&mcip->mci_protect_lock);
1299 	xid = DHCPV6_GET_TRANSID(dh6);
1300 	if ((txn = find_dhcpv6_pending_txn(mcip, xid)) == NULL) {
1301 		DTRACE_PROBE2(txn__not__found, mac_client_impl_t *, mcip,
1302 		    dhcpv6_message_t *, dh6);
1303 		goto done;
1304 	}
1305 	remove_dhcpv6_pending_txn(mcip, txn);
1306 	release_dhcpv6_cid(mcip, txn->dt_cid);
1307 
1308 	if (get_dhcpv6_status(dh6, end, &status) != 0 ||
1309 	    status != DHCPV6_STAT_SUCCESS) {
1310 		DTRACE_PROBE2(error__status, mac_client_impl_t *, mcip,
1311 		    dhcpv6_txn_t *, txn);
1312 		goto done;
1313 	}
1314 	if (get_dhcpv6_addrs(dh6, end, txn->dt_cid) != 0) {
1315 		DTRACE_PROBE2(no__addrs, mac_client_impl_t *, mcip,
1316 		    dhcpv6_txn_t *, txn);
1317 		goto done;
1318 	}
1319 	if (insert_dhcpv6_cid(mcip, txn->dt_cid) != 0) {
1320 		DTRACE_PROBE2(insert__failed, mac_client_impl_t *, mcip,
1321 		    dhcpv6_txn_t *, txn);
1322 		goto done;
1323 	}
1324 	DTRACE_PROBE2(txn__completed, mac_client_impl_t *, mcip,
1325 	    dhcpv6_txn_t *, txn);
1326 
1327 	txn->dt_cid = NULL;
1328 
1329 done:
1330 	if (txn != NULL)
1331 		free_dhcpv6_txn(txn);
1332 	mutex_exit(&mcip->mci_protect_lock);
1333 }
1334 
1335 /*
1336  * Timer for cleaning up stale transactions.
1337  */
1338 static void
1339 txn_cleanup_timer(void *arg)
1340 {
1341 	mac_client_impl_t	*mcip = arg;
1342 
1343 	mutex_enter(&mcip->mci_protect_lock);
1344 	if (mcip->mci_txn_cleanup_tid == 0) {
1345 		/* do nothing if timer got cancelled */
1346 		mutex_exit(&mcip->mci_protect_lock);
1347 		return;
1348 	}
1349 	mcip->mci_txn_cleanup_tid = 0;
1350 
1351 	txn_cleanup_v4(mcip);
1352 	txn_cleanup_v6(mcip);
1353 
1354 	/*
1355 	 * Restart timer if pending transactions still exist.
1356 	 */
1357 	if (!avl_is_empty(&mcip->mci_v4_pending_txn) ||
1358 	    !avl_is_empty(&mcip->mci_v6_pending_txn)) {
1359 		DTRACE_PROBE1(restarting__timer, mac_client_impl_t *, mcip);
1360 
1361 		mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip,
1362 		    drv_usectohz(txn_cleanup_interval / (NANOSEC / MICROSEC)));
1363 	}
1364 	mutex_exit(&mcip->mci_protect_lock);
1365 }
1366 
1367 static void
1368 start_txn_cleanup_timer(mac_client_impl_t *mcip)
1369 {
1370 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1371 	if (mcip->mci_txn_cleanup_tid == 0) {
1372 		mcip->mci_txn_cleanup_tid = timeout(txn_cleanup_timer, mcip,
1373 		    drv_usectohz(txn_cleanup_interval / (NANOSEC / MICROSEC)));
1374 	}
1375 }
1376 
1377 static void
1378 cancel_txn_cleanup_timer(mac_client_impl_t *mcip)
1379 {
1380 	timeout_id_t	tid;
1381 
1382 	ASSERT(MUTEX_HELD(&mcip->mci_protect_lock));
1383 
1384 	/*
1385 	 * This needs to be a while loop because the timer could get
1386 	 * rearmed during untimeout().
1387 	 */
1388 	while ((tid = mcip->mci_txn_cleanup_tid) != 0) {
1389 		mcip->mci_txn_cleanup_tid = 0;
1390 		mutex_exit(&mcip->mci_protect_lock);
1391 		(void) untimeout(tid);
1392 		mutex_enter(&mcip->mci_protect_lock);
1393 	}
1394 }
1395 
1396 /*
1397  * Get the start/end pointers of an L3 packet and also do pullup if needed.
1398  * pulled-up packet needs to be freed by the caller.
1399  */
1400 static int
1401 get_l3_info(mblk_t *mp, size_t hdrsize, uchar_t **start, uchar_t **end,
1402     mblk_t **nmp)
1403 {
1404 	uchar_t	*s, *e;
1405 	mblk_t	*newmp = NULL;
1406 
1407 	/*
1408 	 * Pullup if necessary but reject packets that do not have
1409 	 * a proper mac header.
1410 	 */
1411 	s = mp->b_rptr + hdrsize;
1412 	e = mp->b_wptr;
1413 
1414 	if (s > mp->b_wptr)
1415 		return (EINVAL);
1416 
1417 	if (!OK_32PTR(s) || mp->b_cont != NULL) {
1418 		/*
1419 		 * Temporarily adjust mp->b_rptr to ensure proper
1420 		 * alignment of IP header in newmp.
1421 		 */
1422 		DTRACE_PROBE1(pullup__needed, mblk_t *, mp);
1423 
1424 		mp->b_rptr += hdrsize;
1425 		newmp = msgpullup(mp, -1);
1426 		mp->b_rptr -= hdrsize;
1427 
1428 		if (newmp == NULL)
1429 			return (ENOMEM);
1430 
1431 		s = newmp->b_rptr;
1432 		e = newmp->b_wptr;
1433 	}
1434 
1435 	*start = s;
1436 	*end = e;
1437 	*nmp = newmp;
1438 	return (0);
1439 }
1440 
1441 void
1442 mac_protect_intercept_dhcp_one(mac_client_impl_t *mcip, mblk_t *mp)
1443 {
1444 	mac_impl_t		*mip = mcip->mci_mip;
1445 	uchar_t			*start, *end;
1446 	mblk_t			*nmp = NULL;
1447 	mac_header_info_t	mhi;
1448 	int			err;
1449 
1450 	err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi);
1451 	if (err != 0) {
1452 		DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip,
1453 		    mblk_t *, mp);
1454 		return;
1455 	}
1456 
1457 	err = get_l3_info(mp, mhi.mhi_hdrsize, &start, &end, &nmp);
1458 	if (err != 0) {
1459 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1460 		    mblk_t *, mp);
1461 		return;
1462 	}
1463 
1464 	switch (mhi.mhi_bindsap) {
1465 	case ETHERTYPE_IP: {
1466 		ipha_t	*ipha = (ipha_t *)start;
1467 
1468 		if (start + sizeof (ipha_t) > end)
1469 			return;
1470 
1471 		intercept_dhcpv4_inbound(mcip, ipha, end);
1472 		break;
1473 	}
1474 	case ETHERTYPE_IPV6: {
1475 		ip6_t		*ip6h = (ip6_t *)start;
1476 
1477 		if (start + sizeof (ip6_t) > end)
1478 			return;
1479 
1480 		intercept_dhcpv6_inbound(mcip, ip6h, end);
1481 		break;
1482 	}
1483 	}
1484 	freemsg(nmp);
1485 }
1486 
1487 void
1488 mac_protect_intercept_dhcp(mac_client_impl_t *mcip, mblk_t *mp)
1489 {
1490 	/*
1491 	 * Skip checks if we are part of an aggr.
1492 	 */
1493 	if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0)
1494 		return;
1495 
1496 	for (; mp != NULL; mp = mp->b_next)
1497 		mac_protect_intercept_dhcp_one(mcip, mp);
1498 }
1499 
1500 void
1501 mac_protect_flush_dhcp(mac_client_impl_t *mcip)
1502 {
1503 	mutex_enter(&mcip->mci_protect_lock);
1504 	flush_dhcpv4(mcip);
1505 	flush_dhcpv6(mcip);
1506 	mutex_exit(&mcip->mci_protect_lock);
1507 }
1508 
1509 void
1510 mac_protect_cancel_timer(mac_client_impl_t *mcip)
1511 {
1512 	mutex_enter(&mcip->mci_protect_lock);
1513 	cancel_txn_cleanup_timer(mcip);
1514 	mutex_exit(&mcip->mci_protect_lock);
1515 }
1516 
1517 /*
1518  * Check if addr is in the 'allowed-ips' list.
1519  */
1520 
1521 /* ARGSUSED */
1522 static boolean_t
1523 ipnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *protect,
1524     ipaddr_t *addr)
1525 {
1526 	uint_t	i;
1527 
1528 	/*
1529 	 * The unspecified address is allowed.
1530 	 */
1531 	if (*addr == INADDR_ANY)
1532 		return (B_TRUE);
1533 
1534 	for (i = 0; i < protect->mp_ipaddrcnt; i++) {
1535 		mac_ipaddr_t	*v4addr = &protect->mp_ipaddrs[i];
1536 
1537 		if (v4addr->ip_version == IPV4_VERSION) {
1538 			uint32_t mask;
1539 
1540 			/* LINTED E_SUSPICIOUS_COMPARISON */
1541 			ASSERT(v4addr->ip_netmask >= 0 &&
1542 			    v4addr->ip_netmask <= 32);
1543 			mask = 0xFFFFFFFFu << (32 - v4addr->ip_netmask);
1544 			/*
1545 			 * Since we have a netmask we know this entry
1546 			 * signifies the entire subnet. Check if the
1547 			 * given address is on the subnet.
1548 			 */
1549 			if (htonl(V4_PART_OF_V6(v4addr->ip_addr)) ==
1550 			    (htonl(*addr) & mask))
1551 				return (B_TRUE);
1552 		}
1553 	}
1554 	return (protect->mp_ipaddrcnt == 0 ?
1555 	    check_dhcpv4_dyn_ip(mcip, *addr) : B_FALSE);
1556 }
1557 
1558 static boolean_t
1559 ipnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *protect,
1560     in6_addr_t *addr)
1561 {
1562 	uint_t	i;
1563 
1564 	/*
1565 	 * The unspecified address and the v6 link local address are allowed.
1566 	 */
1567 	if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
1568 	    ((mcip->mci_protect_flags & MPT_FLAG_V6_LOCAL_ADDR_SET) != 0 &&
1569 	    IN6_ARE_ADDR_EQUAL(&mcip->mci_v6_local_addr, addr)))
1570 		return (B_TRUE);
1571 
1572 
1573 	for (i = 0; i < protect->mp_ipaddrcnt; i++) {
1574 		mac_ipaddr_t	*v6addr = &protect->mp_ipaddrs[i];
1575 
1576 		if (v6addr->ip_version == IPV6_VERSION &&
1577 		    /* LINTED E_SUSPICIOUS_COMPARISON */
1578 		    IN6_ARE_PREFIXEDADDR_EQUAL(&v6addr->ip_addr, addr,
1579 		    v6addr->ip_netmask))
1580 			return (B_TRUE);
1581 	}
1582 	return (protect->mp_ipaddrcnt == 0 ?
1583 	    check_dhcpv6_dyn_ip(mcip, addr) : B_FALSE);
1584 }
1585 
1586 /*
1587  * Checks various fields within an IPv6 NDP packet.
1588  */
1589 static boolean_t
1590 ipnospoof_check_ndp(mac_client_impl_t *mcip, mac_protect_t *protect,
1591     ip6_t *ip6h, uchar_t *end)
1592 {
1593 	icmp6_t			*icmp_nd = (icmp6_t *)&ip6h[1];
1594 	int			hdrlen, optlen, opttype, len;
1595 	uint_t			addrlen, maclen;
1596 	uint8_t			type;
1597 	nd_opt_hdr_t		*opt;
1598 	struct nd_opt_lla	*lla = NULL;
1599 
1600 	/*
1601 	 * NDP packets do not have extension headers so the ICMPv6 header
1602 	 * must immediately follow the IPv6 header.
1603 	 */
1604 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
1605 		return (B_TRUE);
1606 
1607 	/* ICMPv6 header missing */
1608 	if ((uchar_t *)&icmp_nd[1] > end)
1609 		return (B_FALSE);
1610 
1611 	len = end - (uchar_t *)icmp_nd;
1612 	type = icmp_nd->icmp6_type;
1613 
1614 	switch (type) {
1615 	case ND_ROUTER_SOLICIT:
1616 		hdrlen = sizeof (nd_router_solicit_t);
1617 		break;
1618 	case ND_ROUTER_ADVERT:
1619 		hdrlen = sizeof (nd_router_advert_t);
1620 		break;
1621 	case ND_NEIGHBOR_SOLICIT:
1622 		hdrlen = sizeof (nd_neighbor_solicit_t);
1623 		break;
1624 	case ND_NEIGHBOR_ADVERT:
1625 		hdrlen = sizeof (nd_neighbor_advert_t);
1626 		break;
1627 	case ND_REDIRECT:
1628 		hdrlen = sizeof (nd_redirect_t);
1629 		break;
1630 	default:
1631 		return (B_TRUE);
1632 	}
1633 
1634 	if (len < hdrlen)
1635 		return (B_FALSE);
1636 
1637 	/* SLLA option checking is needed for RS/RA/NS */
1638 	opttype = ND_OPT_SOURCE_LINKADDR;
1639 
1640 	switch (type) {
1641 	case ND_NEIGHBOR_ADVERT: {
1642 		nd_neighbor_advert_t	*na = (nd_neighbor_advert_t *)icmp_nd;
1643 
1644 		if (!ipnospoof_check_v6(mcip, protect, &na->nd_na_target)) {
1645 			DTRACE_PROBE2(ndp__na__fail,
1646 			    mac_client_impl_t *, mcip, ip6_t *, ip6h);
1647 			return (B_FALSE);
1648 		}
1649 
1650 		/* TLLA option for NA */
1651 		opttype = ND_OPT_TARGET_LINKADDR;
1652 		break;
1653 	}
1654 	case ND_REDIRECT: {
1655 		/* option checking not needed for RD */
1656 		return (B_TRUE);
1657 	}
1658 	default:
1659 		break;
1660 	}
1661 
1662 	if (len == hdrlen) {
1663 		/* no options, we're done */
1664 		return (B_TRUE);
1665 	}
1666 	opt = (nd_opt_hdr_t *)((uchar_t *)icmp_nd + hdrlen);
1667 	optlen = len - hdrlen;
1668 
1669 	/* find the option header we need */
1670 	while (optlen > sizeof (nd_opt_hdr_t)) {
1671 		if (opt->nd_opt_type == opttype) {
1672 			lla = (struct nd_opt_lla *)opt;
1673 			break;
1674 		}
1675 		optlen -= 8 * opt->nd_opt_len;
1676 		opt = (nd_opt_hdr_t *)
1677 		    ((uchar_t *)opt + 8 * opt->nd_opt_len);
1678 	}
1679 	if (lla == NULL)
1680 		return (B_TRUE);
1681 
1682 	addrlen = lla->nd_opt_lla_len * 8 - sizeof (nd_opt_hdr_t);
1683 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1684 
1685 	if (addrlen != maclen ||
1686 	    bcmp(mcip->mci_unicast->ma_addr,
1687 	    lla->nd_opt_lla_hdw_addr, maclen) != 0) {
1688 		DTRACE_PROBE2(ndp__lla__fail,
1689 		    mac_client_impl_t *, mcip, ip6_t *, ip6h);
1690 		return (B_FALSE);
1691 	}
1692 
1693 	DTRACE_PROBE2(ndp__lla__ok, mac_client_impl_t *, mcip, ip6_t *, ip6h);
1694 	return (B_TRUE);
1695 }
1696 
1697 /*
1698  * Enforce ip-nospoof protection.
1699  */
1700 static int
1701 ipnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect,
1702     mblk_t *mp, mac_header_info_t *mhip)
1703 {
1704 	size_t		hdrsize = mhip->mhi_hdrsize;
1705 	uint32_t	sap = mhip->mhi_bindsap;
1706 	uchar_t		*start, *end;
1707 	mblk_t		*nmp = NULL;
1708 	int		err;
1709 
1710 	err = get_l3_info(mp, hdrsize, &start, &end, &nmp);
1711 	if (err != 0) {
1712 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1713 		    mblk_t *, mp);
1714 		return (err);
1715 	}
1716 	err = EINVAL;
1717 
1718 	switch (sap) {
1719 	case ETHERTYPE_IP: {
1720 		ipha_t	*ipha = (ipha_t *)start;
1721 
1722 		if (start + sizeof (ipha_t) > end)
1723 			goto fail;
1724 
1725 		if (!ipnospoof_check_v4(mcip, protect, &ipha->ipha_src))
1726 			goto fail;
1727 
1728 		if (!intercept_dhcpv4_outbound(mcip, ipha, end))
1729 			goto fail;
1730 		break;
1731 	}
1732 	case ETHERTYPE_ARP: {
1733 		arh_t		*arh = (arh_t *)start;
1734 		uint32_t	maclen, hlen, plen, arplen;
1735 		ipaddr_t	spaddr;
1736 		uchar_t		*shaddr;
1737 
1738 		if (start + sizeof (arh_t) > end)
1739 			goto fail;
1740 
1741 		maclen = mcip->mci_mip->mi_info.mi_addr_length;
1742 		hlen = arh->arh_hlen;
1743 		plen = arh->arh_plen;
1744 		if ((hlen != 0 && hlen != maclen) ||
1745 		    plen != sizeof (ipaddr_t))
1746 			goto fail;
1747 
1748 		arplen = sizeof (arh_t) + 2 * hlen + 2 * plen;
1749 		if (start + arplen > end)
1750 			goto fail;
1751 
1752 		shaddr = start + sizeof (arh_t);
1753 		if (hlen != 0 &&
1754 		    bcmp(mcip->mci_unicast->ma_addr, shaddr, maclen) != 0)
1755 			goto fail;
1756 
1757 		bcopy(shaddr + hlen, &spaddr, sizeof (spaddr));
1758 		if (!ipnospoof_check_v4(mcip, protect, &spaddr))
1759 			goto fail;
1760 		break;
1761 	}
1762 	case ETHERTYPE_IPV6: {
1763 		ip6_t		*ip6h = (ip6_t *)start;
1764 
1765 		if (start + sizeof (ip6_t) > end)
1766 			goto fail;
1767 
1768 		if (!ipnospoof_check_v6(mcip, protect, &ip6h->ip6_src))
1769 			goto fail;
1770 
1771 		if (!ipnospoof_check_ndp(mcip, protect, ip6h, end))
1772 			goto fail;
1773 
1774 		if (!intercept_dhcpv6_outbound(mcip, ip6h, end))
1775 			goto fail;
1776 		break;
1777 	}
1778 	}
1779 	freemsg(nmp);
1780 	return (0);
1781 
1782 fail:
1783 	freemsg(nmp);
1784 	return (err);
1785 }
1786 
1787 static boolean_t
1788 dhcpnospoof_check_cid(mac_protect_t *p, uchar_t *cid, uint_t cidlen)
1789 {
1790 	int	i;
1791 
1792 	for (i = 0; i < p->mp_cidcnt; i++) {
1793 		mac_dhcpcid_t	*dcid = &p->mp_cids[i];
1794 
1795 		if (dcid->dc_len == cidlen &&
1796 		    bcmp(dcid->dc_id, cid, cidlen) == 0)
1797 			return (B_TRUE);
1798 	}
1799 	return (B_FALSE);
1800 }
1801 
1802 static boolean_t
1803 dhcpnospoof_check_v4(mac_client_impl_t *mcip, mac_protect_t *p,
1804     ipha_t *ipha, uchar_t *end)
1805 {
1806 	struct dhcp	*dh4;
1807 	uchar_t		*cid;
1808 	uint_t		maclen, cidlen = 0;
1809 	uint8_t		optlen;
1810 	int		err;
1811 
1812 	if ((err = get_dhcpv4_info(ipha, end, &dh4)) != 0)
1813 		return (err == EINVAL);
1814 
1815 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1816 	if (dh4->hlen == maclen &&
1817 	    bcmp(mcip->mci_unicast->ma_addr, dh4->chaddr, maclen) != 0) {
1818 		return (B_FALSE);
1819 	}
1820 	if (get_dhcpv4_option(dh4, end, CD_CLIENT_ID, &cid, &optlen) == 0)
1821 		cidlen = optlen;
1822 
1823 	if (cidlen == 0)
1824 		return (B_TRUE);
1825 
1826 	if (*cid == ARPHRD_ETHER && cidlen - 1 == maclen &&
1827 	    bcmp(mcip->mci_unicast->ma_addr, cid + 1, maclen) == 0)
1828 		return (B_TRUE);
1829 
1830 	return (dhcpnospoof_check_cid(p, cid, cidlen));
1831 }
1832 
1833 static boolean_t
1834 dhcpnospoof_check_v6(mac_client_impl_t *mcip, mac_protect_t *p,
1835     ip6_t *ip6h, uchar_t *end)
1836 {
1837 	dhcpv6_message_t	*dh6;
1838 	dhcpv6_option_t		*d6o;
1839 	uint8_t			mtype;
1840 	uchar_t			*cid, *lladdr = NULL;
1841 	uint_t			cidlen, maclen, addrlen = 0;
1842 	uint16_t		cidtype;
1843 	int			err;
1844 
1845 	if ((err = get_dhcpv6_info(ip6h, end, &dh6)) != 0)
1846 		return (err == EINVAL);
1847 
1848 	/*
1849 	 * We only check client-generated messages.
1850 	 */
1851 	mtype = dh6->d6m_msg_type;
1852 	if (mtype == DHCPV6_MSG_ADVERTISE || mtype == DHCPV6_MSG_REPLY ||
1853 	    mtype == DHCPV6_MSG_RECONFIGURE)
1854 		return (B_TRUE);
1855 
1856 	d6o = get_dhcpv6_option(&dh6[1], end - (uchar_t *)&dh6[1], NULL,
1857 	    DHCPV6_OPT_CLIENTID, &cidlen);
1858 	if (d6o == NULL || (uchar_t *)d6o + cidlen > end)
1859 		return (B_TRUE);
1860 
1861 	cid = (uchar_t *)&d6o[1];
1862 	cidlen -= sizeof (*d6o);
1863 	if (cidlen < sizeof (cidtype))
1864 		return (B_TRUE);
1865 
1866 	bcopy(cid, &cidtype, sizeof (cidtype));
1867 	cidtype = ntohs(cidtype);
1868 	if (cidtype == DHCPV6_DUID_LLT && cidlen >= sizeof (duid_llt_t)) {
1869 		lladdr = cid + sizeof (duid_llt_t);
1870 		addrlen = cidlen - sizeof (duid_llt_t);
1871 	}
1872 	if (cidtype == DHCPV6_DUID_LL && cidlen >= sizeof (duid_ll_t)) {
1873 		lladdr = cid + sizeof (duid_ll_t);
1874 		addrlen = cidlen - sizeof (duid_ll_t);
1875 	}
1876 	maclen = mcip->mci_mip->mi_info.mi_addr_length;
1877 	if (lladdr != NULL && addrlen == maclen &&
1878 	    bcmp(mcip->mci_unicast->ma_addr, lladdr, maclen) == 0) {
1879 		return (B_TRUE);
1880 	}
1881 	return (dhcpnospoof_check_cid(p, cid, cidlen));
1882 }
1883 
1884 /*
1885  * Enforce dhcp-nospoof protection.
1886  */
1887 static int
1888 dhcpnospoof_check(mac_client_impl_t *mcip, mac_protect_t *protect,
1889     mblk_t *mp, mac_header_info_t *mhip)
1890 {
1891 	size_t		hdrsize = mhip->mhi_hdrsize;
1892 	uint32_t	sap = mhip->mhi_bindsap;
1893 	uchar_t		*start, *end;
1894 	mblk_t		*nmp = NULL;
1895 	int		err;
1896 
1897 	err = get_l3_info(mp, hdrsize, &start, &end, &nmp);
1898 	if (err != 0) {
1899 		DTRACE_PROBE2(invalid__l3, mac_client_impl_t *, mcip,
1900 		    mblk_t *, mp);
1901 		return (err);
1902 	}
1903 	err = EINVAL;
1904 
1905 	switch (sap) {
1906 	case ETHERTYPE_IP: {
1907 		ipha_t	*ipha = (ipha_t *)start;
1908 
1909 		if (start + sizeof (ipha_t) > end)
1910 			goto fail;
1911 
1912 		if (!dhcpnospoof_check_v4(mcip, protect, ipha, end))
1913 			goto fail;
1914 
1915 		break;
1916 	}
1917 	case ETHERTYPE_IPV6: {
1918 		ip6_t		*ip6h = (ip6_t *)start;
1919 
1920 		if (start + sizeof (ip6_t) > end)
1921 			goto fail;
1922 
1923 		if (!dhcpnospoof_check_v6(mcip, protect, ip6h, end))
1924 			goto fail;
1925 
1926 		break;
1927 	}
1928 	}
1929 	freemsg(nmp);
1930 	return (0);
1931 
1932 fail:
1933 	/* increment dhcpnospoof stat here */
1934 	freemsg(nmp);
1935 	return (err);
1936 }
1937 
1938 /*
1939  * This needs to be called whenever the mac client's mac address changes.
1940  */
1941 void
1942 mac_protect_update_v6_local_addr(mac_client_impl_t *mcip)
1943 {
1944 	uint8_t		*p, *macaddr = mcip->mci_unicast->ma_addr;
1945 	uint_t		i, media = mcip->mci_mip->mi_info.mi_media;
1946 	in6_addr_t	token, *v6addr = &mcip->mci_v6_local_addr;
1947 	in6_addr_t	ll_template = {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0};
1948 
1949 
1950 	bzero(&token, sizeof (token));
1951 	p = (uint8_t *)&token.s6_addr32[2];
1952 
1953 	switch (media) {
1954 	case DL_ETHER:
1955 		bcopy(macaddr, p, 3);
1956 		p[0] ^= 0x2;
1957 		p[3] = 0xff;
1958 		p[4] = 0xfe;
1959 		bcopy(macaddr + 3, p + 5, 3);
1960 		break;
1961 	case DL_IB:
1962 		ASSERT(mcip->mci_mip->mi_info.mi_addr_length == 20);
1963 		bcopy(macaddr + 12, p, 8);
1964 		p[0] |= 2;
1965 		break;
1966 	default:
1967 		/*
1968 		 * We do not need to generate the local address for link types
1969 		 * that do not support link protection. Wifi pretends to be
1970 		 * ethernet so it is covered by the DL_ETHER case (note the
1971 		 * use of mi_media instead of mi_nativemedia).
1972 		 */
1973 		return;
1974 	}
1975 
1976 	for (i = 0; i < 4; i++) {
1977 		v6addr->s6_addr32[i] = token.s6_addr32[i] |
1978 		    ll_template.s6_addr32[i];
1979 	}
1980 	mcip->mci_protect_flags |= MPT_FLAG_V6_LOCAL_ADDR_SET;
1981 }
1982 
1983 /*
1984  * Enforce link protection on one packet.
1985  */
1986 static int
1987 mac_protect_check_one(mac_client_impl_t *mcip, mblk_t *mp)
1988 {
1989 	mac_impl_t		*mip = mcip->mci_mip;
1990 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
1991 	mac_protect_t		*protect;
1992 	mac_header_info_t	mhi;
1993 	uint32_t		types;
1994 	int			err;
1995 
1996 	ASSERT(mp->b_next == NULL);
1997 	ASSERT(mrp != NULL);
1998 
1999 	err = mac_vlan_header_info((mac_handle_t)mip, mp, &mhi);
2000 	if (err != 0) {
2001 		DTRACE_PROBE2(invalid__header, mac_client_impl_t *, mcip,
2002 		    mblk_t *, mp);
2003 		return (err);
2004 	}
2005 	protect = &mrp->mrp_protect;
2006 	types = protect->mp_types;
2007 
2008 	if ((types & MPT_MACNOSPOOF) != 0) {
2009 		if (mhi.mhi_saddr != NULL &&
2010 		    bcmp(mcip->mci_unicast->ma_addr, mhi.mhi_saddr,
2011 		    mip->mi_info.mi_addr_length) != 0) {
2012 			BUMP_STAT(mcip, macspoofed);
2013 			DTRACE_PROBE2(mac__nospoof__fail,
2014 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2015 			return (EINVAL);
2016 		}
2017 	}
2018 	if ((types & MPT_RESTRICTED) != 0) {
2019 		uint32_t	vid = VLAN_ID(mhi.mhi_tci);
2020 		uint32_t	sap = mhi.mhi_bindsap;
2021 
2022 		/*
2023 		 * ETHERTYPE_VLAN packets are allowed through, provided that
2024 		 * the vid is not spoofed.
2025 		 */
2026 		if (vid != 0 && !mac_client_check_flow_vid(mcip, vid)) {
2027 			BUMP_STAT(mcip, restricted);
2028 			DTRACE_PROBE2(restricted__vid__invalid,
2029 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2030 			return (EINVAL);
2031 		}
2032 
2033 		if (sap != ETHERTYPE_IP && sap != ETHERTYPE_IPV6 &&
2034 		    sap != ETHERTYPE_ARP) {
2035 			BUMP_STAT(mcip, restricted);
2036 			DTRACE_PROBE2(restricted__fail,
2037 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2038 			return (EINVAL);
2039 		}
2040 	}
2041 	if ((types & MPT_IPNOSPOOF) != 0) {
2042 		if ((err = ipnospoof_check(mcip, protect, mp, &mhi)) != 0) {
2043 			BUMP_STAT(mcip, ipspoofed);
2044 			DTRACE_PROBE2(ip__nospoof__fail,
2045 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2046 			return (err);
2047 		}
2048 	}
2049 	if ((types & MPT_DHCPNOSPOOF) != 0) {
2050 		if ((err = dhcpnospoof_check(mcip, protect, mp, &mhi)) != 0) {
2051 			BUMP_STAT(mcip, dhcpspoofed);
2052 			DTRACE_PROBE2(dhcp__nospoof__fail,
2053 			    mac_client_impl_t *, mcip, mblk_t *, mp);
2054 			return (err);
2055 		}
2056 	}
2057 	return (0);
2058 }
2059 
2060 /*
2061  * Enforce link protection on a packet chain.
2062  * Packets that pass the checks are returned back to the caller.
2063  */
2064 mblk_t *
2065 mac_protect_check(mac_client_handle_t mch, mblk_t *mp)
2066 {
2067 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
2068 	mblk_t			*ret_mp = NULL, **tailp = &ret_mp, *next;
2069 
2070 	/*
2071 	 * Skip checks if we are part of an aggr.
2072 	 */
2073 	if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) != 0)
2074 		return (mp);
2075 
2076 	for (; mp != NULL; mp = next) {
2077 		next = mp->b_next;
2078 		mp->b_next = NULL;
2079 
2080 		if (mac_protect_check_one(mcip, mp) == 0) {
2081 			*tailp = mp;
2082 			tailp = &mp->b_next;
2083 		} else {
2084 			freemsg(mp);
2085 		}
2086 	}
2087 	return (ret_mp);
2088 }
2089 
2090 /*
2091  * Check if a particular protection type is enabled.
2092  */
2093 boolean_t
2094 mac_protect_enabled(mac_client_handle_t mch, uint32_t type)
2095 {
2096 	return (MAC_PROTECT_ENABLED((mac_client_impl_t *)mch, type));
2097 }
2098 
2099 static int
2100 validate_ips(mac_protect_t *p)
2101 {
2102 	uint_t		i, j;
2103 
2104 	if (p->mp_ipaddrcnt == MPT_RESET)
2105 		return (0);
2106 
2107 	if (p->mp_ipaddrcnt > MPT_MAXIPADDR)
2108 		return (EINVAL);
2109 
2110 	for (i = 0; i < p->mp_ipaddrcnt; i++) {
2111 		mac_ipaddr_t	*addr = &p->mp_ipaddrs[i];
2112 
2113 		/*
2114 		 * The unspecified address is implicitly allowed so there's no
2115 		 * need to add it to the list. Also, validate that the netmask,
2116 		 * if any, is sane for the specific version of IP. A mask of
2117 		 * some kind is always required.
2118 		 */
2119 		if (addr->ip_netmask == 0)
2120 			return (EINVAL);
2121 
2122 		if (addr->ip_version == IPV4_VERSION) {
2123 			if (V4_PART_OF_V6(addr->ip_addr) == INADDR_ANY)
2124 				return (EINVAL);
2125 			if (addr->ip_netmask > 32)
2126 				return (EINVAL);
2127 		} else if (addr->ip_version == IPV6_VERSION) {
2128 			if (IN6_IS_ADDR_UNSPECIFIED(&addr->ip_addr))
2129 				return (EINVAL);
2130 
2131 			if (IN6_IS_ADDR_V4MAPPED_ANY(&addr->ip_addr))
2132 				return (EINVAL);
2133 
2134 			if (addr->ip_netmask > 128)
2135 				return (EINVAL);
2136 		} else {
2137 			/* invalid ip version */
2138 			return (EINVAL);
2139 		}
2140 
2141 		for (j = 0; j < p->mp_ipaddrcnt; j++) {
2142 			mac_ipaddr_t	*addr1 = &p->mp_ipaddrs[j];
2143 
2144 			if (i == j || addr->ip_version != addr1->ip_version)
2145 				continue;
2146 
2147 			/* found a duplicate */
2148 			if ((addr->ip_version == IPV4_VERSION &&
2149 			    V4_PART_OF_V6(addr->ip_addr) ==
2150 			    V4_PART_OF_V6(addr1->ip_addr)) ||
2151 			    IN6_ARE_ADDR_EQUAL(&addr->ip_addr,
2152 			    &addr1->ip_addr))
2153 				return (EINVAL);
2154 		}
2155 	}
2156 	return (0);
2157 }
2158 
2159 /* ARGSUSED */
2160 static int
2161 validate_cids(mac_protect_t *p)
2162 {
2163 	uint_t		i, j;
2164 
2165 	if (p->mp_cidcnt == MPT_RESET)
2166 		return (0);
2167 
2168 	if (p->mp_cidcnt > MPT_MAXCID)
2169 		return (EINVAL);
2170 
2171 	for (i = 0; i < p->mp_cidcnt; i++) {
2172 		mac_dhcpcid_t	*cid = &p->mp_cids[i];
2173 
2174 		if (cid->dc_len > MPT_MAXCIDLEN ||
2175 		    (cid->dc_form != CIDFORM_TYPED &&
2176 		    cid->dc_form != CIDFORM_HEX &&
2177 		    cid->dc_form != CIDFORM_STR))
2178 			return (EINVAL);
2179 
2180 		for (j = 0; j < p->mp_cidcnt; j++) {
2181 			mac_dhcpcid_t	*cid1 = &p->mp_cids[j];
2182 
2183 			if (i == j || cid->dc_len != cid1->dc_len)
2184 				continue;
2185 
2186 			/* found a duplicate */
2187 			if (bcmp(cid->dc_id, cid1->dc_id, cid->dc_len) == 0)
2188 				return (EINVAL);
2189 		}
2190 	}
2191 	return (0);
2192 }
2193 
2194 /*
2195  * Sanity-checks parameters given by userland.
2196  */
2197 int
2198 mac_protect_validate(mac_resource_props_t *mrp)
2199 {
2200 	mac_protect_t	*p = &mrp->mrp_protect;
2201 	int		err;
2202 
2203 	/* check for invalid types */
2204 	if (p->mp_types != MPT_RESET && (p->mp_types & ~MPT_ALL) != 0)
2205 		return (EINVAL);
2206 
2207 	if ((err = validate_ips(p)) != 0)
2208 		return (err);
2209 
2210 	if ((err = validate_cids(p)) != 0)
2211 		return (err);
2212 
2213 	return (0);
2214 }
2215 
2216 /*
2217  * Enable/disable link protection.
2218  */
2219 int
2220 mac_protect_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
2221 {
2222 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
2223 	mac_impl_t		*mip = mcip->mci_mip;
2224 	uint_t			media = mip->mi_info.mi_nativemedia;
2225 	int			err;
2226 
2227 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2228 
2229 	/* tunnels are not supported */
2230 	if (media == DL_IPV4 || media == DL_IPV6 || media == DL_6TO4)
2231 		return (ENOTSUP);
2232 
2233 	if ((err = mac_protect_validate(mrp)) != 0)
2234 		return (err);
2235 
2236 	if (err != 0)
2237 		return (err);
2238 
2239 	mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
2240 	i_mac_notify(((mcip->mci_state_flags & MCIS_IS_VNIC) != 0 ?
2241 	    mcip->mci_upper_mip : mip), MAC_NOTE_ALLOWED_IPS);
2242 	return (0);
2243 }
2244 
2245 void
2246 mac_protect_update(mac_resource_props_t *new, mac_resource_props_t *curr)
2247 {
2248 	mac_protect_t	*np = &new->mrp_protect;
2249 	mac_protect_t	*cp = &curr->mrp_protect;
2250 	uint32_t	types = np->mp_types;
2251 
2252 	if (types == MPT_RESET) {
2253 		cp->mp_types = 0;
2254 		curr->mrp_mask &= ~MRP_PROTECT;
2255 	} else {
2256 		if (types != 0) {
2257 			cp->mp_types = types;
2258 			curr->mrp_mask |= MRP_PROTECT;
2259 		}
2260 	}
2261 	if (np->mp_ipaddrcnt != 0) {
2262 		if (np->mp_ipaddrcnt <= MPT_MAXIPADDR) {
2263 			bcopy(np->mp_ipaddrs, cp->mp_ipaddrs,
2264 			    sizeof (cp->mp_ipaddrs));
2265 			cp->mp_ipaddrcnt = np->mp_ipaddrcnt;
2266 		} else if (np->mp_ipaddrcnt == MPT_RESET) {
2267 			bzero(cp->mp_ipaddrs, sizeof (cp->mp_ipaddrs));
2268 			cp->mp_ipaddrcnt = 0;
2269 		}
2270 	}
2271 	if (np->mp_cidcnt != 0) {
2272 		if (np->mp_cidcnt <= MPT_MAXCID) {
2273 			bcopy(np->mp_cids, cp->mp_cids, sizeof (cp->mp_cids));
2274 			cp->mp_cidcnt = np->mp_cidcnt;
2275 		} else if (np->mp_cidcnt == MPT_RESET) {
2276 			bzero(cp->mp_cids, sizeof (cp->mp_cids));
2277 			cp->mp_cidcnt = 0;
2278 		}
2279 	}
2280 }
2281 
2282 void
2283 mac_protect_init(mac_client_impl_t *mcip)
2284 {
2285 	mutex_init(&mcip->mci_protect_lock, NULL, MUTEX_DRIVER, NULL);
2286 	mcip->mci_protect_flags = 0;
2287 	mcip->mci_txn_cleanup_tid = 0;
2288 	avl_create(&mcip->mci_v4_pending_txn, compare_dhcpv4_xid,
2289 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node));
2290 	avl_create(&mcip->mci_v4_completed_txn, compare_dhcpv4_cid,
2291 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_node));
2292 	avl_create(&mcip->mci_v4_dyn_ip, compare_dhcpv4_ip,
2293 	    sizeof (dhcpv4_txn_t), offsetof(dhcpv4_txn_t, dt_ipnode));
2294 	avl_create(&mcip->mci_v6_pending_txn, compare_dhcpv6_xid,
2295 	    sizeof (dhcpv6_txn_t), offsetof(dhcpv6_txn_t, dt_node));
2296 	avl_create(&mcip->mci_v6_cid, compare_dhcpv6_cid,
2297 	    sizeof (dhcpv6_cid_t), offsetof(dhcpv6_cid_t, dc_node));
2298 	avl_create(&mcip->mci_v6_dyn_ip, compare_dhcpv6_ip,
2299 	    sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node));
2300 }
2301 
2302 void
2303 mac_protect_fini(mac_client_impl_t *mcip)
2304 {
2305 	avl_destroy(&mcip->mci_v6_dyn_ip);
2306 	avl_destroy(&mcip->mci_v6_cid);
2307 	avl_destroy(&mcip->mci_v6_pending_txn);
2308 	avl_destroy(&mcip->mci_v4_dyn_ip);
2309 	avl_destroy(&mcip->mci_v4_completed_txn);
2310 	avl_destroy(&mcip->mci_v4_pending_txn);
2311 	mcip->mci_txn_cleanup_tid = 0;
2312 	mcip->mci_protect_flags = 0;
2313 	mutex_destroy(&mcip->mci_protect_lock);
2314 }
2315 
2316 static boolean_t
2317 allowed_ips_set(mac_resource_props_t *mrp, uint32_t af)
2318 {
2319 	int i;
2320 
2321 	for (i = 0; i < mrp->mrp_protect.mp_ipaddrcnt; i++) {
2322 		if (mrp->mrp_protect.mp_ipaddrs[i].ip_version == af)
2323 			return (B_TRUE);
2324 	}
2325 	return (B_FALSE);
2326 }
2327 
2328 mac_protect_t *
2329 mac_protect_get(mac_handle_t mh)
2330 {
2331 	mac_impl_t *mip = (mac_impl_t *)mh;
2332 
2333 	return (&mip->mi_resource_props.mrp_protect);
2334 }
2335