xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6_asp.c (revision c432de9c6e1189ea0aa9b0fe1c35c18427653f27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Copyright 2017 Sebastian Wiedenroth
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/errno.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/cmn_err.h>
35 #include <sys/strsun.h>
36 #include <sys/zone.h>
37 #include <netinet/in.h>
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ip6_asp.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip_if.h>
44 #include <inet/ipclassifier.h>
45 
46 #define	IN6ADDR_MASK128_INIT \
47 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
48 #define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
49 #define	IN6ADDR_MASK32_INIT	{ 0xffffffffU, 0, 0, 0 }
50 #ifdef _BIG_ENDIAN
51 #define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
52 #define	IN6ADDR_MASK10_INIT	{ 0xffc00000U, 0, 0, 0 }
53 #define	IN6ADDR_MASK7_INIT	{ 0xfe000000U, 0, 0, 0 }
54 #else
55 #define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
56 #define	IN6ADDR_MASK10_INIT	{ 0x0000c0ffU, 0, 0, 0 }
57 #define	IN6ADDR_MASK7_INIT	{ 0x000000feU, 0, 0, 0 }
58 #endif
59 
60 /*
61  * This table is ordered such that longest prefix matches are hit first
62  * (longer prefix lengths first).  The last entry must be the "default"
63  * entry (::0/0).
64  */
65 static ip6_asp_t default_ip6_asp_table[] = {
66 	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
67 	    "Loopback", 50 },
68 	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
69 	    "IPv4_Compatible", 1 },
70 #ifdef _BIG_ENDIAN
71 	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
72 	    "IPv4", 35 },
73 	{ { 0x20010000U, 0, 0, 0 },	IN6ADDR_MASK32_INIT,
74 	    "Teredo", 5 },
75 	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
76 	    "6to4", 30 },
77 	{ { 0x3ffe0000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
78 	    "6bone", 1 },
79 	{ { 0xfec00000U, 0, 0, 0 },	IN6ADDR_MASK10_INIT,
80 	    "Site_Local", 1 },
81 	{ { 0xfc000000U, 0, 0, 0 },	IN6ADDR_MASK7_INIT,
82 	    "ULA", 3 },
83 #else
84 	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
85 	    "IPv4", 35 },
86 	{ { 0x00000120U, 0, 0, 0 },	IN6ADDR_MASK32_INIT,
87 	    "Teredo", 5 },
88 	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
89 	    "6to4", 30 },
90 	{ { 0x0000fe3fU, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
91 	    "6bone", 1 },
92 	{ { 0x0000c0feU, 0, 0, 0 },	IN6ADDR_MASK10_INIT,
93 	    "Site_Local", 1 },
94 	{ { 0x000000fcU, 0, 0, 0 },	IN6ADDR_MASK7_INIT,
95 	    "ULA", 3 },
96 #endif
97 	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
98 	    "Default", 40 }
99 };
100 
/*
 * The IPv6 Default Address Selection policy table.
 * Until someone up above reconfigures the policy table, use the global
 * default.  The table contents are only altered through
 * SIOCSIP6ADDRPOLICY, which is exclusive in ip.  ips_ip6_asp_lock
 * protects the bookkeeping around the table: the reference count, the
 * update-in-progress flag, and the queues of pending updates and
 * pending operations.
 */
107 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
108 static void ip6_asp_check_for_updates(ip_stack_t *);
109 
110 void
111 ip6_asp_init(ip_stack_t *ipst)
112 {
113 	/* Initialize the table lock */
114 	mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
115 
116 	ipst->ips_ip6_asp_table = default_ip6_asp_table;
117 
118 	ipst->ips_ip6_asp_table_count =
119 	    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
120 }
121 
122 void
123 ip6_asp_free(ip_stack_t *ipst)
124 {
125 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
126 		kmem_free(ipst->ips_ip6_asp_table,
127 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
128 		ipst->ips_ip6_asp_table = NULL;
129 	}
130 	mutex_destroy(&ipst->ips_ip6_asp_lock);
131 }
132 
133 /*
134  * Return false if the table is being updated. Else, increment the ref
135  * count and return true.
136  */
137 boolean_t
138 ip6_asp_can_lookup(ip_stack_t *ipst)
139 {
140 	mutex_enter(&ipst->ips_ip6_asp_lock);
141 	if (ipst->ips_ip6_asp_uip) {
142 		mutex_exit(&ipst->ips_ip6_asp_lock);
143 		return (B_FALSE);
144 	}
145 	IP6_ASP_TABLE_REFHOLD(ipst);
146 	mutex_exit(&ipst->ips_ip6_asp_lock);
147 	return (B_TRUE);
148 
149 }
150 
151 void
152 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
153 {
154 	conn_t	*connp = Q_TO_CONN(q);
155 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
156 
157 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
158 	    (mp->b_next == NULL));
159 	mp->b_queue = (void *)q;
160 	mp->b_prev = (void *)func;
161 	mp->b_next = NULL;
162 
163 	mutex_enter(&ipst->ips_ip6_asp_lock);
164 	if (ipst->ips_ip6_asp_pending_ops == NULL) {
165 		ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
166 		ipst->ips_ip6_asp_pending_ops =
167 		    ipst->ips_ip6_asp_pending_ops_tail = mp;
168 	} else {
169 		ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
170 		ipst->ips_ip6_asp_pending_ops_tail = mp;
171 	}
172 	mutex_exit(&ipst->ips_ip6_asp_lock);
173 }
174 
175 static void
176 ip6_asp_complete_op(ip_stack_t *ipst)
177 {
178 	mblk_t		*mp;
179 	queue_t		*q;
180 	aspfunc_t	func;
181 
182 	mutex_enter(&ipst->ips_ip6_asp_lock);
183 	while (ipst->ips_ip6_asp_pending_ops != NULL) {
184 		mp = ipst->ips_ip6_asp_pending_ops;
185 		ipst->ips_ip6_asp_pending_ops = mp->b_next;
186 		mp->b_next = NULL;
187 		if (ipst->ips_ip6_asp_pending_ops == NULL)
188 			ipst->ips_ip6_asp_pending_ops_tail = NULL;
189 		mutex_exit(&ipst->ips_ip6_asp_lock);
190 
191 		q = (queue_t *)mp->b_queue;
192 		func = (aspfunc_t)mp->b_prev;
193 
194 		mp->b_prev = NULL;
195 		mp->b_queue = NULL;
196 
197 
198 		(*func)(NULL, q, mp, NULL);
199 		mutex_enter(&ipst->ips_ip6_asp_lock);
200 	}
201 	mutex_exit(&ipst->ips_ip6_asp_lock);
202 }
203 
204 /*
205  * Decrement reference count. When it gets to 0, we check for (pending)
206  * saved update to the table, if any.
207  */
208 void
209 ip6_asp_table_refrele(ip_stack_t *ipst)
210 {
211 	IP6_ASP_TABLE_REFRELE(ipst);
212 }
213 
214 /*
215  * This function is guaranteed never to return a NULL pointer.  It
216  * will always return information from one of the entries in the
217  * asp_table (which will never be empty).  If a pointer is passed
218  * in for the precedence, the precedence value will be set; a
219  * pointer to the label will be returned by the function.
220  *
221  * Since the table is only anticipated to have about 10 entries
222  * total, the lookup algorithm hasn't been optimized to anything
223  * better than O(n).
224  */
225 char *
226 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
227 {
228 	ip6_asp_t *aspp;
229 	ip6_asp_t *match = NULL;
230 	ip6_asp_t *default_policy;
231 
232 	aspp = ipst->ips_ip6_asp_table;
233 	/* The default entry must always be the last one */
234 	default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
235 
236 	while (match == NULL) {
237 		if (aspp == default_policy) {
238 			match = aspp;
239 		} else {
240 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
241 			    aspp->ip6_asp_prefix))
242 				match = aspp;
243 			else
244 				aspp++;
245 		}
246 	}
247 
248 	if (precedence != NULL)
249 		*precedence = match->ip6_asp_precedence;
250 	return (match->ip6_asp_label);
251 }
252 
253 /*
254  * If we had deferred updating the table because of outstanding references,
255  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
256  * ip_sioctl_ip6addrpolicy() has already done it for us.
257  */
258 void
259 ip6_asp_check_for_updates(ip_stack_t *ipst)
260 {
261 	ip6_asp_t *table;
262 	size_t	table_size;
263 	mblk_t	*data_mp, *mp;
264 	struct iocblk *iocp;
265 
266 	mutex_enter(&ipst->ips_ip6_asp_lock);
267 	if (ipst->ips_ip6_asp_pending_update == NULL ||
268 	    ipst->ips_ip6_asp_refcnt > 0) {
269 		mutex_exit(&ipst->ips_ip6_asp_lock);
270 		return;
271 	}
272 
273 	mp = ipst->ips_ip6_asp_pending_update;
274 	ipst->ips_ip6_asp_pending_update = NULL;
275 	ASSERT(mp->b_prev != NULL);
276 
277 	ipst->ips_ip6_asp_uip = B_TRUE;
278 
279 	iocp = (struct iocblk *)mp->b_rptr;
280 	data_mp = mp->b_cont;
281 	if (data_mp == NULL) {
282 		table = NULL;
283 		table_size = iocp->ioc_count;
284 	} else {
285 		table = (ip6_asp_t *)data_mp->b_rptr;
286 		table_size = iocp->ioc_count;
287 	}
288 
289 	ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
290 	    iocp->ioc_flag & IOC_MODELS);
291 }
292 
293 /*
294  * ip6_asp_replace replaces the contents of the IPv6 address selection
295  * policy table with those specified in new_table.  If new_table is NULL,
296  * this indicates that the caller wishes ip to use the default policy
297  * table.  The caller is responsible for making sure that there are exactly
298  * new_count policy entries in new_table.
299  */
300 /*ARGSUSED5*/
301 void
302 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
303     boolean_t locked, ip_stack_t *ipst, model_t datamodel)
304 {
305 	int			ret_val = 0;
306 	ip6_asp_t		*tmp_table;
307 	uint_t			count;
308 	queue_t			*q;
309 	struct iocblk		*iocp;
310 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
311 	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
312 #else
313 	const size_t ip6_asp_size = sizeof (ip6_asp_t);
314 #endif
315 
316 	if (new_size % ip6_asp_size != 0) {
317 		ip1dbg(("ip6_asp_replace: invalid table size\n"));
318 		ret_val = EINVAL;
319 		if (locked)
320 			goto unlock_end;
321 		goto replace_end;
322 	} else {
323 		count = new_size / ip6_asp_size;
324 	}
325 
326 
327 	if (!locked)
328 		mutex_enter(&ipst->ips_ip6_asp_lock);
329 	/*
330 	 * Check if we are in the process of creating any IRE using the
331 	 * current information. If so, wait till that is done.
332 	 */
333 	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
334 		/* Save this request for later processing */
335 		if (ipst->ips_ip6_asp_pending_update == NULL) {
336 			ipst->ips_ip6_asp_pending_update = mp;
337 		} else {
338 			/* Let's not queue multiple requests for now */
339 			ip1dbg(("ip6_asp_replace: discarding request\n"));
340 			mutex_exit(&ipst->ips_ip6_asp_lock);
341 			ret_val =  EAGAIN;
342 			goto replace_end;
343 		}
344 		mutex_exit(&ipst->ips_ip6_asp_lock);
345 		return;
346 	}
347 
348 	/* Prevent lookups till the table have been updated */
349 	if (!locked)
350 		ipst->ips_ip6_asp_uip = B_TRUE;
351 
352 	ASSERT(ipst->ips_ip6_asp_refcnt == 0);
353 
354 	if (new_table == NULL) {
355 		/*
356 		 * This is a special case.  The user wants to revert
357 		 * back to using the default table.
358 		 */
359 		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
360 			goto unlock_end;
361 
362 		kmem_free(ipst->ips_ip6_asp_table,
363 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
364 		ipst->ips_ip6_asp_table = default_ip6_asp_table;
365 		ipst->ips_ip6_asp_table_count =
366 		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
367 		goto unlock_end;
368 	}
369 
370 	if (count == 0) {
371 		ret_val = EINVAL;
372 		ip1dbg(("ip6_asp_replace: empty table\n"));
373 		goto unlock_end;
374 	}
375 
376 	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
377 	    NULL) {
378 		ret_val = ENOMEM;
379 		goto unlock_end;
380 	}
381 
382 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
383 
384 	/*
385 	 * If 'new_table' -actually- originates from a 32-bit process
386 	 * then the nicely aligned ip6_asp_label array will be
387 	 * subtlely misaligned on this kernel, because the structure
388 	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
389 	 * userland.  Fix it up here.
390 	 *
391 	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
392 	 *	do the datamodel transformation (below) there instead of here?
393 	 */
394 	if (datamodel == IOC_ILP32) {
395 		ip6_asp_t *dst;
396 		ip6_asp32_t *src;
397 		int i;
398 
399 		if ((dst = kmem_zalloc(count * sizeof (*dst),
400 		    KM_NOSLEEP)) == NULL) {
401 			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
402 			ret_val = ENOMEM;
403 			goto unlock_end;
404 		}
405 
406 		/*
407 		 * Copy each element of the table from ip6_asp32_t
408 		 * format into ip6_asp_t format.  Fortunately, since
409 		 * we're just dealing with a trailing structure pad,
410 		 * we can do this straightforwardly with a flurry of
411 		 * bcopying.
412 		 */
413 		src = (void *)new_table;
414 		for (i = 0; i < count; i++)
415 			bcopy(src + i, dst + i, sizeof (*src));
416 
417 		ip6_asp_copy(dst, tmp_table, count);
418 		kmem_free(dst, count * sizeof (*dst));
419 	} else
420 #endif
421 		ip6_asp_copy(new_table, tmp_table, count);
422 
423 	/* Make sure the last entry is the default entry */
424 	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
425 	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
426 		ret_val = EINVAL;
427 		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
428 		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
429 		goto unlock_end;
430 	}
431 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
432 		kmem_free(ipst->ips_ip6_asp_table,
433 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
434 	}
435 	ipst->ips_ip6_asp_table = tmp_table;
436 	ipst->ips_ip6_asp_table_count = count;
437 
438 unlock_end:
439 	ipst->ips_ip6_asp_uip = B_FALSE;
440 	mutex_exit(&ipst->ips_ip6_asp_lock);
441 
442 	/* Let conn_ixa caching know that source address selection changed */
443 	ip_update_source_selection(ipst);
444 
445 replace_end:
446 	/* Reply to the ioctl */
447 	q = (queue_t *)mp->b_prev;
448 	mp->b_prev = NULL;
449 	if (q == NULL) {
450 		freemsg(mp);
451 		goto check_binds;
452 	}
453 	iocp = (struct iocblk *)mp->b_rptr;
454 	iocp->ioc_error = ret_val;
455 	iocp->ioc_count = 0;
456 	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
457 	qreply(q, mp);
458 check_binds:
459 	ip6_asp_complete_op(ipst);
460 }
461 
462 /*
463  * Copies the contents of src_table to dst_table, and sorts the
464  * entries in decending order of prefix lengths.  It assumes that both
465  * tables are appropriately sized to contain count entries.
466  */
467 static void
468 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
469 {
470 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
471 
472 	dst_table[0] = src_table[0];
473 	if (count == 1)
474 		return;
475 
476 	/*
477 	 * Sort the entries in descending order of prefix lengths.
478 	 *
479 	 * Note: this should be a small table.  In 99% of cases, we
480 	 * expect the table to have 9 entries.  In the remaining 1%
481 	 * of cases, we expect the table to have one or two more
482 	 * entries.
483 	 */
484 	src_limit = src_table + count;
485 	dst_limit = dst_table + 1;
486 	for (src_ptr = src_table + 1; src_ptr != src_limit;
487 	    src_ptr++, dst_limit++) {
488 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
489 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
490 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
491 				/*
492 				 * Make room to insert the source entry
493 				 * before dst_ptr by shifting entries to
494 				 * the right.
495 				 */
496 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
497 					*(dp + 1) = *dp;
498 				break;
499 			}
500 		}
501 		*dst_ptr = *src_ptr;
502 	}
503 }
504 
505 /*
506  * This function copies as many entries from ip6_asp_table as will fit
507  * into dtable.  The dtable_size parameter is the size of dtable
508  * in bytes.  This function returns the number of entries in
509  * ip6_asp_table, even if it's not able to fit all of the entries into
510  * dtable.
511  */
512 int
513 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
514 {
515 	uint_t dtable_count;
516 
517 	if (dtable != NULL) {
518 		if (dtable_size < sizeof (ip6_asp_t))
519 			return (-1);
520 
521 		dtable_count = dtable_size / sizeof (ip6_asp_t);
522 		bcopy(ipst->ips_ip6_asp_table, dtable,
523 		    MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
524 		    sizeof (ip6_asp_t));
525 	}
526 
527 	return (ipst->ips_ip6_asp_table_count);
528 }
529 
530 /*
531  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
532  * otherwise.
533  */
534 boolean_t
535 ip6_asp_labelcmp(const char *label1, const char *label2)
536 {
537 	int64_t *llptr1, *llptr2;
538 
539 	/*
540 	 * The common case, the two labels are actually the same string
541 	 * from the policy table.
542 	 */
543 	if (label1 == label2)
544 		return (B_TRUE);
545 
546 	/*
547 	 * Since we know the labels are at most 16 bytes long, compare
548 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
549 	 * structure guarantees that the labels are 8 byte alligned.
550 	 */
551 	llptr1 = (int64_t *)label1;
552 	llptr2 = (int64_t *)label2;
553 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
554 		return (B_TRUE);
555 	return (B_FALSE);
556 }
557