xref: /titanic_44/usr/src/uts/common/inet/ipnet.h (revision 0a0e9771ca0211c15f3ac4466b661c145feeb9e4)
1b127ac41SPhilip Kirk /*
2b127ac41SPhilip Kirk  * CDDL HEADER START
3b127ac41SPhilip Kirk  *
4b127ac41SPhilip Kirk  * The contents of this file are subject to the terms of the
5b127ac41SPhilip Kirk  * Common Development and Distribution License (the "License").
6b127ac41SPhilip Kirk  * You may not use this file except in compliance with the License.
7b127ac41SPhilip Kirk  *
8b127ac41SPhilip Kirk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9b127ac41SPhilip Kirk  * or http://www.opensolaris.org/os/licensing.
10b127ac41SPhilip Kirk  * See the License for the specific language governing permissions
11b127ac41SPhilip Kirk  * and limitations under the License.
12b127ac41SPhilip Kirk  *
13b127ac41SPhilip Kirk  * When distributing Covered Code, include this CDDL HEADER in each
14b127ac41SPhilip Kirk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15b127ac41SPhilip Kirk  * If applicable, add the following below this CDDL HEADER, with the
16b127ac41SPhilip Kirk  * fields enclosed by brackets "[]" replaced with your own identifying
17b127ac41SPhilip Kirk  * information: Portions Copyright [yyyy] [name of copyright owner]
18b127ac41SPhilip Kirk  *
19b127ac41SPhilip Kirk  * CDDL HEADER END
20b127ac41SPhilip Kirk  */
21b127ac41SPhilip Kirk 
22b127ac41SPhilip Kirk /*
23*0a0e9771SDarren Reed  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24b127ac41SPhilip Kirk  * Use is subject to license terms.
25b127ac41SPhilip Kirk  */
26b127ac41SPhilip Kirk 
27b127ac41SPhilip Kirk #ifndef _INET_IPNET_H
28b127ac41SPhilip Kirk #define	_INET_IPNET_H
29b127ac41SPhilip Kirk 
30b127ac41SPhilip Kirk #ifdef __cplusplus
31b127ac41SPhilip Kirk extern "C" {
32b127ac41SPhilip Kirk #endif
33b127ac41SPhilip Kirk 
34b127ac41SPhilip Kirk #include <sys/types.h>
35b127ac41SPhilip Kirk #include <sys/netstack.h>
36b127ac41SPhilip Kirk #include <sys/list.h>
37b127ac41SPhilip Kirk #include <netinet/in.h>
38b127ac41SPhilip Kirk #include <net/if.h>
39*0a0e9771SDarren Reed #include <net/bpf.h>
40b127ac41SPhilip Kirk #include <sys/avl.h>
41b127ac41SPhilip Kirk #include <sys/neti.h>
42*0a0e9771SDarren Reed #include <sys/hook_event.h>
43*0a0e9771SDarren Reed #include <sys/zone.h>
44*0a0e9771SDarren Reed #include <sys/kstat.h>
45*0a0e9771SDarren Reed 
46*0a0e9771SDarren Reed typedef struct ipnet_kstats_s	{
47*0a0e9771SDarren Reed 	kstat_named_t	ik_duplicationFail;
48*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchOk;
49*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchFail;
50*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchHeaderDrop;
51*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchDupDrop;
52*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchPutDrop;
53*0a0e9771SDarren Reed 	kstat_named_t	ik_dispatchDeliver;
54*0a0e9771SDarren Reed 	kstat_named_t	ik_acceptOk;
55*0a0e9771SDarren Reed 	kstat_named_t	ik_acceptFail;
56*0a0e9771SDarren Reed } ipnet_kstats_t;
57*0a0e9771SDarren Reed 
/*
 * Increment the 64-bit counter named _y inside the ips_stats member of the
 * structure that _x points to.  This is a plain (non-atomic) post-increment.
 * The expansion is fully parenthesized so that it always behaves as a single
 * expression, and _x is evaluated exactly once.
 */
#define	IPSK_BUMP(_x, _y)	((_x)->ips_stats._y.value.ui64++)
59b127ac41SPhilip Kirk 
60b127ac41SPhilip Kirk /*
61b127ac41SPhilip Kirk  * Structure used to hold information for both IPv4 and IPv6 addresses.
62*0a0e9771SDarren Reed  *
63*0a0e9771SDarren Reed  * When ifa_shared is non-NULL, it points to a "fake" ipnetif_t structure
64*0a0e9771SDarren Reed  * that represents the network interface for each zone that shares its
65*0a0e9771SDarren Reed  * network stack. This is used by BPF to build a list of interface names
66*0a0e9771SDarren Reed  * present in each zone. Multiple ipnetif_addr_t's may point to a single
67*0a0e9771SDarren Reed  * ipnetif_t using ifa_shared. The typical case is the global zone has
68*0a0e9771SDarren Reed  * a bge0 that other zones use as bge0:1, bge0:2, etc. In ipnet, the
69*0a0e9771SDarren Reed  * ipnetif_addr_t's that store the IP address for bge0:1, etc, would
70*0a0e9771SDarren Reed  * point to an ipnetif_t stored in the if_avl_by_shared tree that has
71*0a0e9771SDarren Reed  * the name "bge0".
72b127ac41SPhilip Kirk  */
73b127ac41SPhilip Kirk typedef struct ipnetif_addr {
74b127ac41SPhilip Kirk 	union {
75b127ac41SPhilip Kirk 		ipaddr_t	ifau_ip4addr;
76b127ac41SPhilip Kirk 		in6_addr_t	ifau_ip6addr;
77b127ac41SPhilip Kirk 	} ifa_addr;
78b127ac41SPhilip Kirk 	ipaddr_t	ifa_brdaddr;
79b127ac41SPhilip Kirk 	zoneid_t	ifa_zone;
80b127ac41SPhilip Kirk 	uint64_t	ifa_id;
81b127ac41SPhilip Kirk 	list_node_t	ifa_link;
82*0a0e9771SDarren Reed 	struct ipnetif	*ifa_shared;
83b127ac41SPhilip Kirk } ipnetif_addr_t;
84b127ac41SPhilip Kirk #define	ifa_ip4addr	ifa_addr.ifau_ip4addr
85b127ac41SPhilip Kirk #define	ifa_ip6addr	ifa_addr.ifau_ip6addr
86b127ac41SPhilip Kirk 
87b127ac41SPhilip Kirk /*
88b127ac41SPhilip Kirk  * Structure describes the ipnet module representation of an ip interface.
89b127ac41SPhilip Kirk  * The structure holds both IPv4 and IPv6 addresses, the address lists are
90b127ac41SPhilip Kirk  * protected by a mutex. The ipnetif structures are held per stack instance
91b127ac41SPhilip Kirk  * within avl trees indexed on name and ip index.
92*0a0e9771SDarren Reed  *
93*0a0e9771SDarren Reed  * if_avl_by_shared is used by zones that share their instance of IP with
94*0a0e9771SDarren Reed  * other zones. It is used to store ipnetif_t structures. An example of this
95*0a0e9771SDarren Reed  * is the global zone sharing its instance of IP with other local zones.
96*0a0e9771SDarren Reed  * In this case, if_avl_by_shared is a tree of names that are in active use
97*0a0e9771SDarren Reed  * by zones using a shared instance of IP.
98*0a0e9771SDarren Reed  * The value in if_sharecnt represents the number of ipnetif_addr_t's that
99*0a0e9771SDarren Reed  * point to it.
100b127ac41SPhilip Kirk  */
101b127ac41SPhilip Kirk typedef struct ipnetif {
102b127ac41SPhilip Kirk 	char		if_name[LIFNAMSIZ];
103b127ac41SPhilip Kirk 	uint_t		if_flags;
104*0a0e9771SDarren Reed 	uint_t		if_index;
105b127ac41SPhilip Kirk 	kmutex_t	if_addr_lock;	/* protects both addr lists */
106b127ac41SPhilip Kirk 	list_t		if_ip4addr_list;
107b127ac41SPhilip Kirk 	list_t		if_ip6addr_list;
108b127ac41SPhilip Kirk 	avl_node_t	if_avl_by_index;
109b127ac41SPhilip Kirk 	avl_node_t	if_avl_by_name;
110b127ac41SPhilip Kirk 	dev_t		if_dev;
111b127ac41SPhilip Kirk 	uint_t		if_multicnt;	/* protected by ips_event_lock */
112b127ac41SPhilip Kirk 	kmutex_t	if_reflock;	/* protects if_refcnt */
113*0a0e9771SDarren Reed 	int		if_refcnt;	/* if_reflock */
114*0a0e9771SDarren Reed 	zoneid_t	if_zoneid;
115*0a0e9771SDarren Reed 	avl_node_t	if_avl_by_shared;	/* protected by ips_avl_lock */
116*0a0e9771SDarren Reed 	struct ipnet_stack *if_stackp;
117*0a0e9771SDarren Reed 	int		if_sharecnt;	/* protected by if_reflock */
118b127ac41SPhilip Kirk } ipnetif_t;
119b127ac41SPhilip Kirk 
/*
 * Bit values for ipnetif_t.if_flags.  Every flag occupies its own bit, so
 * any combination may be OR'd together.
 */
#define	IPNETIF_IPV4PLUMBED	0x01
#define	IPNETIF_IPV6PLUMBED	0x02
#define	IPNETIF_IPV4ALLMULTI	0x04
#define	IPNETIF_IPV6ALLMULTI	0x08
#define	IPNETIF_LOOPBACK	0x10
126b127ac41SPhilip Kirk 
127b127ac41SPhilip Kirk /*
128b127ac41SPhilip Kirk  * Structure used by the accept callback function.  This is simply an address
129b127ac41SPhilip Kirk  * pointer into a packet (either IPv4 or IPv6), along with an address family
130b127ac41SPhilip Kirk  * that denotes which pointer is valid.
131b127ac41SPhilip Kirk  */
132b127ac41SPhilip Kirk typedef struct ipnet_addrp {
133b127ac41SPhilip Kirk 	sa_family_t	iap_family;
134b127ac41SPhilip Kirk 	union {
135b127ac41SPhilip Kirk 		ipaddr_t	*iapu_addr4;
136b127ac41SPhilip Kirk 		in6_addr_t	*iapu_addr6;
137b127ac41SPhilip Kirk 	} iap_addrp;
138b127ac41SPhilip Kirk } ipnet_addrp_t;
139b127ac41SPhilip Kirk #define	iap_addr4	iap_addrp.iapu_addr4
140b127ac41SPhilip Kirk #define	iap_addr6	iap_addrp.iapu_addr6
141b127ac41SPhilip Kirk 
142b127ac41SPhilip Kirk struct ipnet;
143b127ac41SPhilip Kirk struct ipobs_hook_data;
144*0a0e9771SDarren Reed typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct hook_pkt_observe_s *,
145b127ac41SPhilip Kirk     ipnet_addrp_t *, ipnet_addrp_t *);
146b127ac41SPhilip Kirk 
147b127ac41SPhilip Kirk /*
148b127ac41SPhilip Kirk  * Per instance data for all open streams. Instance data is held on a
149b127ac41SPhilip Kirk  * per netstack list see struct ipnet_stack below.
150b127ac41SPhilip Kirk  */
151b127ac41SPhilip Kirk typedef struct ipnet {
152b127ac41SPhilip Kirk 	queue_t		*ipnet_rq;	/* read queue pointer */
153b127ac41SPhilip Kirk 	minor_t		ipnet_minor;	/* minor number for this instance */
154b127ac41SPhilip Kirk 	ipnetif_t	*ipnet_if;	/* ipnetif for this open instance */
155b127ac41SPhilip Kirk 	zoneid_t	ipnet_zoneid;	/* zoneid the device was opened in */
156*0a0e9771SDarren Reed 	uint_t		ipnet_flags;	/* see below */
157*0a0e9771SDarren Reed 	t_scalar_t	ipnet_family;	/* protocol family of this instance */
158b127ac41SPhilip Kirk 	t_uscalar_t	ipnet_dlstate;	/* dlpi state */
159b127ac41SPhilip Kirk 	list_node_t	ipnet_next;	/* list next member */
160b127ac41SPhilip Kirk 	netstack_t	*ipnet_ns;	/* netstack of zone we were opened in */
161b127ac41SPhilip Kirk 	ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */
162*0a0e9771SDarren Reed 	hook_t		*ipnet_hook;	/* hook token to unregister */
163*0a0e9771SDarren Reed 	void		*ipnet_data;	/* value to pass back to bpf_itap */
164b127ac41SPhilip Kirk } ipnet_t;
165b127ac41SPhilip Kirk 
/*
 * Bit values for ipnet_t.ipnet_flags.  Every flag occupies its own bit, so
 * any combination may be OR'd together.
 */
#define	IPNET_PROMISC_PHYS	0x01
#define	IPNET_PROMISC_MULTI	0x02
#define	IPNET_PROMISC_SAP	0x04
#define	IPNET_INFO		0x08
#define	IPNET_LOMODE		0x10
172b127ac41SPhilip Kirk 
173b127ac41SPhilip Kirk /*
174b127ac41SPhilip Kirk  * Per-netstack data holding:
175b127ac41SPhilip Kirk  * - net_handle_t references for IPv4 and IPv6 for this netstack.
176b127ac41SPhilip Kirk  * - avl trees by name and index for ip interfaces associated with this
177b127ac41SPhilip Kirk  *   netstack. The trees are protected by ips_avl_lock.
178b127ac41SPhilip Kirk  * - ips_str_list is a list of open client streams.  ips_walkers_lock in
179b127ac41SPhilip Kirk  *   conjunction with ips_walkers_cv and ips_walkers_cnt synchronize access to
180b127ac41SPhilip Kirk  *   the list.  The count is incremented in ipnet_dispatch() at the start of a
181b127ac41SPhilip Kirk  *   walk and decremented when the walk is finished. If the walkers count is 0
182b127ac41SPhilip Kirk  *   then we cv_broadcast() waiting any threads waiting on the walkers count.
183b127ac41SPhilip Kirk  * - ips_event_lock synchronizes ipnet_if_init() and incoming NIC info events.
184b127ac41SPhilip Kirk  *   We cannot be processing any NIC info events while initializing interfaces
185b127ac41SPhilip Kirk  *   in ipnet_if_init().
186b127ac41SPhilip Kirk  *
187b127ac41SPhilip Kirk  * Note on lock ordering: If a thread needs to both hold the ips_event_lock
188b127ac41SPhilip Kirk  * and any other lock such as ips_walkers_lock, ips_avl_lock, or if_addr_lock,
189b127ac41SPhilip Kirk  * the ips_event_lock must be held first.  This lock ordering is mandated by
190b127ac41SPhilip Kirk  * ipnet_nicevent_cb() which must always grab ips_event_lock before continuing
191b127ac41SPhilip Kirk  * with processing NIC events.
192b127ac41SPhilip Kirk  */
193b127ac41SPhilip Kirk typedef struct ipnet_stack {
194b127ac41SPhilip Kirk 	net_handle_t	ips_ndv4;
195b127ac41SPhilip Kirk 	net_handle_t	ips_ndv6;
196b127ac41SPhilip Kirk 	netstack_t	*ips_netstack;
197b127ac41SPhilip Kirk 	hook_t		*ips_nicevents;
198b127ac41SPhilip Kirk 	kmutex_t	ips_event_lock;
199b127ac41SPhilip Kirk 	kmutex_t	ips_avl_lock;
200b127ac41SPhilip Kirk 	avl_tree_t	ips_avl_by_index;
201b127ac41SPhilip Kirk 	avl_tree_t	ips_avl_by_name;
202b127ac41SPhilip Kirk 	kmutex_t	ips_walkers_lock;
203b127ac41SPhilip Kirk 	kcondvar_t	ips_walkers_cv;
204b127ac41SPhilip Kirk 	uint_t		ips_walkers_cnt;
205b127ac41SPhilip Kirk 	list_t		ips_str_list;
206*0a0e9771SDarren Reed 	kstat_t		*ips_kstatp;
207*0a0e9771SDarren Reed 	ipnet_kstats_t	ips_stats;
208*0a0e9771SDarren Reed 	bpf_attach_fn_t	ips_bpfattach_fn;
209*0a0e9771SDarren Reed 	bpf_detach_fn_t	ips_bpfdetach_fn;
210*0a0e9771SDarren Reed 	avl_tree_t	ips_avl_by_shared;
211*0a0e9771SDarren Reed 	hook_t		*ips_hook;
212b127ac41SPhilip Kirk } ipnet_stack_t;
213b127ac41SPhilip Kirk 
/*
 * Template for dl_info_ack_t initialization.  We don't have an address, so we
 * set the address length to just the SAP length (16 bits).  We don't really
 * have a maximum SDU, but setting it to UINT_MAX proved problematic with
 * applications that performed arithmetic on dl_max_sdu and wrapped around, so
 * we sleaze out and use INT_MAX.
 *
 * The initializers are positional, so their order must match the member
 * order of dl_info_ack_t.  Any member that follows dl_brdcst_addr_offset is
 * left to default (zero) initialization.
 */
#define	IPNET_INFO_ACK_INIT {						\
	DL_INFO_ACK,			/* dl_primitive */		\
	INT_MAX,			/* dl_max_sdu */		\
	0,				/* dl_min_sdu */		\
	sizeof (uint16_t),		/* dl_addr_length */		\
	DL_IPNET,			/* dl_mac_type */		\
	0,				/* dl_reserved */		\
	0,				/* dl_current_state */		\
	sizeof (uint16_t),		/* dl_sap_length */		\
	DL_CLDLS,			/* dl_service_mode */		\
	0,				/* dl_qos_length */		\
	0,				/* dl_qos_offset */		\
	0,				/* dl_qos_range_length */	\
	0,				/* dl_qos_range_offset */	\
	DL_STYLE1,			/* dl_provider_style */		\
	0,				/* dl_addr_offset */		\
	DL_VERSION_2,			/* dl_version */		\
	0,				/* dl_brdcst_addr_length */	\
	0				/* dl_brdcst_addr_offset */	\
}
241b127ac41SPhilip Kirk 
242b127ac41SPhilip Kirk typedef void ipnet_walkfunc_t(const char *, void *, dev_t);
243*0a0e9771SDarren Reed 
244*0a0e9771SDarren Reed extern int	ipnet_client_open(ipnetif_t *, ipnetif_t **);
245*0a0e9771SDarren Reed extern void	ipnet_client_close(ipnetif_t *);
246*0a0e9771SDarren Reed extern void	ipnet_close_byhandle(ipnetif_t *);
247*0a0e9771SDarren Reed extern int	ipnet_get_linkid_byname(const char *, datalink_id_t *,
248*0a0e9771SDarren Reed     zoneid_t);
249b127ac41SPhilip Kirk extern dev_t	ipnet_if_getdev(char *, zoneid_t);
250*0a0e9771SDarren Reed extern const char *ipnet_name(ipnetif_t *);
251*0a0e9771SDarren Reed extern int	ipnet_open_byname(const char *, ipnetif_t **, zoneid_t);
252*0a0e9771SDarren Reed extern int	ipnet_promisc_add(void *, uint_t, void *, uintptr_t *, int);
253*0a0e9771SDarren Reed extern void	ipnet_promisc_remove(void *);
254*0a0e9771SDarren Reed extern void	ipnet_set_bpfattach(bpf_attach_fn_t, bpf_detach_fn_t,
255*0a0e9771SDarren Reed     zoneid_t, bpf_itap_fn_t, bpf_provider_reg_fn_t);
256*0a0e9771SDarren Reed extern void	ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
257*0a0e9771SDarren Reed 
258*0a0e9771SDarren Reed extern bpf_provider_t	bpf_ipnet;
259b127ac41SPhilip Kirk 
260b127ac41SPhilip Kirk #ifdef __cplusplus
261b127ac41SPhilip Kirk }
262b127ac41SPhilip Kirk #endif
263b127ac41SPhilip Kirk 
264b127ac41SPhilip Kirk #endif /* _INET_IPNET_H */
265