xref: /illumos-gate/usr/src/uts/common/sys/idm/idm_impl.h (revision a98e9e2e16f7c0305e8538246f8f8464517b3884)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23   */
24  /*
25   * Copyright 2014-2015 Nexenta Systems, Inc.  All rights reserved.
26   */
27  
28  #ifndef	_IDM_IMPL_H_
29  #define	_IDM_IMPL_H_
30  
31  #ifdef	__cplusplus
32  extern "C" {
33  #endif
34  
35  #include <sys/avl.h>
36  #include <sys/socket_impl.h>
37  #include <sys/taskq_impl.h>
38  
39  /*
40   * IDM lock order:
41   *
42   * idm_taskid_table_lock, idm_task_t.idt_mutex
43   */
44  
/*
 * CF_* flag bits.  NOTE(review): the field that holds them is not
 * identified in this header (idm_conn_t.ic_state_flags looks likely) --
 * confirm against the connection state machine.
 */
45  #define	CF_LOGIN_READY		0x00000001
46  #define	CF_INITIAL_LOGIN	0x00000002
47  #define	CF_ERROR		0x80000000
48  
/*
 * Role of a connection, stored in idm_conn_t.ic_conn_type and tested by
 * IDM_CONN_ISINI() / IDM_CONN_ISTGT().  Values start at 1, so a
 * zero-filled idm_conn_t matches neither role.
 */
49  typedef enum {
50  	CONN_TYPE_INI = 1,	/* initiator connection */
51  	CONN_TYPE_TGT		/* target connection */
52  } idm_conn_type_t;
53  
54  /*
55   * Watchdog interval in seconds.
 * Both idle timeouts below are whole multiples of this value.
56   */
57  #define	IDM_WD_INTERVAL			5
58  
59  /*
60   * Idle time, in seconds, after which the client "keepalive" callback
61   * is invoked.
62   */
63  #define	IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT	20
64  
65  /*
66   * Idle time, in seconds, after which a TRANSPORT_FAIL event is
67   * generated.  This is longer than the keepalive idle timeout above.
68   */
69  #define	IDM_TRANSPORT_FAIL_IDLE_TIMEOUT	30
70  
71  /*
72   * IDM reference count structure.  Audit code is shamelessly adapted
73   * from CIFS server.
74   */
75  
/* Number of pc_t slots in each audit record's stack (anr_stack). */
76  #define	REFCNT_AUDIT_STACK_DEPTH	16
/*
 * Number of records in refcnt_audit_buf_t.anb_records.  REFCNT_AUDIT()
 * wraps the write index with a bitwise AND against anb_max_index.
 * NOTE(review): that presumably requires this to stay a power of two with
 * anb_max_index set to one less -- confirm where anb_max_index is set.
 */
77  #define	REFCNT_AUDIT_BUF_MAX_REC	16
78  
/*
 * One audit record, filled in by REFCNT_AUDIT(): the reference count at
 * that moment plus the stack captured by getpcstack().
 */
79  typedef struct {
80  	uint32_t		anr_refcnt;	/* copy of ir_refcnt */
81  	int			anr_depth;	/* getpcstack() return value */
82  	pc_t			anr_stack[REFCNT_AUDIT_STACK_DEPTH];
83  } refcnt_audit_record_t;
84  
/*
 * Buffer of audit records embedded in idm_refcnt_t.  REFCNT_AUDIT() writes
 * the record at anb_index, then increments anb_index and ANDs it with
 * anb_max_index.
 */
85  typedef struct {
86  	int			anb_index;	/* slot the next audit writes */
87  	int			anb_max_index;	/* mask applied to anb_index */
88  	refcnt_audit_record_t	anb_records[REFCNT_AUDIT_BUF_MAX_REC];
89  } refcnt_audit_buf_t;
90  
/*
 * REFCNT_AUDIT(_rf_) - append an audit record to the ir_audit_buf of the
 * idm_refcnt_t that _rf_ points to.
 *
 * The record at anb_index is selected first; anb_index is then incremented
 * and masked with anb_max_index; finally the selected record receives the
 * current ir_refcnt and the stack returned by getpcstack().
 *
 * Notes for callers:
 *  - _rf_ is evaluated several times; pass an expression without side
 *    effects.
 *  - The expansion is a bare brace block (not do/while) that declares a
 *    local named "anr", so "if (x) REFCNT_AUDIT(r); else ..." will not
 *    compile.
 *  - The macro takes no lock itself.  NOTE(review): callers presumably
 *    hold ir_mutex -- confirm.
 */
91  #define	REFCNT_AUDIT(_rf_) {				\
92  	refcnt_audit_record_t	*anr;			\
93  							\
94  	anr = (_rf_)->ir_audit_buf.anb_records;		\
95  	anr += (_rf_)->ir_audit_buf.anb_index;		\
96  	(_rf_)->ir_audit_buf.anb_index++;		\
97  	(_rf_)->ir_audit_buf.anb_index &=		\
98  	    (_rf_)->ir_audit_buf.anb_max_index;		\
99  	anr->anr_refcnt = (_rf_)->ir_refcnt;		\
100  	anr->anr_depth = getpcstack(anr->anr_stack,	\
101  	    REFCNT_AUDIT_STACK_DEPTH);			\
102  }
103  
/* Forward declaration; the structure itself is defined further down. */
104  struct idm_refcnt_s;
105  
/*
 * Callback type stored in idm_refcnt_t.ir_cb.  NOTE(review): ref_obj is
 * presumably the ir_referenced_obj of the reference count -- confirm.
 */
106  typedef void (idm_refcnt_cb_t)(void *ref_obj);
107  
/*
 * Wait mode recorded in idm_refcnt_t.ir_waiting.  NOTE(review): going by
 * the names these mean "nobody waiting", "a thread is blocked waiting" and
 * "completion is reported through ir_cb" -- confirm against the refcnt
 * implementation.
 */
108  typedef enum {
109  	REF_NOWAIT,
110  	REF_WAIT_SYNC,
111  	REF_WAIT_ASYNC
112  } idm_refcnt_wait_t;
113  
/*
 * IDM reference count.  Embedded in idm_svc_t, idm_conn_t and idm_task_t.
 */
114  typedef struct idm_refcnt_s {
115  	int			ir_refcnt;	/* sampled by REFCNT_AUDIT() */
116  	void			*ir_referenced_obj;
117  	idm_refcnt_wait_t	ir_waiting;	/* see idm_refcnt_wait_t */
118  	kmutex_t		ir_mutex;
119  	kcondvar_t		ir_cv;
120  	idm_refcnt_cb_t		*ir_cb;
121  	refcnt_audit_buf_t	ir_audit_buf;	/* written by REFCNT_AUDIT() */
122  } idm_refcnt_t;
123  
124  /*
125   * Connection parameters.  These are populated at connection create, or
126   * during key-value negotiation at login.
 * Embedded in idm_conn_t as ic_conn_params.  Note that the struct tag is
 * plural (idm_conn_params_s) while the typedef is singular.
127   */
128  typedef struct idm_conn_params_s {
129  	uint32_t		max_recv_dataseglen;
130  	uint32_t		max_xmit_dataseglen;
131  	uint32_t		conn_login_max;
132  	uint32_t		conn_login_interval;
133  	boolean_t		nonblock_socket;
134  } idm_conn_param_t;
135  
/*
 * IDM service object.  Target connections refer to their service through
 * idm_conn_t.ic_svc_binding.  NOTE(review): is_list_node presumably links
 * the service onto idm_global_t.idm_tgt_svc_list -- confirm.
 */
136  typedef struct idm_svc_s {
137  	list_node_t		is_list_node;
138  	kmutex_t		is_mutex;
139  	kcondvar_t		is_cv;
140  	kmutex_t		is_count_mutex;
141  	kcondvar_t		is_count_cv;
142  	idm_refcnt_t		is_refcnt;
143  	int			is_online;
144  	/* transport-specific service components */
145  	void			*is_so_svc;
146  	void			*is_iser_svc;
147  	idm_svc_req_t		is_svc_req;
148  } idm_svc_t;
149  
/*
 * Lengths, excluding the terminating NUL, of the textual session
 * identifiers kept in idm_conn_t.ic_tsih and idm_conn_t.ic_isid.
 */
#define	ISCSI_MAX_TSIH_LEN	6	/* 0x%04x */
/*
 * Twice ISCSI_ISID_LEN (presumably two characters per ISID byte).  The
 * expansion is parenthesized so that it stays one operand when used inside
 * a larger expression, e.g. "x / ISCSI_MAX_ISID_LEN".
 */
#define	ISCSI_MAX_ISID_LEN	(ISCSI_ISID_LEN * 2)
152  
/*
 * IDM connection object, used for both initiator and target connections;
 * ic_conn_type says which (see IDM_CONN_ISINI() / IDM_CONN_ISTGT()).
 * NOTE(review): ic_list_node presumably links the connection onto
 * idm_global_t.idm_tgt_conn_list or idm_ini_conn_list -- confirm.
 */
153  typedef struct idm_conn_s {
154  	list_node_t		ic_list_node;
155  	void			*ic_handle;
156  	idm_refcnt_t		ic_refcnt;
157  	idm_svc_t		*ic_svc_binding; /* Target conn. only */
158  	idm_sockaddr_t		ic_ini_dst_addr;
159  	struct sockaddr_storage	ic_laddr;	/* conn local address */
160  	struct sockaddr_storage	ic_raddr;	/* conn remote address */
161  
162  	/*
163  	 * the target_name, initiator_name, initiator session
164  	 * identifier and target session identifying handle
165  	 * are only used for target connections.
	 * Each array has room for the maximum length plus a NUL.
166  	 */
167  	char			ic_target_name[ISCSI_MAX_NAME_LEN + 1];
168  	char			ic_initiator_name[ISCSI_MAX_NAME_LEN + 1];
169  	char			ic_tsih[ISCSI_MAX_TSIH_LEN + 1];
170  	char			ic_isid[ISCSI_MAX_ISID_LEN + 1];
171  	idm_conn_state_t	ic_state;
172  	idm_conn_state_t	ic_last_state;
173  	sm_audit_buf_t		ic_state_audit;
174  	kmutex_t		ic_state_mutex;
175  	kcondvar_t		ic_state_cv;
176  	uint32_t		ic_state_flags;
177  	timeout_id_t		ic_state_timeout;
178  	struct idm_conn_s	*ic_reinstate_conn; /* For conn reinst. */
179  	struct idm_conn_s	*ic_logout_conn; /* For other conn logout */
180  	taskq_t			*ic_state_taskq;
181  	int			ic_pdu_events;
182  	boolean_t		ic_login_info_valid;
183  	boolean_t		ic_rdma_extensions;
184  	uint16_t		ic_login_cid;
185  
186  	kmutex_t		ic_mutex;
187  	kcondvar_t		ic_cv;
188  	idm_status_t		ic_conn_sm_status;
189  
190  	boolean_t		ic_ffp;
191  	boolean_t		ic_keepalive;
192  	uint32_t		ic_internal_cid;
193  
194  	uint32_t		ic_conn_flags;
195  	idm_conn_type_t		ic_conn_type;	/* CONN_TYPE_INI or _TGT */
196  	idm_conn_ops_t		ic_conn_ops;
197  	idm_transport_ops_t	*ic_transport_ops;
198  	idm_transport_type_t	ic_transport_type;
199  	int			ic_transport_hdrlen;
200  	void			*ic_transport_private;
201  	idm_conn_param_t	ic_conn_params;
202  	/*
203  	 * Save client callback to interpose idm callback
204  	 */
205  	idm_pdu_cb_t		*ic_client_callback;
206  	clock_t			ic_timestamp;
207  } idm_conn_t;
208  
/*
 * IDM_CONN_* flag bits.  NOTE(review): presumably kept in
 * idm_conn_t.ic_conn_flags -- confirm.  idm_task_t marks its byte counters
 * as relevant to IDM_CONN_USE_SCOREBOARD.
 */
209  #define	IDM_CONN_HEADER_DIGEST	0x00000001
210  #define	IDM_CONN_DATA_DIGEST	0x00000002
211  #define	IDM_CONN_USE_SCOREBOARD	0x00000004
212  
/* Role predicates; ICI_IC is a pointer to an idm_conn_t. */
213  #define	IDM_CONN_ISINI(ICI_IC)	((ICI_IC)->ic_conn_type == CONN_TYPE_INI)
214  #define	IDM_CONN_ISTGT(ICI_IC)	((ICI_IC)->ic_conn_type == CONN_TYPE_TGT)
215  
216  /*
217   * An IDM target task can transfer data using multiple buffers. The task
218   * will maintain a list of buffers, and each buffer will contain the relative
219   * offset of the transfer and a pointer to the next buffer in the list.
220   *
221   * Note on client private data:
222   * idt_private is intended to be a pointer to some sort of client-
223   * specific state.
224   *
225   * idt_client_handle is a more generic client-private piece of data that can
226   * be used by the client for the express purpose of task lookup.  The driving
227   * use case for this is for the client to store the initiator task tag for
228   * a given task so that it may be more easily retrieved for task management.
229   *
230   * The key take away here is that clients should never call
231   * idm_task_find_by_handle in the performance path.
232   *
233   * An initiator will require only one buffer per task, the offset will be 0.
234   */
235  
/*
 * Per-task IDM state.  Per the lock order note in this file,
 * idm_taskid_table_lock is taken before idt_mutex.
 */
236  typedef struct idm_task_s {
237  	idm_conn_t		*idt_ic;	/* Associated connection */
238  	/* connection type is in idt_ic->ic_conn_type */
239  	kmutex_t		idt_mutex;
240  	void			*idt_private;	/* Client private data */
241  	uintptr_t		idt_client_handle;	/* Client private */
242  	uint32_t		idt_tt;		/* Task tag */
243  	uint32_t		idt_r2t_ttt;	/* R2T Target Task tag */
244  	idm_task_state_t	idt_state;
245  	idm_refcnt_t		idt_refcnt;
246  
247  	/*
248  	 * Statistics
249  	 */
250  	int			idt_tx_to_ini_start;
251  	int			idt_tx_to_ini_done;
252  	int			idt_rx_from_ini_start;
253  	int			idt_rx_from_ini_done;
254  	int			idt_tx_bytes;	/* IDM_CONN_USE_SCOREBOARD */
255  	int			idt_rx_bytes;	/* IDM_CONN_USE_SCOREBOARD */
256  
257  	uint32_t		idt_exp_datasn;	/* expected datasn */
258  	uint32_t		idt_exp_rttsn;	/* expected rttsn */
259  	list_t			idt_inbufv;	/* chunks of IN buffers */
260  	list_t			idt_outbufv;	/* chunks of OUT buffers */
261  
262  	/*
263  	 * Transport header, which describes this task's remote tagged buffer
264  	 */
265  	int			idt_transport_hdrlen;
266  	void			*idt_transport_hdr;
267  	uint32_t		idt_flags;	/* IDM_TASK_PHASECOLLAPSE_* */
268  } idm_task_t;
269  
/*
 * Constructor/destructor pair for idm_task_t objects; the task is passed
 * as a void pointer.  NOTE(review): the signatures look like kmem cache
 * callbacks, presumably for idm_global_t.idm_task_cache -- confirm.
 */
270  int idm_task_constructor(void *task_void, void *arg, int flags);
271  void idm_task_destructor(void *task_void, void *arg);
272  
/*
 * NOTE(review): IDM_TASKIDS_MAX presumably bounds
 * idm_global_t.idm_taskid_table -- confirm.  IDM_BUF_MAGIC is the value
 * expected in idm_buf_t.idb_magic and compared by BUFPAT_MATCH().
 */
273  #define	IDM_TASKIDS_MAX		16384
274  #define	IDM_BUF_MAGIC		0x49425546	/* "IBUF" */
275  
/* Phase collapse flag bits (see idm_task_t.idt_flags). */
276  #define	IDM_TASK_PHASECOLLAPSE_REQ	0x00000001 /* request phase collapse */
277  #define	IDM_TASK_PHASECOLLAPSE_SUCCESS	0x00000002 /* phase collapse success */
278  
279  /* Data buffer descriptor.  Protect with task mutex */
280  typedef struct idm_buf_s {
281  	uint32_t	idb_magic;	/* "IBUF" */
282  
283  	/*
284  	 * Note: idm_tx_link *must* be the second element in this structure
285  	 * for proper TX PDU ordering.
	 * idm_pdu_t and idm_tx_obj_t begin with the same two members.
286  	 */
287  	list_node_t	idm_tx_link;	/* link in a list of TX objects */
288  
289  	list_node_t	idb_buflink;	/* link in a multi-buffer data xfer */
290  	idm_conn_t	*idb_ic;	/* Associated connection */
291  	void		*idb_buf;	/* data */
292  	uint64_t	idb_buflen;	/* length of buffer */
293  	size_t		idb_bufoffset;	/* offset in a multi-buffer xfer */
294  	boolean_t	idb_bufalloc;  /* true if alloc'd in idm_buf_alloc */
295  	/*
296  	 * DataPDUInOrder=Yes, so to track that the PDUs in a sequence are sent
297  	 * in continuously increasing address order, check that offsets for a
298  	 * single buffer xfer are in order.
299  	 */
300  	uint32_t	idb_exp_offset;
301  	size_t		idb_xfer_len;	/* Current requested xfer len */
302  	void		*idb_buf_private; /* transport-specific buf handle */
303  	void		*idb_reg_private; /* transport-specific reg handle */
304  	void		*idb_bufptr; /* transport-specific bcopy pointer */
305  	boolean_t	idb_bufbcopy;	/* true if bcopy required */
306  
307  	idm_buf_cb_t	*idb_buf_cb;	/* Data Completion Notify, tgt only */
308  	void		*idb_cb_arg;	/* Client private data */
309  	idm_task_t	*idb_task_binding;
310  	timespec_t	idb_xfer_start;
311  	timespec_t	idb_xfer_done;
312  	boolean_t	idb_in_transport;
313  	boolean_t	idb_tx_thread;		/* Sockets only */
314  	iscsi_hdr_t	idb_data_hdr_tmpl;	/* Sockets only */
315  	idm_status_t	idb_status;
316  } idm_buf_t;
317  
/*
 * Kind of buffer-pattern check requested.  NOTE(review): the consumers are
 * not visible in this header; the names suggest increasing strictness
 * (quick, thorough, assert on mismatch) -- confirm.
 */
318  typedef enum {
319  	BP_CHECK_QUICK,
320  	BP_CHECK_THOROUGH,
321  	BP_CHECK_ASSERT
322  } idm_bufpat_check_type_t;
323  
/*
 * BUFPAT_MATCH(bc_bufpat, bc_idb) - non-zero when the pattern record that
 * bc_bufpat points to (an idm_bufpat_t) names buffer bc_idb and carries
 * IDM_BUF_MAGIC.
 *
 * The expansion now uses its bc_bufpat parameter; it used to reference a
 * caller variable that had to be named "bufpat", silently ignoring the
 * argument.  Both parameters are parenthesized.  bc_bufpat is evaluated
 * twice, so pass an expression without side effects.
 */
#define	BUFPAT_MATCH(bc_bufpat, bc_idb)				\
	(((bc_bufpat)->bufpat_idb == (bc_idb)) &&		\
	    ((bc_bufpat)->bufpat_bufmagic == IDM_BUF_MAGIC))
327  
/*
 * Buffer pattern record examined by BUFPAT_MATCH(), which compares
 * bufpat_idb with a buffer pointer and bufpat_bufmagic with IDM_BUF_MAGIC.
 */
328  typedef struct idm_bufpat_s {
329  	void		*bufpat_idb;
330  	uint32_t	bufpat_bufmagic;
331  	uint32_t	bufpat_offset;
332  } idm_bufpat_t;
333  
/* Number of entries in idm_pdu_t.isp_iov. */
334  #define	PDU_MAX_IOVLEN	12
/* Value expected in idm_pdu_t.isp_magic. */
335  #define	IDM_PDU_MAGIC	0x49504455	/* "IPDU" */
336  
/*
 * IDM PDU descriptor.  It begins with the same two members (magic, then
 * idm_tx_link) as idm_buf_t and idm_tx_obj_t.
 */
337  typedef struct idm_pdu_s {
338  	uint32_t	isp_magic;	/* "IPDU" */
339  
340  	/*
341  	 * Internal - Order is vital.  idm_tx_link *must* be the second
342  	 * element in this structure for proper TX PDU ordering.
343  	 */
344  	list_node_t	idm_tx_link;
345  
346  	list_node_t	isp_client_lnd;
347  
348  	idm_conn_t	*isp_ic;	/* Must be set */
349  	iscsi_hdr_t	*isp_hdr;
350  	uint_t		isp_hdrlen;
351  	uint8_t		*isp_data;
352  	uint_t		isp_datalen;
353  
354  	/* Transport header */
355  	void		*isp_transport_hdr;
356  	uint32_t	isp_transport_hdrlen;
357  	void		*isp_transport_private;
358  
359  	/*
360  	 * isp_data is used for sending SCSI status, NOP, text, scsi and
361  	 * non-scsi data. Data is received using isp_iov and isp_iovlen
362  	 * to support data over multiple buffers.
363  	 */
364  	void		*isp_private;
365  	idm_pdu_cb_t	*isp_callback;
366  	idm_status_t	isp_status;
367  
368  	/*
369  	 * The following three elements are only used in
370  	 * idm_sorecv_scsidata() currently.
371  	 */
372  	struct iovec	isp_iov[PDU_MAX_IOVLEN];
373  	int		isp_iovlen;
374  	idm_buf_t	*isp_sorx_buf;
375  
376  	/* Implementation data for idm_pdu_alloc and sorx PDU cache */
377  	uint32_t	isp_flags;	/* IDM_PDU_* flag bits */
378  	uint_t		isp_hdrbuflen;
379  	uint_t		isp_databuflen;
380  	hrtime_t	isp_queue_time;
381  
382  	/* Taskq dispatching state for deferred PDU */
383  	taskq_ent_t	isp_tqent;
384  } idm_pdu_t;
385  
386  /*
387   * This "generic" object is used when removing an item from the ic_tx_list
388   * in order to determine whether it's an idm_pdu_t or an idm_buf_t.
 * Both of those begin with a uint32_t magic followed by idm_tx_link, so
 * the magic can be read through this type before the real type is known.
389   */
390  
391  typedef struct {
392  	uint32_t	idm_tx_obj_magic; /* IDM_PDU_MAGIC or IDM_BUF_MAGIC */
393  	/*
394  	 * idm_tx_link *must* be the second element in this structure.
395  	 */
396  	list_node_t	idm_tx_link;
397  } idm_tx_obj_t;
398  
399  
/*
 * Opcode of a PDU: the opcode byte of its header masked with
 * ISCSI_OPCODE_MASK.  PDU is an idm_pdu_t pointer whose isp_hdr is set.
 */
400  #define	IDM_PDU_OPCODE(PDU) \
401  	((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK)
402  
/*
 * IDM_PDU_* flag bits.  NOTE(review): presumably kept in
 * idm_pdu_t.isp_flags -- confirm.
 */
403  #define	IDM_PDU_ALLOC		0x00000001
404  #define	IDM_PDU_ADDL_HDR	0x00000002
405  #define	IDM_PDU_ADDL_DATA	0x00000004
406  #define	IDM_PDU_LOGIN_TX	0x00000008
407  #define	IDM_PDU_SET_STATSN	0x00000010
408  #define	IDM_PDU_ADVANCE_STATSN	0x00000020
409  
410  #define	OSD_EXT_CDB_AHSLEN	(200 - 15)
411  #define	BIDI_AHS_LENGTH		5
412  /*
413   * Additional Header Segment (AHS)
414   * AHS is only valid for SCSI Requests and contains SCSI CDB information
415   * which doesn't fit in the standard 16 byte area of the PDU. Commonly
416   * this only holds true for OSD device commands.
417   *
418   * IDM_SORX_CACHE_AHSLEN is the amount of memory which is preallocated in bytes.
419   * When used in the header the AHS length is stored as the number of 4-byte
420   * words; so IDM_SORX_WIRE_AHSLEN is IDM_SORX_CACHE_AHSLEN in words.
 *
 * With the values above this is (185 + 3) + (5 + 3) = 196 bytes, i.e. 49
 * words.  IDM_SORX_CACHE_HDRLEN adds sizeof (iscsi_hdr_t) to the byte
 * count.
421   */
422  #define	IDM_SORX_CACHE_AHSLEN \
423  	((OSD_EXT_CDB_AHSLEN + 3) + (BIDI_AHS_LENGTH + 3))
424  #define	IDM_SORX_WIRE_AHSLEN (IDM_SORX_CACHE_AHSLEN / sizeof (uint32_t))
425  #define	IDM_SORX_CACHE_HDRLEN	(sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN)
426  
427  /*
428   * ID pool
429   */
430  
#define	IDM_IDPOOL_MAGIC	0x4944504C	/* IDPL */
#define	IDM_IDPOOL_MIN_SIZE	64	/* Number of IDs to begin with */
/*
 * Largest pool size: 65536, which is the number of distinct uint16_t
 * values that idm_idpool_alloc() can hand out.  Parenthesized so that it
 * stays a single operand inside larger expressions.
 */
#define	IDM_IDPOOL_MAX_SIZE	(64 * 1024)
434  
/*
 * ID pool state, operated on by the idm_idpool_*() functions declared in
 * this header.  NOTE(review): the implementation is not visible here.
 * id_pool looks like a byte-array bitmap with id_bit / id_bit_idx /
 * id_idx as the scan position, and id_magic presumably holds
 * IDM_IDPOOL_MAGIC -- confirm.
 */
435  typedef struct idm_idpool {
436  	uint32_t	id_magic;
437  	kmutex_t	id_mutex;
438  	uint8_t		*id_pool;
439  	uint32_t	id_size;
440  	uint8_t		id_bit;
441  	uint8_t		id_bit_idx;
442  	uint32_t	id_idx;
443  	uint32_t	id_idx_msk;
444  	uint32_t	id_free_counter;
445  	uint32_t	id_max_free_counter;
446  } idm_idpool_t;
447  
448  /*
449   * Global IDM state structure
450   */
451  typedef struct {
452  	kmutex_t	idm_global_mutex;
453  	taskq_t		*idm_global_taskq;
454  	kthread_t	*idm_wd_thread;		/* watchdog thread */
455  	kt_did_t	idm_wd_thread_did;
456  	boolean_t	idm_wd_thread_running;
457  	kcondvar_t	idm_wd_cv;
458  	list_t		idm_tgt_svc_list;
459  	kcondvar_t	idm_tgt_svc_cv;
460  	list_t		idm_tgt_conn_list;
461  	int		idm_tgt_conn_count;
462  	list_t		idm_ini_conn_list;
463  	kmem_cache_t	*idm_buf_cache;
464  	kmem_cache_t	*idm_task_cache;
	/* Lock order: idm_taskid_table_lock before idm_task_t.idt_mutex */
465  	krwlock_t	idm_taskid_table_lock;
466  	idm_task_t	**idm_taskid_table;
467  	uint32_t	idm_taskid_next;
468  	uint32_t	idm_taskid_max;
469  	idm_idpool_t	idm_conn_id_pool;
470  	kmem_cache_t	*idm_sotx_pdu_cache;
471  	kmem_cache_t	*idm_sorx_pdu_cache;
472  	kmem_cache_t	*idm_so_128k_buf_cache;
473  } idm_global_t;
474  
/* The single global instance; it is only declared here, not defined. */
475  extern idm_global_t	idm; /* Global state */
476  
/*
 * ID pool interface.  All four functions operate on a caller-supplied
 * idm_idpool_t.  IDs are uint16_t: idm_idpool_alloc() stores the new ID
 * through *id and idm_idpool_free() takes it back by value.
 * NOTE(review): the int results are presumably 0 on success -- confirm.
 */
477  int
478  idm_idpool_create(idm_idpool_t	*pool);
479  
480  void
481  idm_idpool_destroy(idm_idpool_t *pool);
482  
483  int
484  idm_idpool_alloc(idm_idpool_t *pool, uint16_t *id);
485  
486  void
487  idm_idpool_free(idm_idpool_t *pool, uint16_t id);
488  
/*
 * PDU receive/transmit entry points.  Each takes the connection and the
 * PDU; only idm_pdu_rx_forward_ffp() returns a value (boolean_t).
 * NOTE(review): the meaning of that return value, and which of these
 * consume the PDU, cannot be determined from this header -- confirm
 * against the definitions.
 */
489  void
490  idm_pdu_rx(idm_conn_t *ic, idm_pdu_t *pdu);
491  
492  void
493  idm_pdu_tx_forward(idm_conn_t *ic, idm_pdu_t *pdu);
494  
495  boolean_t
496  idm_pdu_rx_forward_ffp(idm_conn_t *ic, idm_pdu_t *pdu);
497  
498  void
499  idm_pdu_rx_forward(idm_conn_t *ic, idm_pdu_t *pdu);
500  
501  void
502  idm_pdu_tx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu);
503  
504  void
505  idm_pdu_rx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu);
506  
507  void idm_parse_login_rsp(idm_conn_t *ic, idm_pdu_t *logout_req_pdu,
508      boolean_t rx);
509  
510  void idm_parse_logout_req(idm_conn_t *ic, idm_pdu_t *logout_req_pdu,
511      boolean_t rx);
512  
513  void idm_parse_logout_rsp(idm_conn_t *ic, idm_pdu_t *login_rsp_pdu,
514      boolean_t rx);
515  
/*
 * Connection lifecycle, connection-ID, CRC and lookup helpers.
 * NOTE(review): none of the definitions are visible in this header, so
 * ownership and locking rules must be confirmed there.  What the
 * signatures show:
 *  - idm_svc_conn_create() returns a status and hands the new connection
 *    back through *ic_result.
 *  - idm_cid_alloc() / idm_cid_free() exchange the connection ID as a
 *    uint32_t.
 *  - idm_crc32c_continued() takes a previous crc value in addition to the
 *    address and length that idm_crc32c() takes.
 *  - idm_lookup_conn() is keyed by ISID bytes, TSIH and CID.
 */
516  idm_status_t idm_svc_conn_create(idm_svc_t *is, idm_transport_type_t type,
517      idm_conn_t **ic_result);
518  
519  void idm_svc_conn_destroy(idm_conn_t *ic);
520  
521  idm_status_t idm_ini_conn_finish(idm_conn_t *ic);
522  
523  idm_status_t idm_tgt_conn_finish(idm_conn_t *ic);
524  
525  idm_conn_t *idm_conn_create_common(idm_conn_type_t conn_type,
526      idm_transport_type_t tt, idm_conn_ops_t *conn_ops);
527  
528  void idm_conn_destroy_common(idm_conn_t *ic);
529  
530  void idm_conn_close(idm_conn_t *ic);
531  
532  uint32_t idm_cid_alloc(void);
533  
534  void idm_cid_free(uint32_t cid);
535  
536  uint32_t idm_crc32c(void *address, unsigned long length);
537  
538  uint32_t idm_crc32c_continued(void *address, unsigned long length,
539      uint32_t crc);
540  
541  void idm_listbuf_insert(list_t *lst, idm_buf_t *buf);
542  
543  idm_conn_t *idm_lookup_conn(uint8_t *isid, uint16_t tsih, uint16_t cid);
544  
545  #ifdef	__cplusplus
546  }
547  #endif
548  
549  #endif /* _IDM_IMPL_H_ */
550