1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 26 */ 27 28 #ifndef _IDM_IMPL_H_ 29 #define _IDM_IMPL_H_ 30 31 #ifdef __cplusplus 32 extern "C" { 33 #endif 34 35 #include <sys/avl.h> 36 #include <sys/socket_impl.h> 37 #include <sys/taskq_impl.h> 38 39 /* 40 * IDM lock order: 41 * 42 * idm_taskid_table_lock, idm_task_t.idt_mutex 43 */ 44 45 #define CF_LOGIN_READY 0x00000001 46 #define CF_INITIAL_LOGIN 0x00000002 47 #define CF_ERROR 0x80000000 48 49 typedef enum { 50 CONN_TYPE_INI = 1, 51 CONN_TYPE_TGT 52 } idm_conn_type_t; 53 54 /* 55 * Watchdog interval in seconds 56 */ 57 #define IDM_WD_INTERVAL 5 58 59 /* 60 * Timeout period before the client "keepalive" callback is invoked in 61 * seconds if the connection is idle. 62 */ 63 #define IDM_TRANSPORT_KEEPALIVE_IDLE_TIMEOUT 20 64 65 /* 66 * Timeout period before a TRANSPORT_FAIL event is generated in seconds 67 * if the connection is idle. 68 */ 69 #define IDM_TRANSPORT_FAIL_IDLE_TIMEOUT 30 70 71 /* 72 * IDM reference count structure. Audit code is shamelessly adapted 73 * from CIFS server. 74 */ 75 76 #define REFCNT_AUDIT_STACK_DEPTH 16 77 #define REFCNT_AUDIT_BUF_MAX_REC 16 78 79 typedef struct { 80 uint32_t anr_refcnt; 81 int anr_depth; 82 pc_t anr_stack[REFCNT_AUDIT_STACK_DEPTH]; 83 } refcnt_audit_record_t; 84 85 typedef struct { 86 int anb_index; 87 int anb_max_index; 88 refcnt_audit_record_t anb_records[REFCNT_AUDIT_BUF_MAX_REC]; 89 } refcnt_audit_buf_t; 90 91 #define REFCNT_AUDIT(_rf_) { \ 92 refcnt_audit_record_t *anr; \ 93 \ 94 anr = (_rf_)->ir_audit_buf.anb_records; \ 95 anr += (_rf_)->ir_audit_buf.anb_index; \ 96 (_rf_)->ir_audit_buf.anb_index++; \ 97 (_rf_)->ir_audit_buf.anb_index &= \ 98 (_rf_)->ir_audit_buf.anb_max_index; \ 99 anr->anr_refcnt = (_rf_)->ir_refcnt; \ 100 anr->anr_depth = getpcstack(anr->anr_stack, \ 101 REFCNT_AUDIT_STACK_DEPTH); \ 102 } 103 104 struct idm_refcnt_s; 105 106 typedef void (idm_refcnt_cb_t)(void *ref_obj); 107 108 typedef enum { 109 REF_NOWAIT, 110 REF_WAIT_SYNC, 111 REF_WAIT_ASYNC 112 } idm_refcnt_wait_t; 113 114 typedef struct idm_refcnt_s { 115 int ir_refcnt; 116 void *ir_referenced_obj; 117 idm_refcnt_wait_t ir_waiting; 118 kmutex_t ir_mutex; 119 kcondvar_t ir_cv; 120 idm_refcnt_cb_t *ir_cb; 121 refcnt_audit_buf_t ir_audit_buf; 122 } idm_refcnt_t; 123 124 /* 125 * connection parameters - These parameters would be populated at 126 * connection create, or during key-value negotiation at login 127 */ 128 typedef struct idm_conn_params_s { 129 uint32_t max_recv_dataseglen; 130 uint32_t max_xmit_dataseglen; 131 uint32_t conn_login_max; 132 uint32_t conn_login_interval; 133 boolean_t nonblock_socket; 134 } idm_conn_param_t; 135 136 typedef struct idm_svc_s { 137 list_node_t is_list_node; 138 kmutex_t is_mutex; 139 kcondvar_t is_cv; 140 kmutex_t is_count_mutex; 141 kcondvar_t is_count_cv; 142 idm_refcnt_t is_refcnt; 143 int is_online; 144 /* transport-specific service components */ 145 void *is_so_svc; 146 void *is_iser_svc; 147 idm_svc_req_t is_svc_req; 148 } idm_svc_t; 149 150 #define ISCSI_MAX_TSIH_LEN 6 /* 0x%04x */ 151 #define ISCSI_MAX_ISID_LEN ISCSI_ISID_LEN * 2 152 153 typedef struct idm_conn_s { 154 list_node_t ic_list_node; 155 void *ic_handle; 156 idm_refcnt_t ic_refcnt; 157 idm_svc_t *ic_svc_binding; /* Target conn. only */ 158 idm_sockaddr_t ic_ini_dst_addr; 159 struct sockaddr_storage ic_laddr; /* conn local address */ 160 struct sockaddr_storage ic_raddr; /* conn remote address */ 161 162 /* 163 * the target_name, initiator_name, initiator session 164 * identifier and target session identifying handle 165 * are only used for target connections. 166 */ 167 char ic_target_name[ISCSI_MAX_NAME_LEN + 1]; 168 char ic_initiator_name[ISCSI_MAX_NAME_LEN + 1]; 169 char ic_tsih[ISCSI_MAX_TSIH_LEN + 1]; 170 char ic_isid[ISCSI_MAX_ISID_LEN + 1]; 171 idm_conn_state_t ic_state; 172 idm_conn_state_t ic_last_state; 173 sm_audit_buf_t ic_state_audit; 174 kmutex_t ic_state_mutex; 175 kcondvar_t ic_state_cv; 176 uint32_t ic_state_flags; 177 timeout_id_t ic_state_timeout; 178 struct idm_conn_s *ic_reinstate_conn; /* For conn reinst. */ 179 struct idm_conn_s *ic_logout_conn; /* For other conn logout */ 180 taskq_t *ic_state_taskq; 181 int ic_pdu_events; 182 boolean_t ic_login_info_valid; 183 boolean_t ic_rdma_extensions; 184 uint16_t ic_login_cid; 185 186 kmutex_t ic_mutex; 187 kcondvar_t ic_cv; 188 idm_status_t ic_conn_sm_status; 189 190 boolean_t ic_ffp; 191 boolean_t ic_keepalive; 192 uint32_t ic_internal_cid; 193 194 uint32_t ic_conn_flags; 195 idm_conn_type_t ic_conn_type; 196 idm_conn_ops_t ic_conn_ops; 197 idm_transport_ops_t *ic_transport_ops; 198 idm_transport_type_t ic_transport_type; 199 int ic_transport_hdrlen; 200 void *ic_transport_private; 201 idm_conn_param_t ic_conn_params; 202 /* 203 * Save client callback to interpose idm callback 204 */ 205 idm_pdu_cb_t *ic_client_callback; 206 clock_t ic_timestamp; 207 } idm_conn_t; 208 209 #define IDM_CONN_HEADER_DIGEST 0x00000001 210 #define IDM_CONN_DATA_DIGEST 0x00000002 211 #define IDM_CONN_USE_SCOREBOARD 0x00000004 212 213 #define IDM_CONN_ISINI(ICI_IC) ((ICI_IC)->ic_conn_type == CONN_TYPE_INI) 214 #define IDM_CONN_ISTGT(ICI_IC) ((ICI_IC)->ic_conn_type == CONN_TYPE_TGT) 215 216 /* 217 * An IDM target task can transfer data using multiple buffers. The task 218 * will maintain a list of buffers, and each buffer will contain the relative 219 * offset of the transfer and a pointer to the next buffer in the list. 220 * 221 * Note on client private data: 222 * idt_private is intended to be a pointer to some sort of client- 223 * specific state. 224 * 225 * idt_client_handle is a more generic client-private piece of data that can 226 * be used by the client for the express purpose of task lookup. The driving 227 * use case for this is for the client to store the initiator task tag for 228 * a given task so that it may be more easily retrieved for task management. 229 * 230 * The key take away here is that clients should never call 231 * idm_task_find_by_handle in the performance path. 232 * 233 * An initiator will require only one buffer per task, the offset will be 0. 234 */ 235 236 typedef struct idm_task_s { 237 idm_conn_t *idt_ic; /* Associated connection */ 238 /* connection type is in idt_ic->ic_conn_type */ 239 kmutex_t idt_mutex; 240 void *idt_private; /* Client private data */ 241 uintptr_t idt_client_handle; /* Client private */ 242 uint32_t idt_tt; /* Task tag */ 243 uint32_t idt_r2t_ttt; /* R2T Target Task tag */ 244 idm_task_state_t idt_state; 245 idm_refcnt_t idt_refcnt; 246 247 /* 248 * Statistics 249 */ 250 int idt_tx_to_ini_start; 251 int idt_tx_to_ini_done; 252 int idt_rx_from_ini_start; 253 int idt_rx_from_ini_done; 254 int idt_tx_bytes; /* IDM_CONN_USE_SCOREBOARD */ 255 int idt_rx_bytes; /* IDM_CONN_USE_SCOREBOARD */ 256 257 uint32_t idt_exp_datasn; /* expected datasn */ 258 uint32_t idt_exp_rttsn; /* expected rttsn */ 259 list_t idt_inbufv; /* chunks of IN buffers */ 260 list_t idt_outbufv; /* chunks of OUT buffers */ 261 262 /* 263 * Transport header, which describes this tasks remote tagged buffer 264 */ 265 int idt_transport_hdrlen; 266 void *idt_transport_hdr; 267 uint32_t idt_flags; /* phase collapse */ 268 } idm_task_t; 269 270 int idm_task_constructor(void *task_void, void *arg, int flags); 271 void idm_task_destructor(void *task_void, void *arg); 272 273 #define IDM_TASKIDS_MAX 16384 274 #define IDM_BUF_MAGIC 0x49425546 /* "IBUF" */ 275 276 #define IDM_TASK_PHASECOLLAPSE_REQ 0x00000001 /* request phase collapse */ 277 #define IDM_TASK_PHASECOLLAPSE_SUCCESS 0x00000002 /* phase collapse success */ 278 279 /* Protect with task mutex */ 280 typedef struct idm_buf_s { 281 uint32_t idb_magic; /* "IBUF" */ 282 283 /* 284 * Note: idm_tx_link *must* be the second element in the list for 285 * proper TX PDU ordering. 286 */ 287 list_node_t idm_tx_link; /* link in a list of TX objects */ 288 289 list_node_t idb_buflink; /* link in a multi-buffer data xfer */ 290 idm_conn_t *idb_ic; /* Associated connection */ 291 void *idb_buf; /* data */ 292 uint64_t idb_buflen; /* length of buffer */ 293 size_t idb_bufoffset; /* offset in a multi-buffer xfer */ 294 boolean_t idb_bufalloc; /* true if alloc'd in idm_buf_alloc */ 295 /* 296 * DataPDUInOrder=Yes, so to track that the PDUs in a sequence are sent 297 * in continuously increasing address order, check that offsets for a 298 * single buffer xfer are in order. 299 */ 300 uint32_t idb_exp_offset; 301 size_t idb_xfer_len; /* Current requested xfer len */ 302 void *idb_buf_private; /* transport-specific buf handle */ 303 void *idb_reg_private; /* transport-specific reg handle */ 304 void *idb_bufptr; /* transport-specific bcopy pointer */ 305 boolean_t idb_bufbcopy; /* true if bcopy required */ 306 307 idm_buf_cb_t *idb_buf_cb; /* Data Completion Notify, tgt only */ 308 void *idb_cb_arg; /* Client private data */ 309 idm_task_t *idb_task_binding; 310 timespec_t idb_xfer_start; 311 timespec_t idb_xfer_done; 312 boolean_t idb_in_transport; 313 boolean_t idb_tx_thread; /* Sockets only */ 314 iscsi_hdr_t idb_data_hdr_tmpl; /* Sockets only */ 315 idm_status_t idb_status; 316 } idm_buf_t; 317 318 typedef enum { 319 BP_CHECK_QUICK, 320 BP_CHECK_THOROUGH, 321 BP_CHECK_ASSERT 322 } idm_bufpat_check_type_t; 323 324 #define BUFPAT_MATCH(bc_bufpat, bc_idb) \ 325 ((bufpat->bufpat_idb == bc_idb) && \ 326 (bufpat->bufpat_bufmagic == IDM_BUF_MAGIC)) 327 328 typedef struct idm_bufpat_s { 329 void *bufpat_idb; 330 uint32_t bufpat_bufmagic; 331 uint32_t bufpat_offset; 332 } idm_bufpat_t; 333 334 #define PDU_MAX_IOVLEN 12 335 #define IDM_PDU_MAGIC 0x49504455 /* "IPDU" */ 336 337 typedef struct idm_pdu_s { 338 uint32_t isp_magic; /* "IPDU" */ 339 340 /* 341 * Internal - Order is vital. idm_tx_link *must* be the second 342 * element in this structure for proper TX PDU ordering. 343 */ 344 list_node_t idm_tx_link; 345 346 list_node_t isp_client_lnd; 347 348 idm_conn_t *isp_ic; /* Must be set */ 349 iscsi_hdr_t *isp_hdr; 350 uint_t isp_hdrlen; 351 uint8_t *isp_data; 352 uint_t isp_datalen; 353 354 /* Transport header */ 355 void *isp_transport_hdr; 356 uint32_t isp_transport_hdrlen; 357 void *isp_transport_private; 358 359 /* 360 * isp_data is used for sending SCSI status, NOP, text, scsi and 361 * non-scsi data. Data is received using isp_iov and isp_iovlen 362 * to support data over multiple buffers. 363 */ 364 void *isp_private; 365 idm_pdu_cb_t *isp_callback; 366 idm_status_t isp_status; 367 368 /* 369 * The following four elements are only used in 370 * idm_sorecv_scsidata() currently. 371 */ 372 struct iovec isp_iov[PDU_MAX_IOVLEN]; 373 int isp_iovlen; 374 idm_buf_t *isp_sorx_buf; 375 376 /* Implementation data for idm_pdu_alloc and sorx PDU cache */ 377 uint32_t isp_flags; 378 uint_t isp_hdrbuflen; 379 uint_t isp_databuflen; 380 hrtime_t isp_queue_time; 381 382 /* Taskq dispatching state for deferred PDU */ 383 taskq_ent_t isp_tqent; 384 } idm_pdu_t; 385 386 /* 387 * This "generic" object is used when removing an item from the ic_tx_list 388 * in order to determine whether it's an idm_pdu_t or an idm_buf_t 389 */ 390 391 typedef struct { 392 uint32_t idm_tx_obj_magic; 393 /* 394 * idm_tx_link *must* be the second element in this structure. 395 */ 396 list_node_t idm_tx_link; 397 } idm_tx_obj_t; 398 399 400 #define IDM_PDU_OPCODE(PDU) \ 401 ((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK) 402 403 #define IDM_PDU_ALLOC 0x00000001 404 #define IDM_PDU_ADDL_HDR 0x00000002 405 #define IDM_PDU_ADDL_DATA 0x00000004 406 #define IDM_PDU_LOGIN_TX 0x00000008 407 #define IDM_PDU_SET_STATSN 0x00000010 408 #define IDM_PDU_ADVANCE_STATSN 0x00000020 409 410 #define OSD_EXT_CDB_AHSLEN (200 - 15) 411 #define BIDI_AHS_LENGTH 5 412 #define IDM_SORX_CACHE_AHSLEN \ 413 (((OSD_EXT_CDB_AHSLEN + 3) + \ 414 (BIDI_AHS_LENGTH + 3)) / sizeof (uint32_t)) 415 #define IDM_SORX_CACHE_HDRLEN (sizeof (iscsi_hdr_t) + IDM_SORX_CACHE_AHSLEN) 416 417 /* 418 * ID pool 419 */ 420 421 #define IDM_IDPOOL_MAGIC 0x4944504C /* IDPL */ 422 #define IDM_IDPOOL_MIN_SIZE 64 /* Number of IDs to begin with */ 423 #define IDM_IDPOOL_MAX_SIZE 64 * 1024 424 425 typedef struct idm_idpool { 426 uint32_t id_magic; 427 kmutex_t id_mutex; 428 uint8_t *id_pool; 429 uint32_t id_size; 430 uint8_t id_bit; 431 uint8_t id_bit_idx; 432 uint32_t id_idx; 433 uint32_t id_idx_msk; 434 uint32_t id_free_counter; 435 uint32_t id_max_free_counter; 436 } idm_idpool_t; 437 438 /* 439 * Global IDM state structure 440 */ 441 typedef struct { 442 kmutex_t idm_global_mutex; 443 taskq_t *idm_global_taskq; 444 kthread_t *idm_wd_thread; 445 kt_did_t idm_wd_thread_did; 446 boolean_t idm_wd_thread_running; 447 kcondvar_t idm_wd_cv; 448 list_t idm_tgt_svc_list; 449 kcondvar_t idm_tgt_svc_cv; 450 list_t idm_tgt_conn_list; 451 int idm_tgt_conn_count; 452 list_t idm_ini_conn_list; 453 kmem_cache_t *idm_buf_cache; 454 kmem_cache_t *idm_task_cache; 455 krwlock_t idm_taskid_table_lock; 456 idm_task_t **idm_taskid_table; 457 uint32_t idm_taskid_next; 458 uint32_t idm_taskid_max; 459 idm_idpool_t idm_conn_id_pool; 460 kmem_cache_t *idm_sotx_pdu_cache; 461 kmem_cache_t *idm_sorx_pdu_cache; 462 kmem_cache_t *idm_so_128k_buf_cache; 463 } idm_global_t; 464 465 idm_global_t idm; /* Global state */ 466 467 int 468 idm_idpool_create(idm_idpool_t *pool); 469 470 void 471 idm_idpool_destroy(idm_idpool_t *pool); 472 473 int 474 idm_idpool_alloc(idm_idpool_t *pool, uint16_t *id); 475 476 void 477 idm_idpool_free(idm_idpool_t *pool, uint16_t id); 478 479 void 480 idm_pdu_rx(idm_conn_t *ic, idm_pdu_t *pdu); 481 482 void 483 idm_pdu_tx_forward(idm_conn_t *ic, idm_pdu_t *pdu); 484 485 boolean_t 486 idm_pdu_rx_forward_ffp(idm_conn_t *ic, idm_pdu_t *pdu); 487 488 void 489 idm_pdu_rx_forward(idm_conn_t *ic, idm_pdu_t *pdu); 490 491 void 492 idm_pdu_tx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu); 493 494 void 495 idm_pdu_rx_protocol_error(idm_conn_t *ic, idm_pdu_t *pdu); 496 497 void idm_parse_login_rsp(idm_conn_t *ic, idm_pdu_t *logout_req_pdu, 498 boolean_t rx); 499 500 void idm_parse_logout_req(idm_conn_t *ic, idm_pdu_t *logout_req_pdu, 501 boolean_t rx); 502 503 void idm_parse_logout_rsp(idm_conn_t *ic, idm_pdu_t *login_rsp_pdu, 504 boolean_t rx); 505 506 idm_status_t idm_svc_conn_create(idm_svc_t *is, idm_transport_type_t type, 507 idm_conn_t **ic_result); 508 509 void idm_svc_conn_destroy(idm_conn_t *ic); 510 511 idm_status_t idm_ini_conn_finish(idm_conn_t *ic); 512 513 idm_status_t idm_tgt_conn_finish(idm_conn_t *ic); 514 515 idm_conn_t *idm_conn_create_common(idm_conn_type_t conn_type, 516 idm_transport_type_t tt, idm_conn_ops_t *conn_ops); 517 518 void idm_conn_destroy_common(idm_conn_t *ic); 519 520 void idm_conn_close(idm_conn_t *ic); 521 522 uint32_t idm_cid_alloc(void); 523 524 void idm_cid_free(uint32_t cid); 525 526 uint32_t idm_crc32c(void *address, unsigned long length); 527 528 uint32_t idm_crc32c_continued(void *address, unsigned long length, 529 uint32_t crc); 530 531 void idm_listbuf_insert(list_t *lst, idm_buf_t *buf); 532 533 idm_conn_t *idm_lookup_conn(uint8_t *isid, uint16_t tsih, uint16_t cid); 534 535 #ifdef __cplusplus 536 } 537 #endif 538 539 #endif /* _IDM_IMPL_H_ */ 540