1 /* 2 * Generic SCSI-3 ALUA SCSI Device Handler 3 * 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 5 * All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 * 21 */ 22 #include <linux/slab.h> 23 #include <linux/delay.h> 24 #include <linux/module.h> 25 #include <asm/unaligned.h> 26 #include <scsi/scsi.h> 27 #include <scsi/scsi_proto.h> 28 #include <scsi/scsi_dbg.h> 29 #include <scsi/scsi_eh.h> 30 #include <scsi/scsi_dh.h> 31 32 #define ALUA_DH_NAME "alua" 33 #define ALUA_DH_VER "2.0" 34 35 #define TPGS_SUPPORT_NONE 0x00 36 #define TPGS_SUPPORT_OPTIMIZED 0x01 37 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 38 #define TPGS_SUPPORT_STANDBY 0x04 39 #define TPGS_SUPPORT_UNAVAILABLE 0x08 40 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 41 #define TPGS_SUPPORT_OFFLINE 0x40 42 #define TPGS_SUPPORT_TRANSITION 0x80 43 44 #define RTPG_FMT_MASK 0x70 45 #define RTPG_FMT_EXT_HDR 0x10 46 47 #define TPGS_MODE_UNINITIALIZED -1 48 #define TPGS_MODE_NONE 0x0 49 #define TPGS_MODE_IMPLICIT 0x1 50 #define TPGS_MODE_EXPLICIT 0x2 51 52 #define ALUA_RTPG_SIZE 128 53 #define ALUA_FAILOVER_TIMEOUT 60 54 #define ALUA_FAILOVER_RETRIES 5 55 #define ALUA_RTPG_DELAY_MSECS 5 56 57 /* device handler flags */ 58 #define ALUA_OPTIMIZE_STPG 0x01 59 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 60 #define ALUA_SYNC_STPG 0x04 61 /* State machine flags */ 62 #define ALUA_PG_RUN_RTPG 0x10 63 #define ALUA_PG_RUN_STPG 0x20 64 #define ALUA_PG_RUNNING 0x40 65 66 static uint optimize_stpg; 67 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 68 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 69 70 static LIST_HEAD(port_group_list); 71 static DEFINE_SPINLOCK(port_group_lock); 72 static struct workqueue_struct *kaluad_wq; 73 static struct workqueue_struct *kaluad_sync_wq; 74 75 struct alua_port_group { 76 struct kref kref; 77 struct rcu_head rcu; 78 struct list_head node; 79 struct list_head dh_list; 80 unsigned char device_id_str[256]; 81 int device_id_len; 82 int group_id; 83 int tpgs; 84 int state; 85 int pref; 86 unsigned flags; /* used for optimizing STPG */ 87 unsigned char transition_tmo; 88 unsigned long expiry; 89 unsigned long interval; 90 struct delayed_work rtpg_work; 91 spinlock_t lock; 92 struct list_head rtpg_list; 93 struct scsi_device *rtpg_sdev; 94 }; 95 96 struct alua_dh_data { 97 struct list_head node; 98 struct alua_port_group __rcu *pg; 99 int group_id; 100 spinlock_t pg_lock; 101 struct scsi_device *sdev; 102 int init_error; 103 struct mutex init_mutex; 104 }; 105 106 struct alua_queue_data { 107 struct list_head entry; 108 activate_complete callback_fn; 109 void *callback_data; 110 }; 111 112 #define ALUA_POLICY_SWITCH_CURRENT 0 113 #define ALUA_POLICY_SWITCH_ALL 1 114 115 static void alua_rtpg_work(struct work_struct *work); 116 static bool alua_rtpg_queue(struct alua_port_group *pg, 117 struct scsi_device *sdev, 118 struct alua_queue_data *qdata, bool force); 119 static void alua_check(struct scsi_device *sdev, bool force); 120 121 static void release_port_group(struct kref *kref) 122 { 123 struct alua_port_group *pg; 124 125 pg = container_of(kref, struct alua_port_group, kref); 126 if (pg->rtpg_sdev) 127 flush_delayed_work(&pg->rtpg_work); 128 spin_lock(&port_group_lock); 129 list_del(&pg->node); 130 spin_unlock(&port_group_lock); 131 kfree_rcu(pg, rcu); 132 } 133 134 /* 135 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 136 * @sdev: sdev the command should be sent to 137 */ 138 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 139 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 140 { 141 u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; 142 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 143 REQ_FAILFAST_DRIVER; 144 145 /* Prepare the command. */ 146 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); 147 cdb[0] = MAINTENANCE_IN; 148 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 149 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 150 else 151 cdb[1] = MI_REPORT_TARGET_PGS; 152 put_unaligned_be32(bufflen, &cdb[6]); 153 154 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, 155 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 156 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 157 } 158 159 /* 160 * submit_stpg - Issue a SET TARGET PORT GROUP command 161 * 162 * Currently we're only setting the current target port group state 163 * to 'active/optimized' and let the array firmware figure out 164 * the states of the remaining groups. 165 */ 166 static int submit_stpg(struct scsi_device *sdev, int group_id, 167 struct scsi_sense_hdr *sshdr) 168 { 169 u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; 170 unsigned char stpg_data[8]; 171 int stpg_len = 8; 172 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 173 REQ_FAILFAST_DRIVER; 174 175 /* Prepare the data buffer */ 176 memset(stpg_data, 0, stpg_len); 177 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 178 put_unaligned_be16(group_id, &stpg_data[6]); 179 180 /* Prepare the command. */ 181 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); 182 cdb[0] = MAINTENANCE_OUT; 183 cdb[1] = MO_SET_TARGET_PGS; 184 put_unaligned_be32(stpg_len, &cdb[6]); 185 186 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, 187 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 188 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 189 } 190 191 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 192 int group_id) 193 { 194 struct alua_port_group *pg; 195 196 if (!id_str || !id_size || !strlen(id_str)) 197 return NULL; 198 199 list_for_each_entry(pg, &port_group_list, node) { 200 if (pg->group_id != group_id) 201 continue; 202 if (!pg->device_id_len || pg->device_id_len != id_size) 203 continue; 204 if (strncmp(pg->device_id_str, id_str, id_size)) 205 continue; 206 if (!kref_get_unless_zero(&pg->kref)) 207 continue; 208 return pg; 209 } 210 211 return NULL; 212 } 213 214 /* 215 * alua_alloc_pg - Allocate a new port_group structure 216 * @sdev: scsi device 217 * @h: alua device_handler data 218 * @group_id: port group id 219 * 220 * Allocate a new port_group structure for a given 221 * device. 222 */ 223 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 224 int group_id, int tpgs) 225 { 226 struct alua_port_group *pg, *tmp_pg; 227 228 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 229 if (!pg) 230 return ERR_PTR(-ENOMEM); 231 232 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 233 sizeof(pg->device_id_str)); 234 if (pg->device_id_len <= 0) { 235 /* 236 * TPGS supported but no device identification found. 237 * Generate private device identification. 238 */ 239 sdev_printk(KERN_INFO, sdev, 240 "%s: No device descriptors found\n", 241 ALUA_DH_NAME); 242 pg->device_id_str[0] = '\0'; 243 pg->device_id_len = 0; 244 } 245 pg->group_id = group_id; 246 pg->tpgs = tpgs; 247 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 248 if (optimize_stpg) 249 pg->flags |= ALUA_OPTIMIZE_STPG; 250 kref_init(&pg->kref); 251 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 252 INIT_LIST_HEAD(&pg->rtpg_list); 253 INIT_LIST_HEAD(&pg->node); 254 INIT_LIST_HEAD(&pg->dh_list); 255 spin_lock_init(&pg->lock); 256 257 spin_lock(&port_group_lock); 258 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 259 group_id); 260 if (tmp_pg) { 261 spin_unlock(&port_group_lock); 262 kfree(pg); 263 return tmp_pg; 264 } 265 266 list_add(&pg->node, &port_group_list); 267 spin_unlock(&port_group_lock); 268 269 return pg; 270 } 271 272 /* 273 * alua_check_tpgs - Evaluate TPGS setting 274 * @sdev: device to be checked 275 * 276 * Examine the TPGS setting of the sdev to find out if ALUA 277 * is supported. 278 */ 279 static int alua_check_tpgs(struct scsi_device *sdev) 280 { 281 int tpgs = TPGS_MODE_NONE; 282 283 /* 284 * ALUA support for non-disk devices is fraught with 285 * difficulties, so disable it for now. 286 */ 287 if (sdev->type != TYPE_DISK) { 288 sdev_printk(KERN_INFO, sdev, 289 "%s: disable for non-disk devices\n", 290 ALUA_DH_NAME); 291 return tpgs; 292 } 293 294 tpgs = scsi_device_tpgs(sdev); 295 switch (tpgs) { 296 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 297 sdev_printk(KERN_INFO, sdev, 298 "%s: supports implicit and explicit TPGS\n", 299 ALUA_DH_NAME); 300 break; 301 case TPGS_MODE_EXPLICIT: 302 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 303 ALUA_DH_NAME); 304 break; 305 case TPGS_MODE_IMPLICIT: 306 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 307 ALUA_DH_NAME); 308 break; 309 case TPGS_MODE_NONE: 310 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 311 ALUA_DH_NAME); 312 break; 313 default: 314 sdev_printk(KERN_INFO, sdev, 315 "%s: unsupported TPGS setting %d\n", 316 ALUA_DH_NAME, tpgs); 317 tpgs = TPGS_MODE_NONE; 318 break; 319 } 320 321 return tpgs; 322 } 323 324 /* 325 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 326 * @sdev: device to be checked 327 * 328 * Extract the relative target port and the target port group 329 * descriptor from the list of identificators. 330 */ 331 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 332 int tpgs) 333 { 334 int rel_port = -1, group_id; 335 struct alua_port_group *pg, *old_pg = NULL; 336 bool pg_updated = false; 337 unsigned long flags; 338 339 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 340 if (group_id < 0) { 341 /* 342 * Internal error; TPGS supported but required 343 * VPD identification descriptors not present. 344 * Disable ALUA support 345 */ 346 sdev_printk(KERN_INFO, sdev, 347 "%s: No target port descriptors found\n", 348 ALUA_DH_NAME); 349 return SCSI_DH_DEV_UNSUPP; 350 } 351 352 pg = alua_alloc_pg(sdev, group_id, tpgs); 353 if (IS_ERR(pg)) { 354 if (PTR_ERR(pg) == -ENOMEM) 355 return SCSI_DH_NOMEM; 356 return SCSI_DH_DEV_UNSUPP; 357 } 358 if (pg->device_id_len) 359 sdev_printk(KERN_INFO, sdev, 360 "%s: device %s port group %x rel port %x\n", 361 ALUA_DH_NAME, pg->device_id_str, 362 group_id, rel_port); 363 else 364 sdev_printk(KERN_INFO, sdev, 365 "%s: port group %x rel port %x\n", 366 ALUA_DH_NAME, group_id, rel_port); 367 368 /* Check for existing port group references */ 369 spin_lock(&h->pg_lock); 370 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 371 if (old_pg != pg) { 372 /* port group has changed. Update to new port group */ 373 if (h->pg) { 374 spin_lock_irqsave(&old_pg->lock, flags); 375 list_del_rcu(&h->node); 376 spin_unlock_irqrestore(&old_pg->lock, flags); 377 } 378 rcu_assign_pointer(h->pg, pg); 379 pg_updated = true; 380 } 381 382 spin_lock_irqsave(&pg->lock, flags); 383 if (sdev->synchronous_alua) 384 pg->flags |= ALUA_SYNC_STPG; 385 if (pg_updated) 386 list_add_rcu(&h->node, &pg->dh_list); 387 spin_unlock_irqrestore(&pg->lock, flags); 388 389 alua_rtpg_queue(rcu_dereference_protected(h->pg, 390 lockdep_is_held(&h->pg_lock)), 391 sdev, NULL, true); 392 spin_unlock(&h->pg_lock); 393 394 if (old_pg) 395 kref_put(&old_pg->kref, release_port_group); 396 397 return SCSI_DH_OK; 398 } 399 400 static char print_alua_state(unsigned char state) 401 { 402 switch (state) { 403 case SCSI_ACCESS_STATE_OPTIMAL: 404 return 'A'; 405 case SCSI_ACCESS_STATE_ACTIVE: 406 return 'N'; 407 case SCSI_ACCESS_STATE_STANDBY: 408 return 'S'; 409 case SCSI_ACCESS_STATE_UNAVAILABLE: 410 return 'U'; 411 case SCSI_ACCESS_STATE_LBA: 412 return 'L'; 413 case SCSI_ACCESS_STATE_OFFLINE: 414 return 'O'; 415 case SCSI_ACCESS_STATE_TRANSITIONING: 416 return 'T'; 417 default: 418 return 'X'; 419 } 420 } 421 422 static int alua_check_sense(struct scsi_device *sdev, 423 struct scsi_sense_hdr *sense_hdr) 424 { 425 switch (sense_hdr->sense_key) { 426 case NOT_READY: 427 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 428 /* 429 * LUN Not Accessible - ALUA state transition 430 */ 431 alua_check(sdev, false); 432 return NEEDS_RETRY; 433 } 434 break; 435 case UNIT_ATTENTION: 436 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 437 /* 438 * Power On, Reset, or Bus Device Reset. 439 * Might have obscured a state transition, 440 * so schedule a recheck. 441 */ 442 alua_check(sdev, true); 443 return ADD_TO_MLQUEUE; 444 } 445 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 446 /* 447 * Device internal reset 448 */ 449 return ADD_TO_MLQUEUE; 450 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 451 /* 452 * Mode Parameters Changed 453 */ 454 return ADD_TO_MLQUEUE; 455 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 456 /* 457 * ALUA state changed 458 */ 459 alua_check(sdev, true); 460 return ADD_TO_MLQUEUE; 461 } 462 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 463 /* 464 * Implicit ALUA state transition failed 465 */ 466 alua_check(sdev, true); 467 return ADD_TO_MLQUEUE; 468 } 469 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 470 /* 471 * Inquiry data has changed 472 */ 473 return ADD_TO_MLQUEUE; 474 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 475 /* 476 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 477 * when switching controllers on targets like 478 * Intel Multi-Flex. We can just retry. 479 */ 480 return ADD_TO_MLQUEUE; 481 break; 482 } 483 484 return SCSI_RETURN_NOT_HANDLED; 485 } 486 487 /* 488 * alua_tur - Send a TEST UNIT READY 489 * @sdev: device to which the TEST UNIT READY command should be send 490 * 491 * Send a TEST UNIT READY to @sdev to figure out the device state 492 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 493 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 494 */ 495 static int alua_tur(struct scsi_device *sdev) 496 { 497 struct scsi_sense_hdr sense_hdr; 498 int retval; 499 500 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 501 ALUA_FAILOVER_RETRIES, &sense_hdr); 502 if (sense_hdr.sense_key == NOT_READY && 503 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 504 return SCSI_DH_RETRY; 505 else if (retval) 506 return SCSI_DH_IO; 507 else 508 return SCSI_DH_OK; 509 } 510 511 /* 512 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 513 * @sdev: the device to be evaluated. 514 * 515 * Evaluate the Target Port Group State. 516 * Returns SCSI_DH_DEV_OFFLINED if the path is 517 * found to be unusable. 518 */ 519 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 520 { 521 struct scsi_sense_hdr sense_hdr; 522 struct alua_port_group *tmp_pg; 523 int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; 524 unsigned char *desc, *buff; 525 unsigned err, retval; 526 unsigned int tpg_desc_tbl_off; 527 unsigned char orig_transition_tmo; 528 unsigned long flags; 529 530 if (!pg->expiry) { 531 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 532 533 if (pg->transition_tmo) 534 transition_tmo = pg->transition_tmo * HZ; 535 536 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 537 } 538 539 buff = kzalloc(bufflen, GFP_KERNEL); 540 if (!buff) 541 return SCSI_DH_DEV_TEMP_BUSY; 542 543 retry: 544 err = 0; 545 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 546 547 if (retval) { 548 if (!scsi_sense_valid(&sense_hdr)) { 549 sdev_printk(KERN_INFO, sdev, 550 "%s: rtpg failed, result %d\n", 551 ALUA_DH_NAME, retval); 552 kfree(buff); 553 if (driver_byte(retval) == DRIVER_ERROR) 554 return SCSI_DH_DEV_TEMP_BUSY; 555 return SCSI_DH_IO; 556 } 557 558 /* 559 * submit_rtpg() has failed on existing arrays 560 * when requesting extended header info, and 561 * the array doesn't support extended headers, 562 * even though it shouldn't according to T10. 563 * The retry without rtpg_ext_hdr_req set 564 * handles this. 565 */ 566 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 567 sense_hdr.sense_key == ILLEGAL_REQUEST && 568 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 569 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 570 goto retry; 571 } 572 /* 573 * Retry on ALUA state transition or if any 574 * UNIT ATTENTION occurred. 575 */ 576 if (sense_hdr.sense_key == NOT_READY && 577 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 578 err = SCSI_DH_RETRY; 579 else if (sense_hdr.sense_key == UNIT_ATTENTION) 580 err = SCSI_DH_RETRY; 581 if (err == SCSI_DH_RETRY && 582 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 583 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 584 ALUA_DH_NAME); 585 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 586 kfree(buff); 587 return err; 588 } 589 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 590 ALUA_DH_NAME); 591 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 592 kfree(buff); 593 pg->expiry = 0; 594 return SCSI_DH_IO; 595 } 596 597 len = get_unaligned_be32(&buff[0]) + 4; 598 599 if (len > bufflen) { 600 /* Resubmit with the correct length */ 601 kfree(buff); 602 bufflen = len; 603 buff = kmalloc(bufflen, GFP_KERNEL); 604 if (!buff) { 605 sdev_printk(KERN_WARNING, sdev, 606 "%s: kmalloc buffer failed\n",__func__); 607 /* Temporary failure, bypass */ 608 pg->expiry = 0; 609 return SCSI_DH_DEV_TEMP_BUSY; 610 } 611 goto retry; 612 } 613 614 orig_transition_tmo = pg->transition_tmo; 615 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 616 pg->transition_tmo = buff[5]; 617 else 618 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 619 620 if (orig_transition_tmo != pg->transition_tmo) { 621 sdev_printk(KERN_INFO, sdev, 622 "%s: transition timeout set to %d seconds\n", 623 ALUA_DH_NAME, pg->transition_tmo); 624 pg->expiry = jiffies + pg->transition_tmo * HZ; 625 } 626 627 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 628 tpg_desc_tbl_off = 8; 629 else 630 tpg_desc_tbl_off = 4; 631 632 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 633 k < len; 634 k += off, desc += off) { 635 u16 group_id = get_unaligned_be16(&desc[2]); 636 637 spin_lock_irqsave(&port_group_lock, flags); 638 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 639 group_id); 640 spin_unlock_irqrestore(&port_group_lock, flags); 641 if (tmp_pg) { 642 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 643 if ((tmp_pg == pg) || 644 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 645 struct alua_dh_data *h; 646 647 tmp_pg->state = desc[0] & 0x0f; 648 tmp_pg->pref = desc[0] >> 7; 649 rcu_read_lock(); 650 list_for_each_entry_rcu(h, 651 &tmp_pg->dh_list, node) { 652 /* h->sdev should always be valid */ 653 BUG_ON(!h->sdev); 654 h->sdev->access_state = desc[0]; 655 } 656 rcu_read_unlock(); 657 } 658 if (tmp_pg == pg) 659 valid_states = desc[1]; 660 spin_unlock_irqrestore(&tmp_pg->lock, flags); 661 } 662 kref_put(&tmp_pg->kref, release_port_group); 663 } 664 off = 8 + (desc[7] * 4); 665 } 666 667 spin_lock_irqsave(&pg->lock, flags); 668 sdev_printk(KERN_INFO, sdev, 669 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 670 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 671 pg->pref ? "preferred" : "non-preferred", 672 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 673 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 674 valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 675 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 676 valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 677 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 678 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 679 680 switch (pg->state) { 681 case SCSI_ACCESS_STATE_TRANSITIONING: 682 if (time_before(jiffies, pg->expiry)) { 683 /* State transition, retry */ 684 pg->interval = 2; 685 err = SCSI_DH_RETRY; 686 } else { 687 struct alua_dh_data *h; 688 689 /* Transitioning time exceeded, set port to standby */ 690 err = SCSI_DH_IO; 691 pg->state = SCSI_ACCESS_STATE_STANDBY; 692 pg->expiry = 0; 693 rcu_read_lock(); 694 list_for_each_entry_rcu(h, &pg->dh_list, node) { 695 BUG_ON(!h->sdev); 696 h->sdev->access_state = 697 (pg->state & SCSI_ACCESS_STATE_MASK); 698 if (pg->pref) 699 h->sdev->access_state |= 700 SCSI_ACCESS_STATE_PREFERRED; 701 } 702 rcu_read_unlock(); 703 } 704 break; 705 case SCSI_ACCESS_STATE_OFFLINE: 706 /* Path unusable */ 707 err = SCSI_DH_DEV_OFFLINED; 708 pg->expiry = 0; 709 break; 710 default: 711 /* Useable path if active */ 712 err = SCSI_DH_OK; 713 pg->expiry = 0; 714 break; 715 } 716 spin_unlock_irqrestore(&pg->lock, flags); 717 kfree(buff); 718 return err; 719 } 720 721 /* 722 * alua_stpg - Issue a SET TARGET PORT GROUP command 723 * 724 * Issue a SET TARGET PORT GROUP command and evaluate the 725 * response. Returns SCSI_DH_RETRY per default to trigger 726 * a re-evaluation of the target group state or SCSI_DH_OK 727 * if no further action needs to be taken. 728 */ 729 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 730 { 731 int retval; 732 struct scsi_sense_hdr sense_hdr; 733 734 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 735 /* Only implicit ALUA supported, retry */ 736 return SCSI_DH_RETRY; 737 } 738 switch (pg->state) { 739 case SCSI_ACCESS_STATE_OPTIMAL: 740 return SCSI_DH_OK; 741 case SCSI_ACCESS_STATE_ACTIVE: 742 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 743 !pg->pref && 744 (pg->tpgs & TPGS_MODE_IMPLICIT)) 745 return SCSI_DH_OK; 746 break; 747 case SCSI_ACCESS_STATE_STANDBY: 748 case SCSI_ACCESS_STATE_UNAVAILABLE: 749 break; 750 case SCSI_ACCESS_STATE_OFFLINE: 751 return SCSI_DH_IO; 752 case SCSI_ACCESS_STATE_TRANSITIONING: 753 break; 754 default: 755 sdev_printk(KERN_INFO, sdev, 756 "%s: stpg failed, unhandled TPGS state %d", 757 ALUA_DH_NAME, pg->state); 758 return SCSI_DH_NOSYS; 759 } 760 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 761 762 if (retval) { 763 if (!scsi_sense_valid(&sense_hdr)) { 764 sdev_printk(KERN_INFO, sdev, 765 "%s: stpg failed, result %d", 766 ALUA_DH_NAME, retval); 767 if (driver_byte(retval) == DRIVER_ERROR) 768 return SCSI_DH_DEV_TEMP_BUSY; 769 } else { 770 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 771 ALUA_DH_NAME); 772 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 773 } 774 } 775 /* Retry RTPG */ 776 return SCSI_DH_RETRY; 777 } 778 779 static void alua_rtpg_work(struct work_struct *work) 780 { 781 struct alua_port_group *pg = 782 container_of(work, struct alua_port_group, rtpg_work.work); 783 struct scsi_device *sdev; 784 LIST_HEAD(qdata_list); 785 int err = SCSI_DH_OK; 786 struct alua_queue_data *qdata, *tmp; 787 unsigned long flags; 788 struct workqueue_struct *alua_wq = kaluad_wq; 789 790 spin_lock_irqsave(&pg->lock, flags); 791 sdev = pg->rtpg_sdev; 792 if (!sdev) { 793 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 794 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 795 spin_unlock_irqrestore(&pg->lock, flags); 796 kref_put(&pg->kref, release_port_group); 797 return; 798 } 799 if (pg->flags & ALUA_SYNC_STPG) 800 alua_wq = kaluad_sync_wq; 801 pg->flags |= ALUA_PG_RUNNING; 802 if (pg->flags & ALUA_PG_RUN_RTPG) { 803 int state = pg->state; 804 805 pg->flags &= ~ALUA_PG_RUN_RTPG; 806 spin_unlock_irqrestore(&pg->lock, flags); 807 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 808 if (alua_tur(sdev) == SCSI_DH_RETRY) { 809 spin_lock_irqsave(&pg->lock, flags); 810 pg->flags &= ~ALUA_PG_RUNNING; 811 pg->flags |= ALUA_PG_RUN_RTPG; 812 spin_unlock_irqrestore(&pg->lock, flags); 813 queue_delayed_work(alua_wq, &pg->rtpg_work, 814 pg->interval * HZ); 815 return; 816 } 817 /* Send RTPG on failure or if TUR indicates SUCCESS */ 818 } 819 err = alua_rtpg(sdev, pg); 820 spin_lock_irqsave(&pg->lock, flags); 821 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 822 pg->flags &= ~ALUA_PG_RUNNING; 823 pg->flags |= ALUA_PG_RUN_RTPG; 824 spin_unlock_irqrestore(&pg->lock, flags); 825 queue_delayed_work(alua_wq, &pg->rtpg_work, 826 pg->interval * HZ); 827 return; 828 } 829 if (err != SCSI_DH_OK) 830 pg->flags &= ~ALUA_PG_RUN_STPG; 831 } 832 if (pg->flags & ALUA_PG_RUN_STPG) { 833 pg->flags &= ~ALUA_PG_RUN_STPG; 834 spin_unlock_irqrestore(&pg->lock, flags); 835 err = alua_stpg(sdev, pg); 836 spin_lock_irqsave(&pg->lock, flags); 837 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 838 pg->flags |= ALUA_PG_RUN_RTPG; 839 pg->interval = 0; 840 pg->flags &= ~ALUA_PG_RUNNING; 841 spin_unlock_irqrestore(&pg->lock, flags); 842 queue_delayed_work(alua_wq, &pg->rtpg_work, 843 pg->interval * HZ); 844 return; 845 } 846 } 847 848 list_splice_init(&pg->rtpg_list, &qdata_list); 849 pg->rtpg_sdev = NULL; 850 spin_unlock_irqrestore(&pg->lock, flags); 851 852 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 853 list_del(&qdata->entry); 854 if (qdata->callback_fn) 855 qdata->callback_fn(qdata->callback_data, err); 856 kfree(qdata); 857 } 858 spin_lock_irqsave(&pg->lock, flags); 859 pg->flags &= ~ALUA_PG_RUNNING; 860 spin_unlock_irqrestore(&pg->lock, flags); 861 scsi_device_put(sdev); 862 kref_put(&pg->kref, release_port_group); 863 } 864 865 /** 866 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously 867 * 868 * Returns true if and only if alua_rtpg_work() will be called asynchronously. 869 * That function is responsible for calling @qdata->fn(). 870 */ 871 static bool alua_rtpg_queue(struct alua_port_group *pg, 872 struct scsi_device *sdev, 873 struct alua_queue_data *qdata, bool force) 874 { 875 int start_queue = 0; 876 unsigned long flags; 877 struct workqueue_struct *alua_wq = kaluad_wq; 878 879 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) 880 return false; 881 882 spin_lock_irqsave(&pg->lock, flags); 883 if (qdata) { 884 list_add_tail(&qdata->entry, &pg->rtpg_list); 885 pg->flags |= ALUA_PG_RUN_STPG; 886 force = true; 887 } 888 if (pg->rtpg_sdev == NULL) { 889 pg->interval = 0; 890 pg->flags |= ALUA_PG_RUN_RTPG; 891 kref_get(&pg->kref); 892 pg->rtpg_sdev = sdev; 893 start_queue = 1; 894 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 895 pg->flags |= ALUA_PG_RUN_RTPG; 896 /* Do not queue if the worker is already running */ 897 if (!(pg->flags & ALUA_PG_RUNNING)) { 898 kref_get(&pg->kref); 899 start_queue = 1; 900 } 901 } 902 903 if (pg->flags & ALUA_SYNC_STPG) 904 alua_wq = kaluad_sync_wq; 905 spin_unlock_irqrestore(&pg->lock, flags); 906 907 if (start_queue) { 908 if (queue_delayed_work(alua_wq, &pg->rtpg_work, 909 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) 910 sdev = NULL; 911 else 912 kref_put(&pg->kref, release_port_group); 913 } 914 if (sdev) 915 scsi_device_put(sdev); 916 917 return true; 918 } 919 920 /* 921 * alua_initialize - Initialize ALUA state 922 * @sdev: the device to be initialized 923 * 924 * For the prep_fn to work correctly we have 925 * to initialize the ALUA state for the device. 926 */ 927 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 928 { 929 int err = SCSI_DH_DEV_UNSUPP, tpgs; 930 931 mutex_lock(&h->init_mutex); 932 tpgs = alua_check_tpgs(sdev); 933 if (tpgs != TPGS_MODE_NONE) 934 err = alua_check_vpd(sdev, h, tpgs); 935 h->init_error = err; 936 mutex_unlock(&h->init_mutex); 937 return err; 938 } 939 /* 940 * alua_set_params - set/unset the optimize flag 941 * @sdev: device on the path to be activated 942 * params - parameters in the following format 943 * "no_of_params\0param1\0param2\0param3\0...\0" 944 * For example, to set the flag pass the following parameters 945 * from multipath.conf 946 * hardware_handler "2 alua 1" 947 */ 948 static int alua_set_params(struct scsi_device *sdev, const char *params) 949 { 950 struct alua_dh_data *h = sdev->handler_data; 951 struct alua_port_group *pg = NULL; 952 unsigned int optimize = 0, argc; 953 const char *p = params; 954 int result = SCSI_DH_OK; 955 unsigned long flags; 956 957 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 958 return -EINVAL; 959 960 while (*p++) 961 ; 962 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 963 return -EINVAL; 964 965 rcu_read_lock(); 966 pg = rcu_dereference(h->pg); 967 if (!pg) { 968 rcu_read_unlock(); 969 return -ENXIO; 970 } 971 spin_lock_irqsave(&pg->lock, flags); 972 if (optimize) 973 pg->flags |= ALUA_OPTIMIZE_STPG; 974 else 975 pg->flags &= ~ALUA_OPTIMIZE_STPG; 976 spin_unlock_irqrestore(&pg->lock, flags); 977 rcu_read_unlock(); 978 979 return result; 980 } 981 982 /* 983 * alua_activate - activate a path 984 * @sdev: device on the path to be activated 985 * 986 * We're currently switching the port group to be activated only and 987 * let the array figure out the rest. 988 * There may be other arrays which require us to switch all port groups 989 * based on a certain policy. But until we actually encounter them it 990 * should be okay. 991 */ 992 static int alua_activate(struct scsi_device *sdev, 993 activate_complete fn, void *data) 994 { 995 struct alua_dh_data *h = sdev->handler_data; 996 int err = SCSI_DH_OK; 997 struct alua_queue_data *qdata; 998 struct alua_port_group *pg; 999 1000 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 1001 if (!qdata) { 1002 err = SCSI_DH_RES_TEMP_UNAVAIL; 1003 goto out; 1004 } 1005 qdata->callback_fn = fn; 1006 qdata->callback_data = data; 1007 1008 mutex_lock(&h->init_mutex); 1009 rcu_read_lock(); 1010 pg = rcu_dereference(h->pg); 1011 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1012 rcu_read_unlock(); 1013 kfree(qdata); 1014 err = h->init_error; 1015 mutex_unlock(&h->init_mutex); 1016 goto out; 1017 } 1018 rcu_read_unlock(); 1019 mutex_unlock(&h->init_mutex); 1020 1021 if (alua_rtpg_queue(pg, sdev, qdata, true)) 1022 fn = NULL; 1023 else 1024 err = SCSI_DH_DEV_OFFLINED; 1025 kref_put(&pg->kref, release_port_group); 1026 out: 1027 if (fn) 1028 fn(data, err); 1029 return 0; 1030 } 1031 1032 /* 1033 * alua_check - check path status 1034 * @sdev: device on the path to be checked 1035 * 1036 * Check the device status 1037 */ 1038 static void alua_check(struct scsi_device *sdev, bool force) 1039 { 1040 struct alua_dh_data *h = sdev->handler_data; 1041 struct alua_port_group *pg; 1042 1043 rcu_read_lock(); 1044 pg = rcu_dereference(h->pg); 1045 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1046 rcu_read_unlock(); 1047 return; 1048 } 1049 rcu_read_unlock(); 1050 1051 alua_rtpg_queue(pg, sdev, NULL, force); 1052 kref_put(&pg->kref, release_port_group); 1053 } 1054 1055 /* 1056 * alua_prep_fn - request callback 1057 * 1058 * Fail I/O to all paths not in state 1059 * active/optimized or active/non-optimized. 1060 */ 1061 static int alua_prep_fn(struct scsi_device *sdev, struct request *req) 1062 { 1063 struct alua_dh_data *h = sdev->handler_data; 1064 struct alua_port_group *pg; 1065 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1066 int ret = BLKPREP_OK; 1067 1068 rcu_read_lock(); 1069 pg = rcu_dereference(h->pg); 1070 if (pg) 1071 state = pg->state; 1072 rcu_read_unlock(); 1073 if (state == SCSI_ACCESS_STATE_TRANSITIONING) 1074 ret = BLKPREP_DEFER; 1075 else if (state != SCSI_ACCESS_STATE_OPTIMAL && 1076 state != SCSI_ACCESS_STATE_ACTIVE && 1077 state != SCSI_ACCESS_STATE_LBA) { 1078 ret = BLKPREP_KILL; 1079 req->rq_flags |= RQF_QUIET; 1080 } 1081 return ret; 1082 1083 } 1084 1085 static void alua_rescan(struct scsi_device *sdev) 1086 { 1087 struct alua_dh_data *h = sdev->handler_data; 1088 1089 alua_initialize(sdev, h); 1090 } 1091 1092 /* 1093 * alua_bus_attach - Attach device handler 1094 * @sdev: device to be attached to 1095 */ 1096 static int alua_bus_attach(struct scsi_device *sdev) 1097 { 1098 struct alua_dh_data *h; 1099 int err, ret = -EINVAL; 1100 1101 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1102 if (!h) 1103 return -ENOMEM; 1104 spin_lock_init(&h->pg_lock); 1105 rcu_assign_pointer(h->pg, NULL); 1106 h->init_error = SCSI_DH_OK; 1107 h->sdev = sdev; 1108 INIT_LIST_HEAD(&h->node); 1109 1110 mutex_init(&h->init_mutex); 1111 err = alua_initialize(sdev, h); 1112 if (err == SCSI_DH_NOMEM) 1113 ret = -ENOMEM; 1114 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1115 goto failed; 1116 1117 sdev->handler_data = h; 1118 return 0; 1119 failed: 1120 kfree(h); 1121 return ret; 1122 } 1123 1124 /* 1125 * alua_bus_detach - Detach device handler 1126 * @sdev: device to be detached from 1127 */ 1128 static void alua_bus_detach(struct scsi_device *sdev) 1129 { 1130 struct alua_dh_data *h = sdev->handler_data; 1131 struct alua_port_group *pg; 1132 1133 spin_lock(&h->pg_lock); 1134 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1135 rcu_assign_pointer(h->pg, NULL); 1136 h->sdev = NULL; 1137 spin_unlock(&h->pg_lock); 1138 if (pg) { 1139 spin_lock_irq(&pg->lock); 1140 list_del_rcu(&h->node); 1141 spin_unlock_irq(&pg->lock); 1142 kref_put(&pg->kref, release_port_group); 1143 } 1144 sdev->handler_data = NULL; 1145 kfree(h); 1146 } 1147 1148 static struct scsi_device_handler alua_dh = { 1149 .name = ALUA_DH_NAME, 1150 .module = THIS_MODULE, 1151 .attach = alua_bus_attach, 1152 .detach = alua_bus_detach, 1153 .prep_fn = alua_prep_fn, 1154 .check_sense = alua_check_sense, 1155 .activate = alua_activate, 1156 .rescan = alua_rescan, 1157 .set_params = alua_set_params, 1158 }; 1159 1160 static int __init alua_init(void) 1161 { 1162 int r; 1163 1164 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1165 if (!kaluad_wq) { 1166 /* Temporary failure, bypass */ 1167 return SCSI_DH_DEV_TEMP_BUSY; 1168 } 1169 kaluad_sync_wq = create_workqueue("kaluad_sync"); 1170 if (!kaluad_sync_wq) { 1171 destroy_workqueue(kaluad_wq); 1172 return SCSI_DH_DEV_TEMP_BUSY; 1173 } 1174 r = scsi_register_device_handler(&alua_dh); 1175 if (r != 0) { 1176 printk(KERN_ERR "%s: Failed to register scsi device handler", 1177 ALUA_DH_NAME); 1178 destroy_workqueue(kaluad_sync_wq); 1179 destroy_workqueue(kaluad_wq); 1180 } 1181 return r; 1182 } 1183 1184 static void __exit alua_exit(void) 1185 { 1186 scsi_unregister_device_handler(&alua_dh); 1187 destroy_workqueue(kaluad_sync_wq); 1188 destroy_workqueue(kaluad_wq); 1189 } 1190 1191 module_init(alua_init); 1192 module_exit(alua_exit); 1193 1194 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1195 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1196 MODULE_LICENSE("GPL"); 1197 MODULE_VERSION(ALUA_DH_VER); 1198