1 /* 2 * Generic SCSI-3 ALUA SCSI Device Handler 3 * 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 5 * All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 * 21 */ 22 #include <linux/slab.h> 23 #include <linux/delay.h> 24 #include <linux/module.h> 25 #include <asm/unaligned.h> 26 #include <scsi/scsi.h> 27 #include <scsi/scsi_proto.h> 28 #include <scsi/scsi_dbg.h> 29 #include <scsi/scsi_eh.h> 30 #include <scsi/scsi_dh.h> 31 32 #define ALUA_DH_NAME "alua" 33 #define ALUA_DH_VER "2.0" 34 35 #define TPGS_SUPPORT_NONE 0x00 36 #define TPGS_SUPPORT_OPTIMIZED 0x01 37 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 38 #define TPGS_SUPPORT_STANDBY 0x04 39 #define TPGS_SUPPORT_UNAVAILABLE 0x08 40 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 41 #define TPGS_SUPPORT_OFFLINE 0x40 42 #define TPGS_SUPPORT_TRANSITION 0x80 43 44 #define RTPG_FMT_MASK 0x70 45 #define RTPG_FMT_EXT_HDR 0x10 46 47 #define TPGS_MODE_UNINITIALIZED -1 48 #define TPGS_MODE_NONE 0x0 49 #define TPGS_MODE_IMPLICIT 0x1 50 #define TPGS_MODE_EXPLICIT 0x2 51 52 #define ALUA_RTPG_SIZE 128 53 #define ALUA_FAILOVER_TIMEOUT 60 54 #define ALUA_FAILOVER_RETRIES 5 55 #define ALUA_RTPG_DELAY_MSECS 5 56 57 /* device handler flags */ 58 #define ALUA_OPTIMIZE_STPG 0x01 59 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 60 #define ALUA_SYNC_STPG 0x04 61 /* State machine flags */ 62 #define ALUA_PG_RUN_RTPG 0x10 63 #define ALUA_PG_RUN_STPG 0x20 64 #define ALUA_PG_RUNNING 0x40 65 66 static uint optimize_stpg; 67 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 68 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 69 70 static LIST_HEAD(port_group_list); 71 static DEFINE_SPINLOCK(port_group_lock); 72 static struct workqueue_struct *kaluad_wq; 73 static struct workqueue_struct *kaluad_sync_wq; 74 75 struct alua_port_group { 76 struct kref kref; 77 struct rcu_head rcu; 78 struct list_head node; 79 struct list_head dh_list; 80 unsigned char device_id_str[256]; 81 int device_id_len; 82 int group_id; 83 int tpgs; 84 int state; 85 int pref; 86 unsigned flags; /* used for optimizing STPG */ 87 unsigned char transition_tmo; 88 unsigned long expiry; 89 unsigned long interval; 90 struct delayed_work rtpg_work; 91 spinlock_t lock; 92 struct list_head rtpg_list; 93 struct scsi_device *rtpg_sdev; 94 }; 95 96 struct alua_dh_data { 97 struct list_head node; 98 struct alua_port_group *pg; 99 int group_id; 100 spinlock_t pg_lock; 101 struct scsi_device *sdev; 102 int init_error; 103 struct mutex init_mutex; 104 }; 105 106 struct alua_queue_data { 107 struct list_head entry; 108 activate_complete callback_fn; 109 void *callback_data; 110 }; 111 112 #define ALUA_POLICY_SWITCH_CURRENT 0 113 #define ALUA_POLICY_SWITCH_ALL 1 114 115 static void alua_rtpg_work(struct work_struct *work); 116 static void alua_rtpg_queue(struct alua_port_group *pg, 117 struct scsi_device *sdev, 118 struct alua_queue_data *qdata, bool force); 119 static void alua_check(struct scsi_device *sdev, bool force); 120 121 static void release_port_group(struct kref *kref) 122 { 123 struct alua_port_group *pg; 124 125 pg = container_of(kref, struct alua_port_group, kref); 126 if (pg->rtpg_sdev) 127 flush_delayed_work(&pg->rtpg_work); 128 spin_lock(&port_group_lock); 129 list_del(&pg->node); 130 spin_unlock(&port_group_lock); 131 kfree_rcu(pg, rcu); 132 } 133 134 /* 135 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 136 * @sdev: sdev the command should be sent to 137 */ 138 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 139 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 140 { 141 u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; 142 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 143 REQ_FAILFAST_DRIVER; 144 145 /* Prepare the command. */ 146 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); 147 cdb[0] = MAINTENANCE_IN; 148 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 149 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 150 else 151 cdb[1] = MI_REPORT_TARGET_PGS; 152 put_unaligned_be32(bufflen, &cdb[6]); 153 154 return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, 155 buff, bufflen, sshdr, 156 ALUA_FAILOVER_TIMEOUT * HZ, 157 ALUA_FAILOVER_RETRIES, NULL, req_flags); 158 } 159 160 /* 161 * submit_stpg - Issue a SET TARGET PORT GROUP command 162 * 163 * Currently we're only setting the current target port group state 164 * to 'active/optimized' and let the array firmware figure out 165 * the states of the remaining groups. 166 */ 167 static int submit_stpg(struct scsi_device *sdev, int group_id, 168 struct scsi_sense_hdr *sshdr) 169 { 170 u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; 171 unsigned char stpg_data[8]; 172 int stpg_len = 8; 173 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 174 REQ_FAILFAST_DRIVER; 175 176 /* Prepare the data buffer */ 177 memset(stpg_data, 0, stpg_len); 178 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 179 put_unaligned_be16(group_id, &stpg_data[6]); 180 181 /* Prepare the command. */ 182 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); 183 cdb[0] = MAINTENANCE_OUT; 184 cdb[1] = MO_SET_TARGET_PGS; 185 put_unaligned_be32(stpg_len, &cdb[6]); 186 187 return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, 188 stpg_data, stpg_len, 189 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 190 ALUA_FAILOVER_RETRIES, NULL, req_flags); 191 } 192 193 struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 194 int group_id) 195 { 196 struct alua_port_group *pg; 197 198 list_for_each_entry(pg, &port_group_list, node) { 199 if (pg->group_id != group_id) 200 continue; 201 if (pg->device_id_len != id_size) 202 continue; 203 if (strncmp(pg->device_id_str, id_str, id_size)) 204 continue; 205 if (!kref_get_unless_zero(&pg->kref)) 206 continue; 207 return pg; 208 } 209 210 return NULL; 211 } 212 213 /* 214 * alua_alloc_pg - Allocate a new port_group structure 215 * @sdev: scsi device 216 * @h: alua device_handler data 217 * @group_id: port group id 218 * 219 * Allocate a new port_group structure for a given 220 * device. 221 */ 222 struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 223 int group_id, int tpgs) 224 { 225 struct alua_port_group *pg, *tmp_pg; 226 227 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 228 if (!pg) 229 return ERR_PTR(-ENOMEM); 230 231 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 232 sizeof(pg->device_id_str)); 233 if (pg->device_id_len <= 0) { 234 /* 235 * Internal error: TPGS supported but no device 236 * identifcation found. Disable ALUA support. 237 */ 238 kfree(pg); 239 sdev_printk(KERN_INFO, sdev, 240 "%s: No device descriptors found\n", 241 ALUA_DH_NAME); 242 return ERR_PTR(-ENXIO); 243 } 244 pg->group_id = group_id; 245 pg->tpgs = tpgs; 246 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 247 if (optimize_stpg) 248 pg->flags |= ALUA_OPTIMIZE_STPG; 249 kref_init(&pg->kref); 250 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 251 INIT_LIST_HEAD(&pg->rtpg_list); 252 INIT_LIST_HEAD(&pg->node); 253 INIT_LIST_HEAD(&pg->dh_list); 254 spin_lock_init(&pg->lock); 255 256 spin_lock(&port_group_lock); 257 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 258 group_id); 259 if (tmp_pg) { 260 spin_unlock(&port_group_lock); 261 kfree(pg); 262 return tmp_pg; 263 } 264 265 list_add(&pg->node, &port_group_list); 266 spin_unlock(&port_group_lock); 267 268 return pg; 269 } 270 271 /* 272 * alua_check_tpgs - Evaluate TPGS setting 273 * @sdev: device to be checked 274 * 275 * Examine the TPGS setting of the sdev to find out if ALUA 276 * is supported. 277 */ 278 static int alua_check_tpgs(struct scsi_device *sdev) 279 { 280 int tpgs = TPGS_MODE_NONE; 281 282 /* 283 * ALUA support for non-disk devices is fraught with 284 * difficulties, so disable it for now. 285 */ 286 if (sdev->type != TYPE_DISK) { 287 sdev_printk(KERN_INFO, sdev, 288 "%s: disable for non-disk devices\n", 289 ALUA_DH_NAME); 290 return tpgs; 291 } 292 293 tpgs = scsi_device_tpgs(sdev); 294 switch (tpgs) { 295 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 296 sdev_printk(KERN_INFO, sdev, 297 "%s: supports implicit and explicit TPGS\n", 298 ALUA_DH_NAME); 299 break; 300 case TPGS_MODE_EXPLICIT: 301 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 302 ALUA_DH_NAME); 303 break; 304 case TPGS_MODE_IMPLICIT: 305 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 306 ALUA_DH_NAME); 307 break; 308 case TPGS_MODE_NONE: 309 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 310 ALUA_DH_NAME); 311 break; 312 default: 313 sdev_printk(KERN_INFO, sdev, 314 "%s: unsupported TPGS setting %d\n", 315 ALUA_DH_NAME, tpgs); 316 tpgs = TPGS_MODE_NONE; 317 break; 318 } 319 320 return tpgs; 321 } 322 323 /* 324 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 325 * @sdev: device to be checked 326 * 327 * Extract the relative target port and the target port group 328 * descriptor from the list of identificators. 329 */ 330 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 331 int tpgs) 332 { 333 int rel_port = -1, group_id; 334 struct alua_port_group *pg, *old_pg = NULL; 335 bool pg_updated = false; 336 unsigned long flags; 337 338 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 339 if (group_id < 0) { 340 /* 341 * Internal error; TPGS supported but required 342 * VPD identification descriptors not present. 343 * Disable ALUA support 344 */ 345 sdev_printk(KERN_INFO, sdev, 346 "%s: No target port descriptors found\n", 347 ALUA_DH_NAME); 348 return SCSI_DH_DEV_UNSUPP; 349 } 350 351 pg = alua_alloc_pg(sdev, group_id, tpgs); 352 if (IS_ERR(pg)) { 353 if (PTR_ERR(pg) == -ENOMEM) 354 return SCSI_DH_NOMEM; 355 return SCSI_DH_DEV_UNSUPP; 356 } 357 sdev_printk(KERN_INFO, sdev, 358 "%s: device %s port group %x rel port %x\n", 359 ALUA_DH_NAME, pg->device_id_str, group_id, rel_port); 360 361 /* Check for existing port group references */ 362 spin_lock(&h->pg_lock); 363 old_pg = h->pg; 364 if (old_pg != pg) { 365 /* port group has changed. Update to new port group */ 366 if (h->pg) { 367 spin_lock_irqsave(&old_pg->lock, flags); 368 list_del_rcu(&h->node); 369 spin_unlock_irqrestore(&old_pg->lock, flags); 370 } 371 rcu_assign_pointer(h->pg, pg); 372 pg_updated = true; 373 } 374 375 spin_lock_irqsave(&pg->lock, flags); 376 if (sdev->synchronous_alua) 377 pg->flags |= ALUA_SYNC_STPG; 378 if (pg_updated) 379 list_add_rcu(&h->node, &pg->dh_list); 380 spin_unlock_irqrestore(&pg->lock, flags); 381 382 alua_rtpg_queue(h->pg, sdev, NULL, true); 383 spin_unlock(&h->pg_lock); 384 385 if (old_pg) 386 kref_put(&old_pg->kref, release_port_group); 387 388 return SCSI_DH_OK; 389 } 390 391 static char print_alua_state(unsigned char state) 392 { 393 switch (state) { 394 case SCSI_ACCESS_STATE_OPTIMAL: 395 return 'A'; 396 case SCSI_ACCESS_STATE_ACTIVE: 397 return 'N'; 398 case SCSI_ACCESS_STATE_STANDBY: 399 return 'S'; 400 case SCSI_ACCESS_STATE_UNAVAILABLE: 401 return 'U'; 402 case SCSI_ACCESS_STATE_LBA: 403 return 'L'; 404 case SCSI_ACCESS_STATE_OFFLINE: 405 return 'O'; 406 case SCSI_ACCESS_STATE_TRANSITIONING: 407 return 'T'; 408 default: 409 return 'X'; 410 } 411 } 412 413 static int alua_check_sense(struct scsi_device *sdev, 414 struct scsi_sense_hdr *sense_hdr) 415 { 416 switch (sense_hdr->sense_key) { 417 case NOT_READY: 418 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 419 /* 420 * LUN Not Accessible - ALUA state transition 421 */ 422 alua_check(sdev, false); 423 return NEEDS_RETRY; 424 } 425 break; 426 case UNIT_ATTENTION: 427 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 428 /* 429 * Power On, Reset, or Bus Device Reset. 430 * Might have obscured a state transition, 431 * so schedule a recheck. 432 */ 433 alua_check(sdev, true); 434 return ADD_TO_MLQUEUE; 435 } 436 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 437 /* 438 * Device internal reset 439 */ 440 return ADD_TO_MLQUEUE; 441 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 442 /* 443 * Mode Parameters Changed 444 */ 445 return ADD_TO_MLQUEUE; 446 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 447 /* 448 * ALUA state changed 449 */ 450 alua_check(sdev, true); 451 return ADD_TO_MLQUEUE; 452 } 453 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 454 /* 455 * Implicit ALUA state transition failed 456 */ 457 alua_check(sdev, true); 458 return ADD_TO_MLQUEUE; 459 } 460 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 461 /* 462 * Inquiry data has changed 463 */ 464 return ADD_TO_MLQUEUE; 465 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 466 /* 467 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 468 * when switching controllers on targets like 469 * Intel Multi-Flex. We can just retry. 470 */ 471 return ADD_TO_MLQUEUE; 472 break; 473 } 474 475 return SCSI_RETURN_NOT_HANDLED; 476 } 477 478 /* 479 * alua_tur - Send a TEST UNIT READY 480 * @sdev: device to which the TEST UNIT READY command should be send 481 * 482 * Send a TEST UNIT READY to @sdev to figure out the device state 483 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 484 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 485 */ 486 static int alua_tur(struct scsi_device *sdev) 487 { 488 struct scsi_sense_hdr sense_hdr; 489 int retval; 490 491 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 492 ALUA_FAILOVER_RETRIES, &sense_hdr); 493 if (sense_hdr.sense_key == NOT_READY && 494 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 495 return SCSI_DH_RETRY; 496 else if (retval) 497 return SCSI_DH_IO; 498 else 499 return SCSI_DH_OK; 500 } 501 502 /* 503 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 504 * @sdev: the device to be evaluated. 505 * 506 * Evaluate the Target Port Group State. 507 * Returns SCSI_DH_DEV_OFFLINED if the path is 508 * found to be unusable. 509 */ 510 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 511 { 512 struct scsi_sense_hdr sense_hdr; 513 struct alua_port_group *tmp_pg; 514 int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; 515 unsigned char *desc, *buff; 516 unsigned err, retval; 517 unsigned int tpg_desc_tbl_off; 518 unsigned char orig_transition_tmo; 519 unsigned long flags; 520 521 if (!pg->expiry) { 522 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 523 524 if (pg->transition_tmo) 525 transition_tmo = pg->transition_tmo * HZ; 526 527 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 528 } 529 530 buff = kzalloc(bufflen, GFP_KERNEL); 531 if (!buff) 532 return SCSI_DH_DEV_TEMP_BUSY; 533 534 retry: 535 err = 0; 536 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 537 538 if (retval) { 539 if (!scsi_sense_valid(&sense_hdr)) { 540 sdev_printk(KERN_INFO, sdev, 541 "%s: rtpg failed, result %d\n", 542 ALUA_DH_NAME, retval); 543 kfree(buff); 544 if (driver_byte(retval) == DRIVER_ERROR) 545 return SCSI_DH_DEV_TEMP_BUSY; 546 return SCSI_DH_IO; 547 } 548 549 /* 550 * submit_rtpg() has failed on existing arrays 551 * when requesting extended header info, and 552 * the array doesn't support extended headers, 553 * even though it shouldn't according to T10. 554 * The retry without rtpg_ext_hdr_req set 555 * handles this. 556 */ 557 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 558 sense_hdr.sense_key == ILLEGAL_REQUEST && 559 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 560 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 561 goto retry; 562 } 563 /* 564 * Retry on ALUA state transition or if any 565 * UNIT ATTENTION occurred. 566 */ 567 if (sense_hdr.sense_key == NOT_READY && 568 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 569 err = SCSI_DH_RETRY; 570 else if (sense_hdr.sense_key == UNIT_ATTENTION) 571 err = SCSI_DH_RETRY; 572 if (err == SCSI_DH_RETRY && 573 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 574 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 575 ALUA_DH_NAME); 576 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 577 return err; 578 } 579 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 580 ALUA_DH_NAME); 581 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 582 kfree(buff); 583 pg->expiry = 0; 584 return SCSI_DH_IO; 585 } 586 587 len = get_unaligned_be32(&buff[0]) + 4; 588 589 if (len > bufflen) { 590 /* Resubmit with the correct length */ 591 kfree(buff); 592 bufflen = len; 593 buff = kmalloc(bufflen, GFP_KERNEL); 594 if (!buff) { 595 sdev_printk(KERN_WARNING, sdev, 596 "%s: kmalloc buffer failed\n",__func__); 597 /* Temporary failure, bypass */ 598 pg->expiry = 0; 599 return SCSI_DH_DEV_TEMP_BUSY; 600 } 601 goto retry; 602 } 603 604 orig_transition_tmo = pg->transition_tmo; 605 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 606 pg->transition_tmo = buff[5]; 607 else 608 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 609 610 if (orig_transition_tmo != pg->transition_tmo) { 611 sdev_printk(KERN_INFO, sdev, 612 "%s: transition timeout set to %d seconds\n", 613 ALUA_DH_NAME, pg->transition_tmo); 614 pg->expiry = jiffies + pg->transition_tmo * HZ; 615 } 616 617 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 618 tpg_desc_tbl_off = 8; 619 else 620 tpg_desc_tbl_off = 4; 621 622 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 623 k < len; 624 k += off, desc += off) { 625 u16 group_id = get_unaligned_be16(&desc[2]); 626 627 spin_lock_irqsave(&port_group_lock, flags); 628 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 629 group_id); 630 spin_unlock_irqrestore(&port_group_lock, flags); 631 if (tmp_pg) { 632 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 633 if ((tmp_pg == pg) || 634 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 635 struct alua_dh_data *h; 636 637 tmp_pg->state = desc[0] & 0x0f; 638 tmp_pg->pref = desc[0] >> 7; 639 rcu_read_lock(); 640 list_for_each_entry_rcu(h, 641 &tmp_pg->dh_list, node) { 642 /* h->sdev should always be valid */ 643 BUG_ON(!h->sdev); 644 h->sdev->access_state = desc[0]; 645 } 646 rcu_read_unlock(); 647 } 648 if (tmp_pg == pg) 649 valid_states = desc[1]; 650 spin_unlock_irqrestore(&tmp_pg->lock, flags); 651 } 652 kref_put(&tmp_pg->kref, release_port_group); 653 } 654 off = 8 + (desc[7] * 4); 655 } 656 657 spin_lock_irqsave(&pg->lock, flags); 658 sdev_printk(KERN_INFO, sdev, 659 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 660 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 661 pg->pref ? "preferred" : "non-preferred", 662 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 663 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 664 valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 665 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 666 valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 667 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 668 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 669 670 switch (pg->state) { 671 case SCSI_ACCESS_STATE_TRANSITIONING: 672 if (time_before(jiffies, pg->expiry)) { 673 /* State transition, retry */ 674 pg->interval = 2; 675 err = SCSI_DH_RETRY; 676 } else { 677 struct alua_dh_data *h; 678 679 /* Transitioning time exceeded, set port to standby */ 680 err = SCSI_DH_IO; 681 pg->state = SCSI_ACCESS_STATE_STANDBY; 682 pg->expiry = 0; 683 rcu_read_lock(); 684 list_for_each_entry_rcu(h, &pg->dh_list, node) { 685 BUG_ON(!h->sdev); 686 h->sdev->access_state = 687 (pg->state & SCSI_ACCESS_STATE_MASK); 688 if (pg->pref) 689 h->sdev->access_state |= 690 SCSI_ACCESS_STATE_PREFERRED; 691 } 692 rcu_read_unlock(); 693 } 694 break; 695 case SCSI_ACCESS_STATE_OFFLINE: 696 /* Path unusable */ 697 err = SCSI_DH_DEV_OFFLINED; 698 pg->expiry = 0; 699 break; 700 default: 701 /* Useable path if active */ 702 err = SCSI_DH_OK; 703 pg->expiry = 0; 704 break; 705 } 706 spin_unlock_irqrestore(&pg->lock, flags); 707 kfree(buff); 708 return err; 709 } 710 711 /* 712 * alua_stpg - Issue a SET TARGET PORT GROUP command 713 * 714 * Issue a SET TARGET PORT GROUP command and evaluate the 715 * response. Returns SCSI_DH_RETRY per default to trigger 716 * a re-evaluation of the target group state or SCSI_DH_OK 717 * if no further action needs to be taken. 718 */ 719 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 720 { 721 int retval; 722 struct scsi_sense_hdr sense_hdr; 723 724 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 725 /* Only implicit ALUA supported, retry */ 726 return SCSI_DH_RETRY; 727 } 728 switch (pg->state) { 729 case SCSI_ACCESS_STATE_OPTIMAL: 730 return SCSI_DH_OK; 731 case SCSI_ACCESS_STATE_ACTIVE: 732 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 733 !pg->pref && 734 (pg->tpgs & TPGS_MODE_IMPLICIT)) 735 return SCSI_DH_OK; 736 break; 737 case SCSI_ACCESS_STATE_STANDBY: 738 case SCSI_ACCESS_STATE_UNAVAILABLE: 739 break; 740 case SCSI_ACCESS_STATE_OFFLINE: 741 return SCSI_DH_IO; 742 case SCSI_ACCESS_STATE_TRANSITIONING: 743 break; 744 default: 745 sdev_printk(KERN_INFO, sdev, 746 "%s: stpg failed, unhandled TPGS state %d", 747 ALUA_DH_NAME, pg->state); 748 return SCSI_DH_NOSYS; 749 } 750 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 751 752 if (retval) { 753 if (!scsi_sense_valid(&sense_hdr)) { 754 sdev_printk(KERN_INFO, sdev, 755 "%s: stpg failed, result %d", 756 ALUA_DH_NAME, retval); 757 if (driver_byte(retval) == DRIVER_ERROR) 758 return SCSI_DH_DEV_TEMP_BUSY; 759 } else { 760 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 761 ALUA_DH_NAME); 762 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 763 } 764 } 765 /* Retry RTPG */ 766 return SCSI_DH_RETRY; 767 } 768 769 static void alua_rtpg_work(struct work_struct *work) 770 { 771 struct alua_port_group *pg = 772 container_of(work, struct alua_port_group, rtpg_work.work); 773 struct scsi_device *sdev; 774 LIST_HEAD(qdata_list); 775 int err = SCSI_DH_OK; 776 struct alua_queue_data *qdata, *tmp; 777 unsigned long flags; 778 struct workqueue_struct *alua_wq = kaluad_wq; 779 780 spin_lock_irqsave(&pg->lock, flags); 781 sdev = pg->rtpg_sdev; 782 if (!sdev) { 783 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 784 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 785 spin_unlock_irqrestore(&pg->lock, flags); 786 return; 787 } 788 if (pg->flags & ALUA_SYNC_STPG) 789 alua_wq = kaluad_sync_wq; 790 pg->flags |= ALUA_PG_RUNNING; 791 if (pg->flags & ALUA_PG_RUN_RTPG) { 792 int state = pg->state; 793 794 pg->flags &= ~ALUA_PG_RUN_RTPG; 795 spin_unlock_irqrestore(&pg->lock, flags); 796 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 797 if (alua_tur(sdev) == SCSI_DH_RETRY) { 798 spin_lock_irqsave(&pg->lock, flags); 799 pg->flags &= ~ALUA_PG_RUNNING; 800 pg->flags |= ALUA_PG_RUN_RTPG; 801 spin_unlock_irqrestore(&pg->lock, flags); 802 queue_delayed_work(alua_wq, &pg->rtpg_work, 803 pg->interval * HZ); 804 return; 805 } 806 /* Send RTPG on failure or if TUR indicates SUCCESS */ 807 } 808 err = alua_rtpg(sdev, pg); 809 spin_lock_irqsave(&pg->lock, flags); 810 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 811 pg->flags &= ~ALUA_PG_RUNNING; 812 pg->flags |= ALUA_PG_RUN_RTPG; 813 spin_unlock_irqrestore(&pg->lock, flags); 814 queue_delayed_work(alua_wq, &pg->rtpg_work, 815 pg->interval * HZ); 816 return; 817 } 818 if (err != SCSI_DH_OK) 819 pg->flags &= ~ALUA_PG_RUN_STPG; 820 } 821 if (pg->flags & ALUA_PG_RUN_STPG) { 822 pg->flags &= ~ALUA_PG_RUN_STPG; 823 spin_unlock_irqrestore(&pg->lock, flags); 824 err = alua_stpg(sdev, pg); 825 spin_lock_irqsave(&pg->lock, flags); 826 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 827 pg->flags |= ALUA_PG_RUN_RTPG; 828 pg->interval = 0; 829 pg->flags &= ~ALUA_PG_RUNNING; 830 spin_unlock_irqrestore(&pg->lock, flags); 831 queue_delayed_work(alua_wq, &pg->rtpg_work, 832 pg->interval * HZ); 833 return; 834 } 835 } 836 837 list_splice_init(&pg->rtpg_list, &qdata_list); 838 pg->rtpg_sdev = NULL; 839 spin_unlock_irqrestore(&pg->lock, flags); 840 841 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 842 list_del(&qdata->entry); 843 if (qdata->callback_fn) 844 qdata->callback_fn(qdata->callback_data, err); 845 kfree(qdata); 846 } 847 spin_lock_irqsave(&pg->lock, flags); 848 pg->flags &= ~ALUA_PG_RUNNING; 849 spin_unlock_irqrestore(&pg->lock, flags); 850 scsi_device_put(sdev); 851 kref_put(&pg->kref, release_port_group); 852 } 853 854 static void alua_rtpg_queue(struct alua_port_group *pg, 855 struct scsi_device *sdev, 856 struct alua_queue_data *qdata, bool force) 857 { 858 int start_queue = 0; 859 unsigned long flags; 860 struct workqueue_struct *alua_wq = kaluad_wq; 861 862 if (!pg) 863 return; 864 865 spin_lock_irqsave(&pg->lock, flags); 866 if (qdata) { 867 list_add_tail(&qdata->entry, &pg->rtpg_list); 868 pg->flags |= ALUA_PG_RUN_STPG; 869 force = true; 870 } 871 if (pg->rtpg_sdev == NULL) { 872 pg->interval = 0; 873 pg->flags |= ALUA_PG_RUN_RTPG; 874 kref_get(&pg->kref); 875 pg->rtpg_sdev = sdev; 876 scsi_device_get(sdev); 877 start_queue = 1; 878 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 879 pg->flags |= ALUA_PG_RUN_RTPG; 880 /* Do not queue if the worker is already running */ 881 if (!(pg->flags & ALUA_PG_RUNNING)) { 882 kref_get(&pg->kref); 883 start_queue = 1; 884 } 885 } 886 887 if (pg->flags & ALUA_SYNC_STPG) 888 alua_wq = kaluad_sync_wq; 889 spin_unlock_irqrestore(&pg->lock, flags); 890 891 if (start_queue && 892 !queue_delayed_work(alua_wq, &pg->rtpg_work, 893 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { 894 scsi_device_put(sdev); 895 kref_put(&pg->kref, release_port_group); 896 } 897 } 898 899 /* 900 * alua_initialize - Initialize ALUA state 901 * @sdev: the device to be initialized 902 * 903 * For the prep_fn to work correctly we have 904 * to initialize the ALUA state for the device. 905 */ 906 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 907 { 908 int err = SCSI_DH_DEV_UNSUPP, tpgs; 909 910 mutex_lock(&h->init_mutex); 911 tpgs = alua_check_tpgs(sdev); 912 if (tpgs != TPGS_MODE_NONE) 913 err = alua_check_vpd(sdev, h, tpgs); 914 h->init_error = err; 915 mutex_unlock(&h->init_mutex); 916 return err; 917 } 918 /* 919 * alua_set_params - set/unset the optimize flag 920 * @sdev: device on the path to be activated 921 * params - parameters in the following format 922 * "no_of_params\0param1\0param2\0param3\0...\0" 923 * For example, to set the flag pass the following parameters 924 * from multipath.conf 925 * hardware_handler "2 alua 1" 926 */ 927 static int alua_set_params(struct scsi_device *sdev, const char *params) 928 { 929 struct alua_dh_data *h = sdev->handler_data; 930 struct alua_port_group __rcu *pg = NULL; 931 unsigned int optimize = 0, argc; 932 const char *p = params; 933 int result = SCSI_DH_OK; 934 unsigned long flags; 935 936 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 937 return -EINVAL; 938 939 while (*p++) 940 ; 941 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 942 return -EINVAL; 943 944 rcu_read_lock(); 945 pg = rcu_dereference(h->pg); 946 if (!pg) { 947 rcu_read_unlock(); 948 return -ENXIO; 949 } 950 spin_lock_irqsave(&pg->lock, flags); 951 if (optimize) 952 pg->flags |= ALUA_OPTIMIZE_STPG; 953 else 954 pg->flags &= ~ALUA_OPTIMIZE_STPG; 955 spin_unlock_irqrestore(&pg->lock, flags); 956 rcu_read_unlock(); 957 958 return result; 959 } 960 961 /* 962 * alua_activate - activate a path 963 * @sdev: device on the path to be activated 964 * 965 * We're currently switching the port group to be activated only and 966 * let the array figure out the rest. 967 * There may be other arrays which require us to switch all port groups 968 * based on a certain policy. But until we actually encounter them it 969 * should be okay. 970 */ 971 static int alua_activate(struct scsi_device *sdev, 972 activate_complete fn, void *data) 973 { 974 struct alua_dh_data *h = sdev->handler_data; 975 int err = SCSI_DH_OK; 976 struct alua_queue_data *qdata; 977 struct alua_port_group __rcu *pg; 978 979 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 980 if (!qdata) { 981 err = SCSI_DH_RES_TEMP_UNAVAIL; 982 goto out; 983 } 984 qdata->callback_fn = fn; 985 qdata->callback_data = data; 986 987 mutex_lock(&h->init_mutex); 988 rcu_read_lock(); 989 pg = rcu_dereference(h->pg); 990 if (!pg || !kref_get_unless_zero(&pg->kref)) { 991 rcu_read_unlock(); 992 kfree(qdata); 993 err = h->init_error; 994 mutex_unlock(&h->init_mutex); 995 goto out; 996 } 997 fn = NULL; 998 rcu_read_unlock(); 999 mutex_unlock(&h->init_mutex); 1000 1001 alua_rtpg_queue(pg, sdev, qdata, true); 1002 kref_put(&pg->kref, release_port_group); 1003 out: 1004 if (fn) 1005 fn(data, err); 1006 return 0; 1007 } 1008 1009 /* 1010 * alua_check - check path status 1011 * @sdev: device on the path to be checked 1012 * 1013 * Check the device status 1014 */ 1015 static void alua_check(struct scsi_device *sdev, bool force) 1016 { 1017 struct alua_dh_data *h = sdev->handler_data; 1018 struct alua_port_group *pg; 1019 1020 rcu_read_lock(); 1021 pg = rcu_dereference(h->pg); 1022 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1023 rcu_read_unlock(); 1024 return; 1025 } 1026 rcu_read_unlock(); 1027 1028 alua_rtpg_queue(pg, sdev, NULL, force); 1029 kref_put(&pg->kref, release_port_group); 1030 } 1031 1032 /* 1033 * alua_prep_fn - request callback 1034 * 1035 * Fail I/O to all paths not in state 1036 * active/optimized or active/non-optimized. 1037 */ 1038 static int alua_prep_fn(struct scsi_device *sdev, struct request *req) 1039 { 1040 struct alua_dh_data *h = sdev->handler_data; 1041 struct alua_port_group __rcu *pg; 1042 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1043 int ret = BLKPREP_OK; 1044 1045 rcu_read_lock(); 1046 pg = rcu_dereference(h->pg); 1047 if (pg) 1048 state = pg->state; 1049 rcu_read_unlock(); 1050 if (state == SCSI_ACCESS_STATE_TRANSITIONING) 1051 ret = BLKPREP_DEFER; 1052 else if (state != SCSI_ACCESS_STATE_OPTIMAL && 1053 state != SCSI_ACCESS_STATE_ACTIVE && 1054 state != SCSI_ACCESS_STATE_LBA) { 1055 ret = BLKPREP_KILL; 1056 req->cmd_flags |= REQ_QUIET; 1057 } 1058 return ret; 1059 1060 } 1061 1062 static void alua_rescan(struct scsi_device *sdev) 1063 { 1064 struct alua_dh_data *h = sdev->handler_data; 1065 1066 alua_initialize(sdev, h); 1067 } 1068 1069 /* 1070 * alua_bus_attach - Attach device handler 1071 * @sdev: device to be attached to 1072 */ 1073 static int alua_bus_attach(struct scsi_device *sdev) 1074 { 1075 struct alua_dh_data *h; 1076 int err, ret = -EINVAL; 1077 1078 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1079 if (!h) 1080 return -ENOMEM; 1081 spin_lock_init(&h->pg_lock); 1082 rcu_assign_pointer(h->pg, NULL); 1083 h->init_error = SCSI_DH_OK; 1084 h->sdev = sdev; 1085 INIT_LIST_HEAD(&h->node); 1086 1087 mutex_init(&h->init_mutex); 1088 err = alua_initialize(sdev, h); 1089 if (err == SCSI_DH_NOMEM) 1090 ret = -ENOMEM; 1091 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1092 goto failed; 1093 1094 sdev->handler_data = h; 1095 return 0; 1096 failed: 1097 kfree(h); 1098 return ret; 1099 } 1100 1101 /* 1102 * alua_bus_detach - Detach device handler 1103 * @sdev: device to be detached from 1104 */ 1105 static void alua_bus_detach(struct scsi_device *sdev) 1106 { 1107 struct alua_dh_data *h = sdev->handler_data; 1108 struct alua_port_group *pg; 1109 1110 spin_lock(&h->pg_lock); 1111 pg = h->pg; 1112 rcu_assign_pointer(h->pg, NULL); 1113 h->sdev = NULL; 1114 spin_unlock(&h->pg_lock); 1115 if (pg) { 1116 spin_lock_irq(&pg->lock); 1117 list_del_rcu(&h->node); 1118 spin_unlock_irq(&pg->lock); 1119 kref_put(&pg->kref, release_port_group); 1120 } 1121 sdev->handler_data = NULL; 1122 kfree(h); 1123 } 1124 1125 static struct scsi_device_handler alua_dh = { 1126 .name = ALUA_DH_NAME, 1127 .module = THIS_MODULE, 1128 .attach = alua_bus_attach, 1129 .detach = alua_bus_detach, 1130 .prep_fn = alua_prep_fn, 1131 .check_sense = alua_check_sense, 1132 .activate = alua_activate, 1133 .rescan = alua_rescan, 1134 .set_params = alua_set_params, 1135 }; 1136 1137 static int __init alua_init(void) 1138 { 1139 int r; 1140 1141 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1142 if (!kaluad_wq) { 1143 /* Temporary failure, bypass */ 1144 return SCSI_DH_DEV_TEMP_BUSY; 1145 } 1146 kaluad_sync_wq = create_workqueue("kaluad_sync"); 1147 if (!kaluad_sync_wq) { 1148 destroy_workqueue(kaluad_wq); 1149 return SCSI_DH_DEV_TEMP_BUSY; 1150 } 1151 r = scsi_register_device_handler(&alua_dh); 1152 if (r != 0) { 1153 printk(KERN_ERR "%s: Failed to register scsi device handler", 1154 ALUA_DH_NAME); 1155 destroy_workqueue(kaluad_sync_wq); 1156 destroy_workqueue(kaluad_wq); 1157 } 1158 return r; 1159 } 1160 1161 static void __exit alua_exit(void) 1162 { 1163 scsi_unregister_device_handler(&alua_dh); 1164 destroy_workqueue(kaluad_sync_wq); 1165 destroy_workqueue(kaluad_wq); 1166 } 1167 1168 module_init(alua_init); 1169 module_exit(alua_exit); 1170 1171 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1172 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1173 MODULE_LICENSE("GPL"); 1174 MODULE_VERSION(ALUA_DH_VER); 1175