1 /* 2 * Generic SCSI-3 ALUA SCSI Device Handler 3 * 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 5 * All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 * 21 */ 22 #include <linux/slab.h> 23 #include <linux/delay.h> 24 #include <linux/module.h> 25 #include <asm/unaligned.h> 26 #include <scsi/scsi.h> 27 #include <scsi/scsi_proto.h> 28 #include <scsi/scsi_dbg.h> 29 #include <scsi/scsi_eh.h> 30 #include <scsi/scsi_dh.h> 31 32 #define ALUA_DH_NAME "alua" 33 #define ALUA_DH_VER "2.0" 34 35 #define TPGS_SUPPORT_NONE 0x00 36 #define TPGS_SUPPORT_OPTIMIZED 0x01 37 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 38 #define TPGS_SUPPORT_STANDBY 0x04 39 #define TPGS_SUPPORT_UNAVAILABLE 0x08 40 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 41 #define TPGS_SUPPORT_OFFLINE 0x40 42 #define TPGS_SUPPORT_TRANSITION 0x80 43 #define TPGS_SUPPORT_ALL 0xdf 44 45 #define RTPG_FMT_MASK 0x70 46 #define RTPG_FMT_EXT_HDR 0x10 47 48 #define TPGS_MODE_UNINITIALIZED -1 49 #define TPGS_MODE_NONE 0x0 50 #define TPGS_MODE_IMPLICIT 0x1 51 #define TPGS_MODE_EXPLICIT 0x2 52 53 #define ALUA_RTPG_SIZE 128 54 #define ALUA_FAILOVER_TIMEOUT 60 55 #define ALUA_FAILOVER_RETRIES 5 56 #define ALUA_RTPG_DELAY_MSECS 5 57 58 /* device handler flags */ 59 #define ALUA_OPTIMIZE_STPG 0x01 60 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 61 /* State machine flags */ 62 #define ALUA_PG_RUN_RTPG 0x10 63 #define ALUA_PG_RUN_STPG 0x20 64 #define ALUA_PG_RUNNING 0x40 65 66 static uint optimize_stpg; 67 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 68 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 69 70 static LIST_HEAD(port_group_list); 71 static DEFINE_SPINLOCK(port_group_lock); 72 static struct workqueue_struct *kaluad_wq; 73 74 struct alua_port_group { 75 struct kref kref; 76 struct rcu_head rcu; 77 struct list_head node; 78 struct list_head dh_list; 79 unsigned char device_id_str[256]; 80 int device_id_len; 81 int group_id; 82 int tpgs; 83 int state; 84 int pref; 85 int valid_states; 86 unsigned flags; /* used for optimizing STPG */ 87 unsigned char transition_tmo; 88 unsigned long expiry; 89 unsigned long interval; 90 struct delayed_work rtpg_work; 91 spinlock_t lock; 92 struct list_head rtpg_list; 93 struct scsi_device *rtpg_sdev; 94 }; 95 96 struct alua_dh_data { 97 struct list_head node; 98 struct alua_port_group __rcu *pg; 99 int group_id; 100 spinlock_t pg_lock; 101 struct scsi_device *sdev; 102 int init_error; 103 struct mutex init_mutex; 104 }; 105 106 struct alua_queue_data { 107 struct list_head entry; 108 activate_complete callback_fn; 109 void *callback_data; 110 }; 111 112 #define ALUA_POLICY_SWITCH_CURRENT 0 113 #define ALUA_POLICY_SWITCH_ALL 1 114 115 static void alua_rtpg_work(struct work_struct *work); 116 static bool alua_rtpg_queue(struct alua_port_group *pg, 117 struct scsi_device *sdev, 118 struct alua_queue_data *qdata, bool force); 119 static void alua_check(struct scsi_device *sdev, bool force); 120 121 static void release_port_group(struct kref *kref) 122 { 123 struct alua_port_group *pg; 124 125 pg = container_of(kref, struct alua_port_group, kref); 126 if (pg->rtpg_sdev) 127 flush_delayed_work(&pg->rtpg_work); 128 spin_lock(&port_group_lock); 129 list_del(&pg->node); 130 spin_unlock(&port_group_lock); 131 kfree_rcu(pg, rcu); 132 } 133 134 /* 135 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 136 * @sdev: sdev the command should be sent to 137 */ 138 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 139 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 140 { 141 u8 cdb[MAX_COMMAND_SIZE]; 142 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 143 REQ_FAILFAST_DRIVER; 144 145 /* Prepare the command. */ 146 memset(cdb, 0x0, MAX_COMMAND_SIZE); 147 cdb[0] = MAINTENANCE_IN; 148 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 149 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 150 else 151 cdb[1] = MI_REPORT_TARGET_PGS; 152 put_unaligned_be32(bufflen, &cdb[6]); 153 154 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, 155 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 156 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 157 } 158 159 /* 160 * submit_stpg - Issue a SET TARGET PORT GROUP command 161 * 162 * Currently we're only setting the current target port group state 163 * to 'active/optimized' and let the array firmware figure out 164 * the states of the remaining groups. 165 */ 166 static int submit_stpg(struct scsi_device *sdev, int group_id, 167 struct scsi_sense_hdr *sshdr) 168 { 169 u8 cdb[MAX_COMMAND_SIZE]; 170 unsigned char stpg_data[8]; 171 int stpg_len = 8; 172 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 173 REQ_FAILFAST_DRIVER; 174 175 /* Prepare the data buffer */ 176 memset(stpg_data, 0, stpg_len); 177 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 178 put_unaligned_be16(group_id, &stpg_data[6]); 179 180 /* Prepare the command. */ 181 memset(cdb, 0x0, MAX_COMMAND_SIZE); 182 cdb[0] = MAINTENANCE_OUT; 183 cdb[1] = MO_SET_TARGET_PGS; 184 put_unaligned_be32(stpg_len, &cdb[6]); 185 186 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, 187 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 188 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 189 } 190 191 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 192 int group_id) 193 { 194 struct alua_port_group *pg; 195 196 if (!id_str || !id_size || !strlen(id_str)) 197 return NULL; 198 199 list_for_each_entry(pg, &port_group_list, node) { 200 if (pg->group_id != group_id) 201 continue; 202 if (!pg->device_id_len || pg->device_id_len != id_size) 203 continue; 204 if (strncmp(pg->device_id_str, id_str, id_size)) 205 continue; 206 if (!kref_get_unless_zero(&pg->kref)) 207 continue; 208 return pg; 209 } 210 211 return NULL; 212 } 213 214 /* 215 * alua_alloc_pg - Allocate a new port_group structure 216 * @sdev: scsi device 217 * @group_id: port group id 218 * @tpgs: target port group settings 219 * 220 * Allocate a new port_group structure for a given 221 * device. 222 */ 223 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 224 int group_id, int tpgs) 225 { 226 struct alua_port_group *pg, *tmp_pg; 227 228 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 229 if (!pg) 230 return ERR_PTR(-ENOMEM); 231 232 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 233 sizeof(pg->device_id_str)); 234 if (pg->device_id_len <= 0) { 235 /* 236 * TPGS supported but no device identification found. 237 * Generate private device identification. 238 */ 239 sdev_printk(KERN_INFO, sdev, 240 "%s: No device descriptors found\n", 241 ALUA_DH_NAME); 242 pg->device_id_str[0] = '\0'; 243 pg->device_id_len = 0; 244 } 245 pg->group_id = group_id; 246 pg->tpgs = tpgs; 247 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 248 pg->valid_states = TPGS_SUPPORT_ALL; 249 if (optimize_stpg) 250 pg->flags |= ALUA_OPTIMIZE_STPG; 251 kref_init(&pg->kref); 252 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 253 INIT_LIST_HEAD(&pg->rtpg_list); 254 INIT_LIST_HEAD(&pg->node); 255 INIT_LIST_HEAD(&pg->dh_list); 256 spin_lock_init(&pg->lock); 257 258 spin_lock(&port_group_lock); 259 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 260 group_id); 261 if (tmp_pg) { 262 spin_unlock(&port_group_lock); 263 kfree(pg); 264 return tmp_pg; 265 } 266 267 list_add(&pg->node, &port_group_list); 268 spin_unlock(&port_group_lock); 269 270 return pg; 271 } 272 273 /* 274 * alua_check_tpgs - Evaluate TPGS setting 275 * @sdev: device to be checked 276 * 277 * Examine the TPGS setting of the sdev to find out if ALUA 278 * is supported. 279 */ 280 static int alua_check_tpgs(struct scsi_device *sdev) 281 { 282 int tpgs = TPGS_MODE_NONE; 283 284 /* 285 * ALUA support for non-disk devices is fraught with 286 * difficulties, so disable it for now. 287 */ 288 if (sdev->type != TYPE_DISK) { 289 sdev_printk(KERN_INFO, sdev, 290 "%s: disable for non-disk devices\n", 291 ALUA_DH_NAME); 292 return tpgs; 293 } 294 295 tpgs = scsi_device_tpgs(sdev); 296 switch (tpgs) { 297 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 298 sdev_printk(KERN_INFO, sdev, 299 "%s: supports implicit and explicit TPGS\n", 300 ALUA_DH_NAME); 301 break; 302 case TPGS_MODE_EXPLICIT: 303 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 304 ALUA_DH_NAME); 305 break; 306 case TPGS_MODE_IMPLICIT: 307 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 308 ALUA_DH_NAME); 309 break; 310 case TPGS_MODE_NONE: 311 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 312 ALUA_DH_NAME); 313 break; 314 default: 315 sdev_printk(KERN_INFO, sdev, 316 "%s: unsupported TPGS setting %d\n", 317 ALUA_DH_NAME, tpgs); 318 tpgs = TPGS_MODE_NONE; 319 break; 320 } 321 322 return tpgs; 323 } 324 325 /* 326 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 327 * @sdev: device to be checked 328 * 329 * Extract the relative target port and the target port group 330 * descriptor from the list of identificators. 331 */ 332 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 333 int tpgs) 334 { 335 int rel_port = -1, group_id; 336 struct alua_port_group *pg, *old_pg = NULL; 337 bool pg_updated = false; 338 unsigned long flags; 339 340 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 341 if (group_id < 0) { 342 /* 343 * Internal error; TPGS supported but required 344 * VPD identification descriptors not present. 345 * Disable ALUA support 346 */ 347 sdev_printk(KERN_INFO, sdev, 348 "%s: No target port descriptors found\n", 349 ALUA_DH_NAME); 350 return SCSI_DH_DEV_UNSUPP; 351 } 352 353 pg = alua_alloc_pg(sdev, group_id, tpgs); 354 if (IS_ERR(pg)) { 355 if (PTR_ERR(pg) == -ENOMEM) 356 return SCSI_DH_NOMEM; 357 return SCSI_DH_DEV_UNSUPP; 358 } 359 if (pg->device_id_len) 360 sdev_printk(KERN_INFO, sdev, 361 "%s: device %s port group %x rel port %x\n", 362 ALUA_DH_NAME, pg->device_id_str, 363 group_id, rel_port); 364 else 365 sdev_printk(KERN_INFO, sdev, 366 "%s: port group %x rel port %x\n", 367 ALUA_DH_NAME, group_id, rel_port); 368 369 /* Check for existing port group references */ 370 spin_lock(&h->pg_lock); 371 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 372 if (old_pg != pg) { 373 /* port group has changed. Update to new port group */ 374 if (h->pg) { 375 spin_lock_irqsave(&old_pg->lock, flags); 376 list_del_rcu(&h->node); 377 spin_unlock_irqrestore(&old_pg->lock, flags); 378 } 379 rcu_assign_pointer(h->pg, pg); 380 pg_updated = true; 381 } 382 383 spin_lock_irqsave(&pg->lock, flags); 384 if (pg_updated) 385 list_add_rcu(&h->node, &pg->dh_list); 386 spin_unlock_irqrestore(&pg->lock, flags); 387 388 alua_rtpg_queue(rcu_dereference_protected(h->pg, 389 lockdep_is_held(&h->pg_lock)), 390 sdev, NULL, true); 391 spin_unlock(&h->pg_lock); 392 393 if (old_pg) 394 kref_put(&old_pg->kref, release_port_group); 395 396 return SCSI_DH_OK; 397 } 398 399 static char print_alua_state(unsigned char state) 400 { 401 switch (state) { 402 case SCSI_ACCESS_STATE_OPTIMAL: 403 return 'A'; 404 case SCSI_ACCESS_STATE_ACTIVE: 405 return 'N'; 406 case SCSI_ACCESS_STATE_STANDBY: 407 return 'S'; 408 case SCSI_ACCESS_STATE_UNAVAILABLE: 409 return 'U'; 410 case SCSI_ACCESS_STATE_LBA: 411 return 'L'; 412 case SCSI_ACCESS_STATE_OFFLINE: 413 return 'O'; 414 case SCSI_ACCESS_STATE_TRANSITIONING: 415 return 'T'; 416 default: 417 return 'X'; 418 } 419 } 420 421 static int alua_check_sense(struct scsi_device *sdev, 422 struct scsi_sense_hdr *sense_hdr) 423 { 424 switch (sense_hdr->sense_key) { 425 case NOT_READY: 426 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 427 /* 428 * LUN Not Accessible - ALUA state transition 429 */ 430 alua_check(sdev, false); 431 return NEEDS_RETRY; 432 } 433 break; 434 case UNIT_ATTENTION: 435 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 436 /* 437 * Power On, Reset, or Bus Device Reset. 438 * Might have obscured a state transition, 439 * so schedule a recheck. 440 */ 441 alua_check(sdev, true); 442 return ADD_TO_MLQUEUE; 443 } 444 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 445 /* 446 * Device internal reset 447 */ 448 return ADD_TO_MLQUEUE; 449 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 450 /* 451 * Mode Parameters Changed 452 */ 453 return ADD_TO_MLQUEUE; 454 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 455 /* 456 * ALUA state changed 457 */ 458 alua_check(sdev, true); 459 return ADD_TO_MLQUEUE; 460 } 461 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 462 /* 463 * Implicit ALUA state transition failed 464 */ 465 alua_check(sdev, true); 466 return ADD_TO_MLQUEUE; 467 } 468 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 469 /* 470 * Inquiry data has changed 471 */ 472 return ADD_TO_MLQUEUE; 473 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 474 /* 475 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 476 * when switching controllers on targets like 477 * Intel Multi-Flex. We can just retry. 478 */ 479 return ADD_TO_MLQUEUE; 480 break; 481 } 482 483 return SCSI_RETURN_NOT_HANDLED; 484 } 485 486 /* 487 * alua_tur - Send a TEST UNIT READY 488 * @sdev: device to which the TEST UNIT READY command should be send 489 * 490 * Send a TEST UNIT READY to @sdev to figure out the device state 491 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 492 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 493 */ 494 static int alua_tur(struct scsi_device *sdev) 495 { 496 struct scsi_sense_hdr sense_hdr; 497 int retval; 498 499 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 500 ALUA_FAILOVER_RETRIES, &sense_hdr); 501 if (sense_hdr.sense_key == NOT_READY && 502 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 503 return SCSI_DH_RETRY; 504 else if (retval) 505 return SCSI_DH_IO; 506 else 507 return SCSI_DH_OK; 508 } 509 510 /* 511 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 512 * @sdev: the device to be evaluated. 513 * 514 * Evaluate the Target Port Group State. 515 * Returns SCSI_DH_DEV_OFFLINED if the path is 516 * found to be unusable. 517 */ 518 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 519 { 520 struct scsi_sense_hdr sense_hdr; 521 struct alua_port_group *tmp_pg; 522 int len, k, off, bufflen = ALUA_RTPG_SIZE; 523 unsigned char *desc, *buff; 524 unsigned err, retval; 525 unsigned int tpg_desc_tbl_off; 526 unsigned char orig_transition_tmo; 527 unsigned long flags; 528 529 if (!pg->expiry) { 530 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 531 532 if (pg->transition_tmo) 533 transition_tmo = pg->transition_tmo * HZ; 534 535 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 536 } 537 538 buff = kzalloc(bufflen, GFP_KERNEL); 539 if (!buff) 540 return SCSI_DH_DEV_TEMP_BUSY; 541 542 retry: 543 err = 0; 544 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 545 546 if (retval) { 547 /* 548 * Some (broken) implementations have a habit of returning 549 * an error during things like firmware update etc. 550 * But if the target only supports active/optimized there's 551 * not much we can do; it's not that we can switch paths 552 * or anything. 553 * So ignore any errors to avoid spurious failures during 554 * path failover. 555 */ 556 if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) { 557 sdev_printk(KERN_INFO, sdev, 558 "%s: ignoring rtpg result %d\n", 559 ALUA_DH_NAME, retval); 560 kfree(buff); 561 return SCSI_DH_OK; 562 } 563 if (!scsi_sense_valid(&sense_hdr)) { 564 sdev_printk(KERN_INFO, sdev, 565 "%s: rtpg failed, result %d\n", 566 ALUA_DH_NAME, retval); 567 kfree(buff); 568 if (driver_byte(retval) == DRIVER_ERROR) 569 return SCSI_DH_DEV_TEMP_BUSY; 570 return SCSI_DH_IO; 571 } 572 573 /* 574 * submit_rtpg() has failed on existing arrays 575 * when requesting extended header info, and 576 * the array doesn't support extended headers, 577 * even though it shouldn't according to T10. 578 * The retry without rtpg_ext_hdr_req set 579 * handles this. 580 */ 581 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 582 sense_hdr.sense_key == ILLEGAL_REQUEST && 583 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 584 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 585 goto retry; 586 } 587 /* 588 * Retry on ALUA state transition or if any 589 * UNIT ATTENTION occurred. 590 */ 591 if (sense_hdr.sense_key == NOT_READY && 592 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 593 err = SCSI_DH_RETRY; 594 else if (sense_hdr.sense_key == UNIT_ATTENTION) 595 err = SCSI_DH_RETRY; 596 if (err == SCSI_DH_RETRY && 597 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 598 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 599 ALUA_DH_NAME); 600 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 601 kfree(buff); 602 return err; 603 } 604 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 605 ALUA_DH_NAME); 606 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 607 kfree(buff); 608 pg->expiry = 0; 609 return SCSI_DH_IO; 610 } 611 612 len = get_unaligned_be32(&buff[0]) + 4; 613 614 if (len > bufflen) { 615 /* Resubmit with the correct length */ 616 kfree(buff); 617 bufflen = len; 618 buff = kmalloc(bufflen, GFP_KERNEL); 619 if (!buff) { 620 sdev_printk(KERN_WARNING, sdev, 621 "%s: kmalloc buffer failed\n",__func__); 622 /* Temporary failure, bypass */ 623 pg->expiry = 0; 624 return SCSI_DH_DEV_TEMP_BUSY; 625 } 626 goto retry; 627 } 628 629 orig_transition_tmo = pg->transition_tmo; 630 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 631 pg->transition_tmo = buff[5]; 632 else 633 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 634 635 if (orig_transition_tmo != pg->transition_tmo) { 636 sdev_printk(KERN_INFO, sdev, 637 "%s: transition timeout set to %d seconds\n", 638 ALUA_DH_NAME, pg->transition_tmo); 639 pg->expiry = jiffies + pg->transition_tmo * HZ; 640 } 641 642 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 643 tpg_desc_tbl_off = 8; 644 else 645 tpg_desc_tbl_off = 4; 646 647 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 648 k < len; 649 k += off, desc += off) { 650 u16 group_id = get_unaligned_be16(&desc[2]); 651 652 spin_lock_irqsave(&port_group_lock, flags); 653 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 654 group_id); 655 spin_unlock_irqrestore(&port_group_lock, flags); 656 if (tmp_pg) { 657 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 658 if ((tmp_pg == pg) || 659 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 660 struct alua_dh_data *h; 661 662 tmp_pg->state = desc[0] & 0x0f; 663 tmp_pg->pref = desc[0] >> 7; 664 rcu_read_lock(); 665 list_for_each_entry_rcu(h, 666 &tmp_pg->dh_list, node) { 667 /* h->sdev should always be valid */ 668 BUG_ON(!h->sdev); 669 h->sdev->access_state = desc[0]; 670 } 671 rcu_read_unlock(); 672 } 673 if (tmp_pg == pg) 674 tmp_pg->valid_states = desc[1]; 675 spin_unlock_irqrestore(&tmp_pg->lock, flags); 676 } 677 kref_put(&tmp_pg->kref, release_port_group); 678 } 679 off = 8 + (desc[7] * 4); 680 } 681 682 spin_lock_irqsave(&pg->lock, flags); 683 sdev_printk(KERN_INFO, sdev, 684 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 685 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 686 pg->pref ? "preferred" : "non-preferred", 687 pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 688 pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 689 pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 690 pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 691 pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 692 pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 693 pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 694 695 switch (pg->state) { 696 case SCSI_ACCESS_STATE_TRANSITIONING: 697 if (time_before(jiffies, pg->expiry)) { 698 /* State transition, retry */ 699 pg->interval = 2; 700 err = SCSI_DH_RETRY; 701 } else { 702 struct alua_dh_data *h; 703 704 /* Transitioning time exceeded, set port to standby */ 705 err = SCSI_DH_IO; 706 pg->state = SCSI_ACCESS_STATE_STANDBY; 707 pg->expiry = 0; 708 rcu_read_lock(); 709 list_for_each_entry_rcu(h, &pg->dh_list, node) { 710 BUG_ON(!h->sdev); 711 h->sdev->access_state = 712 (pg->state & SCSI_ACCESS_STATE_MASK); 713 if (pg->pref) 714 h->sdev->access_state |= 715 SCSI_ACCESS_STATE_PREFERRED; 716 } 717 rcu_read_unlock(); 718 } 719 break; 720 case SCSI_ACCESS_STATE_OFFLINE: 721 /* Path unusable */ 722 err = SCSI_DH_DEV_OFFLINED; 723 pg->expiry = 0; 724 break; 725 default: 726 /* Useable path if active */ 727 err = SCSI_DH_OK; 728 pg->expiry = 0; 729 break; 730 } 731 spin_unlock_irqrestore(&pg->lock, flags); 732 kfree(buff); 733 return err; 734 } 735 736 /* 737 * alua_stpg - Issue a SET TARGET PORT GROUP command 738 * 739 * Issue a SET TARGET PORT GROUP command and evaluate the 740 * response. Returns SCSI_DH_RETRY per default to trigger 741 * a re-evaluation of the target group state or SCSI_DH_OK 742 * if no further action needs to be taken. 743 */ 744 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 745 { 746 int retval; 747 struct scsi_sense_hdr sense_hdr; 748 749 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 750 /* Only implicit ALUA supported, retry */ 751 return SCSI_DH_RETRY; 752 } 753 switch (pg->state) { 754 case SCSI_ACCESS_STATE_OPTIMAL: 755 return SCSI_DH_OK; 756 case SCSI_ACCESS_STATE_ACTIVE: 757 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 758 !pg->pref && 759 (pg->tpgs & TPGS_MODE_IMPLICIT)) 760 return SCSI_DH_OK; 761 break; 762 case SCSI_ACCESS_STATE_STANDBY: 763 case SCSI_ACCESS_STATE_UNAVAILABLE: 764 break; 765 case SCSI_ACCESS_STATE_OFFLINE: 766 return SCSI_DH_IO; 767 case SCSI_ACCESS_STATE_TRANSITIONING: 768 break; 769 default: 770 sdev_printk(KERN_INFO, sdev, 771 "%s: stpg failed, unhandled TPGS state %d", 772 ALUA_DH_NAME, pg->state); 773 return SCSI_DH_NOSYS; 774 } 775 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 776 777 if (retval) { 778 if (!scsi_sense_valid(&sense_hdr)) { 779 sdev_printk(KERN_INFO, sdev, 780 "%s: stpg failed, result %d", 781 ALUA_DH_NAME, retval); 782 if (driver_byte(retval) == DRIVER_ERROR) 783 return SCSI_DH_DEV_TEMP_BUSY; 784 } else { 785 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 786 ALUA_DH_NAME); 787 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 788 } 789 } 790 /* Retry RTPG */ 791 return SCSI_DH_RETRY; 792 } 793 794 static void alua_rtpg_work(struct work_struct *work) 795 { 796 struct alua_port_group *pg = 797 container_of(work, struct alua_port_group, rtpg_work.work); 798 struct scsi_device *sdev; 799 LIST_HEAD(qdata_list); 800 int err = SCSI_DH_OK; 801 struct alua_queue_data *qdata, *tmp; 802 unsigned long flags; 803 804 spin_lock_irqsave(&pg->lock, flags); 805 sdev = pg->rtpg_sdev; 806 if (!sdev) { 807 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 808 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 809 spin_unlock_irqrestore(&pg->lock, flags); 810 kref_put(&pg->kref, release_port_group); 811 return; 812 } 813 pg->flags |= ALUA_PG_RUNNING; 814 if (pg->flags & ALUA_PG_RUN_RTPG) { 815 int state = pg->state; 816 817 pg->flags &= ~ALUA_PG_RUN_RTPG; 818 spin_unlock_irqrestore(&pg->lock, flags); 819 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 820 if (alua_tur(sdev) == SCSI_DH_RETRY) { 821 spin_lock_irqsave(&pg->lock, flags); 822 pg->flags &= ~ALUA_PG_RUNNING; 823 pg->flags |= ALUA_PG_RUN_RTPG; 824 spin_unlock_irqrestore(&pg->lock, flags); 825 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 826 pg->interval * HZ); 827 return; 828 } 829 /* Send RTPG on failure or if TUR indicates SUCCESS */ 830 } 831 err = alua_rtpg(sdev, pg); 832 spin_lock_irqsave(&pg->lock, flags); 833 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 834 pg->flags &= ~ALUA_PG_RUNNING; 835 pg->flags |= ALUA_PG_RUN_RTPG; 836 spin_unlock_irqrestore(&pg->lock, flags); 837 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 838 pg->interval * HZ); 839 return; 840 } 841 if (err != SCSI_DH_OK) 842 pg->flags &= ~ALUA_PG_RUN_STPG; 843 } 844 if (pg->flags & ALUA_PG_RUN_STPG) { 845 pg->flags &= ~ALUA_PG_RUN_STPG; 846 spin_unlock_irqrestore(&pg->lock, flags); 847 err = alua_stpg(sdev, pg); 848 spin_lock_irqsave(&pg->lock, flags); 849 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 850 pg->flags |= ALUA_PG_RUN_RTPG; 851 pg->interval = 0; 852 pg->flags &= ~ALUA_PG_RUNNING; 853 spin_unlock_irqrestore(&pg->lock, flags); 854 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 855 pg->interval * HZ); 856 return; 857 } 858 } 859 860 list_splice_init(&pg->rtpg_list, &qdata_list); 861 pg->rtpg_sdev = NULL; 862 spin_unlock_irqrestore(&pg->lock, flags); 863 864 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 865 list_del(&qdata->entry); 866 if (qdata->callback_fn) 867 qdata->callback_fn(qdata->callback_data, err); 868 kfree(qdata); 869 } 870 spin_lock_irqsave(&pg->lock, flags); 871 pg->flags &= ~ALUA_PG_RUNNING; 872 spin_unlock_irqrestore(&pg->lock, flags); 873 scsi_device_put(sdev); 874 kref_put(&pg->kref, release_port_group); 875 } 876 877 /** 878 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously 879 * @pg: ALUA port group associated with @sdev. 880 * @sdev: SCSI device for which to submit an RTPG. 881 * @qdata: Information about the callback to invoke after the RTPG. 882 * @force: Whether or not to submit an RTPG if a work item that will submit an 883 * RTPG already has been scheduled. 884 * 885 * Returns true if and only if alua_rtpg_work() will be called asynchronously. 886 * That function is responsible for calling @qdata->fn(). 887 */ 888 static bool alua_rtpg_queue(struct alua_port_group *pg, 889 struct scsi_device *sdev, 890 struct alua_queue_data *qdata, bool force) 891 { 892 int start_queue = 0; 893 unsigned long flags; 894 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) 895 return false; 896 897 spin_lock_irqsave(&pg->lock, flags); 898 if (qdata) { 899 list_add_tail(&qdata->entry, &pg->rtpg_list); 900 pg->flags |= ALUA_PG_RUN_STPG; 901 force = true; 902 } 903 if (pg->rtpg_sdev == NULL) { 904 pg->interval = 0; 905 pg->flags |= ALUA_PG_RUN_RTPG; 906 kref_get(&pg->kref); 907 pg->rtpg_sdev = sdev; 908 start_queue = 1; 909 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 910 pg->flags |= ALUA_PG_RUN_RTPG; 911 /* Do not queue if the worker is already running */ 912 if (!(pg->flags & ALUA_PG_RUNNING)) { 913 kref_get(&pg->kref); 914 start_queue = 1; 915 } 916 } 917 918 spin_unlock_irqrestore(&pg->lock, flags); 919 920 if (start_queue) { 921 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work, 922 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) 923 sdev = NULL; 924 else 925 kref_put(&pg->kref, release_port_group); 926 } 927 if (sdev) 928 scsi_device_put(sdev); 929 930 return true; 931 } 932 933 /* 934 * alua_initialize - Initialize ALUA state 935 * @sdev: the device to be initialized 936 * 937 * For the prep_fn to work correctly we have 938 * to initialize the ALUA state for the device. 939 */ 940 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 941 { 942 int err = SCSI_DH_DEV_UNSUPP, tpgs; 943 944 mutex_lock(&h->init_mutex); 945 tpgs = alua_check_tpgs(sdev); 946 if (tpgs != TPGS_MODE_NONE) 947 err = alua_check_vpd(sdev, h, tpgs); 948 h->init_error = err; 949 mutex_unlock(&h->init_mutex); 950 return err; 951 } 952 /* 953 * alua_set_params - set/unset the optimize flag 954 * @sdev: device on the path to be activated 955 * params - parameters in the following format 956 * "no_of_params\0param1\0param2\0param3\0...\0" 957 * For example, to set the flag pass the following parameters 958 * from multipath.conf 959 * hardware_handler "2 alua 1" 960 */ 961 static int alua_set_params(struct scsi_device *sdev, const char *params) 962 { 963 struct alua_dh_data *h = sdev->handler_data; 964 struct alua_port_group *pg = NULL; 965 unsigned int optimize = 0, argc; 966 const char *p = params; 967 int result = SCSI_DH_OK; 968 unsigned long flags; 969 970 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 971 return -EINVAL; 972 973 while (*p++) 974 ; 975 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 976 return -EINVAL; 977 978 rcu_read_lock(); 979 pg = rcu_dereference(h->pg); 980 if (!pg) { 981 rcu_read_unlock(); 982 return -ENXIO; 983 } 984 spin_lock_irqsave(&pg->lock, flags); 985 if (optimize) 986 pg->flags |= ALUA_OPTIMIZE_STPG; 987 else 988 pg->flags &= ~ALUA_OPTIMIZE_STPG; 989 spin_unlock_irqrestore(&pg->lock, flags); 990 rcu_read_unlock(); 991 992 return result; 993 } 994 995 /* 996 * alua_activate - activate a path 997 * @sdev: device on the path to be activated 998 * 999 * We're currently switching the port group to be activated only and 1000 * let the array figure out the rest. 1001 * There may be other arrays which require us to switch all port groups 1002 * based on a certain policy. But until we actually encounter them it 1003 * should be okay. 1004 */ 1005 static int alua_activate(struct scsi_device *sdev, 1006 activate_complete fn, void *data) 1007 { 1008 struct alua_dh_data *h = sdev->handler_data; 1009 int err = SCSI_DH_OK; 1010 struct alua_queue_data *qdata; 1011 struct alua_port_group *pg; 1012 1013 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 1014 if (!qdata) { 1015 err = SCSI_DH_RES_TEMP_UNAVAIL; 1016 goto out; 1017 } 1018 qdata->callback_fn = fn; 1019 qdata->callback_data = data; 1020 1021 mutex_lock(&h->init_mutex); 1022 rcu_read_lock(); 1023 pg = rcu_dereference(h->pg); 1024 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1025 rcu_read_unlock(); 1026 kfree(qdata); 1027 err = h->init_error; 1028 mutex_unlock(&h->init_mutex); 1029 goto out; 1030 } 1031 rcu_read_unlock(); 1032 mutex_unlock(&h->init_mutex); 1033 1034 if (alua_rtpg_queue(pg, sdev, qdata, true)) 1035 fn = NULL; 1036 else 1037 err = SCSI_DH_DEV_OFFLINED; 1038 kref_put(&pg->kref, release_port_group); 1039 out: 1040 if (fn) 1041 fn(data, err); 1042 return 0; 1043 } 1044 1045 /* 1046 * alua_check - check path status 1047 * @sdev: device on the path to be checked 1048 * 1049 * Check the device status 1050 */ 1051 static void alua_check(struct scsi_device *sdev, bool force) 1052 { 1053 struct alua_dh_data *h = sdev->handler_data; 1054 struct alua_port_group *pg; 1055 1056 rcu_read_lock(); 1057 pg = rcu_dereference(h->pg); 1058 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1059 rcu_read_unlock(); 1060 return; 1061 } 1062 rcu_read_unlock(); 1063 1064 alua_rtpg_queue(pg, sdev, NULL, force); 1065 kref_put(&pg->kref, release_port_group); 1066 } 1067 1068 /* 1069 * alua_prep_fn - request callback 1070 * 1071 * Fail I/O to all paths not in state 1072 * active/optimized or active/non-optimized. 1073 */ 1074 static int alua_prep_fn(struct scsi_device *sdev, struct request *req) 1075 { 1076 struct alua_dh_data *h = sdev->handler_data; 1077 struct alua_port_group *pg; 1078 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1079 int ret = BLKPREP_OK; 1080 1081 rcu_read_lock(); 1082 pg = rcu_dereference(h->pg); 1083 if (pg) 1084 state = pg->state; 1085 rcu_read_unlock(); 1086 if (state == SCSI_ACCESS_STATE_TRANSITIONING) 1087 ret = BLKPREP_DEFER; 1088 else if (state != SCSI_ACCESS_STATE_OPTIMAL && 1089 state != SCSI_ACCESS_STATE_ACTIVE && 1090 state != SCSI_ACCESS_STATE_LBA) { 1091 ret = BLKPREP_KILL; 1092 req->rq_flags |= RQF_QUIET; 1093 } 1094 return ret; 1095 1096 } 1097 1098 static void alua_rescan(struct scsi_device *sdev) 1099 { 1100 struct alua_dh_data *h = sdev->handler_data; 1101 1102 alua_initialize(sdev, h); 1103 } 1104 1105 /* 1106 * alua_bus_attach - Attach device handler 1107 * @sdev: device to be attached to 1108 */ 1109 static int alua_bus_attach(struct scsi_device *sdev) 1110 { 1111 struct alua_dh_data *h; 1112 int err; 1113 1114 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1115 if (!h) 1116 return SCSI_DH_NOMEM; 1117 spin_lock_init(&h->pg_lock); 1118 rcu_assign_pointer(h->pg, NULL); 1119 h->init_error = SCSI_DH_OK; 1120 h->sdev = sdev; 1121 INIT_LIST_HEAD(&h->node); 1122 1123 mutex_init(&h->init_mutex); 1124 err = alua_initialize(sdev, h); 1125 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1126 goto failed; 1127 1128 sdev->handler_data = h; 1129 return SCSI_DH_OK; 1130 failed: 1131 kfree(h); 1132 return err; 1133 } 1134 1135 /* 1136 * alua_bus_detach - Detach device handler 1137 * @sdev: device to be detached from 1138 */ 1139 static void alua_bus_detach(struct scsi_device *sdev) 1140 { 1141 struct alua_dh_data *h = sdev->handler_data; 1142 struct alua_port_group *pg; 1143 1144 spin_lock(&h->pg_lock); 1145 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1146 rcu_assign_pointer(h->pg, NULL); 1147 h->sdev = NULL; 1148 spin_unlock(&h->pg_lock); 1149 if (pg) { 1150 spin_lock_irq(&pg->lock); 1151 list_del_rcu(&h->node); 1152 spin_unlock_irq(&pg->lock); 1153 kref_put(&pg->kref, release_port_group); 1154 } 1155 sdev->handler_data = NULL; 1156 kfree(h); 1157 } 1158 1159 static struct scsi_device_handler alua_dh = { 1160 .name = ALUA_DH_NAME, 1161 .module = THIS_MODULE, 1162 .attach = alua_bus_attach, 1163 .detach = alua_bus_detach, 1164 .prep_fn = alua_prep_fn, 1165 .check_sense = alua_check_sense, 1166 .activate = alua_activate, 1167 .rescan = alua_rescan, 1168 .set_params = alua_set_params, 1169 }; 1170 1171 static int __init alua_init(void) 1172 { 1173 int r; 1174 1175 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1176 if (!kaluad_wq) { 1177 /* Temporary failure, bypass */ 1178 return SCSI_DH_DEV_TEMP_BUSY; 1179 } 1180 1181 r = scsi_register_device_handler(&alua_dh); 1182 if (r != 0) { 1183 printk(KERN_ERR "%s: Failed to register scsi device handler", 1184 ALUA_DH_NAME); 1185 destroy_workqueue(kaluad_wq); 1186 } 1187 return r; 1188 } 1189 1190 static void __exit alua_exit(void) 1191 { 1192 scsi_unregister_device_handler(&alua_dh); 1193 destroy_workqueue(kaluad_wq); 1194 } 1195 1196 module_init(alua_init); 1197 module_exit(alua_exit); 1198 1199 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1200 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1201 MODULE_LICENSE("GPL"); 1202 MODULE_VERSION(ALUA_DH_VER); 1203