xref: /linux/drivers/scsi/scsi_error.c (revision 14b42963f64b98ab61fa9723c03d71aa5ef4f862)
1 /*
2  *  scsi_error.c Copyright (C) 1997 Eric Youngdale
3  *
4  *  SCSI error/timeout handling
5  *      Initial versions: Eric Youngdale.  Based upon conversations with
6  *                        Leonard Zubkoff and David Miller at Linux Expo,
7  *                        ideas originating from all over the place.
8  *
9  *	Restructured scsi_unjam_host and associated functions.
10  *	September 04, 2002 Mike Anderson (andmike@us.ibm.com)
11  *
12  *	Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
13  *	minor  cleanups.
14  *	September 30, 2002 Mike Anderson (andmike@us.ibm.com)
15  */
16 
17 #include <linux/module.h>
18 #include <linux/sched.h>
19 #include <linux/timer.h>
20 #include <linux/string.h>
21 #include <linux/slab.h>
22 #include <linux/kernel.h>
23 #include <linux/kthread.h>
24 #include <linux/interrupt.h>
25 #include <linux/blkdev.h>
26 #include <linux/delay.h>
27 
28 #include <scsi/scsi.h>
29 #include <scsi/scsi_cmnd.h>
30 #include <scsi/scsi_dbg.h>
31 #include <scsi/scsi_device.h>
32 #include <scsi/scsi_eh.h>
33 #include <scsi/scsi_transport.h>
34 #include <scsi/scsi_host.h>
35 #include <scsi/scsi_ioctl.h>
36 
37 #include "scsi_priv.h"
38 #include "scsi_logging.h"
39 
40 #define SENSE_TIMEOUT		(10*HZ)
41 #define START_UNIT_TIMEOUT	(30*HZ)
42 
43 /*
44  * These should *probably* be handled by the host itself.
45  * Since it is allowed to sleep, it probably should.
46  */
47 #define BUS_RESET_SETTLE_TIME   (10)
48 #define HOST_RESET_SETTLE_TIME  (10)
49 
50 /* called with shost->host_lock held */
51 void scsi_eh_wakeup(struct Scsi_Host *shost)
52 {
53 	if (shost->host_busy == shost->host_failed) {
54 		wake_up_process(shost->ehandler);
55 		SCSI_LOG_ERROR_RECOVERY(5,
56 				printk("Waking error handler thread\n"));
57 	}
58 }
59 
60 /**
61  * scsi_schedule_eh - schedule EH for SCSI host
62  * @shost:	SCSI host to invoke error handling on.
63  *
64  * Schedule SCSI EH without scmd.
65  **/
66 void scsi_schedule_eh(struct Scsi_Host *shost)
67 {
68 	unsigned long flags;
69 
70 	spin_lock_irqsave(shost->host_lock, flags);
71 
72 	if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 ||
73 	    scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) {
74 		shost->host_eh_scheduled++;
75 		scsi_eh_wakeup(shost);
76 	}
77 
78 	spin_unlock_irqrestore(shost->host_lock, flags);
79 }
80 EXPORT_SYMBOL_GPL(scsi_schedule_eh);
81 
82 /**
83  * scsi_eh_scmd_add - add scsi cmd to error handling.
84  * @scmd:	scmd to run eh on.
85  * @eh_flag:	optional SCSI_EH flag.
86  *
87  * Return value:
88  *	0 on failure.
89  **/
90 int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
91 {
92 	struct Scsi_Host *shost = scmd->device->host;
93 	unsigned long flags;
94 	int ret = 0;
95 
96 	if (!shost->ehandler)
97 		return 0;
98 
99 	spin_lock_irqsave(shost->host_lock, flags);
100 	if (scsi_host_set_state(shost, SHOST_RECOVERY))
101 		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
102 			goto out_unlock;
103 
104 	ret = 1;
105 	scmd->eh_eflags |= eh_flag;
106 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
107 	shost->host_failed++;
108 	scsi_eh_wakeup(shost);
109  out_unlock:
110 	spin_unlock_irqrestore(shost->host_lock, flags);
111 	return ret;
112 }
113 
114 /**
115  * scsi_add_timer - Start timeout timer for a single scsi command.
116  * @scmd:	scsi command that is about to start running.
117  * @timeout:	amount of time to allow this command to run.
118  * @complete:	timeout function to call if timer isn't canceled.
119  *
120  * Notes:
121  *    This should be turned into an inline function.  Each scsi command
122  *    has its own timer, and as it is added to the queue, we set up the
123  *    timer.  When the command completes, we cancel the timer.
124  **/
125 void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
126 		    void (*complete)(struct scsi_cmnd *))
127 {
128 
129 	/*
130 	 * If the clock was already running for this command, then
131 	 * first delete the timer.  The timer handling code gets rather
132 	 * confused if we don't do this.
133 	 */
134 	if (scmd->eh_timeout.function)
135 		del_timer(&scmd->eh_timeout);
136 
137 	scmd->eh_timeout.data = (unsigned long)scmd;
138 	scmd->eh_timeout.expires = jiffies + timeout;
139 	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
140 
141 	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
142 					  " %d, (%p)\n", __FUNCTION__,
143 					  scmd, timeout, complete));
144 
145 	add_timer(&scmd->eh_timeout);
146 }
147 
148 /**
149  * scsi_delete_timer - Delete/cancel timer for a given function.
150  * @scmd:	Cmd that we are canceling timer for
151  *
152  * Notes:
153  *     This should be turned into an inline function.
154  *
155  * Return value:
156  *     1 if we were able to detach the timer.  0 if we blew it, and the
157  *     timer function has already started to run.
158  **/
159 int scsi_delete_timer(struct scsi_cmnd *scmd)
160 {
161 	int rtn;
162 
163 	rtn = del_timer(&scmd->eh_timeout);
164 
165 	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
166 					 " rtn: %d\n", __FUNCTION__,
167 					 scmd, rtn));
168 
169 	scmd->eh_timeout.data = (unsigned long)NULL;
170 	scmd->eh_timeout.function = NULL;
171 
172 	return rtn;
173 }
174 
175 /**
176  * scsi_times_out - Timeout function for normal scsi commands.
177  * @scmd:	Cmd that is timing out.
178  *
179  * Notes:
180  *     We do not need to lock this.  There is the potential for a race
181  *     only in that the normal completion handling might run, but if the
182  *     normal completion function determines that the timer has already
183  *     fired, then it mustn't do anything.
184  **/
185 void scsi_times_out(struct scsi_cmnd *scmd)
186 {
187 	scsi_log_completion(scmd, TIMEOUT_ERROR);
188 
189 	if (scmd->device->host->transportt->eh_timed_out)
190 		switch (scmd->device->host->transportt->eh_timed_out(scmd)) {
191 		case EH_HANDLED:
192 			__scsi_done(scmd);
193 			return;
194 		case EH_RESET_TIMER:
195 			scsi_add_timer(scmd, scmd->timeout_per_command,
196 				       scsi_times_out);
197 			return;
198 		case EH_NOT_HANDLED:
199 			break;
200 		}
201 
202 	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
203 		scmd->result |= DID_TIME_OUT << 16;
204 		__scsi_done(scmd);
205 	}
206 }
207 
208 /**
209  * scsi_block_when_processing_errors - Prevent cmds from being queued.
210  * @sdev:	Device on which we are performing recovery.
211  *
212  * Description:
213  *     We block until the host is out of error recovery, and then check to
214  *     see whether the host or the device is offline.
215  *
216  * Return value:
217  *     0 when dev was taken offline by error recovery. 1 OK to proceed.
218  **/
219 int scsi_block_when_processing_errors(struct scsi_device *sdev)
220 {
221 	int online;
222 
223 	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
224 
225 	online = scsi_device_online(sdev);
226 
227 	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
228 					  online));
229 
230 	return online;
231 }
232 EXPORT_SYMBOL(scsi_block_when_processing_errors);
233 
#ifdef CONFIG_SCSI_LOGGING
/**
 * scsi_eh_prt_fail_stats - log per-device and total failure counts
 * @shost:	scsi host being recovered.
 * @work_q:	Queue of scsi cmds to process.
 *
 * For every device of @shost, counts the commands on @work_q that
 * belong to it, split into those flagged SCSI_EH_CANCEL_CMD and the
 * rest, and logs one line per device that has any.  A summary line with
 * the totals is logged at the end.
 **/
static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
					  struct list_head *work_q)
{
	struct scsi_cmnd *scmd;
	struct scsi_device *sdev;
	int total_failures = 0;
	int devices_failed = 0;

	shost_for_each_device(sdev, shost) {
		int failed = 0;
		int cancelled = 0;

		list_for_each_entry(scmd, work_q, eh_entry) {
			if (scmd->device != sdev)
				continue;
			if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
				++cancelled;
			else
				++failed;
		}

		/* nothing queued for this device */
		if (!cancelled && !failed)
			continue;

		SCSI_LOG_ERROR_RECOVERY(3,
			sdev_printk(KERN_INFO, sdev,
				    "%s: cmds failed: %d, cancel: %d\n",
				    __FUNCTION__, failed, cancelled));
		total_failures += failed + cancelled;
		++devices_failed;
	}

	SCSI_LOG_ERROR_RECOVERY(2,
		printk("Total of %d commands on %d devices require eh work\n",
		       total_failures, devices_failed));
}
#endif
278 
279 /**
280  * scsi_check_sense - Examine scsi cmd sense
281  * @scmd:	Cmd to have sense checked.
282  *
283  * Return value:
284  * 	SUCCESS or FAILED or NEEDS_RETRY
285  *
286  * Notes:
287  *	When a deferred error is detected the current command has
288  *	not been executed and needs retrying.
289  **/
290 static int scsi_check_sense(struct scsi_cmnd *scmd)
291 {
292 	struct scsi_sense_hdr sshdr;
293 
294 	if (! scsi_command_normalize_sense(scmd, &sshdr))
295 		return FAILED;	/* no valid sense data */
296 
297 	if (scsi_sense_is_deferred(&sshdr))
298 		return NEEDS_RETRY;
299 
300 	/*
301 	 * Previous logic looked for FILEMARK, EOM or ILI which are
302 	 * mainly associated with tapes and returned SUCCESS.
303 	 */
304 	if (sshdr.response_code == 0x70) {
305 		/* fixed format */
306 		if (scmd->sense_buffer[2] & 0xe0)
307 			return SUCCESS;
308 	} else {
309 		/*
310 		 * descriptor format: look for "stream commands sense data
311 		 * descriptor" (see SSC-3). Assume single sense data
312 		 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
313 		 */
314 		if ((sshdr.additional_length > 3) &&
315 		    (scmd->sense_buffer[8] == 0x4) &&
316 		    (scmd->sense_buffer[11] & 0xe0))
317 			return SUCCESS;
318 	}
319 
320 	switch (sshdr.sense_key) {
321 	case NO_SENSE:
322 		return SUCCESS;
323 	case RECOVERED_ERROR:
324 		return /* soft_error */ SUCCESS;
325 
326 	case ABORTED_COMMAND:
327 		return NEEDS_RETRY;
328 	case NOT_READY:
329 	case UNIT_ATTENTION:
330 		/*
331 		 * if we are expecting a cc/ua because of a bus reset that we
332 		 * performed, treat this just as a retry.  otherwise this is
333 		 * information that we should pass up to the upper-level driver
334 		 * so that we can deal with it there.
335 		 */
336 		if (scmd->device->expecting_cc_ua) {
337 			scmd->device->expecting_cc_ua = 0;
338 			return NEEDS_RETRY;
339 		}
340 		/*
341 		 * if the device is in the process of becoming ready, we
342 		 * should retry.
343 		 */
344 		if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
345 			return NEEDS_RETRY;
346 		/*
347 		 * if the device is not started, we need to wake
348 		 * the error handler to start the motor
349 		 */
350 		if (scmd->device->allow_restart &&
351 		    (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
352 			return FAILED;
353 		return SUCCESS;
354 
355 		/* these three are not supported */
356 	case COPY_ABORTED:
357 	case VOLUME_OVERFLOW:
358 	case MISCOMPARE:
359 		return SUCCESS;
360 
361 	case MEDIUM_ERROR:
362 		return NEEDS_RETRY;
363 
364 	case HARDWARE_ERROR:
365 		if (scmd->device->retry_hwerror)
366 			return NEEDS_RETRY;
367 		else
368 			return SUCCESS;
369 
370 	case ILLEGAL_REQUEST:
371 	case BLANK_CHECK:
372 	case DATA_PROTECT:
373 	default:
374 		return SUCCESS;
375 	}
376 }
377 
378 /**
379  * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
380  * @scmd:	SCSI cmd to examine.
381  *
382  * Notes:
383  *    This is *only* called when we are examining the status of commands
384  *    queued during error recovery.  the main difference here is that we
385  *    don't allow for the possibility of retries here, and we are a lot
386  *    more restrictive about what we consider acceptable.
387  **/
388 static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
389 {
390 	/*
391 	 * first check the host byte, to see if there is anything in there
392 	 * that would indicate what we need to do.
393 	 */
394 	if (host_byte(scmd->result) == DID_RESET) {
395 		/*
396 		 * rats.  we are already in the error handler, so we now
397 		 * get to try and figure out what to do next.  if the sense
398 		 * is valid, we have a pretty good idea of what to do.
399 		 * if not, we mark it as FAILED.
400 		 */
401 		return scsi_check_sense(scmd);
402 	}
403 	if (host_byte(scmd->result) != DID_OK)
404 		return FAILED;
405 
406 	/*
407 	 * next, check the message byte.
408 	 */
409 	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
410 		return FAILED;
411 
412 	/*
413 	 * now, check the status byte to see if this indicates
414 	 * anything special.
415 	 */
416 	switch (status_byte(scmd->result)) {
417 	case GOOD:
418 	case COMMAND_TERMINATED:
419 		return SUCCESS;
420 	case CHECK_CONDITION:
421 		return scsi_check_sense(scmd);
422 	case CONDITION_GOOD:
423 	case INTERMEDIATE_GOOD:
424 	case INTERMEDIATE_C_GOOD:
425 		/*
426 		 * who knows?  FIXME(eric)
427 		 */
428 		return SUCCESS;
429 	case BUSY:
430 	case QUEUE_FULL:
431 	case RESERVATION_CONFLICT:
432 	default:
433 		return FAILED;
434 	}
435 	return FAILED;
436 }
437 
438 /**
439  * scsi_eh_done - Completion function for error handling.
440  * @scmd:	Cmd that is done.
441  **/
442 static void scsi_eh_done(struct scsi_cmnd *scmd)
443 {
444 	struct completion     *eh_action;
445 
446 	SCSI_LOG_ERROR_RECOVERY(3,
447 		printk("%s scmd: %p result: %x\n",
448 			__FUNCTION__, scmd, scmd->result));
449 
450 	eh_action = scmd->device->host->eh_action;
451 	if (eh_action)
452 		complete(eh_action);
453 }
454 
455 /**
456  * scsi_send_eh_cmnd  - send a cmd to a device as part of error recovery.
457  * @scmd:	SCSI Cmd to send.
458  * @timeout:	Timeout for cmd.
459  *
460  * Return value:
461  *    SUCCESS or FAILED or NEEDS_RETRY
462  **/
463 static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
464 {
465 	struct scsi_device *sdev = scmd->device;
466 	struct Scsi_Host *shost = sdev->host;
467 	DECLARE_COMPLETION(done);
468 	unsigned long timeleft;
469 	unsigned long flags;
470 	int rtn;
471 
472 	if (sdev->scsi_level <= SCSI_2)
473 		scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
474 			(sdev->lun << 5 & 0xe0);
475 
476 	shost->eh_action = &done;
477 
478 	spin_lock_irqsave(shost->host_lock, flags);
479 	scsi_log_send(scmd);
480 	shost->hostt->queuecommand(scmd, scsi_eh_done);
481 	spin_unlock_irqrestore(shost->host_lock, flags);
482 
483 	timeleft = wait_for_completion_timeout(&done, timeout);
484 
485 	shost->eh_action = NULL;
486 
487 	scsi_log_completion(scmd, SUCCESS);
488 
489 	SCSI_LOG_ERROR_RECOVERY(3,
490 		printk("%s: scmd: %p, timeleft: %ld\n",
491 			__FUNCTION__, scmd, timeleft));
492 
493 	/*
494 	 * If there is time left scsi_eh_done got called, and we will
495 	 * examine the actual status codes to see whether the command
496 	 * actually did complete normally, else tell the host to forget
497 	 * about this command.
498 	 */
499 	if (timeleft) {
500 		rtn = scsi_eh_completed_normally(scmd);
501 		SCSI_LOG_ERROR_RECOVERY(3,
502 			printk("%s: scsi_eh_completed_normally %x\n",
503 			       __FUNCTION__, rtn));
504 
505 		switch (rtn) {
506 		case SUCCESS:
507 		case NEEDS_RETRY:
508 		case FAILED:
509 			break;
510 		default:
511 			rtn = FAILED;
512 			break;
513 		}
514 	} else {
515 		/*
516 		 * FIXME(eric) - we are not tracking whether we could
517 		 * abort a timed out command or not.  not sure how
518 		 * we should treat them differently anyways.
519 		 */
520 		if (shost->hostt->eh_abort_handler)
521 			shost->hostt->eh_abort_handler(scmd);
522 		rtn = FAILED;
523 	}
524 
525 	return rtn;
526 }
527 
528 /**
529  * scsi_request_sense - Request sense data from a particular target.
530  * @scmd:	SCSI cmd for request sense.
531  *
532  * Notes:
533  *    Some hosts automatically obtain this information, others require
534  *    that we obtain it on our own. This function will *not* return until
535  *    the command either times out, or it completes.
536  **/
537 static int scsi_request_sense(struct scsi_cmnd *scmd)
538 {
539 	static unsigned char generic_sense[6] =
540 	{REQUEST_SENSE, 0, 0, 0, 252, 0};
541 	unsigned char *scsi_result;
542 	int saved_result;
543 	int rtn;
544 
545 	memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
546 
547 	scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0));
548 
549 
550 	if (unlikely(!scsi_result)) {
551 		printk(KERN_ERR "%s: cannot allocate scsi_result.\n",
552 		       __FUNCTION__);
553 		return FAILED;
554 	}
555 
556 	/*
557 	 * zero the sense buffer.  some host adapters automatically always
558 	 * request sense, so it is not a good idea that
559 	 * scmd->request_buffer and scmd->sense_buffer point to the same
560 	 * address (db).  0 is not a valid sense code.
561 	 */
562 	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
563 	memset(scsi_result, 0, 252);
564 
565 	saved_result = scmd->result;
566 	scmd->request_buffer = scsi_result;
567 	scmd->request_bufflen = 252;
568 	scmd->use_sg = 0;
569 	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
570 	scmd->sc_data_direction = DMA_FROM_DEVICE;
571 	scmd->underflow = 0;
572 
573 	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
574 
575 	/* last chance to have valid sense data */
576 	if(!SCSI_SENSE_VALID(scmd)) {
577 		memcpy(scmd->sense_buffer, scmd->request_buffer,
578 		       sizeof(scmd->sense_buffer));
579 	}
580 
581 	kfree(scsi_result);
582 
583 	/*
584 	 * when we eventually call scsi_finish, we really wish to complete
585 	 * the original request, so let's restore the original data. (db)
586 	 */
587 	scsi_setup_cmd_retry(scmd);
588 	scmd->result = saved_result;
589 	return rtn;
590 }
591 
592 /**
593  * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
594  * @scmd:	Original SCSI cmd that eh has finished.
595  * @done_q:	Queue for processed commands.
596  *
597  * Notes:
598  *    We don't want to use the normal command completion while we are are
599  *    still handling errors - it may cause other commands to be queued,
600  *    and that would disturb what we are doing.  thus we really want to
601  *    keep a list of pending commands for final completion, and once we
602  *    are ready to leave error handling we handle completion for real.
603  **/
604 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
605 {
606 	scmd->device->host->host_failed--;
607 	scmd->eh_eflags = 0;
608 
609 	/*
610 	 * set this back so that the upper level can correctly free up
611 	 * things.
612 	 */
613 	scsi_setup_cmd_retry(scmd);
614 	list_move_tail(&scmd->eh_entry, done_q);
615 }
616 EXPORT_SYMBOL(scsi_eh_finish_cmd);
617 
618 /**
619  * scsi_eh_get_sense - Get device sense data.
620  * @work_q:	Queue of commands to process.
621  * @done_q:	Queue of proccessed commands..
622  *
623  * Description:
624  *    See if we need to request sense information.  if so, then get it
625  *    now, so we have a better idea of what to do.
626  *
627  * Notes:
628  *    This has the unfortunate side effect that if a shost adapter does
629  *    not automatically request sense information, that we end up shutting
630  *    it down before we request it.
631  *
632  *    All drivers should request sense information internally these days,
633  *    so for now all I have to say is tough noogies if you end up in here.
634  *
635  *    XXX: Long term this code should go away, but that needs an audit of
636  *         all LLDDs first.
637  **/
638 static int scsi_eh_get_sense(struct list_head *work_q,
639 			     struct list_head *done_q)
640 {
641 	struct scsi_cmnd *scmd, *next;
642 	int rtn;
643 
644 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
645 		if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) ||
646 		    SCSI_SENSE_VALID(scmd))
647 			continue;
648 
649 		SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd,
650 						  "%s: requesting sense\n",
651 						  current->comm));
652 		rtn = scsi_request_sense(scmd);
653 		if (rtn != SUCCESS)
654 			continue;
655 
656 		SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
657 						  " result %x\n", scmd,
658 						  scmd->result));
659 		SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
660 
661 		rtn = scsi_decide_disposition(scmd);
662 
663 		/*
664 		 * if the result was normal, then just pass it along to the
665 		 * upper level.
666 		 */
667 		if (rtn == SUCCESS)
668 			/* we don't want this command reissued, just
669 			 * finished with the sense data, so set
670 			 * retries to the max allowed to ensure it
671 			 * won't get reissued */
672 			scmd->retries = scmd->allowed;
673 		else if (rtn != NEEDS_RETRY)
674 			continue;
675 
676 		scsi_eh_finish_cmd(scmd, done_q);
677 	}
678 
679 	return list_empty(work_q);
680 }
681 
682 /**
683  * scsi_try_to_abort_cmd - Ask host to abort a running command.
684  * @scmd:	SCSI cmd to abort from Lower Level.
685  *
686  * Notes:
687  *    This function will not return until the user's completion function
688  *    has been called.  there is no timeout on this operation.  if the
689  *    author of the low-level driver wishes this operation to be timed,
690  *    they can provide this facility themselves.  helper functions in
691  *    scsi_error.c can be supplied to make this easier to do.
692  **/
693 static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
694 {
695 	if (!scmd->device->host->hostt->eh_abort_handler)
696 		return FAILED;
697 
698 	/*
699 	 * scsi_done was called just after the command timed out and before
700 	 * we had a chance to process it. (db)
701 	 */
702 	if (scmd->serial_number == 0)
703 		return SUCCESS;
704 	return scmd->device->host->hostt->eh_abort_handler(scmd);
705 }
706 
707 /**
708  * scsi_eh_tur - Send TUR to device.
709  * @scmd:	Scsi cmd to send TUR
710  *
711  * Return value:
712  *    0 - Device is ready. 1 - Device NOT ready.
713  **/
714 static int scsi_eh_tur(struct scsi_cmnd *scmd)
715 {
716 	static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
717 	int retry_cnt = 1, rtn;
718 	int saved_result;
719 
720 retry_tur:
721 	memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
722 
723 	/*
724 	 * zero the sense buffer.  the scsi spec mandates that any
725 	 * untransferred sense data should be interpreted as being zero.
726 	 */
727 	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
728 
729 	saved_result = scmd->result;
730 	scmd->request_buffer = NULL;
731 	scmd->request_bufflen = 0;
732 	scmd->use_sg = 0;
733 	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
734 	scmd->underflow = 0;
735 	scmd->sc_data_direction = DMA_NONE;
736 
737 	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
738 
739 	/*
740 	 * when we eventually call scsi_finish, we really wish to complete
741 	 * the original request, so let's restore the original data. (db)
742 	 */
743 	scsi_setup_cmd_retry(scmd);
744 	scmd->result = saved_result;
745 
746 	/*
747 	 * hey, we are done.  let's look to see what happened.
748 	 */
749 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
750 		__FUNCTION__, scmd, rtn));
751 	if (rtn == SUCCESS)
752 		return 0;
753 	else if (rtn == NEEDS_RETRY) {
754 		if (retry_cnt--)
755 			goto retry_tur;
756 		return 0;
757 	}
758 	return 1;
759 }
760 
761 /**
762  * scsi_eh_abort_cmds - abort canceled commands.
763  * @shost:	scsi host being recovered.
764  * @eh_done_q:	list_head for processed commands.
765  *
766  * Decription:
767  *    Try and see whether or not it makes sense to try and abort the
768  *    running command.  this only works out to be the case if we have one
769  *    command that has timed out.  if the command simply failed, it makes
770  *    no sense to try and abort the command, since as far as the shost
771  *    adapter is concerned, it isn't running.
772  **/
773 static int scsi_eh_abort_cmds(struct list_head *work_q,
774 			      struct list_head *done_q)
775 {
776 	struct scsi_cmnd *scmd, *next;
777 	int rtn;
778 
779 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
780 		if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
781 			continue;
782 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
783 						  "0x%p\n", current->comm,
784 						  scmd));
785 		rtn = scsi_try_to_abort_cmd(scmd);
786 		if (rtn == SUCCESS) {
787 			scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
788 			if (!scsi_device_online(scmd->device) ||
789 			    !scsi_eh_tur(scmd)) {
790 				scsi_eh_finish_cmd(scmd, done_q);
791 			}
792 
793 		} else
794 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
795 							  " cmd failed:"
796 							  "0x%p\n",
797 							  current->comm,
798 							  scmd));
799 	}
800 
801 	return list_empty(work_q);
802 }
803 
804 /**
805  * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
806  * @scmd:	SCSI cmd used to send BDR
807  *
808  * Notes:
809  *    There is no timeout for this operation.  if this operation is
810  *    unreliable for a given host, then the host itself needs to put a
811  *    timer on it, and set the host back to a consistent state prior to
812  *    returning.
813  **/
814 static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
815 {
816 	int rtn;
817 
818 	if (!scmd->device->host->hostt->eh_device_reset_handler)
819 		return FAILED;
820 
821 	rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
822 	if (rtn == SUCCESS) {
823 		scmd->device->was_reset = 1;
824 		scmd->device->expecting_cc_ua = 1;
825 	}
826 
827 	return rtn;
828 }
829 
830 /**
831  * scsi_eh_try_stu - Send START_UNIT to device.
832  * @scmd:	Scsi cmd to send START_UNIT
833  *
834  * Return value:
835  *    0 - Device is ready. 1 - Device NOT ready.
836  **/
837 static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
838 {
839 	static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
840 	int rtn;
841 	int saved_result;
842 
843 	if (!scmd->device->allow_restart)
844 		return 1;
845 
846 	memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
847 
848 	/*
849 	 * zero the sense buffer.  the scsi spec mandates that any
850 	 * untransferred sense data should be interpreted as being zero.
851 	 */
852 	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
853 
854 	saved_result = scmd->result;
855 	scmd->request_buffer = NULL;
856 	scmd->request_bufflen = 0;
857 	scmd->use_sg = 0;
858 	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
859 	scmd->underflow = 0;
860 	scmd->sc_data_direction = DMA_NONE;
861 
862 	rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT);
863 
864 	/*
865 	 * when we eventually call scsi_finish, we really wish to complete
866 	 * the original request, so let's restore the original data. (db)
867 	 */
868 	scsi_setup_cmd_retry(scmd);
869 	scmd->result = saved_result;
870 
871 	/*
872 	 * hey, we are done.  let's look to see what happened.
873 	 */
874 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
875 		__FUNCTION__, scmd, rtn));
876 	if (rtn == SUCCESS)
877 		return 0;
878 	return 1;
879 }
880 
881  /**
882  * scsi_eh_stu - send START_UNIT if needed
883  * @shost:	scsi host being recovered.
884  * @eh_done_q:	list_head for processed commands.
885  *
886  * Notes:
887  *    If commands are failing due to not ready, initializing command required,
888  *	try revalidating the device, which will end up sending a start unit.
889  **/
890 static int scsi_eh_stu(struct Scsi_Host *shost,
891 			      struct list_head *work_q,
892 			      struct list_head *done_q)
893 {
894 	struct scsi_cmnd *scmd, *stu_scmd, *next;
895 	struct scsi_device *sdev;
896 
897 	shost_for_each_device(sdev, shost) {
898 		stu_scmd = NULL;
899 		list_for_each_entry(scmd, work_q, eh_entry)
900 			if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
901 			    scsi_check_sense(scmd) == FAILED ) {
902 				stu_scmd = scmd;
903 				break;
904 			}
905 
906 		if (!stu_scmd)
907 			continue;
908 
909 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
910 						  " 0x%p\n", current->comm, sdev));
911 
912 		if (!scsi_eh_try_stu(stu_scmd)) {
913 			if (!scsi_device_online(sdev) ||
914 			    !scsi_eh_tur(stu_scmd)) {
915 				list_for_each_entry_safe(scmd, next,
916 							  work_q, eh_entry) {
917 					if (scmd->device == sdev)
918 						scsi_eh_finish_cmd(scmd, done_q);
919 				}
920 			}
921 		} else {
922 			SCSI_LOG_ERROR_RECOVERY(3,
923 						printk("%s: START_UNIT failed to sdev:"
924 						       " 0x%p\n", current->comm, sdev));
925 		}
926 	}
927 
928 	return list_empty(work_q);
929 }
930 
931 
932 /**
933  * scsi_eh_bus_device_reset - send bdr if needed
934  * @shost:	scsi host being recovered.
935  * @eh_done_q:	list_head for processed commands.
936  *
937  * Notes:
938  *    Try a bus device reset.  still, look to see whether we have multiple
939  *    devices that are jammed or not - if we have multiple devices, it
940  *    makes no sense to try bus_device_reset - we really would need to try
941  *    a bus_reset instead.
942  **/
943 static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
944 				    struct list_head *work_q,
945 				    struct list_head *done_q)
946 {
947 	struct scsi_cmnd *scmd, *bdr_scmd, *next;
948 	struct scsi_device *sdev;
949 	int rtn;
950 
951 	shost_for_each_device(sdev, shost) {
952 		bdr_scmd = NULL;
953 		list_for_each_entry(scmd, work_q, eh_entry)
954 			if (scmd->device == sdev) {
955 				bdr_scmd = scmd;
956 				break;
957 			}
958 
959 		if (!bdr_scmd)
960 			continue;
961 
962 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
963 						  " 0x%p\n", current->comm,
964 						  sdev));
965 		rtn = scsi_try_bus_device_reset(bdr_scmd);
966 		if (rtn == SUCCESS) {
967 			if (!scsi_device_online(sdev) ||
968 			    !scsi_eh_tur(bdr_scmd)) {
969 				list_for_each_entry_safe(scmd, next,
970 							 work_q, eh_entry) {
971 					if (scmd->device == sdev)
972 						scsi_eh_finish_cmd(scmd,
973 								   done_q);
974 				}
975 			}
976 		} else {
977 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
978 							  " failed sdev:"
979 							  "0x%p\n",
980 							  current->comm,
981 							   sdev));
982 		}
983 	}
984 
985 	return list_empty(work_q);
986 }
987 
988 /**
989  * scsi_try_bus_reset - ask host to perform a bus reset
990  * @scmd:	SCSI cmd to send bus reset.
991  **/
992 static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
993 {
994 	unsigned long flags;
995 	int rtn;
996 
997 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
998 					  __FUNCTION__));
999 
1000 	if (!scmd->device->host->hostt->eh_bus_reset_handler)
1001 		return FAILED;
1002 
1003 	rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
1004 
1005 	if (rtn == SUCCESS) {
1006 		if (!scmd->device->host->hostt->skip_settle_delay)
1007 			ssleep(BUS_RESET_SETTLE_TIME);
1008 		spin_lock_irqsave(scmd->device->host->host_lock, flags);
1009 		scsi_report_bus_reset(scmd->device->host,
1010 				      scmd_channel(scmd));
1011 		spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1012 	}
1013 
1014 	return rtn;
1015 }
1016 
1017 /**
1018  * scsi_try_host_reset - ask host adapter to reset itself
1019  * @scmd:	SCSI cmd to send hsot reset.
1020  **/
1021 static int scsi_try_host_reset(struct scsi_cmnd *scmd)
1022 {
1023 	unsigned long flags;
1024 	int rtn;
1025 
1026 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
1027 					  __FUNCTION__));
1028 
1029 	if (!scmd->device->host->hostt->eh_host_reset_handler)
1030 		return FAILED;
1031 
1032 	rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
1033 
1034 	if (rtn == SUCCESS) {
1035 		if (!scmd->device->host->hostt->skip_settle_delay)
1036 			ssleep(HOST_RESET_SETTLE_TIME);
1037 		spin_lock_irqsave(scmd->device->host->host_lock, flags);
1038 		scsi_report_bus_reset(scmd->device->host,
1039 				      scmd_channel(scmd));
1040 		spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1041 	}
1042 
1043 	return rtn;
1044 }
1045 
1046 /**
1047  * scsi_eh_bus_reset - send a bus reset
1048  * @shost:	scsi host being recovered.
1049  * @eh_done_q:	list_head for processed commands.
1050  **/
1051 static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1052 			     struct list_head *work_q,
1053 			     struct list_head *done_q)
1054 {
1055 	struct scsi_cmnd *scmd, *chan_scmd, *next;
1056 	unsigned int channel;
1057 	int rtn;
1058 
1059 	/*
1060 	 * we really want to loop over the various channels, and do this on
1061 	 * a channel by channel basis.  we should also check to see if any
1062 	 * of the failed commands are on soft_reset devices, and if so, skip
1063 	 * the reset.
1064 	 */
1065 
1066 	for (channel = 0; channel <= shost->max_channel; channel++) {
1067 		chan_scmd = NULL;
1068 		list_for_each_entry(scmd, work_q, eh_entry) {
1069 			if (channel == scmd_channel(scmd)) {
1070 				chan_scmd = scmd;
1071 				break;
1072 				/*
1073 				 * FIXME add back in some support for
1074 				 * soft_reset devices.
1075 				 */
1076 			}
1077 		}
1078 
1079 		if (!chan_scmd)
1080 			continue;
1081 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
1082 						  " %d\n", current->comm,
1083 						  channel));
1084 		rtn = scsi_try_bus_reset(chan_scmd);
1085 		if (rtn == SUCCESS) {
1086 			list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1087 				if (channel == scmd_channel(scmd))
1088 					if (!scsi_device_online(scmd->device) ||
1089 					    !scsi_eh_tur(scmd))
1090 						scsi_eh_finish_cmd(scmd,
1091 								   done_q);
1092 			}
1093 		} else {
1094 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
1095 							  " failed chan: %d\n",
1096 							  current->comm,
1097 							  channel));
1098 		}
1099 	}
1100 	return list_empty(work_q);
1101 }
1102 
1103 /**
1104  * scsi_eh_host_reset - send a host reset
1105  * @work_q:	list_head for processed commands.
1106  * @done_q:	list_head for processed commands.
1107  **/
1108 static int scsi_eh_host_reset(struct list_head *work_q,
1109 			      struct list_head *done_q)
1110 {
1111 	struct scsi_cmnd *scmd, *next;
1112 	int rtn;
1113 
1114 	if (!list_empty(work_q)) {
1115 		scmd = list_entry(work_q->next,
1116 				  struct scsi_cmnd, eh_entry);
1117 
1118 		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
1119 						  , current->comm));
1120 
1121 		rtn = scsi_try_host_reset(scmd);
1122 		if (rtn == SUCCESS) {
1123 			list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1124 				if (!scsi_device_online(scmd->device) ||
1125 				    (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) ||
1126 				    !scsi_eh_tur(scmd))
1127 					scsi_eh_finish_cmd(scmd, done_q);
1128 			}
1129 		} else {
1130 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
1131 							  " failed\n",
1132 							  current->comm));
1133 		}
1134 	}
1135 	return list_empty(work_q);
1136 }
1137 
1138 /**
1139  * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
1140  * @work_q:	list_head for processed commands.
1141  * @done_q:	list_head for processed commands.
1142  *
1143  **/
1144 static void scsi_eh_offline_sdevs(struct list_head *work_q,
1145 				  struct list_head *done_q)
1146 {
1147 	struct scsi_cmnd *scmd, *next;
1148 
1149 	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
1150 		sdev_printk(KERN_INFO, scmd->device,
1151 			    "scsi: Device offlined - not"
1152 			    " ready after error recovery\n");
1153 		scsi_device_set_state(scmd->device, SDEV_OFFLINE);
1154 		if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
1155 			/*
1156 			 * FIXME: Handle lost cmds.
1157 			 */
1158 		}
1159 		scsi_eh_finish_cmd(scmd, done_q);
1160 	}
1161 	return;
1162 }
1163 
1164 /**
1165  * scsi_decide_disposition - Disposition a cmd on return from LLD.
1166  * @scmd:	SCSI cmd to examine.
1167  *
1168  * Notes:
1169  *    This is *only* called when we are examining the status after sending
1170  *    out the actual data command.  any commands that are queued for error
1171  *    recovery (e.g. test_unit_ready) do *not* come through here.
1172  *
1173  *    When this routine returns failed, it means the error handler thread
1174  *    is woken.  In cases where the error code indicates an error that
1175  *    doesn't require the error handler read (i.e. we don't need to
1176  *    abort/reset), this function should return SUCCESS.
1177  **/
1178 int scsi_decide_disposition(struct scsi_cmnd *scmd)
1179 {
1180 	int rtn;
1181 
1182 	/*
1183 	 * if the device is offline, then we clearly just pass the result back
1184 	 * up to the top level.
1185 	 */
1186 	if (!scsi_device_online(scmd->device)) {
1187 		SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
1188 						  " as SUCCESS\n",
1189 						  __FUNCTION__));
1190 		return SUCCESS;
1191 	}
1192 
1193 	/*
1194 	 * first check the host byte, to see if there is anything in there
1195 	 * that would indicate what we need to do.
1196 	 */
1197 	switch (host_byte(scmd->result)) {
1198 	case DID_PASSTHROUGH:
1199 		/*
1200 		 * no matter what, pass this through to the upper layer.
1201 		 * nuke this special code so that it looks like we are saying
1202 		 * did_ok.
1203 		 */
1204 		scmd->result &= 0xff00ffff;
1205 		return SUCCESS;
1206 	case DID_OK:
1207 		/*
1208 		 * looks good.  drop through, and check the next byte.
1209 		 */
1210 		break;
1211 	case DID_NO_CONNECT:
1212 	case DID_BAD_TARGET:
1213 	case DID_ABORT:
1214 		/*
1215 		 * note - this means that we just report the status back
1216 		 * to the top level driver, not that we actually think
1217 		 * that it indicates SUCCESS.
1218 		 */
1219 		return SUCCESS;
1220 		/*
1221 		 * when the low level driver returns did_soft_error,
1222 		 * it is responsible for keeping an internal retry counter
1223 		 * in order to avoid endless loops (db)
1224 		 *
1225 		 * actually this is a bug in this function here.  we should
1226 		 * be mindful of the maximum number of retries specified
1227 		 * and not get stuck in a loop.
1228 		 */
1229 	case DID_SOFT_ERROR:
1230 		goto maybe_retry;
1231 	case DID_IMM_RETRY:
1232 		return NEEDS_RETRY;
1233 
1234 	case DID_REQUEUE:
1235 		return ADD_TO_MLQUEUE;
1236 
1237 	case DID_ERROR:
1238 		if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1239 		    status_byte(scmd->result) == RESERVATION_CONFLICT)
1240 			/*
1241 			 * execute reservation conflict processing code
1242 			 * lower down
1243 			 */
1244 			break;
1245 		/* fallthrough */
1246 
1247 	case DID_BUS_BUSY:
1248 	case DID_PARITY:
1249 		goto maybe_retry;
1250 	case DID_TIME_OUT:
1251 		/*
1252 		 * when we scan the bus, we get timeout messages for
1253 		 * these commands if there is no device available.
1254 		 * other hosts report did_no_connect for the same thing.
1255 		 */
1256 		if ((scmd->cmnd[0] == TEST_UNIT_READY ||
1257 		     scmd->cmnd[0] == INQUIRY)) {
1258 			return SUCCESS;
1259 		} else {
1260 			return FAILED;
1261 		}
1262 	case DID_RESET:
1263 		return SUCCESS;
1264 	default:
1265 		return FAILED;
1266 	}
1267 
1268 	/*
1269 	 * next, check the message byte.
1270 	 */
1271 	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
1272 		return FAILED;
1273 
1274 	/*
1275 	 * check the status byte to see if this indicates anything special.
1276 	 */
1277 	switch (status_byte(scmd->result)) {
1278 	case QUEUE_FULL:
1279 		/*
1280 		 * the case of trying to send too many commands to a
1281 		 * tagged queueing device.
1282 		 */
1283 	case BUSY:
1284 		/*
1285 		 * device can't talk to us at the moment.  Should only
1286 		 * occur (SAM-3) when the task queue is empty, so will cause
1287 		 * the empty queue handling to trigger a stall in the
1288 		 * device.
1289 		 */
1290 		return ADD_TO_MLQUEUE;
1291 	case GOOD:
1292 	case COMMAND_TERMINATED:
1293 	case TASK_ABORTED:
1294 		return SUCCESS;
1295 	case CHECK_CONDITION:
1296 		rtn = scsi_check_sense(scmd);
1297 		if (rtn == NEEDS_RETRY)
1298 			goto maybe_retry;
1299 		/* if rtn == FAILED, we have no sense information;
1300 		 * returning FAILED will wake the error handler thread
1301 		 * to collect the sense and redo the decide
1302 		 * disposition */
1303 		return rtn;
1304 	case CONDITION_GOOD:
1305 	case INTERMEDIATE_GOOD:
1306 	case INTERMEDIATE_C_GOOD:
1307 	case ACA_ACTIVE:
1308 		/*
1309 		 * who knows?  FIXME(eric)
1310 		 */
1311 		return SUCCESS;
1312 
1313 	case RESERVATION_CONFLICT:
1314 		sdev_printk(KERN_INFO, scmd->device,
1315 			    "reservation conflict\n");
1316 		return SUCCESS; /* causes immediate i/o error */
1317 	default:
1318 		return FAILED;
1319 	}
1320 	return FAILED;
1321 
1322       maybe_retry:
1323 
1324 	/* we requeue for retry because the error was retryable, and
1325 	 * the request was not marked fast fail.  Note that above,
1326 	 * even if the request is marked fast fail, we still requeue
1327 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
1328 	if ((++scmd->retries) <= scmd->allowed
1329 	    && !blk_noretry_request(scmd->request)) {
1330 		return NEEDS_RETRY;
1331 	} else {
1332 		/*
1333 		 * no more retries - report this one back to upper level.
1334 		 */
1335 		return SUCCESS;
1336 	}
1337 }
1338 
1339 /**
1340  * scsi_eh_lock_door - Prevent medium removal for the specified device
1341  * @sdev:	SCSI device to prevent medium removal
1342  *
1343  * Locking:
1344  * 	We must be called from process context; scsi_allocate_request()
1345  * 	may sleep.
1346  *
1347  * Notes:
1348  * 	We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
1349  * 	head of the devices request queue, and continue.
1350  *
1351  * Bugs:
1352  * 	scsi_allocate_request() may sleep waiting for existing requests to
1353  * 	be processed.  However, since we haven't kicked off any request
1354  * 	processing for this host, this may deadlock.
1355  *
1356  *	If scsi_allocate_request() fails for what ever reason, we
1357  *	completely forget to lock the door.
1358  **/
1359 static void scsi_eh_lock_door(struct scsi_device *sdev)
1360 {
1361 	unsigned char cmnd[MAX_COMMAND_SIZE];
1362 
1363 	cmnd[0] = ALLOW_MEDIUM_REMOVAL;
1364 	cmnd[1] = 0;
1365 	cmnd[2] = 0;
1366 	cmnd[3] = 0;
1367 	cmnd[4] = SCSI_REMOVAL_PREVENT;
1368 	cmnd[5] = 0;
1369 
1370 	scsi_execute_async(sdev, cmnd, 6, DMA_NONE, NULL, 0, 0, 10 * HZ,
1371 			   5, NULL, NULL, GFP_KERNEL);
1372 }
1373 
1374 
1375 /**
1376  * scsi_restart_operations - restart io operations to the specified host.
1377  * @shost:	Host we are restarting.
1378  *
1379  * Notes:
1380  *    When we entered the error handler, we blocked all further i/o to
1381  *    this device.  we need to 'reverse' this process.
1382  **/
1383 static void scsi_restart_operations(struct Scsi_Host *shost)
1384 {
1385 	struct scsi_device *sdev;
1386 	unsigned long flags;
1387 
1388 	/*
1389 	 * If the door was locked, we need to insert a door lock request
1390 	 * onto the head of the SCSI request queue for the device.  There
1391 	 * is no point trying to lock the door of an off-line device.
1392 	 */
1393 	shost_for_each_device(sdev, shost) {
1394 		if (scsi_device_online(sdev) && sdev->locked)
1395 			scsi_eh_lock_door(sdev);
1396 	}
1397 
1398 	/*
1399 	 * next free up anything directly waiting upon the host.  this
1400 	 * will be requests for character device operations, and also for
1401 	 * ioctls to queued block devices.
1402 	 */
1403 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
1404 					  __FUNCTION__));
1405 
1406 	spin_lock_irqsave(shost->host_lock, flags);
1407 	if (scsi_host_set_state(shost, SHOST_RUNNING))
1408 		if (scsi_host_set_state(shost, SHOST_CANCEL))
1409 			BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
1410 	spin_unlock_irqrestore(shost->host_lock, flags);
1411 
1412 	wake_up(&shost->host_wait);
1413 
1414 	/*
1415 	 * finally we need to re-initiate requests that may be pending.  we will
1416 	 * have had everything blocked while error handling is taking place, and
1417 	 * now that error recovery is done, we will need to ensure that these
1418 	 * requests are started.
1419 	 */
1420 	scsi_run_host_queues(shost);
1421 }
1422 
/**
 * scsi_eh_ready_devs - check device ready state and recover if not.
 * @shost: 	host to be recovered.
 * @work_q:	list_head of commands still awaiting recovery.
 * @done_q:	list_head for processed commands.
 *
 * Escalates through the recovery steps one by one and stops as soon as
 * a step reports success (non-zero).  When every step has reported
 * failure the remaining devices are taken offline.
 **/
static void scsi_eh_ready_devs(struct Scsi_Host *shost,
			       struct list_head *work_q,
			       struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_host_reset(work_q, done_q))
		return;

	scsi_eh_offline_sdevs(work_q, done_q);
}
1439 
1440 /**
1441  * scsi_eh_flush_done_q - finish processed commands or retry them.
1442  * @done_q:	list_head of processed commands.
1443  *
1444  **/
1445 void scsi_eh_flush_done_q(struct list_head *done_q)
1446 {
1447 	struct scsi_cmnd *scmd, *next;
1448 
1449 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
1450 		list_del_init(&scmd->eh_entry);
1451 		if (scsi_device_online(scmd->device) &&
1452 		    !blk_noretry_request(scmd->request) &&
1453 		    (++scmd->retries <= scmd->allowed)) {
1454 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
1455 							  " retry cmd: %p\n",
1456 							  current->comm,
1457 							  scmd));
1458 				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
1459 		} else {
1460 			/*
1461 			 * If just we got sense for the device (called
1462 			 * scsi_eh_get_sense), scmd->result is already
1463 			 * set, do not set DRIVER_TIMEOUT.
1464 			 */
1465 			if (!scmd->result)
1466 				scmd->result |= (DRIVER_TIMEOUT << 24);
1467 			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
1468 							" cmd: %p\n",
1469 							current->comm, scmd));
1470 			scsi_finish_command(scmd);
1471 		}
1472 	}
1473 }
1474 EXPORT_SYMBOL(scsi_eh_flush_done_q);
1475 
1476 /**
1477  * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
1478  * @shost:	Host to unjam.
1479  *
1480  * Notes:
1481  *    When we come in here, we *know* that all commands on the bus have
1482  *    either completed, failed or timed out.  we also know that no further
1483  *    commands are being sent to the host, so things are relatively quiet
1484  *    and we have freedom to fiddle with things as we wish.
1485  *
1486  *    This is only the *default* implementation.  it is possible for
1487  *    individual drivers to supply their own version of this function, and
1488  *    if the maintainer wishes to do this, it is strongly suggested that
1489  *    this function be taken as a template and modified.  this function
1490  *    was designed to correctly handle problems for about 95% of the
1491  *    different cases out there, and it should always provide at least a
1492  *    reasonable amount of error recovery.
1493  *
1494  *    Any command marked 'failed' or 'timeout' must eventually have
1495  *    scsi_finish_cmd() called for it.  we do all of the retry stuff
1496  *    here, so when we restart the host after we return it should have an
1497  *    empty queue.
1498  **/
1499 static void scsi_unjam_host(struct Scsi_Host *shost)
1500 {
1501 	unsigned long flags;
1502 	LIST_HEAD(eh_work_q);
1503 	LIST_HEAD(eh_done_q);
1504 
1505 	spin_lock_irqsave(shost->host_lock, flags);
1506 	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
1507 	spin_unlock_irqrestore(shost->host_lock, flags);
1508 
1509 	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
1510 
1511 	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
1512 		if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1513 			scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1514 
1515 	scsi_eh_flush_done_q(&eh_done_q);
1516 }
1517 
1518 /**
1519  * scsi_error_handler - SCSI error handler thread
1520  * @data:	Host for which we are running.
1521  *
1522  * Notes:
1523  *    This is the main error handling loop.  This is run as a kernel thread
1524  *    for every SCSI host and handles all error handling activity.
1525  **/
1526 int scsi_error_handler(void *data)
1527 {
1528 	struct Scsi_Host *shost = data;
1529 
1530 	current->flags |= PF_NOFREEZE;
1531 
1532 	/*
1533 	 * We use TASK_INTERRUPTIBLE so that the thread is not
1534 	 * counted against the load average as a running process.
1535 	 * We never actually get interrupted because kthread_run
1536 	 * disables singal delivery for the created thread.
1537 	 */
1538 	set_current_state(TASK_INTERRUPTIBLE);
1539 	while (!kthread_should_stop()) {
1540 		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
1541 		    shost->host_failed != shost->host_busy) {
1542 			SCSI_LOG_ERROR_RECOVERY(1,
1543 				printk("Error handler scsi_eh_%d sleeping\n",
1544 					shost->host_no));
1545 			schedule();
1546 			set_current_state(TASK_INTERRUPTIBLE);
1547 			continue;
1548 		}
1549 
1550 		__set_current_state(TASK_RUNNING);
1551 		SCSI_LOG_ERROR_RECOVERY(1,
1552 			printk("Error handler scsi_eh_%d waking up\n",
1553 				shost->host_no));
1554 
1555 		/*
1556 		 * We have a host that is failing for some reason.  Figure out
1557 		 * what we need to do to get it up and online again (if we can).
1558 		 * If we fail, we end up taking the thing offline.
1559 		 */
1560 		if (shost->transportt->eh_strategy_handler)
1561 			shost->transportt->eh_strategy_handler(shost);
1562 		else
1563 			scsi_unjam_host(shost);
1564 
1565 		/*
1566 		 * Note - if the above fails completely, the action is to take
1567 		 * individual devices offline and flush the queue of any
1568 		 * outstanding requests that may have been pending.  When we
1569 		 * restart, we restart any I/O to any other devices on the bus
1570 		 * which are still online.
1571 		 */
1572 		scsi_restart_operations(shost);
1573 		set_current_state(TASK_INTERRUPTIBLE);
1574 	}
1575 	__set_current_state(TASK_RUNNING);
1576 
1577 	SCSI_LOG_ERROR_RECOVERY(1,
1578 		printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
1579 	shost->ehandler = NULL;
1580 	return 0;
1581 }
1582 
1583 /*
1584  * Function:    scsi_report_bus_reset()
1585  *
1586  * Purpose:     Utility function used by low-level drivers to report that
1587  *		they have observed a bus reset on the bus being handled.
1588  *
1589  * Arguments:   shost       - Host in question
1590  *		channel     - channel on which reset was observed.
1591  *
1592  * Returns:     Nothing
1593  *
1594  * Lock status: Host lock must be held.
1595  *
1596  * Notes:       This only needs to be called if the reset is one which
1597  *		originates from an unknown location.  Resets originated
1598  *		by the mid-level itself don't need to call this, but there
1599  *		should be no harm.
1600  *
1601  *		The main purpose of this is to make sure that a CHECK_CONDITION
1602  *		is properly treated.
1603  */
1604 void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
1605 {
1606 	struct scsi_device *sdev;
1607 
1608 	__shost_for_each_device(sdev, shost) {
1609 		if (channel == sdev_channel(sdev)) {
1610 			sdev->was_reset = 1;
1611 			sdev->expecting_cc_ua = 1;
1612 		}
1613 	}
1614 }
1615 EXPORT_SYMBOL(scsi_report_bus_reset);
1616 
1617 /*
1618  * Function:    scsi_report_device_reset()
1619  *
1620  * Purpose:     Utility function used by low-level drivers to report that
1621  *		they have observed a device reset on the device being handled.
1622  *
1623  * Arguments:   shost       - Host in question
1624  *		channel     - channel on which reset was observed
1625  *		target	    - target on which reset was observed
1626  *
1627  * Returns:     Nothing
1628  *
1629  * Lock status: Host lock must be held
1630  *
1631  * Notes:       This only needs to be called if the reset is one which
1632  *		originates from an unknown location.  Resets originated
1633  *		by the mid-level itself don't need to call this, but there
1634  *		should be no harm.
1635  *
1636  *		The main purpose of this is to make sure that a CHECK_CONDITION
1637  *		is properly treated.
1638  */
1639 void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
1640 {
1641 	struct scsi_device *sdev;
1642 
1643 	__shost_for_each_device(sdev, shost) {
1644 		if (channel == sdev_channel(sdev) &&
1645 		    target == sdev_id(sdev)) {
1646 			sdev->was_reset = 1;
1647 			sdev->expecting_cc_ua = 1;
1648 		}
1649 	}
1650 }
1651 EXPORT_SYMBOL(scsi_report_device_reset);
1652 
/*
 * Completion callback installed on the command that scsi_reset_provider()
 * builds; it intentionally does nothing.
 */
static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
	/* nothing to complete */
}
1657 
1658 /*
1659  * Function:	scsi_reset_provider
1660  *
1661  * Purpose:	Send requested reset to a bus or device at any phase.
1662  *
1663  * Arguments:	device	- device to send reset to
1664  *		flag - reset type (see scsi.h)
1665  *
1666  * Returns:	SUCCESS/FAILURE.
1667  *
1668  * Notes:	This is used by the SCSI Generic driver to provide
1669  *		Bus/Device reset capability.
1670  */
1671 int
1672 scsi_reset_provider(struct scsi_device *dev, int flag)
1673 {
1674 	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
1675 	struct Scsi_Host *shost = dev->host;
1676 	struct request req;
1677 	unsigned long flags;
1678 	int rtn;
1679 
1680 	scmd->request = &req;
1681 	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
1682 
1683 	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
1684 
1685 	scmd->scsi_done		= scsi_reset_provider_done_command;
1686 	scmd->done			= NULL;
1687 	scmd->buffer			= NULL;
1688 	scmd->bufflen			= 0;
1689 	scmd->request_buffer		= NULL;
1690 	scmd->request_bufflen		= 0;
1691 
1692 	scmd->cmd_len			= 0;
1693 
1694 	scmd->sc_data_direction		= DMA_BIDIRECTIONAL;
1695 
1696 	init_timer(&scmd->eh_timeout);
1697 
1698 	/*
1699 	 * Sometimes the command can get back into the timer chain,
1700 	 * so use the pid as an identifier.
1701 	 */
1702 	scmd->pid			= 0;
1703 
1704 	spin_lock_irqsave(shost->host_lock, flags);
1705 	shost->tmf_in_progress = 1;
1706 	spin_unlock_irqrestore(shost->host_lock, flags);
1707 
1708 	switch (flag) {
1709 	case SCSI_TRY_RESET_DEVICE:
1710 		rtn = scsi_try_bus_device_reset(scmd);
1711 		if (rtn == SUCCESS)
1712 			break;
1713 		/* FALLTHROUGH */
1714 	case SCSI_TRY_RESET_BUS:
1715 		rtn = scsi_try_bus_reset(scmd);
1716 		if (rtn == SUCCESS)
1717 			break;
1718 		/* FALLTHROUGH */
1719 	case SCSI_TRY_RESET_HOST:
1720 		rtn = scsi_try_host_reset(scmd);
1721 		break;
1722 	default:
1723 		rtn = FAILED;
1724 	}
1725 
1726 	spin_lock_irqsave(shost->host_lock, flags);
1727 	shost->tmf_in_progress = 0;
1728 	spin_unlock_irqrestore(shost->host_lock, flags);
1729 
1730 	/*
1731 	 * be sure to wake up anyone who was sleeping or had their queue
1732 	 * suspended while we performed the TMF.
1733 	 */
1734 	SCSI_LOG_ERROR_RECOVERY(3,
1735 		printk("%s: waking up host to restart after TMF\n",
1736 		__FUNCTION__));
1737 
1738 	wake_up(&shost->host_wait);
1739 
1740 	scsi_run_host_queues(shost);
1741 
1742 	scsi_next_command(scmd);
1743 	return rtn;
1744 }
1745 EXPORT_SYMBOL(scsi_reset_provider);
1746 
1747 /**
1748  * scsi_normalize_sense - normalize main elements from either fixed or
1749  *			descriptor sense data format into a common format.
1750  *
1751  * @sense_buffer:	byte array containing sense data returned by device
1752  * @sb_len:		number of valid bytes in sense_buffer
1753  * @sshdr:		pointer to instance of structure that common
1754  *			elements are written to.
1755  *
1756  * Notes:
1757  *	The "main elements" from sense data are: response_code, sense_key,
1758  *	asc, ascq and additional_length (only for descriptor format).
1759  *
1760  *	Typically this function can be called after a device has
1761  *	responded to a SCSI command with the CHECK_CONDITION status.
1762  *
1763  * Return value:
1764  *	1 if valid sense data information found, else 0;
1765  **/
1766 int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
1767                          struct scsi_sense_hdr *sshdr)
1768 {
1769 	if (!sense_buffer || !sb_len)
1770 		return 0;
1771 
1772 	memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
1773 
1774 	sshdr->response_code = (sense_buffer[0] & 0x7f);
1775 
1776 	if (!scsi_sense_valid(sshdr))
1777 		return 0;
1778 
1779 	if (sshdr->response_code >= 0x72) {
1780 		/*
1781 		 * descriptor format
1782 		 */
1783 		if (sb_len > 1)
1784 			sshdr->sense_key = (sense_buffer[1] & 0xf);
1785 		if (sb_len > 2)
1786 			sshdr->asc = sense_buffer[2];
1787 		if (sb_len > 3)
1788 			sshdr->ascq = sense_buffer[3];
1789 		if (sb_len > 7)
1790 			sshdr->additional_length = sense_buffer[7];
1791 	} else {
1792 		/*
1793 		 * fixed format
1794 		 */
1795 		if (sb_len > 2)
1796 			sshdr->sense_key = (sense_buffer[2] & 0xf);
1797 		if (sb_len > 7) {
1798 			sb_len = (sb_len < (sense_buffer[7] + 8)) ?
1799 					 sb_len : (sense_buffer[7] + 8);
1800 			if (sb_len > 12)
1801 				sshdr->asc = sense_buffer[12];
1802 			if (sb_len > 13)
1803 				sshdr->ascq = sense_buffer[13];
1804 		}
1805 	}
1806 
1807 	return 1;
1808 }
1809 EXPORT_SYMBOL(scsi_normalize_sense);
1810 
1811 int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
1812 				 struct scsi_sense_hdr *sshdr)
1813 {
1814 	return scsi_normalize_sense(cmd->sense_buffer,
1815 			sizeof(cmd->sense_buffer), sshdr);
1816 }
1817 EXPORT_SYMBOL(scsi_command_normalize_sense);
1818 
1819 /**
1820  * scsi_sense_desc_find - search for a given descriptor type in
1821  *			descriptor sense data format.
1822  *
1823  * @sense_buffer:	byte array of descriptor format sense data
1824  * @sb_len:		number of valid bytes in sense_buffer
1825  * @desc_type:		value of descriptor type to find
1826  *			(e.g. 0 -> information)
1827  *
1828  * Notes:
1829  *	only valid when sense data is in descriptor format
1830  *
1831  * Return value:
1832  *	pointer to start of (first) descriptor if found else NULL
1833  **/
1834 const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
1835 				int desc_type)
1836 {
1837 	int add_sen_len, add_len, desc_len, k;
1838 	const u8 * descp;
1839 
1840 	if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7])))
1841 		return NULL;
1842 	if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73))
1843 		return NULL;
1844 	add_sen_len = (add_sen_len < (sb_len - 8)) ?
1845 			add_sen_len : (sb_len - 8);
1846 	descp = &sense_buffer[8];
1847 	for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
1848 		descp += desc_len;
1849 		add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
1850 		desc_len = add_len + 2;
1851 		if (descp[0] == desc_type)
1852 			return descp;
1853 		if (add_len < 0) // short descriptor ??
1854 			break;
1855 	}
1856 	return NULL;
1857 }
1858 EXPORT_SYMBOL(scsi_sense_desc_find);
1859 
1860 /**
1861  * scsi_get_sense_info_fld - attempts to get information field from
1862  *			sense data (either fixed or descriptor format)
1863  *
1864  * @sense_buffer:	byte array of sense data
1865  * @sb_len:		number of valid bytes in sense_buffer
1866  * @info_out:		pointer to 64 integer where 8 or 4 byte information
1867  *			field will be placed if found.
1868  *
1869  * Return value:
1870  *	1 if information field found, 0 if not found.
1871  **/
1872 int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
1873 			    u64 * info_out)
1874 {
1875 	int j;
1876 	const u8 * ucp;
1877 	u64 ull;
1878 
1879 	if (sb_len < 7)
1880 		return 0;
1881 	switch (sense_buffer[0] & 0x7f) {
1882 	case 0x70:
1883 	case 0x71:
1884 		if (sense_buffer[0] & 0x80) {
1885 			*info_out = (sense_buffer[3] << 24) +
1886 				    (sense_buffer[4] << 16) +
1887 				    (sense_buffer[5] << 8) + sense_buffer[6];
1888 			return 1;
1889 		} else
1890 			return 0;
1891 	case 0x72:
1892 	case 0x73:
1893 		ucp = scsi_sense_desc_find(sense_buffer, sb_len,
1894 					   0 /* info desc */);
1895 		if (ucp && (0xa == ucp[1])) {
1896 			ull = 0;
1897 			for (j = 0; j < 8; ++j) {
1898 				if (j > 0)
1899 					ull <<= 8;
1900 				ull |= ucp[4 + j];
1901 			}
1902 			*info_out = ull;
1903 			return 1;
1904 		} else
1905 			return 0;
1906 	default:
1907 		return 0;
1908 	}
1909 }
1910 EXPORT_SYMBOL(scsi_get_sense_info_fld);
1911