xref: /freebsd/sbin/camcontrol/depop.c (revision 32e86a82f54826f14ea381affa6674db3aa3b5ae)
153cce2e7SWarner Losh /*-
253cce2e7SWarner Losh  * Copyright (c) 2021 Netflix, Inc.
353cce2e7SWarner Losh  *
453cce2e7SWarner Losh  * Redistribution and use in source and binary forms, with or without
553cce2e7SWarner Losh  * modification, are permitted provided that the following conditions
653cce2e7SWarner Losh  * are met:
753cce2e7SWarner Losh  * 1. Redistributions of source code must retain the above copyright
853cce2e7SWarner Losh  *    notice, this list of conditions, and the following disclaimer,
953cce2e7SWarner Losh  *    without modification.
1053cce2e7SWarner Losh  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
1153cce2e7SWarner Losh  *    substantially similar to the "NO WARRANTY" disclaimer below
1253cce2e7SWarner Losh  *    ("Disclaimer") and any redistribution must be conditioned upon
1353cce2e7SWarner Losh  *    including a substantially similar Disclaimer requirement for further
1453cce2e7SWarner Losh  *    binary redistribution.
1553cce2e7SWarner Losh  *
1653cce2e7SWarner Losh  * NO WARRANTY
1753cce2e7SWarner Losh  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1853cce2e7SWarner Losh  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1953cce2e7SWarner Losh  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
2053cce2e7SWarner Losh  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2153cce2e7SWarner Losh  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2253cce2e7SWarner Losh  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2353cce2e7SWarner Losh  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2453cce2e7SWarner Losh  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2553cce2e7SWarner Losh  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
2653cce2e7SWarner Losh  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2753cce2e7SWarner Losh  * POSSIBILITY OF SUCH DAMAGES.
2853cce2e7SWarner Losh  *
2953cce2e7SWarner Losh  */
3053cce2e7SWarner Losh /*
3153cce2e7SWarner Losh  * SCSI disk depop (head depopulation) support
3253cce2e7SWarner Losh  *
3353cce2e7SWarner Losh  * The standard defines 'storage elements' as the generic way of referring to a
3453cce2e7SWarner Losh  * disk drive head. Each storage element has an identifier and an active status.
35*90ea9c0dSGordon Bergling  * The health of an element can be queried. Active elements may be removed from
 * service with a REMOVE ELEMENT AND TRUNCATE (RET) command. Inactive elements
 * may be returned to service with a RESTORE ELEMENTS AND REBUILD (RER)
 * command. GET PHYSICAL ELEMENT STATUS (GPES) will return a list of elements,
3953cce2e7SWarner Losh  * their health, whether they are in service, how much capacity the element is
4053cce2e7SWarner Losh  * used for, etc.
4153cce2e7SWarner Losh  *
4253cce2e7SWarner Losh  * When a depop operation starts, the drive becomes format corrupt. No normal
4353cce2e7SWarner Losh  * I/O can be done to the drive and a limited number of CDBs will
4453cce2e7SWarner Losh  * succeed. Status can be obtained by either a TEST UNIT READY or a GPES
4553cce2e7SWarner Losh  * command. A drive reset will not stop a depop operation, but a power cycle
4653cce2e7SWarner Losh  * will. A failed depop operation will be reported when the next TEST UNIT READY
4753cce2e7SWarner Losh  * is sent to the drive. Drives that are format corrupt after an interrupted
4853cce2e7SWarner Losh  * operation need to have that operation repeated.
4953cce2e7SWarner Losh  *
5053cce2e7SWarner Losh  * 'depop' provides a wrapper around all these functions.
5153cce2e7SWarner Losh  */
5253cce2e7SWarner Losh 
5353cce2e7SWarner Losh #include <sys/types.h>
5453cce2e7SWarner Losh 
5553cce2e7SWarner Losh #include <err.h>
5653cce2e7SWarner Losh #include <inttypes.h>
5753cce2e7SWarner Losh #include <stdio.h>
5853cce2e7SWarner Losh #include <stdlib.h>
5953cce2e7SWarner Losh #include <string.h>
6053cce2e7SWarner Losh #include <unistd.h>
6153cce2e7SWarner Losh 
6253cce2e7SWarner Losh #include <cam/cam.h>
6353cce2e7SWarner Losh #include <cam/cam_debug.h>
6453cce2e7SWarner Losh #include <cam/cam_ccb.h>
6553cce2e7SWarner Losh #include <cam/scsi/scsi_all.h>
6653cce2e7SWarner Losh #include <cam/scsi/scsi_message.h>
6753cce2e7SWarner Losh #include <camlib.h>
6853cce2e7SWarner Losh #include <scsi_wrap.h>
6953cce2e7SWarner Losh #include "camcontrol.h"
7053cce2e7SWarner Losh 
/*
 * The operation selected on the command line. DEPOP_NONE is the initial
 * state, meaning that none of -d, -l or -r has been seen yet.
 */
enum depop_action {
	DEPOP_NONE = 0,		/* no action flag given */
	DEPOP_LIST = 1,		/* -l: list elements */
	DEPOP_RESTORE = 2,	/* -r: restore elements */
	DEPOP_REMOVE = 3,	/* -d: remove an element */
};
7753cce2e7SWarner Losh 
7853cce2e7SWarner Losh static int
depop_list(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused)7953cce2e7SWarner Losh depop_list(struct cam_device *device, int task_attr, int retry_count,
8053cce2e7SWarner Losh     int timeout, int verbosemode __unused)
8153cce2e7SWarner Losh {
8253cce2e7SWarner Losh 	int error = 0;
8353cce2e7SWarner Losh 	uint32_t dtors;
8453cce2e7SWarner Losh 	struct scsi_get_physical_element_hdr *hdr;
8553cce2e7SWarner Losh 	struct scsi_get_physical_element_descriptor *dtor_ptr;
8653cce2e7SWarner Losh 
8753cce2e7SWarner Losh 	hdr = scsi_wrap_get_physical_element_status(device, task_attr, retry_count, timeout,
8853cce2e7SWarner Losh 	    SCSI_GPES_FILTER_ALL | SCSI_GPES_REPORT_TYPE_PHYS, 1);
8953cce2e7SWarner Losh 	if (hdr == NULL)
9053cce2e7SWarner Losh 		errx(1, "scsi_wrap_get_physical_element_status returned an error");
9153cce2e7SWarner Losh 
9253cce2e7SWarner Losh 	/*
9353cce2e7SWarner Losh 	 * OK, we have the data, not report it out.
9453cce2e7SWarner Losh 	 */
9553cce2e7SWarner Losh 	dtor_ptr = (struct scsi_get_physical_element_descriptor *)(hdr + 1);
9653cce2e7SWarner Losh 	dtors = scsi_4btoul(hdr->num_descriptors);
9753cce2e7SWarner Losh 	printf("Elem ID    * Health Capacity\n");
9853cce2e7SWarner Losh 	for (uint32_t i = 0; i < dtors; i++) {
9953cce2e7SWarner Losh 		uint32_t id = scsi_4btoul(dtor_ptr[i].element_identifier);
10053cce2e7SWarner Losh 		uint8_t ralwd = dtor_ptr[i].ralwd;
10153cce2e7SWarner Losh 		uint8_t type = dtor_ptr[i].physical_element_type;
10253cce2e7SWarner Losh 		uint8_t health = dtor_ptr[i].physical_element_health;
10353cce2e7SWarner Losh 		uint64_t cap = scsi_8btou64(dtor_ptr[i].capacity);
10453cce2e7SWarner Losh 		if (type != GPED_TYPE_STORAGE)
10553cce2e7SWarner Losh 			printf("0x%08x -- type unknown %d\n", id, type);
10653cce2e7SWarner Losh 		else
10753cce2e7SWarner Losh 			printf("0x%08x %c 0x%02x   %jd\n", id, ralwd ? '*' : ' ', health, cap);
10853cce2e7SWarner Losh 	}
10953cce2e7SWarner Losh 	printf("* -- Element can be restored\n");
11053cce2e7SWarner Losh 
11153cce2e7SWarner Losh 	free(hdr);
11253cce2e7SWarner Losh 	return (error);
11353cce2e7SWarner Losh }
11453cce2e7SWarner Losh 
11553cce2e7SWarner Losh static int
depop_remove(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused,uint32_t elem,uint64_t capacity)11653cce2e7SWarner Losh depop_remove(struct cam_device *device, int task_attr, int retry_count,
11753cce2e7SWarner Losh     int timeout, int verbosemode __unused, uint32_t elem, uint64_t capacity)
11853cce2e7SWarner Losh {
11953cce2e7SWarner Losh 	union ccb *ccb;
12053cce2e7SWarner Losh 	int error = 0;
12153cce2e7SWarner Losh 
12253cce2e7SWarner Losh 	ccb = cam_getccb(device);
12353cce2e7SWarner Losh 	if (ccb == NULL) {
12453cce2e7SWarner Losh 		warnx("Can't allocate ccb");
12553cce2e7SWarner Losh 		return (1);
12653cce2e7SWarner Losh 	}
12753cce2e7SWarner Losh 	scsi_remove_element_and_truncate(&ccb->csio,
12853cce2e7SWarner Losh 	    retry_count,
12953cce2e7SWarner Losh 	    NULL,
13053cce2e7SWarner Losh 	    task_attr,
13153cce2e7SWarner Losh 	    capacity,
13253cce2e7SWarner Losh 	    elem,
13353cce2e7SWarner Losh 	    SSD_FULL_SIZE,
13453cce2e7SWarner Losh 	    timeout);
13553cce2e7SWarner Losh 	/* Disable freezing the device queue */
13653cce2e7SWarner Losh 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
13753cce2e7SWarner Losh 	if (cam_send_ccb(device, ccb) < 0) {
13853cce2e7SWarner Losh 		warn("error sending GET PHYSICAL ELEMENT STATUS command");
13953cce2e7SWarner Losh 		error = 1;
14053cce2e7SWarner Losh 		goto out;
14153cce2e7SWarner Losh 	}
14253cce2e7SWarner Losh 
14353cce2e7SWarner Losh 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
14453cce2e7SWarner Losh 		cam_error_print(device, ccb, CAM_ESF_ALL,
14553cce2e7SWarner Losh 				CAM_EPF_ALL, stderr);
14653cce2e7SWarner Losh 		error = 1;
14753cce2e7SWarner Losh 	}
14853cce2e7SWarner Losh 
14953cce2e7SWarner Losh out:
15053cce2e7SWarner Losh 	cam_freeccb(ccb);
15153cce2e7SWarner Losh 	return (error);
15253cce2e7SWarner Losh }
15353cce2e7SWarner Losh 
15453cce2e7SWarner Losh static int
depop_restore(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused)15553cce2e7SWarner Losh depop_restore(struct cam_device *device, int task_attr, int retry_count,
15653cce2e7SWarner Losh     int timeout, int verbosemode __unused)
15753cce2e7SWarner Losh {
15853cce2e7SWarner Losh 	union ccb *ccb;
15953cce2e7SWarner Losh 	int error = 0;
16053cce2e7SWarner Losh 
16153cce2e7SWarner Losh 	ccb = cam_getccb(device);
16253cce2e7SWarner Losh 	if (ccb == NULL) {
16353cce2e7SWarner Losh 		warnx("Can't allocate ccb");
16453cce2e7SWarner Losh 		return (1);
16553cce2e7SWarner Losh 	}
16653cce2e7SWarner Losh 	scsi_restore_elements_and_rebuild(&ccb->csio,
16753cce2e7SWarner Losh 	    retry_count,
16853cce2e7SWarner Losh 	    NULL,
16953cce2e7SWarner Losh 	    task_attr,
17053cce2e7SWarner Losh 	    SSD_FULL_SIZE,
17153cce2e7SWarner Losh 	    timeout);
17253cce2e7SWarner Losh 
17353cce2e7SWarner Losh 	/* Disable freezing the device queue */
17453cce2e7SWarner Losh 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
17553cce2e7SWarner Losh 	if (cam_send_ccb(device, ccb) < 0) {
17653cce2e7SWarner Losh 		warn("error sending GET PHYSICAL ELEMENT STATUS command");
17753cce2e7SWarner Losh 		error = 1;
17853cce2e7SWarner Losh 		goto out;
17953cce2e7SWarner Losh 	}
18053cce2e7SWarner Losh 
18153cce2e7SWarner Losh 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
18253cce2e7SWarner Losh 		cam_error_print(device, ccb, CAM_ESF_ALL,
18353cce2e7SWarner Losh 				CAM_EPF_ALL, stderr);
18453cce2e7SWarner Losh 		error = 1;
18553cce2e7SWarner Losh 	}
18653cce2e7SWarner Losh 
18753cce2e7SWarner Losh out:
18853cce2e7SWarner Losh 	cam_freeccb(ccb);
18953cce2e7SWarner Losh 	return (error);
19053cce2e7SWarner Losh }
19153cce2e7SWarner Losh 
/*
 * Reject a second action flag: if an action has already been chosen, warn,
 * set error and jump to the bailout label. The expansion uses the enclosing
 * function's `action` and `error` variables and its `bailout` label.
 *
 * Wrapped in do { } while (0) so that `MUST_BE_NONE();` is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define MUST_BE_NONE() \
	do { \
		if (action != DEPOP_NONE) { \
			warnx("Use only one of -d, -l, or -r"); \
			error = 1; \
			goto bailout; \
		} \
	} while (0)
19853cce2e7SWarner Losh 
19953cce2e7SWarner Losh int
depop(struct cam_device * device,int argc,char ** argv,char * combinedopt,int task_attr,int retry_count,int timeout,int verbosemode)20053cce2e7SWarner Losh depop(struct cam_device *device, int argc, char **argv, char *combinedopt,
20153cce2e7SWarner Losh     int task_attr, int retry_count, int timeout, int verbosemode)
20253cce2e7SWarner Losh {
20353cce2e7SWarner Losh 	int c;
20453cce2e7SWarner Losh 	int action = DEPOP_NONE;
20553cce2e7SWarner Losh 	char *endptr;
20653cce2e7SWarner Losh 	int error = 0;
20753cce2e7SWarner Losh 	uint32_t elem = 0;
20853cce2e7SWarner Losh 	uint64_t capacity = 0;
20953cce2e7SWarner Losh 
21053cce2e7SWarner Losh 	while ((c = getopt(argc, argv, combinedopt)) != -1) {
21153cce2e7SWarner Losh 		switch (c) {
21253cce2e7SWarner Losh 		case 'c':
21353cce2e7SWarner Losh 			capacity = strtoumax(optarg, &endptr, 0);
21453cce2e7SWarner Losh 			if (*endptr != '\0') {
21553cce2e7SWarner Losh 				warnx("Invalid capacity: %s", optarg);
21653cce2e7SWarner Losh 				error = 1;
21753cce2e7SWarner Losh 				goto bailout;
21853cce2e7SWarner Losh 			}
21953cce2e7SWarner Losh 			break;
22053cce2e7SWarner Losh 		case 'e':
22153cce2e7SWarner Losh 			elem = strtoul(optarg, &endptr, 0);
22253cce2e7SWarner Losh 			if (*endptr != '\0') {
22353cce2e7SWarner Losh 				warnx("Invalid element: %s", optarg);
22453cce2e7SWarner Losh 				error = 1;
22553cce2e7SWarner Losh 				goto bailout;
22653cce2e7SWarner Losh 			}
22753cce2e7SWarner Losh 			break;
22853cce2e7SWarner Losh 		case 'd':
22953cce2e7SWarner Losh 			MUST_BE_NONE();
23053cce2e7SWarner Losh 			action = DEPOP_REMOVE;
23153cce2e7SWarner Losh 			break;
23253cce2e7SWarner Losh 		case 'l':
23353cce2e7SWarner Losh 			MUST_BE_NONE();
23453cce2e7SWarner Losh 			action  = DEPOP_LIST;
23553cce2e7SWarner Losh 			break;
23653cce2e7SWarner Losh 		case 'r':
23753cce2e7SWarner Losh 			MUST_BE_NONE();
23853cce2e7SWarner Losh 			action  = DEPOP_RESTORE;
23953cce2e7SWarner Losh 			break;
24053cce2e7SWarner Losh 		default:
24153cce2e7SWarner Losh 			break;
24253cce2e7SWarner Losh 		}
24353cce2e7SWarner Losh 	}
24453cce2e7SWarner Losh 
24553cce2e7SWarner Losh 	/*
24653cce2e7SWarner Losh 	 * Compute a sane timeout if none given. 5 seconds for the list command
24753cce2e7SWarner Losh 	 * and whatever the block device characteristics VPD says for other
24853cce2e7SWarner Losh 	 * depop commands. If there's no value in that field, default to 1
24953cce2e7SWarner Losh 	 * day. Experience has shown that these operations take the better part
25053cce2e7SWarner Losh 	 * of a day to complete, so a 1 day timeout default seems appropriate.
25153cce2e7SWarner Losh 	 */
25253cce2e7SWarner Losh 	if (timeout == 0 && action != DEPOP_NONE) {
25353cce2e7SWarner Losh 		if (action == DEPOP_LIST) {
25453cce2e7SWarner Losh 			timeout = 5 * 1000;
25553cce2e7SWarner Losh 		} else {
25653cce2e7SWarner Losh 			struct scsi_vpd_block_device_characteristics *bdc;
25753cce2e7SWarner Losh 
25853cce2e7SWarner Losh 			timeout = 24 * 60 * 60 * 1000;	/* 1 day */
25953cce2e7SWarner Losh 			bdc = scsi_wrap_vpd_block_device_characteristics(device);
26053cce2e7SWarner Losh 			if (bdc != NULL) {
26153cce2e7SWarner Losh 				timeout = scsi_4btoul(bdc->depopulation_time);
26253cce2e7SWarner Losh 			}
26353cce2e7SWarner Losh 			free(bdc);
26453cce2e7SWarner Losh 		}
26553cce2e7SWarner Losh 	}
26653cce2e7SWarner Losh 
26753cce2e7SWarner Losh 	switch (action) {
26853cce2e7SWarner Losh 	case DEPOP_NONE:
26953cce2e7SWarner Losh 		warnx("Must specify one of -d, -l, or -r");
27053cce2e7SWarner Losh 		error = 1;
27153cce2e7SWarner Losh 		break;
27253cce2e7SWarner Losh 	case DEPOP_REMOVE:
27353cce2e7SWarner Losh 		if (elem == 0 && capacity == 0) {
27453cce2e7SWarner Losh 			warnx("Must specify at least one of -e and/or -c");
27553cce2e7SWarner Losh 			error = 1;
27653cce2e7SWarner Losh 			break;
27753cce2e7SWarner Losh 		}
27853cce2e7SWarner Losh 		error = depop_remove(device, task_attr, retry_count, timeout,
27953cce2e7SWarner Losh 		    verbosemode, elem, capacity);
28053cce2e7SWarner Losh 		break;
28153cce2e7SWarner Losh 	case DEPOP_RESTORE:
28253cce2e7SWarner Losh 		error = depop_restore(device, task_attr, retry_count, timeout,
28353cce2e7SWarner Losh 		    verbosemode);
28453cce2e7SWarner Losh 		break;
28553cce2e7SWarner Losh 	case DEPOP_LIST:
28653cce2e7SWarner Losh 		error = depop_list(device, task_attr, retry_count, timeout,
28753cce2e7SWarner Losh 		    verbosemode);
28853cce2e7SWarner Losh 		break;
28953cce2e7SWarner Losh 	}
29053cce2e7SWarner Losh 
29153cce2e7SWarner Losh bailout:
29253cce2e7SWarner Losh 
29353cce2e7SWarner Losh 	return (error);
29453cce2e7SWarner Losh }
295