153cce2e7SWarner Losh /*-
253cce2e7SWarner Losh * Copyright (c) 2021 Netflix, Inc.
353cce2e7SWarner Losh *
453cce2e7SWarner Losh * Redistribution and use in source and binary forms, with or without
553cce2e7SWarner Losh * modification, are permitted provided that the following conditions
653cce2e7SWarner Losh * are met:
753cce2e7SWarner Losh * 1. Redistributions of source code must retain the above copyright
853cce2e7SWarner Losh * notice, this list of conditions, and the following disclaimer,
953cce2e7SWarner Losh * without modification.
1053cce2e7SWarner Losh * 2. Redistributions in binary form must reproduce at minimum a disclaimer
1153cce2e7SWarner Losh * substantially similar to the "NO WARRANTY" disclaimer below
1253cce2e7SWarner Losh * ("Disclaimer") and any redistribution must be conditioned upon
1353cce2e7SWarner Losh * including a substantially similar Disclaimer requirement for further
1453cce2e7SWarner Losh * binary redistribution.
1553cce2e7SWarner Losh *
1653cce2e7SWarner Losh * NO WARRANTY
1753cce2e7SWarner Losh * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1853cce2e7SWarner Losh * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1953cce2e7SWarner Losh * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
2053cce2e7SWarner Losh * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2153cce2e7SWarner Losh * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2253cce2e7SWarner Losh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2353cce2e7SWarner Losh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2453cce2e7SWarner Losh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2553cce2e7SWarner Losh * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
2653cce2e7SWarner Losh * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2753cce2e7SWarner Losh * POSSIBILITY OF SUCH DAMAGES.
2853cce2e7SWarner Losh *
2953cce2e7SWarner Losh */
/*
 * SCSI disk depop (head depopulation) support
 *
 * The standard defines 'storage elements' as the generic way of referring to a
 * disk drive head. Each storage element has an identifier and an active status.
 * The health of an element can be queried. Active elements may be removed from
 * service with a REMOVE ELEMENT AND TRUNCATE (RET) command. Inactive elements
 * may be returned to service with a RESTORE ELEMENTS AND REBUILD (RER)
 * command. GET PHYSICAL ELEMENT STATUS (GPES) will return a list of elements,
 * their health, whether they are in service, how much capacity the element is
 * used for, etc.
 *
 * When a depop operation starts, the drive becomes format corrupt. No normal
 * I/O can be done to the drive and a limited number of CDBs will
 * succeed. Status can be obtained by either a TEST UNIT READY or a GPES
 * command. A drive reset will not stop a depop operation, but a power cycle
 * will. A failed depop operation will be reported when the next TEST UNIT READY
 * is sent to the drive. Drives that are format corrupt after an interrupted
 * operation need to have that operation repeated.
 *
 * 'depop' provides a wrapper around all these functions.
 */
5253cce2e7SWarner Losh
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <cam/cam.h>
#include <cam/cam_debug.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_message.h>
#include <camlib.h>
#include <scsi_wrap.h>
#include "camcontrol.h"
7053cce2e7SWarner Losh
/*
 * Action selected on the command line.  depop() starts at DEPOP_NONE and
 * moves to exactly one of the other values when -l, -r or -d is seen.
 */
enum depop_action {
	DEPOP_NONE = 0,		/* no action option seen yet */
	DEPOP_LIST = 1,		/* -l: list the physical elements */
	DEPOP_RESTORE = 2,	/* -r: restore elements and rebuild */
	DEPOP_REMOVE = 3,	/* -d: remove an element and truncate */
};
7753cce2e7SWarner Losh
7853cce2e7SWarner Losh static int
depop_list(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused)7953cce2e7SWarner Losh depop_list(struct cam_device *device, int task_attr, int retry_count,
8053cce2e7SWarner Losh int timeout, int verbosemode __unused)
8153cce2e7SWarner Losh {
8253cce2e7SWarner Losh int error = 0;
8353cce2e7SWarner Losh uint32_t dtors;
8453cce2e7SWarner Losh struct scsi_get_physical_element_hdr *hdr;
8553cce2e7SWarner Losh struct scsi_get_physical_element_descriptor *dtor_ptr;
8653cce2e7SWarner Losh
8753cce2e7SWarner Losh hdr = scsi_wrap_get_physical_element_status(device, task_attr, retry_count, timeout,
8853cce2e7SWarner Losh SCSI_GPES_FILTER_ALL | SCSI_GPES_REPORT_TYPE_PHYS, 1);
8953cce2e7SWarner Losh if (hdr == NULL)
9053cce2e7SWarner Losh errx(1, "scsi_wrap_get_physical_element_status returned an error");
9153cce2e7SWarner Losh
9253cce2e7SWarner Losh /*
9353cce2e7SWarner Losh * OK, we have the data, not report it out.
9453cce2e7SWarner Losh */
9553cce2e7SWarner Losh dtor_ptr = (struct scsi_get_physical_element_descriptor *)(hdr + 1);
9653cce2e7SWarner Losh dtors = scsi_4btoul(hdr->num_descriptors);
9753cce2e7SWarner Losh printf("Elem ID * Health Capacity\n");
9853cce2e7SWarner Losh for (uint32_t i = 0; i < dtors; i++) {
9953cce2e7SWarner Losh uint32_t id = scsi_4btoul(dtor_ptr[i].element_identifier);
10053cce2e7SWarner Losh uint8_t ralwd = dtor_ptr[i].ralwd;
10153cce2e7SWarner Losh uint8_t type = dtor_ptr[i].physical_element_type;
10253cce2e7SWarner Losh uint8_t health = dtor_ptr[i].physical_element_health;
10353cce2e7SWarner Losh uint64_t cap = scsi_8btou64(dtor_ptr[i].capacity);
10453cce2e7SWarner Losh if (type != GPED_TYPE_STORAGE)
10553cce2e7SWarner Losh printf("0x%08x -- type unknown %d\n", id, type);
10653cce2e7SWarner Losh else
10753cce2e7SWarner Losh printf("0x%08x %c 0x%02x %jd\n", id, ralwd ? '*' : ' ', health, cap);
10853cce2e7SWarner Losh }
10953cce2e7SWarner Losh printf("* -- Element can be restored\n");
11053cce2e7SWarner Losh
11153cce2e7SWarner Losh free(hdr);
11253cce2e7SWarner Losh return (error);
11353cce2e7SWarner Losh }
11453cce2e7SWarner Losh
11553cce2e7SWarner Losh static int
depop_remove(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused,uint32_t elem,uint64_t capacity)11653cce2e7SWarner Losh depop_remove(struct cam_device *device, int task_attr, int retry_count,
11753cce2e7SWarner Losh int timeout, int verbosemode __unused, uint32_t elem, uint64_t capacity)
11853cce2e7SWarner Losh {
11953cce2e7SWarner Losh union ccb *ccb;
12053cce2e7SWarner Losh int error = 0;
12153cce2e7SWarner Losh
12253cce2e7SWarner Losh ccb = cam_getccb(device);
12353cce2e7SWarner Losh if (ccb == NULL) {
12453cce2e7SWarner Losh warnx("Can't allocate ccb");
12553cce2e7SWarner Losh return (1);
12653cce2e7SWarner Losh }
12753cce2e7SWarner Losh scsi_remove_element_and_truncate(&ccb->csio,
12853cce2e7SWarner Losh retry_count,
12953cce2e7SWarner Losh NULL,
13053cce2e7SWarner Losh task_attr,
13153cce2e7SWarner Losh capacity,
13253cce2e7SWarner Losh elem,
13353cce2e7SWarner Losh SSD_FULL_SIZE,
13453cce2e7SWarner Losh timeout);
13553cce2e7SWarner Losh /* Disable freezing the device queue */
13653cce2e7SWarner Losh ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
13753cce2e7SWarner Losh if (cam_send_ccb(device, ccb) < 0) {
13853cce2e7SWarner Losh warn("error sending GET PHYSICAL ELEMENT STATUS command");
13953cce2e7SWarner Losh error = 1;
14053cce2e7SWarner Losh goto out;
14153cce2e7SWarner Losh }
14253cce2e7SWarner Losh
14353cce2e7SWarner Losh if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
14453cce2e7SWarner Losh cam_error_print(device, ccb, CAM_ESF_ALL,
14553cce2e7SWarner Losh CAM_EPF_ALL, stderr);
14653cce2e7SWarner Losh error = 1;
14753cce2e7SWarner Losh }
14853cce2e7SWarner Losh
14953cce2e7SWarner Losh out:
15053cce2e7SWarner Losh cam_freeccb(ccb);
15153cce2e7SWarner Losh return (error);
15253cce2e7SWarner Losh }
15353cce2e7SWarner Losh
15453cce2e7SWarner Losh static int
depop_restore(struct cam_device * device,int task_attr,int retry_count,int timeout,int verbosemode __unused)15553cce2e7SWarner Losh depop_restore(struct cam_device *device, int task_attr, int retry_count,
15653cce2e7SWarner Losh int timeout, int verbosemode __unused)
15753cce2e7SWarner Losh {
15853cce2e7SWarner Losh union ccb *ccb;
15953cce2e7SWarner Losh int error = 0;
16053cce2e7SWarner Losh
16153cce2e7SWarner Losh ccb = cam_getccb(device);
16253cce2e7SWarner Losh if (ccb == NULL) {
16353cce2e7SWarner Losh warnx("Can't allocate ccb");
16453cce2e7SWarner Losh return (1);
16553cce2e7SWarner Losh }
16653cce2e7SWarner Losh scsi_restore_elements_and_rebuild(&ccb->csio,
16753cce2e7SWarner Losh retry_count,
16853cce2e7SWarner Losh NULL,
16953cce2e7SWarner Losh task_attr,
17053cce2e7SWarner Losh SSD_FULL_SIZE,
17153cce2e7SWarner Losh timeout);
17253cce2e7SWarner Losh
17353cce2e7SWarner Losh /* Disable freezing the device queue */
17453cce2e7SWarner Losh ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
17553cce2e7SWarner Losh if (cam_send_ccb(device, ccb) < 0) {
17653cce2e7SWarner Losh warn("error sending GET PHYSICAL ELEMENT STATUS command");
17753cce2e7SWarner Losh error = 1;
17853cce2e7SWarner Losh goto out;
17953cce2e7SWarner Losh }
18053cce2e7SWarner Losh
18153cce2e7SWarner Losh if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
18253cce2e7SWarner Losh cam_error_print(device, ccb, CAM_ESF_ALL,
18353cce2e7SWarner Losh CAM_EPF_ALL, stderr);
18453cce2e7SWarner Losh error = 1;
18553cce2e7SWarner Losh }
18653cce2e7SWarner Losh
18753cce2e7SWarner Losh out:
18853cce2e7SWarner Losh cam_freeccb(ccb);
18953cce2e7SWarner Losh return (error);
19053cce2e7SWarner Losh }
19153cce2e7SWarner Losh
/*
 * Reject a second action option.  Relies on the enclosing function having
 * `action` and `error` locals and a `bailout` label.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays correct
 * when used as the body of an unbraced if/else.
 */
#define	MUST_BE_NONE() do {					\
	if (action != DEPOP_NONE) {				\
		warnx("Use only one of -d, -l, or -r");		\
		error = 1;					\
		goto bailout;					\
	}							\
} while (0)
19853cce2e7SWarner Losh
19953cce2e7SWarner Losh int
depop(struct cam_device * device,int argc,char ** argv,char * combinedopt,int task_attr,int retry_count,int timeout,int verbosemode)20053cce2e7SWarner Losh depop(struct cam_device *device, int argc, char **argv, char *combinedopt,
20153cce2e7SWarner Losh int task_attr, int retry_count, int timeout, int verbosemode)
20253cce2e7SWarner Losh {
20353cce2e7SWarner Losh int c;
20453cce2e7SWarner Losh int action = DEPOP_NONE;
20553cce2e7SWarner Losh char *endptr;
20653cce2e7SWarner Losh int error = 0;
20753cce2e7SWarner Losh uint32_t elem = 0;
20853cce2e7SWarner Losh uint64_t capacity = 0;
20953cce2e7SWarner Losh
21053cce2e7SWarner Losh while ((c = getopt(argc, argv, combinedopt)) != -1) {
21153cce2e7SWarner Losh switch (c) {
21253cce2e7SWarner Losh case 'c':
21353cce2e7SWarner Losh capacity = strtoumax(optarg, &endptr, 0);
21453cce2e7SWarner Losh if (*endptr != '\0') {
21553cce2e7SWarner Losh warnx("Invalid capacity: %s", optarg);
21653cce2e7SWarner Losh error = 1;
21753cce2e7SWarner Losh goto bailout;
21853cce2e7SWarner Losh }
21953cce2e7SWarner Losh break;
22053cce2e7SWarner Losh case 'e':
22153cce2e7SWarner Losh elem = strtoul(optarg, &endptr, 0);
22253cce2e7SWarner Losh if (*endptr != '\0') {
22353cce2e7SWarner Losh warnx("Invalid element: %s", optarg);
22453cce2e7SWarner Losh error = 1;
22553cce2e7SWarner Losh goto bailout;
22653cce2e7SWarner Losh }
22753cce2e7SWarner Losh break;
22853cce2e7SWarner Losh case 'd':
22953cce2e7SWarner Losh MUST_BE_NONE();
23053cce2e7SWarner Losh action = DEPOP_REMOVE;
23153cce2e7SWarner Losh break;
23253cce2e7SWarner Losh case 'l':
23353cce2e7SWarner Losh MUST_BE_NONE();
23453cce2e7SWarner Losh action = DEPOP_LIST;
23553cce2e7SWarner Losh break;
23653cce2e7SWarner Losh case 'r':
23753cce2e7SWarner Losh MUST_BE_NONE();
23853cce2e7SWarner Losh action = DEPOP_RESTORE;
23953cce2e7SWarner Losh break;
24053cce2e7SWarner Losh default:
24153cce2e7SWarner Losh break;
24253cce2e7SWarner Losh }
24353cce2e7SWarner Losh }
24453cce2e7SWarner Losh
24553cce2e7SWarner Losh /*
24653cce2e7SWarner Losh * Compute a sane timeout if none given. 5 seconds for the list command
24753cce2e7SWarner Losh * and whatever the block device characteristics VPD says for other
24853cce2e7SWarner Losh * depop commands. If there's no value in that field, default to 1
24953cce2e7SWarner Losh * day. Experience has shown that these operations take the better part
25053cce2e7SWarner Losh * of a day to complete, so a 1 day timeout default seems appropriate.
25153cce2e7SWarner Losh */
25253cce2e7SWarner Losh if (timeout == 0 && action != DEPOP_NONE) {
25353cce2e7SWarner Losh if (action == DEPOP_LIST) {
25453cce2e7SWarner Losh timeout = 5 * 1000;
25553cce2e7SWarner Losh } else {
25653cce2e7SWarner Losh struct scsi_vpd_block_device_characteristics *bdc;
25753cce2e7SWarner Losh
25853cce2e7SWarner Losh timeout = 24 * 60 * 60 * 1000; /* 1 day */
25953cce2e7SWarner Losh bdc = scsi_wrap_vpd_block_device_characteristics(device);
26053cce2e7SWarner Losh if (bdc != NULL) {
26153cce2e7SWarner Losh timeout = scsi_4btoul(bdc->depopulation_time);
26253cce2e7SWarner Losh }
26353cce2e7SWarner Losh free(bdc);
26453cce2e7SWarner Losh }
26553cce2e7SWarner Losh }
26653cce2e7SWarner Losh
26753cce2e7SWarner Losh switch (action) {
26853cce2e7SWarner Losh case DEPOP_NONE:
26953cce2e7SWarner Losh warnx("Must specify one of -d, -l, or -r");
27053cce2e7SWarner Losh error = 1;
27153cce2e7SWarner Losh break;
27253cce2e7SWarner Losh case DEPOP_REMOVE:
27353cce2e7SWarner Losh if (elem == 0 && capacity == 0) {
27453cce2e7SWarner Losh warnx("Must specify at least one of -e and/or -c");
27553cce2e7SWarner Losh error = 1;
27653cce2e7SWarner Losh break;
27753cce2e7SWarner Losh }
27853cce2e7SWarner Losh error = depop_remove(device, task_attr, retry_count, timeout,
27953cce2e7SWarner Losh verbosemode, elem, capacity);
28053cce2e7SWarner Losh break;
28153cce2e7SWarner Losh case DEPOP_RESTORE:
28253cce2e7SWarner Losh error = depop_restore(device, task_attr, retry_count, timeout,
28353cce2e7SWarner Losh verbosemode);
28453cce2e7SWarner Losh break;
28553cce2e7SWarner Losh case DEPOP_LIST:
28653cce2e7SWarner Losh error = depop_list(device, task_attr, retry_count, timeout,
28753cce2e7SWarner Losh verbosemode);
28853cce2e7SWarner Losh break;
28953cce2e7SWarner Losh }
29053cce2e7SWarner Losh
29153cce2e7SWarner Losh bailout:
29253cce2e7SWarner Losh
29353cce2e7SWarner Losh return (error);
29453cce2e7SWarner Losh }
295