1 /*- 2 * Copyright (c) 2021 Netflix, Inc. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions, and the following disclaimer, 9 * without modification. 10 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 11 * substantially similar to the "NO WARRANTY" disclaimer below 12 * ("Disclaimer") and any redistribution must be conditioned upon 13 * including a substantially similar Disclaimer requirement for further 14 * binary redistribution. 15 * 16 * NO WARRANTY 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGES. 28 * 29 */ 30 /* 31 * SCSI disk depop (head depopulation) support 32 * 33 * The standard defines 'storage elements' as the generic way of referring to a 34 * disk drive head. Each storage element has an identifier and an active status. 35 * The health of an element can be queried. Active elements may be removed from 36 * service with a REMOVE ELEMENT AND TRUNCATE (RET) command. Inactive element 37 * may be returned to service with a RESTORE ELEMENTS AND REBUILD (RER) 38 * command. GET PHYSICAL ELEMENT STATUS (GPES) will return a list of elements, 39 * their health, whether they are in service, how much capacity the element is 40 * used for, etc. 41 * 42 * When a depop operation starts, the drive becomes format corrupt. No normal 43 * I/O can be done to the drive and a limited number of CDBs will 44 * succeed. Status can be obtained by either a TEST UNIT READY or a GPES 45 * command. A drive reset will not stop a depop operation, but a power cycle 46 * will. A failed depop operation will be reported when the next TEST UNIT READY 47 * is sent to the drive. Drives that are format corrupt after an interrupted 48 * operation need to have that operation repeated. 49 * 50 * 'depop' provides a wrapper around all these functions. 51 */ 52 53 #include <sys/cdefs.h> 54 __FBSDID("$FreeBSD$"); 55 56 #include <sys/types.h> 57 58 #include <err.h> 59 #include <inttypes.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <unistd.h> 64 65 #include <cam/cam.h> 66 #include <cam/cam_debug.h> 67 #include <cam/cam_ccb.h> 68 #include <cam/scsi/scsi_all.h> 69 #include <cam/scsi/scsi_message.h> 70 #include <camlib.h> 71 #include <scsi_wrap.h> 72 #include "camcontrol.h" 73 74 enum depop_action { 75 DEPOP_NONE, 76 DEPOP_LIST, 77 DEPOP_RESTORE, 78 DEPOP_REMOVE, 79 }; 80 81 static int 82 depop_list(struct cam_device *device, int task_attr, int retry_count, 83 int timeout, int verbosemode __unused) 84 { 85 int error = 0; 86 uint32_t dtors; 87 struct scsi_get_physical_element_hdr *hdr; 88 struct scsi_get_physical_element_descriptor *dtor_ptr; 89 90 hdr = scsi_wrap_get_physical_element_status(device, task_attr, retry_count, timeout, 91 SCSI_GPES_FILTER_ALL | SCSI_GPES_REPORT_TYPE_PHYS, 1); 92 if (hdr == NULL) 93 errx(1, "scsi_wrap_get_physical_element_status returned an error"); 94 95 /* 96 * OK, we have the data, not report it out. 97 */ 98 dtor_ptr = (struct scsi_get_physical_element_descriptor *)(hdr + 1); 99 dtors = scsi_4btoul(hdr->num_descriptors); 100 printf("Elem ID * Health Capacity\n"); 101 for (uint32_t i = 0; i < dtors; i++) { 102 uint32_t id = scsi_4btoul(dtor_ptr[i].element_identifier); 103 uint8_t ralwd = dtor_ptr[i].ralwd; 104 uint8_t type = dtor_ptr[i].physical_element_type; 105 uint8_t health = dtor_ptr[i].physical_element_health; 106 uint64_t cap = scsi_8btou64(dtor_ptr[i].capacity); 107 if (type != GPED_TYPE_STORAGE) 108 printf("0x%08x -- type unknown %d\n", id, type); 109 else 110 printf("0x%08x %c 0x%02x %jd\n", id, ralwd ? '*' : ' ', health, cap); 111 } 112 printf("* -- Element can be restored\n"); 113 114 free(hdr); 115 return (error); 116 } 117 118 static int 119 depop_remove(struct cam_device *device, int task_attr, int retry_count, 120 int timeout, int verbosemode __unused, uint32_t elem, uint64_t capacity) 121 { 122 union ccb *ccb; 123 int error = 0; 124 125 ccb = cam_getccb(device); 126 if (ccb == NULL) { 127 warnx("Can't allocate ccb"); 128 return (1); 129 } 130 scsi_remove_element_and_truncate(&ccb->csio, 131 retry_count, 132 NULL, 133 task_attr, 134 capacity, 135 elem, 136 SSD_FULL_SIZE, 137 timeout); 138 /* Disable freezing the device queue */ 139 ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; 140 if (cam_send_ccb(device, ccb) < 0) { 141 warn("error sending GET PHYSICAL ELEMENT STATUS command"); 142 error = 1; 143 goto out; 144 } 145 146 if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { 147 cam_error_print(device, ccb, CAM_ESF_ALL, 148 CAM_EPF_ALL, stderr); 149 error = 1; 150 } 151 152 out: 153 cam_freeccb(ccb); 154 return (error); 155 } 156 157 static int 158 depop_restore(struct cam_device *device, int task_attr, int retry_count, 159 int timeout, int verbosemode __unused) 160 { 161 union ccb *ccb; 162 int error = 0; 163 164 ccb = cam_getccb(device); 165 if (ccb == NULL) { 166 warnx("Can't allocate ccb"); 167 return (1); 168 } 169 scsi_restore_elements_and_rebuild(&ccb->csio, 170 retry_count, 171 NULL, 172 task_attr, 173 SSD_FULL_SIZE, 174 timeout); 175 176 /* Disable freezing the device queue */ 177 ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; 178 if (cam_send_ccb(device, ccb) < 0) { 179 warn("error sending GET PHYSICAL ELEMENT STATUS command"); 180 error = 1; 181 goto out; 182 } 183 184 if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { 185 cam_error_print(device, ccb, CAM_ESF_ALL, 186 CAM_EPF_ALL, stderr); 187 error = 1; 188 } 189 190 out: 191 cam_freeccb(ccb); 192 return (error); 193 } 194 195 #define MUST_BE_NONE() \ 196 if (action != DEPOP_NONE) { \ 197 warnx("Use only one of -d, -l, or -r"); \ 198 error = 1; \ 199 goto bailout; \ 200 } 201 202 int 203 depop(struct cam_device *device, int argc, char **argv, char *combinedopt, 204 int task_attr, int retry_count, int timeout, int verbosemode) 205 { 206 int c; 207 int action = DEPOP_NONE; 208 char *endptr; 209 int error = 0; 210 uint32_t elem = 0; 211 uint64_t capacity = 0; 212 213 while ((c = getopt(argc, argv, combinedopt)) != -1) { 214 switch (c) { 215 case 'c': 216 capacity = strtoumax(optarg, &endptr, 0); 217 if (*endptr != '\0') { 218 warnx("Invalid capacity: %s", optarg); 219 error = 1; 220 goto bailout; 221 } 222 break; 223 case 'e': 224 elem = strtoul(optarg, &endptr, 0); 225 if (*endptr != '\0') { 226 warnx("Invalid element: %s", optarg); 227 error = 1; 228 goto bailout; 229 } 230 break; 231 case 'd': 232 MUST_BE_NONE(); 233 action = DEPOP_REMOVE; 234 break; 235 case 'l': 236 MUST_BE_NONE(); 237 action = DEPOP_LIST; 238 break; 239 case 'r': 240 MUST_BE_NONE(); 241 action = DEPOP_RESTORE; 242 break; 243 default: 244 break; 245 } 246 } 247 248 /* 249 * Compute a sane timeout if none given. 5 seconds for the list command 250 * and whatever the block device characteristics VPD says for other 251 * depop commands. If there's no value in that field, default to 1 252 * day. Experience has shown that these operations take the better part 253 * of a day to complete, so a 1 day timeout default seems appropriate. 254 */ 255 if (timeout == 0 && action != DEPOP_NONE) { 256 if (action == DEPOP_LIST) { 257 timeout = 5 * 1000; 258 } else { 259 struct scsi_vpd_block_device_characteristics *bdc; 260 261 timeout = 24 * 60 * 60 * 1000; /* 1 day */ 262 bdc = scsi_wrap_vpd_block_device_characteristics(device); 263 if (bdc != NULL) { 264 timeout = scsi_4btoul(bdc->depopulation_time); 265 } 266 free(bdc); 267 } 268 } 269 270 switch (action) { 271 case DEPOP_NONE: 272 warnx("Must specify one of -d, -l, or -r"); 273 error = 1; 274 break; 275 case DEPOP_REMOVE: 276 if (elem == 0 && capacity == 0) { 277 warnx("Must specify at least one of -e and/or -c"); 278 error = 1; 279 break; 280 } 281 error = depop_remove(device, task_attr, retry_count, timeout, 282 verbosemode, elem, capacity); 283 break; 284 case DEPOP_RESTORE: 285 error = depop_restore(device, task_attr, retry_count, timeout, 286 verbosemode); 287 break; 288 case DEPOP_LIST: 289 error = depop_list(device, task_attr, retry_count, timeout, 290 verbosemode); 291 break; 292 } 293 294 bailout: 295 296 return (error); 297 } 298