xref: /freebsd/sbin/camcontrol/depop.c (revision 72df847a94bccee245a3316e4f848482b9ac2ac2)
1 /*-
2  * Copyright (c) 2021 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions, and the following disclaimer,
9  *    without modification.
10  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
11  *    substantially similar to the "NO WARRANTY" disclaimer below
12  *    ("Disclaimer") and any redistribution must be conditioned upon
13  *    including a substantially similar Disclaimer requirement for further
14  *    binary redistribution.
15  *
16  * NO WARRANTY
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  * POSSIBILITY OF SUCH DAMAGES.
28  *
29  */
30 /*
31  * SCSI disk depop (head depopulation) support
32  *
33  * The standard defines 'storage elements' as the generic way of referring to a
34  * disk drive head. Each storage element has an identifier and an active status.
 * The health of an element can be queried. Active elements may be removed from
 * service with a REMOVE ELEMENT AND TRUNCATE (RET) command. Inactive elements
 * may be returned to service with a RESTORE ELEMENTS AND REBUILD (RER)
38  * command. GET PHYSICAL ELEMENT STATUS (GPES) will return a list of elements,
39  * their health, whether they are in service, how much capacity the element is
40  * used for, etc.
41  *
42  * When a depop operation starts, the drive becomes format corrupt. No normal
43  * I/O can be done to the drive and a limited number of CDBs will
44  * succeed. Status can be obtained by either a TEST UNIT READY or a GPES
45  * command. A drive reset will not stop a depop operation, but a power cycle
46  * will. A failed depop operation will be reported when the next TEST UNIT READY
47  * is sent to the drive. Drives that are format corrupt after an interrupted
48  * operation need to have that operation repeated.
49  *
50  * 'depop' provides a wrapper around all these functions.
51  */
52 
53 #include <sys/cdefs.h>
54 __FBSDID("$FreeBSD$");
55 
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <cam/cam.h>
#include <cam/cam_debug.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_message.h>
#include <camlib.h>
#include <scsi_wrap.h>
#include "camcontrol.h"
73 
/*
 * Operation selected on the depop command line.  DEPOP_NONE is the initial
 * state; each of -l, -r and -d selects exactly one of the other values.
 */
enum depop_action {
	DEPOP_NONE = 0,		/* no action option seen yet */
	DEPOP_LIST = 1,		/* -l: list the physical elements */
	DEPOP_RESTORE = 2,	/* -r: restore elements and rebuild */
	DEPOP_REMOVE = 3,	/* -d: remove an element and truncate */
};
80 
81 static int
82 depop_list(struct cam_device *device, int task_attr, int retry_count,
83     int timeout, int verbosemode __unused)
84 {
85 	int error = 0;
86 	uint32_t dtors;
87 	struct scsi_get_physical_element_hdr *hdr;
88 	struct scsi_get_physical_element_descriptor *dtor_ptr;
89 
90 	hdr = scsi_wrap_get_physical_element_status(device, task_attr, retry_count, timeout,
91 	    SCSI_GPES_FILTER_ALL | SCSI_GPES_REPORT_TYPE_PHYS, 1);
92 	if (hdr == NULL)
93 		errx(1, "scsi_wrap_get_physical_element_status returned an error");
94 
95 	/*
96 	 * OK, we have the data, not report it out.
97 	 */
98 	dtor_ptr = (struct scsi_get_physical_element_descriptor *)(hdr + 1);
99 	dtors = scsi_4btoul(hdr->num_descriptors);
100 	printf("Elem ID    * Health Capacity\n");
101 	for (uint32_t i = 0; i < dtors; i++) {
102 		uint32_t id = scsi_4btoul(dtor_ptr[i].element_identifier);
103 		uint8_t ralwd = dtor_ptr[i].ralwd;
104 		uint8_t type = dtor_ptr[i].physical_element_type;
105 		uint8_t health = dtor_ptr[i].physical_element_health;
106 		uint64_t cap = scsi_8btou64(dtor_ptr[i].capacity);
107 		if (type != GPED_TYPE_STORAGE)
108 			printf("0x%08x -- type unknown %d\n", id, type);
109 		else
110 			printf("0x%08x %c 0x%02x   %jd\n", id, ralwd ? '*' : ' ', health, cap);
111 	}
112 	printf("* -- Element can be restored\n");
113 
114 	free(hdr);
115 	return (error);
116 }
117 
118 static int
119 depop_remove(struct cam_device *device, int task_attr, int retry_count,
120     int timeout, int verbosemode __unused, uint32_t elem, uint64_t capacity)
121 {
122 	union ccb *ccb;
123 	int error = 0;
124 
125 	ccb = cam_getccb(device);
126 	if (ccb == NULL) {
127 		warnx("Can't allocate ccb");
128 		return (1);
129 	}
130 	scsi_remove_element_and_truncate(&ccb->csio,
131 	    retry_count,
132 	    NULL,
133 	    task_attr,
134 	    capacity,
135 	    elem,
136 	    SSD_FULL_SIZE,
137 	    timeout);
138 	/* Disable freezing the device queue */
139 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
140 	if (cam_send_ccb(device, ccb) < 0) {
141 		warn("error sending GET PHYSICAL ELEMENT STATUS command");
142 		error = 1;
143 		goto out;
144 	}
145 
146 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
147 		cam_error_print(device, ccb, CAM_ESF_ALL,
148 				CAM_EPF_ALL, stderr);
149 		error = 1;
150 	}
151 
152 out:
153 	cam_freeccb(ccb);
154 	return (error);
155 }
156 
157 static int
158 depop_restore(struct cam_device *device, int task_attr, int retry_count,
159     int timeout, int verbosemode __unused)
160 {
161 	union ccb *ccb;
162 	int error = 0;
163 
164 	ccb = cam_getccb(device);
165 	if (ccb == NULL) {
166 		warnx("Can't allocate ccb");
167 		return (1);
168 	}
169 	scsi_restore_elements_and_rebuild(&ccb->csio,
170 	    retry_count,
171 	    NULL,
172 	    task_attr,
173 	    SSD_FULL_SIZE,
174 	    timeout);
175 
176 	/* Disable freezing the device queue */
177 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
178 	if (cam_send_ccb(device, ccb) < 0) {
179 		warn("error sending GET PHYSICAL ELEMENT STATUS command");
180 		error = 1;
181 		goto out;
182 	}
183 
184 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
185 		cam_error_print(device, ccb, CAM_ESF_ALL,
186 				CAM_EPF_ALL, stderr);
187 		error = 1;
188 	}
189 
190 out:
191 	cam_freeccb(ccb);
192 	return (error);
193 }
194 
/*
 * Reject a second action option.  If an action has already been selected,
 * warn, set error to 1 and jump to the bailout label.
 *
 * Relies on the enclosing function providing `action` and `error` variables
 * and a `bailout` label.  Wrapped in do { } while (0) so that the macro plus
 * its trailing semicolon forms exactly one statement, which keeps it safe
 * inside an unbraced if/else.
 */
#define MUST_BE_NONE() \
	do { \
		if (action != DEPOP_NONE) { \
			warnx("Use only one of -d, -l, or -r"); \
			error = 1; \
			goto bailout; \
		} \
	} while (0)
201 
202 int
203 depop(struct cam_device *device, int argc, char **argv, char *combinedopt,
204     int task_attr, int retry_count, int timeout, int verbosemode)
205 {
206 	int c;
207 	int action = DEPOP_NONE;
208 	char *endptr;
209 	int error = 0;
210 	uint32_t elem = 0;
211 	uint64_t capacity = 0;
212 
213 	while ((c = getopt(argc, argv, combinedopt)) != -1) {
214 		switch (c) {
215 		case 'c':
216 			capacity = strtoumax(optarg, &endptr, 0);
217 			if (*endptr != '\0') {
218 				warnx("Invalid capacity: %s", optarg);
219 				error = 1;
220 				goto bailout;
221 			}
222 			break;
223 		case 'e':
224 			elem = strtoul(optarg, &endptr, 0);
225 			if (*endptr != '\0') {
226 				warnx("Invalid element: %s", optarg);
227 				error = 1;
228 				goto bailout;
229 			}
230 			break;
231 		case 'd':
232 			MUST_BE_NONE();
233 			action = DEPOP_REMOVE;
234 			break;
235 		case 'l':
236 			MUST_BE_NONE();
237 			action  = DEPOP_LIST;
238 			break;
239 		case 'r':
240 			MUST_BE_NONE();
241 			action  = DEPOP_RESTORE;
242 			break;
243 		default:
244 			break;
245 		}
246 	}
247 
248 	/*
249 	 * Compute a sane timeout if none given. 5 seconds for the list command
250 	 * and whatever the block device characteristics VPD says for other
251 	 * depop commands. If there's no value in that field, default to 1
252 	 * day. Experience has shown that these operations take the better part
253 	 * of a day to complete, so a 1 day timeout default seems appropriate.
254 	 */
255 	if (timeout == 0 && action != DEPOP_NONE) {
256 		if (action == DEPOP_LIST) {
257 			timeout = 5 * 1000;
258 		} else {
259 			struct scsi_vpd_block_device_characteristics *bdc;
260 
261 			timeout = 24 * 60 * 60 * 1000;	/* 1 day */
262 			bdc = scsi_wrap_vpd_block_device_characteristics(device);
263 			if (bdc != NULL) {
264 				timeout = scsi_4btoul(bdc->depopulation_time);
265 			}
266 			free(bdc);
267 		}
268 	}
269 
270 	switch (action) {
271 	case DEPOP_NONE:
272 		warnx("Must specify one of -d, -l, or -r");
273 		error = 1;
274 		break;
275 	case DEPOP_REMOVE:
276 		if (elem == 0 && capacity == 0) {
277 			warnx("Must specify at least one of -e and/or -c");
278 			error = 1;
279 			break;
280 		}
281 		error = depop_remove(device, task_attr, retry_count, timeout,
282 		    verbosemode, elem, capacity);
283 		break;
284 	case DEPOP_RESTORE:
285 		error = depop_restore(device, task_attr, retry_count, timeout,
286 		    verbosemode);
287 		break;
288 	case DEPOP_LIST:
289 		error = depop_list(device, task_attr, retry_count, timeout,
290 		    verbosemode);
291 		break;
292 	}
293 
294 bailout:
295 
296 	return (error);
297 }
298