xref: /freebsd/sys/dev/nvmf/host/nvmf_aer.c (revision a8089ea5aee578e08acab2438e82fc9a9ae50ed8)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <dev/nvmf/host/nvmf_var.h>

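/*
 * State for one outstanding Asynchronous Event Request command and
 * the log page fetch it may trigger.
 */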
struct nvmf_aer {
	struct nvmf_softc *sc;
	uint8_t log_page_id;
	uint8_t info;
	uint8_t type;

	u_int	page_len;
	void	*page;

	int	error;
	uint16_t status;
	int	pending;
	struct mtx *lock;
	struct task complete_task;
	struct task finish_page_task;
};

#define	MAX_LOG_PAGE_SIZE	4096

static void	nvmf_complete_aer(void *arg, const struct nvme_completion *cqe);

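/*
 * Allocate and submit a single Asynchronous Event Request command on
 * the admin queue.  Completions are handled by nvmf_complete_aer().
 */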
static void
nvmf_submit_aer(struct nvmf_softc *sc, struct nvmf_aer *aer)
{
	struct nvmf_request *req;
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;

	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete_aer, aer,
	    M_WAITOK);
	if (req == NULL)
		return;
	req->aer = true;
	nvmf_submit_request(req);
}

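/*
 * Rescan each namespace reported in a Changed Namespace List log
 * page.
 */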
static void
nvmf_handle_changed_namespaces(struct nvmf_softc *sc,
    struct nvme_ns_list *ns_list)
{
	uint32_t nsid;

	/*
	 * If more than 1024 namespaces have changed, we should
	 * probably just rescan the entire set of namespaces.
	 */
	if (ns_list->ns[0] == 0xffffffff) {
		device_printf(sc->dev, "too many changed namespaces\n");
		return;
	}

	for (u_int i = 0; i < nitems(ns_list->ns); i++) {
		if (ns_list->ns[i] == 0)
			break;

		nsid = le32toh(ns_list->ns[i]);
		nvmf_rescan_ns(sc, nsid);
	}
}

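/*
 * Called once both the command completion and the data transfer for
 * a log page fetch have finished.  Unless an error occurred, hand
 * the page off to a taskqueue thread for processing.
 */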
static void
nvmf_finish_aer_page(struct nvmf_softc *sc, struct nvmf_aer *aer)
{
	/* If an error occurred fetching the page, just bail. */
	if (aer->error != 0 || aer->status != 0)
		return;

	taskqueue_enqueue(taskqueue_thread, &aer->finish_page_task);
}

static void
nvmf_finish_aer_page_task(void *arg, int pending)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	switch (aer->log_page_id) {
	case NVME_LOG_ERROR:
		/* TODO: Should we log these? */
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		nvmf_handle_changed_namespaces(sc, aer->page);
		break;
	}

	/* Resubmit this AER command. */
	nvmf_submit_aer(sc, aer);
}

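/*
 * Data transfer completion callback for a log page fetch.
 */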
static void
nvmf_io_complete_aer_page(void *arg, size_t xfered, int error)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	mtx_lock(aer->lock);
	aer->error = error;
	aer->pending--;
	if (aer->pending == 0) {
		mtx_unlock(aer->lock);
		nvmf_finish_aer_page(sc, aer);
	} else
		mtx_unlock(aer->lock);
}

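/*
 * Command completion callback for a log page fetch.
 */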
static void
nvmf_complete_aer_page(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	mtx_lock(aer->lock);
	aer->status = cqe->status;
	aer->pending--;
	if (aer->pending == 0) {
		mtx_unlock(aer->lock);
		nvmf_finish_aer_page(sc, aer);
	} else
		mtx_unlock(aer->lock);
}

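/*
 * Size of the log page to fetch for a given log page ID, or 0 if the
 * page does not need to be fetched.
 */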
static u_int
nvmf_log_page_size(struct nvmf_softc *sc, uint8_t log_page_id)
{
	switch (log_page_id) {
	case NVME_LOG_ERROR:
		return ((sc->cdata->elpe + 1) *
		    sizeof(struct nvme_error_information_entry));
	case NVME_LOG_CHANGED_NAMESPACE:
		return (sizeof(struct nvme_ns_list));
	default:
		return (0);
	}
}

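/*
 * Completion handler for an AER command.  Decodes the reported event
 * and defers further handling to nvmf_complete_aer_task().
 */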
static void
nvmf_complete_aer(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;
	uint32_t cdw0;

	/*
	 * The only error defined for AER is an abort due to
	 * submitting too many AER commands.  Just discard this AER
	 * without resubmitting if we get an error.
	 *
	 * NB: Pending AER commands are aborted during controller
	 * shutdown, so discard aborted commands silently.
	 */
	if (cqe->status != 0) {
		if (!nvmf_cqe_aborted(cqe))
			device_printf(sc->dev, "Ignoring error %#x for AER\n",
			    le16toh(cqe->status));
		return;
	}

	cdw0 = le32toh(cqe->cdw0);
	aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cdw0);
	aer->info = NVMEV(NVME_ASYNC_EVENT_INFO, cdw0);
	aer->type = NVMEV(NVME_ASYNC_EVENT_TYPE, cdw0);

	device_printf(sc->dev, "AER type %u, info %#x, page %#x\n",
	    aer->type, aer->info, aer->log_page_id);

	aer->page_len = nvmf_log_page_size(sc, aer->log_page_id);
	taskqueue_enqueue(taskqueue_thread, &aer->complete_task);
}

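/*
 * If the reported event has an associated log page, start fetching
 * it; otherwise resubmit the AER command immediately.
 */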
static void
nvmf_complete_aer_task(void *arg, int pending)
{
	struct nvmf_aer *aer = arg;
	struct nvmf_softc *sc = aer->sc;

	if (aer->page_len != 0) {
		/* Read the associated log page. */
		aer->page_len = MIN(aer->page_len, MAX_LOG_PAGE_SIZE);
		aer->pending = 2;
		(void) nvmf_cmd_get_log_page(sc, NVME_GLOBAL_NAMESPACE_TAG,
		    aer->log_page_id, 0, aer->page, aer->page_len,
		    nvmf_complete_aer_page, aer, nvmf_io_complete_aer_page,
		    aer, M_WAITOK);
	} else {
		/* Resubmit this AER command. */
		nvmf_submit_aer(sc, aer);
	}
}

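/*
 * Tell the controller which asynchronous events to report using
 * SET_FEATURES (ASYNC_EVENT_CONFIGURATION).
 */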
static int
nvmf_set_async_event_config(struct nvmf_softc *sc, uint32_t config)
{
	struct nvme_command cmd;
	struct nvmf_completion_status status;
	struct nvmf_request *req;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_SET_FEATURES;
	cmd.cdw10 = htole32(NVME_FEAT_ASYNC_EVENT_CONFIGURATION);
	cmd.cdw11 = htole32(config);

	nvmf_status_init(&status);
	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete, &status,
	    M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev,
		    "failed to allocate SET_FEATURES (ASYNC_EVENT_CONFIGURATION) command\n");
		return (ECONNABORTED);
	}
	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "SET_FEATURES (ASYNC_EVENT_CONFIGURATION) failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	return (0);
}

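/*
 * Allocate per-AER state for the AER commands kept outstanding on
 * the admin queue.
 */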
void
nvmf_init_aer(struct nvmf_softc *sc)
{
	/* 8 matches NVME_MAX_ASYNC_EVENTS */
	sc->num_aer = min(8, sc->cdata->aerl + 1);
	sc->aer = mallocarray(sc->num_aer, sizeof(*sc->aer), M_NVMF,
	    M_WAITOK | M_ZERO);
	for (u_int i = 0; i < sc->num_aer; i++) {
		sc->aer[i].sc = sc;
		sc->aer[i].page = malloc(MAX_LOG_PAGE_SIZE, M_NVMF, M_WAITOK);
		sc->aer[i].lock = mtx_pool_find(mtxpool_sleep, &sc->aer[i]);
		TASK_INIT(&sc->aer[i].complete_task, 0, nvmf_complete_aer_task,
		    &sc->aer[i]);
		TASK_INIT(&sc->aer[i].finish_page_task, 0,
		    nvmf_finish_aer_page_task, &sc->aer[i]);
	}
}

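/*
 * Configure which asynchronous events the controller should report
 * and submit the initial set of AER commands.
 */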
int
nvmf_start_aer(struct nvmf_softc *sc)
{
	uint32_t async_event_config;
	int error;

	async_event_config = NVME_CRIT_WARN_ST_AVAILABLE_SPARE |
	    NVME_CRIT_WARN_ST_DEVICE_RELIABILITY |
	    NVME_CRIT_WARN_ST_READ_ONLY |
	    NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP;
	if (sc->cdata->ver >= NVME_REV(1, 2))
		async_event_config |=
		    sc->cdata->oaes & NVME_ASYNC_EVENT_NS_ATTRIBUTE;
	error = nvmf_set_async_event_config(sc, async_event_config);
	if (error != 0)
		return (error);

	for (u_int i = 0; i < sc->num_aer; i++)
		nvmf_submit_aer(sc, &sc->aer[i]);

	return (0);
}

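/*
 * Drain any pending AER tasks and free per-AER state.
 */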
void
nvmf_destroy_aer(struct nvmf_softc *sc)
{
	for (u_int i = 0; i < sc->num_aer; i++) {
		taskqueue_drain(taskqueue_thread, &sc->aer[i].complete_task);
		taskqueue_drain(taskqueue_thread, &sc->aer[i].finish_page_task);
		free(sc->aer[i].page, M_NVMF);
	}
	free(sc->aer, M_NVMF);
}