xref: /freebsd/sys/dev/nvmf/host/nvmf_aer.c (revision f6d434f110fd95e346f18fb09a6f91f36b528d2d)
1a1eda741SJohn Baldwin /*-
2a1eda741SJohn Baldwin  * SPDX-License-Identifier: BSD-2-Clause
3a1eda741SJohn Baldwin  *
4a1eda741SJohn Baldwin  * Copyright (c) 2024 Chelsio Communications, Inc.
5a1eda741SJohn Baldwin  * Written by: John Baldwin <jhb@FreeBSD.org>
6a1eda741SJohn Baldwin  */
7a1eda741SJohn Baldwin 
8a1eda741SJohn Baldwin #include <sys/types.h>
9a1eda741SJohn Baldwin #include <sys/bus.h>
10a1eda741SJohn Baldwin #include <sys/lock.h>
11a1eda741SJohn Baldwin #include <sys/malloc.h>
12a1eda741SJohn Baldwin #include <sys/mutex.h>
13a1eda741SJohn Baldwin #include <sys/taskqueue.h>
14a1eda741SJohn Baldwin #include <dev/nvmf/host/nvmf_var.h>
15a1eda741SJohn Baldwin 
/*
 * State for one outstanding Asynchronous Event Request (AER) admin
 * command, including scratch space for fetching the log page named in
 * the AER completion.  One instance exists per outstanding AER.
 */
struct nvmf_aer {
	struct nvmf_softc *sc;	/* owning controller */
	uint8_t log_page_id;	/* from CDW0 of the AER completion */
	uint8_t info;		/* from CDW0 of the AER completion */
	uint8_t type;		/* from CDW0 of the AER completion */

	u_int	page_len;	/* bytes to fetch, capped at MAX_LOG_PAGE_SIZE */
	void	*page;		/* MAX_LOG_PAGE_SIZE buffer for the log page */

	int	error;		/* I/O transfer error from the page fetch */
	uint16_t status;	/* NVMe completion status (little-endian) */
	int	pending;	/* outstanding completions for the page fetch */
	struct mtx *lock;	/* pool mutex guarding error/status/pending */
	struct task complete_task;	/* runs nvmf_complete_aer_task */
	struct task finish_page_task;	/* runs nvmf_finish_aer_page_task */
};
32a1eda741SJohn Baldwin 
33a1eda741SJohn Baldwin #define	MAX_LOG_PAGE_SIZE	4096
34a1eda741SJohn Baldwin 
35a1eda741SJohn Baldwin static void	nvmf_complete_aer(void *arg, const struct nvme_completion *cqe);
36a1eda741SJohn Baldwin 
37a1eda741SJohn Baldwin static void
nvmf_submit_aer(struct nvmf_softc * sc,struct nvmf_aer * aer)38a1eda741SJohn Baldwin nvmf_submit_aer(struct nvmf_softc *sc, struct nvmf_aer *aer)
39a1eda741SJohn Baldwin {
40a1eda741SJohn Baldwin 	struct nvmf_request *req;
41a1eda741SJohn Baldwin 	struct nvme_command cmd;
42a1eda741SJohn Baldwin 
43a1eda741SJohn Baldwin 	memset(&cmd, 0, sizeof(cmd));
44a1eda741SJohn Baldwin 	cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
45a1eda741SJohn Baldwin 
46a1eda741SJohn Baldwin 	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete_aer, aer,
47a1eda741SJohn Baldwin 	    M_WAITOK);
48a1eda741SJohn Baldwin 	if (req == NULL)
49a1eda741SJohn Baldwin 		return;
50a1eda741SJohn Baldwin 	req->aer = true;
51a1eda741SJohn Baldwin 	nvmf_submit_request(req);
52a1eda741SJohn Baldwin }
53a1eda741SJohn Baldwin 
54a1eda741SJohn Baldwin static void
nvmf_handle_changed_namespaces(struct nvmf_softc * sc,struct nvme_ns_list * ns_list)55a1eda741SJohn Baldwin nvmf_handle_changed_namespaces(struct nvmf_softc *sc,
56a1eda741SJohn Baldwin     struct nvme_ns_list *ns_list)
57a1eda741SJohn Baldwin {
58a1eda741SJohn Baldwin 	uint32_t nsid;
59a1eda741SJohn Baldwin 
60a1eda741SJohn Baldwin 	/*
61a1eda741SJohn Baldwin 	 * If more than 1024 namespaces have changed, we should
62a1eda741SJohn Baldwin 	 * probably just rescan the entire set of namespaces.
63a1eda741SJohn Baldwin 	 */
64a1eda741SJohn Baldwin 	if (ns_list->ns[0] == 0xffffffff) {
65*f6d434f1SJohn Baldwin 		nvmf_rescan_all_ns(sc);
66a1eda741SJohn Baldwin 		return;
67a1eda741SJohn Baldwin 	}
68a1eda741SJohn Baldwin 
69a1eda741SJohn Baldwin 	for (u_int i = 0; i < nitems(ns_list->ns); i++) {
70a1eda741SJohn Baldwin 		if (ns_list->ns[i] == 0)
71a1eda741SJohn Baldwin 			break;
72a1eda741SJohn Baldwin 
73a1eda741SJohn Baldwin 		nsid = le32toh(ns_list->ns[i]);
74a1eda741SJohn Baldwin 		nvmf_rescan_ns(sc, nsid);
75a1eda741SJohn Baldwin 	}
76a1eda741SJohn Baldwin }
77a1eda741SJohn Baldwin 
/*
 * Called once both completions (capsule and I/O) of the log page fetch
 * have arrived; hand the page off to a taskqueue thread for processing.
 */
static void
nvmf_finish_aer_page(struct nvmf_softc *sc, struct nvmf_aer *aer)
{
	/*
	 * If an error occurred fetching the page, just bail.
	 *
	 * NOTE(review): bailing here means this AER is never resubmitted
	 * (resubmission happens in nvmf_finish_aer_page_task), so each
	 * failed page fetch permanently reduces the number of outstanding
	 * AER commands.  That is fine for aborts during controller
	 * shutdown, but may be unintended for a transient Get Log Page
	 * failure — confirm whether a resubmit is wanted here.
	 */
	if (aer->error != 0 || aer->status != 0)
		return;

	taskqueue_enqueue(taskqueue_thread, &aer->finish_page_task);
}
87a1eda741SJohn Baldwin 
88a1eda741SJohn Baldwin static void
nvmf_finish_aer_page_task(void * arg,int pending)89a1eda741SJohn Baldwin nvmf_finish_aer_page_task(void *arg, int pending)
90a1eda741SJohn Baldwin {
91a1eda741SJohn Baldwin 	struct nvmf_aer *aer = arg;
92a1eda741SJohn Baldwin 	struct nvmf_softc *sc = aer->sc;
93a1eda741SJohn Baldwin 
94a1eda741SJohn Baldwin 	switch (aer->log_page_id) {
95a1eda741SJohn Baldwin 	case NVME_LOG_ERROR:
96a1eda741SJohn Baldwin 		/* TODO: Should we log these? */
97a1eda741SJohn Baldwin 		break;
98a1eda741SJohn Baldwin 	case NVME_LOG_CHANGED_NAMESPACE:
99a1eda741SJohn Baldwin 		nvmf_handle_changed_namespaces(sc, aer->page);
100a1eda741SJohn Baldwin 		break;
101a1eda741SJohn Baldwin 	}
102a1eda741SJohn Baldwin 
103a1eda741SJohn Baldwin 	/* Resubmit this AER command. */
104a1eda741SJohn Baldwin 	nvmf_submit_aer(sc, aer);
105a1eda741SJohn Baldwin }
106a1eda741SJohn Baldwin 
107a1eda741SJohn Baldwin static void
nvmf_io_complete_aer_page(void * arg,size_t xfered,int error)108a1eda741SJohn Baldwin nvmf_io_complete_aer_page(void *arg, size_t xfered, int error)
109a1eda741SJohn Baldwin {
110a1eda741SJohn Baldwin 	struct nvmf_aer *aer = arg;
111a1eda741SJohn Baldwin 	struct nvmf_softc *sc = aer->sc;
112a1eda741SJohn Baldwin 
113a1eda741SJohn Baldwin 	mtx_lock(aer->lock);
114a1eda741SJohn Baldwin 	aer->error = error;
115a1eda741SJohn Baldwin 	aer->pending--;
116a1eda741SJohn Baldwin 	if (aer->pending == 0) {
117a1eda741SJohn Baldwin 		mtx_unlock(aer->lock);
118a1eda741SJohn Baldwin 		nvmf_finish_aer_page(sc, aer);
119a1eda741SJohn Baldwin 	} else
120a1eda741SJohn Baldwin 		mtx_unlock(aer->lock);
121a1eda741SJohn Baldwin }
122a1eda741SJohn Baldwin 
123a1eda741SJohn Baldwin static void
nvmf_complete_aer_page(void * arg,const struct nvme_completion * cqe)124a1eda741SJohn Baldwin nvmf_complete_aer_page(void *arg, const struct nvme_completion *cqe)
125a1eda741SJohn Baldwin {
126a1eda741SJohn Baldwin 	struct nvmf_aer *aer = arg;
127a1eda741SJohn Baldwin 	struct nvmf_softc *sc = aer->sc;
128a1eda741SJohn Baldwin 
129a1eda741SJohn Baldwin 	mtx_lock(aer->lock);
130a1eda741SJohn Baldwin 	aer->status = cqe->status;
131a1eda741SJohn Baldwin 	aer->pending--;
132a1eda741SJohn Baldwin 	if (aer->pending == 0) {
133a1eda741SJohn Baldwin 		mtx_unlock(aer->lock);
134a1eda741SJohn Baldwin 		nvmf_finish_aer_page(sc, aer);
135a1eda741SJohn Baldwin 	} else
136a1eda741SJohn Baldwin 		mtx_unlock(aer->lock);
137a1eda741SJohn Baldwin }
138a1eda741SJohn Baldwin 
139a1eda741SJohn Baldwin static u_int
nvmf_log_page_size(struct nvmf_softc * sc,uint8_t log_page_id)140a1eda741SJohn Baldwin nvmf_log_page_size(struct nvmf_softc *sc, uint8_t log_page_id)
141a1eda741SJohn Baldwin {
142a1eda741SJohn Baldwin 	switch (log_page_id) {
143a1eda741SJohn Baldwin 	case NVME_LOG_ERROR:
144a1eda741SJohn Baldwin 		return ((sc->cdata->elpe + 1) *
145a1eda741SJohn Baldwin 		    sizeof(struct nvme_error_information_entry));
146a1eda741SJohn Baldwin 	case NVME_LOG_CHANGED_NAMESPACE:
147a1eda741SJohn Baldwin 		return (sizeof(struct nvme_ns_list));
148a1eda741SJohn Baldwin 	default:
149a1eda741SJohn Baldwin 		return (0);
150a1eda741SJohn Baldwin 	}
151a1eda741SJohn Baldwin }
152a1eda741SJohn Baldwin 
153a1eda741SJohn Baldwin static void
nvmf_complete_aer(void * arg,const struct nvme_completion * cqe)154a1eda741SJohn Baldwin nvmf_complete_aer(void *arg, const struct nvme_completion *cqe)
155a1eda741SJohn Baldwin {
156a1eda741SJohn Baldwin 	struct nvmf_aer *aer = arg;
157a1eda741SJohn Baldwin 	struct nvmf_softc *sc = aer->sc;
158a1eda741SJohn Baldwin 	uint32_t cdw0;
159a1eda741SJohn Baldwin 
160a1eda741SJohn Baldwin 	/*
161a1eda741SJohn Baldwin 	 * The only error defined for AER is an abort due to
162a1eda741SJohn Baldwin 	 * submitting too many AER commands.  Just discard this AER
163a1eda741SJohn Baldwin 	 * without resubmitting if we get an error.
164a1eda741SJohn Baldwin 	 *
165a1eda741SJohn Baldwin 	 * NB: Pending AER commands are aborted during controller
166a1eda741SJohn Baldwin 	 * shutdown, so discard aborted commands silently.
167a1eda741SJohn Baldwin 	 */
168a1eda741SJohn Baldwin 	if (cqe->status != 0) {
169a1eda741SJohn Baldwin 		if (!nvmf_cqe_aborted(cqe))
170a1eda741SJohn Baldwin 			device_printf(sc->dev, "Ignoring error %#x for AER\n",
171a1eda741SJohn Baldwin 			    le16toh(cqe->status));
172a1eda741SJohn Baldwin 		return;
173a1eda741SJohn Baldwin 	}
174a1eda741SJohn Baldwin 
175a1eda741SJohn Baldwin 	cdw0 = le32toh(cqe->cdw0);
176a1eda741SJohn Baldwin 	aer->log_page_id = NVMEV(NVME_ASYNC_EVENT_LOG_PAGE_ID, cdw0);
177a1eda741SJohn Baldwin 	aer->info = NVMEV(NVME_ASYNC_EVENT_INFO, cdw0);
178a1eda741SJohn Baldwin 	aer->type = NVMEV(NVME_ASYNC_EVENT_TYPE, cdw0);
179a1eda741SJohn Baldwin 
180a1eda741SJohn Baldwin 	device_printf(sc->dev, "AER type %u, info %#x, page %#x\n",
181a1eda741SJohn Baldwin 	    aer->type, aer->info, aer->log_page_id);
182a1eda741SJohn Baldwin 
183a1eda741SJohn Baldwin 	aer->page_len = nvmf_log_page_size(sc, aer->log_page_id);
184a1eda741SJohn Baldwin 	taskqueue_enqueue(taskqueue_thread, &aer->complete_task);
185a1eda741SJohn Baldwin }
186a1eda741SJohn Baldwin 
187a1eda741SJohn Baldwin static void
nvmf_complete_aer_task(void * arg,int pending)188a1eda741SJohn Baldwin nvmf_complete_aer_task(void *arg, int pending)
189a1eda741SJohn Baldwin {
190a1eda741SJohn Baldwin 	struct nvmf_aer *aer = arg;
191a1eda741SJohn Baldwin 	struct nvmf_softc *sc = aer->sc;
192a1eda741SJohn Baldwin 
193a1eda741SJohn Baldwin 	if (aer->page_len != 0) {
194a1eda741SJohn Baldwin 		/* Read the associated log page. */
195a1eda741SJohn Baldwin 		aer->page_len = MIN(aer->page_len, MAX_LOG_PAGE_SIZE);
196a1eda741SJohn Baldwin 		aer->pending = 2;
197a1eda741SJohn Baldwin 		(void) nvmf_cmd_get_log_page(sc, NVME_GLOBAL_NAMESPACE_TAG,
198a1eda741SJohn Baldwin 		    aer->log_page_id, 0, aer->page, aer->page_len,
199a1eda741SJohn Baldwin 		    nvmf_complete_aer_page, aer, nvmf_io_complete_aer_page,
200a1eda741SJohn Baldwin 		    aer, M_WAITOK);
201a1eda741SJohn Baldwin 	} else {
202a1eda741SJohn Baldwin 		/* Resubmit this AER command. */
203a1eda741SJohn Baldwin 		nvmf_submit_aer(sc, aer);
204a1eda741SJohn Baldwin 	}
205a1eda741SJohn Baldwin }
206a1eda741SJohn Baldwin 
207a1eda741SJohn Baldwin static int
nvmf_set_async_event_config(struct nvmf_softc * sc,uint32_t config)208a1eda741SJohn Baldwin nvmf_set_async_event_config(struct nvmf_softc *sc, uint32_t config)
209a1eda741SJohn Baldwin {
210a1eda741SJohn Baldwin 	struct nvme_command cmd;
211a1eda741SJohn Baldwin 	struct nvmf_completion_status status;
212a1eda741SJohn Baldwin 	struct nvmf_request *req;
213a1eda741SJohn Baldwin 
214a1eda741SJohn Baldwin 	memset(&cmd, 0, sizeof(cmd));
215a1eda741SJohn Baldwin 	cmd.opc = NVME_OPC_SET_FEATURES;
216a1eda741SJohn Baldwin 	cmd.cdw10 = htole32(NVME_FEAT_ASYNC_EVENT_CONFIGURATION);
217a1eda741SJohn Baldwin 	cmd.cdw11 = htole32(config);
218a1eda741SJohn Baldwin 
219a1eda741SJohn Baldwin 	nvmf_status_init(&status);
220a1eda741SJohn Baldwin 	req = nvmf_allocate_request(sc->admin, &cmd, nvmf_complete, &status,
221a1eda741SJohn Baldwin 	    M_WAITOK);
222a1eda741SJohn Baldwin 	if (req == NULL) {
223a1eda741SJohn Baldwin 		device_printf(sc->dev,
224a1eda741SJohn Baldwin 		    "failed to allocate SET_FEATURES (ASYNC_EVENT_CONFIGURATION) command\n");
225a1eda741SJohn Baldwin 		return (ECONNABORTED);
226a1eda741SJohn Baldwin 	}
227a1eda741SJohn Baldwin 	nvmf_submit_request(req);
228a1eda741SJohn Baldwin 	nvmf_wait_for_reply(&status);
229a1eda741SJohn Baldwin 
230a1eda741SJohn Baldwin 	if (status.cqe.status != 0) {
231a1eda741SJohn Baldwin 		device_printf(sc->dev,
232a1eda741SJohn Baldwin 		    "SET_FEATURES (ASYNC_EVENT_CONFIGURATION) failed, status %#x\n",
233a1eda741SJohn Baldwin 		    le16toh(status.cqe.status));
234a1eda741SJohn Baldwin 		return (EIO);
235a1eda741SJohn Baldwin 	}
236a1eda741SJohn Baldwin 
237a1eda741SJohn Baldwin 	return (0);
238a1eda741SJohn Baldwin }
239a1eda741SJohn Baldwin 
240a1eda741SJohn Baldwin void
nvmf_init_aer(struct nvmf_softc * sc)241a1eda741SJohn Baldwin nvmf_init_aer(struct nvmf_softc *sc)
242a1eda741SJohn Baldwin {
243a1eda741SJohn Baldwin 	/* 8 matches NVME_MAX_ASYNC_EVENTS */
244a1eda741SJohn Baldwin 	sc->num_aer = min(8, sc->cdata->aerl + 1);
245a1eda741SJohn Baldwin 	sc->aer = mallocarray(sc->num_aer, sizeof(*sc->aer), M_NVMF,
246a1eda741SJohn Baldwin 	    M_WAITOK | M_ZERO);
247a1eda741SJohn Baldwin 	for (u_int i = 0; i < sc->num_aer; i++) {
248a1eda741SJohn Baldwin 		sc->aer[i].sc = sc;
249a1eda741SJohn Baldwin 		sc->aer[i].page = malloc(MAX_LOG_PAGE_SIZE, M_NVMF, M_WAITOK);
250a1eda741SJohn Baldwin 		sc->aer[i].lock = mtx_pool_find(mtxpool_sleep, &sc->aer[i]);
251a1eda741SJohn Baldwin 		TASK_INIT(&sc->aer[i].complete_task, 0, nvmf_complete_aer_task,
252a1eda741SJohn Baldwin 		    &sc->aer[i]);
253a1eda741SJohn Baldwin 		TASK_INIT(&sc->aer[i].finish_page_task, 0,
254a1eda741SJohn Baldwin 		    nvmf_finish_aer_page_task, &sc->aer[i]);
255a1eda741SJohn Baldwin 	}
256a1eda741SJohn Baldwin }
257a1eda741SJohn Baldwin 
258a1eda741SJohn Baldwin int
nvmf_start_aer(struct nvmf_softc * sc)259a1eda741SJohn Baldwin nvmf_start_aer(struct nvmf_softc *sc)
260a1eda741SJohn Baldwin {
261a1eda741SJohn Baldwin 	uint32_t async_event_config;
262a1eda741SJohn Baldwin 	int error;
263a1eda741SJohn Baldwin 
264a1eda741SJohn Baldwin 	async_event_config = NVME_CRIT_WARN_ST_AVAILABLE_SPARE |
265a1eda741SJohn Baldwin 	    NVME_CRIT_WARN_ST_DEVICE_RELIABILITY |
266a1eda741SJohn Baldwin 	    NVME_CRIT_WARN_ST_READ_ONLY |
267a1eda741SJohn Baldwin 	    NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP;
268a1eda741SJohn Baldwin 	if (sc->cdata->ver >= NVME_REV(1, 2))
269a1eda741SJohn Baldwin 		async_event_config |=
270a1eda741SJohn Baldwin 		    sc->cdata->oaes & NVME_ASYNC_EVENT_NS_ATTRIBUTE;
271a1eda741SJohn Baldwin 	error = nvmf_set_async_event_config(sc, async_event_config);
272a1eda741SJohn Baldwin 	if (error != 0)
273a1eda741SJohn Baldwin 		return (error);
274a1eda741SJohn Baldwin 
275a1eda741SJohn Baldwin 	for (u_int i = 0; i < sc->num_aer; i++)
276a1eda741SJohn Baldwin 		nvmf_submit_aer(sc, &sc->aer[i]);
277a1eda741SJohn Baldwin 
278a1eda741SJohn Baldwin 	return (0);
279a1eda741SJohn Baldwin }
280a1eda741SJohn Baldwin 
281a1eda741SJohn Baldwin void
nvmf_destroy_aer(struct nvmf_softc * sc)282a1eda741SJohn Baldwin nvmf_destroy_aer(struct nvmf_softc *sc)
283a1eda741SJohn Baldwin {
284a1eda741SJohn Baldwin 	for (u_int i = 0; i < sc->num_aer; i++) {
285a1eda741SJohn Baldwin 		taskqueue_drain(taskqueue_thread, &sc->aer[i].complete_task);
286a1eda741SJohn Baldwin 		taskqueue_drain(taskqueue_thread, &sc->aer[i].finish_page_task);
287a1eda741SJohn Baldwin 		free(sc->aer[i].page, M_NVMF);
288a1eda741SJohn Baldwin 	}
289a1eda741SJohn Baldwin 	free(sc->aer, M_NVMF);
290a1eda741SJohn Baldwin }
291