xref: /freebsd/usr.sbin/nvmfd/io.c (revision 399362bac312d4fa77a3fd918ea002c0782bc315)
1a8089ea5SJohn Baldwin /*-
2a8089ea5SJohn Baldwin  * SPDX-License-Identifier: BSD-2-Clause
3a8089ea5SJohn Baldwin  *
4a8089ea5SJohn Baldwin  * Copyright (c) 2023-2024 Chelsio Communications, Inc.
5a8089ea5SJohn Baldwin  * Written by: John Baldwin <jhb@FreeBSD.org>
6a8089ea5SJohn Baldwin  */
7a8089ea5SJohn Baldwin 
8a8089ea5SJohn Baldwin #include <sys/sysctl.h>
9a8089ea5SJohn Baldwin #include <err.h>
10a8089ea5SJohn Baldwin #include <errno.h>
11a8089ea5SJohn Baldwin #include <libnvmf.h>
12a8089ea5SJohn Baldwin #include <pthread.h>
13a8089ea5SJohn Baldwin #include <stdio.h>
14a8089ea5SJohn Baldwin #include <stdlib.h>
15a8089ea5SJohn Baldwin #include <string.h>
16a8089ea5SJohn Baldwin #include <unistd.h>
17a8089ea5SJohn Baldwin 
18a8089ea5SJohn Baldwin #include "internal.h"
19a8089ea5SJohn Baldwin 
20a8089ea5SJohn Baldwin struct io_controller {
21a8089ea5SJohn Baldwin 	struct controller *c;
22a8089ea5SJohn Baldwin 
23a8089ea5SJohn Baldwin 	u_int num_io_queues;
24a8089ea5SJohn Baldwin 	u_int active_io_queues;
25a8089ea5SJohn Baldwin 	struct nvmf_qpair **io_qpairs;
26a8089ea5SJohn Baldwin 	int *io_sockets;
27a8089ea5SJohn Baldwin 
28a8089ea5SJohn Baldwin 	struct nvme_firmware_page fp;
29a8089ea5SJohn Baldwin 	struct nvme_health_information_page hip;
30a8089ea5SJohn Baldwin 	uint16_t partial_dur;
31a8089ea5SJohn Baldwin 	uint16_t partial_duw;
32a8089ea5SJohn Baldwin 
33a8089ea5SJohn Baldwin 	uint16_t cntlid;
34a8089ea5SJohn Baldwin 	char hostid[16];
35a8089ea5SJohn Baldwin 	char hostnqn[NVME_NQN_FIELD_SIZE];
36a8089ea5SJohn Baldwin };
37a8089ea5SJohn Baldwin 
38a8089ea5SJohn Baldwin static struct nvmf_association *io_na;
39a8089ea5SJohn Baldwin static pthread_cond_t io_cond;
40a8089ea5SJohn Baldwin static pthread_mutex_t io_na_mutex;
41a8089ea5SJohn Baldwin static struct io_controller *io_controller;
42a8089ea5SJohn Baldwin static const char *nqn;
43a8089ea5SJohn Baldwin static char serial[NVME_SERIAL_NUMBER_LENGTH];
44a8089ea5SJohn Baldwin 
45a8089ea5SJohn Baldwin void
46a8089ea5SJohn Baldwin init_io(const char *subnqn)
47a8089ea5SJohn Baldwin {
48a8089ea5SJohn Baldwin 	struct nvmf_association_params aparams;
49a8089ea5SJohn Baldwin 	u_long hostid;
50a8089ea5SJohn Baldwin 	size_t len;
51a8089ea5SJohn Baldwin 
52a8089ea5SJohn Baldwin 	memset(&aparams, 0, sizeof(aparams));
53a8089ea5SJohn Baldwin 	aparams.sq_flow_control = !flow_control_disable;
54a8089ea5SJohn Baldwin 	aparams.dynamic_controller_model = true;
55a8089ea5SJohn Baldwin 	aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
56a8089ea5SJohn Baldwin 	aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
57a8089ea5SJohn Baldwin 	aparams.tcp.pda = 0;
58a8089ea5SJohn Baldwin 	aparams.tcp.header_digests = header_digests;
59a8089ea5SJohn Baldwin 	aparams.tcp.data_digests = data_digests;
60*399362baSJohn Baldwin 	aparams.tcp.maxh2cdata = maxh2cdata;
61a8089ea5SJohn Baldwin 	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
62a8089ea5SJohn Baldwin 	    &aparams);
63a8089ea5SJohn Baldwin 	if (io_na == NULL)
64a8089ea5SJohn Baldwin 		err(1, "Failed to create I/O controller association");
65a8089ea5SJohn Baldwin 
66a8089ea5SJohn Baldwin 	nqn = subnqn;
67a8089ea5SJohn Baldwin 
68a8089ea5SJohn Baldwin 	/* Generate a serial number from the kern.hostid node. */
69a8089ea5SJohn Baldwin 	len = sizeof(hostid);
70a8089ea5SJohn Baldwin 	if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
71a8089ea5SJohn Baldwin 		err(1, "sysctl: kern.hostid");
72a8089ea5SJohn Baldwin 
73a8089ea5SJohn Baldwin 	nvmf_controller_serial(serial, sizeof(serial), hostid);
74a8089ea5SJohn Baldwin 
75a8089ea5SJohn Baldwin 	pthread_cond_init(&io_cond, NULL);
76a8089ea5SJohn Baldwin 	pthread_mutex_init(&io_na_mutex, NULL);
77a8089ea5SJohn Baldwin 
78a8089ea5SJohn Baldwin 	if (kernel_io)
79a8089ea5SJohn Baldwin 		init_ctl_port(subnqn, &aparams);
80a8089ea5SJohn Baldwin }
81a8089ea5SJohn Baldwin 
82a8089ea5SJohn Baldwin void
83a8089ea5SJohn Baldwin shutdown_io(void)
84a8089ea5SJohn Baldwin {
85a8089ea5SJohn Baldwin 	if (kernel_io)
86a8089ea5SJohn Baldwin 		shutdown_ctl_port(nqn);
87a8089ea5SJohn Baldwin }
88a8089ea5SJohn Baldwin 
89a8089ea5SJohn Baldwin static void
90a8089ea5SJohn Baldwin handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
91a8089ea5SJohn Baldwin     const struct nvme_command *cmd)
92a8089ea5SJohn Baldwin {
93a8089ea5SJohn Baldwin 	uint64_t offset;
94a8089ea5SJohn Baldwin 	uint32_t numd;
95a8089ea5SJohn Baldwin 	size_t len;
96a8089ea5SJohn Baldwin 	uint8_t lid;
97a8089ea5SJohn Baldwin 
98a8089ea5SJohn Baldwin 	lid = le32toh(cmd->cdw10) & 0xff;
99a8089ea5SJohn Baldwin 	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
100a8089ea5SJohn Baldwin 	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
101a8089ea5SJohn Baldwin 
102a8089ea5SJohn Baldwin 	if (offset % 3 != 0)
103a8089ea5SJohn Baldwin 		goto error;
104a8089ea5SJohn Baldwin 
105a8089ea5SJohn Baldwin 	len = (numd + 1) * 4;
106a8089ea5SJohn Baldwin 
107a8089ea5SJohn Baldwin 	switch (lid) {
108a8089ea5SJohn Baldwin 	case NVME_LOG_ERROR:
109a8089ea5SJohn Baldwin 	{
110a8089ea5SJohn Baldwin 		void *buf;
111a8089ea5SJohn Baldwin 
112a8089ea5SJohn Baldwin 		if (len % sizeof(struct nvme_error_information_entry) != 0)
113a8089ea5SJohn Baldwin 			goto error;
114a8089ea5SJohn Baldwin 
115a8089ea5SJohn Baldwin 		buf = calloc(1, len);
116a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, buf, len);
117a8089ea5SJohn Baldwin 		free(buf);
118a8089ea5SJohn Baldwin 		return;
119a8089ea5SJohn Baldwin 	}
120a8089ea5SJohn Baldwin 	case NVME_LOG_HEALTH_INFORMATION:
121a8089ea5SJohn Baldwin 		if (len != sizeof(ioc->hip))
122a8089ea5SJohn Baldwin 			goto error;
123a8089ea5SJohn Baldwin 
124a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
125a8089ea5SJohn Baldwin 		return;
126a8089ea5SJohn Baldwin 	case NVME_LOG_FIRMWARE_SLOT:
127a8089ea5SJohn Baldwin 		if (len != sizeof(ioc->fp))
128a8089ea5SJohn Baldwin 			goto error;
129a8089ea5SJohn Baldwin 
130a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
131a8089ea5SJohn Baldwin 		return;
132a8089ea5SJohn Baldwin 	default:
133a8089ea5SJohn Baldwin 		warnx("Unsupported page %#x for GET_LOG_PAGE\n", lid);
134a8089ea5SJohn Baldwin 		goto error;
135a8089ea5SJohn Baldwin 	}
136a8089ea5SJohn Baldwin 
137a8089ea5SJohn Baldwin error:
138a8089ea5SJohn Baldwin 	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
139a8089ea5SJohn Baldwin }
140a8089ea5SJohn Baldwin 
141a8089ea5SJohn Baldwin static bool
142a8089ea5SJohn Baldwin handle_io_identify_command(const struct nvmf_capsule *nc,
143a8089ea5SJohn Baldwin     const struct nvme_command *cmd)
144a8089ea5SJohn Baldwin {
145a8089ea5SJohn Baldwin 	struct nvme_namespace_data nsdata;
146a8089ea5SJohn Baldwin 	struct nvme_ns_list nslist;
147a8089ea5SJohn Baldwin 	uint32_t nsid;
148a8089ea5SJohn Baldwin 	uint8_t cns;
149a8089ea5SJohn Baldwin 
150a8089ea5SJohn Baldwin 	cns = le32toh(cmd->cdw10) & 0xFF;
151a8089ea5SJohn Baldwin 	switch (cns) {
152a8089ea5SJohn Baldwin 	case 0:	/* Namespace data. */
153a8089ea5SJohn Baldwin 		if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
154a8089ea5SJohn Baldwin 			nvmf_send_generic_error(nc,
155a8089ea5SJohn Baldwin 			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
156a8089ea5SJohn Baldwin 			return (true);
157a8089ea5SJohn Baldwin 		}
158a8089ea5SJohn Baldwin 
159a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
160a8089ea5SJohn Baldwin 		return (true);
161a8089ea5SJohn Baldwin 	case 2:	/* Active namespace list. */
162a8089ea5SJohn Baldwin 		nsid = le32toh(cmd->nsid);
163a8089ea5SJohn Baldwin 		if (nsid >= 0xfffffffe) {
164a8089ea5SJohn Baldwin 			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
165a8089ea5SJohn Baldwin 			return (true);
166a8089ea5SJohn Baldwin 		}
167a8089ea5SJohn Baldwin 
168a8089ea5SJohn Baldwin 		device_active_nslist(nsid, &nslist);
169a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
170a8089ea5SJohn Baldwin 		return (true);
171a8089ea5SJohn Baldwin 	case 3:	/* Namespace Identification Descriptor list. */
172a8089ea5SJohn Baldwin 		if (!device_identification_descriptor(le32toh(cmd->nsid),
173a8089ea5SJohn Baldwin 		    &nsdata)) {
174a8089ea5SJohn Baldwin 			nvmf_send_generic_error(nc,
175a8089ea5SJohn Baldwin 			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
176a8089ea5SJohn Baldwin 			return (true);
177a8089ea5SJohn Baldwin 		}
178a8089ea5SJohn Baldwin 
179a8089ea5SJohn Baldwin 		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
180a8089ea5SJohn Baldwin 		return (true);
181a8089ea5SJohn Baldwin 	default:
182a8089ea5SJohn Baldwin 		return (false);
183a8089ea5SJohn Baldwin 	}
184a8089ea5SJohn Baldwin }
185a8089ea5SJohn Baldwin 
186a8089ea5SJohn Baldwin static void
187a8089ea5SJohn Baldwin handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
188a8089ea5SJohn Baldwin     const struct nvme_command *cmd)
189a8089ea5SJohn Baldwin {
190a8089ea5SJohn Baldwin 	struct nvme_completion cqe;
191a8089ea5SJohn Baldwin 	uint8_t fid;
192a8089ea5SJohn Baldwin 
193a8089ea5SJohn Baldwin 	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
194a8089ea5SJohn Baldwin 	switch (fid) {
195a8089ea5SJohn Baldwin 	case NVME_FEAT_NUMBER_OF_QUEUES:
196a8089ea5SJohn Baldwin 	{
197a8089ea5SJohn Baldwin 		uint32_t num_queues;
198a8089ea5SJohn Baldwin 
199a8089ea5SJohn Baldwin 		if (ioc->num_io_queues != 0) {
200a8089ea5SJohn Baldwin 			nvmf_send_generic_error(nc,
201a8089ea5SJohn Baldwin 			    NVME_SC_COMMAND_SEQUENCE_ERROR);
202a8089ea5SJohn Baldwin 			return;
203a8089ea5SJohn Baldwin 		}
204a8089ea5SJohn Baldwin 
205a8089ea5SJohn Baldwin 		num_queues = le32toh(cmd->cdw11) & 0xffff;
206a8089ea5SJohn Baldwin 
207a8089ea5SJohn Baldwin 		/* 5.12.1.7: 65535 is invalid. */
208a8089ea5SJohn Baldwin 		if (num_queues == 65535)
209a8089ea5SJohn Baldwin 			goto error;
210a8089ea5SJohn Baldwin 
211a8089ea5SJohn Baldwin 		/* Fabrics requires the same number of SQs and CQs. */
212a8089ea5SJohn Baldwin 		if (le32toh(cmd->cdw11) >> 16 != num_queues)
213a8089ea5SJohn Baldwin 			goto error;
214a8089ea5SJohn Baldwin 
215a8089ea5SJohn Baldwin 		/* Convert to 1's based */
216a8089ea5SJohn Baldwin 		num_queues++;
217a8089ea5SJohn Baldwin 
218a8089ea5SJohn Baldwin 		/* Lock to synchronize with handle_io_qpair. */
219a8089ea5SJohn Baldwin 		pthread_mutex_lock(&io_na_mutex);
220a8089ea5SJohn Baldwin 		ioc->num_io_queues = num_queues;
221a8089ea5SJohn Baldwin 		ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
222a8089ea5SJohn Baldwin 		ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
223a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
224a8089ea5SJohn Baldwin 
225a8089ea5SJohn Baldwin 		nvmf_init_cqe(&cqe, nc, 0);
226a8089ea5SJohn Baldwin 		cqe.cdw0 = cmd->cdw11;
227a8089ea5SJohn Baldwin 		nvmf_send_response(nc, &cqe);
228a8089ea5SJohn Baldwin 		return;
229a8089ea5SJohn Baldwin 	}
230a8089ea5SJohn Baldwin 	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
231a8089ea5SJohn Baldwin 	{
232a8089ea5SJohn Baldwin 		uint32_t aer_mask;
233a8089ea5SJohn Baldwin 
234a8089ea5SJohn Baldwin 		aer_mask = le32toh(cmd->cdw11);
235a8089ea5SJohn Baldwin 
236a8089ea5SJohn Baldwin 		/* Check for any reserved or unimplemented feature bits. */
237a8089ea5SJohn Baldwin 		if ((aer_mask & 0xffffc000) != 0)
238a8089ea5SJohn Baldwin 			goto error;
239a8089ea5SJohn Baldwin 
240a8089ea5SJohn Baldwin 		/* No AERs are generated by this daemon. */
241a8089ea5SJohn Baldwin 		nvmf_send_success(nc);
242a8089ea5SJohn Baldwin 		return;
243a8089ea5SJohn Baldwin 	}
244a8089ea5SJohn Baldwin 	default:
245a8089ea5SJohn Baldwin 		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
246a8089ea5SJohn Baldwin 		goto error;
247a8089ea5SJohn Baldwin 	}
248a8089ea5SJohn Baldwin 
249a8089ea5SJohn Baldwin error:
250a8089ea5SJohn Baldwin 	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
251a8089ea5SJohn Baldwin }
252a8089ea5SJohn Baldwin 
253a8089ea5SJohn Baldwin static bool
254a8089ea5SJohn Baldwin admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
255a8089ea5SJohn Baldwin     void *arg)
256a8089ea5SJohn Baldwin {
257a8089ea5SJohn Baldwin 	struct io_controller *ioc = arg;
258a8089ea5SJohn Baldwin 
259a8089ea5SJohn Baldwin 	switch (cmd->opc) {
260a8089ea5SJohn Baldwin 	case NVME_OPC_GET_LOG_PAGE:
261a8089ea5SJohn Baldwin 		handle_get_log_page(ioc, nc, cmd);
262a8089ea5SJohn Baldwin 		return (true);
263a8089ea5SJohn Baldwin 	case NVME_OPC_IDENTIFY:
264a8089ea5SJohn Baldwin 		return (handle_io_identify_command(nc, cmd));
265a8089ea5SJohn Baldwin 	case NVME_OPC_SET_FEATURES:
266a8089ea5SJohn Baldwin 		handle_set_features(ioc, nc, cmd);
267a8089ea5SJohn Baldwin 		return (true);
268a8089ea5SJohn Baldwin 	case NVME_OPC_ASYNC_EVENT_REQUEST:
269a8089ea5SJohn Baldwin 		/* Ignore and never complete. */
270a8089ea5SJohn Baldwin 		return (true);
271a8089ea5SJohn Baldwin 	case NVME_OPC_KEEP_ALIVE:
272a8089ea5SJohn Baldwin 		nvmf_send_success(nc);
273a8089ea5SJohn Baldwin 		return (true);
274a8089ea5SJohn Baldwin 	default:
275a8089ea5SJohn Baldwin 		return (false);
276a8089ea5SJohn Baldwin 	}
277a8089ea5SJohn Baldwin }
278a8089ea5SJohn Baldwin 
279a8089ea5SJohn Baldwin static void
280a8089ea5SJohn Baldwin handle_admin_qpair(struct io_controller *ioc)
281a8089ea5SJohn Baldwin {
282a8089ea5SJohn Baldwin 	pthread_setname_np(pthread_self(), "admin queue");
283a8089ea5SJohn Baldwin 
284a8089ea5SJohn Baldwin 	controller_handle_admin_commands(ioc->c, admin_command, ioc);
285a8089ea5SJohn Baldwin 
286a8089ea5SJohn Baldwin 	pthread_mutex_lock(&io_na_mutex);
287a8089ea5SJohn Baldwin 	for (u_int i = 0; i < ioc->num_io_queues; i++) {
288a8089ea5SJohn Baldwin 		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
289a8089ea5SJohn Baldwin 			continue;
290a8089ea5SJohn Baldwin 		close(ioc->io_sockets[i]);
291a8089ea5SJohn Baldwin 		ioc->io_sockets[i] = -1;
292a8089ea5SJohn Baldwin 	}
293a8089ea5SJohn Baldwin 
294a8089ea5SJohn Baldwin 	/* Wait for I/O threads to notice. */
295a8089ea5SJohn Baldwin 	while (ioc->active_io_queues > 0)
296a8089ea5SJohn Baldwin 		pthread_cond_wait(&io_cond, &io_na_mutex);
297a8089ea5SJohn Baldwin 
298a8089ea5SJohn Baldwin 	io_controller = NULL;
299a8089ea5SJohn Baldwin 	pthread_mutex_unlock(&io_na_mutex);
300a8089ea5SJohn Baldwin 
301a8089ea5SJohn Baldwin 	free_controller(ioc->c);
302a8089ea5SJohn Baldwin 
303a8089ea5SJohn Baldwin 	free(ioc);
304a8089ea5SJohn Baldwin }
305a8089ea5SJohn Baldwin 
306a8089ea5SJohn Baldwin static bool
307a8089ea5SJohn Baldwin handle_io_fabrics_command(const struct nvmf_capsule *nc,
308a8089ea5SJohn Baldwin     const struct nvmf_fabric_cmd *fc)
309a8089ea5SJohn Baldwin {
310a8089ea5SJohn Baldwin 	switch (fc->fctype) {
311a8089ea5SJohn Baldwin 	case NVMF_FABRIC_COMMAND_CONNECT:
312a8089ea5SJohn Baldwin 		warnx("CONNECT command on connected queue");
313a8089ea5SJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
314a8089ea5SJohn Baldwin 		break;
315a8089ea5SJohn Baldwin 	case NVMF_FABRIC_COMMAND_DISCONNECT:
316a8089ea5SJohn Baldwin 	{
317a8089ea5SJohn Baldwin 		const struct nvmf_fabric_disconnect_cmd *dis =
318a8089ea5SJohn Baldwin 		    (const struct nvmf_fabric_disconnect_cmd *)fc;
319a8089ea5SJohn Baldwin 		if (dis->recfmt != htole16(0)) {
320a8089ea5SJohn Baldwin 			nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
321a8089ea5SJohn Baldwin 			    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
322a8089ea5SJohn Baldwin 			break;
323a8089ea5SJohn Baldwin 		}
324a8089ea5SJohn Baldwin 		nvmf_send_success(nc);
325a8089ea5SJohn Baldwin 		return (true);
326a8089ea5SJohn Baldwin 	}
327a8089ea5SJohn Baldwin 	default:
328a8089ea5SJohn Baldwin 		warnx("Unsupported fabrics command %#x", fc->fctype);
329a8089ea5SJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
330a8089ea5SJohn Baldwin 		break;
331a8089ea5SJohn Baldwin 	}
332a8089ea5SJohn Baldwin 
333a8089ea5SJohn Baldwin 	return (false);
334a8089ea5SJohn Baldwin }
335a8089ea5SJohn Baldwin 
/*
 * Add 'addend' to a 128-bit counter stored as two little-endian
 * 64-bit words, pair[0] holding the low half and pair[1] the high
 * half.  A wrap of the low word carries one into the high word.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);

	/*
	 * Unsigned wrap-around means a carry.  Convert the high word to
	 * host order before incrementing so the carry is also correct on
	 * big-endian hosts.
	 */
	if (new < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}
347a8089ea5SJohn Baldwin 
348a8089ea5SJohn Baldwin static uint64_t
349a8089ea5SJohn Baldwin cmd_lba(const struct nvme_command *cmd)
350a8089ea5SJohn Baldwin {
351a8089ea5SJohn Baldwin 	return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
352a8089ea5SJohn Baldwin }
353a8089ea5SJohn Baldwin 
354a8089ea5SJohn Baldwin static u_int
355a8089ea5SJohn Baldwin cmd_nlb(const struct nvme_command *cmd)
356a8089ea5SJohn Baldwin {
357a8089ea5SJohn Baldwin 	return ((le32toh(cmd->cdw12) & 0xffff) + 1);
358a8089ea5SJohn Baldwin }
359a8089ea5SJohn Baldwin 
360a8089ea5SJohn Baldwin static void
361a8089ea5SJohn Baldwin handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
362a8089ea5SJohn Baldwin     const struct nvme_command *cmd)
363a8089ea5SJohn Baldwin {
364a8089ea5SJohn Baldwin 	size_t len;
365a8089ea5SJohn Baldwin 
366a8089ea5SJohn Baldwin 	len = nvmf_capsule_data_len(nc);
367a8089ea5SJohn Baldwin 	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
368a8089ea5SJohn Baldwin 	hip_add(ioc->hip.host_read_commands, 1);
369a8089ea5SJohn Baldwin 
370a8089ea5SJohn Baldwin 	len /= 512;
371a8089ea5SJohn Baldwin 	len += ioc->partial_dur;
372a8089ea5SJohn Baldwin 	if (len > 1000)
373a8089ea5SJohn Baldwin 		hip_add(ioc->hip.data_units_read, len / 1000);
374a8089ea5SJohn Baldwin 	ioc->partial_dur = len % 1000;
375a8089ea5SJohn Baldwin }
376a8089ea5SJohn Baldwin 
377a8089ea5SJohn Baldwin static void
378a8089ea5SJohn Baldwin handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
379a8089ea5SJohn Baldwin     const struct nvme_command *cmd)
380a8089ea5SJohn Baldwin {
381a8089ea5SJohn Baldwin 	size_t len;
382a8089ea5SJohn Baldwin 
383a8089ea5SJohn Baldwin 	len = nvmf_capsule_data_len(nc);
384a8089ea5SJohn Baldwin 	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
385a8089ea5SJohn Baldwin 	hip_add(ioc->hip.host_write_commands, 1);
386a8089ea5SJohn Baldwin 
387a8089ea5SJohn Baldwin 	len /= 512;
388a8089ea5SJohn Baldwin 	len += ioc->partial_duw;
389a8089ea5SJohn Baldwin 	if (len > 1000)
390a8089ea5SJohn Baldwin 		hip_add(ioc->hip.data_units_written, len / 1000);
391a8089ea5SJohn Baldwin 	ioc->partial_duw = len % 1000;
392a8089ea5SJohn Baldwin }
393a8089ea5SJohn Baldwin 
394a8089ea5SJohn Baldwin static void
395a8089ea5SJohn Baldwin handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
396a8089ea5SJohn Baldwin {
397a8089ea5SJohn Baldwin 	device_flush(le32toh(cmd->nsid), nc);
398a8089ea5SJohn Baldwin }
399a8089ea5SJohn Baldwin 
400a8089ea5SJohn Baldwin static bool
401a8089ea5SJohn Baldwin handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
402a8089ea5SJohn Baldwin {
403a8089ea5SJohn Baldwin 	const struct nvme_command *cmd;
404a8089ea5SJohn Baldwin 	struct nvmf_capsule *nc;
405a8089ea5SJohn Baldwin 	int error;
406a8089ea5SJohn Baldwin 	bool disconnect;
407a8089ea5SJohn Baldwin 
408a8089ea5SJohn Baldwin 	disconnect = false;
409a8089ea5SJohn Baldwin 
410a8089ea5SJohn Baldwin 	while (!disconnect) {
411a8089ea5SJohn Baldwin 		error = nvmf_controller_receive_capsule(qp, &nc);
412a8089ea5SJohn Baldwin 		if (error != 0) {
413a8089ea5SJohn Baldwin 			if (error != ECONNRESET)
414a8089ea5SJohn Baldwin 				warnc(error, "Failed to read command capsule");
415a8089ea5SJohn Baldwin 			break;
416a8089ea5SJohn Baldwin 		}
417a8089ea5SJohn Baldwin 
418a8089ea5SJohn Baldwin 		cmd = nvmf_capsule_sqe(nc);
419a8089ea5SJohn Baldwin 
420a8089ea5SJohn Baldwin 		switch (cmd->opc) {
421a8089ea5SJohn Baldwin 		case NVME_OPC_FLUSH:
422a8089ea5SJohn Baldwin 			if (cmd->nsid == htole32(0xffffffff)) {
423a8089ea5SJohn Baldwin 				nvmf_send_generic_error(nc,
424a8089ea5SJohn Baldwin 				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
425a8089ea5SJohn Baldwin 				break;
426a8089ea5SJohn Baldwin 			}
427a8089ea5SJohn Baldwin 			handle_flush(nc, cmd);
428a8089ea5SJohn Baldwin 			break;
429a8089ea5SJohn Baldwin 		case NVME_OPC_WRITE:
430a8089ea5SJohn Baldwin 			handle_write(ioc, nc, cmd);
431a8089ea5SJohn Baldwin 			break;
432a8089ea5SJohn Baldwin 		case NVME_OPC_READ:
433a8089ea5SJohn Baldwin 			handle_read(ioc, nc, cmd);
434a8089ea5SJohn Baldwin 			break;
435a8089ea5SJohn Baldwin 		case NVME_OPC_FABRICS_COMMANDS:
436a8089ea5SJohn Baldwin 			disconnect = handle_io_fabrics_command(nc,
437a8089ea5SJohn Baldwin 			    (const struct nvmf_fabric_cmd *)cmd);
438a8089ea5SJohn Baldwin 			break;
439a8089ea5SJohn Baldwin 		default:
440a8089ea5SJohn Baldwin 			warnx("Unsupported NVM opcode %#x", cmd->opc);
441a8089ea5SJohn Baldwin 			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
442a8089ea5SJohn Baldwin 			break;
443a8089ea5SJohn Baldwin 		}
444a8089ea5SJohn Baldwin 		nvmf_free_capsule(nc);
445a8089ea5SJohn Baldwin 	}
446a8089ea5SJohn Baldwin 
447a8089ea5SJohn Baldwin 	return (disconnect);
448a8089ea5SJohn Baldwin }
449a8089ea5SJohn Baldwin 
450a8089ea5SJohn Baldwin static void
451a8089ea5SJohn Baldwin handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
452a8089ea5SJohn Baldwin {
453a8089ea5SJohn Baldwin 	char name[64];
454a8089ea5SJohn Baldwin 	bool disconnect;
455a8089ea5SJohn Baldwin 
456a8089ea5SJohn Baldwin 	snprintf(name, sizeof(name), "I/O queue %d", qid);
457a8089ea5SJohn Baldwin 	pthread_setname_np(pthread_self(), name);
458a8089ea5SJohn Baldwin 
459a8089ea5SJohn Baldwin 	disconnect = handle_io_commands(ioc, qp);
460a8089ea5SJohn Baldwin 
461a8089ea5SJohn Baldwin 	pthread_mutex_lock(&io_na_mutex);
462a8089ea5SJohn Baldwin 	if (disconnect)
463a8089ea5SJohn Baldwin 		ioc->io_qpairs[qid - 1] = NULL;
464a8089ea5SJohn Baldwin 	if (ioc->io_sockets[qid - 1] != -1) {
465a8089ea5SJohn Baldwin 		close(ioc->io_sockets[qid - 1]);
466a8089ea5SJohn Baldwin 		ioc->io_sockets[qid - 1] = -1;
467a8089ea5SJohn Baldwin 	}
468a8089ea5SJohn Baldwin 	ioc->active_io_queues--;
469a8089ea5SJohn Baldwin 	if (ioc->active_io_queues == 0)
470a8089ea5SJohn Baldwin 		pthread_cond_broadcast(&io_cond);
471a8089ea5SJohn Baldwin 	pthread_mutex_unlock(&io_na_mutex);
472a8089ea5SJohn Baldwin }
473a8089ea5SJohn Baldwin 
474a8089ea5SJohn Baldwin static void
475a8089ea5SJohn Baldwin connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
476a8089ea5SJohn Baldwin     const struct nvmf_fabric_connect_data *data)
477a8089ea5SJohn Baldwin {
478a8089ea5SJohn Baldwin 	struct nvme_controller_data cdata;
479a8089ea5SJohn Baldwin 	struct io_controller *ioc;
480a8089ea5SJohn Baldwin 	int error;
481a8089ea5SJohn Baldwin 
482a8089ea5SJohn Baldwin 	/* Can only have one active I/O controller at a time. */
483a8089ea5SJohn Baldwin 	pthread_mutex_lock(&io_na_mutex);
484a8089ea5SJohn Baldwin 	if (io_controller != NULL) {
485a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
486a8089ea5SJohn Baldwin 		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
487a8089ea5SJohn Baldwin 		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
488a8089ea5SJohn Baldwin 		goto error;
489a8089ea5SJohn Baldwin 	}
490a8089ea5SJohn Baldwin 
491a8089ea5SJohn Baldwin 	error = nvmf_finish_accept(nc, 2);
492a8089ea5SJohn Baldwin 	if (error != 0) {
493a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
494a8089ea5SJohn Baldwin 		warnc(error, "Failed to send CONNECT response");
495a8089ea5SJohn Baldwin 		goto error;
496a8089ea5SJohn Baldwin 	}
497a8089ea5SJohn Baldwin 
498a8089ea5SJohn Baldwin 	ioc = calloc(1, sizeof(*ioc));
499a8089ea5SJohn Baldwin 	ioc->cntlid = 2;
500a8089ea5SJohn Baldwin 	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
501a8089ea5SJohn Baldwin 	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));
502a8089ea5SJohn Baldwin 
503a8089ea5SJohn Baldwin 	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
504a8089ea5SJohn Baldwin 	    NVMF_IOCCSZ, &cdata);
505a8089ea5SJohn Baldwin 
506a8089ea5SJohn Baldwin 	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
507a8089ea5SJohn Baldwin 	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));
508a8089ea5SJohn Baldwin 
509a8089ea5SJohn Baldwin 	ioc->hip.power_cycles[0] = 1;
510a8089ea5SJohn Baldwin 
511a8089ea5SJohn Baldwin 	ioc->c = init_controller(qp, &cdata);
512a8089ea5SJohn Baldwin 
513a8089ea5SJohn Baldwin 	io_controller = ioc;
514a8089ea5SJohn Baldwin 	pthread_mutex_unlock(&io_na_mutex);
515a8089ea5SJohn Baldwin 
516a8089ea5SJohn Baldwin 	nvmf_free_capsule(nc);
517a8089ea5SJohn Baldwin 
518a8089ea5SJohn Baldwin 	handle_admin_qpair(ioc);
519a8089ea5SJohn Baldwin 	close(s);
520a8089ea5SJohn Baldwin 	return;
521a8089ea5SJohn Baldwin 
522a8089ea5SJohn Baldwin error:
523a8089ea5SJohn Baldwin 	nvmf_free_capsule(nc);
524a8089ea5SJohn Baldwin 	close(s);
525a8089ea5SJohn Baldwin }
526a8089ea5SJohn Baldwin 
527a8089ea5SJohn Baldwin static void
528a8089ea5SJohn Baldwin connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
529a8089ea5SJohn Baldwin     const struct nvmf_fabric_connect_data *data, uint16_t qid)
530a8089ea5SJohn Baldwin {
531a8089ea5SJohn Baldwin 	struct io_controller *ioc;
532a8089ea5SJohn Baldwin 	int error;
533a8089ea5SJohn Baldwin 
534a8089ea5SJohn Baldwin 	pthread_mutex_lock(&io_na_mutex);
535a8089ea5SJohn Baldwin 	if (io_controller == NULL) {
536a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
537a8089ea5SJohn Baldwin 		warnx("Attempt to create I/O qpair without admin qpair");
538a8089ea5SJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
539a8089ea5SJohn Baldwin 		goto error;
540a8089ea5SJohn Baldwin 	}
541a8089ea5SJohn Baldwin 
542a8089ea5SJohn Baldwin 	if (memcmp(io_controller->hostid, data->hostid,
543a8089ea5SJohn Baldwin 	    sizeof(data->hostid)) != 0) {
544a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
545a8089ea5SJohn Baldwin 		warnx("hostid mismatch for I/O qpair CONNECT");
546a8089ea5SJohn Baldwin 		nvmf_connect_invalid_parameters(nc, true,
547a8089ea5SJohn Baldwin 		    offsetof(struct nvmf_fabric_connect_data, hostid));
548a8089ea5SJohn Baldwin 		goto error;
549a8089ea5SJohn Baldwin 	}
550a8089ea5SJohn Baldwin 	if (le16toh(data->cntlid) != io_controller->cntlid) {
551a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
552a8089ea5SJohn Baldwin 		warnx("cntlid mismatch for I/O qpair CONNECT");
553a8089ea5SJohn Baldwin 		nvmf_connect_invalid_parameters(nc, true,
554a8089ea5SJohn Baldwin 		    offsetof(struct nvmf_fabric_connect_data, cntlid));
555a8089ea5SJohn Baldwin 		goto error;
556a8089ea5SJohn Baldwin 	}
557a8089ea5SJohn Baldwin 	if (memcmp(io_controller->hostnqn, data->hostnqn,
558a8089ea5SJohn Baldwin 	    sizeof(data->hostid)) != 0) {
559a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
560a8089ea5SJohn Baldwin 		warnx("host NQN mismatch for I/O qpair CONNECT");
561a8089ea5SJohn Baldwin 		nvmf_connect_invalid_parameters(nc, true,
562a8089ea5SJohn Baldwin 		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
563a8089ea5SJohn Baldwin 		goto error;
564a8089ea5SJohn Baldwin 	}
565a8089ea5SJohn Baldwin 
566a8089ea5SJohn Baldwin 	if (io_controller->num_io_queues == 0) {
567a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
568a8089ea5SJohn Baldwin 		warnx("Attempt to create I/O qpair without enabled queues");
569a8089ea5SJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
570a8089ea5SJohn Baldwin 		goto error;
571a8089ea5SJohn Baldwin 	}
572a8089ea5SJohn Baldwin 	if (qid > io_controller->num_io_queues) {
573a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
574a8089ea5SJohn Baldwin 		warnx("Attempt to create invalid I/O qpair %u", qid);
575a8089ea5SJohn Baldwin 		nvmf_connect_invalid_parameters(nc, false,
576a8089ea5SJohn Baldwin 		    offsetof(struct nvmf_fabric_connect_cmd, qid));
577a8089ea5SJohn Baldwin 		goto error;
578a8089ea5SJohn Baldwin 	}
579a8089ea5SJohn Baldwin 	if (io_controller->io_qpairs[qid - 1] != NULL) {
580a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
581a8089ea5SJohn Baldwin 		warnx("Attempt to re-create I/O qpair %u", qid);
582a8089ea5SJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
583a8089ea5SJohn Baldwin 		goto error;
584a8089ea5SJohn Baldwin 	}
585a8089ea5SJohn Baldwin 
586a8089ea5SJohn Baldwin 	error = nvmf_finish_accept(nc, io_controller->cntlid);
587a8089ea5SJohn Baldwin 	if (error != 0) {
588a8089ea5SJohn Baldwin 		pthread_mutex_unlock(&io_na_mutex);
589a8089ea5SJohn Baldwin 		warnc(error, "Failed to send CONNECT response");
590a8089ea5SJohn Baldwin 		goto error;
591a8089ea5SJohn Baldwin 	}
592a8089ea5SJohn Baldwin 
593a8089ea5SJohn Baldwin 	ioc = io_controller;
594a8089ea5SJohn Baldwin 	ioc->active_io_queues++;
595a8089ea5SJohn Baldwin 	ioc->io_qpairs[qid - 1] = qp;
596a8089ea5SJohn Baldwin 	ioc->io_sockets[qid - 1] = s;
597a8089ea5SJohn Baldwin 	pthread_mutex_unlock(&io_na_mutex);
598a8089ea5SJohn Baldwin 
599a8089ea5SJohn Baldwin 	nvmf_free_capsule(nc);
600a8089ea5SJohn Baldwin 
601a8089ea5SJohn Baldwin 	handle_io_qpair(ioc, qp, qid);
602a8089ea5SJohn Baldwin 	return;
603a8089ea5SJohn Baldwin 
604a8089ea5SJohn Baldwin error:
605a8089ea5SJohn Baldwin 	nvmf_free_capsule(nc);
606a8089ea5SJohn Baldwin 	close(s);
607a8089ea5SJohn Baldwin }
608a8089ea5SJohn Baldwin 
609a8089ea5SJohn Baldwin static void *
610a8089ea5SJohn Baldwin io_socket_thread(void *arg)
611a8089ea5SJohn Baldwin {
612a8089ea5SJohn Baldwin 	struct nvmf_fabric_connect_data data;
613a8089ea5SJohn Baldwin 	struct nvmf_qpair_params qparams;
614a8089ea5SJohn Baldwin 	const struct nvmf_fabric_connect_cmd *cmd;
615a8089ea5SJohn Baldwin 	struct nvmf_capsule *nc;
616a8089ea5SJohn Baldwin 	struct nvmf_qpair *qp;
617a8089ea5SJohn Baldwin 	int s;
618a8089ea5SJohn Baldwin 
619a8089ea5SJohn Baldwin 	pthread_detach(pthread_self());
620a8089ea5SJohn Baldwin 
621a8089ea5SJohn Baldwin 	s = (intptr_t)arg;
622a8089ea5SJohn Baldwin 	memset(&qparams, 0, sizeof(qparams));
623a8089ea5SJohn Baldwin 	qparams.tcp.fd = s;
624a8089ea5SJohn Baldwin 
625a8089ea5SJohn Baldwin 	nc = NULL;
626a8089ea5SJohn Baldwin 	qp = nvmf_accept(io_na, &qparams, &nc, &data);
627a8089ea5SJohn Baldwin 	if (qp == NULL) {
628a8089ea5SJohn Baldwin 		warnx("Failed to create I/O qpair: %s",
629a8089ea5SJohn Baldwin 		    nvmf_association_error(io_na));
630a8089ea5SJohn Baldwin 		goto error;
631a8089ea5SJohn Baldwin 	}
632a8089ea5SJohn Baldwin 
633a8089ea5SJohn Baldwin 	if (kernel_io) {
634a8089ea5SJohn Baldwin 		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
635a8089ea5SJohn Baldwin 		goto error;
636a8089ea5SJohn Baldwin 	}
637a8089ea5SJohn Baldwin 
638a8089ea5SJohn Baldwin 	if (strcmp(data.subnqn, nqn) != 0) {
639a8089ea5SJohn Baldwin 		warn("I/O qpair with invalid SubNQN: %.*s",
640a8089ea5SJohn Baldwin 		    (int)sizeof(data.subnqn), data.subnqn);
641a8089ea5SJohn Baldwin 		nvmf_connect_invalid_parameters(nc, true,
642a8089ea5SJohn Baldwin 		    offsetof(struct nvmf_fabric_connect_data, subnqn));
643a8089ea5SJohn Baldwin 		goto error;
644a8089ea5SJohn Baldwin 	}
645a8089ea5SJohn Baldwin 
646a8089ea5SJohn Baldwin 	/* Is this an admin or I/O queue pair? */
647a8089ea5SJohn Baldwin 	cmd = nvmf_capsule_sqe(nc);
648a8089ea5SJohn Baldwin 	if (cmd->qid == 0)
649a8089ea5SJohn Baldwin 		connect_admin_qpair(s, qp, nc, &data);
650a8089ea5SJohn Baldwin 	else
651a8089ea5SJohn Baldwin 		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
652a8089ea5SJohn Baldwin 	nvmf_free_qpair(qp);
653a8089ea5SJohn Baldwin 	return (NULL);
654a8089ea5SJohn Baldwin 
655a8089ea5SJohn Baldwin error:
656a8089ea5SJohn Baldwin 	if (nc != NULL)
657a8089ea5SJohn Baldwin 		nvmf_free_capsule(nc);
658a8089ea5SJohn Baldwin 	if (qp != NULL)
659a8089ea5SJohn Baldwin 		nvmf_free_qpair(qp);
660a8089ea5SJohn Baldwin 	close(s);
661a8089ea5SJohn Baldwin 	return (NULL);
662a8089ea5SJohn Baldwin }
663a8089ea5SJohn Baldwin 
664a8089ea5SJohn Baldwin void
665a8089ea5SJohn Baldwin handle_io_socket(int s)
666a8089ea5SJohn Baldwin {
667a8089ea5SJohn Baldwin 	pthread_t thr;
668a8089ea5SJohn Baldwin 	int error;
669a8089ea5SJohn Baldwin 
670a8089ea5SJohn Baldwin 	error = pthread_create(&thr, NULL, io_socket_thread,
671a8089ea5SJohn Baldwin 	    (void *)(uintptr_t)s);
672a8089ea5SJohn Baldwin 	if (error != 0) {
673a8089ea5SJohn Baldwin 		warnc(error, "Failed to create I/O qpair thread");
674a8089ea5SJohn Baldwin 		close(s);
675a8089ea5SJohn Baldwin 	}
676a8089ea5SJohn Baldwin }
677