xref: /linux/drivers/nvme/host/fabrics.c (revision 34f7c6e7d4396090692a09789db231e12cb4762b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * NVMe over Fabrics common host code.
4  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5  */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/init.h>
8 #include <linux/miscdevice.h>
9 #include <linux/module.h>
10 #include <linux/mutex.h>
11 #include <linux/parser.h>
12 #include <linux/seq_file.h>
13 #include "nvme.h"
14 #include "fabrics.h"
15 
/* Registered fabrics transports; list changes are made under the rwsem. */
static DECLARE_RWSEM(nvmf_transports_rwsem);
static LIST_HEAD(nvmf_transports);

/* Known host identities; list changes are made under nvmf_hosts_mutex. */
static DEFINE_MUTEX(nvmf_hosts_mutex);
static LIST_HEAD(nvmf_hosts);

/* Host entry created at module init, used when no hostnqn= is supplied. */
static struct nvmf_host *nvmf_default_host;
23 
24 static struct nvmf_host *__nvmf_host_find(const char *hostnqn)
25 {
26 	struct nvmf_host *host;
27 
28 	list_for_each_entry(host, &nvmf_hosts, list) {
29 		if (!strcmp(host->nqn, hostnqn))
30 			return host;
31 	}
32 
33 	return NULL;
34 }
35 
36 static struct nvmf_host *nvmf_host_add(const char *hostnqn)
37 {
38 	struct nvmf_host *host;
39 
40 	mutex_lock(&nvmf_hosts_mutex);
41 	host = __nvmf_host_find(hostnqn);
42 	if (host) {
43 		kref_get(&host->ref);
44 		goto out_unlock;
45 	}
46 
47 	host = kmalloc(sizeof(*host), GFP_KERNEL);
48 	if (!host)
49 		goto out_unlock;
50 
51 	kref_init(&host->ref);
52 	strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
53 
54 	list_add_tail(&host->list, &nvmf_hosts);
55 out_unlock:
56 	mutex_unlock(&nvmf_hosts_mutex);
57 	return host;
58 }
59 
60 static struct nvmf_host *nvmf_host_default(void)
61 {
62 	struct nvmf_host *host;
63 
64 	host = kmalloc(sizeof(*host), GFP_KERNEL);
65 	if (!host)
66 		return NULL;
67 
68 	kref_init(&host->ref);
69 	uuid_gen(&host->id);
70 	snprintf(host->nqn, NVMF_NQN_SIZE,
71 		"nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
72 
73 	mutex_lock(&nvmf_hosts_mutex);
74 	list_add_tail(&host->list, &nvmf_hosts);
75 	mutex_unlock(&nvmf_hosts_mutex);
76 
77 	return host;
78 }
79 
80 static void nvmf_host_destroy(struct kref *ref)
81 {
82 	struct nvmf_host *host = container_of(ref, struct nvmf_host, ref);
83 
84 	mutex_lock(&nvmf_hosts_mutex);
85 	list_del(&host->list);
86 	mutex_unlock(&nvmf_hosts_mutex);
87 
88 	kfree(host);
89 }
90 
91 static void nvmf_host_put(struct nvmf_host *host)
92 {
93 	if (host)
94 		kref_put(&host->ref, nvmf_host_destroy);
95 }
96 
97 /**
98  * nvmf_get_address() -  Get address/port
99  * @ctrl:	Host NVMe controller instance which we got the address
100  * @buf:	OUTPUT parameter that will contain the address/port
101  * @size:	buffer size
102  */
103 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
104 {
105 	int len = 0;
106 
107 	if (ctrl->opts->mask & NVMF_OPT_TRADDR)
108 		len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
109 	if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
110 		len += scnprintf(buf + len, size - len, "%strsvcid=%s",
111 				(len) ? "," : "", ctrl->opts->trsvcid);
112 	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
113 		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
114 				(len) ? "," : "", ctrl->opts->host_traddr);
115 	if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)
116 		len += scnprintf(buf + len, size - len, "%shost_iface=%s",
117 				(len) ? "," : "", ctrl->opts->host_iface);
118 	len += scnprintf(buf + len, size - len, "\n");
119 
120 	return len;
121 }
122 EXPORT_SYMBOL_GPL(nvmf_get_address);
123 
124 /**
125  * nvmf_reg_read32() -  NVMe Fabrics "Property Get" API function.
126  * @ctrl:	Host NVMe controller instance maintaining the admin
127  *		queue used to submit the property read command to
128  *		the allocated NVMe controller resource on the target system.
129  * @off:	Starting offset value of the targeted property
130  *		register (see the fabrics section of the NVMe standard).
131  * @val:	OUTPUT parameter that will contain the value of
132  *		the property after a successful read.
133  *
134  * Used by the host system to retrieve a 32-bit capsule property value
135  * from an NVMe controller on the target system.
136  *
137  * ("Capsule property" is an "PCIe register concept" applied to the
138  * NVMe fabrics space.)
139  *
140  * Return:
141  *	0: successful read
142  *	> 0: NVMe error status code
143  *	< 0: Linux errno error code
144  */
145 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
146 {
147 	struct nvme_command cmd = { };
148 	union nvme_result res;
149 	int ret;
150 
151 	cmd.prop_get.opcode = nvme_fabrics_command;
152 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
153 	cmd.prop_get.offset = cpu_to_le32(off);
154 
155 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
156 			NVME_QID_ANY, 0, 0);
157 
158 	if (ret >= 0)
159 		*val = le64_to_cpu(res.u64);
160 	if (unlikely(ret != 0))
161 		dev_err(ctrl->device,
162 			"Property Get error: %d, offset %#x\n",
163 			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
164 
165 	return ret;
166 }
167 EXPORT_SYMBOL_GPL(nvmf_reg_read32);
168 
169 /**
170  * nvmf_reg_read64() -  NVMe Fabrics "Property Get" API function.
171  * @ctrl:	Host NVMe controller instance maintaining the admin
172  *		queue used to submit the property read command to
173  *		the allocated controller resource on the target system.
174  * @off:	Starting offset value of the targeted property
175  *		register (see the fabrics section of the NVMe standard).
176  * @val:	OUTPUT parameter that will contain the value of
177  *		the property after a successful read.
178  *
179  * Used by the host system to retrieve a 64-bit capsule property value
180  * from an NVMe controller on the target system.
181  *
182  * ("Capsule property" is an "PCIe register concept" applied to the
183  * NVMe fabrics space.)
184  *
185  * Return:
186  *	0: successful read
187  *	> 0: NVMe error status code
188  *	< 0: Linux errno error code
189  */
190 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
191 {
192 	struct nvme_command cmd = { };
193 	union nvme_result res;
194 	int ret;
195 
196 	cmd.prop_get.opcode = nvme_fabrics_command;
197 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
198 	cmd.prop_get.attrib = 1;
199 	cmd.prop_get.offset = cpu_to_le32(off);
200 
201 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, 0,
202 			NVME_QID_ANY, 0, 0);
203 
204 	if (ret >= 0)
205 		*val = le64_to_cpu(res.u64);
206 	if (unlikely(ret != 0))
207 		dev_err(ctrl->device,
208 			"Property Get error: %d, offset %#x\n",
209 			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
210 	return ret;
211 }
212 EXPORT_SYMBOL_GPL(nvmf_reg_read64);
213 
214 /**
215  * nvmf_reg_write32() -  NVMe Fabrics "Property Write" API function.
216  * @ctrl:	Host NVMe controller instance maintaining the admin
217  *		queue used to submit the property read command to
218  *		the allocated NVMe controller resource on the target system.
219  * @off:	Starting offset value of the targeted property
220  *		register (see the fabrics section of the NVMe standard).
221  * @val:	Input parameter that contains the value to be
222  *		written to the property.
223  *
224  * Used by the NVMe host system to write a 32-bit capsule property value
225  * to an NVMe controller on the target system.
226  *
227  * ("Capsule property" is an "PCIe register concept" applied to the
228  * NVMe fabrics space.)
229  *
230  * Return:
231  *	0: successful write
232  *	> 0: NVMe error status code
233  *	< 0: Linux errno error code
234  */
235 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
236 {
237 	struct nvme_command cmd = { };
238 	int ret;
239 
240 	cmd.prop_set.opcode = nvme_fabrics_command;
241 	cmd.prop_set.fctype = nvme_fabrics_type_property_set;
242 	cmd.prop_set.attrib = 0;
243 	cmd.prop_set.offset = cpu_to_le32(off);
244 	cmd.prop_set.value = cpu_to_le64(val);
245 
246 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, 0,
247 			NVME_QID_ANY, 0, 0);
248 	if (unlikely(ret))
249 		dev_err(ctrl->device,
250 			"Property Set error: %d, offset %#x\n",
251 			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
252 	return ret;
253 }
254 EXPORT_SYMBOL_GPL(nvmf_reg_write32);
255 
256 /**
257  * nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
258  * 				connect() errors.
259  * @ctrl:	The specific /dev/nvmeX device that had the error.
260  * @errval:	Error code to be decoded in a more human-friendly
261  * 		printout.
262  * @offset:	For use with the NVMe error code
263  * 		NVME_SC_CONNECT_INVALID_PARAM.
264  * @cmd:	This is the SQE portion of a submission capsule.
265  * @data:	This is the "Data" portion of a submission capsule.
266  */
267 static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
268 		int errval, int offset, struct nvme_command *cmd,
269 		struct nvmf_connect_data *data)
270 {
271 	int err_sctype = errval & ~NVME_SC_DNR;
272 
273 	switch (err_sctype) {
274 	case NVME_SC_CONNECT_INVALID_PARAM:
275 		if (offset >> 16) {
276 			char *inv_data = "Connect Invalid Data Parameter";
277 
278 			switch (offset & 0xffff) {
279 			case (offsetof(struct nvmf_connect_data, cntlid)):
280 				dev_err(ctrl->device,
281 					"%s, cntlid: %d\n",
282 					inv_data, data->cntlid);
283 				break;
284 			case (offsetof(struct nvmf_connect_data, hostnqn)):
285 				dev_err(ctrl->device,
286 					"%s, hostnqn \"%s\"\n",
287 					inv_data, data->hostnqn);
288 				break;
289 			case (offsetof(struct nvmf_connect_data, subsysnqn)):
290 				dev_err(ctrl->device,
291 					"%s, subsysnqn \"%s\"\n",
292 					inv_data, data->subsysnqn);
293 				break;
294 			default:
295 				dev_err(ctrl->device,
296 					"%s, starting byte offset: %d\n",
297 				       inv_data, offset & 0xffff);
298 				break;
299 			}
300 		} else {
301 			char *inv_sqe = "Connect Invalid SQE Parameter";
302 
303 			switch (offset) {
304 			case (offsetof(struct nvmf_connect_command, qid)):
305 				dev_err(ctrl->device,
306 				       "%s, qid %d\n",
307 					inv_sqe, cmd->connect.qid);
308 				break;
309 			default:
310 				dev_err(ctrl->device,
311 					"%s, starting byte offset: %d\n",
312 					inv_sqe, offset);
313 			}
314 		}
315 		break;
316 	case NVME_SC_CONNECT_INVALID_HOST:
317 		dev_err(ctrl->device,
318 			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
319 			data->subsysnqn, data->hostnqn);
320 		break;
321 	case NVME_SC_CONNECT_CTRL_BUSY:
322 		dev_err(ctrl->device,
323 			"Connect command failed: controller is busy or not available\n");
324 		break;
325 	case NVME_SC_CONNECT_FORMAT:
326 		dev_err(ctrl->device,
327 			"Connect incompatible format: %d",
328 			cmd->connect.recfmt);
329 		break;
330 	case NVME_SC_HOST_PATH_ERROR:
331 		dev_err(ctrl->device,
332 			"Connect command failed: host path error\n");
333 		break;
334 	default:
335 		dev_err(ctrl->device,
336 			"Connect command failed, error wo/DNR bit: %d\n",
337 			err_sctype);
338 		break;
339 	}
340 }
341 
342 /**
343  * nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect"
344  *				API function.
345  * @ctrl:	Host nvme controller instance used to request
346  *              a new NVMe controller allocation on the target
347  *              system and  establish an NVMe Admin connection to
348  *              that controller.
349  *
350  * This function enables an NVMe host device to request a new allocation of
351  * an NVMe controller resource on a target system as well establish a
352  * fabrics-protocol connection of the NVMe Admin queue between the
353  * host system device and the allocated NVMe controller on the
354  * target system via a NVMe Fabrics "Connect" command.
355  *
356  * Return:
357  *	0: success
358  *	> 0: NVMe error status code
359  *	< 0: Linux errno error code
360  *
361  */
362 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
363 {
364 	struct nvme_command cmd = { };
365 	union nvme_result res;
366 	struct nvmf_connect_data *data;
367 	int ret;
368 
369 	cmd.connect.opcode = nvme_fabrics_command;
370 	cmd.connect.fctype = nvme_fabrics_type_connect;
371 	cmd.connect.qid = 0;
372 	cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
373 
374 	/*
375 	 * Set keep-alive timeout in seconds granularity (ms * 1000)
376 	 */
377 	cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000);
378 
379 	if (ctrl->opts->disable_sqflow)
380 		cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
381 
382 	data = kzalloc(sizeof(*data), GFP_KERNEL);
383 	if (!data)
384 		return -ENOMEM;
385 
386 	uuid_copy(&data->hostid, &ctrl->opts->host->id);
387 	data->cntlid = cpu_to_le16(0xffff);
388 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
389 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
390 
391 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
392 			data, sizeof(*data), 0, NVME_QID_ANY, 1,
393 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
394 	if (ret) {
395 		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
396 				       &cmd, data);
397 		goto out_free_data;
398 	}
399 
400 	ctrl->cntlid = le16_to_cpu(res.u16);
401 
402 out_free_data:
403 	kfree(data);
404 	return ret;
405 }
406 EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue);
407 
408 /**
409  * nvmf_connect_io_queue() - NVMe Fabrics I/O Queue "Connect"
410  *			     API function.
411  * @ctrl:	Host nvme controller instance used to establish an
412  *		NVMe I/O queue connection to the already allocated NVMe
413  *		controller on the target system.
414  * @qid:	NVMe I/O queue number for the new I/O connection between
415  *		host and target (note qid == 0 is illegal as this is
416  *		the Admin queue, per NVMe standard).
417  *
418  * This function issues a fabrics-protocol connection
419  * of a NVMe I/O queue (via NVMe Fabrics "Connect" command)
420  * between the host system device and the allocated NVMe controller
421  * on the target system.
422  *
423  * Return:
424  *	0: success
425  *	> 0: NVMe error status code
426  *	< 0: Linux errno error code
427  */
428 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
429 {
430 	struct nvme_command cmd = { };
431 	struct nvmf_connect_data *data;
432 	union nvme_result res;
433 	int ret;
434 
435 	cmd.connect.opcode = nvme_fabrics_command;
436 	cmd.connect.fctype = nvme_fabrics_type_connect;
437 	cmd.connect.qid = cpu_to_le16(qid);
438 	cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
439 
440 	if (ctrl->opts->disable_sqflow)
441 		cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
442 
443 	data = kzalloc(sizeof(*data), GFP_KERNEL);
444 	if (!data)
445 		return -ENOMEM;
446 
447 	uuid_copy(&data->hostid, &ctrl->opts->host->id);
448 	data->cntlid = cpu_to_le16(ctrl->cntlid);
449 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
450 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
451 
452 	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
453 			data, sizeof(*data), 0, qid, 1,
454 			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
455 	if (ret) {
456 		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
457 				       &cmd, data);
458 	}
459 	kfree(data);
460 	return ret;
461 }
462 EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
463 
464 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
465 {
466 	if (ctrl->opts->max_reconnects == -1 ||
467 	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
468 		return true;
469 
470 	return false;
471 }
472 EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
473 
474 /**
475  * nvmf_register_transport() - NVMe Fabrics Library registration function.
476  * @ops:	Transport ops instance to be registered to the
477  *		common fabrics library.
478  *
479  * API function that registers the type of specific transport fabric
480  * being implemented to the common NVMe fabrics library. Part of
481  * the overall init sequence of starting up a fabrics driver.
482  */
483 int nvmf_register_transport(struct nvmf_transport_ops *ops)
484 {
485 	if (!ops->create_ctrl)
486 		return -EINVAL;
487 
488 	down_write(&nvmf_transports_rwsem);
489 	list_add_tail(&ops->entry, &nvmf_transports);
490 	up_write(&nvmf_transports_rwsem);
491 
492 	return 0;
493 }
494 EXPORT_SYMBOL_GPL(nvmf_register_transport);
495 
496 /**
497  * nvmf_unregister_transport() - NVMe Fabrics Library unregistration function.
498  * @ops:	Transport ops instance to be unregistered from the
499  *		common fabrics library.
500  *
501  * Fabrics API function that unregisters the type of specific transport
502  * fabric being implemented from the common NVMe fabrics library.
503  * Part of the overall exit sequence of unloading the implemented driver.
504  */
505 void nvmf_unregister_transport(struct nvmf_transport_ops *ops)
506 {
507 	down_write(&nvmf_transports_rwsem);
508 	list_del(&ops->entry);
509 	up_write(&nvmf_transports_rwsem);
510 }
511 EXPORT_SYMBOL_GPL(nvmf_unregister_transport);
512 
513 static struct nvmf_transport_ops *nvmf_lookup_transport(
514 		struct nvmf_ctrl_options *opts)
515 {
516 	struct nvmf_transport_ops *ops;
517 
518 	lockdep_assert_held(&nvmf_transports_rwsem);
519 
520 	list_for_each_entry(ops, &nvmf_transports, entry) {
521 		if (strcmp(ops->name, opts->transport) == 0)
522 			return ops;
523 	}
524 
525 	return NULL;
526 }
527 
528 static const match_table_t opt_tokens = {
529 	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
530 	{ NVMF_OPT_TRADDR,		"traddr=%s"		},
531 	{ NVMF_OPT_TRSVCID,		"trsvcid=%s"		},
532 	{ NVMF_OPT_NQN,			"nqn=%s"		},
533 	{ NVMF_OPT_QUEUE_SIZE,		"queue_size=%d"		},
534 	{ NVMF_OPT_NR_IO_QUEUES,	"nr_io_queues=%d"	},
535 	{ NVMF_OPT_RECONNECT_DELAY,	"reconnect_delay=%d"	},
536 	{ NVMF_OPT_CTRL_LOSS_TMO,	"ctrl_loss_tmo=%d"	},
537 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
538 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
539 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
540 	{ NVMF_OPT_HOST_IFACE,		"host_iface=%s"		},
541 	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
542 	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
543 	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
544 	{ NVMF_OPT_HDR_DIGEST,		"hdr_digest"		},
545 	{ NVMF_OPT_DATA_DIGEST,		"data_digest"		},
546 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
547 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
548 	{ NVMF_OPT_TOS,			"tos=%d"		},
549 	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d"	},
550 	{ NVMF_OPT_DISCOVERY,		"discovery"		},
551 	{ NVMF_OPT_ERR,			NULL			}
552 };
553 
554 static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
555 		const char *buf)
556 {
557 	substring_t args[MAX_OPT_ARGS];
558 	char *options, *o, *p;
559 	int token, ret = 0;
560 	size_t nqnlen  = 0;
561 	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
562 	uuid_t hostid;
563 
564 	/* Set defaults */
565 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
566 	opts->nr_io_queues = num_online_cpus();
567 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
568 	opts->kato = 0;
569 	opts->duplicate_connect = false;
570 	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
571 	opts->hdr_digest = false;
572 	opts->data_digest = false;
573 	opts->tos = -1; /* < 0 == use transport default */
574 
575 	options = o = kstrdup(buf, GFP_KERNEL);
576 	if (!options)
577 		return -ENOMEM;
578 
579 	uuid_gen(&hostid);
580 
581 	while ((p = strsep(&o, ",\n")) != NULL) {
582 		if (!*p)
583 			continue;
584 
585 		token = match_token(p, opt_tokens, args);
586 		opts->mask |= token;
587 		switch (token) {
588 		case NVMF_OPT_TRANSPORT:
589 			p = match_strdup(args);
590 			if (!p) {
591 				ret = -ENOMEM;
592 				goto out;
593 			}
594 			kfree(opts->transport);
595 			opts->transport = p;
596 			break;
597 		case NVMF_OPT_NQN:
598 			p = match_strdup(args);
599 			if (!p) {
600 				ret = -ENOMEM;
601 				goto out;
602 			}
603 			kfree(opts->subsysnqn);
604 			opts->subsysnqn = p;
605 			nqnlen = strlen(opts->subsysnqn);
606 			if (nqnlen >= NVMF_NQN_SIZE) {
607 				pr_err("%s needs to be < %d bytes\n",
608 					opts->subsysnqn, NVMF_NQN_SIZE);
609 				ret = -EINVAL;
610 				goto out;
611 			}
612 			opts->discovery_nqn =
613 				!(strcmp(opts->subsysnqn,
614 					 NVME_DISC_SUBSYS_NAME));
615 			break;
616 		case NVMF_OPT_TRADDR:
617 			p = match_strdup(args);
618 			if (!p) {
619 				ret = -ENOMEM;
620 				goto out;
621 			}
622 			kfree(opts->traddr);
623 			opts->traddr = p;
624 			break;
625 		case NVMF_OPT_TRSVCID:
626 			p = match_strdup(args);
627 			if (!p) {
628 				ret = -ENOMEM;
629 				goto out;
630 			}
631 			kfree(opts->trsvcid);
632 			opts->trsvcid = p;
633 			break;
634 		case NVMF_OPT_QUEUE_SIZE:
635 			if (match_int(args, &token)) {
636 				ret = -EINVAL;
637 				goto out;
638 			}
639 			if (token < NVMF_MIN_QUEUE_SIZE ||
640 			    token > NVMF_MAX_QUEUE_SIZE) {
641 				pr_err("Invalid queue_size %d\n", token);
642 				ret = -EINVAL;
643 				goto out;
644 			}
645 			opts->queue_size = token;
646 			break;
647 		case NVMF_OPT_NR_IO_QUEUES:
648 			if (match_int(args, &token)) {
649 				ret = -EINVAL;
650 				goto out;
651 			}
652 			if (token <= 0) {
653 				pr_err("Invalid number of IOQs %d\n", token);
654 				ret = -EINVAL;
655 				goto out;
656 			}
657 			if (opts->discovery_nqn) {
658 				pr_debug("Ignoring nr_io_queues value for discovery controller\n");
659 				break;
660 			}
661 
662 			opts->nr_io_queues = min_t(unsigned int,
663 					num_online_cpus(), token);
664 			break;
665 		case NVMF_OPT_KATO:
666 			if (match_int(args, &token)) {
667 				ret = -EINVAL;
668 				goto out;
669 			}
670 
671 			if (token < 0) {
672 				pr_err("Invalid keep_alive_tmo %d\n", token);
673 				ret = -EINVAL;
674 				goto out;
675 			} else if (token == 0 && !opts->discovery_nqn) {
676 				/* Allowed for debug */
677 				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
678 			}
679 			opts->kato = token;
680 			break;
681 		case NVMF_OPT_CTRL_LOSS_TMO:
682 			if (match_int(args, &token)) {
683 				ret = -EINVAL;
684 				goto out;
685 			}
686 
687 			if (token < 0)
688 				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
689 			ctrl_loss_tmo = token;
690 			break;
691 		case NVMF_OPT_FAIL_FAST_TMO:
692 			if (match_int(args, &token)) {
693 				ret = -EINVAL;
694 				goto out;
695 			}
696 
697 			if (token >= 0)
698 				pr_warn("I/O fail on reconnect controller after %d sec\n",
699 					token);
700 			else
701 				token = -1;
702 
703 			opts->fast_io_fail_tmo = token;
704 			break;
705 		case NVMF_OPT_HOSTNQN:
706 			if (opts->host) {
707 				pr_err("hostnqn already user-assigned: %s\n",
708 				       opts->host->nqn);
709 				ret = -EADDRINUSE;
710 				goto out;
711 			}
712 			p = match_strdup(args);
713 			if (!p) {
714 				ret = -ENOMEM;
715 				goto out;
716 			}
717 			nqnlen = strlen(p);
718 			if (nqnlen >= NVMF_NQN_SIZE) {
719 				pr_err("%s needs to be < %d bytes\n",
720 					p, NVMF_NQN_SIZE);
721 				kfree(p);
722 				ret = -EINVAL;
723 				goto out;
724 			}
725 			opts->host = nvmf_host_add(p);
726 			kfree(p);
727 			if (!opts->host) {
728 				ret = -ENOMEM;
729 				goto out;
730 			}
731 			break;
732 		case NVMF_OPT_RECONNECT_DELAY:
733 			if (match_int(args, &token)) {
734 				ret = -EINVAL;
735 				goto out;
736 			}
737 			if (token <= 0) {
738 				pr_err("Invalid reconnect_delay %d\n", token);
739 				ret = -EINVAL;
740 				goto out;
741 			}
742 			opts->reconnect_delay = token;
743 			break;
744 		case NVMF_OPT_HOST_TRADDR:
745 			p = match_strdup(args);
746 			if (!p) {
747 				ret = -ENOMEM;
748 				goto out;
749 			}
750 			kfree(opts->host_traddr);
751 			opts->host_traddr = p;
752 			break;
753 		case NVMF_OPT_HOST_IFACE:
754 			p = match_strdup(args);
755 			if (!p) {
756 				ret = -ENOMEM;
757 				goto out;
758 			}
759 			kfree(opts->host_iface);
760 			opts->host_iface = p;
761 			break;
762 		case NVMF_OPT_HOST_ID:
763 			p = match_strdup(args);
764 			if (!p) {
765 				ret = -ENOMEM;
766 				goto out;
767 			}
768 			ret = uuid_parse(p, &hostid);
769 			if (ret) {
770 				pr_err("Invalid hostid %s\n", p);
771 				ret = -EINVAL;
772 				kfree(p);
773 				goto out;
774 			}
775 			kfree(p);
776 			break;
777 		case NVMF_OPT_DUP_CONNECT:
778 			opts->duplicate_connect = true;
779 			break;
780 		case NVMF_OPT_DISABLE_SQFLOW:
781 			opts->disable_sqflow = true;
782 			break;
783 		case NVMF_OPT_HDR_DIGEST:
784 			opts->hdr_digest = true;
785 			break;
786 		case NVMF_OPT_DATA_DIGEST:
787 			opts->data_digest = true;
788 			break;
789 		case NVMF_OPT_NR_WRITE_QUEUES:
790 			if (match_int(args, &token)) {
791 				ret = -EINVAL;
792 				goto out;
793 			}
794 			if (token <= 0) {
795 				pr_err("Invalid nr_write_queues %d\n", token);
796 				ret = -EINVAL;
797 				goto out;
798 			}
799 			opts->nr_write_queues = token;
800 			break;
801 		case NVMF_OPT_NR_POLL_QUEUES:
802 			if (match_int(args, &token)) {
803 				ret = -EINVAL;
804 				goto out;
805 			}
806 			if (token <= 0) {
807 				pr_err("Invalid nr_poll_queues %d\n", token);
808 				ret = -EINVAL;
809 				goto out;
810 			}
811 			opts->nr_poll_queues = token;
812 			break;
813 		case NVMF_OPT_TOS:
814 			if (match_int(args, &token)) {
815 				ret = -EINVAL;
816 				goto out;
817 			}
818 			if (token < 0) {
819 				pr_err("Invalid type of service %d\n", token);
820 				ret = -EINVAL;
821 				goto out;
822 			}
823 			if (token > 255) {
824 				pr_warn("Clamping type of service to 255\n");
825 				token = 255;
826 			}
827 			opts->tos = token;
828 			break;
829 		case NVMF_OPT_DISCOVERY:
830 			opts->discovery_nqn = true;
831 			break;
832 		default:
833 			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
834 				p);
835 			ret = -EINVAL;
836 			goto out;
837 		}
838 	}
839 
840 	if (opts->discovery_nqn) {
841 		opts->nr_io_queues = 0;
842 		opts->nr_write_queues = 0;
843 		opts->nr_poll_queues = 0;
844 		opts->duplicate_connect = true;
845 	} else {
846 		if (!opts->kato)
847 			opts->kato = NVME_DEFAULT_KATO;
848 	}
849 	if (ctrl_loss_tmo < 0) {
850 		opts->max_reconnects = -1;
851 	} else {
852 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
853 						opts->reconnect_delay);
854 		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
855 			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
856 				opts->fast_io_fail_tmo, ctrl_loss_tmo);
857 	}
858 
859 	if (!opts->host) {
860 		kref_get(&nvmf_default_host->ref);
861 		opts->host = nvmf_default_host;
862 	}
863 
864 	uuid_copy(&opts->host->id, &hostid);
865 
866 out:
867 	kfree(options);
868 	return ret;
869 }
870 
871 static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
872 		unsigned int required_opts)
873 {
874 	if ((opts->mask & required_opts) != required_opts) {
875 		unsigned int i;
876 
877 		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
878 			if ((opt_tokens[i].token & required_opts) &&
879 			    !(opt_tokens[i].token & opts->mask)) {
880 				pr_warn("missing parameter '%s'\n",
881 					opt_tokens[i].pattern);
882 			}
883 		}
884 
885 		return -EINVAL;
886 	}
887 
888 	return 0;
889 }
890 
891 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
892 		struct nvmf_ctrl_options *opts)
893 {
894 	if (!nvmf_ctlr_matches_baseopts(ctrl, opts) ||
895 	    strcmp(opts->traddr, ctrl->opts->traddr) ||
896 	    strcmp(opts->trsvcid, ctrl->opts->trsvcid))
897 		return false;
898 
899 	/*
900 	 * Checking the local address is rough. In most cases, none is specified
901 	 * and the host port is selected by the stack.
902 	 *
903 	 * Assume no match if:
904 	 * -  local address is specified and address is not the same
905 	 * -  local address is not specified but remote is, or vice versa
906 	 *    (admin using specific host_traddr when it matters).
907 	 */
908 	if ((opts->mask & NVMF_OPT_HOST_TRADDR) &&
909 	    (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
910 		if (strcmp(opts->host_traddr, ctrl->opts->host_traddr))
911 			return false;
912 	} else if ((opts->mask & NVMF_OPT_HOST_TRADDR) ||
913 		   (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
914 		return false;
915 	}
916 
917 	return true;
918 }
919 EXPORT_SYMBOL_GPL(nvmf_ip_options_match);
920 
921 static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
922 		unsigned int allowed_opts)
923 {
924 	if (opts->mask & ~allowed_opts) {
925 		unsigned int i;
926 
927 		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
928 			if ((opt_tokens[i].token & opts->mask) &&
929 			    (opt_tokens[i].token & ~allowed_opts)) {
930 				pr_warn("invalid parameter '%s'\n",
931 					opt_tokens[i].pattern);
932 			}
933 		}
934 
935 		return -EINVAL;
936 	}
937 
938 	return 0;
939 }
940 
941 void nvmf_free_options(struct nvmf_ctrl_options *opts)
942 {
943 	nvmf_host_put(opts->host);
944 	kfree(opts->transport);
945 	kfree(opts->traddr);
946 	kfree(opts->trsvcid);
947 	kfree(opts->subsysnqn);
948 	kfree(opts->host_traddr);
949 	kfree(opts->host_iface);
950 	kfree(opts);
951 }
952 EXPORT_SYMBOL_GPL(nvmf_free_options);
953 
/* Options every controller creation request must carry. */
#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)

/* Options accepted for every transport, whatever the transport allows. */
#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | \
				 NVMF_OPT_NR_IO_QUEUES | \
				 NVMF_OPT_KATO | \
				 NVMF_OPT_HOSTNQN | \
				 NVMF_OPT_HOST_ID | \
				 NVMF_OPT_DUP_CONNECT | \
				 NVMF_OPT_DISABLE_SQFLOW | \
				 NVMF_OPT_DISCOVERY | \
				 NVMF_OPT_FAIL_FAST_TMO)
960 
961 static struct nvme_ctrl *
962 nvmf_create_ctrl(struct device *dev, const char *buf)
963 {
964 	struct nvmf_ctrl_options *opts;
965 	struct nvmf_transport_ops *ops;
966 	struct nvme_ctrl *ctrl;
967 	int ret;
968 
969 	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
970 	if (!opts)
971 		return ERR_PTR(-ENOMEM);
972 
973 	ret = nvmf_parse_options(opts, buf);
974 	if (ret)
975 		goto out_free_opts;
976 
977 
978 	request_module("nvme-%s", opts->transport);
979 
980 	/*
981 	 * Check the generic options first as we need a valid transport for
982 	 * the lookup below.  Then clear the generic flags so that transport
983 	 * drivers don't have to care about them.
984 	 */
985 	ret = nvmf_check_required_opts(opts, NVMF_REQUIRED_OPTS);
986 	if (ret)
987 		goto out_free_opts;
988 	opts->mask &= ~NVMF_REQUIRED_OPTS;
989 
990 	down_read(&nvmf_transports_rwsem);
991 	ops = nvmf_lookup_transport(opts);
992 	if (!ops) {
993 		pr_info("no handler found for transport %s.\n",
994 			opts->transport);
995 		ret = -EINVAL;
996 		goto out_unlock;
997 	}
998 
999 	if (!try_module_get(ops->module)) {
1000 		ret = -EBUSY;
1001 		goto out_unlock;
1002 	}
1003 	up_read(&nvmf_transports_rwsem);
1004 
1005 	ret = nvmf_check_required_opts(opts, ops->required_opts);
1006 	if (ret)
1007 		goto out_module_put;
1008 	ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS |
1009 				ops->allowed_opts | ops->required_opts);
1010 	if (ret)
1011 		goto out_module_put;
1012 
1013 	ctrl = ops->create_ctrl(dev, opts);
1014 	if (IS_ERR(ctrl)) {
1015 		ret = PTR_ERR(ctrl);
1016 		goto out_module_put;
1017 	}
1018 
1019 	module_put(ops->module);
1020 	return ctrl;
1021 
1022 out_module_put:
1023 	module_put(ops->module);
1024 	goto out_free_opts;
1025 out_unlock:
1026 	up_read(&nvmf_transports_rwsem);
1027 out_free_opts:
1028 	nvmf_free_options(opts);
1029 	return ERR_PTR(ret);
1030 }
1031 
/* Class and device created at module init; nvmf_device parents new ctrls. */
static struct class *nvmf_class;
static struct device *nvmf_device;

/* Serializes the write and show handlers of the nvme-fabrics misc device. */
static DEFINE_MUTEX(nvmf_dev_mutex);
1035 
1036 static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
1037 		size_t count, loff_t *pos)
1038 {
1039 	struct seq_file *seq_file = file->private_data;
1040 	struct nvme_ctrl *ctrl;
1041 	const char *buf;
1042 	int ret = 0;
1043 
1044 	if (count > PAGE_SIZE)
1045 		return -ENOMEM;
1046 
1047 	buf = memdup_user_nul(ubuf, count);
1048 	if (IS_ERR(buf))
1049 		return PTR_ERR(buf);
1050 
1051 	mutex_lock(&nvmf_dev_mutex);
1052 	if (seq_file->private) {
1053 		ret = -EINVAL;
1054 		goto out_unlock;
1055 	}
1056 
1057 	ctrl = nvmf_create_ctrl(nvmf_device, buf);
1058 	if (IS_ERR(ctrl)) {
1059 		ret = PTR_ERR(ctrl);
1060 		goto out_unlock;
1061 	}
1062 
1063 	seq_file->private = ctrl;
1064 
1065 out_unlock:
1066 	mutex_unlock(&nvmf_dev_mutex);
1067 	kfree(buf);
1068 	return ret ? ret : count;
1069 }
1070 
1071 static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
1072 {
1073 	const struct match_token *tok;
1074 	int idx;
1075 
1076 	/*
1077 	 * Add dummy entries for instance and cntlid to
1078 	 * signal an invalid/non-existing controller
1079 	 */
1080 	seq_puts(seq_file, "instance=-1,cntlid=-1");
1081 	for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) {
1082 		tok = &opt_tokens[idx];
1083 		if (tok->token == NVMF_OPT_ERR)
1084 			continue;
1085 		seq_puts(seq_file, ",");
1086 		seq_puts(seq_file, tok->pattern);
1087 	}
1088 	seq_puts(seq_file, "\n");
1089 }
1090 
1091 static int nvmf_dev_show(struct seq_file *seq_file, void *private)
1092 {
1093 	struct nvme_ctrl *ctrl;
1094 
1095 	mutex_lock(&nvmf_dev_mutex);
1096 	ctrl = seq_file->private;
1097 	if (!ctrl) {
1098 		__nvmf_concat_opt_tokens(seq_file);
1099 		goto out_unlock;
1100 	}
1101 
1102 	seq_printf(seq_file, "instance=%d,cntlid=%d\n",
1103 			ctrl->instance, ctrl->cntlid);
1104 
1105 out_unlock:
1106 	mutex_unlock(&nvmf_dev_mutex);
1107 	return 0;
1108 }
1109 
1110 static int nvmf_dev_open(struct inode *inode, struct file *file)
1111 {
1112 	/*
1113 	 * The miscdevice code initializes file->private_data, but doesn't
1114 	 * make use of it later.
1115 	 */
1116 	file->private_data = NULL;
1117 	return single_open(file, nvmf_dev_show, NULL);
1118 }
1119 
1120 static int nvmf_dev_release(struct inode *inode, struct file *file)
1121 {
1122 	struct seq_file *seq_file = file->private_data;
1123 	struct nvme_ctrl *ctrl = seq_file->private;
1124 
1125 	if (ctrl)
1126 		nvme_put_ctrl(ctrl);
1127 	return single_release(inode, file);
1128 }
1129 
1130 static const struct file_operations nvmf_dev_fops = {
1131 	.owner		= THIS_MODULE,
1132 	.write		= nvmf_dev_write,
1133 	.read		= seq_read,
1134 	.open		= nvmf_dev_open,
1135 	.release	= nvmf_dev_release,
1136 };
1137 
1138 static struct miscdevice nvmf_misc = {
1139 	.minor		= MISC_DYNAMIC_MINOR,
1140 	.name           = "nvme-fabrics",
1141 	.fops		= &nvmf_dev_fops,
1142 };
1143 
1144 static int __init nvmf_init(void)
1145 {
1146 	int ret;
1147 
1148 	nvmf_default_host = nvmf_host_default();
1149 	if (!nvmf_default_host)
1150 		return -ENOMEM;
1151 
1152 	nvmf_class = class_create(THIS_MODULE, "nvme-fabrics");
1153 	if (IS_ERR(nvmf_class)) {
1154 		pr_err("couldn't register class nvme-fabrics\n");
1155 		ret = PTR_ERR(nvmf_class);
1156 		goto out_free_host;
1157 	}
1158 
1159 	nvmf_device =
1160 		device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
1161 	if (IS_ERR(nvmf_device)) {
1162 		pr_err("couldn't create nvme-fabris device!\n");
1163 		ret = PTR_ERR(nvmf_device);
1164 		goto out_destroy_class;
1165 	}
1166 
1167 	ret = misc_register(&nvmf_misc);
1168 	if (ret) {
1169 		pr_err("couldn't register misc device: %d\n", ret);
1170 		goto out_destroy_device;
1171 	}
1172 
1173 	return 0;
1174 
1175 out_destroy_device:
1176 	device_destroy(nvmf_class, MKDEV(0, 0));
1177 out_destroy_class:
1178 	class_destroy(nvmf_class);
1179 out_free_host:
1180 	nvmf_host_put(nvmf_default_host);
1181 	return ret;
1182 }
1183 
1184 static void __exit nvmf_exit(void)
1185 {
1186 	misc_deregister(&nvmf_misc);
1187 	device_destroy(nvmf_class, MKDEV(0, 0));
1188 	class_destroy(nvmf_class);
1189 	nvmf_host_put(nvmf_default_host);
1190 
1191 	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
1192 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
1193 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
1194 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
1195 	BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
1196 }
1197 
1198 MODULE_LICENSE("GPL v2");
1199 
1200 module_init(nvmf_init);
1201 module_exit(nvmf_exit);
1202