xref: /illumos-gate/usr/src/uts/common/io/eedev/eedev.c (revision 04a1c1a11476a9a84da46c1937024cde61ddb850)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2025 Oxide Computer Company
14  */
15 
16 /*
17  * eedev(4D): EEPROM support module.
18  *
19  * This module exists to make it easier to read and write various eeprom style
20  * devices and have a single implementation for the surrounding character glue.
21  * It provides and exposes the minor nodes.
22  *
23  * --------------------------
24  * Driver and User Interfaces
25  * --------------------------
26  *
27  * Drivers can register a number of logical devices by creating an eedev_hdl_t
28  * and registering it by calling eedev_create(). Once created, the eedev driver
29  * creates a corresponding minor node which will show up under /dev/eeprom as
30  * /dev/eeprom/<driver>/<instance>/<name>. When the driver doesn't provide a
31  * name, "eeprom" is used. The way that this name is communicated to userland
32  * and understood by the devfsadm plugin is to use a ':' delineated minor node
33  * name. So when we create a node we use "<driver>:<instance>:<name>".
34  *
35  * As part of registering with us, the driver provides a bunch of information
36  * about the device in question including:
37  *
38  *  1. The overall capacity of the device. We set the 64-bit DDI "Size" property
39  *     with this information. This is used by specfs in its VOP_GETATTR()
40  *     implementation allowing userland to see the size of the device.
41  *
42  *  2. The number of bytes per-device logical address. Consider a 512-byte
43  *     EEPROM. You can think of this generally as 512 1-byte registers. Some
44  *     devices may phrase this as 256 2-byte registers. This is not the same as
45  *     a device's page size. A different way to put it is that this is the
46  *     device's smallest read and write it can perform.
47  *
48  *  3. The device also gives us page segment information. This segmentation
49  *     information is used to make sure that I/O requests don't cross device
50  *     boundaries that would cause the device to read/write from the start of
 *     the segment. For example, a device with a 32-byte page can only
52  *     write bytes in a single 32-byte aligned region at a time. Exceeding this
53  *     leads it to continue writing at the start of the 32-byte region.
54  *     Something most folks don't want!
55  *
56  *  4. The device gives us information about the maximum amount of read and
57  *     write I/O it can do at any time. This may be a property of the device or
58  *     the property of the I/O bus that it's operating on. For example, an I2C
59  *     based EEPROM is going to be constrained by its controller. Some SMBus
60  *     controllers will limit the I/O to up to 32-bytes.
61  *
62  * When issuing a read() or write() request, the framework will inherently limit
63  * the amount of I/O to be in accordance with this. In addition, today it always
64  * returns short reads and short writes. This is that case where read(2) or
65  * write(2) say they can return less data than was requested! We mostly do that
66  * for simplicity at our end today.
67  *
68  * Finally, when it comes to device interfaces, we explicitly don't guarantee
69  * any serialization to the device. We leave that at the discretion of the
70  * device implementer.
71  *
72  * ----------------
73  * Device Lifetimes
74  * ----------------
75  *
76  * A side effect of the eedev pseudo-device owning the minor nodes is that it
77  * means there is no way for us to correlate a call to detach() eedev(4D) with
78  * that of a driver providing the EEPROM. Effectively, we end up implementing
79  * the same logic as /devices. When a device detaches, we don't actually remove
80  * the minor node. It is only when the driver is actually removed from the
81  * system that we do.
82  *
83  * Instead, when someone calls open(2) on a device, we will ensure that the
84  * provider module is loaded and that it has recreated its existing minor node.
85  * Once that happens, as long as someone holds the eedev minor open we will have
86  * a corresponding NDI hold on the device, ensuring it cannot disappear until
87  * close(2) has been called. There is one bit of trickiness to be aware of: the
88  * DDI will call open(2) multiple times, but it will only call close(2) at the
89  * final time. In general, this is what we want, but it means that we don't
90  * actually track the number of open(2) calls today because everything is using
91  * the same minor. If we were to use cloning opens, then that would generally
92  * change.
93  *
94  * This leads to the following overall locking rules:
95  *
96  *  1. Entering the NDI must be done while no other locks are held. It is
97  *     acceptable to put an NDI hold on a parent and then exit the NDI devi
98  *     lock.
99  *
100  *  2. No NDI operations should be performed while holding the eedev mutex. In
 *     particular, calls to ndi_devi_config_one() (or others) should not be
102  *     performed while holding locks.
103  *
104  *  3. The eedev.eedev_mutex should be the first mutex taken in the driver and
105  *     used when looking at information about the overall state of the devices
106  *     and the corresponding list_t structures. Only one thread should attempt
107  *     to call into and bring a driver back to life.
108  *
109  *  4. When calling into devices to perform read() and write() operations, one
110  *     should not hold any locks. In general, only read-only information should
111  *     be required in those operations.
112  *
113  * -----------
114  * Future Work
115  * -----------
116  *
117  * There are a few areas and things that the eedev framework doesn't handle
118  * today, that we think would be good for the future:
119  *
120  *  1. It would be nice to have support for FEXCL. We would implement this by
121  *     using a cloning open. If we do this, we should also add a corresponding
122  *     ioctl() to allow for similar behavior at non-open time that would go
123  *     alongside this like we have for other devices with transactions.
124  *
125  *  2. Today we don't plumb through any information about device security
126  *     features. Many devices support some form of write-protection. It would be
 *     good to plumb this through and allow it to be set from a series of
 *     ioctls and to have a corresponding user command.
128  *     and to have a corresponding user command.
129  *
130  *  3. It may end up making sense to revisit the constraints that we have around
131  *     alignment and not performing read-modify-write if we have devices with a
132  *     multi-byte read/write granularity.
133  *
134  *  4. Similarly, based on experience from additional consumers, we may need to
135  *     revisit the fact that we don't try to perform I/O to completion. In
136  *     general, these devices are on the smaller end (< 1 MiB) and are not
137  *     designed assuming massive I/O, so this is hopefully not a problem.
138  */
139 
140 #include <sys/ddi.h>
141 #include <sys/sunddi.h>
142 #include <sys/stat.h>
143 #include <sys/open.h>
144 #include <sys/types.h>
145 #include <sys/file.h>
146 #include <sys/conf.h>
147 #include <sys/avl.h>
148 #include <sys/stddef.h>
149 #include <sys/sysmacros.h>
150 #include <sys/id_space.h>
151 #include <sys/mkdev.h>
152 #include <sys/sunndi.h>
153 #include <sys/esunddi.h>
154 #include <sys/ctype.h>
155 #include <sys/fs/dv_node.h>
156 
157 #include "eedev.h"
158 
159 /*
160  * Minimum and maximum minors. These currently are designed to cover devices
161  * which keep us in the range of [1, MAXMIN32]. 0 is reserved for a control
162  * interface if it's ever required. If we end up with minors that cover user
163  * state, then we should create a second range starting at MAXMIN32 + 1 and
164  * covering a generous number of entries.
165  */
#define	EEDEV_MINOR_MIN	1	/* minor 0 reserved for a control interface */
#define	EEDEV_MINOR_MAX	MAXMIN32
168 
/*
 * Global driver state. There is only ever a single instance of eedev; this is
 * enforced in eedev_attach().
 */
typedef struct eedev {
	kmutex_t eedev_mutex;		/* protects the lists and handle flags */
	list_t eedev_list;		/* all registered eedev_hdl_t structures */
	list_t eedev_dips;		/* all provider eedev_dip_t structures */
	id_space_t *eedev_idspace;	/* minor number allocator */
	dev_info_t *eedev_dip;		/* our dip; NULL until attach succeeds */
} eedev_t;

static eedev_t eedev;
178 
/*
 * Per-handle state flags. These are manipulated while holding
 * eedev.eedev_mutex.
 */
typedef enum {
	/*
	 * Indicates that the device should be treated as read-only.
	 */
	EEDEV_F_READ_ONLY	= 1 << 0,
	/*
	 * Indicates that the handle has allocated an id_t for a minor node.
	 */
	EEDEV_F_ID_ALLOC	= 1 << 1,
	/*
	 * Indicates that the appropriate properties have been set on the minor.
	 */
	EEDEV_F_MINOR_PROPS	= 1 << 2,
	/*
	 * Indicates that the actual minor node has been created.
	 */
	EEDEV_F_MINOR_VALID	= 1 << 3,
	/*
	 * Indicates that someone is trying to actively check / validate that
	 * this eeprom is usable.
	 */
	EEDEV_F_BUSY		= 1 << 4,
	/*
	 * This indicates that the eeprom driver is currently usable.
	 * Effectively that no one has called detach on the provider driver yet.
	 */
	EEDEV_F_USABLE		= 1 << 5,
	/*
	 * Indicates that this eeprom has a hold on its dev_info_t. This is set
	 * between an open() and close(). Only a single open can set this.
	 */
	EEDEV_F_HELD		= 1 << 6
} eedev_flags_t;
212 
typedef enum {
	/*
	 * Set from the DDI unbind callback when the provider's dev_info_t is
	 * being destroyed.
	 */
	EEDEV_DIP_F_REMOVED	= 1 << 0
} eedev_dip_flags_t;

/*
 * Tracks a single provider dev_info_t and the set of eeprom handles that it
 * has registered with us.
 */
typedef struct eedev_dip {
	list_node_t ed_link;		/* entry on eedev.eedev_dips */
	dev_info_t *ed_dip;		/* the provider's dip */
	char *ed_ua;			/* "node@addr" used to reconfigure it */
	eedev_dip_flags_t ed_flags;
	ddi_unbind_callback_t ed_cb;	/* unbind notification registration */
	list_t ed_devs;			/* eedev_hdl_t's belonging to this dip */
} eedev_dip_t;
225 
/*
 * A single registered logical EEPROM. Allocated in eedev_create() and freed
 * via eedev_free(). Lives on both the global eedev_list and its provider's
 * ed_devs list.
 */
struct eedev_hdl {
	list_node_t eh_link;		/* entry on eedev.eedev_list */
	list_node_t eh_dip_link;	/* entry on eh_dip->ed_devs */
	eedev_dip_t *eh_dip;		/* provider this eeprom belongs to */
	kcondvar_t eh_cv;		/* signals EEDEV_F_BUSY transitions */
	void *eh_driver;		/* provider's opaque callback argument */
	char *eh_name;			/* minor name: "<drv>:<inst>:<name>" */
	const eedev_ops_t *eh_ops;	/* provider read/write entry points */
	id_t eh_minor;			/* allocated minor number */
	dev_t eh_dev;			/* full dev_t for the minor */
	uint32_t eh_size;		/* device capacity in bytes */
	uint32_t eh_seg;		/* page segment size; 0 if none */
	uint32_t eh_read_gran;		/* read granularity / alignment */
	uint32_t eh_write_gran;		/* write granularity / alignment */
	uint32_t eh_max_read;		/* max bytes in one read I/O */
	uint32_t eh_max_write;		/* max bytes in one write I/O */
	eedev_flags_t eh_flags;
	uint32_t eh_nwaiters;		/* threads blocked in eedev_hold_by_id() */
};
245 
246 /*
247  * A token number of maximum bytes to read/write in one go to a device if it
248  * doesn't give us something more specific. This number was mostly a guess based
249  * on common I2C device sizes and the resulting bus utilization time they
250  * implied.
251  *
252  * This value will want to be revisited if SPI devices use this framework. In
253  * general, they'd want to be able to at least send a full erased page in a
254  * single I/O. They also have a different bus utilization as compared to a 100
255  * kHz I2C standard speed, those devices usually run at least at 10 MHz if not
256  * faster.
257  */
static uint32_t eedev_default_max_io = 128;	/* bytes; non-const so it can be tuned */
259 
260 static eedev_dip_t *
eedev_dip_find(dev_info_t * dip)261 eedev_dip_find(dev_info_t *dip)
262 {
263 	VERIFY(MUTEX_HELD(&eedev.eedev_mutex));
264 	for (eedev_dip_t *e = list_head(&eedev.eedev_dips); e != NULL;
265 	    e = list_next(&eedev.eedev_dips, e)) {
266 		if (dip == e->ed_dip) {
267 			return (e);
268 		}
269 	}
270 
271 	return (NULL);
272 }
273 
274 /*
275  * This is used in the various operations to look up an existing eedev based on
276  * its dev_t. This is meant to be used by everything other than open(9E), as it
277  * will assume that a hold already exists.
278  */
279 static eedev_hdl_t *
eedev_lookup_by_id(dev_t dev)280 eedev_lookup_by_id(dev_t dev)
281 {
282 	mutex_enter(&eedev.eedev_mutex);
283 	for (eedev_hdl_t *h = list_head(&eedev.eedev_list); h != NULL;
284 	    h = list_next(&eedev.eedev_list, h)) {
285 		if (h->eh_dev != dev)
286 			continue;
287 
288 		if ((h->eh_flags & EEDEV_F_HELD) == 0)
289 			break;
290 
291 		mutex_exit(&eedev.eedev_mutex);
292 		return (h);
293 	}
294 
295 	mutex_exit(&eedev.eedev_mutex);
296 	return (NULL);
297 }
298 
299 /*
300  * We are called here by one or more threads that are trying to open a specific
301  * eeprom. When an eeprom is opened, we may need to cons the provider driver
302  * back into existence. We serialize opens, but also have to drop all of our
303  * locks along the way.
304  *
305  * While multiple threads can call open() on the same minor, we will only
306  * receive a single close. Therefore, we also need to make sure that we don't go
307  * overboard and put too many references on.
308  */
309 static int
eedev_hold_by_id(dev_t dev)310 eedev_hold_by_id(dev_t dev)
311 {
312 	eedev_hdl_t *hdl = NULL;
313 
314 	mutex_enter(&eedev.eedev_mutex);
315 	for (eedev_hdl_t *h = list_head(&eedev.eedev_list); h != NULL;
316 	    h = list_next(&eedev.eedev_list, h)) {
317 		if (h->eh_dev == dev) {
318 			hdl = h;
319 			break;
320 		}
321 	}
322 
323 	if (hdl == NULL) {
324 		mutex_exit(&eedev.eedev_mutex);
325 		return (ESTALE);
326 	}
327 
328 restart:
329 	if ((hdl->eh_dip->ed_flags & EEDEV_DIP_F_REMOVED) != 0) {
330 		mutex_exit(&eedev.eedev_mutex);
331 		return (ESTALE);
332 	}
333 
334 	/*
335 	 * We have our eeprom. If it's already held, then there's nothing more
336 	 * for us to do. The kernel guarantees that it won't call close() on
337 	 * this dev_t while open() is running. If it's not both held and usable
338 	 * then there is work to do.
339 	 */
340 	const eedev_flags_t targ = EEDEV_F_HELD | EEDEV_F_USABLE;
341 	if ((hdl->eh_flags & targ) == targ) {
342 		VERIFY0(hdl->eh_flags & EEDEV_F_BUSY);
343 		mutex_exit(&eedev.eedev_mutex);
344 		return (0);
345 	}
346 
347 	/*
348 	 * This eeprom isn't both held and usable right now. That means we would
349 	 * like to hold it and potentially reattach the provider, which means
350 	 * entering its parent NDI locks. We will indicate that we're trying to
351 	 * use this node and serialize this.
352 	 */
353 	if ((hdl->eh_flags & EEDEV_F_BUSY) != 0) {
354 		hdl->eh_nwaiters++;
355 		while ((hdl->eh_flags & EEDEV_F_BUSY) != 0) {
356 			int cv = cv_wait_sig(&hdl->eh_cv, &eedev.eedev_mutex);
357 			if (cv == 0) {
358 				hdl->eh_nwaiters--;
359 				cv_broadcast(&hdl->eh_cv);
360 				mutex_exit(&eedev.eedev_mutex);
361 				return (EINTR);
362 			}
363 		}
364 		hdl->eh_nwaiters--;
365 		goto restart;
366 	}
367 
368 	/*
369 	 * We technically have ownership of this node now. Set that we're trying
370 	 * to be the ones to hold it.
371 	 */
372 	hdl->eh_flags |= EEDEV_F_BUSY;
373 	dev_info_t *pdip = ddi_get_parent(hdl->eh_dip->ed_dip);
374 	mutex_exit(&eedev.eedev_mutex);
375 
376 	ndi_devi_enter(pdip);
377 	e_ddi_hold_devi(hdl->eh_dip->ed_dip);
378 	ndi_devi_exit(pdip);
379 
380 	/*
381 	 * Now that we have an NDI hold, check if this is valid or not. There's
382 	 * a chance we were racing with a detach.
383 	 */
384 	mutex_enter(&eedev.eedev_mutex);
385 	hdl->eh_flags |= EEDEV_F_HELD;
386 
387 	if ((hdl->eh_dip->ed_flags & EEDEV_DIP_F_REMOVED) != 0) {
388 		hdl->eh_flags &= ~(EEDEV_F_HELD | EEDEV_F_BUSY);
389 		cv_broadcast(&hdl->eh_cv);
390 		mutex_exit(&eedev.eedev_mutex);
391 		ddi_release_devi(hdl->eh_dip->ed_dip);
392 		return (ESTALE);
393 	}
394 
395 	/*
396 	 * If it's not usable, try to configure the driver. This requires us to
397 	 * drop the lock again, and thus have another chance of a race
398 	 * condition.
399 	 */
400 	if ((hdl->eh_dip->ed_flags & EEDEV_F_USABLE) == 0) {
401 		dev_info_t *child;
402 		mutex_exit(&eedev.eedev_mutex);
403 		if (ndi_devi_config_one(pdip, hdl->eh_dip->ed_ua, &child,
404 		    NDI_CONFIG | NDI_ONLINE_ATTACH | NDI_NO_EVENT) ==
405 		    NDI_SUCCESS) {
406 			/*
407 			 * When this is successful, a hold on the child is
408 			 * placed. We already have one. Release this one.
409 			 */
410 			ddi_release_devi(child);
411 		}
412 		mutex_enter(&eedev.eedev_mutex);
413 
414 		if ((hdl->eh_dip->ed_flags & EEDEV_DIP_F_REMOVED) != 0 ||
415 		    (hdl->eh_flags & EEDEV_F_USABLE) == 0) {
416 			hdl->eh_flags &= ~(EEDEV_F_HELD | EEDEV_F_BUSY);
417 			cv_broadcast(&hdl->eh_cv);
418 			mutex_exit(&eedev.eedev_mutex);
419 			ddi_release_devi(hdl->eh_dip->ed_dip);
420 			return (ESTALE);
421 		}
422 	}
423 
424 	hdl->eh_flags &= ~EEDEV_F_BUSY;
425 	cv_broadcast(&hdl->eh_cv);
426 	VERIFY3U(hdl->eh_flags & targ, ==, targ);
427 	mutex_exit(&eedev.eedev_mutex);
428 
429 	return (0);
430 }
431 
432 static void
eedev_dip_free(eedev_dip_t * e)433 eedev_dip_free(eedev_dip_t *e)
434 {
435 	list_destroy(&e->ed_devs);
436 	strfree(e->ed_ua);
437 	kmem_free(e, sizeof (eedev_dip_t));
438 }
439 
/*
 * Tear down a handle, undoing whichever pieces of minor node setup have
 * completed so far (tracked via eh_flags). This must be safe to call on a
 * partially constructed handle from the eedev_create() error paths.
 */
static void
eedev_free(eedev_hdl_t *eh)
{
	if ((eh->eh_flags & EEDEV_F_MINOR_VALID) != 0) {
		ddi_remove_minor_node(eedev.eedev_dip, eh->eh_name);
		eh->eh_flags &= ~EEDEV_F_MINOR_VALID;
	}

	if ((eh->eh_flags & EEDEV_F_MINOR_PROPS) != 0) {
		(void) ddi_prop_remove(eh->eh_dev, eedev.eedev_dip, "Size");
		eh->eh_flags &= ~EEDEV_F_MINOR_PROPS;
	}

	if ((eh->eh_flags & EEDEV_F_ID_ALLOC) != 0) {
		id_free(eedev.eedev_idspace, eh->eh_minor);
	}

	strfree(eh->eh_name);
	cv_destroy(&eh->eh_cv);
	kmem_free(eh, sizeof (eedev_hdl_t));
}
461 
462 void
eedev_fini(eedev_hdl_t * eh)463 eedev_fini(eedev_hdl_t *eh)
464 {
465 	if (eh == NULL) {
466 		return;
467 	}
468 
469 	mutex_enter(&eedev.eedev_mutex);
470 	VERIFY0(eh->eh_flags & EEDEV_F_HELD);
471 	VERIFY0(eh->eh_flags & EEDEV_F_BUSY);
472 	VERIFY3U(eh->eh_flags & EEDEV_F_USABLE, !=, 0);
473 	eh->eh_flags &= ~EEDEV_F_USABLE;
474 	eh->eh_ops = NULL;
475 	eh->eh_driver = NULL;
476 	mutex_exit(&eedev.eedev_mutex);
477 }
478 
/*
 * Taskq context for final cleanup after a provider dip has been unbound. For
 * each handle we first wait out any thread that is actively validating it
 * (EEDEV_F_BUSY) or queued waiting to do so (eh_nwaiters), then free it.
 */
static void
eedev_dip_unbind_taskq(void *arg)
{
	eedev_hdl_t *hdl;
	eedev_dip_t *ed = arg;

	mutex_enter(&eedev.eedev_mutex);
	while ((hdl = list_remove_head(&ed->ed_devs)) != NULL) {
		while ((hdl->eh_flags & EEDEV_F_BUSY) != 0 ||
		    hdl->eh_nwaiters > 0) {
			cv_wait(&hdl->eh_cv, &eedev.eedev_mutex);
		}
		eedev_free(hdl);
	}

	/*
	 * Ensure that any stale minors that we've created have been removed.
	 * NOTE(review): devfs_clean() is invoked while eedev_mutex is held —
	 * confirm this is compatible with locking rule 2 in the block comment
	 * at the top of the file.
	 */
	(void) devfs_clean(ddi_get_parent(eedev.eedev_dip), NULL, 0);
	eedev_dip_free(ed);
	mutex_exit(&eedev.eedev_mutex);
}
501 
502 /*
503  * We're being called back because a node is being destroyed. Set that this is
504  * being removed, remove them from our global lists, and then dispatch a taskq
505  * to finish clean up outside of the actual NDI context.
506  */
507 static void
eedev_dip_unbind_cb(void * arg,dev_info_t * dip)508 eedev_dip_unbind_cb(void *arg, dev_info_t *dip)
509 {
510 	eedev_dip_t *ed = arg;
511 
512 	mutex_enter(&eedev.eedev_mutex);
513 	ed->ed_flags |= EEDEV_DIP_F_REMOVED;
514 	list_remove(&eedev.eedev_dips, ed);
515 
516 	for (eedev_hdl_t *h = list_head(&ed->ed_devs); h != NULL;
517 	    h = list_next(&ed->ed_devs, h)) {
518 		list_remove(&eedev.eedev_list, h);
519 	}
520 	mutex_exit(&eedev.eedev_mutex);
521 
522 	(void) taskq_dispatch(system_taskq, eedev_dip_unbind_taskq, ed,
523 	    TQ_SLEEP);
524 }
525 
526 static eedev_dip_t *
eedev_dip_create(dev_info_t * dip)527 eedev_dip_create(dev_info_t *dip)
528 {
529 	eedev_dip_t *e;
530 
531 	e = kmem_zalloc(sizeof (eedev_dip_t), KM_SLEEP);
532 	e->ed_dip = dip;
533 	e->ed_ua = kmem_asprintf("%s@%s", ddi_node_name(dip),
534 	    ddi_get_name_addr(dip));
535 	e->ed_cb.ddiub_cb = eedev_dip_unbind_cb;
536 	e->ed_cb.ddiub_arg = e;
537 	list_create(&e->ed_devs, sizeof (eedev_hdl_t),
538 	    offsetof(eedev_hdl_t, eh_dip_link));
539 	e_ddi_register_unbind_callback(dip, &e->ed_cb);
540 
541 	return (e);
542 }
543 
/*
 * Create the character minor node and its "Size" property for a handle. This
 * is idempotent with respect to eh_flags: pieces that already exist are
 * skipped, which lets eedev_attach() finish the job for handles that were
 * registered before eedev itself attached.
 */
static bool
eedev_minor_create(eedev_hdl_t *hdl)
{
	VERIFY(MUTEX_HELD(&eedev.eedev_mutex));

	hdl->eh_dev = makedevice(ddi_driver_major(eedev.eedev_dip),
	    hdl->eh_minor);

	if ((hdl->eh_flags & EEDEV_F_MINOR_PROPS) == 0) {
		/*
		 * The 64-bit "Size" property is what specfs consults in its
		 * VOP_GETATTR() implementation (see the theory statement at
		 * the top of this file).
		 */
		if (ddi_prop_update_int64(hdl->eh_dev, eedev.eedev_dip, "Size",
		    hdl->eh_size) != DDI_PROP_SUCCESS) {
			dev_err(eedev.eedev_dip, CE_WARN, "!failed to set Size "
			    "property for minor %s (%d) for %s%d", hdl->eh_name,
			    hdl->eh_minor, ddi_driver_name(hdl->eh_dip->ed_dip),
			    ddi_get_instance(hdl->eh_dip->ed_dip));
			return (false);
		}
		hdl->eh_flags |= EEDEV_F_MINOR_PROPS;
	}

	if ((hdl->eh_flags & EEDEV_F_MINOR_VALID) == 0) {
		if (ddi_create_minor_node(eedev.eedev_dip, hdl->eh_name,
		    S_IFCHR, hdl->eh_minor, DDI_NT_EEPROM, 0) != DDI_SUCCESS) {
			dev_err(eedev.eedev_dip, CE_WARN, "!failed to create "
			    "eeprom minor %s (%d) for %s%d", hdl->eh_name,
			    hdl->eh_minor, ddi_driver_name(hdl->eh_dip->ed_dip),
			    ddi_get_instance(hdl->eh_dip->ed_dip));
			return (false);
		}
	}

	hdl->eh_flags |= EEDEV_F_MINOR_VALID;
	return (true);
}
578 
579 int
eedev_create(const eedev_reg_t * reg,eedev_hdl_t ** hdlp)580 eedev_create(const eedev_reg_t *reg, eedev_hdl_t **hdlp)
581 {
582 	eedev_hdl_t *hdl;
583 	eedev_dip_t *dip;
584 	char *name;
585 
586 	if (reg->ereg_vers != EEDEV_REG_VERS0) {
587 		return (ENOTSUP);
588 	}
589 
590 	if (reg->ereg_size == 0 || reg->ereg_dip == NULL ||
591 	    reg->ereg_ops == NULL || reg->ereg_ops->eo_read == NULL) {
592 		return (EINVAL);
593 	}
594 
595 	if (!reg->ereg_ro && reg->ereg_ops->eo_write == NULL) {
596 		return (EINVAL);
597 	}
598 
599 	if (reg->ereg_seg > reg->ereg_size ||
600 	    reg->ereg_read_gran > reg->ereg_size ||
601 	    reg->ereg_write_gran > reg->ereg_size) {
602 		return (EINVAL);
603 	}
604 
605 	if (reg->ereg_name != NULL) {
606 		size_t len = strnlen(reg->ereg_name, EEDEV_NAME_MAX);
607 		if (len >= EEDEV_NAME_MAX || len == 0) {
608 			return (EINVAL);
609 		}
610 
611 		for (size_t i = 0; i < len; i++) {
612 			if (!ISALNUM(reg->ereg_name[i])) {
613 				return (EINVAL);
614 			}
615 		}
616 	}
617 
618 	mutex_enter(&eedev.eedev_mutex);
619 
620 	/*
621 	 * Make sure the dip tracking this exists so we can bring this device
622 	 * back if required.
623 	 */
624 	dip = eedev_dip_find(reg->ereg_dip);
625 	if (dip == NULL) {
626 		dip = eedev_dip_create(reg->ereg_dip);
627 		list_insert_tail(&eedev.eedev_dips, dip);
628 	}
629 
630 	if (reg->ereg_name != NULL) {
631 		name = kmem_asprintf("%s:%d:%s", ddi_driver_name(reg->ereg_dip),
632 		    ddi_get_instance(reg->ereg_dip), reg->ereg_name);
633 	} else {
634 		name = kmem_asprintf("%s:%d:eeprom",
635 		    ddi_driver_name(reg->ereg_dip),
636 		    ddi_get_instance(reg->ereg_dip));
637 	}
638 
639 	/*
640 	 * Check to see if this handle is something that's come back from the
641 	 * first time it was created because it was reattached.
642 	 */
643 	hdl = NULL;
644 	for (eedev_hdl_t *h = list_head(&dip->ed_devs); h != NULL;
645 	    h = list_next(&dip->ed_devs, h)) {
646 		if (strcmp(h->eh_name, name) == 0) {
647 			hdl = h;
648 			break;
649 		}
650 	}
651 
652 	if (hdl != NULL) {
653 		VERIFY0(hdl->eh_flags & EEDEV_F_USABLE);
654 
655 		strfree(name);
656 		name = NULL;
657 		hdl->eh_ops = reg->ereg_ops;
658 		hdl->eh_driver = reg->ereg_driver;
659 
660 		VERIFY3U(hdl->eh_size, ==, reg->ereg_size);
661 		VERIFY3U(hdl->eh_seg, ==, reg->ereg_seg);
662 		VERIFY3U(hdl->eh_read_gran, ==, reg->ereg_read_gran);
663 		VERIFY3U(hdl->eh_write_gran, ==, reg->ereg_write_gran);
664 		if (reg->ereg_max_read != 0) {
665 			VERIFY3U(hdl->eh_max_read, ==, reg->ereg_max_read);
666 		}
667 
668 		if (reg->ereg_max_write != 0) {
669 			VERIFY3U(hdl->eh_max_write, ==, reg->ereg_max_write);
670 		}
671 	} else {
672 		hdl = kmem_zalloc(sizeof (eedev_hdl_t), KM_SLEEP);
673 		cv_init(&hdl->eh_cv, NULL, CV_DRIVER, NULL);
674 		hdl->eh_dip = dip;
675 		hdl->eh_driver = reg->ereg_driver;
676 		hdl->eh_name = name;
677 		name = NULL;
678 		hdl->eh_ops = reg->ereg_ops;
679 		hdl->eh_minor = id_alloc_nosleep(eedev.eedev_idspace);
680 		if (hdl->eh_minor == -1) {
681 			eedev_free(hdl);
682 			return (EOVERFLOW);
683 		}
684 		hdl->eh_flags |= EEDEV_F_ID_ALLOC;
685 
686 		hdl->eh_ops = reg->ereg_ops;
687 		hdl->eh_driver = reg->ereg_driver;
688 		hdl->eh_size = reg->ereg_size;
689 		hdl->eh_seg = reg->ereg_seg;
690 		hdl->eh_read_gran = reg->ereg_read_gran;
691 		hdl->eh_write_gran = reg->ereg_write_gran;
692 		hdl->eh_max_read = reg->ereg_max_read;
693 		hdl->eh_max_write = reg->ereg_max_write;
694 		if (hdl->eh_max_read == 0) {
695 			hdl->eh_max_read = MIN(eedev_default_max_io,
696 			    hdl->eh_size);
697 		}
698 
699 		if (hdl->eh_max_write == 0) {
700 			hdl->eh_max_write = MIN(eedev_default_max_io,
701 			    hdl->eh_size);
702 		}
703 
704 		if (reg->ereg_ro) {
705 			hdl->eh_flags |= EEDEV_F_READ_ONLY;
706 		}
707 
708 		/*
709 		 * Check to make sure that this name is unique across all
710 		 * devices.
711 		 */
712 		for (eedev_hdl_t *h = list_head(&eedev.eedev_list); h != NULL;
713 		    h = list_next(&eedev.eedev_list, h)) {
714 			if (strcmp(h->eh_name, hdl->eh_name) == 0) {
715 				eedev_free(hdl);
716 				mutex_exit(&eedev.eedev_mutex);
717 				return (EEXIST);
718 			}
719 		}
720 
721 		list_insert_tail(&eedev.eedev_list, hdl);
722 		list_insert_tail(&dip->ed_devs, hdl);
723 	}
724 
725 	/*
726 	 * Because we're being called and created, by definition this is usable
727 	 * in the sense that the operations vector and driver has to be valid.
728 	 */
729 	hdl->eh_flags |= EEDEV_F_USABLE;
730 
731 	if (eedev.eedev_dip != NULL) {
732 		if (!eedev_minor_create(hdl)) {
733 			list_remove(&eedev.eedev_list, hdl);
734 			list_remove(&dip->ed_devs, hdl);
735 			eedev_free(hdl);
736 			mutex_exit(&eedev.eedev_mutex);
737 			return (ENXIO);
738 		}
739 	}
740 	mutex_exit(&eedev.eedev_mutex);
741 
742 	*hdlp = hdl;
743 	return (0);
744 }
745 
746 static int
eedev_open(dev_t * devp,int flag,int otyp,cred_t * credp)747 eedev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
748 {
749 	if (drv_priv(credp) != 0)
750 		return (EPERM);
751 
752 	if (otyp != OTYP_CHR)
753 		return (ENOTSUP);
754 
755 	/*
756 	 * In the future we should perform cloning opens to allow for FEXCL
757 	 * support.
758 	 */
759 	if ((flag & (FNDELAY | FNONBLOCK | FEXCL)) != 0)
760 		return (EINVAL);
761 
762 	if ((flag & (FREAD | FWRITE)) == 0)
763 		return (EINVAL);
764 
765 	/*
766 	 * Establish a hold on this if doesn't already exist.
767 	 */
768 	return (eedev_hold_by_id(*devp));
769 }
770 
771 static int
eedev_read(dev_t dev,struct uio * uio,cred_t * credp)772 eedev_read(dev_t dev, struct uio *uio, cred_t *credp)
773 {
774 	uint32_t page, off, nbytes, end;
775 	eedev_hdl_t *hdl = eedev_lookup_by_id(dev);
776 
777 	if (hdl == NULL)
778 		return (ENXIO);
779 
780 	if ((uio->uio_fmode & FREAD) == 0)
781 		return (EBADF);
782 
783 	if ((uio->uio_fmode & (FNONBLOCK | FNDELAY)) != 0)
784 		return (EINVAL);
785 
786 	/*
787 	 * Determine if this read is aligned. The read granularity
788 	 * basically tells us the units in which the device reads. It
789 	 * must be at least one granularity long and granularity
790 	 * aligned.
791 	 */
792 	if ((uio->uio_offset % hdl->eh_read_gran) != 0 ||
793 	    (uio->uio_resid % hdl->eh_read_gran) != 0) {
794 		return (EINVAL);
795 	}
796 
797 	if (uio->uio_offset >= hdl->eh_size || uio->uio_resid == 0) {
798 		return (0);
799 	}
800 
801 	/*
802 	 * Determine if we have a page segment to consider. Devices that do
803 	 * should not cross that in a single I/O.
804 	 */
805 	if (hdl->eh_seg != 0) {
806 		page = uio->uio_offset / hdl->eh_seg;
807 		off = uio->uio_offset % hdl->eh_seg;
808 		end = (page + 1) * hdl->eh_seg;
809 	} else {
810 		page = 0;
811 		off = uio->uio_offset;
812 		end = hdl->eh_size;
813 	}
814 
815 	/*
816 	 * Determine how many bytes to tell the device to read. This is governed
817 	 * by both how many bytes are left in the device / page region and the
818 	 * device's maximum read I/O size.
819 	 */
820 	nbytes = MIN(uio->uio_resid, end - uio->uio_offset);
821 	nbytes = MIN(nbytes, hdl->eh_max_read);
822 
823 	return (hdl->eh_ops->eo_read(hdl->eh_driver, uio, page, off, nbytes));
824 }
825 
826 static int
eedev_write(dev_t dev,struct uio * uio,cred_t * credp)827 eedev_write(dev_t dev, struct uio *uio, cred_t *credp)
828 {
829 	uint32_t page, off, nbytes, end;
830 	eedev_hdl_t *hdl = eedev_lookup_by_id(dev);
831 
832 	if (hdl == NULL)
833 		return (ENXIO);
834 
835 	if ((uio->uio_fmode & FWRITE) == 0)
836 		return (EBADF);
837 
838 	if ((uio->uio_fmode & (FNONBLOCK | FNDELAY)) != 0)
839 		return (EINVAL);
840 
841 	/*
842 	 * Determine if this write is aligned. The write granularity
843 	 * basically tells us the units in which the device writes. It
844 	 * must be at least one granularity long and granularity
845 	 * aligned.
846 	 */
847 	if ((uio->uio_offset % hdl->eh_write_gran) != 0 ||
848 	    (uio->uio_resid % hdl->eh_write_gran) != 0) {
849 		return (EINVAL);
850 	}
851 
852 	if (uio->uio_offset >= hdl->eh_size || uio->uio_resid <= 0) {
853 		return (EINVAL);
854 	}
855 
856 	/*
857 	 * Determine if we have a page segment to consider. Devices that do
858 	 * should not cross that in a single I/O.
859 	 */
860 	if (hdl->eh_seg != 0) {
861 		page = uio->uio_offset / hdl->eh_seg;
862 		off = uio->uio_offset % hdl->eh_seg;
863 		end = (page + 1) * hdl->eh_seg;
864 	} else {
865 		page = 0;
866 		off = uio->uio_offset;
867 		end = hdl->eh_size;
868 	}
869 
870 	/*
871 	 * Determine how many bytes to tell the device to write. This is
872 	 * governed by both how many bytes are left in the device / page region
873 	 * and the device's maximum write I/O size.
874 	 */
875 	nbytes = MIN(uio->uio_resid, end - uio->uio_offset);
876 	nbytes = MIN(nbytes, hdl->eh_max_write);
877 
878 	return (hdl->eh_ops->eo_write(hdl->eh_driver, uio, page, off, nbytes));
879 }
880 
881 static int
eedev_close(dev_t dev,int flag,int otyp,cred_t * credp)882 eedev_close(dev_t dev, int flag, int otyp, cred_t *credp)
883 {
884 	eedev_hdl_t *hdl;
885 
886 	if (otyp != OTYP_CHR)
887 		return (EINVAL);
888 
889 	hdl = eedev_lookup_by_id(dev);
890 	if (hdl == NULL)
891 		return (ENXIO);
892 
893 	/*
894 	 * If we support FEXCL tagged cloned opens, then we should clean that up
895 	 * here.
896 	 */
897 
898 	/*
899 	 * This releases our hold on the eeprom provider driver. There may be
900 	 * other holds if there is more than one EEPROM here.
901 	 */
902 	mutex_enter(&eedev.eedev_mutex);
903 	VERIFY0(hdl->eh_flags & EEDEV_F_BUSY);
904 	VERIFY3U(hdl->eh_flags & EEDEV_F_HELD, !=, 0);
905 	VERIFY3U(hdl->eh_flags & EEDEV_F_USABLE, !=, 0);
906 	hdl->eh_flags &= ~EEDEV_F_HELD;
907 	mutex_exit(&eedev.eedev_mutex);
908 	ddi_release_devi(hdl->eh_dip->ed_dip);
909 
910 	return (0);
911 }
912 
/*
 * Character device entry points. Only open/close/read/write are implemented;
 * everything else is nodev. D_MP: these entry points perform their own
 * locking.
 */
static struct cb_ops eedev_cb_ops = {
	.cb_open = eedev_open,
	.cb_close = eedev_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = eedev_read,
	.cb_write = eedev_write,
	.cb_ioctl = nodev,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
932 
933 
/*
 * attach(9E) entry point. Only a single instance (0) is supported as all
 * minors hang off the one dip.
 */
static int
eedev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	if (ddi_get_instance(dip) != 0) {
		dev_err(dip, CE_WARN, "only a single instance of eedev is "
		    "supported");
		return (DDI_FAILURE);
	}

	mutex_enter(&eedev.eedev_mutex);
	VERIFY3P(eedev.eedev_dip, ==, NULL);
	eedev.eedev_dip = dip;

	/*
	 * It is possible for devices to have registered prior to us being
	 * attached. Specifically, modules that use eedev have a dependency on
	 * the module, not on an instance. If they have already called
	 * eedev_create(), then they will already be in eedev.eedev_list. We
	 * need to go through and create a minor node now.
	 */
	for (eedev_hdl_t *h = list_head(&eedev.eedev_list); h != NULL;
	    h = list_next(&eedev.eedev_list, h)) {
		eedev_flags_t need = EEDEV_F_MINOR_PROPS | EEDEV_F_MINOR_VALID;
		if ((h->eh_flags & need) != need) {
			(void) eedev_minor_create(h);
		}
	}
	mutex_exit(&eedev.eedev_mutex);

	return (DDI_SUCCESS);
}
971 
972 static int
eedev_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** outp)973 eedev_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **outp)
974 {
975 	switch (cmd) {
976 	case DDI_INFO_DEVT2DEVINFO:
977 		VERIFY3P(eedev.eedev_dip, !=, NULL);
978 		*outp = eedev.eedev_dip;
979 		break;
980 	case DDI_INFO_DEVT2INSTANCE:
981 		VERIFY3P(eedev.eedev_dip, !=, NULL);
982 		*outp = eedev.eedev_dip;
983 		*outp = (void *)(uintptr_t)ddi_get_instance(eedev.eedev_dip);
984 		break;
985 	default:
986 		return (DDI_FAILURE);
987 	}
988 
989 	return (DDI_SUCCESS);
990 }
991 
992 static int
eedev_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)993 eedev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
994 {
995 	if (cmd == DDI_SUSPEND) {
996 		return (DDI_FAILURE);
997 	} else if (cmd != DDI_DETACH) {
998 		return (DDI_FAILURE);
999 	}
1000 
1001 	VERIFY3P(dip, ==, eedev.eedev_dip);
1002 	mutex_enter(&eedev.eedev_mutex);
1003 	if (list_is_empty(&eedev.eedev_list)) {
1004 		mutex_exit(&eedev.eedev_mutex);
1005 		return (DDI_FAILURE);
1006 	}
1007 
1008 	ddi_remove_minor_node(eedev.eedev_dip, NULL);
1009 	eedev.eedev_dip = NULL;
1010 	mutex_exit(&eedev.eedev_mutex);
1011 
1012 	return (DDI_SUCCESS);
1013 }
1014 
/*
 * Driver operations. No quiesce work is required as the hardware access is
 * performed by the provider drivers, not by eedev itself.
 */
static struct dev_ops eedev_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = eedev_getinfo,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = eedev_attach,
	.devo_detach = eedev_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_cb_ops = &eedev_cb_ops
};
1027 
/* Loadable driver module description. */
static struct modldrv eedev_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "EEPROM support module",
	.drv_dev_ops = &eedev_dev_ops
};
1033 
/* Module linkage: a single driver module. */
static struct modlinkage eedev_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &eedev_modldrv, NULL }
};
1038 
1039 static int
eedev_mod_init(void)1040 eedev_mod_init(void)
1041 {
1042 	eedev.eedev_idspace = id_space_create("eedev_minors", EEDEV_MINOR_MIN,
1043 	    EEDEV_MINOR_MAX);
1044 	if (eedev.eedev_idspace == NULL) {
1045 		return (ENOMEM);
1046 	}
1047 	mutex_init(&eedev.eedev_mutex, NULL, MUTEX_DRIVER, NULL);
1048 	list_create(&eedev.eedev_list, sizeof (eedev_hdl_t),
1049 	    offsetof(eedev_hdl_t, eh_link));
1050 	list_create(&eedev.eedev_dips, sizeof (eedev_dip_t),
1051 	    offsetof(eedev_dip_t, ed_link));
1052 
1053 	return (0);
1054 }
1055 
/*
 * Tear down module-global state in the reverse order of eedev_mod_init().
 * Called only when mod_remove()/mod_install() guarantee no users remain.
 */
static void
eedev_mod_fini(void)
{
	list_destroy(&eedev.eedev_dips);
	list_destroy(&eedev.eedev_list);
	mutex_destroy(&eedev.eedev_mutex);
	id_space_destroy(eedev.eedev_idspace);
}
1064 
1065 int
_init(void)1066 _init(void)
1067 {
1068 	int ret;
1069 
1070 	if ((ret = eedev_mod_init()) != 0) {
1071 		return (ret);
1072 	}
1073 
1074 	if ((ret = mod_install(&eedev_modlinkage)) != 0) {
1075 		eedev_mod_fini();
1076 	}
1077 
1078 	return (ret);
1079 }
1080 
/* Module entry point: report module information. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&eedev_modlinkage, modinfop));
}
1086 
1087 int
_fini(void)1088 _fini(void)
1089 {
1090 	int ret;
1091 
1092 	if ((ret = mod_remove(&eedev_modlinkage)) == 0) {
1093 		eedev_mod_fini();
1094 	}
1095 
1096 	return (ret);
1097 }
1098