xref: /linux/drivers/uio/uio_hv_generic.c (revision c26f4fbd58375bd6ef74f95eb73d61762ad97c59)
// SPDX-License-Identifier: GPL-2.0
/*
 * uio_hv_generic - generic UIO driver for VMBus
 *
 * Copyright (c) 2013-2016 Brocade Communications Systems, Inc.
 * Copyright (c) 2016, Microsoft Corporation.
 *
 * Since the driver does not declare any device ids, you must allocate
 * an id and bind the device to the driver yourself.  For example:
 *
 * Associate Network GUID with UIO device
 * # echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" \
 *    > /sys/bus/vmbus/drivers/uio_hv_generic/new_id
 * Then rebind
 * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
 *    > /sys/bus/vmbus/drivers/hv_netvsc/unbind
 * # echo -n "ed963694-e847-4b2a-85af-bc9cfc11d6f3" \
 *    > /sys/bus/vmbus/drivers/uio_hv_generic/bind
 */
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 
22 #include <linux/device.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/uio_driver.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_ether.h>
28 #include <linux/skbuff.h>
29 #include <linux/hyperv.h>
30 #include <linux/vmalloc.h>
31 #include <linux/slab.h>
32 
33 #include "../hv/hyperv_vmbus.h"
34 
/* Strings reported through uio_info.version and the MODULE_*() macros. */
#define DRIVER_VERSION	"0.02.1"
#define DRIVER_AUTHOR	"Stephen Hemminger <sthemmin at microsoft.com>"
#define DRIVER_DESC	"Generic UIO driver for VMBus devices"

/* Sizes in bytes of the send/receive buffers allocated for HV_NIC devices. */
#define SEND_BUFFER_SIZE (16 * 1024 * 1024)
#define RECV_BUFFER_SIZE (31 * 1024 * 1024)

/*
 * List of resources to be mapped to user space
 * can be extended up to MAX_UIO_MAPS(5) items.
 *
 * Each value is used as an index into uio_info.mem[].
 */
enum hv_uio_map {
	TXRX_RING_MAP = 0,	/* channel ring buffer pages */
	INT_PAGE_MAP,		/* vmbus_connection.int_page */
	MON_PAGE_MAP,		/* vmbus_connection.monitor_pages[1] */
	RECV_BUF_MAP,		/* receive buffer, HV_NIC devices only */
	SEND_BUF_MAP		/* send buffer, HV_NIC devices only */
};

54 struct hv_uio_private_data {
55 	struct uio_info info;
56 	struct hv_device *device;
57 	atomic_t refcnt;
58 
59 	void	*recv_buf;
60 	struct vmbus_gpadl recv_gpadl;
61 	char	recv_name[32];	/* "recv_4294967295" */
62 
63 	void	*send_buf;
64 	struct vmbus_gpadl send_gpadl;
65 	char	send_name[32];
66 };
67 
/*
 * Update the inbound interrupt mask of @channel from @irq_state: a non-zero
 * @irq_state clears the mask, zero sets it.  When the mask is being cleared
 * and the channel offer has no monitor allocated, the channel is also
 * signalled through vmbus_set_event().
 */
static void set_event(struct vmbus_channel *channel, s32 irq_state)
{
	channel->inbound.ring_buffer->interrupt_mask = !irq_state;
	if (!channel->offermsg.monitor_allocated && irq_state) {
		/* MB is needed for host to see the interrupt mask first */
		virt_mb();
		vmbus_set_event(channel);
	}
}

78 /*
79  * This is the irqcontrol callback to be registered to uio_info.
80  * It can be used to disable/enable interrupt from user space processes.
81  *
82  * @param info
83  *  pointer to uio_info.
84  * @param irq_state
85  *  state value. 1 to enable interrupt, 0 to disable interrupt.
86  */
87 static int
hv_uio_irqcontrol(struct uio_info * info,s32 irq_state)88 hv_uio_irqcontrol(struct uio_info *info, s32 irq_state)
89 {
90 	struct hv_uio_private_data *pdata = info->priv;
91 	struct hv_device *dev = pdata->device;
92 	struct vmbus_channel *primary, *sc;
93 
94 	primary = dev->channel;
95 	set_event(primary, irq_state);
96 
97 	mutex_lock(&vmbus_connection.channel_mutex);
98 	list_for_each_entry(sc, &primary->sc_list, sc_list)
99 		set_event(sc, irq_state);
100 	mutex_unlock(&vmbus_connection.channel_mutex);
101 
102 	return 0;
103 }
104 
105 /*
106  * Callback from vmbus_event when something is in inbound ring.
107  */
hv_uio_channel_cb(void * context)108 static void hv_uio_channel_cb(void *context)
109 {
110 	struct vmbus_channel *chan = context;
111 	struct hv_device *hv_dev;
112 	struct hv_uio_private_data *pdata;
113 
114 	chan->inbound.ring_buffer->interrupt_mask = 1;
115 	virt_mb();
116 
117 	/*
118 	 * The callback may come from a subchannel, in which case look
119 	 * for the hv device in the primary channel
120 	 */
121 	hv_dev = chan->primary_channel ?
122 		 chan->primary_channel->device_obj : chan->device_obj;
123 	pdata = hv_get_drvdata(hv_dev);
124 	uio_event_notify(&pdata->info);
125 }
126 
127 /*
128  * Callback from vmbus_event when channel is rescinded.
129  * It is meant for rescind of primary channels only.
130  */
hv_uio_rescind(struct vmbus_channel * channel)131 static void hv_uio_rescind(struct vmbus_channel *channel)
132 {
133 	struct hv_device *hv_dev = channel->device_obj;
134 	struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev);
135 
136 	/*
137 	 * Turn off the interrupt file handle
138 	 * Next read for event will return -EIO
139 	 */
140 	pdata->info.irq = 0;
141 
142 	/* Wake up reader */
143 	uio_event_notify(&pdata->info);
144 
145 	/*
146 	 * With rescind callback registered, rescind path will not unregister the device
147 	 * from vmbus when the primary channel is rescinded.
148 	 * Without it, rescind handling is incomplete and next onoffer msg does not come.
149 	 * Unregister the device from vmbus here.
150 	 */
151 	vmbus_device_unregister(channel->device_obj);
152 }
153 
154 /* Function used for mmap of ring buffer sysfs interface.
155  * The ring buffer is allocated as contiguous memory by vmbus_open
156  */
157 static int
hv_uio_ring_mmap(struct vmbus_channel * channel,struct vm_area_struct * vma)158 hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma)
159 {
160 	void *ring_buffer = page_address(channel->ringbuffer_page);
161 
162 	if (channel->state != CHANNEL_OPENED_STATE)
163 		return -ENODEV;
164 
165 	return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
166 			       channel->ringbuffer_pagecount << PAGE_SHIFT);
167 }
168 
169 /* Callback from VMBUS subsystem when new channel created. */
170 static void
hv_uio_new_channel(struct vmbus_channel * new_sc)171 hv_uio_new_channel(struct vmbus_channel *new_sc)
172 {
173 	struct hv_device *hv_dev = new_sc->primary_channel->device_obj;
174 	struct device *device = &hv_dev->device;
175 	const size_t ring_bytes = SZ_2M;
176 	int ret;
177 
178 	/* Create host communication ring */
179 	ret = vmbus_open(new_sc, ring_bytes, ring_bytes, NULL, 0,
180 			 hv_uio_channel_cb, new_sc);
181 	if (ret) {
182 		dev_err(device, "vmbus_open subchannel failed: %d\n", ret);
183 		return;
184 	}
185 
186 	/* Disable interrupts on sub channel */
187 	new_sc->inbound.ring_buffer->interrupt_mask = 1;
188 	set_channel_read_mode(new_sc, HV_CALL_ISR);
189 	ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap);
190 	if (ret) {
191 		dev_err(device, "sysfs create ring bin file failed; %d\n", ret);
192 		vmbus_close(new_sc);
193 	}
194 }
195 
196 /* free the reserved buffers for send and receive */
197 static void
hv_uio_cleanup(struct hv_device * dev,struct hv_uio_private_data * pdata)198 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
199 {
200 	if (pdata->send_gpadl.gpadl_handle) {
201 		vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
202 		if (!pdata->send_gpadl.decrypted)
203 			vfree(pdata->send_buf);
204 	}
205 
206 	if (pdata->recv_gpadl.gpadl_handle) {
207 		vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
208 		if (!pdata->recv_gpadl.decrypted)
209 			vfree(pdata->recv_buf);
210 	}
211 }
212 
213 /* VMBus primary channel is opened on first use */
214 static int
hv_uio_open(struct uio_info * info,struct inode * inode)215 hv_uio_open(struct uio_info *info, struct inode *inode)
216 {
217 	struct hv_uio_private_data *pdata
218 		= container_of(info, struct hv_uio_private_data, info);
219 	struct hv_device *dev = pdata->device;
220 	int ret;
221 
222 	if (atomic_inc_return(&pdata->refcnt) != 1)
223 		return 0;
224 
225 	vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind);
226 	vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel);
227 
228 	ret = vmbus_connect_ring(dev->channel,
229 				 hv_uio_channel_cb, dev->channel);
230 	if (ret == 0)
231 		dev->channel->inbound.ring_buffer->interrupt_mask = 1;
232 	else
233 		atomic_dec(&pdata->refcnt);
234 
235 	return ret;
236 }
237 
238 /* VMBus primary channel is closed on last close */
239 static int
hv_uio_release(struct uio_info * info,struct inode * inode)240 hv_uio_release(struct uio_info *info, struct inode *inode)
241 {
242 	struct hv_uio_private_data *pdata
243 		= container_of(info, struct hv_uio_private_data, info);
244 	struct hv_device *dev = pdata->device;
245 	int ret = 0;
246 
247 	if (atomic_dec_and_test(&pdata->refcnt))
248 		ret = vmbus_disconnect_ring(dev->channel);
249 
250 	return ret;
251 }
252 
253 static int
hv_uio_probe(struct hv_device * dev,const struct hv_vmbus_device_id * dev_id)254 hv_uio_probe(struct hv_device *dev,
255 	     const struct hv_vmbus_device_id *dev_id)
256 {
257 	struct vmbus_channel *channel = dev->channel;
258 	struct hv_uio_private_data *pdata;
259 	void *ring_buffer;
260 	int ret;
261 	size_t ring_size = hv_dev_ring_size(channel);
262 
263 	if (!ring_size)
264 		ring_size = SZ_2M;
265 
266 	/* Adjust ring size if necessary to have it page aligned */
267 	ring_size = VMBUS_RING_SIZE(ring_size);
268 
269 	pdata = devm_kzalloc(&dev->device, sizeof(*pdata), GFP_KERNEL);
270 	if (!pdata)
271 		return -ENOMEM;
272 
273 	ret = vmbus_alloc_ring(channel, ring_size, ring_size);
274 	if (ret)
275 		return ret;
276 
277 	set_channel_read_mode(channel, HV_CALL_ISR);
278 
279 	/* Fill general uio info */
280 	pdata->info.name = "uio_hv_generic";
281 	pdata->info.version = DRIVER_VERSION;
282 	pdata->info.irqcontrol = hv_uio_irqcontrol;
283 	pdata->info.open = hv_uio_open;
284 	pdata->info.release = hv_uio_release;
285 	pdata->info.irq = UIO_IRQ_CUSTOM;
286 	atomic_set(&pdata->refcnt, 0);
287 
288 	/* mem resources */
289 	pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
290 	ring_buffer = page_address(channel->ringbuffer_page);
291 	pdata->info.mem[TXRX_RING_MAP].addr
292 		= (uintptr_t)virt_to_phys(ring_buffer);
293 	pdata->info.mem[TXRX_RING_MAP].size
294 		= channel->ringbuffer_pagecount << PAGE_SHIFT;
295 	pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;
296 
297 	pdata->info.mem[INT_PAGE_MAP].name = "int_page";
298 	pdata->info.mem[INT_PAGE_MAP].addr
299 		= (uintptr_t)vmbus_connection.int_page;
300 	pdata->info.mem[INT_PAGE_MAP].size = HV_HYP_PAGE_SIZE;
301 	pdata->info.mem[INT_PAGE_MAP].memtype = UIO_MEM_LOGICAL;
302 
303 	pdata->info.mem[MON_PAGE_MAP].name = "monitor_page";
304 	pdata->info.mem[MON_PAGE_MAP].addr
305 		= (uintptr_t)vmbus_connection.monitor_pages[1];
306 	pdata->info.mem[MON_PAGE_MAP].size = HV_HYP_PAGE_SIZE;
307 	pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL;
308 
309 	if (channel->device_id == HV_NIC) {
310 		pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
311 		if (!pdata->recv_buf) {
312 			ret = -ENOMEM;
313 			goto fail_free_ring;
314 		}
315 
316 		ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
317 					    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
318 		if (ret) {
319 			if (!pdata->recv_gpadl.decrypted)
320 				vfree(pdata->recv_buf);
321 			goto fail_close;
322 		}
323 
324 		/* put Global Physical Address Label in name */
325 		snprintf(pdata->recv_name, sizeof(pdata->recv_name),
326 			 "recv:%u", pdata->recv_gpadl.gpadl_handle);
327 		pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name;
328 		pdata->info.mem[RECV_BUF_MAP].addr = (uintptr_t)pdata->recv_buf;
329 		pdata->info.mem[RECV_BUF_MAP].size = RECV_BUFFER_SIZE;
330 		pdata->info.mem[RECV_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
331 
332 		pdata->send_buf = vzalloc(SEND_BUFFER_SIZE);
333 		if (!pdata->send_buf) {
334 			ret = -ENOMEM;
335 			goto fail_close;
336 		}
337 
338 		ret = vmbus_establish_gpadl(channel, pdata->send_buf,
339 					    SEND_BUFFER_SIZE, &pdata->send_gpadl);
340 		if (ret) {
341 			if (!pdata->send_gpadl.decrypted)
342 				vfree(pdata->send_buf);
343 			goto fail_close;
344 		}
345 
346 		snprintf(pdata->send_name, sizeof(pdata->send_name),
347 			 "send:%u", pdata->send_gpadl.gpadl_handle);
348 		pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name;
349 		pdata->info.mem[SEND_BUF_MAP].addr = (uintptr_t)pdata->send_buf;
350 		pdata->info.mem[SEND_BUF_MAP].size = SEND_BUFFER_SIZE;
351 		pdata->info.mem[SEND_BUF_MAP].memtype = UIO_MEM_VIRTUAL;
352 	}
353 
354 	pdata->info.priv = pdata;
355 	pdata->device = dev;
356 
357 	ret = uio_register_device(&dev->device, &pdata->info);
358 	if (ret) {
359 		dev_err(&dev->device, "hv_uio register failed\n");
360 		goto fail_close;
361 	}
362 
363 	/*
364 	 * This internally calls sysfs_update_group, which returns a non-zero value if it executes
365 	 * before sysfs_create_group. This is expected as the 'ring' will be created later in
366 	 * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return
367 	 * value and print warning.
368 	 *
369 	 * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race
370 	 * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed
371 	 * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify
372 	 * APIs to make sure that ring is created.
373 	 */
374 	hv_create_ring_sysfs(channel, hv_uio_ring_mmap);
375 
376 	hv_set_drvdata(dev, pdata);
377 
378 	return 0;
379 
380 fail_close:
381 	hv_uio_cleanup(dev, pdata);
382 fail_free_ring:
383 	vmbus_free_ring(dev->channel);
384 
385 	return ret;
386 }
387 
388 static void
hv_uio_remove(struct hv_device * dev)389 hv_uio_remove(struct hv_device *dev)
390 {
391 	struct hv_uio_private_data *pdata = hv_get_drvdata(dev);
392 
393 	if (!pdata)
394 		return;
395 
396 	hv_remove_ring_sysfs(dev->channel);
397 	uio_unregister_device(&pdata->info);
398 	hv_uio_cleanup(dev, pdata);
399 
400 	vmbus_free_ring(dev->channel);
401 }
402 
403 static struct hv_driver hv_uio_drv = {
404 	.name = "uio_hv_generic",
405 	.id_table = NULL, /* only dynamic id's */
406 	.probe = hv_uio_probe,
407 	.remove = hv_uio_remove,
408 };
409 
410 static int __init
hyperv_module_init(void)411 hyperv_module_init(void)
412 {
413 	return vmbus_driver_register(&hv_uio_drv);
414 }
415 
416 static void __exit
hyperv_module_exit(void)417 hyperv_module_exit(void)
418 {
419 	vmbus_driver_unregister(&hv_uio_drv);
420 }
421 
422 module_init(hyperv_module_init);
423 module_exit(hyperv_module_exit);
424 
425 MODULE_VERSION(DRIVER_VERSION);
426 MODULE_LICENSE("GPL v2");
427 MODULE_AUTHOR(DRIVER_AUTHOR);
428 MODULE_DESCRIPTION(DRIVER_DESC);
429