// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

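/*
 * A slot_map hands out indices for in-flight buffer descriptors.
 * All fields are protected by q.lock.  The counter c encodes both the
 * state of the map and the number of free slots:
 *
 *	0 <= c <= count:  map installed, c slots free
 *	c == -1:          no map installed (or fully shut down)
 *	c <  -1:          dying; mark_killed() ran while slots were
 *	                  outstanding, each put() moves c back toward -1
 */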
struct slot_map {
	int c;
	wait_queue_head_t q;
	int count;
	unsigned long *map;
};

static struct slot_map rw_map = {
	.c = -1,
	.q = __WAIT_QUEUE_HEAD_INITIALIZER(rw_map.q)
};
static struct slot_map readdir_map = {
	.c = -1,
	.q = __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map.q)
};

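/* install a new bitmap and wake everyone waiting for one to appear */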
static void install(struct slot_map *m, int count, unsigned long *map)
{
	spin_lock(&m->q.lock);
	m->c = m->count = count;
	m->map = map;
	wake_up_all_locked(&m->q);
	spin_unlock(&m->q.lock);
}

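/*
 * Take the map out of service: drop c by count + 1 so it goes negative
 * and stays below -1 until the last outstanding slot is put back.
 */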
static void mark_killed(struct slot_map *m)
{
	spin_lock(&m->q.lock);
	m->c -= m->count + 1;
	spin_unlock(&m->q.lock);
}

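/*
 * Wait (uninterruptibly) until every outstanding slot has been
 * returned, i.e. until c climbs back to -1, then forget the bitmap.
 */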
static void run_down(struct slot_map *m)
{
	DEFINE_WAIT(wait);
	spin_lock(&m->q.lock);
	if (m->c != -1) {
		for (;;) {
			if (likely(list_empty(&wait.entry)))
				__add_wait_queue_entry_tail(&m->q, &wait);
			set_current_state(TASK_UNINTERRUPTIBLE);

			if (m->c == -1)
				break;

			spin_unlock(&m->q.lock);
			schedule();
			spin_lock(&m->q.lock);
		}
		__remove_wait_queue(&m->q, &wait);
		__set_current_state(TASK_RUNNING);
	}
	m->map = NULL;
	spin_unlock(&m->q.lock);
}

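/*
 * Return a slot.  Wake one waiter when a slot becomes free; wake them
 * all when the final outstanding slot of a dying map comes back (c
 * reaches -1), so run_down() can finish.
 */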
static void put(struct slot_map *m, int slot)
{
	int v;
	spin_lock(&m->q.lock);
	__clear_bit(slot, m->map);
	v = ++m->c;
	if (v > 0)
		wake_up_locked(&m->q);
	if (unlikely(v == -1))     /* finished dying */
		wake_up_all_locked(&m->q);
	spin_unlock(&m->q.lock);
}

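/*
 * Sleep (interruptibly) until a slot frees up, for at most
 * slot_timeout_secs.  While no map is installed at all (c < 0), each
 * individual sleep is capped at ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS and
 * we give up early if the map still has not appeared.  Returns 0 on
 * success, -EINTR on signal, -ETIMEDOUT otherwise.  Called with
 * m->q.lock held; drops and retakes it around schedule_timeout().
 */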
static int wait_for_free(struct slot_map *m)
{
	long left = slot_timeout_secs * HZ;
	DEFINE_WAIT(wait);

	do {
		long n = left, t;
		if (likely(list_empty(&wait.entry)))
			__add_wait_queue_entry_tail_exclusive(&m->q, &wait);
		set_current_state(TASK_INTERRUPTIBLE);

		if (m->c > 0)
			break;

		if (m->c < 0) {
			/*
			 * We are waiting for the map to be installed;
			 * it had better be there soon, or we go away.
			 */
			if (n > ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ)
				n = ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ;
		}
		spin_unlock(&m->q.lock);
		t = schedule_timeout(n);
		spin_lock(&m->q.lock);
		/*
		 * If the capped map-install wait expired and the map is
		 * still missing, give up (t is 0 here, so left becomes
		 * 0).  Otherwise roll the unused budget forward: t
		 * jiffies of the n we slept remain, plus the left - n
		 * we never asked for.
		 */
		if (unlikely(!t) && n != left && m->c < 0)
			left = t;
		else
			left = t + (left - n);
		if (signal_pending(current))
			left = -EINTR;
	} while (left > 0);

	if (!list_empty(&wait.entry))
		list_del(&wait.entry);
	else if (left <= 0 && waitqueue_active(&m->q))
		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
	__set_current_state(TASK_RUNNING);

	if (likely(left > 0))
		return 0;

	return left < 0 ? -EINTR : -ETIMEDOUT;
}

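/*
 * Claim a free slot index, sleeping in wait_for_free() if none are
 * available.  Returns the slot number or a -errno.
 */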
static int get(struct slot_map *m)
{
	int res = 0;
	spin_lock(&m->q.lock);
	if (unlikely(m->c <= 0))
		res = wait_for_free(m);
	if (likely(!res)) {
		m->c--;
		res = find_first_zero_bit(m->map, m->count);
		__set_bit(res, m->map);
	}
	spin_unlock(&m->q.lock);
	return res;
}

/* used to describe mapped buffers */
struct orangefs_bufmap_desc {
	void __user *uaddr;		/* user space address pointer */
	struct page **page_array;	/* array of mapped pages */
	int array_count;		/* size of above arrays */
	struct list_head list_link;
};

static struct orangefs_bufmap {
	int desc_size;
	int desc_shift;
	int desc_count;
	int total_size;
	int page_count;

	struct page **page_array;
	struct orangefs_bufmap_desc *desc_array;

	/* array to track usage of buffer descriptors */
	unsigned long *buffer_index_array;

	/* array to track usage of buffer descriptors for readdir */
#define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG)
	unsigned long readdir_index_array[N];
#undef N
} *__orangefs_bufmap;

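/* protects installation and teardown of __orangefs_bufmap */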
static DEFINE_SPINLOCK(orangefs_bufmap_lock);

static void
orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
{
	unpin_user_pages(bufmap->page_array, bufmap->page_count);
}

static void
orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
{
	kfree(bufmap->page_array);
	kfree(bufmap->desc_array);
	bitmap_free(bufmap->buffer_index_array);
	kfree(bufmap);
}

/*
 * XXX: Can the size and shift change while the caller gives up the
 * XXX: lock between calling this and doing something useful?
 */

int orangefs_bufmap_size_query(void)
{
	struct orangefs_bufmap *bufmap;
	int size = 0;
	spin_lock(&orangefs_bufmap_lock);
	bufmap = __orangefs_bufmap;
	if (bufmap)
		size = bufmap->desc_size;
	spin_unlock(&orangefs_bufmap_lock);
	return size;
}

static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
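/* note: nothing below waits on these; slot waiters sleep on slot_map.q */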

static struct orangefs_bufmap *
orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
{
	struct orangefs_bufmap *bufmap;

	bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
	if (!bufmap)
		goto out;

	bufmap->total_size = user_desc->total_size;
	bufmap->desc_count = user_desc->count;
	bufmap->desc_size = user_desc->size;
	bufmap->desc_shift = ilog2(bufmap->desc_size);

	bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL);
	if (!bufmap->buffer_index_array)
		goto out_free_bufmap;

	bufmap->desc_array =
		kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
			GFP_KERNEL);
	if (!bufmap->desc_array)
		goto out_free_index_array;

	bufmap->page_count = bufmap->total_size / PAGE_SIZE;

	/* allocate storage to track our page mappings */
	bufmap->page_array =
		kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
	if (!bufmap->page_array)
		goto out_free_desc_array;

	return bufmap;

out_free_desc_array:
	kfree(bufmap->desc_array);
out_free_index_array:
	bitmap_free(bufmap->buffer_index_array);
out_free_bufmap:
	kfree(bufmap);
out:
	return NULL;
}

static int
orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
		struct ORANGEFS_dev_map_desc *user_desc)
{
	int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
	int offset = 0, ret, i;

	/* map the pages */
	ret = pin_user_pages_fast((unsigned long)user_desc->ptr,
			     bufmap->page_count, FOLL_WRITE, bufmap->page_array);

	if (ret < 0)
		return ret;

	if (ret != bufmap->page_count) {
		gossip_err("orangefs error: asked for %d pages, only got %d.\n",
				bufmap->page_count, ret);

		for (i = 0; i < ret; i++)
			unpin_user_page(bufmap->page_array[i]);
		return -ENOMEM;
	}

	/*
	 * ideally we want to get kernel space pointers for each page, but
	 * we can't kmap that many pages at once if highmem is being used.
	 * so instead, we just kmap/kunmap the page address each time the
	 * kaddr is needed.
	 */
	for (i = 0; i < bufmap->page_count; i++)
		flush_dcache_page(bufmap->page_array[i]);

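	/*
	 * Descriptor i fronts pages_per_desc consecutive pinned pages,
	 * i.e. bytes [i * desc_size, (i + 1) * desc_size) of the user
	 * buffer.
	 */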
	/* build a list of available descriptors */
	for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
		bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
		bufmap->desc_array[i].array_count = pages_per_desc;
		bufmap->desc_array[i].uaddr =
		    (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
		offset += pages_per_desc;
	}

	return 0;
}

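/*
 * Lifecycle: orangefs_bufmap_initialize() pins the shared buffer and
 * installs the slot maps, orangefs_bufmap_finalize() marks them killed
 * so no new slots are handed out, and orangefs_bufmap_run_down() waits
 * for outstanding slots to drain before unpinning and freeing.
 */
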
/*
 * orangefs_bufmap_initialize()
 *
 * initializes the mapped buffer interface
 *
 * returns 0 on success, -errno on failure
 */
int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
{
	struct orangefs_bufmap *bufmap;
	int ret = -EINVAL;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: called (ptr ("
		     "%p) sz (%d) cnt(%d).\n",
		     user_desc->ptr,
		     user_desc->size,
		     user_desc->count);

	if (user_desc->total_size < 0 ||
	    user_desc->size < 0 ||
	    user_desc->count < 0)
		goto out;

	/*
	 * sanity check alignment and size of buffer that caller wants to
	 * work with
	 */
	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
	    (unsigned long)user_desc->ptr) {
		gossip_err("orangefs error: memory alignment (front). %p\n",
			   user_desc->ptr);
		goto out;
	}

	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
		gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
			   user_desc->ptr,
			   user_desc->total_size);
		goto out;
	}

	if (user_desc->total_size != (user_desc->size * user_desc->count)) {
		gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
			   user_desc->total_size,
			   user_desc->size,
			   user_desc->count);
		goto out;
	}

	if ((user_desc->size % PAGE_SIZE) != 0) {
		gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
			   user_desc->size);
		goto out;
	}

	ret = -ENOMEM;
	bufmap = orangefs_bufmap_alloc(user_desc);
	if (!bufmap)
		goto out;

	ret = orangefs_bufmap_map(bufmap, user_desc);
	if (ret)
		goto out_free_bufmap;

	spin_lock(&orangefs_bufmap_lock);
	if (__orangefs_bufmap) {
		spin_unlock(&orangefs_bufmap_lock);
		gossip_err("orangefs: error: bufmap already initialized.\n");
		ret = -EINVAL;
		goto out_unmap_bufmap;
	}
	__orangefs_bufmap = bufmap;
	install(&rw_map,
		bufmap->desc_count,
		bufmap->buffer_index_array);
	install(&readdir_map,
		ORANGEFS_READDIR_DEFAULT_DESC_COUNT,
		bufmap->readdir_index_array);
	spin_unlock(&orangefs_bufmap_lock);

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: exiting normally\n");
	return 0;

out_unmap_bufmap:
	orangefs_bufmap_unmap(bufmap);
out_free_bufmap:
	orangefs_bufmap_free(bufmap);
out:
	return ret;
}

/*
 * orangefs_bufmap_finalize()
 *
 * shuts down the mapped buffer interface and releases any resources
 * associated with it
 *
 * no return value
 */
void orangefs_bufmap_finalize(void)
{
	struct orangefs_bufmap *bufmap = __orangefs_bufmap;
	if (!bufmap)
		return;
	gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
	mark_killed(&rw_map);
	mark_killed(&readdir_map);
	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_finalize: exiting normally\n");
}

void orangefs_bufmap_run_down(void)
{
	struct orangefs_bufmap *bufmap = __orangefs_bufmap;
	if (!bufmap)
		return;
	run_down(&rw_map);
	run_down(&readdir_map);
	spin_lock(&orangefs_bufmap_lock);
	__orangefs_bufmap = NULL;
	spin_unlock(&orangefs_bufmap_lock);
	orangefs_bufmap_unmap(bufmap);
	orangefs_bufmap_free(bufmap);
}

/*
 * orangefs_bufmap_get()
 *
 * gets a free mapped buffer descriptor, will sleep until one becomes
 * available if necessary
 *
 * returns slot on success, -errno on failure
 */
int orangefs_bufmap_get(void)
{
	return get(&rw_map);
}

/*
 * orangefs_bufmap_put()
 *
 * returns a mapped buffer descriptor to the collection
 *
 * no return value
 */
void orangefs_bufmap_put(int buffer_index)
{
	put(&rw_map, buffer_index);
}
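
/*
 * A minimal sketch of how a caller pairs these with the copy helpers
 * below (the iter, size, and ret names are illustrative, not taken
 * from a real call site):
 *
 *	int buffer_index = orangefs_bufmap_get();
 *
 *	if (buffer_index < 0)
 *		return buffer_index;
 *	// stage outgoing data into the shared buffer ...
 *	ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, size);
 *	// ... hand the slot to the client-core, then release it
 *	orangefs_bufmap_put(buffer_index);
 */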

/*
 * orangefs_readdir_index_get()
 *
 * gets a free descriptor, will sleep until one becomes
 * available if necessary.
 * Although the readdir buffers are not mapped into kernel space
 * we could do that at a later point in time. Regardless, these
 * indices are used by the client-core.
 *
 * returns slot on success, -errno on failure
 */
int orangefs_readdir_index_get(void)
{
	return get(&readdir_map);
}

void orangefs_readdir_index_put(int buffer_index)
{
	put(&readdir_map, buffer_index);
}

/*
 * we've been handed an iovec, we need to copy it to
 * the shared memory descriptor at "buffer_index".
 */
int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
				int buffer_index,
				size_t size)
{
	struct orangefs_bufmap_desc *to;
	int i;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "%s: buffer_index:%d: size:%zu:\n",
		     __func__, buffer_index, size);

	to = &__orangefs_bufmap->desc_array[buffer_index];
	for (i = 0; size; i++) {
		struct page *page = to->page_array[i];
		size_t n = size;
		if (n > PAGE_SIZE)
			n = PAGE_SIZE;
		if (copy_page_from_iter(page, 0, n, iter) != n)
			return -EFAULT;
		size -= n;
	}
	return 0;
}

/*
 * we've been handed an iovec, we need to fill it from
 * the shared memory descriptor at "buffer_index".
 */
int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
				    int buffer_index,
				    size_t size)
{
	struct orangefs_bufmap_desc *from;
	int i;

	from = &__orangefs_bufmap->desc_array[buffer_index];
	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "%s: buffer_index:%d: size:%zu:\n",
		     __func__, buffer_index, size);

	for (i = 0; size; i++) {
		struct page *page = from->page_array[i];
		size_t n = size;
		if (n > PAGE_SIZE)
			n = PAGE_SIZE;
		n = copy_page_to_iter(page, 0, n, iter);
		if (!n)
			return -EFAULT;
		size -= n;
	}
	return 0;
}
523