1 /* 2 * Copyright (c) 2006 Oracle. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 #include <linux/percpu.h> 34 #include <linux/seq_file.h> 35 #include <linux/slab.h> 36 #include <linux/proc_fs.h> 37 #include <linux/export.h> 38 #include <linux/uio.h> 39 40 #include "rds.h" 41 42 /* 43 * This file implements a getsockopt() call which copies a set of fixed 44 * sized structs into a user-specified buffer as a means of providing 45 * read-only information about RDS. 46 * 47 * For a given information source there are a given number of fixed sized 48 * structs at a given time. The structs are only copied if the user-specified 49 * buffer is big enough. The destination pages that make up the buffer 50 * are pinned for the duration of the copy. 51 * 52 * This gives us the following benefits: 53 * 54 * - simple implementation, no copy "position" across multiple calls 55 * - consistent snapshot of an info source 56 * - atomic copy works well with whatever locking info source has 57 * - one portable tool to get rds info across implementations 58 * - long-lived tool can get info without allocating 59 * 60 * at the following costs: 61 * 62 * - info source copy must be pinned, may be "large" 63 */ 64 65 struct rds_info_iterator { 66 struct page **pages; 67 void *addr; 68 unsigned long offset; 69 }; 70 71 static DEFINE_SPINLOCK(rds_info_lock); 72 static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; 73 74 void rds_info_register_func(int optname, rds_info_func func) 75 { 76 int offset = optname - RDS_INFO_FIRST; 77 78 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 79 80 spin_lock(&rds_info_lock); 81 BUG_ON(rds_info_funcs[offset]); 82 rds_info_funcs[offset] = func; 83 spin_unlock(&rds_info_lock); 84 } 85 EXPORT_SYMBOL_GPL(rds_info_register_func); 86 87 void rds_info_deregister_func(int optname, rds_info_func func) 88 { 89 int offset = optname - RDS_INFO_FIRST; 90 91 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 92 93 spin_lock(&rds_info_lock); 94 BUG_ON(rds_info_funcs[offset] != func); 95 rds_info_funcs[offset] = NULL; 96 spin_unlock(&rds_info_lock); 97 } 98 EXPORT_SYMBOL_GPL(rds_info_deregister_func); 99 100 /* 101 * Typically we hold an atomic kmap across multiple rds_info_copy() calls 102 * because the kmap is so expensive. This must be called before using blocking 103 * operations while holding the mapping and as the iterator is torn down. 104 */ 105 void rds_info_iter_unmap(struct rds_info_iterator *iter) 106 { 107 if (iter->addr) { 108 kunmap_atomic(iter->addr); 109 iter->addr = NULL; 110 } 111 } 112 113 /* 114 * get_user_pages() called flush_dcache_page() on the pages for us. 115 */ 116 void rds_info_copy(struct rds_info_iterator *iter, void *data, 117 unsigned long bytes) 118 { 119 unsigned long this; 120 121 while (bytes) { 122 if (!iter->addr) 123 iter->addr = kmap_atomic(*iter->pages); 124 125 this = min(bytes, PAGE_SIZE - iter->offset); 126 127 rdsdebug("page %p addr %p offset %lu this %lu data %p " 128 "bytes %lu\n", *iter->pages, iter->addr, 129 iter->offset, this, data, bytes); 130 131 memcpy(iter->addr + iter->offset, data, this); 132 133 data += this; 134 bytes -= this; 135 iter->offset += this; 136 137 if (iter->offset == PAGE_SIZE) { 138 kunmap_atomic(iter->addr); 139 iter->addr = NULL; 140 iter->offset = 0; 141 iter->pages++; 142 } 143 } 144 } 145 EXPORT_SYMBOL_GPL(rds_info_copy); 146 147 /* 148 * @opt->iter_out describes the buffer that the information snapshot will be 149 * copied into, and @opt->optlen is the size of that buffer on input. On 150 * output @opt->optlen is set to the size of the requested snapshot in bytes. 151 * 152 * This function returns -errno if there is a failure, particularly -ENOSPC 153 * if the given buffer was not large enough to fit the snapshot. On success 154 * it returns the positive number of bytes of each array element in the 155 * snapshot. 156 */ 157 int rds_info_getsockopt(struct socket *sock, int optname, sockopt_t *opt) 158 { 159 struct rds_info_iterator iter; 160 struct rds_info_lengths lens; 161 unsigned long nr_pages = 0; 162 rds_info_func func; 163 struct page **pages = NULL; 164 size_t offset0 = 0; 165 int npages = 0; 166 int ret; 167 int len; 168 int total; 169 170 len = opt->optlen; 171 172 /* check for all kinds of wrapping and the like */ 173 if (len < 0 || len > INT_MAX - PAGE_SIZE + 1) { 174 ret = -EINVAL; 175 goto out; 176 } 177 178 /* The info producers write into the pages with kmap_atomic() while 179 * holding a spinlock, so they need a genuine page-backed user buffer. 180 */ 181 if (!user_backed_iter(&opt->iter_out)) { 182 ret = -EOPNOTSUPP; 183 goto out; 184 } 185 186 /* a 0 len call is just trying to probe its length */ 187 if (len == 0) 188 goto call_func; 189 190 /* 191 * Preallocate the page array and pass it in so that 192 * iov_iter_extract_pages() fills it in place rather than allocating 193 * one for us. Handing it a non-NULL array keeps ownership of the 194 * array with us on every return path, instead of depending on the 195 * iterator code to allocate and hand it back. 196 */ 197 npages = iov_iter_npages(&opt->iter_out, INT_MAX); 198 pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); 199 if (!pages) { 200 ret = -ENOMEM; 201 goto out; 202 } 203 204 ret = iov_iter_extract_pages(&opt->iter_out, &pages, len, npages, 205 0, &offset0); 206 if (ret < 0) 207 goto out; 208 nr_pages = DIV_ROUND_UP(offset0 + ret, PAGE_SIZE); 209 if (ret != len) { 210 ret = -EAGAIN; /* XXX ? */ 211 goto out; 212 } 213 214 rdsdebug("len %d nr_pages %lu\n", len, nr_pages); 215 216 call_func: 217 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 218 if (!func) { 219 ret = -ENOPROTOOPT; 220 goto out; 221 } 222 223 iter.pages = pages; 224 iter.addr = NULL; 225 iter.offset = offset0; 226 227 func(sock, len, &iter, &lens); 228 BUG_ON(lens.each == 0); 229 230 total = lens.nr * lens.each; 231 232 rds_info_iter_unmap(&iter); 233 234 if (total > len) { 235 len = total; 236 ret = -ENOSPC; 237 } else { 238 len = total; 239 ret = lens.each; 240 } 241 242 opt->optlen = len; 243 244 out: 245 /* 246 * iov_iter_extract_pages() pins only user-backed (ubuf) iters; 247 * iov_iter_extract_will_pin() reports whether an unpin is owed here. 248 */ 249 if (pages && iov_iter_extract_will_pin(&opt->iter_out)) 250 unpin_user_pages_dirty_lock(pages, nr_pages, true); 251 kvfree(pages); 252 253 return ret; 254 } 255