1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2017 Omnibond Systems, L.L.C. 4 */ 5 6 #include <linux/filelock.h> 7 #include "protocol.h" 8 #include "orangefs-kernel.h" 9 #include "orangefs-bufmap.h" 10 11 struct orangefs_dir_part { 12 struct orangefs_dir_part *next; 13 size_t len; 14 }; 15 16 struct orangefs_dir { 17 __u64 token; 18 struct orangefs_dir_part *part; 19 loff_t end; 20 int error; 21 }; 22 23 #define PART_SHIFT (24) 24 #define PART_SIZE (1<<24) 25 #define PART_MASK (~(PART_SIZE - 1)) 26 27 /* 28 * There can be up to 512 directory entries. Each entry is encoded as 29 * follows: 30 * 4 bytes: string size (n) 31 * n bytes: string 32 * 1 byte: trailing zero 33 * padding to 8 bytes 34 * 16 bytes: khandle 35 * padding to 8 bytes 36 * 37 * The trailer_buf starts with a struct orangefs_readdir_response_s 38 * which must be skipped to get to the directory data. 39 * 40 * The data which is received from the userspace daemon is termed a 41 * part and is stored in a linked list in case more than one part is 42 * needed for a large directory. 43 * 44 * The position pointer (ctx->pos) encodes the part and offset on which 45 * to begin reading at. Bits above PART_SHIFT encode the part and bits 46 * below PART_SHIFT encode the offset. Parts are stored in a linked 47 * list which grows as data is received from the server. The overhead 48 * associated with managing the list is presumed to be small compared to 49 * the overhead of communicating with the server. 50 * 51 * As data is received from the server, it is placed at the end of the 52 * part list. Data is parsed from the current position as it is needed. 53 * When data is determined to be corrupt, it is either because the 54 * userspace component has sent back corrupt data or because the file 55 * pointer has been moved to an invalid location. Since the two cannot 56 * be differentiated, return EIO. 57 * 58 * Part zero is synthesized to contains `.' and `..'. Part one is the 59 * first part of the part list. 60 */ 61 62 static int do_readdir(struct orangefs_dir *od, struct inode *inode, 63 struct orangefs_kernel_op_s *op) 64 { 65 struct orangefs_inode_s *oi = ORANGEFS_I(inode); 66 struct orangefs_readdir_response_s *resp; 67 int bufi, r; 68 69 /* 70 * Despite the badly named field, readdir does not use shared 71 * memory. However, there are a limited number of readdir 72 * slots, which must be allocated here. This flag simply tells 73 * the op scheduler to return the op here for retry. 74 */ 75 op->uses_shared_memory = 1; 76 op->upcall.req.readdir.refn = oi->refn; 77 op->upcall.req.readdir.token = od->token; 78 op->upcall.req.readdir.max_dirent_count = 79 ORANGEFS_MAX_DIRENT_COUNT_READDIR; 80 81 again: 82 bufi = orangefs_readdir_index_get(); 83 if (bufi < 0) { 84 od->error = bufi; 85 return bufi; 86 } 87 88 op->upcall.req.readdir.buf_index = bufi; 89 90 r = service_operation(op, "orangefs_readdir", 91 get_interruptible_flag(inode)); 92 93 orangefs_readdir_index_put(bufi); 94 95 if (op_state_purged(op)) { 96 if (r == -EAGAIN) { 97 vfree(op->downcall.trailer_buf); 98 goto again; 99 } else if (r == -EIO) { 100 vfree(op->downcall.trailer_buf); 101 od->error = r; 102 return r; 103 } 104 } 105 106 if (r < 0) { 107 vfree(op->downcall.trailer_buf); 108 od->error = r; 109 return r; 110 } else if (op->downcall.status) { 111 vfree(op->downcall.trailer_buf); 112 od->error = op->downcall.status; 113 return op->downcall.status; 114 } 115 116 /* 117 * The maximum size is size per entry times the 512 entries plus 118 * the header. This is well under the limit. 119 */ 120 if (op->downcall.trailer_size > PART_SIZE) { 121 vfree(op->downcall.trailer_buf); 122 od->error = -EIO; 123 return -EIO; 124 } 125 126 resp = (struct orangefs_readdir_response_s *) 127 op->downcall.trailer_buf; 128 od->token = resp->token; 129 return 0; 130 } 131 132 static int parse_readdir(struct orangefs_dir *od, 133 struct orangefs_kernel_op_s *op) 134 { 135 struct orangefs_dir_part *part, *new; 136 size_t count; 137 138 count = 1; 139 part = od->part; 140 while (part) { 141 count++; 142 if (part->next) 143 part = part->next; 144 else 145 break; 146 } 147 148 new = (void *)op->downcall.trailer_buf; 149 new->next = NULL; 150 new->len = op->downcall.trailer_size - 151 sizeof(struct orangefs_readdir_response_s); 152 if (!od->part) 153 od->part = new; 154 else 155 part->next = new; 156 count++; 157 od->end = count << PART_SHIFT; 158 159 return 0; 160 } 161 162 static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode) 163 { 164 struct orangefs_kernel_op_s *op; 165 int r; 166 167 op = op_alloc(ORANGEFS_VFS_OP_READDIR); 168 if (!op) { 169 od->error = -ENOMEM; 170 return -ENOMEM; 171 } 172 r = do_readdir(od, inode, op); 173 if (r) { 174 od->error = r; 175 goto out; 176 } 177 r = parse_readdir(od, op); 178 if (r) { 179 od->error = r; 180 goto out; 181 } 182 183 od->error = 0; 184 out: 185 op_release(op); 186 return od->error; 187 } 188 189 static int fill_from_part(struct orangefs_dir_part *part, 190 struct dir_context *ctx) 191 { 192 const int offset = sizeof(struct orangefs_readdir_response_s); 193 struct orangefs_khandle *khandle; 194 __u32 *len, padlen; 195 loff_t i; 196 char *s; 197 i = ctx->pos & ~PART_MASK; 198 199 /* The file offset from userspace is too large. */ 200 if (i > part->len) 201 return 1; 202 203 /* 204 * If the seek pointer is positioned just before an entry it 205 * should find the next entry. 206 */ 207 if (i % 8) 208 i = i + (8 - i%8)%8; 209 210 while (i < part->len) { 211 if (part->len < i + sizeof *len) 212 break; 213 len = (void *)part + offset + i; 214 /* 215 * len is the size of the string itself. padlen is the 216 * total size of the encoded string. 217 */ 218 padlen = (sizeof *len + *len + 1) + 219 (8 - (sizeof *len + *len + 1)%8)%8; 220 if (part->len < i + padlen + sizeof *khandle) 221 goto next; 222 s = (void *)part + offset + i + sizeof *len; 223 if (s[*len] != 0) 224 goto next; 225 khandle = (void *)part + offset + i + padlen; 226 if (!dir_emit(ctx, s, *len, 227 orangefs_khandle_to_ino(khandle), 228 DT_UNKNOWN)) 229 return 0; 230 i += padlen + sizeof *khandle; 231 i = i + (8 - i%8)%8; 232 BUG_ON(i > part->len); 233 ctx->pos = (ctx->pos & PART_MASK) | i; 234 continue; 235 next: 236 i += 8; 237 } 238 return 1; 239 } 240 241 static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx) 242 { 243 struct orangefs_dir_part *part; 244 size_t count; 245 246 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; 247 248 part = od->part; 249 while (part->next && count) { 250 count--; 251 part = part->next; 252 } 253 /* This means the userspace file offset is invalid. */ 254 if (count) { 255 od->error = -EIO; 256 return -EIO; 257 } 258 259 while (part && part->len) { 260 int r; 261 r = fill_from_part(part, ctx); 262 if (r < 0) { 263 od->error = r; 264 return r; 265 } else if (r == 0) { 266 /* Userspace buffer is full. */ 267 break; 268 } else { 269 /* 270 * The part ran out of data. Move to the next 271 * part. */ 272 ctx->pos = (ctx->pos & PART_MASK) + 273 (1 << PART_SHIFT); 274 part = part->next; 275 } 276 } 277 return 0; 278 } 279 280 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, 281 int whence) 282 { 283 struct orangefs_dir *od = file->private_data; 284 /* 285 * Delete the stored data so userspace sees new directory 286 * entries. 287 */ 288 if (!whence && offset < od->end) { 289 struct orangefs_dir_part *part = od->part; 290 while (part) { 291 struct orangefs_dir_part *next = part->next; 292 vfree(part); 293 part = next; 294 } 295 od->token = ORANGEFS_ITERATE_START; 296 od->part = NULL; 297 od->end = 1 << PART_SHIFT; 298 } 299 return default_llseek(file, offset, whence); 300 } 301 302 static int orangefs_dir_iterate(struct file *file, 303 struct dir_context *ctx) 304 { 305 struct orangefs_dir *od = file->private_data; 306 struct inode *inode = file_inode(file); 307 int r; 308 309 if (od->error) 310 return od->error; 311 312 if (ctx->pos == 0) { 313 if (!dir_emit_dot(file, ctx)) 314 return 0; 315 ctx->pos++; 316 } 317 if (ctx->pos == 1) { 318 if (!dir_emit_dotdot(file, ctx)) 319 return 0; 320 ctx->pos = 1 << PART_SHIFT; 321 } 322 323 /* 324 * The seek position is in the first synthesized part but is not 325 * valid. 326 */ 327 if ((ctx->pos & PART_MASK) == 0) 328 return -EIO; 329 330 r = 0; 331 332 /* 333 * Must read more if the user has sought past what has been read 334 * so far. Stop a user who has sought past the end. 335 */ 336 while (od->token != ORANGEFS_ITERATE_END && 337 ctx->pos > od->end) { 338 r = orangefs_dir_more(od, inode); 339 if (r) 340 return r; 341 } 342 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) 343 return -EIO; 344 345 /* Then try to fill if there's any left in the buffer. */ 346 if (ctx->pos < od->end) { 347 r = orangefs_dir_fill(od, ctx); 348 if (r) 349 return r; 350 } 351 352 /* Finally get some more and try to fill. */ 353 if (od->token != ORANGEFS_ITERATE_END) { 354 r = orangefs_dir_more(od, inode); 355 if (r) 356 return r; 357 r = orangefs_dir_fill(od, ctx); 358 } 359 360 return r; 361 } 362 363 static int orangefs_dir_open(struct inode *inode, struct file *file) 364 { 365 struct orangefs_dir *od; 366 file->private_data = kmalloc(sizeof(struct orangefs_dir), 367 GFP_KERNEL); 368 if (!file->private_data) 369 return -ENOMEM; 370 od = file->private_data; 371 od->token = ORANGEFS_ITERATE_START; 372 od->part = NULL; 373 od->end = 1 << PART_SHIFT; 374 od->error = 0; 375 return 0; 376 } 377 378 static int orangefs_dir_release(struct inode *inode, struct file *file) 379 { 380 struct orangefs_dir *od = file->private_data; 381 struct orangefs_dir_part *part = od->part; 382 while (part) { 383 struct orangefs_dir_part *next = part->next; 384 vfree(part); 385 part = next; 386 } 387 kfree(od); 388 return 0; 389 } 390 391 const struct file_operations orangefs_dir_operations = { 392 .llseek = orangefs_dir_llseek, 393 .read = generic_read_dir, 394 .iterate_shared = orangefs_dir_iterate, 395 .open = orangefs_dir_open, 396 .release = orangefs_dir_release, 397 .setlease = generic_setlease, 398 }; 399