1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2017 Omnibond Systems, L.L.C. 4 */ 5 6 #include "protocol.h" 7 #include "orangefs-kernel.h" 8 #include "orangefs-bufmap.h" 9 10 struct orangefs_dir_part { 11 struct orangefs_dir_part *next; 12 size_t len; 13 }; 14 15 struct orangefs_dir { 16 __u64 token; 17 struct orangefs_dir_part *part; 18 loff_t end; 19 int error; 20 }; 21 22 #define PART_SHIFT (24) 23 #define PART_SIZE (1<<24) 24 #define PART_MASK (~(PART_SIZE - 1)) 25 26 /* 27 * There can be up to 512 directory entries. Each entry is encoded as 28 * follows: 29 * 4 bytes: string size (n) 30 * n bytes: string 31 * 1 byte: trailing zero 32 * padding to 8 bytes 33 * 16 bytes: khandle 34 * padding to 8 bytes 35 * 36 * The trailer_buf starts with a struct orangefs_readdir_response_s 37 * which must be skipped to get to the directory data. 38 * 39 * The data which is received from the userspace daemon is termed a 40 * part and is stored in a linked list in case more than one part is 41 * needed for a large directory. 42 * 43 * The position pointer (ctx->pos) encodes the part and offset on which 44 * to begin reading at. Bits above PART_SHIFT encode the part and bits 45 * below PART_SHIFT encode the offset. Parts are stored in a linked 46 * list which grows as data is received from the server. The overhead 47 * associated with managing the list is presumed to be small compared to 48 * the overhead of communicating with the server. 49 * 50 * As data is received from the server, it is placed at the end of the 51 * part list. Data is parsed from the current position as it is needed. 52 * When data is determined to be corrupt, it is either because the 53 * userspace component has sent back corrupt data or because the file 54 * pointer has been moved to an invalid location. Since the two cannot 55 * be differentiated, return EIO. 56 * 57 * Part zero is synthesized to contains `.' and `..'. Part one is the 58 * first part of the part list. 59 */ 60 61 static int do_readdir(struct orangefs_dir *od, struct inode *inode, 62 struct orangefs_kernel_op_s *op) 63 { 64 struct orangefs_inode_s *oi = ORANGEFS_I(inode); 65 struct orangefs_readdir_response_s *resp; 66 int bufi, r; 67 68 /* 69 * Despite the badly named field, readdir does not use shared 70 * memory. However, there are a limited number of readdir 71 * slots, which must be allocated here. This flag simply tells 72 * the op scheduler to return the op here for retry. 73 */ 74 op->uses_shared_memory = 1; 75 op->upcall.req.readdir.refn = oi->refn; 76 op->upcall.req.readdir.token = od->token; 77 op->upcall.req.readdir.max_dirent_count = 78 ORANGEFS_MAX_DIRENT_COUNT_READDIR; 79 80 again: 81 bufi = orangefs_readdir_index_get(); 82 if (bufi < 0) { 83 od->error = bufi; 84 return bufi; 85 } 86 87 op->upcall.req.readdir.buf_index = bufi; 88 89 r = service_operation(op, "orangefs_readdir", 90 get_interruptible_flag(inode)); 91 92 orangefs_readdir_index_put(bufi); 93 94 if (op_state_purged(op)) { 95 if (r == -EAGAIN) { 96 vfree(op->downcall.trailer_buf); 97 goto again; 98 } else if (r == -EIO) { 99 vfree(op->downcall.trailer_buf); 100 od->error = r; 101 return r; 102 } 103 } 104 105 if (r < 0) { 106 vfree(op->downcall.trailer_buf); 107 od->error = r; 108 return r; 109 } else if (op->downcall.status) { 110 vfree(op->downcall.trailer_buf); 111 od->error = op->downcall.status; 112 return op->downcall.status; 113 } 114 115 /* 116 * The maximum size is size per entry times the 512 entries plus 117 * the header. This is well under the limit. 118 */ 119 if (op->downcall.trailer_size > PART_SIZE) { 120 vfree(op->downcall.trailer_buf); 121 od->error = -EIO; 122 return -EIO; 123 } 124 125 resp = (struct orangefs_readdir_response_s *) 126 op->downcall.trailer_buf; 127 od->token = resp->token; 128 return 0; 129 } 130 131 static int parse_readdir(struct orangefs_dir *od, 132 struct orangefs_kernel_op_s *op) 133 { 134 struct orangefs_dir_part *part, *new; 135 size_t count; 136 137 count = 1; 138 part = od->part; 139 while (part) { 140 count++; 141 if (part->next) 142 part = part->next; 143 else 144 break; 145 } 146 147 new = (void *)op->downcall.trailer_buf; 148 new->next = NULL; 149 new->len = op->downcall.trailer_size - 150 sizeof(struct orangefs_readdir_response_s); 151 if (!od->part) 152 od->part = new; 153 else 154 part->next = new; 155 count++; 156 od->end = count << PART_SHIFT; 157 158 return 0; 159 } 160 161 static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode) 162 { 163 struct orangefs_kernel_op_s *op; 164 int r; 165 166 op = op_alloc(ORANGEFS_VFS_OP_READDIR); 167 if (!op) { 168 od->error = -ENOMEM; 169 return -ENOMEM; 170 } 171 r = do_readdir(od, inode, op); 172 if (r) { 173 od->error = r; 174 goto out; 175 } 176 r = parse_readdir(od, op); 177 if (r) { 178 od->error = r; 179 goto out; 180 } 181 182 od->error = 0; 183 out: 184 op_release(op); 185 return od->error; 186 } 187 188 static int fill_from_part(struct orangefs_dir_part *part, 189 struct dir_context *ctx) 190 { 191 const int offset = sizeof(struct orangefs_readdir_response_s); 192 struct orangefs_khandle *khandle; 193 __u32 *len, padlen; 194 loff_t i; 195 char *s; 196 i = ctx->pos & ~PART_MASK; 197 198 /* The file offset from userspace is too large. */ 199 if (i > part->len) 200 return 1; 201 202 /* 203 * If the seek pointer is positioned just before an entry it 204 * should find the next entry. 205 */ 206 if (i % 8) 207 i = i + (8 - i%8)%8; 208 209 while (i < part->len) { 210 if (part->len < i + sizeof *len) 211 break; 212 len = (void *)part + offset + i; 213 /* 214 * len is the size of the string itself. padlen is the 215 * total size of the encoded string. 216 */ 217 padlen = (sizeof *len + *len + 1) + 218 (8 - (sizeof *len + *len + 1)%8)%8; 219 if (part->len < i + padlen + sizeof *khandle) 220 goto next; 221 s = (void *)part + offset + i + sizeof *len; 222 if (s[*len] != 0) 223 goto next; 224 khandle = (void *)part + offset + i + padlen; 225 if (!dir_emit(ctx, s, *len, 226 orangefs_khandle_to_ino(khandle), 227 DT_UNKNOWN)) 228 return 0; 229 i += padlen + sizeof *khandle; 230 i = i + (8 - i%8)%8; 231 BUG_ON(i > part->len); 232 ctx->pos = (ctx->pos & PART_MASK) | i; 233 continue; 234 next: 235 i += 8; 236 } 237 return 1; 238 } 239 240 static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx) 241 { 242 struct orangefs_dir_part *part; 243 size_t count; 244 245 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; 246 247 part = od->part; 248 while (part->next && count) { 249 count--; 250 part = part->next; 251 } 252 /* This means the userspace file offset is invalid. */ 253 if (count) { 254 od->error = -EIO; 255 return -EIO; 256 } 257 258 while (part && part->len) { 259 int r; 260 r = fill_from_part(part, ctx); 261 if (r < 0) { 262 od->error = r; 263 return r; 264 } else if (r == 0) { 265 /* Userspace buffer is full. */ 266 break; 267 } else { 268 /* 269 * The part ran out of data. Move to the next 270 * part. */ 271 ctx->pos = (ctx->pos & PART_MASK) + 272 (1 << PART_SHIFT); 273 part = part->next; 274 } 275 } 276 return 0; 277 } 278 279 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, 280 int whence) 281 { 282 struct orangefs_dir *od = file->private_data; 283 /* 284 * Delete the stored data so userspace sees new directory 285 * entries. 286 */ 287 if (!whence && offset < od->end) { 288 struct orangefs_dir_part *part = od->part; 289 while (part) { 290 struct orangefs_dir_part *next = part->next; 291 vfree(part); 292 part = next; 293 } 294 od->token = ORANGEFS_ITERATE_START; 295 od->part = NULL; 296 od->end = 1 << PART_SHIFT; 297 } 298 return default_llseek(file, offset, whence); 299 } 300 301 static int orangefs_dir_iterate(struct file *file, 302 struct dir_context *ctx) 303 { 304 struct orangefs_dir *od = file->private_data; 305 struct inode *inode = file_inode(file); 306 int r; 307 308 if (od->error) 309 return od->error; 310 311 if (ctx->pos == 0) { 312 if (!dir_emit_dot(file, ctx)) 313 return 0; 314 ctx->pos++; 315 } 316 if (ctx->pos == 1) { 317 if (!dir_emit_dotdot(file, ctx)) 318 return 0; 319 ctx->pos = 1 << PART_SHIFT; 320 } 321 322 /* 323 * The seek position is in the first synthesized part but is not 324 * valid. 325 */ 326 if ((ctx->pos & PART_MASK) == 0) 327 return -EIO; 328 329 r = 0; 330 331 /* 332 * Must read more if the user has sought past what has been read 333 * so far. Stop a user who has sought past the end. 334 */ 335 while (od->token != ORANGEFS_ITERATE_END && 336 ctx->pos > od->end) { 337 r = orangefs_dir_more(od, inode); 338 if (r) 339 return r; 340 } 341 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) 342 return -EIO; 343 344 /* Then try to fill if there's any left in the buffer. */ 345 if (ctx->pos < od->end) { 346 r = orangefs_dir_fill(od, ctx); 347 if (r) 348 return r; 349 } 350 351 /* Finally get some more and try to fill. */ 352 if (od->token != ORANGEFS_ITERATE_END) { 353 r = orangefs_dir_more(od, inode); 354 if (r) 355 return r; 356 r = orangefs_dir_fill(od, ctx); 357 } 358 359 return r; 360 } 361 362 static int orangefs_dir_open(struct inode *inode, struct file *file) 363 { 364 struct orangefs_dir *od; 365 file->private_data = kmalloc(sizeof(struct orangefs_dir), 366 GFP_KERNEL); 367 if (!file->private_data) 368 return -ENOMEM; 369 od = file->private_data; 370 od->token = ORANGEFS_ITERATE_START; 371 od->part = NULL; 372 od->end = 1 << PART_SHIFT; 373 od->error = 0; 374 return 0; 375 } 376 377 static int orangefs_dir_release(struct inode *inode, struct file *file) 378 { 379 struct orangefs_dir *od = file->private_data; 380 struct orangefs_dir_part *part = od->part; 381 while (part) { 382 struct orangefs_dir_part *next = part->next; 383 vfree(part); 384 part = next; 385 } 386 kfree(od); 387 return 0; 388 } 389 390 const struct file_operations orangefs_dir_operations = { 391 .llseek = orangefs_dir_llseek, 392 .read = generic_read_dir, 393 .iterate_shared = orangefs_dir_iterate, 394 .open = orangefs_dir_open, 395 .release = orangefs_dir_release 396 }; 397