1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * (C) 2001 Clemson University and The University of Chicago 4 * Copyright 2018 Omnibond Systems, L.L.C. 5 * 6 * See COPYING in top-level directory. 7 */ 8 #include <linux/kernel.h> 9 #include "protocol.h" 10 #include "orangefs-kernel.h" 11 #include "orangefs-dev-proto.h" 12 #include "orangefs-bufmap.h" 13 14 __s32 fsid_of_op(struct orangefs_kernel_op_s *op) 15 { 16 __s32 fsid = ORANGEFS_FS_ID_NULL; 17 18 if (op) { 19 switch (op->upcall.type) { 20 case ORANGEFS_VFS_OP_FILE_IO: 21 fsid = op->upcall.req.io.refn.fs_id; 22 break; 23 case ORANGEFS_VFS_OP_LOOKUP: 24 fsid = op->upcall.req.lookup.parent_refn.fs_id; 25 break; 26 case ORANGEFS_VFS_OP_CREATE: 27 fsid = op->upcall.req.create.parent_refn.fs_id; 28 break; 29 case ORANGEFS_VFS_OP_GETATTR: 30 fsid = op->upcall.req.getattr.refn.fs_id; 31 break; 32 case ORANGEFS_VFS_OP_REMOVE: 33 fsid = op->upcall.req.remove.parent_refn.fs_id; 34 break; 35 case ORANGEFS_VFS_OP_MKDIR: 36 fsid = op->upcall.req.mkdir.parent_refn.fs_id; 37 break; 38 case ORANGEFS_VFS_OP_READDIR: 39 fsid = op->upcall.req.readdir.refn.fs_id; 40 break; 41 case ORANGEFS_VFS_OP_SETATTR: 42 fsid = op->upcall.req.setattr.refn.fs_id; 43 break; 44 case ORANGEFS_VFS_OP_SYMLINK: 45 fsid = op->upcall.req.sym.parent_refn.fs_id; 46 break; 47 case ORANGEFS_VFS_OP_RENAME: 48 fsid = op->upcall.req.rename.old_parent_refn.fs_id; 49 break; 50 case ORANGEFS_VFS_OP_STATFS: 51 fsid = op->upcall.req.statfs.fs_id; 52 break; 53 case ORANGEFS_VFS_OP_TRUNCATE: 54 fsid = op->upcall.req.truncate.refn.fs_id; 55 break; 56 case ORANGEFS_VFS_OP_RA_FLUSH: 57 fsid = op->upcall.req.ra_cache_flush.refn.fs_id; 58 break; 59 case ORANGEFS_VFS_OP_FS_UMOUNT: 60 fsid = op->upcall.req.fs_umount.fs_id; 61 break; 62 case ORANGEFS_VFS_OP_GETXATTR: 63 fsid = op->upcall.req.getxattr.refn.fs_id; 64 break; 65 case ORANGEFS_VFS_OP_SETXATTR: 66 fsid = op->upcall.req.setxattr.refn.fs_id; 67 break; 68 case ORANGEFS_VFS_OP_LISTXATTR: 69 fsid = op->upcall.req.listxattr.refn.fs_id; 70 break; 71 case ORANGEFS_VFS_OP_REMOVEXATTR: 72 fsid = op->upcall.req.removexattr.refn.fs_id; 73 break; 74 case ORANGEFS_VFS_OP_FSYNC: 75 fsid = op->upcall.req.fsync.refn.fs_id; 76 break; 77 default: 78 break; 79 } 80 } 81 return fsid; 82 } 83 84 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs) 85 { 86 int flags = 0; 87 if (attrs->flags & ORANGEFS_IMMUTABLE_FL) 88 flags |= S_IMMUTABLE; 89 else 90 flags &= ~S_IMMUTABLE; 91 if (attrs->flags & ORANGEFS_APPEND_FL) 92 flags |= S_APPEND; 93 else 94 flags &= ~S_APPEND; 95 if (attrs->flags & ORANGEFS_NOATIME_FL) 96 flags |= S_NOATIME; 97 else 98 flags &= ~S_NOATIME; 99 return flags; 100 } 101 102 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) 103 { 104 int perm_mode = 0; 105 106 if (attrs->perms & ORANGEFS_O_EXECUTE) 107 perm_mode |= S_IXOTH; 108 if (attrs->perms & ORANGEFS_O_WRITE) 109 perm_mode |= S_IWOTH; 110 if (attrs->perms & ORANGEFS_O_READ) 111 perm_mode |= S_IROTH; 112 113 if (attrs->perms & ORANGEFS_G_EXECUTE) 114 perm_mode |= S_IXGRP; 115 if (attrs->perms & ORANGEFS_G_WRITE) 116 perm_mode |= S_IWGRP; 117 if (attrs->perms & ORANGEFS_G_READ) 118 perm_mode |= S_IRGRP; 119 120 if (attrs->perms & ORANGEFS_U_EXECUTE) 121 perm_mode |= S_IXUSR; 122 if (attrs->perms & ORANGEFS_U_WRITE) 123 perm_mode |= S_IWUSR; 124 if (attrs->perms & ORANGEFS_U_READ) 125 perm_mode |= S_IRUSR; 126 127 if (attrs->perms & ORANGEFS_G_SGID) 128 perm_mode |= S_ISGID; 129 if (attrs->perms & ORANGEFS_U_SUID) 130 perm_mode |= S_ISUID; 131 132 return perm_mode; 133 } 134 135 /* 136 * NOTE: in kernel land, we never use the sys_attr->link_target for 137 * anything, so don't bother copying it into the sys_attr object here. 138 */ 139 static inline void copy_attributes_from_inode(struct inode *inode, 140 struct ORANGEFS_sys_attr_s *attrs) 141 { 142 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 143 attrs->mask = 0; 144 if (orangefs_inode->attr_valid & ATTR_UID) { 145 attrs->owner = from_kuid(&init_user_ns, inode->i_uid); 146 attrs->mask |= ORANGEFS_ATTR_SYS_UID; 147 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); 148 } 149 if (orangefs_inode->attr_valid & ATTR_GID) { 150 attrs->group = from_kgid(&init_user_ns, inode->i_gid); 151 attrs->mask |= ORANGEFS_ATTR_SYS_GID; 152 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); 153 } 154 155 if (orangefs_inode->attr_valid & ATTR_ATIME) { 156 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; 157 if (orangefs_inode->attr_valid & ATTR_ATIME_SET) { 158 attrs->atime = (time64_t)inode->i_atime.tv_sec; 159 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; 160 } 161 } 162 if (orangefs_inode->attr_valid & ATTR_MTIME) { 163 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; 164 if (orangefs_inode->attr_valid & ATTR_MTIME_SET) { 165 attrs->mtime = (time64_t)inode->i_mtime.tv_sec; 166 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; 167 } 168 } 169 if (orangefs_inode->attr_valid & ATTR_CTIME) 170 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; 171 172 /* 173 * ORANGEFS cannot set size with a setattr operation. Probably not 174 * likely to be requested through the VFS, but just in case, don't 175 * worry about ATTR_SIZE 176 */ 177 178 if (orangefs_inode->attr_valid & ATTR_MODE) { 179 attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode); 180 attrs->mask |= ORANGEFS_ATTR_SYS_PERM; 181 } 182 } 183 184 static int orangefs_inode_type(enum orangefs_ds_type objtype) 185 { 186 if (objtype == ORANGEFS_TYPE_METAFILE) 187 return S_IFREG; 188 else if (objtype == ORANGEFS_TYPE_DIRECTORY) 189 return S_IFDIR; 190 else if (objtype == ORANGEFS_TYPE_SYMLINK) 191 return S_IFLNK; 192 else 193 return -1; 194 } 195 196 static void orangefs_make_bad_inode(struct inode *inode) 197 { 198 if (is_root_handle(inode)) { 199 /* 200 * if this occurs, the pvfs2-client-core was killed but we 201 * can't afford to lose the inode operations and such 202 * associated with the root handle in any case. 203 */ 204 gossip_debug(GOSSIP_UTILS_DEBUG, 205 "*** NOT making bad root inode %pU\n", 206 get_khandle_from_ino(inode)); 207 } else { 208 gossip_debug(GOSSIP_UTILS_DEBUG, 209 "*** making bad inode %pU\n", 210 get_khandle_from_ino(inode)); 211 make_bad_inode(inode); 212 } 213 } 214 215 static int orangefs_inode_is_stale(struct inode *inode, 216 struct ORANGEFS_sys_attr_s *attrs, char *link_target) 217 { 218 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 219 int type = orangefs_inode_type(attrs->objtype); 220 /* 221 * If the inode type or symlink target have changed then this 222 * inode is stale. 223 */ 224 if (type == -1 || !(inode->i_mode & type)) { 225 orangefs_make_bad_inode(inode); 226 return 1; 227 } 228 if (type == S_IFLNK && strncmp(orangefs_inode->link_target, 229 link_target, ORANGEFS_NAME_MAX)) { 230 orangefs_make_bad_inode(inode); 231 return 1; 232 } 233 return 0; 234 } 235 236 int orangefs_inode_getattr(struct inode *inode, int flags) 237 { 238 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 239 struct orangefs_kernel_op_s *new_op; 240 loff_t inode_size; 241 int ret, type; 242 243 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n", 244 __func__, get_khandle_from_ino(inode), flags); 245 246 again: 247 spin_lock(&inode->i_lock); 248 /* Must have all the attributes in the mask and be within cache time. */ 249 if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) || 250 orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) { 251 if (orangefs_inode->attr_valid) { 252 spin_unlock(&inode->i_lock); 253 write_inode_now(inode, 1); 254 goto again; 255 } 256 spin_unlock(&inode->i_lock); 257 return 0; 258 } 259 spin_unlock(&inode->i_lock); 260 261 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 262 if (!new_op) 263 return -ENOMEM; 264 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 265 /* 266 * Size is the hardest attribute to get. The incremental cost of any 267 * other attribute is essentially zero. 268 */ 269 if (flags) 270 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; 271 else 272 new_op->upcall.req.getattr.mask = 273 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE; 274 275 ret = service_operation(new_op, __func__, 276 get_interruptible_flag(inode)); 277 if (ret != 0) 278 goto out; 279 280 again2: 281 spin_lock(&inode->i_lock); 282 /* Must have all the attributes in the mask and be within cache time. */ 283 if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) || 284 orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) { 285 if (orangefs_inode->attr_valid) { 286 spin_unlock(&inode->i_lock); 287 write_inode_now(inode, 1); 288 goto again2; 289 } 290 if (inode->i_state & I_DIRTY_PAGES) { 291 ret = 0; 292 goto out_unlock; 293 } 294 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n", 295 __func__); 296 ret = 0; 297 goto out_unlock; 298 } 299 300 if (!(flags & ORANGEFS_GETATTR_NEW)) { 301 ret = orangefs_inode_is_stale(inode, 302 &new_op->downcall.resp.getattr.attributes, 303 new_op->downcall.resp.getattr.link_target); 304 if (ret) { 305 ret = -ESTALE; 306 goto out_unlock; 307 } 308 } 309 310 type = orangefs_inode_type(new_op-> 311 downcall.resp.getattr.attributes.objtype); 312 switch (type) { 313 case S_IFREG: 314 inode->i_flags = orangefs_inode_flags(&new_op-> 315 downcall.resp.getattr.attributes); 316 if (flags) { 317 inode_size = (loff_t)new_op-> 318 downcall.resp.getattr.attributes.size; 319 inode->i_size = inode_size; 320 inode->i_blkbits = ffs(new_op->downcall.resp.getattr. 321 attributes.blksize); 322 inode->i_bytes = inode_size; 323 inode->i_blocks = 324 (inode_size + 512 - inode_size % 512)/512; 325 } 326 break; 327 case S_IFDIR: 328 if (flags) { 329 inode->i_size = PAGE_SIZE; 330 inode_set_bytes(inode, inode->i_size); 331 } 332 set_nlink(inode, 1); 333 break; 334 case S_IFLNK: 335 if (flags & ORANGEFS_GETATTR_NEW) { 336 inode->i_size = (loff_t)strlen(new_op-> 337 downcall.resp.getattr.link_target); 338 ret = strscpy(orangefs_inode->link_target, 339 new_op->downcall.resp.getattr.link_target, 340 ORANGEFS_NAME_MAX); 341 if (ret == -E2BIG) { 342 ret = -EIO; 343 goto out_unlock; 344 } 345 inode->i_link = orangefs_inode->link_target; 346 } 347 break; 348 /* i.e. -1 */ 349 default: 350 /* XXX: ESTALE? This is what is done if it is not new. */ 351 orangefs_make_bad_inode(inode); 352 ret = -ESTALE; 353 goto out_unlock; 354 } 355 356 inode->i_uid = make_kuid(&init_user_ns, new_op-> 357 downcall.resp.getattr.attributes.owner); 358 inode->i_gid = make_kgid(&init_user_ns, new_op-> 359 downcall.resp.getattr.attributes.group); 360 inode->i_atime.tv_sec = (time64_t)new_op-> 361 downcall.resp.getattr.attributes.atime; 362 inode->i_mtime.tv_sec = (time64_t)new_op-> 363 downcall.resp.getattr.attributes.mtime; 364 inode->i_ctime.tv_sec = (time64_t)new_op-> 365 downcall.resp.getattr.attributes.ctime; 366 inode->i_atime.tv_nsec = 0; 367 inode->i_mtime.tv_nsec = 0; 368 inode->i_ctime.tv_nsec = 0; 369 370 /* special case: mark the root inode as sticky */ 371 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) | 372 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); 373 374 orangefs_inode->getattr_time = jiffies + 375 orangefs_getattr_timeout_msecs*HZ/1000; 376 ret = 0; 377 out_unlock: 378 spin_unlock(&inode->i_lock); 379 out: 380 op_release(new_op); 381 return ret; 382 } 383 384 int orangefs_inode_check_changed(struct inode *inode) 385 { 386 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 387 struct orangefs_kernel_op_s *new_op; 388 int ret; 389 390 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, 391 get_khandle_from_ino(inode)); 392 393 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); 394 if (!new_op) 395 return -ENOMEM; 396 new_op->upcall.req.getattr.refn = orangefs_inode->refn; 397 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE | 398 ORANGEFS_ATTR_SYS_LNK_TARGET; 399 400 ret = service_operation(new_op, __func__, 401 get_interruptible_flag(inode)); 402 if (ret != 0) 403 goto out; 404 405 ret = orangefs_inode_is_stale(inode, 406 &new_op->downcall.resp.getattr.attributes, 407 new_op->downcall.resp.getattr.link_target); 408 out: 409 op_release(new_op); 410 return ret; 411 } 412 413 /* 414 * issues a orangefs setattr request to make sure the new attribute values 415 * take effect if successful. returns 0 on success; -errno otherwise 416 */ 417 int orangefs_inode_setattr(struct inode *inode) 418 { 419 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 420 struct orangefs_kernel_op_s *new_op; 421 int ret; 422 423 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR); 424 if (!new_op) 425 return -ENOMEM; 426 427 spin_lock(&inode->i_lock); 428 new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid); 429 new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid); 430 new_op->upcall.req.setattr.refn = orangefs_inode->refn; 431 copy_attributes_from_inode(inode, 432 &new_op->upcall.req.setattr.attributes); 433 orangefs_inode->attr_valid = 0; 434 if (!new_op->upcall.req.setattr.attributes.mask) { 435 spin_unlock(&inode->i_lock); 436 op_release(new_op); 437 return 0; 438 } 439 spin_unlock(&inode->i_lock); 440 441 ret = service_operation(new_op, __func__, 442 get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK); 443 gossip_debug(GOSSIP_UTILS_DEBUG, 444 "orangefs_inode_setattr: returning %d\n", ret); 445 if (ret) 446 orangefs_make_bad_inode(inode); 447 448 op_release(new_op); 449 450 if (ret == 0) 451 orangefs_inode->getattr_time = jiffies - 1; 452 return ret; 453 } 454 455 /* 456 * The following is a very dirty hack that is now a permanent part of the 457 * ORANGEFS protocol. See protocol.h for more error definitions. 458 */ 459 460 /* The order matches include/orangefs-types.h in the OrangeFS source. */ 461 static int PINT_errno_mapping[] = { 462 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM, 463 EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE, 464 EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG, 465 ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH, 466 EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM, 467 EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE, 468 ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE, 469 EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS, 470 ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY, 471 EACCES, ECONNRESET, ERANGE 472 }; 473 474 int orangefs_normalize_to_errno(__s32 error_code) 475 { 476 __u32 i; 477 478 /* Success */ 479 if (error_code == 0) { 480 return 0; 481 /* 482 * This shouldn't ever happen. If it does it should be fixed on the 483 * server. 484 */ 485 } else if (error_code > 0) { 486 gossip_err("orangefs: error status received.\n"); 487 gossip_err("orangefs: assuming error code is inverted.\n"); 488 error_code = -error_code; 489 } 490 491 /* 492 * XXX: This is very bad since error codes from ORANGEFS may not be 493 * suitable for return into userspace. 494 */ 495 496 /* 497 * Convert ORANGEFS error values into errno values suitable for return 498 * from the kernel. 499 */ 500 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) { 501 if (((-error_code) & 502 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT| 503 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) { 504 /* 505 * cancellation error codes generally correspond to 506 * a timeout from the client's perspective 507 */ 508 error_code = -ETIMEDOUT; 509 } else { 510 /* assume a default error code */ 511 gossip_err("%s: bad error code :%d:.\n", 512 __func__, 513 error_code); 514 error_code = -EINVAL; 515 } 516 517 /* Convert ORANGEFS encoded errno values into regular errno values. */ 518 } else if ((-error_code) & ORANGEFS_ERROR_BIT) { 519 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); 520 if (i < ARRAY_SIZE(PINT_errno_mapping)) 521 error_code = -PINT_errno_mapping[i]; 522 else 523 error_code = -EINVAL; 524 525 /* 526 * Only ORANGEFS protocol error codes should ever come here. Otherwise 527 * there is a bug somewhere. 528 */ 529 } else { 530 gossip_err("%s: unknown error code.\n", __func__); 531 error_code = -EINVAL; 532 } 533 return error_code; 534 } 535 536 #define NUM_MODES 11 537 __s32 ORANGEFS_util_translate_mode(int mode) 538 { 539 int ret = 0; 540 int i = 0; 541 static int modes[NUM_MODES] = { 542 S_IXOTH, S_IWOTH, S_IROTH, 543 S_IXGRP, S_IWGRP, S_IRGRP, 544 S_IXUSR, S_IWUSR, S_IRUSR, 545 S_ISGID, S_ISUID 546 }; 547 static int orangefs_modes[NUM_MODES] = { 548 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ, 549 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ, 550 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ, 551 ORANGEFS_G_SGID, ORANGEFS_U_SUID 552 }; 553 554 for (i = 0; i < NUM_MODES; i++) 555 if (mode & modes[i]) 556 ret |= orangefs_modes[i]; 557 558 return ret; 559 } 560 #undef NUM_MODES 561