1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/file.h> 13 #include <linux/splice.h> 14 #include <linux/xattr.h> 15 #include <linux/security.h> 16 #include <linux/uaccess.h> 17 #include <linux/sched.h> 18 #include <linux/namei.h> 19 #include "overlayfs.h" 20 21 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) 22 23 int ovl_copy_xattr(struct dentry *old, struct dentry *new) 24 { 25 ssize_t list_size, size; 26 char *buf, *name, *value; 27 int error; 28 29 if (!old->d_inode->i_op->getxattr || 30 !new->d_inode->i_op->getxattr) 31 return 0; 32 33 list_size = vfs_listxattr(old, NULL, 0); 34 if (list_size <= 0) { 35 if (list_size == -EOPNOTSUPP) 36 return 0; 37 return list_size; 38 } 39 40 buf = kzalloc(list_size, GFP_KERNEL); 41 if (!buf) 42 return -ENOMEM; 43 44 error = -ENOMEM; 45 value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); 46 if (!value) 47 goto out; 48 49 list_size = vfs_listxattr(old, buf, list_size); 50 if (list_size <= 0) { 51 error = list_size; 52 goto out_free_value; 53 } 54 55 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { 56 size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); 57 if (size <= 0) { 58 error = size; 59 goto out_free_value; 60 } 61 error = vfs_setxattr(new, name, value, size, 0); 62 if (error) 63 goto out_free_value; 64 } 65 66 out_free_value: 67 kfree(value); 68 out: 69 kfree(buf); 70 return error; 71 } 72 73 static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) 74 { 75 struct file *old_file; 76 struct file *new_file; 77 loff_t old_pos = 0; 78 loff_t new_pos = 0; 79 int error = 0; 80 81 if (len == 0) 82 return 0; 83 84 old_file = ovl_path_open(old, O_RDONLY); 85 if (IS_ERR(old_file)) 86 return PTR_ERR(old_file); 87 88 new_file = ovl_path_open(new, O_WRONLY); 89 if (IS_ERR(new_file)) { 90 error = PTR_ERR(new_file); 91 goto out_fput; 92 } 93 94 /* FIXME: copy up sparse files efficiently */ 95 while (len) { 96 size_t this_len = OVL_COPY_UP_CHUNK_SIZE; 97 long bytes; 98 99 if (len < this_len) 100 this_len = len; 101 102 if (signal_pending_state(TASK_KILLABLE, current)) { 103 error = -EINTR; 104 break; 105 } 106 107 bytes = do_splice_direct(old_file, &old_pos, 108 new_file, &new_pos, 109 this_len, SPLICE_F_MOVE); 110 if (bytes <= 0) { 111 error = bytes; 112 break; 113 } 114 WARN_ON(old_pos != new_pos); 115 116 len -= bytes; 117 } 118 119 fput(new_file); 120 out_fput: 121 fput(old_file); 122 return error; 123 } 124 125 static char *ovl_read_symlink(struct dentry *realdentry) 126 { 127 int res; 128 char *buf; 129 struct inode *inode = realdentry->d_inode; 130 mm_segment_t old_fs; 131 132 res = -EINVAL; 133 if (!inode->i_op->readlink) 134 goto err; 135 136 res = -ENOMEM; 137 buf = (char *) __get_free_page(GFP_KERNEL); 138 if (!buf) 139 goto err; 140 141 old_fs = get_fs(); 142 set_fs(get_ds()); 143 /* The cast to a user pointer is valid due to the set_fs() */ 144 res = inode->i_op->readlink(realdentry, 145 (char __user *)buf, PAGE_SIZE - 1); 146 set_fs(old_fs); 147 if (res < 0) { 148 free_page((unsigned long) buf); 149 goto err; 150 } 151 buf[res] = '\0'; 152 153 return buf; 154 155 err: 156 return ERR_PTR(res); 157 } 158 159 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) 160 { 161 struct iattr attr = { 162 .ia_valid = 163 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, 164 .ia_atime = stat->atime, 165 .ia_mtime = stat->mtime, 166 }; 167 168 return notify_change(upperdentry, &attr, NULL); 169 } 170 171 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) 172 { 173 int err = 0; 174 175 if (!S_ISLNK(stat->mode)) { 176 struct iattr attr = { 177 .ia_valid = ATTR_MODE, 178 .ia_mode = stat->mode, 179 }; 180 err = notify_change(upperdentry, &attr, NULL); 181 } 182 if (!err) { 183 struct iattr attr = { 184 .ia_valid = ATTR_UID | ATTR_GID, 185 .ia_uid = stat->uid, 186 .ia_gid = stat->gid, 187 }; 188 err = notify_change(upperdentry, &attr, NULL); 189 } 190 if (!err) 191 ovl_set_timestamps(upperdentry, stat); 192 193 return err; 194 } 195 196 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, 197 struct dentry *dentry, struct path *lowerpath, 198 struct kstat *stat, struct iattr *attr, 199 const char *link) 200 { 201 struct inode *wdir = workdir->d_inode; 202 struct inode *udir = upperdir->d_inode; 203 struct dentry *newdentry = NULL; 204 struct dentry *upper = NULL; 205 umode_t mode = stat->mode; 206 int err; 207 208 newdentry = ovl_lookup_temp(workdir, dentry); 209 err = PTR_ERR(newdentry); 210 if (IS_ERR(newdentry)) 211 goto out; 212 213 upper = lookup_one_len(dentry->d_name.name, upperdir, 214 dentry->d_name.len); 215 err = PTR_ERR(upper); 216 if (IS_ERR(upper)) 217 goto out1; 218 219 /* Can't properly set mode on creation because of the umask */ 220 stat->mode &= S_IFMT; 221 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); 222 stat->mode = mode; 223 if (err) 224 goto out2; 225 226 if (S_ISREG(stat->mode)) { 227 struct path upperpath; 228 ovl_path_upper(dentry, &upperpath); 229 BUG_ON(upperpath.dentry != NULL); 230 upperpath.dentry = newdentry; 231 232 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); 233 if (err) 234 goto out_cleanup; 235 } 236 237 err = ovl_copy_xattr(lowerpath->dentry, newdentry); 238 if (err) 239 goto out_cleanup; 240 241 mutex_lock(&newdentry->d_inode->i_mutex); 242 err = ovl_set_attr(newdentry, stat); 243 if (!err && attr) 244 err = notify_change(newdentry, attr, NULL); 245 mutex_unlock(&newdentry->d_inode->i_mutex); 246 if (err) 247 goto out_cleanup; 248 249 err = ovl_do_rename(wdir, newdentry, udir, upper, 0); 250 if (err) 251 goto out_cleanup; 252 253 ovl_dentry_update(dentry, newdentry); 254 newdentry = NULL; 255 256 /* 257 * Non-directores become opaque when copied up. 258 */ 259 if (!S_ISDIR(stat->mode)) 260 ovl_dentry_set_opaque(dentry, true); 261 out2: 262 dput(upper); 263 out1: 264 dput(newdentry); 265 out: 266 return err; 267 268 out_cleanup: 269 ovl_cleanup(wdir, newdentry); 270 goto out; 271 } 272 273 /* 274 * Copy up a single dentry 275 * 276 * Directory renames only allowed on "pure upper" (already created on 277 * upper filesystem, never copied up). Directories which are on lower or 278 * are merged may not be renamed. For these -EXDEV is returned and 279 * userspace has to deal with it. This means, when copying up a 280 * directory we can rely on it and ancestors being stable. 281 * 282 * Non-directory renames start with copy up of source if necessary. The 283 * actual rename will only proceed once the copy up was successful. Copy 284 * up uses upper parent i_mutex for exclusion. Since rename can change 285 * d_parent it is possible that the copy up will lock the old parent. At 286 * that point the file will have already been copied up anyway. 287 */ 288 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, 289 struct path *lowerpath, struct kstat *stat, 290 struct iattr *attr) 291 { 292 struct dentry *workdir = ovl_workdir(dentry); 293 int err; 294 struct kstat pstat; 295 struct path parentpath; 296 struct dentry *upperdir; 297 struct dentry *upperdentry; 298 const struct cred *old_cred; 299 struct cred *override_cred; 300 char *link = NULL; 301 302 if (WARN_ON(!workdir)) 303 return -EROFS; 304 305 ovl_path_upper(parent, &parentpath); 306 upperdir = parentpath.dentry; 307 308 err = vfs_getattr(&parentpath, &pstat); 309 if (err) 310 return err; 311 312 if (S_ISLNK(stat->mode)) { 313 link = ovl_read_symlink(lowerpath->dentry); 314 if (IS_ERR(link)) 315 return PTR_ERR(link); 316 } 317 318 err = -ENOMEM; 319 override_cred = prepare_creds(); 320 if (!override_cred) 321 goto out_free_link; 322 323 override_cred->fsuid = stat->uid; 324 override_cred->fsgid = stat->gid; 325 /* 326 * CAP_SYS_ADMIN for copying up extended attributes 327 * CAP_DAC_OVERRIDE for create 328 * CAP_FOWNER for chmod, timestamp update 329 * CAP_FSETID for chmod 330 * CAP_CHOWN for chown 331 * CAP_MKNOD for mknod 332 */ 333 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); 334 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); 335 cap_raise(override_cred->cap_effective, CAP_FOWNER); 336 cap_raise(override_cred->cap_effective, CAP_FSETID); 337 cap_raise(override_cred->cap_effective, CAP_CHOWN); 338 cap_raise(override_cred->cap_effective, CAP_MKNOD); 339 old_cred = override_creds(override_cred); 340 341 err = -EIO; 342 if (lock_rename(workdir, upperdir) != NULL) { 343 pr_err("overlayfs: failed to lock workdir+upperdir\n"); 344 goto out_unlock; 345 } 346 upperdentry = ovl_dentry_upper(dentry); 347 if (upperdentry) { 348 unlock_rename(workdir, upperdir); 349 err = 0; 350 /* Raced with another copy-up? Do the setattr here */ 351 if (attr) { 352 mutex_lock(&upperdentry->d_inode->i_mutex); 353 err = notify_change(upperdentry, attr, NULL); 354 mutex_unlock(&upperdentry->d_inode->i_mutex); 355 } 356 goto out_put_cred; 357 } 358 359 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, 360 stat, attr, link); 361 if (!err) { 362 /* Restore timestamps on parent (best effort) */ 363 ovl_set_timestamps(upperdir, &pstat); 364 } 365 out_unlock: 366 unlock_rename(workdir, upperdir); 367 out_put_cred: 368 revert_creds(old_cred); 369 put_cred(override_cred); 370 371 out_free_link: 372 if (link) 373 free_page((unsigned long) link); 374 375 return err; 376 } 377 378 int ovl_copy_up(struct dentry *dentry) 379 { 380 int err; 381 382 err = 0; 383 while (!err) { 384 struct dentry *next; 385 struct dentry *parent; 386 struct path lowerpath; 387 struct kstat stat; 388 enum ovl_path_type type = ovl_path_type(dentry); 389 390 if (OVL_TYPE_UPPER(type)) 391 break; 392 393 next = dget(dentry); 394 /* find the topmost dentry not yet copied up */ 395 for (;;) { 396 parent = dget_parent(next); 397 398 type = ovl_path_type(parent); 399 if (OVL_TYPE_UPPER(type)) 400 break; 401 402 dput(next); 403 next = parent; 404 } 405 406 ovl_path_lower(next, &lowerpath); 407 err = vfs_getattr(&lowerpath, &stat); 408 if (!err) 409 err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); 410 411 dput(parent); 412 dput(next); 413 } 414 415 return err; 416 } 417