1 #include "ceph_debug.h" 2 3 #include <linux/file.h> 4 #include <linux/namei.h> 5 6 #include "super.h" 7 #include "mds_client.h" 8 #include "pagelist.h" 9 10 /** 11 * Implement fcntl and flock locking functions. 12 */ 13 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 14 u64 pid, u64 pid_ns, 15 int cmd, u64 start, u64 length, u8 wait) 16 { 17 struct inode *inode = file->f_dentry->d_inode; 18 struct ceph_mds_client *mdsc = 19 &ceph_sb_to_client(inode->i_sb)->mdsc; 20 struct ceph_mds_request *req; 21 int err; 22 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 24 if (IS_ERR(req)) 25 return PTR_ERR(req); 26 req->r_inode = igrab(inode); 27 28 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 29 "length: %llu, wait: %d, type`: %d", (int)lock_type, 30 (int)operation, pid, start, length, wait, cmd); 31 32 req->r_args.filelock_change.rule = lock_type; 33 req->r_args.filelock_change.type = cmd; 34 req->r_args.filelock_change.pid = cpu_to_le64(pid); 35 /* This should be adjusted, but I'm not sure if 36 namespaces actually get id numbers*/ 37 req->r_args.filelock_change.pid_namespace = 38 cpu_to_le64((u64)pid_ns); 39 req->r_args.filelock_change.start = cpu_to_le64(start); 40 req->r_args.filelock_change.length = cpu_to_le64(length); 41 req->r_args.filelock_change.wait = wait; 42 43 err = ceph_mdsc_do_request(mdsc, inode, req); 44 ceph_mdsc_put_request(req); 45 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 46 "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, 47 (int)operation, pid, start, length, wait, cmd, err); 48 return err; 49 } 50 51 /** 52 * Attempt to set an fcntl lock. 53 * For now, this just goes away to the server. Later it may be more awesome. 54 */ 55 int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 56 { 57 u64 length; 58 u8 lock_cmd; 59 int err; 60 u8 wait = 0; 61 u16 op = CEPH_MDS_OP_SETFILELOCK; 62 63 fl->fl_nspid = get_pid(task_tgid(current)); 64 dout("ceph_lock, fl_pid:%d", fl->fl_pid); 65 66 /* set wait bit as appropriate, then make command as Ceph expects it*/ 67 if (F_SETLKW == cmd) 68 wait = 1; 69 if (F_GETLK == cmd) 70 op = CEPH_MDS_OP_GETFILELOCK; 71 72 if (F_RDLCK == fl->fl_type) 73 lock_cmd = CEPH_LOCK_SHARED; 74 else if (F_WRLCK == fl->fl_type) 75 lock_cmd = CEPH_LOCK_EXCL; 76 else 77 lock_cmd = CEPH_LOCK_UNLOCK; 78 79 if (LLONG_MAX == fl->fl_end) 80 length = 0; 81 else 82 length = fl->fl_end - fl->fl_start + 1; 83 84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 85 (u64)fl->fl_pid, (u64)fl->fl_nspid, 86 lock_cmd, fl->fl_start, 87 length, wait); 88 if (!err) { 89 dout("mds locked, locking locally"); 90 err = posix_lock_file(file, fl, NULL); 91 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 92 /* undo! This should only happen if the kernel detects 93 * local deadlock. */ 94 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 95 (u64)fl->fl_pid, (u64)fl->fl_nspid, 96 CEPH_LOCK_UNLOCK, fl->fl_start, 97 length, 0); 98 dout("got %d on posix_lock_file, undid lock", err); 99 } 100 } else { 101 dout("mds returned error code %d", err); 102 } 103 return err; 104 } 105 106 int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 107 { 108 u64 length; 109 u8 lock_cmd; 110 int err; 111 u8 wait = 1; 112 113 fl->fl_nspid = get_pid(task_tgid(current)); 114 dout("ceph_flock, fl_pid:%d", fl->fl_pid); 115 116 /* set wait bit, then clear it out of cmd*/ 117 if (cmd & LOCK_NB) 118 wait = 0; 119 cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); 120 /* set command sequence that Ceph wants to see: 121 shared lock, exclusive lock, or unlock */ 122 if (LOCK_SH == cmd) 123 lock_cmd = CEPH_LOCK_SHARED; 124 else if (LOCK_EX == cmd) 125 lock_cmd = CEPH_LOCK_EXCL; 126 else 127 lock_cmd = CEPH_LOCK_UNLOCK; 128 /* mds requires start and length rather than start and end */ 129 if (LLONG_MAX == fl->fl_end) 130 length = 0; 131 else 132 length = fl->fl_end - fl->fl_start + 1; 133 134 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 135 file, (u64)fl->fl_pid, (u64)fl->fl_nspid, 136 lock_cmd, fl->fl_start, 137 length, wait); 138 if (!err) { 139 err = flock_lock_file_wait(file, fl); 140 if (err) { 141 ceph_lock_message(CEPH_LOCK_FLOCK, 142 CEPH_MDS_OP_SETFILELOCK, 143 file, (u64)fl->fl_pid, 144 (u64)fl->fl_nspid, 145 CEPH_LOCK_UNLOCK, fl->fl_start, 146 length, 0); 147 dout("got %d on flock_lock_file_wait, undid lock", err); 148 } 149 } else { 150 dout("mds error code %d", err); 151 } 152 return err; 153 } 154 155 /** 156 * Must be called with BKL already held. Fills in the passed 157 * counter variables, so you can prepare pagelist metadata before calling 158 * ceph_encode_locks. 159 */ 160 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 161 { 162 struct file_lock *lock; 163 164 *fcntl_count = 0; 165 *flock_count = 0; 166 167 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 168 if (lock->fl_flags & FL_POSIX) 169 ++(*fcntl_count); 170 else if (lock->fl_flags & FL_FLOCK) 171 ++(*flock_count); 172 } 173 dout("counted %d flock locks and %d fcntl locks", 174 *flock_count, *fcntl_count); 175 } 176 177 /** 178 * Encode the flock and fcntl locks for the given inode into the pagelist. 179 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 180 * sequential flock locks. 181 * Must be called with BLK already held, and the lock numbers should have 182 * been gathered under the same lock holding window. 183 */ 184 int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, 185 int num_fcntl_locks, int num_flock_locks) 186 { 187 struct file_lock *lock; 188 struct ceph_filelock cephlock; 189 int err = 0; 190 191 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 192 num_fcntl_locks); 193 err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); 194 if (err) 195 goto fail; 196 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 197 if (lock->fl_flags & FL_POSIX) { 198 err = lock_to_ceph_filelock(lock, &cephlock); 199 if (err) 200 goto fail; 201 err = ceph_pagelist_append(pagelist, &cephlock, 202 sizeof(struct ceph_filelock)); 203 } 204 if (err) 205 goto fail; 206 } 207 208 err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); 209 if (err) 210 goto fail; 211 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 212 if (lock->fl_flags & FL_FLOCK) { 213 err = lock_to_ceph_filelock(lock, &cephlock); 214 if (err) 215 goto fail; 216 err = ceph_pagelist_append(pagelist, &cephlock, 217 sizeof(struct ceph_filelock)); 218 } 219 if (err) 220 goto fail; 221 } 222 fail: 223 return err; 224 } 225 226 /* 227 * Given a pointer to a lock, convert it to a ceph filelock 228 */ 229 int lock_to_ceph_filelock(struct file_lock *lock, 230 struct ceph_filelock *cephlock) 231 { 232 int err = 0; 233 234 cephlock->start = cpu_to_le64(lock->fl_start); 235 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 236 cephlock->client = cpu_to_le64(0); 237 cephlock->pid = cpu_to_le64(lock->fl_pid); 238 cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); 239 240 switch (lock->fl_type) { 241 case F_RDLCK: 242 cephlock->type = CEPH_LOCK_SHARED; 243 break; 244 case F_WRLCK: 245 cephlock->type = CEPH_LOCK_EXCL; 246 break; 247 case F_UNLCK: 248 cephlock->type = CEPH_LOCK_UNLOCK; 249 break; 250 default: 251 dout("Have unknown lock type %d", lock->fl_type); 252 err = -EINVAL; 253 } 254 255 return err; 256 } 257