1 #include <linux/ceph/ceph_debug.h> 2 3 #include <linux/file.h> 4 #include <linux/namei.h> 5 #include <linux/random.h> 6 7 #include "super.h" 8 #include "mds_client.h" 9 #include <linux/ceph/pagelist.h> 10 11 static u64 lock_secret; 12 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 13 struct ceph_mds_request *req); 14 15 static inline u64 secure_addr(void *addr) 16 { 17 u64 v = lock_secret ^ (u64)(unsigned long)addr; 18 /* 19 * Set the most significant bit, so that MDS knows the 'owner' 20 * is sufficient to identify the owner of lock. (old code uses 21 * both 'owner' and 'pid') 22 */ 23 v |= (1ULL << 63); 24 return v; 25 } 26 27 void __init ceph_flock_init(void) 28 { 29 get_random_bytes(&lock_secret, sizeof(lock_secret)); 30 } 31 32 /** 33 * Implement fcntl and flock locking functions. 34 */ 35 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 36 int cmd, u8 wait, struct file_lock *fl) 37 { 38 struct inode *inode = file_inode(file); 39 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 40 struct ceph_mds_request *req; 41 int err; 42 u64 length = 0; 43 u64 owner; 44 45 if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) 46 wait = 0; 47 48 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 49 if (IS_ERR(req)) 50 return PTR_ERR(req); 51 req->r_inode = inode; 52 ihold(inode); 53 req->r_num_caps = 1; 54 55 /* mds requires start and length rather than start and end */ 56 if (LLONG_MAX == fl->fl_end) 57 length = 0; 58 else 59 length = fl->fl_end - fl->fl_start + 1; 60 61 owner = secure_addr(fl->fl_owner); 62 63 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " 64 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, 65 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, 66 wait, fl->fl_type); 67 68 req->r_args.filelock_change.rule = lock_type; 69 req->r_args.filelock_change.type = cmd; 70 req->r_args.filelock_change.owner = cpu_to_le64(owner); 71 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 72 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 73 req->r_args.filelock_change.length = cpu_to_le64(length); 74 req->r_args.filelock_change.wait = wait; 75 76 if (wait) 77 req->r_wait_for_completion = ceph_lock_wait_for_completion; 78 79 err = ceph_mdsc_do_request(mdsc, inode, req); 80 81 if (operation == CEPH_MDS_OP_GETFILELOCK) { 82 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 83 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 84 fl->fl_type = F_RDLCK; 85 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) 86 fl->fl_type = F_WRLCK; 87 else 88 fl->fl_type = F_UNLCK; 89 90 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); 91 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + 92 le64_to_cpu(req->r_reply_info.filelock_reply->length); 93 if (length >= 1) 94 fl->fl_end = length -1; 95 else 96 fl->fl_end = 0; 97 98 } 99 ceph_mdsc_put_request(req); 100 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 101 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, 102 (int)operation, (u64)fl->fl_pid, fl->fl_start, 103 length, wait, fl->fl_type, err); 104 return err; 105 } 106 107 static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, 108 struct ceph_mds_request *req) 109 { 110 struct ceph_mds_request *intr_req; 111 struct inode *inode = req->r_inode; 112 int err, lock_type; 113 114 BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK); 115 if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL) 116 lock_type = CEPH_LOCK_FCNTL_INTR; 117 else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK) 118 lock_type = CEPH_LOCK_FLOCK_INTR; 119 else 120 BUG_ON(1); 121 BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK); 122 123 err = wait_for_completion_interruptible(&req->r_completion); 124 if (!err) 125 return 0; 126 127 dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", 128 req->r_tid); 129 130 intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK, 131 USE_AUTH_MDS); 132 if (IS_ERR(intr_req)) 133 return PTR_ERR(intr_req); 134 135 intr_req->r_inode = inode; 136 ihold(inode); 137 intr_req->r_num_caps = 1; 138 139 intr_req->r_args.filelock_change = req->r_args.filelock_change; 140 intr_req->r_args.filelock_change.rule = lock_type; 141 intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK; 142 143 err = ceph_mdsc_do_request(mdsc, inode, intr_req); 144 ceph_mdsc_put_request(intr_req); 145 146 if (err && err != -ERESTARTSYS) 147 return err; 148 149 wait_for_completion(&req->r_completion); 150 return 0; 151 } 152 153 /** 154 * Attempt to set an fcntl lock. 155 * For now, this just goes away to the server. Later it may be more awesome. 156 */ 157 int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 158 { 159 u8 lock_cmd; 160 int err; 161 u8 wait = 0; 162 u16 op = CEPH_MDS_OP_SETFILELOCK; 163 164 if (!(fl->fl_flags & FL_POSIX)) 165 return -ENOLCK; 166 /* No mandatory locks */ 167 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) 168 return -ENOLCK; 169 170 dout("ceph_lock, fl_owner: %p", fl->fl_owner); 171 172 /* set wait bit as appropriate, then make command as Ceph expects it*/ 173 if (IS_GETLK(cmd)) 174 op = CEPH_MDS_OP_GETFILELOCK; 175 else if (IS_SETLKW(cmd)) 176 wait = 1; 177 178 if (F_RDLCK == fl->fl_type) 179 lock_cmd = CEPH_LOCK_SHARED; 180 else if (F_WRLCK == fl->fl_type) 181 lock_cmd = CEPH_LOCK_EXCL; 182 else 183 lock_cmd = CEPH_LOCK_UNLOCK; 184 185 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 186 if (!err) { 187 if (op != CEPH_MDS_OP_GETFILELOCK) { 188 dout("mds locked, locking locally"); 189 err = posix_lock_file(file, fl, NULL); 190 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 191 /* undo! This should only happen if 192 * the kernel detects local 193 * deadlock. */ 194 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 195 CEPH_LOCK_UNLOCK, 0, fl); 196 dout("got %d on posix_lock_file, undid lock", 197 err); 198 } 199 } 200 } 201 return err; 202 } 203 204 int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 205 { 206 u8 lock_cmd; 207 int err; 208 u8 wait = 0; 209 210 if (!(fl->fl_flags & FL_FLOCK)) 211 return -ENOLCK; 212 /* No mandatory locks */ 213 if (fl->fl_type & LOCK_MAND) 214 return -EOPNOTSUPP; 215 216 dout("ceph_flock, fl_file: %p", fl->fl_file); 217 218 if (IS_SETLKW(cmd)) 219 wait = 1; 220 221 if (F_RDLCK == fl->fl_type) 222 lock_cmd = CEPH_LOCK_SHARED; 223 else if (F_WRLCK == fl->fl_type) 224 lock_cmd = CEPH_LOCK_EXCL; 225 else 226 lock_cmd = CEPH_LOCK_UNLOCK; 227 228 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 229 file, lock_cmd, wait, fl); 230 if (!err) { 231 err = locks_lock_file_wait(file, fl); 232 if (err) { 233 ceph_lock_message(CEPH_LOCK_FLOCK, 234 CEPH_MDS_OP_SETFILELOCK, 235 file, CEPH_LOCK_UNLOCK, 0, fl); 236 dout("got %d on locks_lock_file_wait, undid lock", err); 237 } 238 } 239 return err; 240 } 241 242 /* 243 * Fills in the passed counter variables, so you can prepare pagelist metadata 244 * before calling ceph_encode_locks. 245 */ 246 void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 247 { 248 struct file_lock *lock; 249 struct file_lock_context *ctx; 250 251 *fcntl_count = 0; 252 *flock_count = 0; 253 254 ctx = inode->i_flctx; 255 if (ctx) { 256 spin_lock(&ctx->flc_lock); 257 list_for_each_entry(lock, &ctx->flc_posix, fl_list) 258 ++(*fcntl_count); 259 list_for_each_entry(lock, &ctx->flc_flock, fl_list) 260 ++(*flock_count); 261 spin_unlock(&ctx->flc_lock); 262 } 263 dout("counted %d flock locks and %d fcntl locks", 264 *flock_count, *fcntl_count); 265 } 266 267 /** 268 * Encode the flock and fcntl locks for the given inode into the ceph_filelock 269 * array. Must be called with inode->i_lock already held. 270 * If we encounter more of a specific lock type than expected, return -ENOSPC. 271 */ 272 int ceph_encode_locks_to_buffer(struct inode *inode, 273 struct ceph_filelock *flocks, 274 int num_fcntl_locks, int num_flock_locks) 275 { 276 struct file_lock *lock; 277 struct file_lock_context *ctx = inode->i_flctx; 278 int err = 0; 279 int seen_fcntl = 0; 280 int seen_flock = 0; 281 int l = 0; 282 283 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 284 num_fcntl_locks); 285 286 if (!ctx) 287 return 0; 288 289 spin_lock(&ctx->flc_lock); 290 list_for_each_entry(lock, &ctx->flc_posix, fl_list) { 291 ++seen_fcntl; 292 if (seen_fcntl > num_fcntl_locks) { 293 err = -ENOSPC; 294 goto fail; 295 } 296 err = lock_to_ceph_filelock(lock, &flocks[l]); 297 if (err) 298 goto fail; 299 ++l; 300 } 301 list_for_each_entry(lock, &ctx->flc_flock, fl_list) { 302 ++seen_flock; 303 if (seen_flock > num_flock_locks) { 304 err = -ENOSPC; 305 goto fail; 306 } 307 err = lock_to_ceph_filelock(lock, &flocks[l]); 308 if (err) 309 goto fail; 310 ++l; 311 } 312 fail: 313 spin_unlock(&ctx->flc_lock); 314 return err; 315 } 316 317 /** 318 * Copy the encoded flock and fcntl locks into the pagelist. 319 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 320 * sequential flock locks. 321 * Returns zero on success. 322 */ 323 int ceph_locks_to_pagelist(struct ceph_filelock *flocks, 324 struct ceph_pagelist *pagelist, 325 int num_fcntl_locks, int num_flock_locks) 326 { 327 int err = 0; 328 __le32 nlocks; 329 330 nlocks = cpu_to_le32(num_fcntl_locks); 331 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 332 if (err) 333 goto out_fail; 334 335 err = ceph_pagelist_append(pagelist, flocks, 336 num_fcntl_locks * sizeof(*flocks)); 337 if (err) 338 goto out_fail; 339 340 nlocks = cpu_to_le32(num_flock_locks); 341 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); 342 if (err) 343 goto out_fail; 344 345 err = ceph_pagelist_append(pagelist, 346 &flocks[num_fcntl_locks], 347 num_flock_locks * sizeof(*flocks)); 348 out_fail: 349 return err; 350 } 351 352 /* 353 * Given a pointer to a lock, convert it to a ceph filelock 354 */ 355 int lock_to_ceph_filelock(struct file_lock *lock, 356 struct ceph_filelock *cephlock) 357 { 358 int err = 0; 359 cephlock->start = cpu_to_le64(lock->fl_start); 360 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 361 cephlock->client = cpu_to_le64(0); 362 cephlock->pid = cpu_to_le64((u64)lock->fl_pid); 363 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); 364 365 switch (lock->fl_type) { 366 case F_RDLCK: 367 cephlock->type = CEPH_LOCK_SHARED; 368 break; 369 case F_WRLCK: 370 cephlock->type = CEPH_LOCK_EXCL; 371 break; 372 case F_UNLCK: 373 cephlock->type = CEPH_LOCK_UNLOCK; 374 break; 375 default: 376 dout("Have unknown lock type %d", lock->fl_type); 377 err = -EINVAL; 378 } 379 380 return err; 381 } 382