dlmglue.c (e63aecb651ba73dffc62f9608ee1b7ae2a0ffd4b) | dlmglue.c (cf8e06f1a860d8680d6bb4ac8ec7d7724988e46f) |
---|---|
1/* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * dlmglue.c 5 * 6 * Code which implements an OCFS2 specific interface to our DLM. 7 * 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. --- 54 unchanged lines hidden (view full) --- 63 int mw_status; 64 struct completion mw_complete; 65 unsigned long mw_mask; 66 unsigned long mw_goal; 67}; 68 69static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 70static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 1/* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * dlmglue.c 5 * 6 * Code which implements an OCFS2 specific interface to our DLM. 7 * 8 * Copyright (C) 2003, 2004 Oracle. All rights reserved. --- 54 unchanged lines hidden (view full) --- 63 int mw_status; 64 struct completion mw_complete; 65 unsigned long mw_mask; 66 unsigned long mw_goal; 67}; 68 69static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 70static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); |
|
71 72/* 73 * Return value from ->downconvert_worker functions. 74 * 75 * These control the precise actions of ocfs2_unblock_lock() 76 * and ocfs2_process_blocked_lock() 77 * 78 */ --- 168 unchanged lines hidden (view full) --- 247 .flags = 0, 248}; 249 250static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 251 .get_osb = ocfs2_get_inode_osb, 252 .flags = 0, 253}; 254 | 72 73/* 74 * Return value from ->downconvert_worker functions. 75 * 76 * These control the precise actions of ocfs2_unblock_lock() 77 * and ocfs2_process_blocked_lock() 78 * 79 */ --- 168 unchanged lines hidden (view full) --- 248 .flags = 0, 249}; 250 251static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 252 .get_osb = ocfs2_get_inode_osb, 253 .flags = 0, 254}; 255 |
/*
 * Lock resource operations used for flock() lock resources: the owning
 * ocfs2_super is looked up with ocfs2_get_file_osb() and no
 * ocfs2_lock_res_ops flags are set.
 */
256static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 257 .get_osb = ocfs2_get_file_osb, 258 .flags = 0, 259}; 260 |
|
255static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 256{ 257 return lockres->l_type == OCFS2_LOCK_TYPE_META || 258 lockres->l_type == OCFS2_LOCK_TYPE_RW || 259 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 260} 261 262static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) --- 42 unchanged lines hidden (view full) --- 305} while (0) 306static int ocfs2_downconvert_thread(void *arg); 307static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 308 struct ocfs2_lock_res *lockres); 309static int ocfs2_inode_lock_update(struct inode *inode, 310 struct buffer_head **bh); 311static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 312static inline int ocfs2_highest_compat_lock_level(int level); | 261static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 262{ 263 return lockres->l_type == OCFS2_LOCK_TYPE_META || 264 lockres->l_type == OCFS2_LOCK_TYPE_RW || 265 lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 266} 267 268static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) --- 42 unchanged lines hidden (view full) --- 311} while (0) 312static int ocfs2_downconvert_thread(void *arg); 313static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 314 struct ocfs2_lock_res *lockres); 315static int ocfs2_inode_lock_update(struct inode *inode, 316 struct buffer_head **bh); 317static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 318static inline int ocfs2_highest_compat_lock_level(int level); |
319static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 320 int new_level); 321static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 322 struct ocfs2_lock_res *lockres, 323 int new_level, 324 int lvb); 325static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 326 struct ocfs2_lock_res *lockres); 327static int ocfs2_cancel_convert(struct ocfs2_super *osb, 328 struct ocfs2_lock_res *lockres); |
|
313 | 329 |
330 |
|
314static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 315 u64 blkno, 316 u32 generation, 317 char *name) 318{ 319 int len; 320 321 mlog_entry_void(); --- 92 unchanged lines hidden (view full) --- 414 415static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 416{ 417 struct inode *inode = ocfs2_lock_res_inode(lockres); 418 419 return OCFS2_SB(inode->i_sb); 420} 421 | 331static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 332 u64 blkno, 333 u32 generation, 334 char *name) 335{ 336 int len; 337 338 mlog_entry_void(); --- 92 unchanged lines hidden (view full) --- 431 432static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 433{ 434 struct inode *inode = ocfs2_lock_res_inode(lockres); 435 436 return OCFS2_SB(inode->i_sb); 437} 438 |
/*
 * ->get_osb callback for flock lock resources.
 *
 * The lockres private pointer (l_priv) is treated as the
 * struct ocfs2_file_private of the open file; the superblock is reached
 * through that file's mapping host inode (fp_file->f_mapping->host->i_sb)
 * and converted with OCFS2_SB().
 */
439static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 440{ 441 struct ocfs2_file_private *fp = lockres->l_priv; 442 443 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 444} 445 |
|
422static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 423{ 424 __be64 inode_blkno_be; 425 426 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 427 sizeof(__be64)); 428 429 return be64_to_cpu(inode_blkno_be); --- 64 unchanged lines hidden (view full) --- 494 /* Rename lockres doesn't come from a slab so we call init 495 * once on it manually. */ 496 ocfs2_lock_res_init_once(res); 497 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 498 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 499 &ocfs2_rename_lops, osb); 500} 501 | 446static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 447{ 448 __be64 inode_blkno_be; 449 450 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 451 sizeof(__be64)); 452 453 return be64_to_cpu(inode_blkno_be); --- 64 unchanged lines hidden (view full) --- 518 /* Rename lockres doesn't come from a slab so we call init 519 * once on it manually. */ 520 ocfs2_lock_res_init_once(res); 521 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 522 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 523 &ocfs2_rename_lops, osb); 524} 525 |
/*
 * Set up the flock lock resource belonging to an open file.
 *
 * @lockres: lock resource to initialize.
 * @fp:      per-file private data; stored as the lockres private pointer.
 *
 * The lock name is built from OCFS2_LOCK_TYPE_FLOCK, the inode's block
 * number (ip_blkno) and the inode generation, so the name is derived from
 * the inode rather than from the individual struct file.
 * ocfs2_lock_res_init_once() is called by hand here
 * (NOTE(review): presumably because, like the rename lockres, this one is
 * not set up by a slab constructor - confirm).
 * OCFS2_LOCK_NOCACHE is set last; the blocking AST handler returns early
 * for lock resources carrying that flag.
 */
526void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 527 struct ocfs2_file_private *fp) 528{ 529 struct inode *inode = fp->fp_file->f_mapping->host; 530 struct ocfs2_inode_info *oi = OCFS2_I(inode); 531 532 ocfs2_lock_res_init_once(lockres); 533 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 534 inode->i_generation, lockres->l_name); 535 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 536 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 537 fp); 538 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 539} 540 |
|
502void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 503{ 504 mlog_entry_void(); 505 506 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 507 return; 508 509 ocfs2_remove_lockres_tracking(res); --- 200 unchanged lines hidden (view full) --- 710 unsigned long flags; 711 712 BUG_ON(level <= LKM_NLMODE); 713 714 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 715 lockres->l_name, level, lockres->l_level, 716 ocfs2_lock_type_string(lockres->l_type)); 717 | 541void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 542{ 543 mlog_entry_void(); 544 545 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 546 return; 547 548 ocfs2_remove_lockres_tracking(res); --- 200 unchanged lines hidden (view full) --- 749 unsigned long flags; 750 751 BUG_ON(level <= LKM_NLMODE); 752 753 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 754 lockres->l_name, level, lockres->l_level, 755 ocfs2_lock_type_string(lockres->l_type)); 756 |
757 /* 758 * We can skip the bast for locks which don't enable caching - 759 * they'll be dropped at the earliest possible time anyway. 760 */ 761 if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 762 return; 763 |
|
718 spin_lock_irqsave(&lockres->l_lock, flags); 719 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 720 if (needs_downconvert) 721 ocfs2_schedule_blocked_lock(osb, lockres); 722 spin_unlock_irqrestore(&lockres->l_lock, flags); 723 724 wake_up(&lockres->l_event); 725 --- 195 unchanged lines hidden (view full) --- 921 init_completion(&mw->mw_complete); 922 } 923 spin_unlock_irqrestore(&lockres->l_lock, flags); 924 925 return ret; 926 927} 928 | 764 spin_lock_irqsave(&lockres->l_lock, flags); 765 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 766 if (needs_downconvert) 767 ocfs2_schedule_blocked_lock(osb, lockres); 768 spin_unlock_irqrestore(&lockres->l_lock, flags); 769 770 wake_up(&lockres->l_event); 771 --- 195 unchanged lines hidden (view full) --- 967 init_completion(&mw->mw_complete); 968 } 969 spin_unlock_irqrestore(&lockres->l_lock, flags); 970 971 return ret; 972 973} 974 |
/*
 * Interruptible wait on a mask waiter's completion.
 *
 * @mw:      mask waiter whose mw_complete is waited on.
 * @lockres: lock resource the waiter was queued on.
 *
 * Returns the non-zero value from wait_for_completion_interruptible() if
 * the wait did not complete normally (the flock caller tests for
 * -ERESTARTSYS); in that case the waiter is first removed from @lockres.
 * Otherwise returns mw->mw_status. The completion is re-initialized on
 * both paths before returning.
 */
975static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 976 struct ocfs2_lock_res *lockres) 977{ 978 int ret; 979 980 ret = wait_for_completion_interruptible(&mw->mw_complete); 981 if (ret) 982 lockres_remove_mask_waiter(lockres, mw); 983 else 984 ret = mw->mw_status; 985 /* Re-arm the completion in case we want to wait on it again */ 986 INIT_COMPLETION(mw->mw_complete); 987 return ret; 988} 989 |
|
929static int ocfs2_cluster_lock(struct ocfs2_super *osb, 930 struct ocfs2_lock_res *lockres, 931 int level, 932 int lkm_flags, 933 int arg_flags) 934{ 935 struct ocfs2_mask_waiter mw; 936 enum dlm_status status; --- 354 unchanged lines hidden (view full) --- 1291 if(lockres->l_ex_holders) 1292 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1293 LKM_EXMODE); 1294 1295out: 1296 mlog_exit_void(); 1297} 1298 | 990static int ocfs2_cluster_lock(struct ocfs2_super *osb, 991 struct ocfs2_lock_res *lockres, 992 int level, 993 int lkm_flags, 994 int arg_flags) 995{ 996 struct ocfs2_mask_waiter mw; 997 enum dlm_status status; --- 354 unchanged lines hidden (view full) --- 1352 if(lockres->l_ex_holders) 1353 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1354 LKM_EXMODE); 1355 1356out: 1357 mlog_exit_void(); 1358} 1359 |
/*
 * flock() support.
 *
 * ocfs2_flock_handle_signal(): called after an interrupted wait. While the
 *   lockres is OCFS2_LOCK_BUSY it either issues a cancel convert or waits
 *   for BUSY to clear, then retries. Returns 0 if the lock ended up at the
 *   requested level, -ERESTARTSYS otherwise, or the negative error from
 *   ocfs2_cancel_convert().
 * ocfs2_file_lock(): creates the lock at LKM_NLMODE if it is not attached
 *   yet, then converts it to LKM_EXMODE (ex != 0) or LKM_PRMODE. With
 *   trylock set, LKM_NOQUEUE is passed and DLM_NOTQUEUED becomes -EAGAIN;
 *   other dlmlock() failures and a busy/already-held lockres give -EINVAL.
 * ocfs2_file_unlock(): no-op if the lock is unattached or already at
 *   LKM_NLMODE; otherwise downconverts to LKM_NLMODE and waits for it.
 *
 * NOTE(review): ocfs2_file_lock()/ocfs2_file_unlock() test l_flags and
 * l_level before taking l_lock; presumably safe only because callers
 * serialize flock calls as the design comment below requires - confirm.
 */
1360static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1361 int level) 1362{ 1363 int ret; 1364 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1365 unsigned long flags; 1366 struct ocfs2_mask_waiter mw; 1367 1368 ocfs2_init_mask_waiter(&mw); 1369 1370retry_cancel: 1371 spin_lock_irqsave(&lockres->l_lock, flags); 1372 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1373 ret = ocfs2_prepare_cancel_convert(osb, lockres); 1374 if (ret) { 1375 spin_unlock_irqrestore(&lockres->l_lock, flags); 1376 ret = ocfs2_cancel_convert(osb, lockres); 1377 if (ret < 0) { 1378 mlog_errno(ret); 1379 goto out; 1380 } 1381 goto retry_cancel; 1382 } 1383 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1384 spin_unlock_irqrestore(&lockres->l_lock, flags); 1385 1386 ocfs2_wait_for_mask(&mw); 1387 goto retry_cancel; 1388 } 1389 1390 ret = -ERESTARTSYS; 1391 /* 1392 * OCFS2_LOCK_BUSY is clear here. We may still have gotten the lock, in which case there's no 1393 * point in restarting the syscall, so report success instead. 1394 */ 1395 if (lockres->l_level == level) 1396 ret = 0; 1397 1398 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1399 lockres->l_flags, lockres->l_level, lockres->l_action); 1400 1401 spin_unlock_irqrestore(&lockres->l_lock, flags); 1402 1403out: 1404 return ret; 1405} 1406 1407/* 1408 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1409 * flock() calls. The locking approach this requires is sufficiently 1410 * different from all other cluster lock types that we implement a 1411 * separate path to the "low-level" dlm calls. In particular: 1412 * 1413 * - No optimization of lock levels is done - we take exactly 1414 * what's been requested. 1415 * 1416 * - No lock caching is employed. We immediately downconvert to 1417 * no-lock at unlock time (this also means flock locks never go on 1418 * the blocking list).
1419 * 1420 * - Since userspace can trivially deadlock itself with flock, we make 1421 * sure to allow cancellation of a misbehaving application's flock() 1422 * request. 1423 * 1424 * - Access to any flock lockres doesn't require concurrency, so we 1425 * can simplify the code by requiring the caller to guarantee 1426 * serialization of dlmglue flock calls. 1427 */ 1428int ocfs2_file_lock(struct file *file, int ex, int trylock) 1429{ 1430 int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; 1431 unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; 1432 unsigned long flags; 1433 struct ocfs2_file_private *fp = file->private_data; 1434 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1435 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1436 struct ocfs2_mask_waiter mw; 1437 1438 ocfs2_init_mask_waiter(&mw); 1439 1440 if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1441 (lockres->l_level > LKM_NLMODE)) { 1442 mlog(ML_ERROR, 1443 "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1444 "level: %u\n", lockres->l_name, lockres->l_flags, 1445 lockres->l_level); 1446 return -EINVAL; 1447 } 1448 1449 spin_lock_irqsave(&lockres->l_lock, flags); 1450 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1451 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1452 spin_unlock_irqrestore(&lockres->l_lock, flags); 1453 1454 /* 1455 * Get the lock at NLMODE to start - that way we 1456 * can cancel the upconvert request if need be.
1457 */ 1458 ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); 1459 if (ret < 0) { 1460 mlog_errno(ret); 1461 goto out; 1462 } 1463 1464 ret = ocfs2_wait_for_mask(&mw); 1465 if (ret) { 1466 mlog_errno(ret); 1467 goto out; 1468 } 1469 spin_lock_irqsave(&lockres->l_lock, flags); 1470 } 1471 1472 lockres->l_action = OCFS2_AST_CONVERT; 1473 lkm_flags |= LKM_CONVERT; 1474 lockres->l_requested = level; 1475 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1476 1477 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1478 spin_unlock_irqrestore(&lockres->l_lock, flags); 1479 1480 ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, 1481 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 1482 ocfs2_locking_ast, lockres, ocfs2_blocking_ast); 1483 if (ret != DLM_NORMAL) { 1484 if (trylock && ret == DLM_NOTQUEUED) 1485 ret = -EAGAIN; 1486 else { 1487 ocfs2_log_dlm_error("dlmlock", ret, lockres); 1488 ret = -EINVAL; 1489 } 1490 1491 ocfs2_recover_from_dlm_error(lockres, 1); 1492 lockres_remove_mask_waiter(lockres, &mw); 1493 goto out; 1494 } 1495 1496 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1497 if (ret == -ERESTARTSYS) { 1498 /* 1499 * Userspace can deadlock itself with 1500 * flock(). Current behavior locally is to allow the 1501 * deadlock, but abort the system call if a signal is 1502 * received. We follow this example, otherwise a 1503 * poorly written program could sit in kernel until 1504 * reboot. 1505 * 1506 * Handling this is a bit more complicated for Ocfs2 1507 * though. We can't exit this function with an 1508 * outstanding lock request, so a cancel convert is 1509 * required. We intentionally overwrite 'ret' - if the 1510 * cancel fails and the lock was granted, it's easier 1511 * to just bubble success back up to the user.
1512 */ 1513 ret = ocfs2_flock_handle_signal(lockres, level); 1514 } 1515 1516out: 1517 1518 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1519 lockres->l_name, ex, trylock, ret); 1520 return ret; 1521} 1522 1523void ocfs2_file_unlock(struct file *file) 1524{ 1525 int ret; 1526 unsigned long flags; 1527 struct ocfs2_file_private *fp = file->private_data; 1528 struct ocfs2_lock_res *lockres = &fp->fp_flock; 1529 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1530 struct ocfs2_mask_waiter mw; 1531 1532 ocfs2_init_mask_waiter(&mw); 1533 1534 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1535 return; 1536 1537 if (lockres->l_level == LKM_NLMODE) 1538 return; 1539 1540 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1541 lockres->l_name, lockres->l_flags, lockres->l_level, 1542 lockres->l_action); 1543 1544 spin_lock_irqsave(&lockres->l_lock, flags); 1545 /* 1546 * Fake a blocking ast for the downconvert code by marking the lockres blocked at LKM_EXMODE. 1547 */ 1548 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1549 lockres->l_blocking = LKM_EXMODE; 1550 1551 ocfs2_prepare_downconvert(lockres, LKM_NLMODE); 1552 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1553 spin_unlock_irqrestore(&lockres->l_lock, flags); 1554 1555 ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); 1556 if (ret) { 1557 mlog_errno(ret); 1558 return; 1559 } 1560 1561 ret = ocfs2_wait_for_mask(&mw); 1562 if (ret) 1563 mlog_errno(ret); 1564} 1565 |
|
1299static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1300 struct ocfs2_lock_res *lockres) 1301{ 1302 int kick = 0; 1303 1304 mlog_entry_void(); 1305 1306 /* If we know that another node is waiting on our lock, kick --- 1790 unchanged lines hidden --- | 1566static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1567 struct ocfs2_lock_res *lockres) 1568{ 1569 int kick = 0; 1570 1571 mlog_entry_void(); 1572 1573 /* If we know that another node is waiting on our lock, kick --- 1790 unchanged lines hidden --- |