dlmglue.c (e63aecb651ba73dffc62f9608ee1b7ae2a0ffd4b) dlmglue.c (cf8e06f1a860d8680d6bb4ac8ec7d7724988e46f)
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmglue.c
5 *
6 * Code which implements an OCFS2 specific interface to our DLM.
7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.

--- 54 unchanged lines hidden (view full) ---

63 int mw_status;
64 struct completion mw_complete;
65 unsigned long mw_mask;
66 unsigned long mw_goal;
67};
68
69static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
70static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmglue.c
5 *
6 * Code which implements an OCFS2 specific interface to our DLM.
7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.

--- 54 unchanged lines hidden (view full) ---

63 int mw_status;
64 struct completion mw_complete;
65 unsigned long mw_mask;
66 unsigned long mw_goal;
67};
68
69static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
70static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
71static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
71
72/*
73 * Return value from ->downconvert_worker functions.
74 *
75 * These control the precise actions of ocfs2_unblock_lock()
76 * and ocfs2_process_blocked_lock()
77 *
78 */

--- 168 unchanged lines hidden (view full) ---

247 .flags = 0,
248};
249
/*
 * Lock resource operations table that resolves the owning ocfs2_super
 * through ocfs2_get_inode_osb() and sets no behaviour flags.
 * NOTE(review): by its name this presumably backs the per-inode "open"
 * lock (OCFS2_LOCK_TYPE_OPEN); the code that attaches it to a lockres is
 * not visible here - confirm.
 */
250static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
251 .get_osb = ocfs2_get_inode_osb,
252 .flags = 0,
253};
254
72
73/*
74 * Return value from ->downconvert_worker functions.
75 *
76 * These control the precise actions of ocfs2_unblock_lock()
77 * and ocfs2_process_blocked_lock()
78 *
79 */

--- 168 unchanged lines hidden (view full) ---

248 .flags = 0,
249};
250
/*
 * Lock resource operations table that resolves the owning ocfs2_super
 * through ocfs2_get_inode_osb() and sets no behaviour flags.
 * NOTE(review): by its name this presumably backs the per-inode "open"
 * lock (OCFS2_LOCK_TYPE_OPEN); the code that attaches it to a lockres is
 * not visible here - confirm.
 */
251static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
252 .get_osb = ocfs2_get_inode_osb,
253 .flags = 0,
254};
255
/*
 * Lock resource operations table for flock() lock resources. It is the
 * table handed to ocfs2_lock_res_init_common() by
 * ocfs2_file_lock_res_init() for OCFS2_LOCK_TYPE_FLOCK resources. The
 * owning ocfs2_super is resolved through ocfs2_get_file_osb(); no
 * behaviour flags are set.
 */
256static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
257 .get_osb = ocfs2_get_file_osb,
258 .flags = 0,
259};
260
255static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
256{
257 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
258 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
259 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
260}
261
262static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)

--- 42 unchanged lines hidden (view full) ---

305} while (0)
306static int ocfs2_downconvert_thread(void *arg);
307static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
308 struct ocfs2_lock_res *lockres);
309static int ocfs2_inode_lock_update(struct inode *inode,
310 struct buffer_head **bh);
311static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
312static inline int ocfs2_highest_compat_lock_level(int level);
261static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
262{
263 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
264 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
265 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
266}
267
268static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)

--- 42 unchanged lines hidden (view full) ---

311} while (0)
312static int ocfs2_downconvert_thread(void *arg);
313static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
314 struct ocfs2_lock_res *lockres);
315static int ocfs2_inode_lock_update(struct inode *inode,
316 struct buffer_head **bh);
317static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
318static inline int ocfs2_highest_compat_lock_level(int level);
319static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
320 int new_level);
321static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
322 struct ocfs2_lock_res *lockres,
323 int new_level,
324 int lvb);
325static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
326 struct ocfs2_lock_res *lockres);
327static int ocfs2_cancel_convert(struct ocfs2_super *osb,
328 struct ocfs2_lock_res *lockres);
313
329
330
314static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
315 u64 blkno,
316 u32 generation,
317 char *name)
318{
319 int len;
320
321 mlog_entry_void();

--- 92 unchanged lines hidden (view full) ---

414
415static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
416{
417 struct inode *inode = ocfs2_lock_res_inode(lockres);
418
419 return OCFS2_SB(inode->i_sb);
420}
421
331static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
332 u64 blkno,
333 u32 generation,
334 char *name)
335{
336 int len;
337
338 mlog_entry_void();

--- 92 unchanged lines hidden (view full) ---

431
432static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
433{
434 struct inode *inode = ocfs2_lock_res_inode(lockres);
435
436 return OCFS2_SB(inode->i_sb);
437}
438
439static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
440{
441 struct ocfs2_file_private *fp = lockres->l_priv;
442
443 return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
444}
445
422static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
423{
424 __be64 inode_blkno_be;
425
426 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
427 sizeof(__be64));
428
429 return be64_to_cpu(inode_blkno_be);

--- 64 unchanged lines hidden (view full) ---

494 /* Rename lockres doesn't come from a slab so we call init
495 * once on it manually. */
496 ocfs2_lock_res_init_once(res);
497 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
498 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
499 &ocfs2_rename_lops, osb);
500}
501
446static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
447{
448 __be64 inode_blkno_be;
449
450 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
451 sizeof(__be64));
452
453 return be64_to_cpu(inode_blkno_be);

--- 64 unchanged lines hidden (view full) ---

518 /* Rename lockres doesn't come from a slab so we call init
519 * once on it manually. */
520 ocfs2_lock_res_init_once(res);
521 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
522 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
523 &ocfs2_rename_lops, osb);
524}
525
526void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
527 struct ocfs2_file_private *fp)
528{
529 struct inode *inode = fp->fp_file->f_mapping->host;
530 struct ocfs2_inode_info *oi = OCFS2_I(inode);
531
532 ocfs2_lock_res_init_once(lockres);
533 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
534 inode->i_generation, lockres->l_name);
535 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
536 OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
537 fp);
538 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
539}
540
502void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
503{
504 mlog_entry_void();
505
506 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
507 return;
508
509 ocfs2_remove_lockres_tracking(res);

--- 200 unchanged lines hidden (view full) ---

710 unsigned long flags;
711
712 BUG_ON(level <= LKM_NLMODE);
713
714 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
715 lockres->l_name, level, lockres->l_level,
716 ocfs2_lock_type_string(lockres->l_type));
717
541void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
542{
543 mlog_entry_void();
544
545 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
546 return;
547
548 ocfs2_remove_lockres_tracking(res);

--- 200 unchanged lines hidden (view full) ---

749 unsigned long flags;
750
751 BUG_ON(level <= LKM_NLMODE);
752
753 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
754 lockres->l_name, level, lockres->l_level,
755 ocfs2_lock_type_string(lockres->l_type));
756
757 /*
758 * We can skip the bast for locks which don't enable caching -
759 * they'll be dropped at the earliest possible time anyway.
760 */
761 if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
762 return;
763
718 spin_lock_irqsave(&lockres->l_lock, flags);
719 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
720 if (needs_downconvert)
721 ocfs2_schedule_blocked_lock(osb, lockres);
722 spin_unlock_irqrestore(&lockres->l_lock, flags);
723
724 wake_up(&lockres->l_event);
725

--- 195 unchanged lines hidden (view full) ---

921 init_completion(&mw->mw_complete);
922 }
923 spin_unlock_irqrestore(&lockres->l_lock, flags);
924
925 return ret;
926
927}
928
764 spin_lock_irqsave(&lockres->l_lock, flags);
765 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
766 if (needs_downconvert)
767 ocfs2_schedule_blocked_lock(osb, lockres);
768 spin_unlock_irqrestore(&lockres->l_lock, flags);
769
770 wake_up(&lockres->l_event);
771

--- 195 unchanged lines hidden (view full) ---

967 init_completion(&mw->mw_complete);
968 }
969 spin_unlock_irqrestore(&lockres->l_lock, flags);
970
971 return ret;
972
973}
974
975static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
976 struct ocfs2_lock_res *lockres)
977{
978 int ret;
979
980 ret = wait_for_completion_interruptible(&mw->mw_complete);
981 if (ret)
982 lockres_remove_mask_waiter(lockres, mw);
983 else
984 ret = mw->mw_status;
985 /* Re-arm the completion in case we want to wait on it again */
986 INIT_COMPLETION(mw->mw_complete);
987 return ret;
988}
989
929static int ocfs2_cluster_lock(struct ocfs2_super *osb,
930 struct ocfs2_lock_res *lockres,
931 int level,
932 int lkm_flags,
933 int arg_flags)
934{
935 struct ocfs2_mask_waiter mw;
936 enum dlm_status status;

--- 354 unchanged lines hidden (view full) ---

1291 if(lockres->l_ex_holders)
1292 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1293 LKM_EXMODE);
1294
1295out:
1296 mlog_exit_void();
1297}
1298
990static int ocfs2_cluster_lock(struct ocfs2_super *osb,
991 struct ocfs2_lock_res *lockres,
992 int level,
993 int lkm_flags,
994 int arg_flags)
995{
996 struct ocfs2_mask_waiter mw;
997 enum dlm_status status;

--- 354 unchanged lines hidden (view full) ---

1352 if(lockres->l_ex_holders)
1353 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1354 LKM_EXMODE);
1355
1356out:
1357 mlog_exit_void();
1358}
1359
/*
 * Back out of an outstanding flock request on a lock resource.
 * ocfs2_file_lock() calls this when its interruptible wait returns
 * -ERESTARTSYS.
 *
 * While the lockres is still OCFS2_LOCK_BUSY this either issues a cancel
 * (when ocfs2_prepare_cancel_convert() returns non-zero) or waits for
 * BUSY to clear, and then re-examines the lockres. Once BUSY is clear
 * the outcome is decided by comparing the current level to the one that
 * was requested.
 *
 * lockres: flock lock resource with a request in flight
 * level:   lock level that was being requested
 *
 * Returns 0 if l_level equals level once BUSY has cleared, -ERESTARTSYS
 * if it does not, or the negative value from ocfs2_cancel_convert().
 */
1360static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1361 int level)
1362{
1363 int ret;
1364 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1365 unsigned long flags;
1366 struct ocfs2_mask_waiter mw;
1367
1368 ocfs2_init_mask_waiter(&mw);
1369
1370retry_cancel:
1371 spin_lock_irqsave(&lockres->l_lock, flags);
1372 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
/* Called with l_lock held; a non-zero result means a cancel must be sent. */
1373 ret = ocfs2_prepare_cancel_convert(osb, lockres);
1374 if (ret) {
/* The cancel itself is issued only after l_lock has been dropped. */
1375 spin_unlock_irqrestore(&lockres->l_lock, flags);
1376 ret = ocfs2_cancel_convert(osb, lockres);
1377 if (ret < 0) {
1378 mlog_errno(ret);
1379 goto out;
1380 }
1381 goto retry_cancel;
1382 }
/* No cancel to send: wait for OCFS2_LOCK_BUSY to clear, then look again. */
1383 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1384 spin_unlock_irqrestore(&lockres->l_lock, flags);
1385
/* The wait status is not checked; the lockres state is re-read instead. */
1386 ocfs2_wait_for_mask(&mw);
1387 goto retry_cancel;
1388 }
1389
1390 ret = -ERESTARTSYS;
1391 /*
1392 * We may still have gotten the lock, in which case there's no
1393 * point to restarting the syscall.
1394 */
1395 if (lockres->l_level == level)
1396 ret = 0;
1397
1398 mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1399 lockres->l_flags, lockres->l_level, lockres->l_action);
1400
1401 spin_unlock_irqrestore(&lockres->l_lock, flags);
1402
1403out:
1404 return ret;
1405}
1406
1407/*
1408 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1409 * flock() calls. The locking approach this requires is sufficiently
1410 * different from all other cluster lock types that we implement a
1411 * separate path to the "low-level" dlm calls. In particular:
1412 *
1413 * - No optimization of lock levels is done - we take exactly
1414 * what's been requested.
1415 *
1416 * - No lock caching is employed. We immediately downconvert to
1417 * no-lock at unlock time. This also means flock locks never go on
1418 * the blocking list.
1419 *
1420 * - Since userspace can trivially deadlock itself with flock, we make
1421 * sure to allow cancellation of a misbehaving application's flock()
1422 * request.
1423 *
1424 * - Access to any flock lockres doesn't require concurrency, so we
1425 * can simplify the code by requiring the caller to guarantee
1426 * serialization of dlmglue flock calls.
1427 */
/*
 * Acquire the cluster flock lock attached to an open file.
 *
 * file:    open file; file->private_data must be its
 *          struct ocfs2_file_private
 * ex:      non-zero requests LKM_EXMODE, zero requests LKM_PRMODE
 * trylock: non-zero adds LKM_NOQUEUE to the dlm request
 *
 * Returns the mask waiter status once the request completes, -EINVAL if
 * the lockres is already busy or held above LKM_NLMODE, or if dlmlock()
 * fails, and -EAGAIN if a trylock request gets DLM_NOTQUEUED. If the
 * wait is interrupted, the result of ocfs2_flock_handle_signal() is
 * returned instead.
 */
1428int ocfs2_file_lock(struct file *file, int ex, int trylock)
1429{
1430 int ret, level = ex ? LKM_EXMODE : LKM_PRMODE;
1431 unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0;
1432 unsigned long flags;
1433 struct ocfs2_file_private *fp = file->private_data;
1434 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1435 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1436 struct ocfs2_mask_waiter mw;
1437
1438 ocfs2_init_mask_waiter(&mw);
1439
/*
 * This sanity check reads l_flags and l_level before l_lock is taken.
 * NOTE(review): presumably safe only because callers serialize flock
 * calls on this lockres - confirm.
 */
1440 if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1441 (lockres->l_level > LKM_NLMODE)) {
1442 mlog(ML_ERROR,
1443 "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1444 "level: %u\n", lockres->l_name, lockres->l_flags,
1445 lockres->l_level);
1446 return -EINVAL;
1447 }
1448
1449 spin_lock_irqsave(&lockres->l_lock, flags);
1450 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1451 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1452 spin_unlock_irqrestore(&lockres->l_lock, flags);
1453
1454 /*
1455 * Get the lock at NLMODE to start - that way we
1456 * can cancel the upconvert request if need be.
1457 */
1458 ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
1459 if (ret < 0) {
/*
 * NOTE(review): the on-stack mw queued above is not explicitly removed
 * on this path. Presumably ocfs2_lock_create()'s own error handling
 * dequeues it - confirm.
 */
1460 mlog_errno(ret);
1461 goto out;
1462 }
1463
1464 ret = ocfs2_wait_for_mask(&mw);
1465 if (ret) {
1466 mlog_errno(ret);
1467 goto out;
1468 }
/* Retake l_lock so both paths reach the convert setup below with it held. */
1469 spin_lock_irqsave(&lockres->l_lock, flags);
1470 }
1471
/* Mark a convert to the requested level as in flight (l_lock held). */
1472 lockres->l_action = OCFS2_AST_CONVERT;
1473 lkm_flags |= LKM_CONVERT;
1474 lockres->l_requested = level;
1475 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1476
1477 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1478 spin_unlock_irqrestore(&lockres->l_lock, flags);
1479
1480 ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags,
1481 lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1,
1482 ocfs2_locking_ast, lockres, ocfs2_blocking_ast);
1483 if (ret != DLM_NORMAL) {
/* Map the dlm status held in ret onto an errno before cleaning up. */
1484 if (trylock && ret == DLM_NOTQUEUED)
1485 ret = -EAGAIN;
1486 else {
1487 ocfs2_log_dlm_error("dlmlock", ret, lockres);
1488 ret = -EINVAL;
1489 }
1490
1491 ocfs2_recover_from_dlm_error(lockres, 1);
1492 lockres_remove_mask_waiter(lockres, &mw);
1493 goto out;
1494 }
1495
1496 ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1497 if (ret == -ERESTARTSYS) {
1498 /*
1499 * Userspace can cause deadlock itself with
1500 * flock(). Current behavior locally is to allow the
1501 * deadlock, but abort the system call if a signal is
1502 * received. We follow this example, otherwise a
1503 * poorly written program could sit in kernel until
1504 * reboot.
1505 *
1506 * Handling this is a bit more complicated for Ocfs2
1507 * though. We can't exit this function with an
1508 * outstanding lock request, so a cancel convert is
1509 * required. We intentionally overwrite 'ret' - if the
1510 * cancel fails and the lock was granted, it's easier
1511 * to just bubble success back up to the user.
1512 */
1513 ret = ocfs2_flock_handle_signal(lockres, level);
1514 }
1515
1516out:
1517
1518 mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1519 lockres->l_name, ex, trylock, ret);
1520 return ret;
1521}
1522
/*
 * Release the cluster flock lock attached to an open file by
 * downconverting it to LKM_NLMODE and waiting for OCFS2_LOCK_BUSY to
 * clear.
 *
 * file: open file; file->private_data must be its
 *       struct ocfs2_file_private
 *
 * Does nothing if the lockres is not OCFS2_LOCK_ATTACHED or is already
 * at LKM_NLMODE. Failures are logged with mlog_errno() and not reported
 * to the caller.
 */
1523void ocfs2_file_unlock(struct file *file)
1524{
1525 int ret;
1526 unsigned long flags;
1527 struct ocfs2_file_private *fp = file->private_data;
1528 struct ocfs2_lock_res *lockres = &fp->fp_flock;
1529 struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1530 struct ocfs2_mask_waiter mw;
1531
1532 ocfs2_init_mask_waiter(&mw);
1533
/*
 * Both early-exit checks read the lockres before l_lock is taken.
 * NOTE(review): presumably safe only because callers serialize flock
 * calls on this lockres - confirm.
 */
1534 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1535 return;
1536
1537 if (lockres->l_level == LKM_NLMODE)
1538 return;
1539
1540 mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1541 lockres->l_name, lockres->l_flags, lockres->l_level,
1542 lockres->l_action);
1543
1544 spin_lock_irqsave(&lockres->l_lock, flags);
1545 /*
1546 * Fake a blocking ast for the downconvert code.
1547 */
1548 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1549 lockres->l_blocking = LKM_EXMODE;
1550
/* Queue the waiter while l_lock is held, before the downconvert is issued. */
1551 ocfs2_prepare_downconvert(lockres, LKM_NLMODE);
1552 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1553 spin_unlock_irqrestore(&lockres->l_lock, flags);
1554
1555 ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0);
1556 if (ret) {
/*
 * NOTE(review): the on-stack mw is not explicitly removed on this path.
 * Presumably ocfs2_downconvert_lock()'s own error handling dequeues it -
 * confirm.
 */
1557 mlog_errno(ret);
1558 return;
1559 }
1560
1561 ret = ocfs2_wait_for_mask(&mw);
1562 if (ret)
1563 mlog_errno(ret);
1564}
1565
1299static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1300 struct ocfs2_lock_res *lockres)
1301{
1302 int kick = 0;
1303
1304 mlog_entry_void();
1305
1306 /* If we know that another node is waiting on our lock, kick

--- 1790 unchanged lines hidden ---
1566static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1567 struct ocfs2_lock_res *lockres)
1568{
1569 int kick = 0;
1570
1571 mlog_entry_void();
1572
1573 /* If we know that another node is waiting on our lock, kick

--- 1790 unchanged lines hidden ---