13d14c5d2SYehuda Sadeh #include <linux/ceph/ceph_debug.h> 22f2dc053SSage Weil 32f2dc053SSage Weil #include <linux/bug.h> 42f2dc053SSage Weil #include <linux/err.h> 52f2dc053SSage Weil #include <linux/random.h> 62f2dc053SSage Weil #include <linux/slab.h> 72f2dc053SSage Weil #include <linux/types.h> 82f2dc053SSage Weil 93d14c5d2SYehuda Sadeh #include <linux/ceph/mdsmap.h> 103d14c5d2SYehuda Sadeh #include <linux/ceph/messenger.h> 113d14c5d2SYehuda Sadeh #include <linux/ceph/decode.h> 122f2dc053SSage Weil 132f2dc053SSage Weil #include "super.h" 142f2dc053SSage Weil 152f2dc053SSage Weil 162f2dc053SSage Weil /* 172f2dc053SSage Weil * choose a random mds that is "up" (i.e. has a state > 0), or -1. 182f2dc053SSage Weil */ 192f2dc053SSage Weil int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) 202f2dc053SSage Weil { 212f2dc053SSage Weil int n = 0; 222f2dc053SSage Weil int i; 23a84cd293SSam Lang 24a84cd293SSam Lang /* special case for one mds */ 25a84cd293SSam Lang if (1 == m->m_max_mds && m->m_info[0].state > 0) 26a84cd293SSam Lang return 0; 272f2dc053SSage Weil 282f2dc053SSage Weil /* count */ 292f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 302f2dc053SSage Weil if (m->m_info[i].state > 0) 312f2dc053SSage Weil n++; 322f2dc053SSage Weil if (n == 0) 332f2dc053SSage Weil return -1; 342f2dc053SSage Weil 352f2dc053SSage Weil /* pick */ 36a84cd293SSam Lang n = prandom_u32() % n; 372f2dc053SSage Weil i = 0; 382f2dc053SSage Weil for (i = 0; n > 0; i++, n--) 392f2dc053SSage Weil while (m->m_info[i].state <= 0) 402f2dc053SSage Weil i++; 412f2dc053SSage Weil 422f2dc053SSage Weil return i; 432f2dc053SSage Weil } 442f2dc053SSage Weil 452f2dc053SSage Weil /* 462f2dc053SSage Weil * Decode an MDS map 472f2dc053SSage Weil * 482f2dc053SSage Weil * Ignore any fields we don't care about (there are quite a few of 492f2dc053SSage Weil * them). 502f2dc053SSage Weil */ 512f2dc053SSage Weil struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) 522f2dc053SSage Weil { 532f2dc053SSage Weil struct ceph_mdsmap *m; 549ec7cab1SSage Weil const void *start = *p; 552f2dc053SSage Weil int i, j, n; 562f2dc053SSage Weil int err = -EINVAL; 572f2dc053SSage Weil u16 version; 582f2dc053SSage Weil 592f2dc053SSage Weil m = kzalloc(sizeof(*m), GFP_NOFS); 602f2dc053SSage Weil if (m == NULL) 612f2dc053SSage Weil return ERR_PTR(-ENOMEM); 622f2dc053SSage Weil 632f2dc053SSage Weil ceph_decode_16_safe(p, end, version, bad); 644f6a7e5eSSage Weil if (version > 3) { 654f6a7e5eSSage Weil pr_warning("got mdsmap version %d > 3, failing", version); 664f6a7e5eSSage Weil goto bad; 674f6a7e5eSSage Weil } 682f2dc053SSage Weil 692f2dc053SSage Weil ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 70c89136eaSSage Weil m->m_epoch = ceph_decode_32(p); 71c89136eaSSage Weil m->m_client_epoch = ceph_decode_32(p); 72c89136eaSSage Weil m->m_last_failure = ceph_decode_32(p); 73c89136eaSSage Weil m->m_root = ceph_decode_32(p); 74c89136eaSSage Weil m->m_session_timeout = ceph_decode_32(p); 75c89136eaSSage Weil m->m_session_autoclose = ceph_decode_32(p); 76c89136eaSSage Weil m->m_max_file_size = ceph_decode_64(p); 77c89136eaSSage Weil m->m_max_mds = ceph_decode_32(p); 782f2dc053SSage Weil 792f2dc053SSage Weil m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); 802f2dc053SSage Weil if (m->m_info == NULL) 812f2dc053SSage Weil goto badmem; 822f2dc053SSage Weil 832f2dc053SSage Weil /* pick out active nodes from mds_info (state > 0) */ 84c89136eaSSage Weil n = ceph_decode_32(p); 852f2dc053SSage Weil for (i = 0; i < n; i++) { 8694045e11SSage Weil u64 global_id; 872f2dc053SSage Weil u32 namelen; 882f2dc053SSage Weil s32 mds, inc, state; 892f2dc053SSage Weil u64 state_seq; 902f2dc053SSage Weil u8 infoversion; 912f2dc053SSage Weil struct ceph_entity_addr addr; 922f2dc053SSage Weil u32 num_export_targets; 932f2dc053SSage Weil void *pexport_targets = NULL; 940deb01c9SSage Weil struct ceph_timespec laggy_since; 952f2dc053SSage Weil 9694045e11SSage Weil ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); 9794045e11SSage Weil global_id = ceph_decode_64(p); 98c89136eaSSage Weil infoversion = ceph_decode_8(p); 9994045e11SSage Weil *p += sizeof(u64); 100c89136eaSSage Weil namelen = ceph_decode_32(p); /* skip mds name */ 1012f2dc053SSage Weil *p += namelen; 1022f2dc053SSage Weil 1032f2dc053SSage Weil ceph_decode_need(p, end, 104e251e288SSage Weil 4*sizeof(u32) + sizeof(u64) + 1052f2dc053SSage Weil sizeof(addr) + sizeof(struct ceph_timespec), 1062f2dc053SSage Weil bad); 107c89136eaSSage Weil mds = ceph_decode_32(p); 108c89136eaSSage Weil inc = ceph_decode_32(p); 109c89136eaSSage Weil state = ceph_decode_32(p); 110c89136eaSSage Weil state_seq = ceph_decode_64(p); 11194045e11SSage Weil ceph_decode_copy(p, &addr, sizeof(addr)); 11294045e11SSage Weil ceph_decode_addr(&addr); 1130deb01c9SSage Weil ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); 1142f2dc053SSage Weil *p += sizeof(u32); 1152f2dc053SSage Weil ceph_decode_32_safe(p, end, namelen, bad); 116e251e288SSage Weil *p += namelen; 1172f2dc053SSage Weil if (infoversion >= 2) { 1182f2dc053SSage Weil ceph_decode_32_safe(p, end, num_export_targets, bad); 1192f2dc053SSage Weil pexport_targets = *p; 120e251e288SSage Weil *p += num_export_targets * sizeof(u32); 1212f2dc053SSage Weil } else { 1222f2dc053SSage Weil num_export_targets = 0; 1232f2dc053SSage Weil } 1242f2dc053SSage Weil 12594045e11SSage Weil dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", 1263d14c5d2SYehuda Sadeh i+1, n, global_id, mds, inc, 1273d14c5d2SYehuda Sadeh ceph_pr_addr(&addr.in_addr), 1282f2dc053SSage Weil ceph_mds_state_name(state)); 1292f2dc053SSage Weil if (mds >= 0 && mds < m->m_max_mds && state > 0) { 13094045e11SSage Weil m->m_info[mds].global_id = global_id; 1312f2dc053SSage Weil m->m_info[mds].state = state; 1322f2dc053SSage Weil m->m_info[mds].addr = addr; 1330deb01c9SSage Weil m->m_info[mds].laggy = 1340deb01c9SSage Weil (laggy_since.tv_sec != 0 || 1350deb01c9SSage Weil laggy_since.tv_nsec != 0); 1362f2dc053SSage Weil m->m_info[mds].num_export_targets = num_export_targets; 1372f2dc053SSage Weil if (num_export_targets) { 1382f2dc053SSage Weil m->m_info[mds].export_targets = 1392f2dc053SSage Weil kcalloc(num_export_targets, sizeof(u32), 1402f2dc053SSage Weil GFP_NOFS); 141*c213b50bSEmil Goode if (m->m_info[mds].export_targets == NULL) 142*c213b50bSEmil Goode goto badmem; 1432f2dc053SSage Weil for (j = 0; j < num_export_targets; j++) 144c89136eaSSage Weil m->m_info[mds].export_targets[j] = 145c89136eaSSage Weil ceph_decode_32(&pexport_targets); 1462f2dc053SSage Weil } else { 1472f2dc053SSage Weil m->m_info[mds].export_targets = NULL; 1482f2dc053SSage Weil } 1492f2dc053SSage Weil } 1502f2dc053SSage Weil } 1512f2dc053SSage Weil 1522f2dc053SSage Weil /* pg_pools */ 1532f2dc053SSage Weil ceph_decode_32_safe(p, end, n, bad); 1542f2dc053SSage Weil m->m_num_data_pg_pools = n; 1554f6a7e5eSSage Weil m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); 1562f2dc053SSage Weil if (!m->m_data_pg_pools) 1572f2dc053SSage Weil goto badmem; 1584f6a7e5eSSage Weil ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); 1592f2dc053SSage Weil for (i = 0; i < n; i++) 1604f6a7e5eSSage Weil m->m_data_pg_pools[i] = ceph_decode_64(p); 1614f6a7e5eSSage Weil m->m_cas_pg_pool = ceph_decode_64(p); 1622f2dc053SSage Weil 1632f2dc053SSage Weil /* ok, we don't care about the rest. */ 1642f2dc053SSage Weil dout("mdsmap_decode success epoch %u\n", m->m_epoch); 1652f2dc053SSage Weil return m; 1662f2dc053SSage Weil 1672f2dc053SSage Weil badmem: 1682f2dc053SSage Weil err = -ENOMEM; 1692f2dc053SSage Weil bad: 1702f2dc053SSage Weil pr_err("corrupt mdsmap\n"); 1719ec7cab1SSage Weil print_hex_dump(KERN_DEBUG, "mdsmap: ", 1729ec7cab1SSage Weil DUMP_PREFIX_OFFSET, 16, 1, 1739ec7cab1SSage Weil start, end - start, true); 1742f2dc053SSage Weil ceph_mdsmap_destroy(m); 175*c213b50bSEmil Goode return ERR_PTR(err); 1762f2dc053SSage Weil } 1772f2dc053SSage Weil 1782f2dc053SSage Weil void ceph_mdsmap_destroy(struct ceph_mdsmap *m) 1792f2dc053SSage Weil { 1802f2dc053SSage Weil int i; 1812f2dc053SSage Weil 1822f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 1832f2dc053SSage Weil kfree(m->m_info[i].export_targets); 1842f2dc053SSage Weil kfree(m->m_info); 1852f2dc053SSage Weil kfree(m->m_data_pg_pools); 1862f2dc053SSage Weil kfree(m); 1872f2dc053SSage Weil } 188