13d14c5d2SYehuda Sadeh #include <linux/ceph/ceph_debug.h> 22f2dc053SSage Weil 32f2dc053SSage Weil #include <linux/bug.h> 42f2dc053SSage Weil #include <linux/err.h> 52f2dc053SSage Weil #include <linux/random.h> 62f2dc053SSage Weil #include <linux/slab.h> 72f2dc053SSage Weil #include <linux/types.h> 82f2dc053SSage Weil 93d14c5d2SYehuda Sadeh #include <linux/ceph/mdsmap.h> 103d14c5d2SYehuda Sadeh #include <linux/ceph/messenger.h> 113d14c5d2SYehuda Sadeh #include <linux/ceph/decode.h> 122f2dc053SSage Weil 132f2dc053SSage Weil #include "super.h" 142f2dc053SSage Weil 152f2dc053SSage Weil 162f2dc053SSage Weil /* 172f2dc053SSage Weil * choose a random mds that is "up" (i.e. has a state > 0), or -1. 182f2dc053SSage Weil */ 192f2dc053SSage Weil int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) 202f2dc053SSage Weil { 212f2dc053SSage Weil int n = 0; 222f2dc053SSage Weil int i; 23a84cd293SSam Lang 24a84cd293SSam Lang /* special case for one mds */ 25a84cd293SSam Lang if (1 == m->m_max_mds && m->m_info[0].state > 0) 26a84cd293SSam Lang return 0; 272f2dc053SSage Weil 282f2dc053SSage Weil /* count */ 292f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 302f2dc053SSage Weil if (m->m_info[i].state > 0) 312f2dc053SSage Weil n++; 322f2dc053SSage Weil if (n == 0) 332f2dc053SSage Weil return -1; 342f2dc053SSage Weil 352f2dc053SSage Weil /* pick */ 36a84cd293SSam Lang n = prandom_u32() % n; 372f2dc053SSage Weil i = 0; 382f2dc053SSage Weil for (i = 0; n > 0; i++, n--) 392f2dc053SSage Weil while (m->m_info[i].state <= 0) 402f2dc053SSage Weil i++; 412f2dc053SSage Weil 422f2dc053SSage Weil return i; 432f2dc053SSage Weil } 442f2dc053SSage Weil 452f2dc053SSage Weil /* 462f2dc053SSage Weil * Decode an MDS map 472f2dc053SSage Weil * 482f2dc053SSage Weil * Ignore any fields we don't care about (there are quite a few of 492f2dc053SSage Weil * them). 502f2dc053SSage Weil */ 512f2dc053SSage Weil struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) 522f2dc053SSage Weil { 532f2dc053SSage Weil struct ceph_mdsmap *m; 549ec7cab1SSage Weil const void *start = *p; 552f2dc053SSage Weil int i, j, n; 562f2dc053SSage Weil int err = -EINVAL; 57*d463a43dSYan, Zheng u8 mdsmap_v, mdsmap_cv; 582f2dc053SSage Weil 592f2dc053SSage Weil m = kzalloc(sizeof(*m), GFP_NOFS); 602f2dc053SSage Weil if (m == NULL) 612f2dc053SSage Weil return ERR_PTR(-ENOMEM); 622f2dc053SSage Weil 63*d463a43dSYan, Zheng ceph_decode_need(p, end, 1 + 1, bad); 64*d463a43dSYan, Zheng mdsmap_v = ceph_decode_8(p); 65*d463a43dSYan, Zheng mdsmap_cv = ceph_decode_8(p); 66*d463a43dSYan, Zheng if (mdsmap_v >= 4) { 67*d463a43dSYan, Zheng u32 mdsmap_len; 68*d463a43dSYan, Zheng ceph_decode_32_safe(p, end, mdsmap_len, bad); 69*d463a43dSYan, Zheng if (end < *p + mdsmap_len) 704f6a7e5eSSage Weil goto bad; 71*d463a43dSYan, Zheng end = *p + mdsmap_len; 724f6a7e5eSSage Weil } 732f2dc053SSage Weil 742f2dc053SSage Weil ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 75c89136eaSSage Weil m->m_epoch = ceph_decode_32(p); 76c89136eaSSage Weil m->m_client_epoch = ceph_decode_32(p); 77c89136eaSSage Weil m->m_last_failure = ceph_decode_32(p); 78c89136eaSSage Weil m->m_root = ceph_decode_32(p); 79c89136eaSSage Weil m->m_session_timeout = ceph_decode_32(p); 80c89136eaSSage Weil m->m_session_autoclose = ceph_decode_32(p); 81c89136eaSSage Weil m->m_max_file_size = ceph_decode_64(p); 82c89136eaSSage Weil m->m_max_mds = ceph_decode_32(p); 832f2dc053SSage Weil 842f2dc053SSage Weil m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); 852f2dc053SSage Weil if (m->m_info == NULL) 862f2dc053SSage Weil goto badmem; 872f2dc053SSage Weil 882f2dc053SSage Weil /* pick out active nodes from mds_info (state > 0) */ 89c89136eaSSage Weil n = ceph_decode_32(p); 902f2dc053SSage Weil for (i = 0; i < n; i++) { 9194045e11SSage Weil u64 global_id; 922f2dc053SSage Weil u32 namelen; 932f2dc053SSage Weil s32 mds, inc, state; 942f2dc053SSage Weil u64 state_seq; 95*d463a43dSYan, Zheng u8 info_v; 96*d463a43dSYan, Zheng void *info_end = NULL; 972f2dc053SSage Weil struct ceph_entity_addr addr; 982f2dc053SSage Weil u32 num_export_targets; 992f2dc053SSage Weil void *pexport_targets = NULL; 1000deb01c9SSage Weil struct ceph_timespec laggy_since; 1016af86528SDan Carpenter struct ceph_mds_info *info; 1022f2dc053SSage Weil 103*d463a43dSYan, Zheng ceph_decode_need(p, end, sizeof(u64) + 1, bad); 10494045e11SSage Weil global_id = ceph_decode_64(p); 105*d463a43dSYan, Zheng info_v= ceph_decode_8(p); 106*d463a43dSYan, Zheng if (info_v >= 4) { 107*d463a43dSYan, Zheng u32 info_len; 108*d463a43dSYan, Zheng u8 info_cv; 109*d463a43dSYan, Zheng ceph_decode_need(p, end, 1 + sizeof(u32), bad); 110*d463a43dSYan, Zheng info_cv = ceph_decode_8(p); 111*d463a43dSYan, Zheng info_len = ceph_decode_32(p); 112*d463a43dSYan, Zheng info_end = *p + info_len; 113*d463a43dSYan, Zheng if (info_end > end) 114*d463a43dSYan, Zheng goto bad; 115*d463a43dSYan, Zheng } 116*d463a43dSYan, Zheng 117*d463a43dSYan, Zheng ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); 11894045e11SSage Weil *p += sizeof(u64); 119c89136eaSSage Weil namelen = ceph_decode_32(p); /* skip mds name */ 1202f2dc053SSage Weil *p += namelen; 1212f2dc053SSage Weil 1222f2dc053SSage Weil ceph_decode_need(p, end, 123e251e288SSage Weil 4*sizeof(u32) + sizeof(u64) + 1242f2dc053SSage Weil sizeof(addr) + sizeof(struct ceph_timespec), 1252f2dc053SSage Weil bad); 126c89136eaSSage Weil mds = ceph_decode_32(p); 127c89136eaSSage Weil inc = ceph_decode_32(p); 128c89136eaSSage Weil state = ceph_decode_32(p); 129c89136eaSSage Weil state_seq = ceph_decode_64(p); 13094045e11SSage Weil ceph_decode_copy(p, &addr, sizeof(addr)); 13194045e11SSage Weil ceph_decode_addr(&addr); 1320deb01c9SSage Weil ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); 1332f2dc053SSage Weil *p += sizeof(u32); 1342f2dc053SSage Weil ceph_decode_32_safe(p, end, namelen, bad); 135e251e288SSage Weil *p += namelen; 136*d463a43dSYan, Zheng if (info_v >= 2) { 1372f2dc053SSage Weil ceph_decode_32_safe(p, end, num_export_targets, bad); 1382f2dc053SSage Weil pexport_targets = *p; 139e251e288SSage Weil *p += num_export_targets * sizeof(u32); 1402f2dc053SSage Weil } else { 1412f2dc053SSage Weil num_export_targets = 0; 1422f2dc053SSage Weil } 1432f2dc053SSage Weil 144*d463a43dSYan, Zheng if (info_end && *p != info_end) { 145*d463a43dSYan, Zheng if (*p > info_end) 146*d463a43dSYan, Zheng goto bad; 147*d463a43dSYan, Zheng *p = info_end; 148*d463a43dSYan, Zheng } 149*d463a43dSYan, Zheng 15094045e11SSage Weil dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", 1513d14c5d2SYehuda Sadeh i+1, n, global_id, mds, inc, 1523d14c5d2SYehuda Sadeh ceph_pr_addr(&addr.in_addr), 1532f2dc053SSage Weil ceph_mds_state_name(state)); 1546af86528SDan Carpenter 1556af86528SDan Carpenter if (mds < 0 || mds >= m->m_max_mds || state <= 0) 1566af86528SDan Carpenter continue; 1576af86528SDan Carpenter 1586af86528SDan Carpenter info = &m->m_info[mds]; 1596af86528SDan Carpenter info->global_id = global_id; 1606af86528SDan Carpenter info->state = state; 1616af86528SDan Carpenter info->addr = addr; 1626af86528SDan Carpenter info->laggy = (laggy_since.tv_sec != 0 || 1630deb01c9SSage Weil laggy_since.tv_nsec != 0); 1646af86528SDan Carpenter info->num_export_targets = num_export_targets; 1652f2dc053SSage Weil if (num_export_targets) { 1666af86528SDan Carpenter info->export_targets = kcalloc(num_export_targets, 1676af86528SDan Carpenter sizeof(u32), GFP_NOFS); 1686af86528SDan Carpenter if (info->export_targets == NULL) 169c213b50bSEmil Goode goto badmem; 1702f2dc053SSage Weil for (j = 0; j < num_export_targets; j++) 1716af86528SDan Carpenter info->export_targets[j] = 172c89136eaSSage Weil ceph_decode_32(&pexport_targets); 1732f2dc053SSage Weil } else { 1746af86528SDan Carpenter info->export_targets = NULL; 1752f2dc053SSage Weil } 1762f2dc053SSage Weil } 1772f2dc053SSage Weil 1782f2dc053SSage Weil /* pg_pools */ 1792f2dc053SSage Weil ceph_decode_32_safe(p, end, n, bad); 1802f2dc053SSage Weil m->m_num_data_pg_pools = n; 1814f6a7e5eSSage Weil m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); 1822f2dc053SSage Weil if (!m->m_data_pg_pools) 1832f2dc053SSage Weil goto badmem; 1844f6a7e5eSSage Weil ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); 1852f2dc053SSage Weil for (i = 0; i < n; i++) 1864f6a7e5eSSage Weil m->m_data_pg_pools[i] = ceph_decode_64(p); 1874f6a7e5eSSage Weil m->m_cas_pg_pool = ceph_decode_64(p); 1882f2dc053SSage Weil 1892f2dc053SSage Weil /* ok, we don't care about the rest. */ 190*d463a43dSYan, Zheng *p = end; 1912f2dc053SSage Weil dout("mdsmap_decode success epoch %u\n", m->m_epoch); 1922f2dc053SSage Weil return m; 1932f2dc053SSage Weil 1942f2dc053SSage Weil badmem: 1952f2dc053SSage Weil err = -ENOMEM; 1962f2dc053SSage Weil bad: 1972f2dc053SSage Weil pr_err("corrupt mdsmap\n"); 1989ec7cab1SSage Weil print_hex_dump(KERN_DEBUG, "mdsmap: ", 1999ec7cab1SSage Weil DUMP_PREFIX_OFFSET, 16, 1, 2009ec7cab1SSage Weil start, end - start, true); 2012f2dc053SSage Weil ceph_mdsmap_destroy(m); 202c213b50bSEmil Goode return ERR_PTR(err); 2032f2dc053SSage Weil } 2042f2dc053SSage Weil 2052f2dc053SSage Weil void ceph_mdsmap_destroy(struct ceph_mdsmap *m) 2062f2dc053SSage Weil { 2072f2dc053SSage Weil int i; 2082f2dc053SSage Weil 2092f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 2102f2dc053SSage Weil kfree(m->m_info[i].export_targets); 2112f2dc053SSage Weil kfree(m->m_info); 2122f2dc053SSage Weil kfree(m->m_data_pg_pools); 2132f2dc053SSage Weil kfree(m); 2142f2dc053SSage Weil } 215