12f2dc053SSage Weil #include "ceph_debug.h" 22f2dc053SSage Weil 32f2dc053SSage Weil #include <linux/bug.h> 42f2dc053SSage Weil #include <linux/err.h> 52f2dc053SSage Weil #include <linux/random.h> 62f2dc053SSage Weil #include <linux/slab.h> 72f2dc053SSage Weil #include <linux/types.h> 82f2dc053SSage Weil 92f2dc053SSage Weil #include "mdsmap.h" 102f2dc053SSage Weil #include "messenger.h" 112f2dc053SSage Weil #include "decode.h" 122f2dc053SSage Weil 132f2dc053SSage Weil #include "super.h" 142f2dc053SSage Weil 152f2dc053SSage Weil 162f2dc053SSage Weil /* 172f2dc053SSage Weil * choose a random mds that is "up" (i.e. has a state > 0), or -1. 182f2dc053SSage Weil */ 192f2dc053SSage Weil int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) 202f2dc053SSage Weil { 212f2dc053SSage Weil int n = 0; 222f2dc053SSage Weil int i; 232f2dc053SSage Weil char r; 242f2dc053SSage Weil 252f2dc053SSage Weil /* count */ 262f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 272f2dc053SSage Weil if (m->m_info[i].state > 0) 282f2dc053SSage Weil n++; 292f2dc053SSage Weil if (n == 0) 302f2dc053SSage Weil return -1; 312f2dc053SSage Weil 322f2dc053SSage Weil /* pick */ 332f2dc053SSage Weil get_random_bytes(&r, 1); 342f2dc053SSage Weil n = r % n; 352f2dc053SSage Weil i = 0; 362f2dc053SSage Weil for (i = 0; n > 0; i++, n--) 372f2dc053SSage Weil while (m->m_info[i].state <= 0) 382f2dc053SSage Weil i++; 392f2dc053SSage Weil 402f2dc053SSage Weil return i; 412f2dc053SSage Weil } 422f2dc053SSage Weil 432f2dc053SSage Weil /* 442f2dc053SSage Weil * Decode an MDS map 452f2dc053SSage Weil * 462f2dc053SSage Weil * Ignore any fields we don't care about (there are quite a few of 472f2dc053SSage Weil * them). 482f2dc053SSage Weil */ 492f2dc053SSage Weil struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) 502f2dc053SSage Weil { 512f2dc053SSage Weil struct ceph_mdsmap *m; 529ec7cab1SSage Weil const void *start = *p; 532f2dc053SSage Weil int i, j, n; 542f2dc053SSage Weil int err = -EINVAL; 552f2dc053SSage Weil u16 version; 562f2dc053SSage Weil 572f2dc053SSage Weil m = kzalloc(sizeof(*m), GFP_NOFS); 582f2dc053SSage Weil if (m == NULL) 592f2dc053SSage Weil return ERR_PTR(-ENOMEM); 602f2dc053SSage Weil 612f2dc053SSage Weil ceph_decode_16_safe(p, end, version, bad); 622f2dc053SSage Weil 632f2dc053SSage Weil ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 64c89136eaSSage Weil m->m_epoch = ceph_decode_32(p); 65c89136eaSSage Weil m->m_client_epoch = ceph_decode_32(p); 66c89136eaSSage Weil m->m_last_failure = ceph_decode_32(p); 67c89136eaSSage Weil m->m_root = ceph_decode_32(p); 68c89136eaSSage Weil m->m_session_timeout = ceph_decode_32(p); 69c89136eaSSage Weil m->m_session_autoclose = ceph_decode_32(p); 70c89136eaSSage Weil m->m_max_file_size = ceph_decode_64(p); 71c89136eaSSage Weil m->m_max_mds = ceph_decode_32(p); 722f2dc053SSage Weil 732f2dc053SSage Weil m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); 742f2dc053SSage Weil if (m->m_info == NULL) 752f2dc053SSage Weil goto badmem; 762f2dc053SSage Weil 772f2dc053SSage Weil /* pick out active nodes from mds_info (state > 0) */ 78c89136eaSSage Weil n = ceph_decode_32(p); 792f2dc053SSage Weil for (i = 0; i < n; i++) { 8094045e11SSage Weil u64 global_id; 812f2dc053SSage Weil u32 namelen; 822f2dc053SSage Weil s32 mds, inc, state; 832f2dc053SSage Weil u64 state_seq; 842f2dc053SSage Weil u8 infoversion; 852f2dc053SSage Weil struct ceph_entity_addr addr; 862f2dc053SSage Weil u32 num_export_targets; 872f2dc053SSage Weil void *pexport_targets = NULL; 88*0deb01c9SSage Weil struct ceph_timespec laggy_since; 892f2dc053SSage Weil 9094045e11SSage Weil ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); 9194045e11SSage Weil global_id = ceph_decode_64(p); 92c89136eaSSage Weil infoversion = ceph_decode_8(p); 9394045e11SSage Weil *p += sizeof(u64); 94c89136eaSSage Weil namelen = ceph_decode_32(p); /* skip mds name */ 952f2dc053SSage Weil *p += namelen; 962f2dc053SSage Weil 972f2dc053SSage Weil ceph_decode_need(p, end, 98e251e288SSage Weil 4*sizeof(u32) + sizeof(u64) + 992f2dc053SSage Weil sizeof(addr) + sizeof(struct ceph_timespec), 1002f2dc053SSage Weil bad); 101c89136eaSSage Weil mds = ceph_decode_32(p); 102c89136eaSSage Weil inc = ceph_decode_32(p); 103c89136eaSSage Weil state = ceph_decode_32(p); 104c89136eaSSage Weil state_seq = ceph_decode_64(p); 10594045e11SSage Weil ceph_decode_copy(p, &addr, sizeof(addr)); 10694045e11SSage Weil ceph_decode_addr(&addr); 107*0deb01c9SSage Weil ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); 1082f2dc053SSage Weil *p += sizeof(u32); 1092f2dc053SSage Weil ceph_decode_32_safe(p, end, namelen, bad); 110e251e288SSage Weil *p += namelen; 1112f2dc053SSage Weil if (infoversion >= 2) { 1122f2dc053SSage Weil ceph_decode_32_safe(p, end, num_export_targets, bad); 1132f2dc053SSage Weil pexport_targets = *p; 114e251e288SSage Weil *p += num_export_targets * sizeof(u32); 1152f2dc053SSage Weil } else { 1162f2dc053SSage Weil num_export_targets = 0; 1172f2dc053SSage Weil } 1182f2dc053SSage Weil 11994045e11SSage Weil dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", 12094045e11SSage Weil i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), 1212f2dc053SSage Weil ceph_mds_state_name(state)); 1222f2dc053SSage Weil if (mds >= 0 && mds < m->m_max_mds && state > 0) { 12394045e11SSage Weil m->m_info[mds].global_id = global_id; 1242f2dc053SSage Weil m->m_info[mds].state = state; 1252f2dc053SSage Weil m->m_info[mds].addr = addr; 126*0deb01c9SSage Weil m->m_info[mds].laggy = 127*0deb01c9SSage Weil (laggy_since.tv_sec != 0 || 128*0deb01c9SSage Weil laggy_since.tv_nsec != 0); 1292f2dc053SSage Weil m->m_info[mds].num_export_targets = num_export_targets; 1302f2dc053SSage Weil if (num_export_targets) { 1312f2dc053SSage Weil m->m_info[mds].export_targets = 1322f2dc053SSage Weil kcalloc(num_export_targets, sizeof(u32), 1332f2dc053SSage Weil GFP_NOFS); 1342f2dc053SSage Weil for (j = 0; j < num_export_targets; j++) 135c89136eaSSage Weil m->m_info[mds].export_targets[j] = 136c89136eaSSage Weil ceph_decode_32(&pexport_targets); 1372f2dc053SSage Weil } else { 1382f2dc053SSage Weil m->m_info[mds].export_targets = NULL; 1392f2dc053SSage Weil } 1402f2dc053SSage Weil } 1412f2dc053SSage Weil } 1422f2dc053SSage Weil 1432f2dc053SSage Weil /* pg_pools */ 1442f2dc053SSage Weil ceph_decode_32_safe(p, end, n, bad); 1452f2dc053SSage Weil m->m_num_data_pg_pools = n; 1462f2dc053SSage Weil m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); 1472f2dc053SSage Weil if (!m->m_data_pg_pools) 1482f2dc053SSage Weil goto badmem; 1492f2dc053SSage Weil ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); 1502f2dc053SSage Weil for (i = 0; i < n; i++) 151c89136eaSSage Weil m->m_data_pg_pools[i] = ceph_decode_32(p); 152c89136eaSSage Weil m->m_cas_pg_pool = ceph_decode_32(p); 1532f2dc053SSage Weil 1542f2dc053SSage Weil /* ok, we don't care about the rest. */ 1552f2dc053SSage Weil dout("mdsmap_decode success epoch %u\n", m->m_epoch); 1562f2dc053SSage Weil return m; 1572f2dc053SSage Weil 1582f2dc053SSage Weil badmem: 1592f2dc053SSage Weil err = -ENOMEM; 1602f2dc053SSage Weil bad: 1612f2dc053SSage Weil pr_err("corrupt mdsmap\n"); 1629ec7cab1SSage Weil print_hex_dump(KERN_DEBUG, "mdsmap: ", 1639ec7cab1SSage Weil DUMP_PREFIX_OFFSET, 16, 1, 1649ec7cab1SSage Weil start, end - start, true); 1652f2dc053SSage Weil ceph_mdsmap_destroy(m); 1662f2dc053SSage Weil return ERR_PTR(-EINVAL); 1672f2dc053SSage Weil } 1682f2dc053SSage Weil 1692f2dc053SSage Weil void ceph_mdsmap_destroy(struct ceph_mdsmap *m) 1702f2dc053SSage Weil { 1712f2dc053SSage Weil int i; 1722f2dc053SSage Weil 1732f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 1742f2dc053SSage Weil kfree(m->m_info[i].export_targets); 1752f2dc053SSage Weil kfree(m->m_info); 1762f2dc053SSage Weil kfree(m->m_data_pg_pools); 1772f2dc053SSage Weil kfree(m); 1782f2dc053SSage Weil } 179