12f2dc053SSage Weil #include "ceph_debug.h" 22f2dc053SSage Weil 32f2dc053SSage Weil #include <linux/bug.h> 42f2dc053SSage Weil #include <linux/err.h> 52f2dc053SSage Weil #include <linux/random.h> 62f2dc053SSage Weil #include <linux/slab.h> 72f2dc053SSage Weil #include <linux/types.h> 82f2dc053SSage Weil 92f2dc053SSage Weil #include "mdsmap.h" 102f2dc053SSage Weil #include "messenger.h" 112f2dc053SSage Weil #include "decode.h" 122f2dc053SSage Weil 132f2dc053SSage Weil #include "super.h" 142f2dc053SSage Weil 152f2dc053SSage Weil 162f2dc053SSage Weil /* 172f2dc053SSage Weil * choose a random mds that is "up" (i.e. has a state > 0), or -1. 182f2dc053SSage Weil */ 192f2dc053SSage Weil int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) 202f2dc053SSage Weil { 212f2dc053SSage Weil int n = 0; 222f2dc053SSage Weil int i; 232f2dc053SSage Weil char r; 242f2dc053SSage Weil 252f2dc053SSage Weil /* count */ 262f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 272f2dc053SSage Weil if (m->m_info[i].state > 0) 282f2dc053SSage Weil n++; 292f2dc053SSage Weil if (n == 0) 302f2dc053SSage Weil return -1; 312f2dc053SSage Weil 322f2dc053SSage Weil /* pick */ 332f2dc053SSage Weil get_random_bytes(&r, 1); 342f2dc053SSage Weil n = r % n; 352f2dc053SSage Weil i = 0; 362f2dc053SSage Weil for (i = 0; n > 0; i++, n--) 372f2dc053SSage Weil while (m->m_info[i].state <= 0) 382f2dc053SSage Weil i++; 392f2dc053SSage Weil 402f2dc053SSage Weil return i; 412f2dc053SSage Weil } 422f2dc053SSage Weil 432f2dc053SSage Weil /* 442f2dc053SSage Weil * Decode an MDS map 452f2dc053SSage Weil * 462f2dc053SSage Weil * Ignore any fields we don't care about (there are quite a few of 472f2dc053SSage Weil * them). 482f2dc053SSage Weil */ 492f2dc053SSage Weil struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) 502f2dc053SSage Weil { 512f2dc053SSage Weil struct ceph_mdsmap *m; 522f2dc053SSage Weil int i, j, n; 532f2dc053SSage Weil int err = -EINVAL; 542f2dc053SSage Weil u16 version; 552f2dc053SSage Weil 562f2dc053SSage Weil m = kzalloc(sizeof(*m), GFP_NOFS); 572f2dc053SSage Weil if (m == NULL) 582f2dc053SSage Weil return ERR_PTR(-ENOMEM); 592f2dc053SSage Weil 602f2dc053SSage Weil ceph_decode_16_safe(p, end, version, bad); 612f2dc053SSage Weil 622f2dc053SSage Weil ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 63c89136eaSSage Weil m->m_epoch = ceph_decode_32(p); 64c89136eaSSage Weil m->m_client_epoch = ceph_decode_32(p); 65c89136eaSSage Weil m->m_last_failure = ceph_decode_32(p); 66c89136eaSSage Weil m->m_root = ceph_decode_32(p); 67c89136eaSSage Weil m->m_session_timeout = ceph_decode_32(p); 68c89136eaSSage Weil m->m_session_autoclose = ceph_decode_32(p); 69c89136eaSSage Weil m->m_max_file_size = ceph_decode_64(p); 70c89136eaSSage Weil m->m_max_mds = ceph_decode_32(p); 712f2dc053SSage Weil 722f2dc053SSage Weil m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); 732f2dc053SSage Weil if (m->m_info == NULL) 742f2dc053SSage Weil goto badmem; 752f2dc053SSage Weil 762f2dc053SSage Weil /* pick out active nodes from mds_info (state > 0) */ 77c89136eaSSage Weil n = ceph_decode_32(p); 782f2dc053SSage Weil for (i = 0; i < n; i++) { 79*94045e11SSage Weil u64 global_id; 802f2dc053SSage Weil u32 namelen; 812f2dc053SSage Weil s32 mds, inc, state; 822f2dc053SSage Weil u64 state_seq; 832f2dc053SSage Weil u8 infoversion; 842f2dc053SSage Weil struct ceph_entity_addr addr; 852f2dc053SSage Weil u32 num_export_targets; 862f2dc053SSage Weil void *pexport_targets = NULL; 872f2dc053SSage Weil 88*94045e11SSage Weil ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); 89*94045e11SSage Weil global_id = ceph_decode_64(p); 90c89136eaSSage Weil infoversion = ceph_decode_8(p); 91*94045e11SSage Weil *p += sizeof(u64); 92c89136eaSSage Weil namelen = ceph_decode_32(p); /* skip mds name */ 932f2dc053SSage Weil *p += namelen; 942f2dc053SSage Weil 952f2dc053SSage Weil ceph_decode_need(p, end, 96e251e288SSage Weil 4*sizeof(u32) + sizeof(u64) + 972f2dc053SSage Weil sizeof(addr) + sizeof(struct ceph_timespec), 982f2dc053SSage Weil bad); 99c89136eaSSage Weil mds = ceph_decode_32(p); 100c89136eaSSage Weil inc = ceph_decode_32(p); 101c89136eaSSage Weil state = ceph_decode_32(p); 102c89136eaSSage Weil state_seq = ceph_decode_64(p); 103*94045e11SSage Weil ceph_decode_copy(p, &addr, sizeof(addr)); 104*94045e11SSage Weil ceph_decode_addr(&addr); 1052f2dc053SSage Weil *p += sizeof(struct ceph_timespec); 1062f2dc053SSage Weil *p += sizeof(u32); 1072f2dc053SSage Weil ceph_decode_32_safe(p, end, namelen, bad); 108e251e288SSage Weil *p += namelen; 1092f2dc053SSage Weil if (infoversion >= 2) { 1102f2dc053SSage Weil ceph_decode_32_safe(p, end, num_export_targets, bad); 1112f2dc053SSage Weil pexport_targets = *p; 112e251e288SSage Weil *p += num_export_targets * sizeof(u32); 1132f2dc053SSage Weil } else { 1142f2dc053SSage Weil num_export_targets = 0; 1152f2dc053SSage Weil } 1162f2dc053SSage Weil 117*94045e11SSage Weil dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", 118*94045e11SSage Weil i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), 1192f2dc053SSage Weil ceph_mds_state_name(state)); 1202f2dc053SSage Weil if (mds >= 0 && mds < m->m_max_mds && state > 0) { 121*94045e11SSage Weil m->m_info[mds].global_id = global_id; 1222f2dc053SSage Weil m->m_info[mds].state = state; 1232f2dc053SSage Weil m->m_info[mds].addr = addr; 1242f2dc053SSage Weil m->m_info[mds].num_export_targets = num_export_targets; 1252f2dc053SSage Weil if (num_export_targets) { 1262f2dc053SSage Weil m->m_info[mds].export_targets = 1272f2dc053SSage Weil kcalloc(num_export_targets, sizeof(u32), 1282f2dc053SSage Weil GFP_NOFS); 1292f2dc053SSage Weil for (j = 0; j < num_export_targets; j++) 130c89136eaSSage Weil m->m_info[mds].export_targets[j] = 131c89136eaSSage Weil ceph_decode_32(&pexport_targets); 1322f2dc053SSage Weil } else { 1332f2dc053SSage Weil m->m_info[mds].export_targets = NULL; 1342f2dc053SSage Weil } 1352f2dc053SSage Weil } 1362f2dc053SSage Weil } 1372f2dc053SSage Weil 1382f2dc053SSage Weil /* pg_pools */ 1392f2dc053SSage Weil ceph_decode_32_safe(p, end, n, bad); 1402f2dc053SSage Weil m->m_num_data_pg_pools = n; 1412f2dc053SSage Weil m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); 1422f2dc053SSage Weil if (!m->m_data_pg_pools) 1432f2dc053SSage Weil goto badmem; 1442f2dc053SSage Weil ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); 1452f2dc053SSage Weil for (i = 0; i < n; i++) 146c89136eaSSage Weil m->m_data_pg_pools[i] = ceph_decode_32(p); 147c89136eaSSage Weil m->m_cas_pg_pool = ceph_decode_32(p); 1482f2dc053SSage Weil 1492f2dc053SSage Weil /* ok, we don't care about the rest. */ 1502f2dc053SSage Weil dout("mdsmap_decode success epoch %u\n", m->m_epoch); 1512f2dc053SSage Weil return m; 1522f2dc053SSage Weil 1532f2dc053SSage Weil badmem: 1542f2dc053SSage Weil err = -ENOMEM; 1552f2dc053SSage Weil bad: 1562f2dc053SSage Weil pr_err("corrupt mdsmap\n"); 1572f2dc053SSage Weil ceph_mdsmap_destroy(m); 1582f2dc053SSage Weil return ERR_PTR(-EINVAL); 1592f2dc053SSage Weil } 1602f2dc053SSage Weil 1612f2dc053SSage Weil void ceph_mdsmap_destroy(struct ceph_mdsmap *m) 1622f2dc053SSage Weil { 1632f2dc053SSage Weil int i; 1642f2dc053SSage Weil 1652f2dc053SSage Weil for (i = 0; i < m->m_max_mds; i++) 1662f2dc053SSage Weil kfree(m->m_info[i].export_targets); 1672f2dc053SSage Weil kfree(m->m_info); 1682f2dc053SSage Weil kfree(m->m_data_pg_pools); 1692f2dc053SSage Weil kfree(m); 1702f2dc053SSage Weil } 171