1 /* 2 * Basic Node interface support 3 */ 4 5 #include <linux/module.h> 6 #include <linux/init.h> 7 #include <linux/mm.h> 8 #include <linux/memory.h> 9 #include <linux/vmstat.h> 10 #include <linux/node.h> 11 #include <linux/hugetlb.h> 12 #include <linux/compaction.h> 13 #include <linux/cpumask.h> 14 #include <linux/topology.h> 15 #include <linux/nodemask.h> 16 #include <linux/cpu.h> 17 #include <linux/device.h> 18 #include <linux/swap.h> 19 #include <linux/slab.h> 20 21 static struct bus_type node_subsys = { 22 .name = "node", 23 .dev_name = "node", 24 }; 25 26 27 static ssize_t node_read_cpumap(struct device *dev, int type, char *buf) 28 { 29 struct node *node_dev = to_node(dev); 30 const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); 31 int len; 32 33 /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ 34 BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 35 36 len = type? 37 cpulist_scnprintf(buf, PAGE_SIZE-2, mask) : 38 cpumask_scnprintf(buf, PAGE_SIZE-2, mask); 39 buf[len++] = '\n'; 40 buf[len] = '\0'; 41 return len; 42 } 43 44 static inline ssize_t node_read_cpumask(struct device *dev, 45 struct device_attribute *attr, char *buf) 46 { 47 return node_read_cpumap(dev, 0, buf); 48 } 49 static inline ssize_t node_read_cpulist(struct device *dev, 50 struct device_attribute *attr, char *buf) 51 { 52 return node_read_cpumap(dev, 1, buf); 53 } 54 55 static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 56 static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 57 58 #define K(x) ((x) << (PAGE_SHIFT - 10)) 59 static ssize_t node_read_meminfo(struct device *dev, 60 struct device_attribute *attr, char *buf) 61 { 62 int n; 63 int nid = dev->id; 64 struct sysinfo i; 65 66 si_meminfo_node(&i, nid); 67 n = sprintf(buf, 68 "Node %d MemTotal: %8lu kB\n" 69 "Node %d MemFree: %8lu kB\n" 70 "Node %d MemUsed: %8lu kB\n" 71 "Node %d Active: %8lu kB\n" 72 "Node %d Inactive: %8lu kB\n" 73 "Node %d Active(anon): %8lu kB\n" 74 "Node %d Inactive(anon): %8lu kB\n" 75 "Node %d Active(file): %8lu kB\n" 76 "Node %d Inactive(file): %8lu kB\n" 77 "Node %d Unevictable: %8lu kB\n" 78 "Node %d Mlocked: %8lu kB\n", 79 nid, K(i.totalram), 80 nid, K(i.freeram), 81 nid, K(i.totalram - i.freeram), 82 nid, K(node_page_state(nid, NR_ACTIVE_ANON) + 83 node_page_state(nid, NR_ACTIVE_FILE)), 84 nid, K(node_page_state(nid, NR_INACTIVE_ANON) + 85 node_page_state(nid, NR_INACTIVE_FILE)), 86 nid, K(node_page_state(nid, NR_ACTIVE_ANON)), 87 nid, K(node_page_state(nid, NR_INACTIVE_ANON)), 88 nid, K(node_page_state(nid, NR_ACTIVE_FILE)), 89 nid, K(node_page_state(nid, NR_INACTIVE_FILE)), 90 nid, K(node_page_state(nid, NR_UNEVICTABLE)), 91 nid, K(node_page_state(nid, NR_MLOCK))); 92 93 #ifdef CONFIG_HIGHMEM 94 n += sprintf(buf + n, 95 "Node %d HighTotal: %8lu kB\n" 96 "Node %d HighFree: %8lu kB\n" 97 "Node %d LowTotal: %8lu kB\n" 98 "Node %d LowFree: %8lu kB\n", 99 nid, K(i.totalhigh), 100 nid, K(i.freehigh), 101 nid, K(i.totalram - i.totalhigh), 102 nid, K(i.freeram - i.freehigh)); 103 #endif 104 n += sprintf(buf + n, 105 "Node %d Dirty: %8lu kB\n" 106 "Node %d Writeback: %8lu kB\n" 107 "Node %d FilePages: %8lu kB\n" 108 "Node %d Mapped: %8lu kB\n" 109 "Node %d AnonPages: %8lu kB\n" 110 "Node %d Shmem: %8lu kB\n" 111 "Node %d KernelStack: %8lu kB\n" 112 "Node %d PageTables: %8lu kB\n" 113 "Node %d NFS_Unstable: %8lu kB\n" 114 "Node %d Bounce: %8lu kB\n" 115 "Node %d WritebackTmp: %8lu kB\n" 116 "Node %d Slab: %8lu kB\n" 117 "Node %d SReclaimable: %8lu kB\n" 118 "Node %d SUnreclaim: %8lu kB\n" 119 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 120 "Node %d AnonHugePages: %8lu kB\n" 121 #endif 122 , 123 nid, K(node_page_state(nid, NR_FILE_DIRTY)), 124 nid, K(node_page_state(nid, NR_WRITEBACK)), 125 nid, K(node_page_state(nid, NR_FILE_PAGES)), 126 nid, K(node_page_state(nid, NR_FILE_MAPPED)), 127 nid, K(node_page_state(nid, NR_ANON_PAGES) 128 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 129 + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * 130 HPAGE_PMD_NR 131 #endif 132 ), 133 nid, K(node_page_state(nid, NR_SHMEM)), 134 nid, node_page_state(nid, NR_KERNEL_STACK) * 135 THREAD_SIZE / 1024, 136 nid, K(node_page_state(nid, NR_PAGETABLE)), 137 nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), 138 nid, K(node_page_state(nid, NR_BOUNCE)), 139 nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)), 140 nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + 141 node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), 142 nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), 143 nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)) 144 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 145 , nid, 146 K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * 147 HPAGE_PMD_NR) 148 #endif 149 ); 150 n += hugetlb_report_node_meminfo(nid, buf + n); 151 return n; 152 } 153 154 #undef K 155 static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); 156 157 static ssize_t node_read_numastat(struct device *dev, 158 struct device_attribute *attr, char *buf) 159 { 160 return sprintf(buf, 161 "numa_hit %lu\n" 162 "numa_miss %lu\n" 163 "numa_foreign %lu\n" 164 "interleave_hit %lu\n" 165 "local_node %lu\n" 166 "other_node %lu\n", 167 node_page_state(dev->id, NUMA_HIT), 168 node_page_state(dev->id, NUMA_MISS), 169 node_page_state(dev->id, NUMA_FOREIGN), 170 node_page_state(dev->id, NUMA_INTERLEAVE_HIT), 171 node_page_state(dev->id, NUMA_LOCAL), 172 node_page_state(dev->id, NUMA_OTHER)); 173 } 174 static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 175 176 static ssize_t node_read_vmstat(struct device *dev, 177 struct device_attribute *attr, char *buf) 178 { 179 int nid = dev->id; 180 int i; 181 int n = 0; 182 183 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 184 n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], 185 node_page_state(nid, i)); 186 187 return n; 188 } 189 static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); 190 191 static ssize_t node_read_distance(struct device *dev, 192 struct device_attribute *attr, char * buf) 193 { 194 int nid = dev->id; 195 int len = 0; 196 int i; 197 198 /* 199 * buf is currently PAGE_SIZE in length and each node needs 4 chars 200 * at the most (distance + space or newline). 201 */ 202 BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); 203 204 for_each_online_node(i) 205 len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i)); 206 207 len += sprintf(buf + len, "\n"); 208 return len; 209 } 210 static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); 211 212 #ifdef CONFIG_HUGETLBFS 213 /* 214 * hugetlbfs per node attributes registration interface: 215 * When/if hugetlb[fs] subsystem initializes [sometime after this module], 216 * it will register its per node attributes for all online nodes with 217 * memory. It will also call register_hugetlbfs_with_node(), below, to 218 * register its attribute registration functions with this node driver. 219 * Once these hooks have been initialized, the node driver will call into 220 * the hugetlb module to [un]register attributes for hot-plugged nodes. 221 */ 222 static node_registration_func_t __hugetlb_register_node; 223 static node_registration_func_t __hugetlb_unregister_node; 224 225 static inline bool hugetlb_register_node(struct node *node) 226 { 227 if (__hugetlb_register_node && 228 node_state(node->dev.id, N_HIGH_MEMORY)) { 229 __hugetlb_register_node(node); 230 return true; 231 } 232 return false; 233 } 234 235 static inline void hugetlb_unregister_node(struct node *node) 236 { 237 if (__hugetlb_unregister_node) 238 __hugetlb_unregister_node(node); 239 } 240 241 void register_hugetlbfs_with_node(node_registration_func_t doregister, 242 node_registration_func_t unregister) 243 { 244 __hugetlb_register_node = doregister; 245 __hugetlb_unregister_node = unregister; 246 } 247 #else 248 static inline void hugetlb_register_node(struct node *node) {} 249 250 static inline void hugetlb_unregister_node(struct node *node) {} 251 #endif 252 253 254 /* 255 * register_node - Setup a sysfs device for a node. 256 * @num - Node number to use when creating the device. 257 * 258 * Initialize and register the node device. 259 */ 260 int register_node(struct node *node, int num, struct node *parent) 261 { 262 int error; 263 264 node->dev.id = num; 265 node->dev.bus = &node_subsys; 266 error = device_register(&node->dev); 267 268 if (!error){ 269 device_create_file(&node->dev, &dev_attr_cpumap); 270 device_create_file(&node->dev, &dev_attr_cpulist); 271 device_create_file(&node->dev, &dev_attr_meminfo); 272 device_create_file(&node->dev, &dev_attr_numastat); 273 device_create_file(&node->dev, &dev_attr_distance); 274 device_create_file(&node->dev, &dev_attr_vmstat); 275 276 scan_unevictable_register_node(node); 277 278 hugetlb_register_node(node); 279 280 compaction_register_node(node); 281 } 282 return error; 283 } 284 285 /** 286 * unregister_node - unregister a node device 287 * @node: node going away 288 * 289 * Unregisters a node device @node. All the devices on the node must be 290 * unregistered before calling this function. 291 */ 292 void unregister_node(struct node *node) 293 { 294 device_remove_file(&node->dev, &dev_attr_cpumap); 295 device_remove_file(&node->dev, &dev_attr_cpulist); 296 device_remove_file(&node->dev, &dev_attr_meminfo); 297 device_remove_file(&node->dev, &dev_attr_numastat); 298 device_remove_file(&node->dev, &dev_attr_distance); 299 device_remove_file(&node->dev, &dev_attr_vmstat); 300 301 scan_unevictable_unregister_node(node); 302 hugetlb_unregister_node(node); /* no-op, if memoryless node */ 303 304 device_unregister(&node->dev); 305 } 306 307 struct node node_devices[MAX_NUMNODES]; 308 309 /* 310 * register cpu under node 311 */ 312 int register_cpu_under_node(unsigned int cpu, unsigned int nid) 313 { 314 int ret; 315 struct device *obj; 316 317 if (!node_online(nid)) 318 return 0; 319 320 obj = get_cpu_device(cpu); 321 if (!obj) 322 return 0; 323 324 ret = sysfs_create_link(&node_devices[nid].dev.kobj, 325 &obj->kobj, 326 kobject_name(&obj->kobj)); 327 if (ret) 328 return ret; 329 330 return sysfs_create_link(&obj->kobj, 331 &node_devices[nid].dev.kobj, 332 kobject_name(&node_devices[nid].dev.kobj)); 333 } 334 335 int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 336 { 337 struct device *obj; 338 339 if (!node_online(nid)) 340 return 0; 341 342 obj = get_cpu_device(cpu); 343 if (!obj) 344 return 0; 345 346 sysfs_remove_link(&node_devices[nid].dev.kobj, 347 kobject_name(&obj->kobj)); 348 sysfs_remove_link(&obj->kobj, 349 kobject_name(&node_devices[nid].dev.kobj)); 350 351 return 0; 352 } 353 354 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 355 #define page_initialized(page) (page->lru.next) 356 357 static int get_nid_for_pfn(unsigned long pfn) 358 { 359 struct page *page; 360 361 if (!pfn_valid_within(pfn)) 362 return -1; 363 page = pfn_to_page(pfn); 364 if (!page_initialized(page)) 365 return -1; 366 return pfn_to_nid(pfn); 367 } 368 369 /* register memory section under specified node if it spans that node */ 370 int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) 371 { 372 int ret; 373 unsigned long pfn, sect_start_pfn, sect_end_pfn; 374 375 if (!mem_blk) 376 return -EFAULT; 377 if (!node_online(nid)) 378 return 0; 379 380 sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 381 sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); 382 sect_end_pfn += PAGES_PER_SECTION - 1; 383 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 384 int page_nid; 385 386 page_nid = get_nid_for_pfn(pfn); 387 if (page_nid < 0) 388 continue; 389 if (page_nid != nid) 390 continue; 391 ret = sysfs_create_link_nowarn(&node_devices[nid].dev.kobj, 392 &mem_blk->dev.kobj, 393 kobject_name(&mem_blk->dev.kobj)); 394 if (ret) 395 return ret; 396 397 return sysfs_create_link_nowarn(&mem_blk->dev.kobj, 398 &node_devices[nid].dev.kobj, 399 kobject_name(&node_devices[nid].dev.kobj)); 400 } 401 /* mem section does not span the specified node */ 402 return 0; 403 } 404 405 /* unregister memory section under all nodes that it spans */ 406 int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, 407 unsigned long phys_index) 408 { 409 NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); 410 unsigned long pfn, sect_start_pfn, sect_end_pfn; 411 412 if (!mem_blk) { 413 NODEMASK_FREE(unlinked_nodes); 414 return -EFAULT; 415 } 416 if (!unlinked_nodes) 417 return -ENOMEM; 418 nodes_clear(*unlinked_nodes); 419 420 sect_start_pfn = section_nr_to_pfn(phys_index); 421 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; 422 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 423 int nid; 424 425 nid = get_nid_for_pfn(pfn); 426 if (nid < 0) 427 continue; 428 if (!node_online(nid)) 429 continue; 430 if (node_test_and_set(nid, *unlinked_nodes)) 431 continue; 432 sysfs_remove_link(&node_devices[nid].dev.kobj, 433 kobject_name(&mem_blk->dev.kobj)); 434 sysfs_remove_link(&mem_blk->dev.kobj, 435 kobject_name(&node_devices[nid].dev.kobj)); 436 } 437 NODEMASK_FREE(unlinked_nodes); 438 return 0; 439 } 440 441 static int link_mem_sections(int nid) 442 { 443 unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; 444 unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; 445 unsigned long pfn; 446 struct memory_block *mem_blk = NULL; 447 int err = 0; 448 449 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 450 unsigned long section_nr = pfn_to_section_nr(pfn); 451 struct mem_section *mem_sect; 452 int ret; 453 454 if (!present_section_nr(section_nr)) 455 continue; 456 mem_sect = __nr_to_section(section_nr); 457 mem_blk = find_memory_block_hinted(mem_sect, mem_blk); 458 ret = register_mem_sect_under_node(mem_blk, nid); 459 if (!err) 460 err = ret; 461 462 /* discard ref obtained in find_memory_block() */ 463 } 464 465 if (mem_blk) 466 kobject_put(&mem_blk->dev.kobj); 467 return err; 468 } 469 470 #ifdef CONFIG_HUGETLBFS 471 /* 472 * Handle per node hstate attribute [un]registration on transistions 473 * to/from memoryless state. 474 */ 475 static void node_hugetlb_work(struct work_struct *work) 476 { 477 struct node *node = container_of(work, struct node, node_work); 478 479 /* 480 * We only get here when a node transitions to/from memoryless state. 481 * We can detect which transition occurred by examining whether the 482 * node has memory now. hugetlb_register_node() already check this 483 * so we try to register the attributes. If that fails, then the 484 * node has transitioned to memoryless, try to unregister the 485 * attributes. 486 */ 487 if (!hugetlb_register_node(node)) 488 hugetlb_unregister_node(node); 489 } 490 491 static void init_node_hugetlb_work(int nid) 492 { 493 INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work); 494 } 495 496 static int node_memory_callback(struct notifier_block *self, 497 unsigned long action, void *arg) 498 { 499 struct memory_notify *mnb = arg; 500 int nid = mnb->status_change_nid; 501 502 switch (action) { 503 case MEM_ONLINE: 504 case MEM_OFFLINE: 505 /* 506 * offload per node hstate [un]registration to a work thread 507 * when transitioning to/from memoryless state. 508 */ 509 if (nid != NUMA_NO_NODE) 510 schedule_work(&node_devices[nid].node_work); 511 break; 512 513 case MEM_GOING_ONLINE: 514 case MEM_GOING_OFFLINE: 515 case MEM_CANCEL_ONLINE: 516 case MEM_CANCEL_OFFLINE: 517 default: 518 break; 519 } 520 521 return NOTIFY_OK; 522 } 523 #endif /* CONFIG_HUGETLBFS */ 524 #else /* !CONFIG_MEMORY_HOTPLUG_SPARSE */ 525 526 static int link_mem_sections(int nid) { return 0; } 527 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 528 529 #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ 530 !defined(CONFIG_HUGETLBFS) 531 static inline int node_memory_callback(struct notifier_block *self, 532 unsigned long action, void *arg) 533 { 534 return NOTIFY_OK; 535 } 536 537 static void init_node_hugetlb_work(int nid) { } 538 539 #endif 540 541 int register_one_node(int nid) 542 { 543 int error = 0; 544 int cpu; 545 546 if (node_online(nid)) { 547 int p_node = parent_node(nid); 548 struct node *parent = NULL; 549 550 if (p_node != nid) 551 parent = &node_devices[p_node]; 552 553 error = register_node(&node_devices[nid], nid, parent); 554 555 /* link cpu under this node */ 556 for_each_present_cpu(cpu) { 557 if (cpu_to_node(cpu) == nid) 558 register_cpu_under_node(cpu, nid); 559 } 560 561 /* link memory sections under this node */ 562 error = link_mem_sections(nid); 563 564 /* initialize work queue for memory hot plug */ 565 init_node_hugetlb_work(nid); 566 } 567 568 return error; 569 570 } 571 572 void unregister_one_node(int nid) 573 { 574 unregister_node(&node_devices[nid]); 575 } 576 577 /* 578 * node states attributes 579 */ 580 581 static ssize_t print_nodes_state(enum node_states state, char *buf) 582 { 583 int n; 584 585 n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]); 586 if (n > 0 && PAGE_SIZE > n + 1) { 587 *(buf + n++) = '\n'; 588 *(buf + n++) = '\0'; 589 } 590 return n; 591 } 592 593 struct node_attr { 594 struct device_attribute attr; 595 enum node_states state; 596 }; 597 598 static ssize_t show_node_state(struct device *dev, 599 struct device_attribute *attr, char *buf) 600 { 601 struct node_attr *na = container_of(attr, struct node_attr, attr); 602 return print_nodes_state(na->state, buf); 603 } 604 605 #define _NODE_ATTR(name, state) \ 606 { __ATTR(name, 0444, show_node_state, NULL), state } 607 608 static struct node_attr node_state_attr[] = { 609 _NODE_ATTR(possible, N_POSSIBLE), 610 _NODE_ATTR(online, N_ONLINE), 611 _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY), 612 _NODE_ATTR(has_cpu, N_CPU), 613 #ifdef CONFIG_HIGHMEM 614 _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), 615 #endif 616 }; 617 618 static struct attribute *node_state_attrs[] = { 619 &node_state_attr[0].attr.attr, 620 &node_state_attr[1].attr.attr, 621 &node_state_attr[2].attr.attr, 622 &node_state_attr[3].attr.attr, 623 #ifdef CONFIG_HIGHMEM 624 &node_state_attr[4].attr.attr, 625 #endif 626 NULL 627 }; 628 629 static struct attribute_group memory_root_attr_group = { 630 .attrs = node_state_attrs, 631 }; 632 633 static const struct attribute_group *cpu_root_attr_groups[] = { 634 &memory_root_attr_group, 635 NULL, 636 }; 637 638 #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ 639 static int __init register_node_type(void) 640 { 641 int ret; 642 643 BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); 644 BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); 645 646 ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); 647 if (!ret) { 648 hotplug_memory_notifier(node_memory_callback, 649 NODE_CALLBACK_PRI); 650 } 651 652 /* 653 * Note: we're not going to unregister the node class if we fail 654 * to register the node state class attribute files. 655 */ 656 return ret; 657 } 658 postcore_initcall(register_node_type); 659