17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ee88d2b9Skchow * Common Development and Distribution License (the "License"). 6ee88d2b9Skchow * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*23a80de1SStan Studzinski * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 
237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate #include <sys/types.h> 267c478bd9Sstevel@tonic-gate #include <sys/param.h> 277c478bd9Sstevel@tonic-gate #include <sys/thread.h> 287c478bd9Sstevel@tonic-gate #include <sys/proc.h> 297c478bd9Sstevel@tonic-gate #include <sys/callb.h> 307c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 317c478bd9Sstevel@tonic-gate #include <sys/debug.h> 327c478bd9Sstevel@tonic-gate #include <sys/systm.h> /* for bzero */ 337c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 347c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 357c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 367c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> /* for NOMEMWAIT() */ 377c478bd9Sstevel@tonic-gate #include <sys/atomic.h> /* used to update kcage_freemem */ 387c478bd9Sstevel@tonic-gate #include <sys/kmem.h> /* for kmem_reap */ 397c478bd9Sstevel@tonic-gate #include <sys/errno.h> 407c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 417c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 427c478bd9Sstevel@tonic-gate #include <vm/page.h> 437c478bd9Sstevel@tonic-gate #include <vm/hat.h> 44affbd3ccSkchow #include <vm/vm_dep.h> 457c478bd9Sstevel@tonic-gate #include <sys/mem_config.h> 467c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 475d07b933Sdp78419 #include <sys/rwlock.h> 48dc84a327Svb70745 #include <sys/cpupart.h> 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate extern pri_t maxclsyspri; 517c478bd9Sstevel@tonic-gate 527c478bd9Sstevel@tonic-gate #ifdef DEBUG 537c478bd9Sstevel@tonic-gate #define KCAGE_STATS 547c478bd9Sstevel@tonic-gate #endif 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate #ifdef KCAGE_STATS 577c478bd9Sstevel@tonic-gate 587c478bd9Sstevel@tonic-gate #define KCAGE_STATS_VERSION 9 /* can help report generators */ 597c478bd9Sstevel@tonic-gate #define KCAGE_STATS_NSCANS 256 /* depth of scan statistics buffer */ 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate struct 
kcage_stats_scan { 627c478bd9Sstevel@tonic-gate /* managed by KCAGE_STAT_* macros */ 637c478bd9Sstevel@tonic-gate clock_t scan_lbolt; 647c478bd9Sstevel@tonic-gate uint_t scan_id; 657c478bd9Sstevel@tonic-gate 667c478bd9Sstevel@tonic-gate /* set in kcage_cageout() */ 677c478bd9Sstevel@tonic-gate uint_t kt_passes; 687c478bd9Sstevel@tonic-gate clock_t kt_ticks; 697c478bd9Sstevel@tonic-gate pgcnt_t kt_kcage_freemem_start; 707c478bd9Sstevel@tonic-gate pgcnt_t kt_kcage_freemem_end; 717c478bd9Sstevel@tonic-gate pgcnt_t kt_freemem_start; 727c478bd9Sstevel@tonic-gate pgcnt_t kt_freemem_end; 737c478bd9Sstevel@tonic-gate uint_t kt_examined; 747c478bd9Sstevel@tonic-gate uint_t kt_cantlock; 757c478bd9Sstevel@tonic-gate uint_t kt_gotone; 767c478bd9Sstevel@tonic-gate uint_t kt_gotonefree; 777c478bd9Sstevel@tonic-gate uint_t kt_skipshared; 787c478bd9Sstevel@tonic-gate uint_t kt_skiprefd; 797c478bd9Sstevel@tonic-gate uint_t kt_destroy; 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* set in kcage_invalidate_page() */ 827c478bd9Sstevel@tonic-gate uint_t kip_reloclocked; 837c478bd9Sstevel@tonic-gate uint_t kip_relocmod; 847c478bd9Sstevel@tonic-gate uint_t kip_destroy; 857c478bd9Sstevel@tonic-gate uint_t kip_nomem; 867c478bd9Sstevel@tonic-gate uint_t kip_demotefailed; 877c478bd9Sstevel@tonic-gate 887c478bd9Sstevel@tonic-gate /* set in kcage_expand() */ 897c478bd9Sstevel@tonic-gate uint_t ke_wanted; 907c478bd9Sstevel@tonic-gate uint_t ke_examined; 917c478bd9Sstevel@tonic-gate uint_t ke_lefthole; 927c478bd9Sstevel@tonic-gate uint_t ke_gotone; 937c478bd9Sstevel@tonic-gate uint_t ke_gotonefree; 947c478bd9Sstevel@tonic-gate }; 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate struct kcage_stats { 977c478bd9Sstevel@tonic-gate /* managed by KCAGE_STAT_* macros */ 987c478bd9Sstevel@tonic-gate uint_t version; 997c478bd9Sstevel@tonic-gate uint_t size; 1007c478bd9Sstevel@tonic-gate 1017c478bd9Sstevel@tonic-gate /* set in kcage_cageout */ 1027c478bd9Sstevel@tonic-gate uint_t 
kt_wakeups; 1037c478bd9Sstevel@tonic-gate uint_t kt_scans; 1047c478bd9Sstevel@tonic-gate uint_t kt_cageout_break; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate /* set in kcage_expand */ 1077c478bd9Sstevel@tonic-gate uint_t ke_calls; 1087c478bd9Sstevel@tonic-gate uint_t ke_nopfn; 1097c478bd9Sstevel@tonic-gate uint_t ke_nopaget; 1107c478bd9Sstevel@tonic-gate uint_t ke_isnoreloc; 1117c478bd9Sstevel@tonic-gate uint_t ke_deleting; 1127c478bd9Sstevel@tonic-gate uint_t ke_lowfreemem; 1137c478bd9Sstevel@tonic-gate uint_t ke_terminate; 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate /* set in kcage_freemem_add() */ 1167c478bd9Sstevel@tonic-gate uint_t kfa_trottlewake; 1177c478bd9Sstevel@tonic-gate 1187c478bd9Sstevel@tonic-gate /* set in kcage_freemem_sub() */ 1197c478bd9Sstevel@tonic-gate uint_t kfs_cagewake; 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate /* set in kcage_create_throttle */ 1227c478bd9Sstevel@tonic-gate uint_t kct_calls; 1237c478bd9Sstevel@tonic-gate uint_t kct_cageout; 1247c478bd9Sstevel@tonic-gate uint_t kct_critical; 1257c478bd9Sstevel@tonic-gate uint_t kct_exempt; 1267c478bd9Sstevel@tonic-gate uint_t kct_cagewake; 1277c478bd9Sstevel@tonic-gate uint_t kct_wait; 1287c478bd9Sstevel@tonic-gate uint_t kct_progress; 1297c478bd9Sstevel@tonic-gate uint_t kct_noprogress; 1307c478bd9Sstevel@tonic-gate uint_t kct_timeout; 1317c478bd9Sstevel@tonic-gate 1327c478bd9Sstevel@tonic-gate /* set in kcage_cageout_wakeup */ 1337c478bd9Sstevel@tonic-gate uint_t kcw_expandearly; 1347c478bd9Sstevel@tonic-gate 1357c478bd9Sstevel@tonic-gate /* managed by KCAGE_STAT_* macros */ 1367c478bd9Sstevel@tonic-gate uint_t scan_array_size; 1377c478bd9Sstevel@tonic-gate uint_t scan_index; 1387c478bd9Sstevel@tonic-gate struct kcage_stats_scan scans[KCAGE_STATS_NSCANS]; 1397c478bd9Sstevel@tonic-gate }; 1407c478bd9Sstevel@tonic-gate 1417c478bd9Sstevel@tonic-gate static struct kcage_stats kcage_stats; 1427c478bd9Sstevel@tonic-gate static struct 
kcage_stats_scan kcage_stats_scan_zero; 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate /* 1457c478bd9Sstevel@tonic-gate * No real need for atomics here. For the most part the incs and sets are 1467c478bd9Sstevel@tonic-gate * done by the kernel cage thread. There are a few that are done by any 1477c478bd9Sstevel@tonic-gate * number of other threads. Those cases are noted by comments. 1487c478bd9Sstevel@tonic-gate */ 1497c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INCR(m) kcage_stats.m++ 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate #define KCAGE_STAT_NINCR(m, v) kcage_stats.m += (v) 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INCR_SCAN(m) \ 1547c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(scans[kcage_stats.scan_index].m) 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate #define KCAGE_STAT_NINCR_SCAN(m, v) \ 1577c478bd9Sstevel@tonic-gate KCAGE_STAT_NINCR(scans[kcage_stats.scan_index].m, v) 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SET(m, v) kcage_stats.m = (v) 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SETZ(m, v) \ 1627c478bd9Sstevel@tonic-gate if (kcage_stats.m == 0) kcage_stats.m = (v) 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SET_SCAN(m, v) \ 1657c478bd9Sstevel@tonic-gate KCAGE_STAT_SET(scans[kcage_stats.scan_index].m, v) 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SETZ_SCAN(m, v) \ 1687c478bd9Sstevel@tonic-gate KCAGE_STAT_SETZ(scans[kcage_stats.scan_index].m, v) 1697c478bd9Sstevel@tonic-gate 1707c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INC_SCAN_INDEX \ 171d3d50737SRafael Vanoni KCAGE_STAT_SET_SCAN(scan_lbolt, ddi_get_lbolt()); \ 1727c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(scan_id, kcage_stats.scan_index); \ 1737c478bd9Sstevel@tonic-gate kcage_stats.scan_index = \ 1747c478bd9Sstevel@tonic-gate (kcage_stats.scan_index + 1) % KCAGE_STATS_NSCANS; 
\ 1757c478bd9Sstevel@tonic-gate kcage_stats.scans[kcage_stats.scan_index] = kcage_stats_scan_zero 1767c478bd9Sstevel@tonic-gate 1777c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INIT_SCAN_INDEX \ 1787c478bd9Sstevel@tonic-gate kcage_stats.version = KCAGE_STATS_VERSION; \ 1797c478bd9Sstevel@tonic-gate kcage_stats.size = sizeof (kcage_stats); \ 1807c478bd9Sstevel@tonic-gate kcage_stats.scan_array_size = KCAGE_STATS_NSCANS; \ 1817c478bd9Sstevel@tonic-gate kcage_stats.scan_index = 0 1827c478bd9Sstevel@tonic-gate 1837c478bd9Sstevel@tonic-gate #else /* KCAGE_STATS */ 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INCR(v) 1867c478bd9Sstevel@tonic-gate #define KCAGE_STAT_NINCR(m, v) 1877c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INCR_SCAN(v) 1887c478bd9Sstevel@tonic-gate #define KCAGE_STAT_NINCR_SCAN(m, v) 1897c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SET(m, v) 1907c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SETZ(m, v) 1917c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SET_SCAN(m, v) 1927c478bd9Sstevel@tonic-gate #define KCAGE_STAT_SETZ_SCAN(m, v) 1937c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INC_SCAN_INDEX 1947c478bd9Sstevel@tonic-gate #define KCAGE_STAT_INIT_SCAN_INDEX 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate #endif /* KCAGE_STATS */ 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate static kmutex_t kcage_throttle_mutex; /* protects kcage_throttle_cv */ 1997c478bd9Sstevel@tonic-gate static kcondvar_t kcage_throttle_cv; 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate static kmutex_t kcage_cageout_mutex; /* protects cv and ready flag */ 2027c478bd9Sstevel@tonic-gate static kcondvar_t kcage_cageout_cv; /* cageout thread naps here */ 2037c478bd9Sstevel@tonic-gate static int kcage_cageout_ready; /* nonzero when cageout thread ready */ 2047c478bd9Sstevel@tonic-gate kthread_id_t kcage_cageout_thread; /* to aid debugging */ 2057c478bd9Sstevel@tonic-gate 2065d07b933Sdp78419 static krwlock_t 
kcage_range_rwlock; /* protects kcage_glist elements */ 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate /* 2097c478bd9Sstevel@tonic-gate * Cage expansion happens within a range. 2107c478bd9Sstevel@tonic-gate */ 2117c478bd9Sstevel@tonic-gate struct kcage_glist { 2127c478bd9Sstevel@tonic-gate struct kcage_glist *next; 2137c478bd9Sstevel@tonic-gate pfn_t base; 2147c478bd9Sstevel@tonic-gate pfn_t lim; 2157c478bd9Sstevel@tonic-gate pfn_t curr; 2167c478bd9Sstevel@tonic-gate int decr; 2177c478bd9Sstevel@tonic-gate }; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate static struct kcage_glist *kcage_glist; 2207c478bd9Sstevel@tonic-gate static struct kcage_glist *kcage_current_glist; 2217c478bd9Sstevel@tonic-gate 2227c478bd9Sstevel@tonic-gate /* 2237c478bd9Sstevel@tonic-gate * The firstfree element is provided so that kmem_alloc can be avoided 2247c478bd9Sstevel@tonic-gate * until that cage has somewhere to go. This is not currently a problem 2257c478bd9Sstevel@tonic-gate * as early kmem_alloc's use BOP_ALLOC instead of page_create_va. 
2267c478bd9Sstevel@tonic-gate */ 22785f58038Sdp78419 static vmem_t *kcage_arena; 2287c478bd9Sstevel@tonic-gate static struct kcage_glist kcage_glist_firstfree; 2297c478bd9Sstevel@tonic-gate static struct kcage_glist *kcage_glist_freelist = &kcage_glist_firstfree; 2307c478bd9Sstevel@tonic-gate 2317c478bd9Sstevel@tonic-gate /* 2327c478bd9Sstevel@tonic-gate * Miscellaneous forward references 2337c478bd9Sstevel@tonic-gate */ 2347c478bd9Sstevel@tonic-gate static struct kcage_glist *kcage_glist_alloc(void); 2357c478bd9Sstevel@tonic-gate static int kcage_glist_delete(pfn_t, pfn_t, struct kcage_glist **); 2367c478bd9Sstevel@tonic-gate static void kcage_cageout(void); 2377c478bd9Sstevel@tonic-gate static int kcage_invalidate_page(page_t *, pgcnt_t *); 2387c478bd9Sstevel@tonic-gate static int kcage_setnoreloc_pages(page_t *, se_t); 23985f58038Sdp78419 static int kcage_range_add_internal(pfn_t base, pgcnt_t npgs, kcage_dir_t); 24085f58038Sdp78419 static void kcage_init(pgcnt_t preferred_size); 24185f58038Sdp78419 static int kcage_range_delete_internal(pfn_t base, pgcnt_t npgs); 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate /* 2447c478bd9Sstevel@tonic-gate * Kernel Memory Cage counters and thresholds. 
2457c478bd9Sstevel@tonic-gate */ 2467c478bd9Sstevel@tonic-gate int kcage_on = 0; 2477c478bd9Sstevel@tonic-gate pgcnt_t kcage_freemem; 2487c478bd9Sstevel@tonic-gate pgcnt_t kcage_needfree; 2497c478bd9Sstevel@tonic-gate pgcnt_t kcage_lotsfree; 2507c478bd9Sstevel@tonic-gate pgcnt_t kcage_desfree; 2517c478bd9Sstevel@tonic-gate pgcnt_t kcage_minfree; 2527c478bd9Sstevel@tonic-gate pgcnt_t kcage_throttlefree; 253bc203165Svb70745 pgcnt_t kcage_reserve; 2547c478bd9Sstevel@tonic-gate int kcage_maxwait = 10; /* in seconds */ 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate /* when we use lp for kmem we start the cage at a higher initial value */ 2577c478bd9Sstevel@tonic-gate pgcnt_t kcage_kmemlp_mincage; 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate #ifdef DEBUG 2607c478bd9Sstevel@tonic-gate pgcnt_t kcage_pagets; 2617c478bd9Sstevel@tonic-gate #define KCAGEPAGETS_INC() kcage_pagets++ 2627c478bd9Sstevel@tonic-gate #else 2637c478bd9Sstevel@tonic-gate #define KCAGEPAGETS_INC() 2647c478bd9Sstevel@tonic-gate #endif 2657c478bd9Sstevel@tonic-gate 2668b464eb8Smec /* kstats to export what pages are currently caged */ 2678b464eb8Smec kmutex_t kcage_kstat_lock; 2688b464eb8Smec static int kcage_kstat_update(kstat_t *ksp, int rw); 2698b464eb8Smec static int kcage_kstat_snapshot(kstat_t *ksp, void *buf, int rw); 2708b464eb8Smec 2717c478bd9Sstevel@tonic-gate /* 2727c478bd9Sstevel@tonic-gate * Startup and Dynamic Reconfiguration interfaces. 2737c478bd9Sstevel@tonic-gate * kcage_range_add() 2747c478bd9Sstevel@tonic-gate * kcage_range_del() 27585f58038Sdp78419 * kcage_range_delete_post_mem_del() 27685f58038Sdp78419 * kcage_range_init() 2777c478bd9Sstevel@tonic-gate * kcage_set_thresholds() 2787c478bd9Sstevel@tonic-gate */ 2797c478bd9Sstevel@tonic-gate 2805d07b933Sdp78419 /* 2817c478bd9Sstevel@tonic-gate * Called from page_get_contig_pages to get the approximate kcage pfn range 2827c478bd9Sstevel@tonic-gate * for exclusion from search for contiguous pages. 
This routine is called 2837c478bd9Sstevel@tonic-gate * without kcage_range lock (kcage routines can call page_get_contig_pages 2847c478bd9Sstevel@tonic-gate * through page_relocate) and with the assumption, based on kcage_range_add, 2857c478bd9Sstevel@tonic-gate * that kcage_current_glist always contain a valid pointer. 2867c478bd9Sstevel@tonic-gate */ 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate int 2897c478bd9Sstevel@tonic-gate kcage_current_pfn(pfn_t *pfncur) 2907c478bd9Sstevel@tonic-gate { 2917c478bd9Sstevel@tonic-gate struct kcage_glist *lp = kcage_current_glist; 2927c478bd9Sstevel@tonic-gate 2937c478bd9Sstevel@tonic-gate ASSERT(kcage_on); 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate ASSERT(lp != NULL); 2967c478bd9Sstevel@tonic-gate 2977c478bd9Sstevel@tonic-gate *pfncur = lp->curr; 2987c478bd9Sstevel@tonic-gate 2997c478bd9Sstevel@tonic-gate return (lp->decr); 3007c478bd9Sstevel@tonic-gate } 3017c478bd9Sstevel@tonic-gate 3025d07b933Sdp78419 /* 3035d07b933Sdp78419 * Called from vm_pagelist.c during coalesce to find kernel cage regions 3045d07b933Sdp78419 * within an mnode. Looks for the lowest range between lo and hi. 3055d07b933Sdp78419 * 3065d07b933Sdp78419 * Kernel cage memory is defined between kcage_glist and kcage_current_glist. 3075d07b933Sdp78419 * Non-cage memory is defined between kcage_current_glist and list end. 3085d07b933Sdp78419 * 3095d07b933Sdp78419 * If incage is set, returns the lowest kcage range. Otherwise returns lowest 3105d07b933Sdp78419 * non-cage range. 3115d07b933Sdp78419 * 3125d07b933Sdp78419 * Returns zero on success and nlo, nhi: 3135d07b933Sdp78419 * lo <= nlo < nhi <= hi 3145d07b933Sdp78419 * Returns non-zero if no overlapping range is found. 
3155d07b933Sdp78419 */ 3165d07b933Sdp78419 int 3175d07b933Sdp78419 kcage_next_range(int incage, pfn_t lo, pfn_t hi, 3185d07b933Sdp78419 pfn_t *nlo, pfn_t *nhi) 3195d07b933Sdp78419 { 3205d07b933Sdp78419 struct kcage_glist *lp; 3215d07b933Sdp78419 pfn_t tlo = hi; 3225d07b933Sdp78419 pfn_t thi = hi; 3235d07b933Sdp78419 3245d07b933Sdp78419 ASSERT(lo <= hi); 3255d07b933Sdp78419 3265d07b933Sdp78419 /* 3275d07b933Sdp78419 * Reader lock protects the list, but kcage_get_pfn 3285d07b933Sdp78419 * running concurrently may advance kcage_current_glist 3295d07b933Sdp78419 * and also update kcage_current_glist->curr. Page 3305d07b933Sdp78419 * coalesce can handle this race condition. 3315d07b933Sdp78419 */ 3325d07b933Sdp78419 rw_enter(&kcage_range_rwlock, RW_READER); 3335d07b933Sdp78419 3345d07b933Sdp78419 for (lp = incage ? kcage_glist : kcage_current_glist; 3355d07b933Sdp78419 lp != NULL; lp = lp->next) { 3365d07b933Sdp78419 3375d07b933Sdp78419 pfn_t klo, khi; 3385d07b933Sdp78419 3395d07b933Sdp78419 /* find the range limits in this element */ 3405d07b933Sdp78419 if ((incage && lp->decr) || (!incage && !lp->decr)) { 3415d07b933Sdp78419 klo = lp->curr; 3425d07b933Sdp78419 khi = lp->lim; 3435d07b933Sdp78419 } else { 3445d07b933Sdp78419 klo = lp->base; 3455d07b933Sdp78419 khi = lp->curr; 3465d07b933Sdp78419 } 3475d07b933Sdp78419 3485d07b933Sdp78419 /* handle overlap */ 3495d07b933Sdp78419 if (klo < tlo && klo < khi && lo < khi && klo < hi) { 3505d07b933Sdp78419 tlo = MAX(lo, klo); 3515d07b933Sdp78419 thi = MIN(hi, khi); 3525d07b933Sdp78419 if (tlo == lo) 3535d07b933Sdp78419 break; 3545d07b933Sdp78419 } 3555d07b933Sdp78419 3565d07b933Sdp78419 /* check end of kcage */ 3575d07b933Sdp78419 if (incage && lp == kcage_current_glist) { 3585d07b933Sdp78419 break; 3595d07b933Sdp78419 } 3605d07b933Sdp78419 } 3615d07b933Sdp78419 3625d07b933Sdp78419 rw_exit(&kcage_range_rwlock); 3635d07b933Sdp78419 3645d07b933Sdp78419 /* return non-zero if no overlapping range found */ 3655d07b933Sdp78419 if 
(tlo == thi) 3665d07b933Sdp78419 return (1); 3675d07b933Sdp78419 3685d07b933Sdp78419 ASSERT(lo <= tlo && tlo < thi && thi <= hi); 3695d07b933Sdp78419 3705d07b933Sdp78419 /* return overlapping range */ 3715d07b933Sdp78419 *nlo = tlo; 3725d07b933Sdp78419 *nhi = thi; 3735d07b933Sdp78419 return (0); 3745d07b933Sdp78419 } 3755d07b933Sdp78419 37685f58038Sdp78419 void 37785f58038Sdp78419 kcage_range_init(struct memlist *ml, kcage_dir_t d, pgcnt_t preferred_size) 3787c478bd9Sstevel@tonic-gate { 3797c478bd9Sstevel@tonic-gate int ret = 0; 3807c478bd9Sstevel@tonic-gate 38185f58038Sdp78419 ASSERT(kcage_arena == NULL); 38285f58038Sdp78419 kcage_arena = vmem_create("kcage_arena", NULL, 0, sizeof (uint64_t), 38385f58038Sdp78419 segkmem_alloc, segkmem_free, heap_arena, 0, VM_SLEEP); 38485f58038Sdp78419 ASSERT(kcage_arena != NULL); 3857c478bd9Sstevel@tonic-gate 38685f58038Sdp78419 if (d == KCAGE_DOWN) { 38756f33205SJonathan Adams while (ml->ml_next != NULL) 38856f33205SJonathan Adams ml = ml->ml_next; 3897c478bd9Sstevel@tonic-gate } 3907c478bd9Sstevel@tonic-gate 39185f58038Sdp78419 rw_enter(&kcage_range_rwlock, RW_WRITER); 3928c754b1bSdp78419 39385f58038Sdp78419 while (ml != NULL) { 39456f33205SJonathan Adams ret = kcage_range_add_internal(btop(ml->ml_address), 39556f33205SJonathan Adams btop(ml->ml_size), d); 39685f58038Sdp78419 if (ret) 39785f58038Sdp78419 panic("kcage_range_add_internal failed: " 398903a11ebSrh87107 "ml=%p, ret=0x%x\n", (void *)ml, ret); 39985f58038Sdp78419 40056f33205SJonathan Adams ml = (d == KCAGE_DOWN ? 
ml->ml_prev : ml->ml_next); 4017c478bd9Sstevel@tonic-gate } 4027c478bd9Sstevel@tonic-gate 40385f58038Sdp78419 rw_exit(&kcage_range_rwlock); 40485f58038Sdp78419 40585f58038Sdp78419 if (ret == 0) 40685f58038Sdp78419 kcage_init(preferred_size); 4077c478bd9Sstevel@tonic-gate } 4087c478bd9Sstevel@tonic-gate 4097c478bd9Sstevel@tonic-gate /* 4107c478bd9Sstevel@tonic-gate * Third arg controls direction of growth: 0: increasing pfns, 4117c478bd9Sstevel@tonic-gate * 1: decreasing. 4127c478bd9Sstevel@tonic-gate */ 41385f58038Sdp78419 static int 41485f58038Sdp78419 kcage_range_add_internal(pfn_t base, pgcnt_t npgs, kcage_dir_t d) 4157c478bd9Sstevel@tonic-gate { 4167c478bd9Sstevel@tonic-gate struct kcage_glist *new, **lpp; 4177c478bd9Sstevel@tonic-gate pfn_t lim; 4187c478bd9Sstevel@tonic-gate 41985f58038Sdp78419 ASSERT(rw_write_held(&kcage_range_rwlock)); 4207c478bd9Sstevel@tonic-gate 4217c478bd9Sstevel@tonic-gate ASSERT(npgs != 0); 4227c478bd9Sstevel@tonic-gate if (npgs == 0) 4237c478bd9Sstevel@tonic-gate return (EINVAL); 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate lim = base + npgs; 4267c478bd9Sstevel@tonic-gate 4277c478bd9Sstevel@tonic-gate ASSERT(lim > base); 4287c478bd9Sstevel@tonic-gate if (lim <= base) 4297c478bd9Sstevel@tonic-gate return (EINVAL); 4307c478bd9Sstevel@tonic-gate 4317c478bd9Sstevel@tonic-gate new = kcage_glist_alloc(); 4327c478bd9Sstevel@tonic-gate if (new == NULL) { 4337c478bd9Sstevel@tonic-gate return (ENOMEM); 4347c478bd9Sstevel@tonic-gate } 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate new->base = base; 4377c478bd9Sstevel@tonic-gate new->lim = lim; 43885f58038Sdp78419 new->decr = (d == KCAGE_DOWN); 4397c478bd9Sstevel@tonic-gate if (new->decr != 0) 4407c478bd9Sstevel@tonic-gate new->curr = new->lim; 4417c478bd9Sstevel@tonic-gate else 4427c478bd9Sstevel@tonic-gate new->curr = new->base; 4437c478bd9Sstevel@tonic-gate /* 4447c478bd9Sstevel@tonic-gate * Any overlapping existing ranges are removed by deleting 
4457c478bd9Sstevel@tonic-gate * from the new list as we search for the tail. 4467c478bd9Sstevel@tonic-gate */ 4477c478bd9Sstevel@tonic-gate lpp = &kcage_glist; 4487c478bd9Sstevel@tonic-gate while (*lpp != NULL) { 4497c478bd9Sstevel@tonic-gate int ret; 4507c478bd9Sstevel@tonic-gate ret = kcage_glist_delete((*lpp)->base, (*lpp)->lim, &new); 4517c478bd9Sstevel@tonic-gate if (ret != 0) 4527c478bd9Sstevel@tonic-gate return (ret); 4537c478bd9Sstevel@tonic-gate lpp = &(*lpp)->next; 4547c478bd9Sstevel@tonic-gate } 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate *lpp = new; 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate if (kcage_current_glist == NULL) { 4597c478bd9Sstevel@tonic-gate kcage_current_glist = kcage_glist; 4607c478bd9Sstevel@tonic-gate } 4617c478bd9Sstevel@tonic-gate 4627c478bd9Sstevel@tonic-gate return (0); 4637c478bd9Sstevel@tonic-gate } 4647c478bd9Sstevel@tonic-gate 4656b990117Sdm120769 int 46685f58038Sdp78419 kcage_range_add(pfn_t base, pgcnt_t npgs, kcage_dir_t d) 46785f58038Sdp78419 { 46885f58038Sdp78419 int ret; 46985f58038Sdp78419 47085f58038Sdp78419 rw_enter(&kcage_range_rwlock, RW_WRITER); 47185f58038Sdp78419 ret = kcage_range_add_internal(base, npgs, d); 47285f58038Sdp78419 rw_exit(&kcage_range_rwlock); 47385f58038Sdp78419 return (ret); 47485f58038Sdp78419 } 47585f58038Sdp78419 47685f58038Sdp78419 /* 47785f58038Sdp78419 * Calls to add and delete must be protected by kcage_range_rwlock 47885f58038Sdp78419 */ 47985f58038Sdp78419 static int 48085f58038Sdp78419 kcage_range_delete_internal(pfn_t base, pgcnt_t npgs) 4817c478bd9Sstevel@tonic-gate { 4827c478bd9Sstevel@tonic-gate struct kcage_glist *lp; 4837c478bd9Sstevel@tonic-gate pfn_t lim; 4847c478bd9Sstevel@tonic-gate 48585f58038Sdp78419 ASSERT(rw_write_held(&kcage_range_rwlock)); 4867c478bd9Sstevel@tonic-gate 4877c478bd9Sstevel@tonic-gate ASSERT(npgs != 0); 4887c478bd9Sstevel@tonic-gate if (npgs == 0) 4897c478bd9Sstevel@tonic-gate return (EINVAL); 4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate lim = base + npgs; 4927c478bd9Sstevel@tonic-gate 4937c478bd9Sstevel@tonic-gate ASSERT(lim > base); 4947c478bd9Sstevel@tonic-gate if (lim <= base) 4957c478bd9Sstevel@tonic-gate return (EINVAL); 4967c478bd9Sstevel@tonic-gate 4977c478bd9Sstevel@tonic-gate /* 4987c478bd9Sstevel@tonic-gate * Check if the delete is OK first as a number of elements 4997c478bd9Sstevel@tonic-gate * might be involved and it will be difficult to go 5007c478bd9Sstevel@tonic-gate * back and undo (can't just add the range back in). 5017c478bd9Sstevel@tonic-gate */ 5027c478bd9Sstevel@tonic-gate for (lp = kcage_glist; lp != NULL; lp = lp->next) { 5037c478bd9Sstevel@tonic-gate /* 5047c478bd9Sstevel@tonic-gate * If there have been no pages allocated from this 5057c478bd9Sstevel@tonic-gate * element, we don't need to check it. 5067c478bd9Sstevel@tonic-gate */ 5077c478bd9Sstevel@tonic-gate if ((lp->decr == 0 && lp->curr == lp->base) || 5087c478bd9Sstevel@tonic-gate (lp->decr != 0 && lp->curr == lp->lim)) 5097c478bd9Sstevel@tonic-gate continue; 5107c478bd9Sstevel@tonic-gate /* 5117c478bd9Sstevel@tonic-gate * If the element does not overlap, its OK. 5127c478bd9Sstevel@tonic-gate */ 5137c478bd9Sstevel@tonic-gate if (base >= lp->lim || lim <= lp->base) 5147c478bd9Sstevel@tonic-gate continue; 5157c478bd9Sstevel@tonic-gate /* 5167c478bd9Sstevel@tonic-gate * Overlapping element: Does the range to be deleted 5177c478bd9Sstevel@tonic-gate * overlap the area already used? If so fail. 
5187c478bd9Sstevel@tonic-gate */ 5197c478bd9Sstevel@tonic-gate if (lp->decr == 0 && base < lp->curr && lim >= lp->base) { 5207c478bd9Sstevel@tonic-gate return (EBUSY); 5217c478bd9Sstevel@tonic-gate } 5227c478bd9Sstevel@tonic-gate if (lp->decr != 0 && base < lp->lim && lim >= lp->curr) { 5237c478bd9Sstevel@tonic-gate return (EBUSY); 5247c478bd9Sstevel@tonic-gate } 5257c478bd9Sstevel@tonic-gate } 5267c478bd9Sstevel@tonic-gate return (kcage_glist_delete(base, lim, &kcage_glist)); 5277c478bd9Sstevel@tonic-gate } 5287c478bd9Sstevel@tonic-gate 52985f58038Sdp78419 int 53085f58038Sdp78419 kcage_range_delete(pfn_t base, pgcnt_t npgs) 53185f58038Sdp78419 { 53285f58038Sdp78419 int ret; 53385f58038Sdp78419 53485f58038Sdp78419 rw_enter(&kcage_range_rwlock, RW_WRITER); 53585f58038Sdp78419 ret = kcage_range_delete_internal(base, npgs); 53685f58038Sdp78419 rw_exit(&kcage_range_rwlock); 53785f58038Sdp78419 return (ret); 53885f58038Sdp78419 } 53985f58038Sdp78419 5407c478bd9Sstevel@tonic-gate /* 54185f58038Sdp78419 * Calls to add and delete must be protected by kcage_range_rwlock. 5427c478bd9Sstevel@tonic-gate * This routine gets called after successful Solaris memory 5437c478bd9Sstevel@tonic-gate * delete operation from DR post memory delete routines. 
5447c478bd9Sstevel@tonic-gate */ 54585f58038Sdp78419 static int 54685f58038Sdp78419 kcage_range_delete_post_mem_del_internal(pfn_t base, pgcnt_t npgs) 5477c478bd9Sstevel@tonic-gate { 5487c478bd9Sstevel@tonic-gate pfn_t lim; 5497c478bd9Sstevel@tonic-gate 55085f58038Sdp78419 ASSERT(rw_write_held(&kcage_range_rwlock)); 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate ASSERT(npgs != 0); 5537c478bd9Sstevel@tonic-gate if (npgs == 0) 5547c478bd9Sstevel@tonic-gate return (EINVAL); 5557c478bd9Sstevel@tonic-gate 5567c478bd9Sstevel@tonic-gate lim = base + npgs; 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate ASSERT(lim > base); 5597c478bd9Sstevel@tonic-gate if (lim <= base) 5607c478bd9Sstevel@tonic-gate return (EINVAL); 5617c478bd9Sstevel@tonic-gate 5627c478bd9Sstevel@tonic-gate return (kcage_glist_delete(base, lim, &kcage_glist)); 5637c478bd9Sstevel@tonic-gate } 5647c478bd9Sstevel@tonic-gate 56585f58038Sdp78419 int 56685f58038Sdp78419 kcage_range_delete_post_mem_del(pfn_t base, pgcnt_t npgs) 56785f58038Sdp78419 { 56885f58038Sdp78419 int ret; 56985f58038Sdp78419 57085f58038Sdp78419 rw_enter(&kcage_range_rwlock, RW_WRITER); 57185f58038Sdp78419 ret = kcage_range_delete_post_mem_del_internal(base, npgs); 57285f58038Sdp78419 rw_exit(&kcage_range_rwlock); 57385f58038Sdp78419 return (ret); 57485f58038Sdp78419 } 57585f58038Sdp78419 5767c478bd9Sstevel@tonic-gate /* 5777c478bd9Sstevel@tonic-gate * No locking is required here as the whole operation is covered 57885f58038Sdp78419 * by kcage_range_rwlock writer lock. 
 */

/*
 * Allocate a cage growth-list element.  Recycle from the local freelist
 * when possible; otherwise allocate from kcage_arena when the kernel
 * cage is enabled, or from kmem when it is not.  Returns NULL if no
 * memory is available (both allocation paths are NOSLEEP).
 */
static struct kcage_glist *
kcage_glist_alloc(void)
{
	struct kcage_glist *new;

	if ((new = kcage_glist_freelist) != NULL) {
		kcage_glist_freelist = new->next;
	} else if (kernel_cage_enable) {
		new = vmem_alloc(kcage_arena, sizeof (*new), VM_NOSLEEP);
	} else {
		/*
		 * On DR supported platforms we allow memory add
		 * even when kernel cage is disabled. "kcage_arena" is
		 * created only when kernel cage is enabled.
		 */
		new = kmem_zalloc(sizeof (*new), KM_NOSLEEP);
	}

	/*
	 * Zero the element regardless of origin.  NOTE(review): the
	 * kmem_zalloc() path is already zeroed, so this bzero() is
	 * redundant there, but harmless.
	 */
	if (new != NULL)
		bzero(new, sizeof (*new));

	return (new);
}

/*
 * Return a growth-list element to the local freelist.  Elements are
 * recycled here; they are never handed back to the arena they came from.
 */
static void
kcage_glist_free(struct kcage_glist *lp)
{
	lp->next = kcage_glist_freelist;
	kcage_glist_freelist = lp;
}

/*
 * Remove the pfn range [base, lim) from the growth list at *lpp.
 * Elements wholly inside the range are unlinked and freed; elements
 * partially covered are trimmed in place; an element straddling the
 * range is split, which requires allocating a new element.  lp is
 * always edited in place (rather than replaced) so that
 * kcage_current_glist remains a valid pointer into the list.
 *
 * Returns 0 on success, or ENOMEM if a middle split could not
 * allocate its new element.
 */
static int
kcage_glist_delete(pfn_t base, pfn_t lim, struct kcage_glist **lpp)
{
	struct kcage_glist *lp, *prev = *lpp;

	while ((lp = *lpp) != NULL) {
		if (lim > lp->base && base < lp->lim) {
			/* The delete range overlaps this element. */
			if (base <= lp->base && lim >= lp->lim) {
				/* Delete whole element. */
				*lpp = lp->next;
				if (lp == kcage_current_glist) {
					/*
					 * This can never happen: the
					 * current element is in the used
					 * part of the list and is never
					 * wholly deletable.
					 */
					ASSERT(kcage_current_glist != prev);
					kcage_current_glist = prev;
				}
				kcage_glist_free(lp);
				continue;
			}

			/* Partial delete. */
			if (base > lp->base && lim < lp->lim) {
				struct kcage_glist *new;

				/*
				 * Remove a section from the middle,
				 * need to allocate a new element.
				 */
				new = kcage_glist_alloc();
				if (new == NULL) {
					return (ENOMEM);
				}

				/*
				 * Transfer unused range to new.
				 * Edit lp in place to preserve
				 * kcage_current_glist.
				 */
				new->decr = lp->decr;
				if (new->decr != 0) {
					new->base = lp->base;
					new->lim = base;
					new->curr = base;

					lp->base = lim;
				} else {
					new->base = lim;
					new->lim = lp->lim;
					new->curr = new->base;

					lp->lim = base;
				}

				/* Insert new. */
				new->next = lp->next;
				lp->next = new;
				lpp = &lp->next;
			} else {
				/* Delete part of current block. */
				if (base > lp->base) {
					/* Trim the upper end of lp. */
					ASSERT(lim >= lp->lim);
					ASSERT(base < lp->lim);
					if (lp->decr != 0 &&
					    lp->curr == lp->lim)
						lp->curr = base;
					lp->lim = base;
				} else {
					/* Trim the lower end of lp. */
					ASSERT(base <= lp->base);
					ASSERT(lim > lp->base);
					if (lp->decr == 0 &&
					    lp->curr == lp->base)
						lp->curr = lim;
					lp->base = lim;
				}
			}
		}
		prev = *lpp;
		lpp = &(*lpp)->next;
	}

	return (0);
}

/*
 * Take the next pfn available for cage growth from the growth list,
 * advancing kcage_current_glist past exhausted ranges.  Within a range,
 * growth proceeds downward from lim when decr is set, upward from base
 * otherwise.  Returns PFN_INVALID when all ranges are exhausted, or
 * immediately if lockit is set and the reader lock cannot be acquired
 * without blocking.
 *
 * If lockit is 1, kcage_get_pfn holds the
 * reader lock for kcage_range_rwlock.
 * Changes to lp->curr can cause race conditions, but
 * they are handled by higher level code (see kcage_next_range.)
 */
static pfn_t
kcage_get_pfn(int lockit)
{
	struct kcage_glist *lp;
	pfn_t pfn = PFN_INVALID;

	if (lockit && !rw_tryenter(&kcage_range_rwlock, RW_READER))
		return (pfn);

	lp = kcage_current_glist;
	while (lp != NULL) {
		if (lp->decr != 0) {
			if (lp->curr != lp->base) {
				pfn = --lp->curr;
				break;
			}
		} else {
			if (lp->curr != lp->lim) {
				pfn = lp->curr++;
				break;
			}
		}

		/* This range is spent; move on to the next one. */
		lp = lp->next;
		if (lp)
			kcage_current_glist = lp;
	}

	if (lockit)
		rw_exit(&kcage_range_rwlock);
	return (pfn);
}

/*
 * Walk the physical address space of the cage.
 * This routine does not guarantee to return PFNs in the order
 * in which they were allocated to the cage. Instead, it walks
 * each range as they appear on the growth list returning the PFNs
 * range in ascending order.
 *
 * To begin scanning at lower edge of cage, reset should be nonzero.
 * To step through cage, reset should be zero.
 *
 * PFN_INVALID will be returned when the upper end of the cage is
 * reached -- indicating a full scan of the cage has been completed since
 * previous reset. PFN_INVALID will continue to be returned until
 * kcage_walk_cage is reset.
 *
 * It is possible to receive a PFN_INVALID result on reset if a growth
 * list is not installed or if none of the PFNs in the installed list have
 * been allocated to the cage. In other words, there is no cage.
 *
 * Caller need not hold kcage_range_rwlock while calling this function
 * as the front part of the list is static - pages never come out of
 * the cage.
 *
 * The caller is expected to only be kcage_cageout().
 */
static pfn_t
kcage_walk_cage(int reset)
{
	/*
	 * Iterator state persists across calls: lp is the range being
	 * walked, pfn the next candidate within it.  Single-threaded by
	 * the single-caller contract above.
	 */
	static struct kcage_glist *lp = NULL;
	static pfn_t pfn;

	if (reset)
		lp = NULL;
	if (lp == NULL) {
		/* (Re)start at the head of the growth list. */
		lp = kcage_glist;
		pfn = PFN_INVALID;
	}
again:
	if (pfn == PFN_INVALID) {
		if (lp == NULL)
			return (PFN_INVALID);

		if (lp->decr != 0) {
			/*
			 * In this range the cage grows from the highest
			 * address towards the lowest.
			 * Arrange to return pfns from curr to lim-1,
			 * inclusive, in ascending order.
			 */

			pfn = lp->curr;
		} else {
			/*
			 * In this range the cage grows from the lowest
			 * address towards the highest.
			 * Arrange to return pfns from base to curr,
			 * inclusive, in ascending order.
			 */

			pfn = lp->base;
		}
	}

	if (lp->decr != 0) {		/* decrementing pfn */
		if (pfn == lp->lim) {
			/* Don't go beyond the static part of the glist. */
			if (lp == kcage_current_glist)
				lp = NULL;
			else
				lp = lp->next;
			pfn = PFN_INVALID;
			goto again;
		}

		ASSERT(pfn >= lp->curr && pfn < lp->lim);
	} else {			/* incrementing pfn */
		if (pfn == lp->curr) {
			/* Don't go beyond the static part of the glist. */
			if (lp == kcage_current_glist)
				lp = NULL;
			else
				lp = lp->next;
			pfn = PFN_INVALID;
			goto again;
		}

		ASSERT(pfn >= lp->base && pfn < lp->curr);
	}

	return (pfn++);
}

/*
 * Callback functions to recalc cage thresholds after
 * Kphysm memory add/delete operations.
 */

/* Post-memory-add callback: thresholds scale with total_pages. */
/*ARGSUSED*/
static void
kcage_kphysm_postadd_cb(void *arg, pgcnt_t delta_pages)
{
	kcage_recalc_thresholds();
}

/* Pre-memory-delete callback: currently never vetoes a delete. */
/*ARGSUSED*/
static int
kcage_kphysm_predel_cb(void *arg, pgcnt_t delta_pages)
{
	/* TODO: when should cage refuse memory delete requests? */
	return (0);
}

/* Post-memory-delete callback: recalc whether or not it was cancelled. */
/*ARGSUSED*/
static void
kcage_kphysm_postdel_cb(void *arg, pgcnt_t delta_pages, int cancelled)
{
	kcage_recalc_thresholds();
}

/* Callback vector registered with kphysm_setup_func_register(). */
static kphysm_setup_vector_t kcage_kphysm_vectors = {
	KPHYSM_SETUP_VECTOR_VERSION,
	kcage_kphysm_postadd_cb,
	kcage_kphysm_predel_cb,
	kcage_kphysm_postdel_cb
};

/*
 * This is called before a CPR suspend and after a CPR resume. We have to
 * turn off kcage_cageout_ready before a suspend, and turn it back on after a
 * restart.
 */
/*ARGSUSED*/
static boolean_t
kcage_cageout_cpr(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT) {
		/* Suspend: quiesce the cageout wakeup path. */
		ASSERT(kcage_cageout_ready);
		kcage_cageout_ready = 0;
		return (B_TRUE);
	} else if (code == CB_CODE_CPR_RESUME) {
		/* Resume: re-enable cageout wakeups. */
		ASSERT(kcage_cageout_ready == 0);
		kcage_cageout_ready = 1;
		return (B_TRUE);
	}
	/* Unrecognized CPR code: decline. */
	return (B_FALSE);
}

/*
 * kcage_recalc_preferred_size() increases initial cage size to improve large
 * page availability when lp for kmem is enabled and kpr is disabled
 */
static pgcnt_t
kcage_recalc_preferred_size(pgcnt_t preferred_size)
{
	if (SEGKMEM_USE_LARGEPAGES && segkmem_reloc == 0) {
		pgcnt_t lpmincage = kcage_kmemlp_mincage;
		if (lpmincage == 0) {
			/*
			 * Default: 1/8 of physical memory rounded up to the
			 * kmem large-page quantum, capped at 1GB (0x40000000).
			 */
			lpmincage = MIN(P2ROUNDUP(((physmem * PAGESIZE) / 8),
			    segkmem_heaplp_quantum), 0x40000000UL) / PAGESIZE;
		}
		kcage_kmemlp_mincage = MIN(lpmincage,
		    (segkmem_kmemlp_max / PAGESIZE));
		preferred_size = MAX(kcage_kmemlp_mincage, preferred_size);
	}
	return (preferred_size);
}

/*
 * Kcage_init() builds the cage and initializes the cage thresholds.
 * The size of the cage is determined by the argument preferred_size,
 * or the actual amount of memory, whichever is smaller.
 */
static void
kcage_init(pgcnt_t preferred_size)
{
	pgcnt_t wanted;
	pfn_t pfn;
	page_t *pp;
	kstat_t *ksp;

	extern void page_list_noreloc_startup(page_t *);

	ASSERT(!kcage_on);

	/* increase preferred cage size for lp for kmem */
	preferred_size = kcage_recalc_preferred_size(preferred_size);

	/* Debug note: initialize this now so early expansions can stat */
	KCAGE_STAT_INIT_SCAN_INDEX;

	/*
	 * Initialize cage thresholds and install kphysm callback.
	 * If we can't arrange to have the thresholds track with
	 * available physical memory, then the cage thresholds may
	 * end up over time at levels that adversely affect system
	 * performance; so, bail out.
	 */
	kcage_recalc_thresholds();
	if (kphysm_setup_func_register(&kcage_kphysm_vectors, NULL)) {
		ASSERT(0);		/* Catch this in DEBUG kernels. */
		return;
	}

	/*
	 * Limit startup cage size within the range of kcage_minfree
	 * and availrmem, inclusively.
	 */
	wanted = MIN(MAX(preferred_size, kcage_minfree), availrmem);

	/*
	 * Construct the cage. PFNs are allocated from the glist. It
	 * is assumed that the list has been properly ordered for the
	 * platform by the platform code. Typically, this is as simple
	 * as calling kcage_range_init(phys_avail, decr), where decr is
	 * 1 if the kernel has been loaded into upper end of physical
	 * memory, or 0 if the kernel has been loaded at the low end.
	 *
	 * Note: it is assumed that we are in the startup flow, so there
	 * is no reason to grab the page lock.
	 */
	kcage_freemem = 0;
	pfn = PFN_INVALID;		/* prime for alignment test */
	while (wanted != 0) {
		if ((pfn = kcage_get_pfn(0)) == PFN_INVALID)
			break;

		if ((pp = page_numtopp_nolock(pfn)) != NULL) {
			KCAGEPAGETS_INC();
			/*
			 * Set the noreloc state on the page.
			 * If the page is free and not already
			 * on the noreloc list then move it.
			 */
			if (PP_ISFREE(pp)) {
				if (PP_ISNORELOC(pp) == 0)
					page_list_noreloc_startup(pp);
			} else {
				ASSERT(pp->p_szc == 0);
				PP_SETNORELOC(pp);
			}
		}
		PLCNT_XFER_NORELOC(pp);
		wanted -= 1;
	}

	/*
	 * Need to go through and find kernel allocated pages
	 * and capture them into the Cage. These will primarily
	 * be pages gotten through boot_alloc().
	 */
	if (kvp.v_pages) {

		pp = kvp.v_pages;
		do {
			ASSERT(!PP_ISFREE(pp));
			ASSERT(pp->p_szc == 0);
			if (PP_ISNORELOC(pp) == 0) {
				PP_SETNORELOC(pp);
				PLCNT_XFER_NORELOC(pp);
			}
		} while ((pp = pp->p_vpnext) != kvp.v_pages);

	}

	kcage_on = 1;

	/*
	 * CB_CL_CPR_POST_KERNEL is the class that executes from cpr_suspend()
	 * after the cageout thread is blocked, and executes from cpr_resume()
	 * before the cageout thread is restarted. By executing in this class,
	 * we are assured that the kernel cage thread won't miss wakeup calls
	 * and also CPR's larger kmem_alloc requests will not fail after
	 * CPR shuts down the cageout kernel thread.
	 */
	(void) callb_add(kcage_cageout_cpr, NULL, CB_CL_CPR_POST_KERNEL,
	    "cageout");

	/*
	 * Coalesce pages to improve large page availability. A better fix
	 * would be to coalesce pages as they are included in the cage
	 */
	if (SEGKMEM_USE_LARGEPAGES) {
		extern void page_freelist_coalesce_all(int mnode);
		page_freelist_coalesce_all(-1);	/* do all mnodes */
	}

	/* Export the cage's page ranges as a virtual, variable-size kstat. */
	ksp = kstat_create("kcage", 0, "kcage_page_list", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_update = kcage_kstat_update;
		ksp->ks_snapshot = kcage_kstat_snapshot;
		ksp->ks_lock = &kcage_kstat_lock; /* XXX - not really needed */
		kstat_install(ksp);
	}
}

/*
 * kstat update: size the snapshot buffer by counting growth-list
 * elements that have caged pages (curr has moved off its starting
 * edge).  Takes kcage_range_rwlock as writer to hold the list stable.
 */
static int
kcage_kstat_update(kstat_t *ksp, int rw)
{
	struct kcage_glist *lp;
	uint_t count;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	count = 0;
	rw_enter(&kcage_range_rwlock, RW_WRITER);
	for (lp = kcage_glist; lp != NULL; lp = lp->next) {
		if (lp->decr) {
			if (lp->curr != lp->lim) {
				count++;
			}
		} else {
			if (lp->curr != lp->base) {
				count++;
			}
		}
	}
	rw_exit(&kcage_range_rwlock);

	/* Two uint64_t (address, size) per populated range. */
	ksp->ks_ndata = count;
	ksp->ks_data_size = count * 2 * sizeof (uint64_t);

	return (0);
}

/*
 * kstat snapshot: emit (address, size) byte ranges for the caged part
 * of each growth-list element, bounded by the buffer size computed in
 * kcage_kstat_update().  The list may have changed between the two
 * calls, hence the bounds check each iteration.
 */
static int
kcage_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
{
	struct kcage_glist *lp;
	struct memunit {
		uint64_t address;
		uint64_t size;
	} *kspmem;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ksp->ks_snaptime = gethrtime();

	kspmem = (struct memunit *)buf;
	rw_enter(&kcage_range_rwlock, RW_WRITER);
	for (lp = kcage_glist; lp != NULL; lp = lp->next, kspmem++) {
		if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
			break;

		if (lp->decr) {
			/* Downward-growing: caged part is [curr, lim). */
			if (lp->curr != lp->lim) {
				kspmem->address = ptob(lp->curr);
				kspmem->size = ptob(lp->lim - lp->curr);
			}
		} else {
			/* Upward-growing: caged part is [base, curr). */
			if (lp->curr != lp->base) {
				kspmem->address = ptob(lp->base);
				kspmem->size = ptob(lp->curr - lp->base);
			}
		}
	}
	rw_exit(&kcage_range_rwlock);

	return (0);
}

/*
 * Recompute the cage thresholds (lotsfree, minfree, desfree,
 * throttlefree, reserve).  Tunables the administrator set explicitly
 * (nonzero at first call) are preserved; zero values are derived from
 * total_pages.  Called at init and from the kphysm add/delete
 * callbacks.  After recalculation, kick the cageout thread and any
 * throttled waiters in case the new thresholds changed their status.
 */
void
kcage_recalc_thresholds()
{
	static int first = 1;
	static pgcnt_t init_lotsfree;
	static pgcnt_t init_desfree;
	static pgcnt_t init_minfree;
	static pgcnt_t init_throttlefree;
	static pgcnt_t init_reserve;

	/* TODO: any reason to take more care than this with live editing? */
	mutex_enter(&kcage_cageout_mutex);
	mutex_enter(&freemem_lock);

	if (first) {
		/* Remember the boot-time (possibly tuned) values. */
		first = 0;
		init_lotsfree = kcage_lotsfree;
		init_desfree = kcage_desfree;
		init_minfree = kcage_minfree;
		init_throttlefree = kcage_throttlefree;
		init_reserve = kcage_reserve;
	} else {
		/* Restore originals so the derivations below restart fresh. */
		kcage_lotsfree = init_lotsfree;
		kcage_desfree = init_desfree;
		kcage_minfree = init_minfree;
		kcage_throttlefree = init_throttlefree;
		kcage_reserve = init_reserve;
	}

	if (kcage_lotsfree == 0)
		kcage_lotsfree = MAX(32, total_pages / 256);

	if (kcage_minfree == 0)
		kcage_minfree = MAX(32, kcage_lotsfree / 2);

	if (kcage_desfree == 0)
		kcage_desfree = MAX(32, kcage_minfree);

	if (kcage_throttlefree == 0)
		kcage_throttlefree = MAX(32, kcage_minfree / 2);

	/* NOTE: MIN (not MAX) — reserve is deliberately kept small. */
	if (kcage_reserve == 0)
		kcage_reserve = MIN(32, kcage_throttlefree / 2);

	mutex_exit(&freemem_lock);
	mutex_exit(&kcage_cageout_mutex);

	if (kcage_cageout_ready) {
		if (kcage_freemem < kcage_desfree)
			kcage_cageout_wakeup();

		if (kcage_needfree) {
			mutex_enter(&kcage_throttle_mutex);
			cv_broadcast(&kcage_throttle_cv);
			mutex_exit(&kcage_throttle_mutex);
		}
	}
}

/*
 * Pageout interface:
 *	kcage_cageout_init()
 */
void
kcage_cageout_init()
{
	if (kcage_on) {
		/* Start the cageout kernel thread in the pageout process. */
		(void) lwp_kernel_create(proc_pageout, kcage_cageout, NULL,
		    TS_RUN, maxclsyspri - 1);
	}
}


/*
 * VM Interfaces:
 *	kcage_create_throttle()
 *	kcage_freemem_add()
 *	kcage_freemem_sub()
 */

/*
 * Wakeup cageout
 * thread and throttle waiting for the number of pages
 * requested to become available.  For non-critical requests, a
 * timeout is added, since freemem accounting is separate from cage
 * freemem accounting: it's possible for us to get stuck and not make
 * forward progress even though there was sufficient freemem before
 * arriving here.
 */
int
kcage_create_throttle(pgcnt_t npages, int flags)
{

	KCAGE_STAT_INCR(kct_calls);		/* unprotected incr. */

	/*
	 * Obviously, we can't throttle the cageout thread since
	 * we depend on it.  We also can't throttle the panic thread.
	 */
	if (curthread == kcage_cageout_thread || panicstr) {
		KCAGE_STAT_INCR(kct_cageout);	/* unprotected incr. */
		return (KCT_CRIT);
	}

	/*
	 * Don't throttle threads which are critical for proper
	 * vm management if we're above kcage_throttlefree or
	 * if freemem is very low.
	 */
	if (NOMEMWAIT()) {
		if (kcage_freemem > kcage_throttlefree + npages) {
			KCAGE_STAT_INCR(kct_exempt);	/* unprotected incr. */
			return (KCT_CRIT);
		} else if (freemem < minfree) {
			KCAGE_STAT_INCR(kct_critical);	/* unprotected incr. */
			return (KCT_CRIT);
		}
	}

	/*
	 * Don't throttle real-time threads if kcage_freemem > kcage_reserve.
	 */
	if (DISP_PRIO(curthread) > maxclsyspri &&
	    kcage_freemem > kcage_reserve) {
		KCAGE_STAT_INCR(kct_exempt);	/* unprotected incr. */
		return (KCT_CRIT);
	}

	/*
	 * Cause all other threads (which are assumed to not be
	 * critical to cageout) to wait here until their request
	 * can be satisfied. Be a little paranoid and wake the
	 * kernel cage on each loop through this logic.
	 */
	while (kcage_freemem < kcage_throttlefree + npages) {
		ASSERT(kcage_on);
		if (kcage_cageout_ready) {
			mutex_enter(&kcage_throttle_mutex);

			/* Advertise our demand and sleep for a wakeup. */
			kcage_needfree += npages;
			KCAGE_STAT_INCR(kct_wait);

			kcage_cageout_wakeup();
			KCAGE_STAT_INCR(kct_cagewake);

			cv_wait(&kcage_throttle_cv, &kcage_throttle_mutex);

			kcage_needfree -= npages;

			mutex_exit(&kcage_throttle_mutex);
		} else {
			/*
			 * NOTE: atomics are used just in case we enter
			 * mp operation before the cageout thread is ready.
			 */
			atomic_add_long(&kcage_needfree, npages);

			kcage_cageout_wakeup();
			KCAGE_STAT_INCR(kct_cagewake);	/* unprotected incr. */

			atomic_add_long(&kcage_needfree, -npages);
		}

		/* Re-check the critical exemption after each wait. */
		if (NOMEMWAIT() && freemem < minfree) {
			return (KCT_CRIT);
		}
		if ((flags & PG_WAIT) == 0) {
			/*
			 * Caller won't wait indefinitely: fail only when
			 * both cage freemem and global freemem are below
			 * the relevant floor for this request's priority.
			 */
			pgcnt_t limit = (flags & PG_NORMALPRI) ?
			    throttlefree : pageout_reserve;

			if ((kcage_freemem < kcage_throttlefree + npages) &&
			    (freemem < limit + npages)) {
				return (KCT_FAILURE);
			} else {
				return (KCT_NONCRIT);
			}
		}
	}
	return (KCT_NONCRIT);
}

/*
 * Credit npages to the cage free count and wake pcgs() waiters, plus
 * any throttled threads whose demand can now be satisfied.
 */
void
kcage_freemem_add(pgcnt_t npages)
{
	extern void wakeup_pcgs(void);

	atomic_add_long(&kcage_freemem, npages);

	wakeup_pcgs();	/* wakeup threads in pcgs() */

	if (kcage_needfree != 0 &&
	    kcage_freemem >= (kcage_throttlefree + kcage_needfree)) {

		mutex_enter(&kcage_throttle_mutex);
		cv_broadcast(&kcage_throttle_cv);
		KCAGE_STAT_INCR(kfa_trottlewake);
		mutex_exit(&kcage_throttle_mutex);
	}
}

/*
 * Debit npages from the cage free count; wake the cageout thread if
 * this drops us below the desired floor.
 */
void
kcage_freemem_sub(pgcnt_t npages)
{
	atomic_add_long(&kcage_freemem, -npages);

	if (kcage_freemem < kcage_desfree) {
		kcage_cageout_wakeup();
		KCAGE_STAT_INCR(kfs_cagewake);	/* unprotected incr. */
	}
}

/*
 * Set P_NORELOC on rootpp and, for a large page, on every constituent
 * page under the group page lock.  Caller must already hold rootpp's
 * page lock with the given se.
 * return 0 on failure and 1 on success.
 */
static int
kcage_setnoreloc_pages(page_t *rootpp, se_t se)
{
	pgcnt_t npgs, i;
	page_t *pp;
	pfn_t rootpfn = page_pptonum(rootpp);
	uint_t szc;

	ASSERT(!PP_ISFREE(rootpp));
	ASSERT(PAGE_LOCKED_SE(rootpp, se));
	if (!group_page_trylock(rootpp, se)) {
		return (0);
	}
	szc = rootpp->p_szc;
	if (szc == 0) {
		/*
		 * The szc of a locked page can only change for pages that are
		 * non-swapfs (i.e. anonymous memory) file system pages.
		 *
		 * NOTE(review): this path returns without calling
		 * group_page_unlock(); presumably group_page_trylock()
		 * takes no additional locks for a small (szc == 0) page —
		 * verify against its implementation.
		 */
		ASSERT(rootpp->p_vnode != NULL &&
		    !PP_ISKAS(rootpp) &&
		    !IS_SWAPFSVP(rootpp->p_vnode));
		PP_SETNORELOC(rootpp);
		return (1);
	}
	npgs = page_get_pagecnt(szc);
	ASSERT(IS_P2ALIGNED(rootpfn, npgs));
	pp = rootpp;
	for (i = 0; i < npgs; i++, pp++) {
		ASSERT(PAGE_LOCKED_SE(pp, se));
		ASSERT(!PP_ISFREE(pp));
		ASSERT(pp->p_szc == szc);
		PP_SETNORELOC(pp);
	}
	group_page_unlock(rootpp);
	return (1);
}

/*
 * Attempt to convert page to a caged page (set the P_NORELOC flag).
 * If successful and pages is free, move page to the tail of whichever
 * list it is on.
 * Returns:
 *   EBUSY  page already locked, assimilated but not free.
 *   ENOMEM page assimilated, but memory too low to relocate. Page not free.
 *   EAGAIN page not assimilated. Page not free.
 *   ERANGE page assimilated. Page not root.
 *   0      page assimilated. Page free.
 *   *nfreedp number of pages freed.
13577c478bd9Sstevel@tonic-gate * NOTE: With error codes ENOMEM, EBUSY, and 0 (zero), there is no way 13587c478bd9Sstevel@tonic-gate * to distinguish between a page that was already a NORELOC page from 13597c478bd9Sstevel@tonic-gate * those newly converted to NORELOC pages by this invocation of 13607c478bd9Sstevel@tonic-gate * kcage_assimilate_page. 13617c478bd9Sstevel@tonic-gate */ 13627c478bd9Sstevel@tonic-gate static int 13637c478bd9Sstevel@tonic-gate kcage_assimilate_page(page_t *pp, pgcnt_t *nfreedp) 13647c478bd9Sstevel@tonic-gate { 13657c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL)) { 13667c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 13677c478bd9Sstevel@tonic-gate check_free_and_return: 13687c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 13697c478bd9Sstevel@tonic-gate page_unlock(pp); 13707c478bd9Sstevel@tonic-gate *nfreedp = 0; 13717c478bd9Sstevel@tonic-gate return (0); 13727c478bd9Sstevel@tonic-gate } else { 13737c478bd9Sstevel@tonic-gate page_unlock(pp); 13747c478bd9Sstevel@tonic-gate return (EBUSY); 13757c478bd9Sstevel@tonic-gate } 13767c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 13777c478bd9Sstevel@tonic-gate } 13787c478bd9Sstevel@tonic-gate } else { 13797c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_SHARED)) { 13807c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) 13817c478bd9Sstevel@tonic-gate goto check_free_and_return; 1382*23a80de1SStan Studzinski } else { 13837c478bd9Sstevel@tonic-gate return (EAGAIN); 1384*23a80de1SStan Studzinski } 13857c478bd9Sstevel@tonic-gate if (!PP_ISFREE(pp)) { 13867c478bd9Sstevel@tonic-gate page_unlock(pp); 13877c478bd9Sstevel@tonic-gate return (EAGAIN); 13887c478bd9Sstevel@tonic-gate } 13897c478bd9Sstevel@tonic-gate 13907c478bd9Sstevel@tonic-gate /* 13917c478bd9Sstevel@tonic-gate * Need to upgrade the lock on it and set the NORELOC 13927c478bd9Sstevel@tonic-gate * bit. 
If it is free then remove it from the free 13937c478bd9Sstevel@tonic-gate * list so that the platform free list code can keep 13947c478bd9Sstevel@tonic-gate * NORELOC pages where they should be. 13957c478bd9Sstevel@tonic-gate */ 13967c478bd9Sstevel@tonic-gate /* 13977c478bd9Sstevel@tonic-gate * Before doing anything, get the exclusive lock. 13987c478bd9Sstevel@tonic-gate * This may fail (eg ISM pages are left shared locked). 13997c478bd9Sstevel@tonic-gate * If the page is free this will leave a hole in the 14007c478bd9Sstevel@tonic-gate * cage. There is no solution yet to this. 14017c478bd9Sstevel@tonic-gate */ 14027c478bd9Sstevel@tonic-gate if (!page_tryupgrade(pp)) { 14037c478bd9Sstevel@tonic-gate page_unlock(pp); 14047c478bd9Sstevel@tonic-gate return (EAGAIN); 14057c478bd9Sstevel@tonic-gate } 14067c478bd9Sstevel@tonic-gate } 14077c478bd9Sstevel@tonic-gate 14087c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 14097c478bd9Sstevel@tonic-gate 14107c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 14117c478bd9Sstevel@tonic-gate int which = PP_ISAGED(pp) ? 
PG_FREE_LIST : PG_CACHE_LIST; 14127c478bd9Sstevel@tonic-gate 1413e21bae1bSkchow page_list_sub(pp, which); 14147c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 14157c478bd9Sstevel@tonic-gate PP_SETNORELOC(pp); 1416ee88d2b9Skchow PLCNT_XFER_NORELOC(pp); 1417e21bae1bSkchow page_list_add(pp, which | PG_LIST_TAIL); 14187c478bd9Sstevel@tonic-gate 14197c478bd9Sstevel@tonic-gate page_unlock(pp); 14207c478bd9Sstevel@tonic-gate *nfreedp = 1; 14217c478bd9Sstevel@tonic-gate return (0); 14227c478bd9Sstevel@tonic-gate } else { 14237c478bd9Sstevel@tonic-gate if (pp->p_szc != 0) { 14247c478bd9Sstevel@tonic-gate if (!kcage_setnoreloc_pages(pp, SE_EXCL)) { 14257c478bd9Sstevel@tonic-gate page_unlock(pp); 14267c478bd9Sstevel@tonic-gate return (EAGAIN); 14277c478bd9Sstevel@tonic-gate } 14287c478bd9Sstevel@tonic-gate ASSERT(PP_ISNORELOC(pp)); 14297c478bd9Sstevel@tonic-gate } else { 14307c478bd9Sstevel@tonic-gate PP_SETNORELOC(pp); 14317c478bd9Sstevel@tonic-gate } 1432e21bae1bSkchow PLCNT_XFER_NORELOC(pp); 14337c478bd9Sstevel@tonic-gate return (kcage_invalidate_page(pp, nfreedp)); 14347c478bd9Sstevel@tonic-gate } 14357c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 14367c478bd9Sstevel@tonic-gate } 14377c478bd9Sstevel@tonic-gate 14387c478bd9Sstevel@tonic-gate static int 14397c478bd9Sstevel@tonic-gate kcage_expand() 14407c478bd9Sstevel@tonic-gate { 14417c478bd9Sstevel@tonic-gate int did_something = 0; 14427c478bd9Sstevel@tonic-gate 14437c478bd9Sstevel@tonic-gate spgcnt_t wanted; 14447c478bd9Sstevel@tonic-gate pfn_t pfn; 14457c478bd9Sstevel@tonic-gate page_t *pp; 14467c478bd9Sstevel@tonic-gate /* TODO: we don't really need n any more? */ 14477c478bd9Sstevel@tonic-gate pgcnt_t n; 14487c478bd9Sstevel@tonic-gate pgcnt_t nf, nfreed; 14497c478bd9Sstevel@tonic-gate 14507c478bd9Sstevel@tonic-gate /* 14517c478bd9Sstevel@tonic-gate * Expand the cage if available cage memory is really low. 
Calculate 14527c478bd9Sstevel@tonic-gate * the amount required to return kcage_freemem to the level of 14537c478bd9Sstevel@tonic-gate * kcage_lotsfree, or to satisfy throttled requests, whichever is 14547c478bd9Sstevel@tonic-gate * more. It is rare for their sum to create an artificial threshold 14557c478bd9Sstevel@tonic-gate * above kcage_lotsfree, but it is possible. 14567c478bd9Sstevel@tonic-gate * 14577c478bd9Sstevel@tonic-gate * Exit early if expansion amount is equal to or less than zero. 14587c478bd9Sstevel@tonic-gate * (<0 is possible if kcage_freemem rises suddenly.) 14597c478bd9Sstevel@tonic-gate * 1460*23a80de1SStan Studzinski * Exit early when freemem drops below pageout_reserve plus the request. 14617c478bd9Sstevel@tonic-gate */ 14627c478bd9Sstevel@tonic-gate wanted = MAX(kcage_lotsfree, kcage_throttlefree + kcage_needfree) 14637c478bd9Sstevel@tonic-gate - kcage_freemem; 1464*23a80de1SStan Studzinski if (wanted <= 0) { 14657c478bd9Sstevel@tonic-gate return (0); 1466*23a80de1SStan Studzinski } else if (freemem < pageout_reserve + wanted) { 14677c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_lowfreemem); 14687c478bd9Sstevel@tonic-gate return (0); 14697c478bd9Sstevel@tonic-gate } 14707c478bd9Sstevel@tonic-gate 14717c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_calls); 14727c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(ke_wanted, (uint_t)wanted); 14737c478bd9Sstevel@tonic-gate 14747c478bd9Sstevel@tonic-gate /* 14757c478bd9Sstevel@tonic-gate * Assimilate more pages from the global page pool into the cage. 14767c478bd9Sstevel@tonic-gate */ 14777c478bd9Sstevel@tonic-gate n = 0; /* number of pages PP_SETNORELOC'd */ 14787c478bd9Sstevel@tonic-gate nf = 0; /* number of those actually free */ 14797c478bd9Sstevel@tonic-gate while (kcage_on && nf < wanted) { 148085f58038Sdp78419 pfn = kcage_get_pfn(1); 14817c478bd9Sstevel@tonic-gate if (pfn == PFN_INVALID) { /* eek! 
no where to grow */ 14827c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_nopfn); 14837c478bd9Sstevel@tonic-gate goto terminate; 14847c478bd9Sstevel@tonic-gate } 14857c478bd9Sstevel@tonic-gate 14867c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(ke_examined); 14877c478bd9Sstevel@tonic-gate 14887c478bd9Sstevel@tonic-gate if ((pp = page_numtopp_nolock(pfn)) == NULL) { 14897c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_nopaget); 14907c478bd9Sstevel@tonic-gate continue; 14917c478bd9Sstevel@tonic-gate } 14927c478bd9Sstevel@tonic-gate KCAGEPAGETS_INC(); 14937c478bd9Sstevel@tonic-gate /* 14947c478bd9Sstevel@tonic-gate * Sanity check. Skip this pfn if it is 14957c478bd9Sstevel@tonic-gate * being deleted. 14967c478bd9Sstevel@tonic-gate */ 14977c478bd9Sstevel@tonic-gate if (pfn_is_being_deleted(pfn)) { 14987c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_deleting); 14997c478bd9Sstevel@tonic-gate continue; 15007c478bd9Sstevel@tonic-gate } 15017c478bd9Sstevel@tonic-gate 15027c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 15037c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_isnoreloc); 15047c478bd9Sstevel@tonic-gate continue; 15057c478bd9Sstevel@tonic-gate } 15067c478bd9Sstevel@tonic-gate 15077c478bd9Sstevel@tonic-gate switch (kcage_assimilate_page(pp, &nfreed)) { 15087c478bd9Sstevel@tonic-gate case 0: /* assimilated, page is free */ 15097c478bd9Sstevel@tonic-gate KCAGE_STAT_NINCR_SCAN(ke_gotonefree, nfreed); 15107c478bd9Sstevel@tonic-gate did_something = 1; 15117c478bd9Sstevel@tonic-gate nf += nfreed; 15127c478bd9Sstevel@tonic-gate n++; 15137c478bd9Sstevel@tonic-gate break; 15147c478bd9Sstevel@tonic-gate 15157c478bd9Sstevel@tonic-gate case EBUSY: /* assimilated, page not free */ 15167c478bd9Sstevel@tonic-gate case ERANGE: /* assimilated, page not root */ 15177c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(ke_gotone); 15187c478bd9Sstevel@tonic-gate did_something = 1; 15197c478bd9Sstevel@tonic-gate n++; 15207c478bd9Sstevel@tonic-gate break; 15217c478bd9Sstevel@tonic-gate 
15227c478bd9Sstevel@tonic-gate case ENOMEM: /* assimilated, but no mem */ 15237c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(ke_terminate); 15247c478bd9Sstevel@tonic-gate did_something = 1; 15257c478bd9Sstevel@tonic-gate n++; 15267c478bd9Sstevel@tonic-gate goto terminate; 15277c478bd9Sstevel@tonic-gate 15287c478bd9Sstevel@tonic-gate case EAGAIN: /* can't assimilate */ 15297c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(ke_lefthole); 15307c478bd9Sstevel@tonic-gate break; 15317c478bd9Sstevel@tonic-gate 15327c478bd9Sstevel@tonic-gate default: /* catch this with debug kernels */ 15337c478bd9Sstevel@tonic-gate ASSERT(0); 15347c478bd9Sstevel@tonic-gate break; 15357c478bd9Sstevel@tonic-gate } 15367c478bd9Sstevel@tonic-gate } 15377c478bd9Sstevel@tonic-gate 15387c478bd9Sstevel@tonic-gate /* 15397c478bd9Sstevel@tonic-gate * Realign cage edge with the nearest physical address 15407c478bd9Sstevel@tonic-gate * boundry for big pages. This is done to give us a 15417c478bd9Sstevel@tonic-gate * better chance of actually getting usable big pages 15427c478bd9Sstevel@tonic-gate * in the cage. 15437c478bd9Sstevel@tonic-gate */ 15447c478bd9Sstevel@tonic-gate 15457c478bd9Sstevel@tonic-gate terminate: 15467c478bd9Sstevel@tonic-gate 15477c478bd9Sstevel@tonic-gate return (did_something); 15487c478bd9Sstevel@tonic-gate } 15497c478bd9Sstevel@tonic-gate 15507c478bd9Sstevel@tonic-gate /* 15517c478bd9Sstevel@tonic-gate * Relocate page opp (Original Page Pointer) from cage pool to page rpp 15527c478bd9Sstevel@tonic-gate * (Replacement Page Pointer) in the global pool. Page opp will be freed 15537c478bd9Sstevel@tonic-gate * if relocation is successful, otherwise it is only unlocked. 15547c478bd9Sstevel@tonic-gate * On entry, page opp must be exclusively locked and not free. 15557c478bd9Sstevel@tonic-gate * *nfreedp: number of pages freed. 
15567c478bd9Sstevel@tonic-gate */ 15577c478bd9Sstevel@tonic-gate static int 15587c478bd9Sstevel@tonic-gate kcage_relocate_page(page_t *pp, pgcnt_t *nfreedp) 15597c478bd9Sstevel@tonic-gate { 15607c478bd9Sstevel@tonic-gate page_t *opp = pp; 15617c478bd9Sstevel@tonic-gate page_t *rpp = NULL; 15627c478bd9Sstevel@tonic-gate spgcnt_t npgs; 15637c478bd9Sstevel@tonic-gate int result; 15647c478bd9Sstevel@tonic-gate 15657c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(opp)); 15667c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(opp)); 15677c478bd9Sstevel@tonic-gate 15687c478bd9Sstevel@tonic-gate result = page_relocate(&opp, &rpp, 1, 1, &npgs, NULL); 15697c478bd9Sstevel@tonic-gate *nfreedp = npgs; 15707c478bd9Sstevel@tonic-gate if (result == 0) { 15717c478bd9Sstevel@tonic-gate while (npgs-- > 0) { 15727c478bd9Sstevel@tonic-gate page_t *tpp; 15737c478bd9Sstevel@tonic-gate 15747c478bd9Sstevel@tonic-gate ASSERT(rpp != NULL); 15757c478bd9Sstevel@tonic-gate tpp = rpp; 15767c478bd9Sstevel@tonic-gate page_sub(&rpp, tpp); 15777c478bd9Sstevel@tonic-gate page_unlock(tpp); 15787c478bd9Sstevel@tonic-gate } 15797c478bd9Sstevel@tonic-gate 15807c478bd9Sstevel@tonic-gate ASSERT(rpp == NULL); 15817c478bd9Sstevel@tonic-gate 15827c478bd9Sstevel@tonic-gate return (0); /* success */ 15837c478bd9Sstevel@tonic-gate } 15847c478bd9Sstevel@tonic-gate 15857c478bd9Sstevel@tonic-gate page_unlock(opp); 15867c478bd9Sstevel@tonic-gate return (result); 15877c478bd9Sstevel@tonic-gate } 15887c478bd9Sstevel@tonic-gate 15897c478bd9Sstevel@tonic-gate /* 15907c478bd9Sstevel@tonic-gate * Based on page_invalidate_pages() 15917c478bd9Sstevel@tonic-gate * 15927c478bd9Sstevel@tonic-gate * Kcage_invalidate_page() uses page_relocate() twice. Both instances 15937c478bd9Sstevel@tonic-gate * of use must be updated to match the new page_relocate() when it 15947c478bd9Sstevel@tonic-gate * becomes available. 
 *
 * Return result of kcage_relocate_page or zero if page was directly freed.
 * *nfreedp: number of pages freed.
 */
static int
kcage_invalidate_page(page_t *pp, pgcnt_t *nfreedp)
{
	int result;

#if defined(__sparc)
	ASSERT(pp->p_vnode != &promvp);
#endif /* __sparc */
	ASSERT(!PP_ISFREE(pp));
	ASSERT(PAGE_EXCL(pp));

	/*
	 * Is this page involved in some I/O? shared?
	 * The page_struct_lock need not be acquired to
	 * examine these fields since the page has an
	 * "exclusive" lock.
	 */
	if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
		/* Locked/COW page: must be relocated, cannot be destroyed. */
		result = kcage_relocate_page(pp, nfreedp);
#ifdef KCAGE_STATS
		if (result == 0)
			KCAGE_STAT_INCR_SCAN(kip_reloclocked);
		else if (result == ENOMEM)
			KCAGE_STAT_INCR_SCAN(kip_nomem);
#endif
		return (result);
	}

	ASSERT(pp->p_vnode->v_type != VCHR);

	/*
	 * Unload the mappings and check if mod bit is set.
	 */
	(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);

	if (hat_ismod(pp)) {
		/* Dirty page: relocate rather than discard its contents. */
		result = kcage_relocate_page(pp, nfreedp);
#ifdef KCAGE_STATS
		if (result == 0)
			KCAGE_STAT_INCR_SCAN(kip_relocmod);
		else if (result == ENOMEM)
			KCAGE_STAT_INCR_SCAN(kip_nomem);
#endif
		return (result);
	}

	if (!page_try_demote_pages(pp)) {
		KCAGE_STAT_INCR_SCAN(kip_demotefailed);
		page_unlock(pp);
		return (EAGAIN);
	}

	/*
	 * Clean, unmapped, unlocked base page: destroy it outright.
	 * VN_DISPOSE frees the page and drops the page lock for us.
	 */
	/* LINTED: constant in conditional context */
	VN_DISPOSE(pp, B_INVAL, 0, kcred);
	KCAGE_STAT_INCR_SCAN(kip_destroy);
	*nfreedp = 1;
	return (0);
}

/*
 * Expand cage only if there is not enough memory to satisfy
 * current request. We only do one (complete) scan of the cage.
 * Dirty pages and pages with shared mappings are skipped;
 * Locked pages (p_lckcnt and p_cowcnt) are also skipped.
 * All other pages are freed (if they can be locked).
 * This may affect caching of user pages which are in cage by freeing/
However cage is mainly for kernel (heap) 1666*23a80de1SStan Studzinski * pages and we want to keep user pages outside of cage. The above policy 1667*23a80de1SStan Studzinski * should also reduce cage expansion plus it should speed up cage mem 1668*23a80de1SStan Studzinski * allocations. 1669*23a80de1SStan Studzinski */ 16707c478bd9Sstevel@tonic-gate static void 16717c478bd9Sstevel@tonic-gate kcage_cageout() 16727c478bd9Sstevel@tonic-gate { 16737c478bd9Sstevel@tonic-gate pfn_t pfn; 16747c478bd9Sstevel@tonic-gate page_t *pp; 16757c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 16767c478bd9Sstevel@tonic-gate int did_something; 16777c478bd9Sstevel@tonic-gate pfn_t start_pfn; 167805d3dc4bSpaulsan ulong_t shared_level = 8; 16797c478bd9Sstevel@tonic-gate pgcnt_t nfreed; 16807c478bd9Sstevel@tonic-gate #ifdef KCAGE_STATS 16817c478bd9Sstevel@tonic-gate clock_t scan_start; 16827c478bd9Sstevel@tonic-gate #endif 16837c478bd9Sstevel@tonic-gate 16847c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &kcage_cageout_mutex, 16857c478bd9Sstevel@tonic-gate callb_generic_cpr, "cageout"); 16867c478bd9Sstevel@tonic-gate 16877c478bd9Sstevel@tonic-gate mutex_enter(&kcage_cageout_mutex); 1688dd2069f5Sggredvig kcage_cageout_thread = curthread; 16897c478bd9Sstevel@tonic-gate 16907c478bd9Sstevel@tonic-gate pfn = PFN_INVALID; /* force scan reset */ 16917c478bd9Sstevel@tonic-gate start_pfn = PFN_INVALID; /* force init with 1st cage pfn */ 16927c478bd9Sstevel@tonic-gate kcage_cageout_ready = 1; /* switch kcage_cageout_wakeup mode */ 16937c478bd9Sstevel@tonic-gate 16947c478bd9Sstevel@tonic-gate loop: 16957c478bd9Sstevel@tonic-gate /* 16967c478bd9Sstevel@tonic-gate * Wait here. Sooner or later, kcage_freemem_sub() will notice 16977c478bd9Sstevel@tonic-gate * that kcage_freemem is less than kcage_desfree. When it does 16987c478bd9Sstevel@tonic-gate * notice, kcage_freemem_sub() will wake us up via call to 16997c478bd9Sstevel@tonic-gate * kcage_cageout_wakeup(). 
17007c478bd9Sstevel@tonic-gate */ 17017c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 17027c478bd9Sstevel@tonic-gate cv_wait(&kcage_cageout_cv, &kcage_cageout_mutex); 17037c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &kcage_cageout_mutex); 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(kt_wakeups); 17067c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(kt_freemem_start, freemem); 17077c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(kt_kcage_freemem_start, kcage_freemem); 17087c478bd9Sstevel@tonic-gate #ifdef KCAGE_STATS 1709d3d50737SRafael Vanoni scan_start = ddi_get_lbolt(); 17107c478bd9Sstevel@tonic-gate #endif 17117c478bd9Sstevel@tonic-gate if (!kcage_on) 17127c478bd9Sstevel@tonic-gate goto loop; 17137c478bd9Sstevel@tonic-gate 17147c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(kt_scans); 17157c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(kt_passes); 17167c478bd9Sstevel@tonic-gate 17177c478bd9Sstevel@tonic-gate did_something = 0; 1718*23a80de1SStan Studzinski while (kcage_freemem < kcage_lotsfree + kcage_needfree) { 1719*23a80de1SStan Studzinski 1720*23a80de1SStan Studzinski if ((pfn = kcage_walk_cage(pfn == PFN_INVALID)) == 1721*23a80de1SStan Studzinski PFN_INVALID) { 1722*23a80de1SStan Studzinski break; 1723*23a80de1SStan Studzinski } 17247c478bd9Sstevel@tonic-gate 17257c478bd9Sstevel@tonic-gate if (start_pfn == PFN_INVALID) 17267c478bd9Sstevel@tonic-gate start_pfn = pfn; 17277c478bd9Sstevel@tonic-gate else if (start_pfn == pfn) { 17287c478bd9Sstevel@tonic-gate /* 17297c478bd9Sstevel@tonic-gate * Did a complete walk of kernel cage, but didn't free 1730dc84a327Svb70745 * any pages. If only one cpu is active then 17317c478bd9Sstevel@tonic-gate * stop kernel cage walk and try expanding. 
17327c478bd9Sstevel@tonic-gate */ 1733dc84a327Svb70745 if (cp_default.cp_ncpus == 1 && did_something == 0) { 17347c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(kt_cageout_break); 17357c478bd9Sstevel@tonic-gate break; 17367c478bd9Sstevel@tonic-gate } 17377c478bd9Sstevel@tonic-gate } 17387c478bd9Sstevel@tonic-gate 17397c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 17407c478bd9Sstevel@tonic-gate if (pp == NULL) { 17417c478bd9Sstevel@tonic-gate continue; 17427c478bd9Sstevel@tonic-gate } 17437c478bd9Sstevel@tonic-gate 17447c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(kt_examined); 17457c478bd9Sstevel@tonic-gate 17467c478bd9Sstevel@tonic-gate /* 17477c478bd9Sstevel@tonic-gate * Do a quick PP_ISNORELOC() and PP_ISFREE test outside 17487c478bd9Sstevel@tonic-gate * of the lock. If one is missed it will be seen next 17497c478bd9Sstevel@tonic-gate * time through. 17507c478bd9Sstevel@tonic-gate * 17517c478bd9Sstevel@tonic-gate * Skip non-caged-pages. These pages can exist in the cage 17527c478bd9Sstevel@tonic-gate * because, if during cage expansion, a page is 17537c478bd9Sstevel@tonic-gate * encountered that is long-term locked the lock prevents the 17547c478bd9Sstevel@tonic-gate * expansion logic from setting the P_NORELOC flag. Hence, 17557c478bd9Sstevel@tonic-gate * non-caged-pages surrounded by caged-pages. 
17567c478bd9Sstevel@tonic-gate */ 17577c478bd9Sstevel@tonic-gate if (!PP_ISNORELOC(pp)) { 17587c478bd9Sstevel@tonic-gate switch (kcage_assimilate_page(pp, &nfreed)) { 17597c478bd9Sstevel@tonic-gate case 0: 17607c478bd9Sstevel@tonic-gate did_something = 1; 17617c478bd9Sstevel@tonic-gate KCAGE_STAT_NINCR_SCAN(kt_gotonefree, 17627c478bd9Sstevel@tonic-gate nfreed); 17637c478bd9Sstevel@tonic-gate break; 17647c478bd9Sstevel@tonic-gate 17657c478bd9Sstevel@tonic-gate case EBUSY: 17667c478bd9Sstevel@tonic-gate case ERANGE: 17677c478bd9Sstevel@tonic-gate did_something = 1; 17687c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(kt_gotone); 17697c478bd9Sstevel@tonic-gate break; 17707c478bd9Sstevel@tonic-gate 17717c478bd9Sstevel@tonic-gate case EAGAIN: 17727c478bd9Sstevel@tonic-gate case ENOMEM: 17737c478bd9Sstevel@tonic-gate break; 17747c478bd9Sstevel@tonic-gate 17757c478bd9Sstevel@tonic-gate default: 17767c478bd9Sstevel@tonic-gate /* catch this with debug kernels */ 17777c478bd9Sstevel@tonic-gate ASSERT(0); 17787c478bd9Sstevel@tonic-gate break; 17797c478bd9Sstevel@tonic-gate } 17807c478bd9Sstevel@tonic-gate 17817c478bd9Sstevel@tonic-gate continue; 17827c478bd9Sstevel@tonic-gate } else { 17837c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 17847c478bd9Sstevel@tonic-gate continue; 17857c478bd9Sstevel@tonic-gate } 17867c478bd9Sstevel@tonic-gate 1787ad23a2dbSjohansen if ((PP_ISKAS(pp) && pp->p_lckcnt > 0) || 17887c478bd9Sstevel@tonic-gate !page_trylock(pp, SE_EXCL)) { 17897c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(kt_cantlock); 17907c478bd9Sstevel@tonic-gate continue; 17917c478bd9Sstevel@tonic-gate } 17927c478bd9Sstevel@tonic-gate 17937c478bd9Sstevel@tonic-gate /* P_NORELOC bit should not have gone away. 
*/ 17947c478bd9Sstevel@tonic-gate ASSERT(PP_ISNORELOC(pp)); 1795ad23a2dbSjohansen if (PP_ISFREE(pp) || (PP_ISKAS(pp) && 17967c478bd9Sstevel@tonic-gate pp->p_lckcnt > 0)) { 17977c478bd9Sstevel@tonic-gate page_unlock(pp); 17987c478bd9Sstevel@tonic-gate continue; 17997c478bd9Sstevel@tonic-gate } 18007c478bd9Sstevel@tonic-gate 180105d3dc4bSpaulsan if (hat_page_checkshare(pp, shared_level)) { 18027c478bd9Sstevel@tonic-gate page_unlock(pp); 18037c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR_SCAN(kt_skipshared); 18047c478bd9Sstevel@tonic-gate continue; 18057c478bd9Sstevel@tonic-gate } 18067c478bd9Sstevel@tonic-gate 18077c478bd9Sstevel@tonic-gate if (kcage_invalidate_page(pp, &nfreed) == 0) { 18087c478bd9Sstevel@tonic-gate did_something = 1; 18097c478bd9Sstevel@tonic-gate KCAGE_STAT_NINCR_SCAN(kt_gotonefree, nfreed); 18107c478bd9Sstevel@tonic-gate } 18117c478bd9Sstevel@tonic-gate 18127c478bd9Sstevel@tonic-gate /* 18137c478bd9Sstevel@tonic-gate * No need to drop the page lock here. 18147c478bd9Sstevel@tonic-gate * Kcage_invalidate_page has done that for us 18157c478bd9Sstevel@tonic-gate * either explicitly or through a page_free. 
18167c478bd9Sstevel@tonic-gate */ 18177c478bd9Sstevel@tonic-gate } 18187c478bd9Sstevel@tonic-gate } 18197c478bd9Sstevel@tonic-gate 1820*23a80de1SStan Studzinski if (kcage_freemem < kcage_throttlefree + kcage_needfree) 1821*23a80de1SStan Studzinski (void) kcage_expand(); 18227c478bd9Sstevel@tonic-gate 1823*23a80de1SStan Studzinski if (kcage_on && kcage_cageout_ready) 1824*23a80de1SStan Studzinski cv_broadcast(&kcage_throttle_cv); 18257c478bd9Sstevel@tonic-gate 18267c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(kt_freemem_end, freemem); 18277c478bd9Sstevel@tonic-gate KCAGE_STAT_SET_SCAN(kt_kcage_freemem_end, kcage_freemem); 1828d3d50737SRafael Vanoni KCAGE_STAT_SET_SCAN(kt_ticks, ddi_get_lbolt() - scan_start); 18297c478bd9Sstevel@tonic-gate KCAGE_STAT_INC_SCAN_INDEX; 18307c478bd9Sstevel@tonic-gate goto loop; 18317c478bd9Sstevel@tonic-gate 18327c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 18337c478bd9Sstevel@tonic-gate } 18347c478bd9Sstevel@tonic-gate 18357c478bd9Sstevel@tonic-gate void 18367c478bd9Sstevel@tonic-gate kcage_cageout_wakeup() 18377c478bd9Sstevel@tonic-gate { 18387c478bd9Sstevel@tonic-gate if (mutex_tryenter(&kcage_cageout_mutex)) { 18397c478bd9Sstevel@tonic-gate if (kcage_cageout_ready) { 18407c478bd9Sstevel@tonic-gate cv_signal(&kcage_cageout_cv); 18417c478bd9Sstevel@tonic-gate } else if (kcage_freemem < kcage_minfree || kcage_needfree) { 18427c478bd9Sstevel@tonic-gate /* 18437c478bd9Sstevel@tonic-gate * Available cage memory is really low. Time to 18447c478bd9Sstevel@tonic-gate * start expanding the cage. However, the 18457c478bd9Sstevel@tonic-gate * kernel cage thread is not yet ready to 18467c478bd9Sstevel@tonic-gate * do the work. Use *this* thread, which is 18477c478bd9Sstevel@tonic-gate * most likely to be t0, to do the work. 
18487c478bd9Sstevel@tonic-gate */ 18497c478bd9Sstevel@tonic-gate KCAGE_STAT_INCR(kcw_expandearly); 18507c478bd9Sstevel@tonic-gate (void) kcage_expand(); 18517c478bd9Sstevel@tonic-gate KCAGE_STAT_INC_SCAN_INDEX; 18527c478bd9Sstevel@tonic-gate } 18537c478bd9Sstevel@tonic-gate 18547c478bd9Sstevel@tonic-gate mutex_exit(&kcage_cageout_mutex); 18557c478bd9Sstevel@tonic-gate } 18567c478bd9Sstevel@tonic-gate /* else, kernel cage thread is already running */ 18577c478bd9Sstevel@tonic-gate } 18587c478bd9Sstevel@tonic-gate 18597c478bd9Sstevel@tonic-gate void 18607c478bd9Sstevel@tonic-gate kcage_tick() 18617c478bd9Sstevel@tonic-gate { 18627c478bd9Sstevel@tonic-gate /* 18637c478bd9Sstevel@tonic-gate * Once per second we wake up all the threads throttled 18647c478bd9Sstevel@tonic-gate * waiting for cage memory, in case we've become stuck 18657c478bd9Sstevel@tonic-gate * and haven't made forward progress expanding the cage. 18667c478bd9Sstevel@tonic-gate */ 18677c478bd9Sstevel@tonic-gate if (kcage_on && kcage_cageout_ready) 18687c478bd9Sstevel@tonic-gate cv_broadcast(&kcage_throttle_cv); 18697c478bd9Sstevel@tonic-gate } 1870