/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Static routines
 */
static void	sfmmu_map_prom_mappings(struct translation *, size_t);
static struct translation *read_prom_mappings(size_t *);
static void	sfmmu_reloc_trap_handler(void *, void *, size_t);

/*
 * External routines
 */
extern void	sfmmu_remap_kernel(void);
extern void	sfmmu_patch_utsb(void);

/*
 * Global Data:
 */
extern caddr_t	textva, datava;
extern tte_t	ktext_tte, kdata_tte;	/* ttes for kernel text and data */
extern int	enable_bigktsb;

uint64_t memsegspa = (uintptr_t)MSEG_NULLPTR_PA; /* memsegs physical linkage */
uint64_t memseg_phash[N_MEM_SLOTS];	/* use physical memseg addresses */

int	sfmmu_kern_mapped = 0;

/*
 * DMMU primary context register for the kernel context.  Machine specific
 * code inserts correct page size codes when necessary
 */
uint64_t kcontextreg = KCONTEXT;

/* Extern Global Data */
extern int page_relocate_ready;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
extern int ktsb_phys;

/*
 * Global Routines called from within:
 *	usr/src/uts/sun4u
 *	usr/src/uts/sfmmu
 *	usr/src/uts/sun
 */

pfn_t
va_to_pfn(void *vaddr)
{
        u_longlong_t physaddr;
        int mode, valid;

        if (tba_taken_over)
                return (hat_getpfnum(kas.a_hat, (caddr_t)vaddr));

        if ((prom_translate_virt(vaddr, &valid, &physaddr, &mode) != -1) &&
            (valid == -1)) {
                return ((pfn_t)(physaddr >> MMU_PAGESHIFT));
        }
        return (PFN_INVALID);
}

uint64_t
va_to_pa(void *vaddr)
{
        pfn_t pfn;

        if ((pfn = va_to_pfn(vaddr)) == PFN_INVALID)
                return ((uint64_t)-1);
        return (((uint64_t)pfn << MMU_PAGESHIFT) |
            ((uint64_t)vaddr & MMU_PAGEOFFSET));
}
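/*
 * For illustration (values assumed here rather than taken from the headers
 * above): with the usual sun4u 8K base page size, MMU_PAGESHIFT is 13 and
 * MMU_PAGEOFFSET is 0x1fff.  A vaddr whose translation yields pfn 0x1234
 * therefore maps to physical address
 *	(0x1234 << 13) | (vaddr & 0x1fff) == 0x2468000 | page offset
 * which is exactly what va_to_pa() returns above.
 */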
void
hat_kern_setup(void)
{
        struct translation *trans_root;
        size_t ntrans_root;
        extern void startup_fixup_physavail(void);

        /*
         * These are the steps we take to take over the mmu from the prom.
         *
         * (1)	Read the prom's mappings through the translation property.
         * (2)	Remap the kernel text and kernel data with 2 locked 4MB ttes.
         *	Create the hmeblks for these 2 ttes at this time.
         * (3)	Create hat structures for all other prom mappings.  Since the
         *	kernel text and data hme_blks have already been created we
         *	skip the equivalent prom's mappings.
         * (4)	Initialize the tsb and its corresponding hardware regs.
         * (5)	Take over the trap table (currently in startup).
         * (6)	Up to this point it is possible the prom required some of its
         *	locked tte's.  Now that we own the trap table we remove them.
         */

        ktsb_pbase = va_to_pa(ktsb_base);
        ktsb4m_pbase = va_to_pa(ktsb4m_base);
        PRM_DEBUG(ktsb_pbase);
        PRM_DEBUG(ktsb4m_pbase);

        sfmmu_setup_4lp();
        sfmmu_patch_ktsb();
        sfmmu_patch_utsb();
        sfmmu_patch_mmu_asi(ktsb_phys);

        sfmmu_init_tsbs();

        if (kpm_enable) {
                sfmmu_kpm_patch_tlbm();
                if (kpm_smallpages == 0) {
                        sfmmu_kpm_patch_tsbm();
                }
        }

        /*
         * The 8K-indexed kernel TSB space is used to hold
         * translations below...
         */
        trans_root = read_prom_mappings(&ntrans_root);
        sfmmu_remap_kernel();
        startup_fixup_physavail();
        mmu_init_kernel_pgsz(kas.a_hat);
        sfmmu_map_prom_mappings(trans_root, ntrans_root);

        /*
         * We invalidate the 8K kernel TSB because we used it in
         * sfmmu_map_prom_mappings().
         */
        sfmmu_inv_tsb(ktsb_base, ktsb_sz);
        sfmmu_inv_tsb(ktsb4m_base, ktsb4m_sz);

        sfmmu_init_ktsbinfo();

        sfmmu_kern_mapped = 1;

        /*
         * hments have been created for mapped pages, and thus we're ready
         * for kmdb to start using its own trap table.  It walks the hments
         * to resolve TLB misses, and can't be used until they're ready.
         */
        if (boothowto & RB_DEBUG)
                kdi_dvec_vmready();
}

/*
 * Macro used below to convert the prom's 32-bit high and low fields into
 * a value appropriate for the 64-bit kernel.
 */
#define	COMBINE(hi, lo)	(((uint64_t)(uint32_t)(hi) << 32) | (uint32_t)(lo))

/*
 * This function traverses the prom mapping list and creates equivalent
 * mappings in the sfmmu mapping hash.
 */
static void
sfmmu_map_prom_mappings(struct translation *trans_root, size_t ntrans_root)
{
        struct translation *promt;
        tte_t	tte, oldtte, *ttep;
        pfn_t	pfn, oldpfn, basepfn;
        caddr_t vaddr;
        size_t	size, offset;
        unsigned long i;
        uint_t	attr;
        page_t *pp;
        extern struct memlist *virt_avail;

        ttep = &tte;
        for (i = 0, promt = trans_root; i < ntrans_root; i++, promt++) {
                ASSERT(promt->tte_hi != 0);
                ASSERT32(promt->virt_hi == 0 && promt->size_hi == 0);

                /*
                 * hack until we get rid of map-for-unix
                 */
                if (COMBINE(promt->virt_hi, promt->virt_lo) < KERNELBASE)
                        continue;

                ttep->tte_inthi = promt->tte_hi;
                ttep->tte_intlo = promt->tte_lo;
                attr = PROC_DATA | HAT_NOSYNC;
#if defined(TTE_IS_GLOBAL)
                if (TTE_IS_GLOBAL(ttep)) {
                        /*
                         * The prom better not use global translations
                         * because a user process might use the same
                         * virtual addresses.
                         */
                        cmn_err(CE_PANIC, "map_prom: global translation");
                        TTE_SET_LOFLAGS(ttep, TTE_GLB_INT, 0);
                }
#endif
                if (TTE_IS_LOCKED(ttep)) {
                        /* clear the lock bits */
                        TTE_CLR_LOCKED(ttep);
                }
                attr |= (TTE_IS_VCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEVTTE;
                attr |= (TTE_IS_PCACHEABLE(ttep)) ? 0 : SFMMU_UNCACHEPTTE;
                attr |= (TTE_IS_SIDEFFECT(ttep)) ? SFMMU_SIDEFFECT : 0;
                attr |= (TTE_IS_IE(ttep)) ? HAT_STRUCTURE_LE : 0;
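                /*
                 * At this point attr describes the prom mapping in HAT
                 * terms.  For example (an illustrative case, not a specific
                 * prom entry): a device translation that is neither
                 * virtually nor physically cacheable and has the side-effect
                 * bit set ends up with
                 *	PROC_DATA | HAT_NOSYNC | SFMMU_UNCACHEVTTE |
                 *	SFMMU_UNCACHEPTTE | SFMMU_SIDEFFECT
                 * while an ordinary cacheable memory translation keeps just
                 * PROC_DATA | HAT_NOSYNC.
                 */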
                size = COMBINE(promt->size_hi, promt->size_lo);
                offset = 0;
                basepfn = TTE_TO_PFN((caddr_t)COMBINE(promt->virt_hi,
                    promt->virt_lo), ttep);
                while (size) {
                        vaddr = (caddr_t)(COMBINE(promt->virt_hi,
                            promt->virt_lo) + offset);

                        /*
                         * make sure address is not in virt-avail list
                         */
                        if (address_in_memlist(virt_avail, (uint64_t)vaddr,
                            size)) {
                                cmn_err(CE_PANIC, "map_prom: inconsistent "
                                    "translation/avail lists");
                        }

                        pfn = basepfn + mmu_btop(offset);
                        if (pf_is_memory(pfn)) {
                                if (attr & SFMMU_UNCACHEPTTE) {
                                        cmn_err(CE_PANIC, "map_prom: "
                                            "uncached prom memory page");
                                }
                        } else {
                                if (!(attr & SFMMU_SIDEFFECT)) {
                                        cmn_err(CE_PANIC, "map_prom: prom "
                                            "i/o page without side-effect");
                                }
                        }
                        oldpfn = sfmmu_vatopfn(vaddr, KHATID, &oldtte);
                        ASSERT(oldpfn != PFN_SUSPENDED);
                        ASSERT(page_relocate_ready == 0);

                        if (oldpfn != PFN_INVALID) {
                                /*
                                 * A mapping already exists; verify that the
                                 * two mappings agree.
                                 */
                                if (pfn != oldpfn) {
                                        cmn_err(CE_PANIC, "map_prom: mapping "
                                            "conflict (va=0x%p pfn=%p, "
                                            "oldpfn=%p)",
                                            (void *)vaddr, (void *)pfn,
                                            (void *)oldpfn);
                                }
                                size -= MMU_PAGESIZE;
                                offset += MMU_PAGESIZE;
                                continue;
                        }

                        pp = page_numtopp_nolock(pfn);
                        if ((pp != NULL) && PP_ISFREE((page_t *)pp)) {
                                cmn_err(CE_PANIC, "map_prom: "
                                    "prom-mapped page (va 0x%p, pfn 0x%p) "
                                    "on free list", (void *)vaddr,
                                    (void *)pfn);
                        }

                        sfmmu_memtte(ttep, pfn, attr, TTE8K);
                        sfmmu_tteload(kas.a_hat, ttep, vaddr, pp,
                            HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD);
                        size -= MMU_PAGESIZE;
                        offset += MMU_PAGESIZE;
                }
        }
}

#undef	COMBINE	/* local to previous routine */

/*
 * This routine reads the "translations" property into a buffer and
 * returns a pointer to this buffer and the number of translations.
 */
static struct translation *
read_prom_mappings(size_t *ntransrootp)
{
        char *prop = "translations";
        size_t translen;
        dnode_t node;
        struct translation *transroot;

        /*
         * the "translations" property is associated with the mmu node
         */
        node = (dnode_t)prom_getphandle(prom_mmu_ihandle());

        /*
         * We use the TSB space to read in the prom mappings.  This space
         * is currently not being used because we haven't taken over the
         * trap table yet.  It should be big enough to hold the mappings.
         */
        if ((translen = prom_getproplen(node, prop)) == -1)
                cmn_err(CE_PANIC, "no translations property");
        *ntransrootp = translen / sizeof (*transroot);
        translen = roundup(translen, MMU_PAGESIZE);
        PRM_DEBUG(translen);
        if (translen > TSB_BYTES(ktsb_szcode))
                cmn_err(CE_PANIC, "not enough space for translations");

        transroot = (struct translation *)ktsb_base;
        ASSERT(transroot);
        if (prom_getprop(node, prop, (caddr_t)transroot) == -1) {
                cmn_err(CE_PANIC, "translations getprop failed");
        }
        return (transroot);
}
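/*
 * Layout note (inferred from the field accesses above, not from a prom
 * binding document): each struct translation carries three 64-bit values
 * split into 32-bit hi/lo halves -- virt_hi/virt_lo, size_hi/size_lo and
 * tte_hi/tte_lo -- which is why COMBINE() is needed, e.g.
 *	COMBINE(0x0000000f, 0x80000000) == 0xf80000000
 * reassembles a 64-bit virtual address from its two 32-bit prom cells.
 */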
/*
 * Init routine of the nucleus data memory allocator.
 *
 * The nucleus data memory allocator is organized in ecache_alignsize'd
 * memory chunks.  Memory allocated by ndata_alloc() will never be freed.
 *
 * The ndata argument is used as the header of the ndata freelist.
 * Other freelist nodes are placed in the nucleus memory itself
 * at the beginning of a free memory chunk.  Therefore a freelist
 * node (struct memlist) must fit into the smallest allocatable
 * memory chunk (ecache_alignsize bytes).
 *
 * The memory interval [base, end] passed to ndata_alloc_init() must be
 * bzero'd to allow the allocator to return bzero'd memory easily.
 */
void
ndata_alloc_init(struct memlist *ndata, uintptr_t base, uintptr_t end)
{
        ASSERT(sizeof (struct memlist) <= ecache_alignsize);

        base = roundup(base, ecache_alignsize);
        end = end - end % ecache_alignsize;

        ASSERT(base < end);

        ndata->address = base;
        ndata->size = end - base;
        ndata->next = NULL;
        ndata->prev = NULL;
}

/*
 * Deliver the size of the largest free memory chunk.
 */
size_t
ndata_maxsize(struct memlist *ndata)
{
        size_t chunksize = ndata->size;

        while ((ndata = ndata->next) != NULL) {
                if (chunksize < ndata->size)
                        chunksize = ndata->size;
        }

        return (chunksize);
}

/*
 * This is a special function to figure out whether the memory chunk needed
 * for the page structs can fit in the nucleus or not.  If it fits, the
 * function calculates and returns the ndata size that would remain in the
 * last element if the space needed for the page structs were allocated from
 * the nucleus.
 */
size_t
ndata_spare(struct memlist *ndata, size_t wanted, size_t alignment)
{
        struct memlist *frlist;
        uintptr_t base;
        uintptr_t end;

        for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
                base = roundup(frlist->address, alignment);
                end = roundup(base + wanted, ecache_alignsize);

                if (end <= frlist->address + frlist->size) {
                        if (frlist->next == NULL)
                                return (frlist->address + frlist->size - end);

                        while (frlist->next != NULL)
                                frlist = frlist->next;

                        return (frlist->size);
                }
        }

        return (0);
}

/*
 * Allocate the last properly aligned memory chunk.
 * This function is called when no more large nucleus memory chunks
 * will be allocated.  The remaining free nucleus memory at the end
 * of the nucleus can be added to the phys_avail list.
 */
void *
ndata_extra_base(struct memlist *ndata, size_t alignment)
{
        uintptr_t base;
        size_t wasteage = 0;
#ifdef	DEBUG
        static int called = 0;

        if (called++ > 0)
                cmn_err(CE_PANIC, "ndata_extra_base() called more than once");
#endif /* DEBUG */

        /*
         * The alignment needs to be a multiple of ecache_alignsize.
         */
        ASSERT((alignment % ecache_alignsize) == 0);

        while (ndata->next != NULL) {
                wasteage += ndata->size;
                ndata = ndata->next;
        }

        base = roundup(ndata->address, alignment);

        if (base >= ndata->address + ndata->size)
                return (NULL);

        if (base == ndata->address) {
                if (ndata->prev != NULL)
                        ndata->prev->next = NULL;
                else
                        ndata->size = 0;

                bzero((void *)base, sizeof (struct memlist));

        } else {
                ndata->size = base - ndata->address;
                wasteage += ndata->size;
        }
        PRM_DEBUG(wasteage);

        return ((void *)base);
}

/*
 * Select the best matching buffer, avoid memory fragmentation.
 */
static struct memlist *
ndata_select_chunk(struct memlist *ndata, size_t wanted, size_t alignment)
{
        struct memlist *fnd_below = NULL;
        struct memlist *fnd_above = NULL;
        struct memlist *fnd_unused = NULL;
        struct memlist *frlist;
        uintptr_t base;
        uintptr_t end;
        size_t below;
        size_t above;
        size_t unused;
        size_t best_below = ULONG_MAX;
        size_t best_above = ULONG_MAX;
        size_t best_unused = ULONG_MAX;

        ASSERT(ndata != NULL);

        /*
         * Look for the best matching buffer, avoid memory fragmentation.
         * The following strategy is used; try to find
         *   1. an exact fitting buffer
         *   2. avoid wasting any space below the buffer, take first
         *	fitting buffer
         *   3. avoid wasting any space above the buffer, take first
         *	fitting buffer
         *   4. avoid wasting space, take first fitting buffer
         *   5. take the last buffer in chain
         */
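        /*
         * For example (sizes assumed here for illustration, with
         * ecache_alignsize taken to be 0x40): a request for wanted == 0x130
         * bytes at alignment 0x40 consumes roundup(0x130, 0x40) == 0x140
         * bytes of a chunk.  A chunk that starts suitably aligned and is
         * exactly 0x140 bytes long is an exact fit and is returned
         * immediately (case 1); otherwise the loop below remembers the
         * chunks wasting the least space below, above, and overall, and
         * finally falls back to the last chunk in the chain.
         */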
        for (frlist = ndata; frlist != NULL; frlist = frlist->next) {
                base = roundup(frlist->address, alignment);
                end = roundup(base + wanted, ecache_alignsize);

                if (end > frlist->address + frlist->size)
                        continue;

                below = (base - frlist->address) / ecache_alignsize;
                above = (frlist->address + frlist->size - end) /
                    ecache_alignsize;
                unused = below + above;

                if (unused == 0)
                        return (frlist);

                if (frlist->next == NULL)
                        break;

                if (below < best_below) {
                        best_below = below;
                        fnd_below = frlist;
                }

                if (above < best_above) {
                        best_above = above;
                        fnd_above = frlist;
                }

                if (unused < best_unused) {
                        best_unused = unused;
                        fnd_unused = frlist;
                }
        }

        if (best_below == 0)
                return (fnd_below);
        if (best_above == 0)
                return (fnd_above);
        if (best_unused < ULONG_MAX)
                return (fnd_unused);

        return (frlist);
}

/*
 * Nucleus data memory allocator.
 * The granularity of the allocator is ecache_alignsize.
 * See also comment for ndata_alloc_init().
 */
void *
ndata_alloc(struct memlist *ndata, size_t wanted, size_t alignment)
{
        struct memlist *found;
        struct memlist *fnd_above;
        uintptr_t base;
        uintptr_t end;
        size_t below;
        size_t above;

        /*
         * Look for the best matching buffer, avoid memory fragmentation.
         */
        if ((found = ndata_select_chunk(ndata, wanted, alignment)) == NULL)
                return (NULL);

        /*
         * Allocate the nucleus data buffer.
         */
        base = roundup(found->address, alignment);
        end = roundup(base + wanted, ecache_alignsize);
        ASSERT(end <= found->address + found->size);

        below = base - found->address;
        above = found->address + found->size - end;
        ASSERT(above == 0 || (above % ecache_alignsize) == 0);

        if (below >= ecache_alignsize) {
                /*
                 * There is free memory below the allocated memory chunk.
                 */
                found->size = below - below % ecache_alignsize;

                if (above) {
                        fnd_above = (struct memlist *)end;
                        fnd_above->address = end;
                        fnd_above->size = above;

                        if ((fnd_above->next = found->next) != NULL)
                                found->next->prev = fnd_above;
                        fnd_above->prev = found;
                        found->next = fnd_above;
                }

                return ((void *)base);
        }

        if (found->prev == NULL) {
                /*
                 * The first chunk (ndata) is selected.
                 */
                ASSERT(found == ndata);

                if (above) {
                        found->address = end;
                        found->size = above;
                } else if (found->next != NULL) {
                        found->address = found->next->address;
                        found->size = found->next->size;
                        if ((found->next = found->next->next) != NULL)
                                found->next->prev = found;

                        bzero((void *)found->address,
                            sizeof (struct memlist));
                } else {
                        found->address = end;
                        found->size = 0;
                }

                return ((void *)base);
        }

        /*
         * Not the first chunk.
         */
        if (above) {
                fnd_above = (struct memlist *)end;
                fnd_above->address = end;
                fnd_above->size = above;

                if ((fnd_above->next = found->next) != NULL)
                        fnd_above->next->prev = fnd_above;
                fnd_above->prev = found->prev;
                found->prev->next = fnd_above;

        } else {
                if ((found->prev->next = found->next) != NULL)
                        found->next->prev = found->prev;
        }

        bzero((void *)found->address, sizeof (struct memlist));

        return ((void *)base);
}
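/*
 * Worked example for ndata_alloc() (all numbers assumed for illustration):
 * with ecache_alignsize == 0x40, a request for wanted == 0x100 bytes at
 * alignment 0x200 from a free chunk [0x1040, 0x1400) yields base == 0x1200
 * and end == 0x1300.  The chunk is split three ways: [0x1040, 0x1200) stays
 * on the freelist, [0x1200, 0x1300) is returned to the caller, and a new
 * freelist node is built in place at 0x1300 covering the remaining 0x100
 * bytes.
 */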
/*
 * Size the kernel TSBs based upon the amount of physical
 * memory in the system.
 */
static void
calc_tsb_sizes(pgcnt_t npages)
{
        PRM_DEBUG(npages);

        if (npages <= TSB_FREEMEM_MIN) {
                ktsb_szcode = TSB_128K_SZCODE;
                enable_bigktsb = 0;
        } else if (npages <= TSB_FREEMEM_LARGE / 2) {
                ktsb_szcode = TSB_256K_SZCODE;
                enable_bigktsb = 0;
        } else if (npages <= TSB_FREEMEM_LARGE) {
                ktsb_szcode = TSB_512K_SZCODE;
                enable_bigktsb = 0;
        } else if (npages <= TSB_FREEMEM_LARGE * 2 ||
            enable_bigktsb == 0) {
                ktsb_szcode = TSB_1M_SZCODE;
                enable_bigktsb = 0;
        } else {
                ktsb_szcode = highbit(npages - 1);
                ktsb_szcode -= TSB_START_SIZE;
                ktsb_szcode = MAX(ktsb_szcode, MIN_BIGKTSB_SZCODE);
                ktsb_szcode = MIN(ktsb_szcode, MAX_BIGKTSB_SZCODE);
        }

        /*
         * We choose the TSB that holds kernel 4M mappings to have twice
         * the reach of the primary kernel TSB since this TSB will
         * potentially (currently) be shared by both mappings to all of
         * physical memory plus user TSBs.  Since the current limit on the
         * primary kernel TSB size is 16MB this will top out at 64K, which
         * we can certainly afford.
         */
        ktsb4m_szcode = ktsb_szcode - (MMU_PAGESHIFT4M - MMU_PAGESHIFT) + 1;
        if (ktsb4m_szcode < TSB_MIN_SZCODE)
                ktsb4m_szcode = TSB_MIN_SZCODE;

        ktsb_sz = TSB_BYTES(ktsb_szcode);	/* kernel 8K tsb size */
        ktsb4m_sz = TSB_BYTES(ktsb4m_szcode);	/* kernel 4M tsb size */
}

/*
 * Allocate kernel TSBs from nucleus data memory.
 * The function returns 0 on success and -1 on failure.
 */
int
ndata_alloc_tsbs(struct memlist *ndata, pgcnt_t npages)
{
        /*
         * Size the kernel TSBs based upon the amount of physical
         * memory in the system.
         */
        calc_tsb_sizes(npages);

        /*
         * Allocate the 8K kernel TSB if it belongs inside the nucleus.
         */
        if (enable_bigktsb == 0) {
                if ((ktsb_base = ndata_alloc(ndata, ktsb_sz, ktsb_sz)) == NULL)
                        return (-1);
                ASSERT(!((uintptr_t)ktsb_base & (ktsb_sz - 1)));

                PRM_DEBUG(ktsb_base);
                PRM_DEBUG(ktsb_sz);
                PRM_DEBUG(ktsb_szcode);
        }

        /*
         * Next, allocate 4M kernel TSB from the nucleus since it's small.
         */
        if ((ktsb4m_base = ndata_alloc(ndata, ktsb4m_sz, ktsb4m_sz)) == NULL)
                return (-1);
        ASSERT(!((uintptr_t)ktsb4m_base & (ktsb4m_sz - 1)));

        PRM_DEBUG(ktsb4m_base);
        PRM_DEBUG(ktsb4m_sz);
        PRM_DEBUG(ktsb4m_szcode);

        return (0);
}

/*
 * Allocate hat structs from the nucleus data memory.
 */
int
ndata_alloc_hat(struct memlist *ndata, pgcnt_t npages, pgcnt_t kpm_npages)
{
        size_t	ctx_sz;
        size_t	mml_alloc_sz;
        size_t	cb_alloc_sz;
        int	max_nucuhme_buckets = MAX_NUCUHME_BUCKETS;
        int	max_nuckhme_buckets = MAX_NUCKHME_BUCKETS;
        ulong_t hme_buckets;

        if (enable_bigktsb) {
                ASSERT((max_nucuhme_buckets + max_nuckhme_buckets) *
                    sizeof (struct hmehash_bucket) <=
                    TSB_BYTES(TSB_1M_SZCODE));

                max_nucuhme_buckets *= 2;
                max_nuckhme_buckets *= 2;
        }

        /*
         * Allocate ctx structures.
         *
         * It is not possible to calculate how many ctx structures are
         * needed based on v_proc; use whatever module_setup() assigned
         * to nctxs.
         */
        PRM_DEBUG(nctxs);
        ctx_sz = nctxs * sizeof (struct ctx);
        if ((ctxs = ndata_alloc(ndata, ctx_sz, sizeof (struct ctx))) == NULL)
                return (-1);

        PRM_DEBUG(ctxs);

        /*
         * The number of buckets in the hme hash tables
         * is a power of 2 such that the average hash chain length is
         * HMENT_HASHAVELEN.  The number of buckets for the user hash is
         * a function of physical memory and a predefined overmapping factor.
         * The number of buckets for the kernel hash is a function of
         * physical memory only.
         */
        hme_buckets = (npages * HMEHASH_FACTOR) /
            (HMENT_HASHAVELEN * (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT));

        uhmehash_num = (int)MIN(hme_buckets, MAX_UHME_BUCKETS);

        if (uhmehash_num > USER_BUCKETS_THRESHOLD) {
                /*
                 * If uhmehash_num is not a power of 2, round it down to
                 * the next lower power of 2.
                 */
                uint_t align = 1 << (highbit(uhmehash_num - 1) - 1);
                uhmehash_num = P2ALIGN(uhmehash_num, align);
        } else
                uhmehash_num = 1 << highbit(uhmehash_num - 1);
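        /*
         * Rounding example (bucket counts assumed for illustration):
         * highbit() returns the 1-based position of the most significant
         * set bit, so for uhmehash_num == 1500 the branch above computes
         * highbit(1499) == 11, align == 1 << 10 == 1024, and
         * P2ALIGN(1500, 1024) == 1024, i.e. the count is rounded down to
         * a power of 2.  The else branch instead rounds a smaller count
         * up: for uhmehash_num == 100, 1 << highbit(99) == 128.
         */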
        hme_buckets = npages / (HMEBLK_SPAN(TTE8K) >> MMU_PAGESHIFT);

        khmehash_num = (int)MIN(hme_buckets, MAX_KHME_BUCKETS);
        khmehash_num = 1 << highbit(khmehash_num - 1);
        khmehash_num = MAX(khmehash_num, MIN_KHME_BUCKETS);

        if ((khmehash_num > max_nuckhme_buckets) ||
            (uhmehash_num > max_nucuhme_buckets)) {
                khme_hash = NULL;
                uhme_hash = NULL;
        } else {
                size_t hmehash_sz = (uhmehash_num + khmehash_num) *
                    sizeof (struct hmehash_bucket);

                if ((khme_hash = ndata_alloc(ndata, hmehash_sz,
                    ecache_alignsize)) != NULL)
                        uhme_hash = &khme_hash[khmehash_num];
                else
                        uhme_hash = NULL;
                PRM_DEBUG(hmehash_sz);
        }

        PRM_DEBUG(khme_hash);
        PRM_DEBUG(khmehash_num);
        PRM_DEBUG(uhme_hash);
        PRM_DEBUG(uhmehash_num);

        /*
         * For the page mapping list mutex array we allocate one mutex
         * for every 128 pages (1 MB) with a minimum of 64 entries and
         * a maximum of 8K entries.  For the initial computation npages
         * is rounded up (i.e. 1 << highbit(npages * 1.5 / 128)).
         *
         * mml_shift is roughly log2(mml_table_sz) + 3 for MLIST_HASH.
         *
         * It is not required that this be allocated from the nucleus,
         * but it is desirable.  So we first allocate from the nucleus
         * everything that must be there.  Having done so, if mml_table
         * will fit within what remains of the nucleus then it will be
         * allocated here.  If not, set mml_table to NULL, which will cause
         * startup_memlist() to BOP_ALLOC() space for it after our return...
         */
        mml_table_sz = 1 << highbit((npages * 3) / 256);
        if (mml_table_sz < 64)
                mml_table_sz = 64;
        else if (mml_table_sz > 8192)
                mml_table_sz = 8192;
        mml_shift = highbit(mml_table_sz) + 3;

        PRM_DEBUG(mml_table_sz);
        PRM_DEBUG(mml_shift);

        mml_alloc_sz = mml_table_sz * sizeof (kmutex_t);

        mml_table = ndata_alloc(ndata, mml_alloc_sz, ecache_alignsize);

        PRM_DEBUG(mml_table);

        cb_alloc_sz = sfmmu_max_cb_id * sizeof (struct sfmmu_callback);
        PRM_DEBUG(cb_alloc_sz);
        sfmmu_cb_table = ndata_alloc(ndata, cb_alloc_sz, ecache_alignsize);
        PRM_DEBUG(sfmmu_cb_table);

        /*
         * For the kpm_page mutex array we allocate one mutex for every 16
         * kpm pages (64MB).  In smallpage mode we allocate one mutex for
         * every 8K pages.  The minimum is set to 64 entries and the
         * maximum to 8K entries.
         *
         * It is not required that this be allocated from the nucleus,
         * but it is desirable.  So we first allocate from the nucleus
         * everything that must be there.  Having done so, if kpmp_table
         * or kpmp_stable will fit within what remains of the nucleus
         * then it will be allocated here.  If not, startup_memlist()
         * will BOP_ALLOC() space for it after our return...
         */
        if (kpm_enable) {
                size_t kpmp_alloc_sz;

                if (kpm_smallpages == 0) {
                        kpmp_shift = highbit(sizeof (kpm_page_t)) - 1;
                        kpmp_table_sz = 1 << highbit(kpm_npages / 16);
                        kpmp_table_sz = (kpmp_table_sz < 64) ? 64 :
                            ((kpmp_table_sz > 8192) ? 8192 : kpmp_table_sz);
                        kpmp_alloc_sz = kpmp_table_sz * sizeof (kpm_hlk_t);

                        kpmp_table = ndata_alloc(ndata, kpmp_alloc_sz,
                            ecache_alignsize);

                        PRM_DEBUG(kpmp_table);
                        PRM_DEBUG(kpmp_table_sz);

                        kpmp_stable_sz = 0;
                        kpmp_stable = NULL;
                } else {
                        ASSERT(kpm_pgsz == PAGESIZE);
                        kpmp_shift = highbit(sizeof (kpm_shlk_t)) + 1;
                        kpmp_stable_sz = 1 << highbit(kpm_npages / 8192);
                        kpmp_stable_sz = (kpmp_stable_sz < 64) ? 64 :
                            ((kpmp_stable_sz > 8192) ? 8192 : kpmp_stable_sz);
                        kpmp_alloc_sz = kpmp_stable_sz * sizeof (kpm_shlk_t);

                        kpmp_stable = ndata_alloc(ndata, kpmp_alloc_sz,
                            ecache_alignsize);

                        PRM_DEBUG(kpmp_stable);
                        PRM_DEBUG(kpmp_stable_sz);

                        kpmp_table_sz = 0;
                        kpmp_table = NULL;
                }
                PRM_DEBUG(kpmp_shift);
        }

        return (0);
}
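/*
 * Sizing example (memory size assumed for illustration): with 4M kpm pages,
 * as implied by the "one mutex for every 16 kpm pages (64MB)" comment above,
 * an 8 GB machine has kpm_npages == 2048, so kpmp_table_sz ==
 * 1 << highbit(2048 / 16) == 256, which falls inside the [64, 8192] clamp.
 */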
caddr_t
alloc_hme_buckets(caddr_t base, int pagesize)
{
        size_t hmehash_sz = (uhmehash_num + khmehash_num) *
            sizeof (struct hmehash_bucket);

        ASSERT(khme_hash == NULL);
        ASSERT(uhme_hash == NULL);

        /* If no pagesize specified, use default MMU pagesize */
        if (!pagesize)
                pagesize = MMU_PAGESIZE;

        /*
         * If we start aligned and ask for a multiple of a pagesize, and OBP
         * supports large pages, we will then use mappings of the largest size
         * possible for the BOP_ALLOC, possibly saving us tens of thousands of
         * TLB miss-induced traversals of the TSBs and/or the HME hashes...
         */
        base = (caddr_t)roundup((uintptr_t)base, pagesize);
        hmehash_sz = roundup(hmehash_sz, pagesize);

        khme_hash = (struct hmehash_bucket *)BOP_ALLOC(bootops, base,
            hmehash_sz, pagesize);
        if ((caddr_t)khme_hash != base)
                cmn_err(CE_PANIC, "Cannot bop_alloc hme hash buckets.");
        uhme_hash = (struct hmehash_bucket *)((caddr_t)khme_hash +
            khmehash_num * sizeof (struct hmehash_bucket));
        base += hmehash_sz;

        return (base);
}

/*
 * This function bop allocs the kernel TSB.
 */
caddr_t
sfmmu_ktsb_alloc(caddr_t tsbbase)
{
        caddr_t vaddr;

        if (enable_bigktsb) {
                ktsb_base = (caddr_t)roundup((uintptr_t)tsbbase, ktsb_sz);
                vaddr = (caddr_t)BOP_ALLOC(bootops, ktsb_base, ktsb_sz,
                    ktsb_sz);
                if (vaddr != ktsb_base)
                        cmn_err(CE_PANIC, "sfmmu_ktsb_alloc: can't alloc"
                            " bigktsb");
                ktsb_base = vaddr;
                tsbbase = ktsb_base + ktsb_sz;
                PRM_DEBUG(ktsb_base);
                PRM_DEBUG(tsbbase);
        }
        return (tsbbase);
}

/*
 * Moves code assembled outside of the trap table into the trap
 * table, taking care to relocate relative branches to code outside
 * of the trap handler.
 */
static void
sfmmu_reloc_trap_handler(void *tablep, void *start, size_t count)
{
        size_t i;
        uint32_t *src;
        uint32_t *dst;
        uint32_t inst;
        int op, op2;
        int32_t offset;
        int disp;

        src = start;
        dst = tablep;
        offset = src - dst;
        for (src = start, i = 0; i < count; i++, src++, dst++) {
                inst = *dst = *src;
                op = (inst >> 30) & 0x3;
                if (op == 1) {
                        /* call */
                        disp = ((int32_t)inst << 2) >> 2; /* sign-extend */
                        if (disp + i >= 0 && disp + i < count)
                                continue;
                        disp += offset;
                        inst = 0x40000000u | (disp & 0x3fffffffu);
                        *dst = inst;
                } else if (op == 0) {
                        /* branch or sethi */
                        op2 = (inst >> 22) & 0x7;

                        switch (op2) {
                        case 0x3: /* BPr */
                                disp = (((inst >> 20) & 0x3) << 14) |
                                    (inst & 0x3fff);
                                disp = (disp << 16) >> 16; /* sign-extend */
                                if (disp + i >= 0 && disp + i < count)
                                        continue;
                                disp += offset;
                                if (((disp << 16) >> 16) != disp)
                                        cmn_err(CE_PANIC, "bad reloc");
                                inst &= ~0x303fff;
                                inst |= (disp & 0x3fff);
                                inst |= (disp & 0xc000) << 6;
                                break;

                        case 0x2: /* Bicc */
                                disp = ((int32_t)inst << 10) >> 10;
                                if (disp + i >= 0 && disp + i < count)
                                        continue;
                                disp += offset;
                                if (((disp << 10) >> 10) != disp)
                                        cmn_err(CE_PANIC, "bad reloc");
                                inst &= ~0x3fffff;
                                inst |= (disp & 0x3fffff);
                                break;

                        case 0x1: /* Bpcc */
                                disp = ((int32_t)inst << 13) >> 13;
                                if (disp + i >= 0 && disp + i < count)
                                        continue;
                                disp += offset;
                                if (((disp << 13) >> 13) != disp)
                                        cmn_err(CE_PANIC, "bad reloc");
                                inst &= ~0x7ffff;
                                inst |= (disp & 0x7ffffu);
                                break;
                        }

                        *dst = inst;
                }
        }
        flush_instr_mem(tablep, count * sizeof (uint32_t));
}
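/*
 * Relocation arithmetic, by example (displacements assumed for
 * illustration): offset above is the distance, in instructions, from the
 * trap table slot back to where the code was originally assembled.  A
 * branch whose target index lies inside [0, count) moves along with the
 * copy and is left alone; one that targets code outside the copied region
 * must keep pointing at the original location, so its displacement grows
 * by offset.  E.g. a Bicc with disp == 0x40 and offset == 0x1000 is
 * rewritten with disp == 0x1040, which still fits the 22-bit field;
 * displacements that no longer fit cause the "bad reloc" panic.
 */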
/*
 * Routine to allocate a large page to use in the TSB caches.
 */
/*ARGSUSED*/
static page_t *
sfmmu_tsb_page_create(void *addr, size_t size, int vmflag, void *arg)
{
        int pgflags;

        pgflags = PG_EXCL;
        if ((vmflag & VM_NOSLEEP) == 0)
                pgflags |= PG_WAIT;
        if (vmflag & VM_PANIC)
                pgflags |= PG_PANIC;
        if (vmflag & VM_PUSHPAGE)
                pgflags |= PG_PUSHPAGE;

        return (page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
            pgflags, &kvseg, addr, arg));
}

/*
 * Allocate a large page to back the virtual address range
 * [addr, addr + size).  If addr is NULL, allocate the virtual address
 * space as well.
 */
static void *
sfmmu_tsb_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag,
    uint_t attr, page_t *(*page_create_func)(void *, size_t, int, void *),
    void *pcarg)
{
        page_t *ppl;
        page_t *rootpp;
        caddr_t addr = inaddr;
        pgcnt_t npages = btopr(size);
        page_t **ppa;
        int i = 0;

        /*
         * Assume that only TSBs will call this with size > PAGESIZE.
         * There is no reason why this couldn't be expanded to 8k pages as
         * well, or other page sizes in the future .... but for now, we
         * only support fixed sized page requests.
         */
        if ((inaddr == NULL) && ((addr = vmem_xalloc(vmp, size, size, 0, 0,
            NULL, NULL, vmflag)) == NULL))
                return (NULL);

        /* If we ever don't want TSB slab-sized pages, this will panic */
        ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

        if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
                if (inaddr == NULL)
                        vmem_xfree(vmp, addr, size);
                return (NULL);
        }

        ppl = page_create_func(addr, size, vmflag, pcarg);
        if (ppl == NULL) {
                if (inaddr == NULL)
                        vmem_xfree(vmp, addr, size);
                page_unresv(npages);
                return (NULL);
        }

        rootpp = ppl;
        ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
        while (ppl != NULL) {
                page_t *pp = ppl;
                ppa[i++] = pp;
                page_sub(&ppl, pp);
                ASSERT(page_iolock_assert(pp));
                page_io_unlock(pp);
        }

        /*
         * Load the locked entry.  It's OK to preload the entry into
         * the TSB since we now support large mappings in the kernel TSB.
         */
        hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size,
            ppa, (PROT_ALL & ~PROT_USER) | HAT_NOSYNC | attr, HAT_LOAD_LOCK);

        for (--i; i >= 0; --i) {
                (void) page_pp_lock(ppa[i], 0, 1);
                page_unlock(ppa[i]);
        }

        kmem_free(ppa, npages * sizeof (page_t *));
        return (addr);
}

/* Called to import new spans into the TSB vmem arenas */
void *
sfmmu_tsb_segkmem_alloc(vmem_t *vmp, size_t size, int vmflag)
{
        lgrp_id_t lgrpid = LGRP_NONE;

        if (tsb_lgrp_affinity) {
                /*
                 * Search for the vmp->lgrpid mapping by brute force;
                 * some day vmp will have an lgrp, until then we have
                 * to do this the hard way.
                 */
                for (lgrpid = 0; lgrpid < NLGRPS_MAX &&
                    vmp != kmem_tsb_default_arena[lgrpid]; lgrpid++)
                        ;
                if (lgrpid == NLGRPS_MAX)
                        lgrpid = LGRP_NONE;
        }

        return (sfmmu_tsb_xalloc(vmp, NULL, size, vmflag, 0,
            sfmmu_tsb_page_create, lgrpid != LGRP_NONE ? &lgrpid : NULL));
}

/* Called to free spans from the TSB vmem arenas */
void
sfmmu_tsb_segkmem_free(vmem_t *vmp, void *inaddr, size_t size)
{
        page_t *pp;
        caddr_t addr = inaddr;
        caddr_t eaddr;
        pgcnt_t npages = btopr(size);
        pgcnt_t pgs_left = npages;
        page_t *rootpp = NULL;

        ASSERT(((uintptr_t)addr & (tsb_slab_size - 1)) == 0);

        hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);

        for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
                pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
                if (pp == NULL)
                        panic("sfmmu_tsb_segkmem_free: page not found");

                ASSERT(PAGE_EXCL(pp));
                page_pp_unlock(pp, 0, 1);

                if (rootpp == NULL)
                        rootpp = pp;
                if (--pgs_left == 0) {
                        /*
                         * similar logic to segspt_free_pages, but we know we
                         * have one large page.
                         */
                        page_destroy_pages(rootpp);
                }
        }
        page_unresv(npages);

        if (vmp != NULL)
                vmem_xfree(vmp, inaddr, size);
}
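/*
 * Rough boot-time usage of the nucleus allocators above, as implied by the
 * comments in this file (the real call sites live in the platform startup
 * code, e.g. startup_memlist(); the variable names here are illustrative):
 *
 *	ndata_alloc_init(&ndata, nucleus_base, nucleus_end);
 *	(void) ndata_alloc_tsbs(&ndata, npages);
 *	(void) ndata_alloc_hat(&ndata, npages, kpm_npages);
 *	extra = ndata_extra_base(&ndata, alignment);
 *
 * Anything that did not fit in the nucleus (mml_table, kpmp_table, the hme
 * hash buckets) is then BOP_ALLOC'd by the caller, e.g. via
 * alloc_hme_buckets() and sfmmu_ktsb_alloc() above.
 */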