17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57b93957cSeota * Common Development and Distribution License (the "License"). 67b93957cSeota * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*733cdf20Smrj * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate /* 297c478bd9Sstevel@tonic-gate * PMEM - Direct mapping physical memory pages to userland process 307c478bd9Sstevel@tonic-gate * 317c478bd9Sstevel@tonic-gate * Provide functions used for directly (w/o occupying kernel virtual address 327c478bd9Sstevel@tonic-gate * space) allocating and exporting physical memory pages to userland. 337c478bd9Sstevel@tonic-gate */ 347c478bd9Sstevel@tonic-gate 357c478bd9Sstevel@tonic-gate #include <sys/types.h> 367c478bd9Sstevel@tonic-gate #include <sys/mutex.h> 377c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 387c478bd9Sstevel@tonic-gate #include <sys/ddidevmap.h> 397c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 407c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 417c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h> 427c478bd9Sstevel@tonic-gate #include <sys/pmem.h> 437c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 447c478bd9Sstevel@tonic-gate #include <sys/task.h> 457c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 467c478bd9Sstevel@tonic-gate 477c478bd9Sstevel@tonic-gate /* 487c478bd9Sstevel@tonic-gate * The routines in this file allocate memory which will be accessed through 497c478bd9Sstevel@tonic-gate * the AGP GART hardware. The GART is programmed with the PFNs for this 507c478bd9Sstevel@tonic-gate * memory, and the only mechanism for removing these entries is by an 517c478bd9Sstevel@tonic-gate * explicit process operation (ioctl/close of the driver, or process exit). 527c478bd9Sstevel@tonic-gate * As such, the pages need to remain locked to ensure that they won't be 537c478bd9Sstevel@tonic-gate * relocated or paged out. 
547c478bd9Sstevel@tonic-gate * 557c478bd9Sstevel@tonic-gate * To prevent these locked pages from getting in the way of page 567c478bd9Sstevel@tonic-gate * coalescing, we try to allocate large pages from the system, and carve 577c478bd9Sstevel@tonic-gate * them up to satisfy pmem allocation requests. This will keep the locked 587c478bd9Sstevel@tonic-gate * pages within a constrained area of physical memory, limiting the number 597c478bd9Sstevel@tonic-gate * of large pages that would be pinned by our locked pages. This is, of 607c478bd9Sstevel@tonic-gate * course, another take on the infamous kernel cage, and it has many of the 617c478bd9Sstevel@tonic-gate * downsides of the original cage. It also interferes with system-wide 627c478bd9Sstevel@tonic-gate * resource management decisions, as it maintains its own pool of unused 637c478bd9Sstevel@tonic-gate * pages which can't be easily reclaimed and used during low-memory 647c478bd9Sstevel@tonic-gate * situations. 657c478bd9Sstevel@tonic-gate * 667c478bd9Sstevel@tonic-gate * The right solution is for pmem to register a callback that the VM system 677c478bd9Sstevel@tonic-gate * could call, which would temporarily remove any GART entries for pages 687c478bd9Sstevel@tonic-gate * that were being relocated. This would let us leave the pages unlocked, 697c478bd9Sstevel@tonic-gate * which would remove the need for using large pages, which would simplify 707c478bd9Sstevel@tonic-gate * this code a great deal. Unfortunately, the support for these callbacks 717c478bd9Sstevel@tonic-gate * only exists on some SPARC platforms right now. 727c478bd9Sstevel@tonic-gate * 737c478bd9Sstevel@tonic-gate * Note that this is the *only* reason that large pages are used here. The 747c478bd9Sstevel@tonic-gate * GART can't perform large-page translations, and the code appropriately 757c478bd9Sstevel@tonic-gate * falls back to using small pages if page_create_va_large() fails. 
 */

/*
 * Acquire/release the per-handle lock, but only when the handle was set up
 * with remap permission (DEVMAP_ALLOW_REMAP) -- dh_lock is only initialized
 * in that case (see devmap_pmem_setup()).
 */
#define	HOLD_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_enter(&dhp->dh_lock); }

#define	RELE_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_exit(&dhp->dh_lock); }

/* True if this small page was carved out of a large page (p_szc != 0). */
#define	FROM_LPG(pp) (pp->p_szc != 0)
/*
 * Index of pp within its enclosing large page; pmem_pgcnt is a power of
 * two, so the mask extracts the low-order bits of the page frame number.
 */
#define	PFIND(pp) (page_pptonum(pp) & (pmem_pgcnt - 1))

/*
 * Structs and static variables used for pmem only.
 */

/*
 * Allocation record for one large page.  Which constituent small pages are
 * free is tracked in pl_bitmap (one bit per small page; a set bit means the
 * page is free).  Records live on circular doubly-linked lists through
 * pl_next/pl_prev.
 */
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp */
	ulong_t	*pl_bitmap;	/* allocation status for each page */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;

static size_t	pmem_lpgsize;	/* the size of one large page */
static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
static uint_t	pmem_lszc;	/* page size code of the large page */
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t	*pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages residing in pmem_mpool currently. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
kmutex_t	pmem_mutex;

static int lpg_isfree(pmem_lpg_t *);
static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_alloc(uint_t);
static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
static void lpg_free(page_t *spp);
static pgcnt_t mpool_break(page_t **, pgcnt_t);
static void mpool_append(page_t **, pgcnt_t);
static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
    vnode_t *, u_offset_t *, uint_t);
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
static int pmem_lock(pgcnt_t, proc_t *p);

/*
 * Called by driver devmap routine to pass physical memory mapping info to
 * seg_dev framework, used only for physical memory allocated from
 * devmap_pmem_alloc().
 *
 * dhc		- devmap handle created by the devmap framework
 * dip		- the driver's dev_info (unused here)
 * callbackops	- optional devmap callbacks, copied into the handle
 * cookie	- pmem cookie returned by devmap_pmem_alloc()
 * off, len	- range of the cookie being exported; must lie within the
 *		  cookie's dp_npages pages
 * maxprot	- maximum protections the driver allows for this mapping
 * flags	- DEVMAP_* setup flags plus IOMEM_* cache attributes
 * accattrp	- device access attributes (unused; cache attributes are
 *		  taken from 'flags' instead)
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if any validation fails.
 */
/* ARGSUSED */
int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/* The requested range must fit inside the cookie's pages. */
	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * First, check if this function has already been called for this dhp.
	 */
	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
		return (DDI_FAILURE);

	/* maxprot must not take away any protection dhp already grants. */
	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Need to pay
	 * attention that only uncachable or write-combining is
	 * permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	if (flags & DEVMAP_MAPPING_INVALID) {
		/*
		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
		 * remap permission, since the real cookie can only be
		 * installed later through devmap_pmem_remap().
		 */
		if (!(flags & DEVMAP_ALLOW_REMAP))
			return (DDI_FAILURE);
	} else {
		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
		/* dh_roff is the offset inside the dh_pcookie. */
		dhp->dh_roff = ptob(btop(off));
		/* Set the cache attributes correctly */
		i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);
	}

	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
	dhp->dh_len = ptob(btopr(len));

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);

	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}

	/*
	 * Initialize dh_lock if we want to do remap.
	 */
	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
		dhp->dh_flags |= DEVMAP_LOCK_INITED;
	}

	dhp->dh_flags |= DEVMAP_SETUP_DONE;

	return (DDI_SUCCESS);
}

/*
 * Replace the existing mapping using a new cookie, mainly gets called when
 * doing fork(). Should be called from the associated devmap_dup(9E) entry
 * point.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if setup was never completed, remap
 * permission was not granted, 'flags' is non-zero, the protections or the
 * requested range are invalid, or the cache attributes are unsupported.
 */
/* ARGSUSED */
int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/*
	 * Return failure if setup has not been done or no remap permission
	 * has been granted during the setup.
	 */
	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
		return (DDI_FAILURE);

	/* No flags supported for remap yet. */
	if (flags != 0)
		return (DDI_FAILURE);

	/* maxprot must not take away any protection dhp already grants. */
	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/* The requested range must fit inside the new cookie's pages. */
	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Need to pay
	 * attention that only uncachable or write-combining is
	 * permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	HOLD_DHP_LOCK(dhp);
	/*
	 * Unload the old mapping of pages associated with this dhp, so the
	 * next fault will set up the new mappings. It is segdev_faultpage
	 * that calls hat_devload to establish the mapping. Do this while
	 * holding the dhp lock so other faults don't reestablish the
	 * mappings.
	 */
	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);

	/* Set the cache attributes correctly */
	i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);

	dhp->dh_pcookie = cookie;
	dhp->dh_roff = ptob(btop(off));
	dhp->dh_len = ptob(btopr(len));

	/* Clear the large page size flag. */
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
	RELE_DHP_LOCK(dhp);
	return (DDI_SUCCESS);
}

/*
 * Directly (i.e., without occupying kernel virtual address space) allocate
 * 'npages' physical memory pages for exporting to user land. The allocated
 * page_t pointers are recorded in the returned cookie.
 *
 * 'flags' must contain PMEM_SLEEP or PMEM_NOSLEEP. On success, the new
 * cookie is stored through 'cookiep' and DDI_SUCCESS is returned; on
 * failure, *cookiep is left NULL and DDI_FAILURE is returned.
 */
int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
	u_offset_t pmem_off = 0;
	page_t *pp = NULL;		/* freshly created small pages */
	page_t *lpp = NULL;		/* small pages carved from large pages */
	page_t *tlist = NULL;		/* running list of all pages gathered */
	pgcnt_t i = 0;
	pgcnt_t rpages = 0;		/* pages still needed after the pool */
	pgcnt_t lpages = 0;		/* pages obtained via large pages */
	pgcnt_t tpages = 0;		/* pages taken from pmem_mpool */
	pgcnt_t npages = btopr(size);
	pmem_lpg_t *plp = NULL;
	struct devmap_pmem_cookie *pcp;
	uint_t reserved = 0;
	uint_t locked = 0;
	uint_t pflags, kflags;

	*cookiep = NULL;

	/*
	 * Number larger than this will cause page_create_va() to loop
	 * infinitely.
	 */
	if (npages == 0 || npages >= total_pages / 2)
		return (DDI_FAILURE);
	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
		return (DDI_FAILURE);
	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;

	/* Allocate pmem cookie. */
	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
		return (DDI_FAILURE);
	pcp->dp_npages = npages;

	/*
	 * See if the requested memory can be locked; the charge is
	 * accounted against the calling process (pcp->dp_proc).
	 */
	pcp->dp_proc = curproc;
	if (pmem_lock(npages, curproc) == DDI_FAILURE)
		goto alloc_fail;
	locked = 1;
	/*
	 * First, grab as many as possible from pmem_mpool. If pages in
	 * pmem_mpool are enough for this request, we are done.
	 */
	mutex_enter(&pmem_mutex);
	tpages = mpool_break(&tlist, npages);
	/* IOlock and hashin them into the new offset. */
	if (tpages)
		tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
	mutex_exit(&pmem_mutex);

	if (tpages == npages)
		goto done;

	rpages = npages - tpages;
	/* Quit now if memory cannot be reserved. */
	if (!page_resv(rpages, kflags))
		goto alloc_fail;
	reserved = 1;

	/* If we have large pages */
	if (pmem_lpgsize > PAGESIZE) {
		/* Try to alloc large pages first to decrease fragmentation. */
		i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
		if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
		    kflags) == DDI_FAILURE)
			goto alloc_fail;
		ASSERT(lpages == 0 ? lpp == NULL : 1);
	}

	/*
	 * If the large pages supplied more pages than the request needs,
	 * put the residual pages into pmem_mpool.
	 */
	if (lpages >= rpages) {
		lpp_break(&lpp, lpages, lpages - rpages, plp);
		goto done;
	}

	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
	i = rpages - lpages;
	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
	    pflags, &pmem_seg, (caddr_t)(uintptr_t)pmem_off)) == NULL)
		goto alloc_fail;

done:
	page_list_concat(&tlist, &lpp);
	page_list_concat(&tlist, &pp);
	/* Set those small pages from large pages as allocated. */
	mutex_enter(&pmem_mutex);
	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
	mutex_exit(&pmem_mutex);

	/*
	 * Now tlist holds all the pages for this cookie. Record these pages
	 * in the pmem cookie and release each page's I/O lock as it is
	 * pulled off the list.
	 */
	for (pp = tlist, i = 0; i < npages; i++) {
		pcp->dp_pparray[i] = pp;
		page_io_unlock(pp);
		pp = pp->p_next;
		page_sub(&tlist, pp->p_prev);
	}
	ASSERT(tlist == NULL);
	*cookiep = (devmap_pmem_cookie_t)pcp;

	return (DDI_SUCCESS);

alloc_fail:
	DTRACE_PROBE(pmem__alloc__fail);
	/* Free large pages and the associated allocation records. */
	if (lpp)
		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
	if (reserved == 1)
		page_unresv(rpages);
	/* Put those pages in tlist back into pmem_mpool. */
	if (tpages != 0) {
		mutex_enter(&pmem_mutex);
		/* IOunlock, hashout and update the allocation records. */
		tlist_out(tlist, tpages);
		mpool_append(&tlist, tpages);
		mutex_exit(&pmem_mutex);
	}
	if (locked == 1)
		i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages));
	/* Freeing pmem_cookie. */
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
	return (DDI_FAILURE);
}

/*
 * Free all small pages inside the cookie, and return pages that came from
 * large pages to the pool; if every page of a large page is back in the
 * pool, free that large page as a whole.
 */
void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;
	pgcnt_t tpages = 0;
	page_t *pp;
	pmem_lpg_t *pl1, *plp;
	pmem_lpg_t *pf_lpgs = NULL;	/* lpgs that might be fully free */
	uint_t npls = 0;		/* number of records on pf_lpgs */
	pmem_lpg_t *last_pl = NULL;	/* search cache for pmem_occ_lpgs */
	pmem_lpg_t *plast_pl = NULL;	/* search cache for pf_lpgs */

	ASSERT(pcp);
	mutex_enter(&pmem_mutex);
	/* Free small pages and return them to the memory pool. */
	for (i = pcp->dp_npages; i > 0; i--) {
		pp = pcp->dp_pparray[i - 1];
		page_hashout(pp, NULL);
		/*
		 * Remove the mapping of this single page, this mapping is
		 * created using hat_devload() in segdev_faultpage().
		 */
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
		if (!FROM_LPG(pp)) {
			/* Normal small page. */
			page_free(pp, 1);
			page_unresv(1);
		} else {
			/* Small page from large pages. */
			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
			if (plp && !(plp->pl_pfree)) {
				/*
				 * Move this record to pf_lpgs list, this large
				 * page may be able to be freed as a whole.
				 */
				pmem_lpg_sub(&pmem_occ_lpgs, plp);
				pmem_lpg_concat(&pf_lpgs, &plp);
				plp->pl_pfree = 1;
				npls++;
				last_pl = NULL;
			} else {
				/* Search in pf_lpgs list. */
				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
			}
			ASSERT(plp);
			/* Mark this page as free. */
			BT_SET(plp->pl_bitmap, PFIND(pp));
			/* Record this page in pmem_mpool. */
			mpool_append(&pp, 1);
		}
	}

	/*
	 * Find out the large pages whose pages have all been freed, remove
	 * them from the pf_lpgs list, free them and the associated pmem_lpg
	 * struct.
	 */
	for (plp = pf_lpgs; npls != 0; npls--) {
		pl1 = plp;
		plp = plp->pl_next;
		if (lpg_isfree(pl1)) {
			/*
			 * Get one free large page. Find all pages in this
			 * large page and remove them from pmem_mpool.
			 */
			lpg_free(pl1->pl_pp);
			/* Remove associated allocation records. */
			pmem_lpg_sub(&pf_lpgs, pl1);
			pmem_lpg_free(&pf_lpgs, pl1);
			/*
			 * NOTE(review): tpages is decremented here but never
			 * read afterwards in this function -- looks vestigial;
			 * confirm before removing.
			 */
			tpages -= pmem_pgcnt;
		} else
			pl1->pl_pfree = 0;
	}
	/* Update allocation records accordingly. */
	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
	mutex_exit(&pmem_mutex);

	/* Only unaccount locked memory if freed by the allocating process. */
	if (curproc == pcp->dp_proc)
		i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages));
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
}

/*
 * Extract the page frame numbers for the range [start, start + npages) of
 * a pmem cookie into pfnarray.
5047c478bd9Sstevel@tonic-gate */ 5057c478bd9Sstevel@tonic-gate int 5067c478bd9Sstevel@tonic-gate devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages, 5077c478bd9Sstevel@tonic-gate pfn_t *pfnarray) 5087c478bd9Sstevel@tonic-gate { 5097c478bd9Sstevel@tonic-gate struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie; 5107c478bd9Sstevel@tonic-gate pgcnt_t i; 5117c478bd9Sstevel@tonic-gate 5127c478bd9Sstevel@tonic-gate if (pcp == NULL || start + npages > pcp->dp_npages) 5137c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5147c478bd9Sstevel@tonic-gate 5157c478bd9Sstevel@tonic-gate for (i = start; i < start + npages; i++) 516*733cdf20Smrj pfnarray[i - start] = pfn_to_mfn(pcp->dp_pparray[i]->p_pagenum); 517*733cdf20Smrj 5187c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 5197c478bd9Sstevel@tonic-gate } 5207c478bd9Sstevel@tonic-gate 5217c478bd9Sstevel@tonic-gate void 5227c478bd9Sstevel@tonic-gate pmem_init() 5237c478bd9Sstevel@tonic-gate { 5247c478bd9Sstevel@tonic-gate mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL); 5257c478bd9Sstevel@tonic-gate pmem_lszc = MIN(1, page_num_pagesizes() - 1); 5267c478bd9Sstevel@tonic-gate pmem_lpgsize = page_get_pagesize(pmem_lszc); 5277c478bd9Sstevel@tonic-gate pmem_pgcnt = pmem_lpgsize >> PAGESHIFT; 5287c478bd9Sstevel@tonic-gate bzero(&pmem_seg, sizeof (struct seg)); 5297c478bd9Sstevel@tonic-gate pmem_seg.s_as = &kas; 5307c478bd9Sstevel@tonic-gate } 5317c478bd9Sstevel@tonic-gate 5327c478bd9Sstevel@tonic-gate /* Allocate kernel memory for one pmem cookie with n pages. 
*/ 5337c478bd9Sstevel@tonic-gate static int 5347c478bd9Sstevel@tonic-gate pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags) 5357c478bd9Sstevel@tonic-gate { 5367c478bd9Sstevel@tonic-gate struct devmap_pmem_cookie *pcp; 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie), 5397c478bd9Sstevel@tonic-gate kflags)) == NULL) 5407c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5417c478bd9Sstevel@tonic-gate pcp = *pcpp; 5427c478bd9Sstevel@tonic-gate if ((pcp->dp_vnp = 5437c478bd9Sstevel@tonic-gate kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) { 5447c478bd9Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 5457c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5467c478bd9Sstevel@tonic-gate } 5477c478bd9Sstevel@tonic-gate if ((pcp->dp_pparray = 5487c478bd9Sstevel@tonic-gate kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) { 5497c478bd9Sstevel@tonic-gate kmem_free(pcp->dp_vnp, sizeof (vnode_t)); 5507c478bd9Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 5517c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5527c478bd9Sstevel@tonic-gate } 5537c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 5547c478bd9Sstevel@tonic-gate } 5557c478bd9Sstevel@tonic-gate 556c6939658Ssl108498 /* Try to lock down n pages resource */ 5577c478bd9Sstevel@tonic-gate static int 558c6939658Ssl108498 pmem_lock(pgcnt_t n, proc_t *p) 5597c478bd9Sstevel@tonic-gate { 560c6939658Ssl108498 if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) { 5617c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5627c478bd9Sstevel@tonic-gate } 5637c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 5647c478bd9Sstevel@tonic-gate } 5657c478bd9Sstevel@tonic-gate 5667c478bd9Sstevel@tonic-gate /* To check if all the pages in a large page are freed. 
*/
/*
 * Check whether every constituent small page of the large page described by
 * plp has been freed.  pl_bitmap has one bit per small page; a set bit
 * means "free".  Returns 1 if all bits are set, 0 otherwise.
 */
static int
lpg_isfree(pmem_lpg_t *plp)
{
	uint_t i;

	/* Scan the bitmap one ulong word at a time. */
	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
			return (0);
	/* All 1 means all pages are freed. */
	return (1);
}

/*
 * Using pp to get the associated large page allocation record, searching in
 * the splp linked list with *last as the heuristic pointer. Return NULL if
 * not found.  *last caches the previous hit so repeated lookups for pages
 * of the same large page avoid rescanning the whole list.
 */
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
	pmem_lpg_t *plp;
	pgcnt_t root_pfn;

	ASSERT(pp);
	if (splp == NULL)
		return (NULL);
	/* Round pp's pfn down to the root (first) pfn of its large page. */
	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);

	/* Try last winner first. */
	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
		goto pl_found;

	/* Else search the whole pmem_lpg list. */
	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
		plp = plp->pl_next;
		if (plp == splp) {
			/* Wrapped around the circular list: not found. */
			plp = NULL;
			break;
		}
		ASSERT(plp->pl_pp);
	}

	/* Remember the result (possibly NULL) as the new heuristic. */
	*last = plp;

pl_found:
	return (*last);
}

/*
 * Remove one pmem_lpg plp from the oplpp list.  The list is circular and
 * doubly linked; plp is left as a singleton list of one.
 */
static void
pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
{
	if (*oplpp == plp)
		*oplpp = plp->pl_next;		/* go to next pmem_lpg */

	if (*oplpp == plp)
		*oplpp = NULL;			/* pmem_lpg list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
}

/*
 * Concatenate page list nplpp onto the end of list plpp.
6357c478bd9Sstevel@tonic-gate */ 6367c478bd9Sstevel@tonic-gate static void 6377c478bd9Sstevel@tonic-gate pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp) 6387c478bd9Sstevel@tonic-gate { 6397c478bd9Sstevel@tonic-gate pmem_lpg_t *s1p, *s2p, *e1p, *e2p; 6407c478bd9Sstevel@tonic-gate 6417c478bd9Sstevel@tonic-gate if (*nplpp == NULL) { 6427c478bd9Sstevel@tonic-gate return; 6437c478bd9Sstevel@tonic-gate } 6447c478bd9Sstevel@tonic-gate if (*plpp == NULL) { 6457c478bd9Sstevel@tonic-gate *plpp = *nplpp; 6467c478bd9Sstevel@tonic-gate return; 6477c478bd9Sstevel@tonic-gate } 6487c478bd9Sstevel@tonic-gate s1p = *plpp; 6497c478bd9Sstevel@tonic-gate e1p = s1p->pl_prev; 6507c478bd9Sstevel@tonic-gate s2p = *nplpp; 6517c478bd9Sstevel@tonic-gate e2p = s2p->pl_prev; 6527c478bd9Sstevel@tonic-gate s1p->pl_prev = e2p; 6537c478bd9Sstevel@tonic-gate e2p->pl_next = s1p; 6547c478bd9Sstevel@tonic-gate e1p->pl_next = s2p; 6557c478bd9Sstevel@tonic-gate s2p->pl_prev = e1p; 6567c478bd9Sstevel@tonic-gate } 6577c478bd9Sstevel@tonic-gate 6587c478bd9Sstevel@tonic-gate /* 6597c478bd9Sstevel@tonic-gate * Allocate and initialize the allocation record of one large page, the init 6607c478bd9Sstevel@tonic-gate * value is 'allocated'. 
6617c478bd9Sstevel@tonic-gate */ 6627c478bd9Sstevel@tonic-gate static pmem_lpg_t * 6637c478bd9Sstevel@tonic-gate pmem_lpg_alloc(uint_t kflags) 6647c478bd9Sstevel@tonic-gate { 6657c478bd9Sstevel@tonic-gate pmem_lpg_t *plp; 6667c478bd9Sstevel@tonic-gate 6677c478bd9Sstevel@tonic-gate ASSERT(pmem_pgcnt % BT_NBIPUL == 0); 6687c478bd9Sstevel@tonic-gate plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags); 6697c478bd9Sstevel@tonic-gate if (plp == NULL) 6707c478bd9Sstevel@tonic-gate return (NULL); 6717c478bd9Sstevel@tonic-gate plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags); 6727c478bd9Sstevel@tonic-gate if (plp->pl_bitmap == NULL) { 6737c478bd9Sstevel@tonic-gate kmem_free(plp, sizeof (*plp)); 6747c478bd9Sstevel@tonic-gate return (NULL); 6757c478bd9Sstevel@tonic-gate } 6767c478bd9Sstevel@tonic-gate plp->pl_next = plp->pl_prev = plp; 6777c478bd9Sstevel@tonic-gate return (plp); 6787c478bd9Sstevel@tonic-gate } 6797c478bd9Sstevel@tonic-gate 6807c478bd9Sstevel@tonic-gate /* Free one allocation record pointed by oplp. */ 6817c478bd9Sstevel@tonic-gate static void 6827c478bd9Sstevel@tonic-gate pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp) 6837c478bd9Sstevel@tonic-gate { 6847c478bd9Sstevel@tonic-gate if (*headp == plp) 6857c478bd9Sstevel@tonic-gate *headp = plp->pl_next; /* go to next pmem_lpg_t */ 6867c478bd9Sstevel@tonic-gate 6877c478bd9Sstevel@tonic-gate if (*headp == plp) 6887c478bd9Sstevel@tonic-gate *headp = NULL; /* this list is gone */ 6897c478bd9Sstevel@tonic-gate else { 6907c478bd9Sstevel@tonic-gate plp->pl_prev->pl_next = plp->pl_next; 6917c478bd9Sstevel@tonic-gate plp->pl_next->pl_prev = plp->pl_prev; 6927c478bd9Sstevel@tonic-gate } 6937c478bd9Sstevel@tonic-gate kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt)); 6947c478bd9Sstevel@tonic-gate kmem_free(plp, sizeof (*plp)); 6957c478bd9Sstevel@tonic-gate } 6967c478bd9Sstevel@tonic-gate 6977c478bd9Sstevel@tonic-gate /* Free one large page headed by spp from pmem_mpool. 
*/ 6987c478bd9Sstevel@tonic-gate static void 6997c478bd9Sstevel@tonic-gate lpg_free(page_t *spp) 7007c478bd9Sstevel@tonic-gate { 7017c478bd9Sstevel@tonic-gate page_t *pp1 = spp; 7027c478bd9Sstevel@tonic-gate uint_t i; 7037c478bd9Sstevel@tonic-gate 7047c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 7057c478bd9Sstevel@tonic-gate for (i = 0; i < pmem_pgcnt; i++) { 7067c478bd9Sstevel@tonic-gate /* Break pp1 from pmem_mpool. */ 7077c478bd9Sstevel@tonic-gate page_sub(&pmem_mpool, pp1); 7087c478bd9Sstevel@tonic-gate pp1++; 7097c478bd9Sstevel@tonic-gate } 7107c478bd9Sstevel@tonic-gate /* Free pages in this large page. */ 7117c478bd9Sstevel@tonic-gate page_free_pages(spp); 7127c478bd9Sstevel@tonic-gate page_unresv(pmem_pgcnt); 7137c478bd9Sstevel@tonic-gate pmem_nmpages -= pmem_pgcnt; 7147c478bd9Sstevel@tonic-gate ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool)); 7157c478bd9Sstevel@tonic-gate } 7167c478bd9Sstevel@tonic-gate 7177c478bd9Sstevel@tonic-gate /* Put n pages in *ppp list back into pmem_mpool. */ 7187c478bd9Sstevel@tonic-gate static void 7197c478bd9Sstevel@tonic-gate mpool_append(page_t **ppp, pgcnt_t n) 7207c478bd9Sstevel@tonic-gate { 7217c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 7227c478bd9Sstevel@tonic-gate /* Put back pages. */ 7237c478bd9Sstevel@tonic-gate page_list_concat(&pmem_mpool, ppp); 7247c478bd9Sstevel@tonic-gate pmem_nmpages += n; 7257c478bd9Sstevel@tonic-gate ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool)); 7267c478bd9Sstevel@tonic-gate } 7277c478bd9Sstevel@tonic-gate 7287c478bd9Sstevel@tonic-gate /* 7297c478bd9Sstevel@tonic-gate * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into *ppp 7307c478bd9Sstevel@tonic-gate * list, and return the number of grabbed pages. 
*/
static pgcnt_t
mpool_break(page_t **ppp, pgcnt_t n)
{
	pgcnt_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Grab the pages. */
	i = MIN(pmem_nmpages, n);
	*ppp = pmem_mpool;
	/* Detach the first i pages from the pool into *ppp. */
	page_list_break(ppp, &pmem_mpool, i);
	pmem_nmpages -= i;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
	return (i);
}

/*
 * Create n large pages, lpages and plpp contains the number of small pages and
 * allocation records list respectively.  Pages are hashed into vnode vnp at
 * offsets starting from *offp, which is advanced as pages are created.
 * NOTE(review): on DDI_FAILURE the partially built *lppp/*plpp lists are
 * left populated for the caller to release — confirm against callers.
 */
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
	pgcnt_t i;
	pmem_lpg_t *plp;
	page_t *pp;

	for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
		    PG_EXCL, &pmem_seg, (caddr_t)(uintptr_t)*offp, NULL);
		if (pp == NULL)
			break;
		*offp += pmem_lpgsize;
		page_list_concat(lppp, &pp);
		*lpages += pmem_pgcnt;
		/* Add one allocation record for this large page. */
		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
			return (DDI_FAILURE);
		plp->pl_pp = pp;
		pmem_lpg_concat(plpp, &plp);
	}
	return (DDI_SUCCESS);
}

/*
 * Break the last r small pages from the large page list *lppp (with totally n
 * small pages) and put them into pmem_mpool.  oplp heads the allocation
 * record list; the residual pages always belong to its last record.
 */
static void
lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
{
	page_t *pp, *pp1;
	pgcnt_t i;
	pmem_lpg_t *plp;

	if (r == 0)
		return;
	ASSERT(*lppp != NULL && r < pmem_pgcnt);
	/* Keep the first n - r pages in *lppp; pp gets the last r. */
	page_list_break(lppp, &pp, n - r);

	/* The residual should reside in the last large page. */
	plp = oplp->pl_prev;
	/* IOunlock and hashout the residual pages. */
	for (pp1 = pp, i = 0; i < r; i++) {
		page_io_unlock(pp1);
		page_hashout(pp1, NULL);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp1));
		pp1 = pp1->p_next;
	}
	ASSERT(pp1 == pp);
	/* Put these residual pages into memory pool. */
	mutex_enter(&pmem_mutex);
	mpool_append(&pp, r);
	mutex_exit(&pmem_mutex);
}

/*
 * Freeing large pages in lpp and the associated allocation records in plp.
 * lpgs is the number of large pages in the lpp list.
 */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
{
	pgcnt_t i, j;
	page_t *pp = lpp, *pp1;
	pmem_lpg_t *plp1, *plp2;

	for (i = 0; i < lpgs; i++) {
		for (j = 0; j < pmem_pgcnt; j++) {
			/* IO unlock and hashout this small page. */
			page_io_unlock(pp);
			page_hashout(pp, NULL);
			/* Detach pp into a singleton before freeing. */
			pp1 = pp->p_next;
			pp->p_prev = pp->p_next = pp;
			pp = pp1;
		}
		/* Free one large page at one time. */
		page_free_pages(lpp);
		lpp = pp;
	}
	/* Free associate pmem large page allocation records. */
	for (plp1 = *plpp; *plpp; plp1 = plp2) {
		plp2 = plp1->pl_next;
		pmem_lpg_free(plpp, plp1);
	}
}

/*
 * IOlock and hashin all pages in tlist, associate them with vnode *pvnp
 * and offset starting with *poffp. Update allocation records accordingly at
 * the same time.  *poffp is advanced by PAGESIZE per page.
 */
static void
tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_lock(pp);
		(void) page_hashin(pp, pvnp, *poffp, NULL);
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as allocated. */
		BT_CLEAR(plp->pl_bitmap, PFIND(pp));
		*poffp += PAGESIZE;
		pp = pp->p_next;
	}
	ASSERT(pp == tlist);
}

/*
 * IOunlock and hashout all pages in tlist, update allocation records
 * accordingly at the same time.
8677c478bd9Sstevel@tonic-gate */ 8687c478bd9Sstevel@tonic-gate static void 8697c478bd9Sstevel@tonic-gate tlist_out(page_t *tlist, pgcnt_t tpages) 8707c478bd9Sstevel@tonic-gate { 8717c478bd9Sstevel@tonic-gate page_t *pp; 8727c478bd9Sstevel@tonic-gate pgcnt_t i = 0; 8737c478bd9Sstevel@tonic-gate pmem_lpg_t *plp, *last_pl = NULL; 8747c478bd9Sstevel@tonic-gate 8757c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 8767c478bd9Sstevel@tonic-gate for (pp = tlist; i < tpages; i++) { 8777c478bd9Sstevel@tonic-gate ASSERT(FROM_LPG(pp)); 8787c478bd9Sstevel@tonic-gate page_io_unlock(pp); 8797c478bd9Sstevel@tonic-gate page_hashout(pp, NULL); 8807c478bd9Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 8817c478bd9Sstevel@tonic-gate /* Mark this page as free. */ 8827c478bd9Sstevel@tonic-gate BT_SET(plp->pl_bitmap, PFIND(pp)); 8837c478bd9Sstevel@tonic-gate pp = pp->p_next; 8847c478bd9Sstevel@tonic-gate } 8857c478bd9Sstevel@tonic-gate ASSERT(pp == tlist); 8867c478bd9Sstevel@tonic-gate } 887