17a59208dSJustin T. Gibbs /* 22a888f93SKenneth D. Merry * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. 37a59208dSJustin T. Gibbs * All rights reserved. 47a59208dSJustin T. Gibbs * 57a59208dSJustin T. Gibbs * Redistribution and use in source and binary forms, with or without 67a59208dSJustin T. Gibbs * modification, are permitted provided that the following conditions 77a59208dSJustin T. Gibbs * are met: 87a59208dSJustin T. Gibbs * 1. Redistributions of source code must retain the above copyright 97a59208dSJustin T. Gibbs * notice, this list of conditions and the following disclaimer. 107a59208dSJustin T. Gibbs * 2. Redistributions in binary form must reproduce the above copyright 117a59208dSJustin T. Gibbs * notice, this list of conditions and the following disclaimer in the 127a59208dSJustin T. Gibbs * documentation and/or other materials provided with the distribution. 137a59208dSJustin T. Gibbs * 3. The name of the author may not be used to endorse or promote products 147a59208dSJustin T. Gibbs * derived from this software without specific prior written permission. 157a59208dSJustin T. Gibbs * 167a59208dSJustin T. Gibbs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 177a59208dSJustin T. Gibbs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 187a59208dSJustin T. Gibbs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 197a59208dSJustin T. Gibbs * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 207a59208dSJustin T. Gibbs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 217a59208dSJustin T. Gibbs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 227a59208dSJustin T. Gibbs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 237a59208dSJustin T. Gibbs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 247a59208dSJustin T. Gibbs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 257a59208dSJustin T. Gibbs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 267a59208dSJustin T. Gibbs * SUCH DAMAGE. 277a59208dSJustin T. Gibbs */ 287a59208dSJustin T. Gibbs 29677b542eSDavid E. O'Brien #include <sys/cdefs.h> 30677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 31677b542eSDavid E. O'Brien 327a59208dSJustin T. Gibbs #include <sys/param.h> 337a59208dSJustin T. Gibbs #include <sys/kernel.h> 347a59208dSJustin T. Gibbs #include <sys/systm.h> 359626b608SPoul-Henning Kamp #include <sys/bio.h> 36224d5539SPoul-Henning Kamp #include <sys/devicestat.h> 377a59208dSJustin T. Gibbs #include <sys/sysctl.h> 38c7e73d59SPoul-Henning Kamp #include <sys/malloc.h> 39224d5539SPoul-Henning Kamp #include <sys/lock.h> 40224d5539SPoul-Henning Kamp #include <sys/mutex.h> 41c7e73d59SPoul-Henning Kamp #include <sys/conf.h> 42c7e73d59SPoul-Henning Kamp #include <vm/vm.h> 43c7e73d59SPoul-Henning Kamp #include <vm/pmap.h> 447a59208dSJustin T. Gibbs 45224d5539SPoul-Henning Kamp #include <machine/atomic.h> 467a59208dSJustin T. Gibbs 477a59208dSJustin T. Gibbs static int devstat_num_devs; 48037c3d0fSPoul-Henning Kamp static long devstat_generation = 1; 497a59208dSJustin T. Gibbs static int devstat_version = DEVSTAT_VERSION; 507a59208dSJustin T. Gibbs static int devstat_current_devnumber; 51224d5539SPoul-Henning Kamp static struct mtx devstat_mutex; 527a59208dSJustin T. Gibbs 53938a4e5cSThomas Moestl static struct devstatlist device_statq; 54c7e73d59SPoul-Henning Kamp static struct devstat *devstat_alloc(void); 55c7e73d59SPoul-Henning Kamp static void devstat_free(struct devstat *); 56538aabaaSPoul-Henning Kamp static void devstat_add_entry(struct devstat *ds, const void *dev_name, 57f37de122SPoul-Henning Kamp int unit_number, u_int32_t block_size, 58f37de122SPoul-Henning Kamp devstat_support_flags flags, 59f37de122SPoul-Henning Kamp devstat_type_flags device_type, 60f37de122SPoul-Henning Kamp devstat_priority priority); 61c7e73d59SPoul-Henning Kamp 62c7e73d59SPoul-Henning Kamp /* 63c7e73d59SPoul-Henning Kamp * Allocate a devstat and initialize it 64c7e73d59SPoul-Henning Kamp */ 65c7e73d59SPoul-Henning Kamp struct devstat * 66538aabaaSPoul-Henning Kamp devstat_new_entry(const void *dev_name, 67c7e73d59SPoul-Henning Kamp int unit_number, u_int32_t block_size, 68c7e73d59SPoul-Henning Kamp devstat_support_flags flags, 69c7e73d59SPoul-Henning Kamp devstat_type_flags device_type, 70c7e73d59SPoul-Henning Kamp devstat_priority priority) 71c7e73d59SPoul-Henning Kamp { 72c7e73d59SPoul-Henning Kamp struct devstat *ds; 73224d5539SPoul-Henning Kamp static int once; 74224d5539SPoul-Henning Kamp 75224d5539SPoul-Henning Kamp if (!once) { 76224d5539SPoul-Henning Kamp STAILQ_INIT(&device_statq); 77224d5539SPoul-Henning Kamp mtx_init(&devstat_mutex, "devstat", NULL, MTX_DEF); 78224d5539SPoul-Henning Kamp once = 1; 79224d5539SPoul-Henning Kamp } 80224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 81c7e73d59SPoul-Henning Kamp 82c7e73d59SPoul-Henning Kamp ds = devstat_alloc(); 83224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 84c967bee7SPoul-Henning Kamp if (unit_number == -1) { 85c967bee7SPoul-Henning Kamp ds->id = dev_name; 86c967bee7SPoul-Henning Kamp binuptime(&ds->creation_time); 87c967bee7SPoul-Henning Kamp devstat_generation++; 88c967bee7SPoul-Henning Kamp } else { 89c7e73d59SPoul-Henning Kamp devstat_add_entry(ds, dev_name, unit_number, block_size, 90c7e73d59SPoul-Henning Kamp flags, device_type, priority); 91c967bee7SPoul-Henning Kamp } 92224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 93c7e73d59SPoul-Henning Kamp return (ds); 94c7e73d59SPoul-Henning Kamp } 957a59208dSJustin T. Gibbs 967a59208dSJustin T. Gibbs /* 977a59208dSJustin T. Gibbs * Take a malloced and zeroed devstat structure given to us, fill it in 987a59208dSJustin T. Gibbs * and add it to the queue of devices. 997a59208dSJustin T. Gibbs */ 100f37de122SPoul-Henning Kamp static void 101538aabaaSPoul-Henning Kamp devstat_add_entry(struct devstat *ds, const void *dev_name, 1027a59208dSJustin T. Gibbs int unit_number, u_int32_t block_size, 1037a59208dSJustin T. Gibbs devstat_support_flags flags, 1042a888f93SKenneth D. Merry devstat_type_flags device_type, 1052a888f93SKenneth D. Merry devstat_priority priority) 1067a59208dSJustin T. Gibbs { 1077a59208dSJustin T. Gibbs struct devstatlist *devstat_head; 1082a888f93SKenneth D. Merry struct devstat *ds_tmp; 1097a59208dSJustin T. Gibbs 110224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_OWNED); 1117a59208dSJustin T. Gibbs devstat_num_devs++; 1127a59208dSJustin T. Gibbs 1137a59208dSJustin T. Gibbs devstat_head = &device_statq; 1147a59208dSJustin T. Gibbs 1152a888f93SKenneth D. Merry /* 1162a888f93SKenneth D. Merry * Priority sort. Each driver passes in its priority when it adds 1172a888f93SKenneth D. Merry * its devstat entry. Drivers are sorted first by priority, and 1182a888f93SKenneth D. Merry * then by probe order. 1192a888f93SKenneth D. Merry * 1202a888f93SKenneth D. Merry * For the first device, we just insert it, since the priority 1212a888f93SKenneth D. Merry * doesn't really matter yet. Subsequent devices are inserted into 1222a888f93SKenneth D. Merry * the list using the order outlined above. 1232a888f93SKenneth D. Merry */ 1242a888f93SKenneth D. Merry if (devstat_num_devs == 1) 1257a59208dSJustin T. Gibbs STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); 1262a888f93SKenneth D. Merry else { 12737d40066SPoul-Henning Kamp STAILQ_FOREACH(ds_tmp, devstat_head, dev_links) { 1282a888f93SKenneth D. Merry struct devstat *ds_next; 1292a888f93SKenneth D. Merry 1302a888f93SKenneth D. Merry ds_next = STAILQ_NEXT(ds_tmp, dev_links); 1312a888f93SKenneth D. Merry 1322a888f93SKenneth D. Merry /* 1332a888f93SKenneth D. Merry * If we find a break between higher and lower 1342a888f93SKenneth D. Merry * priority items, and if this item fits in the 1352a888f93SKenneth D. Merry * break, insert it. This also applies if the 1362a888f93SKenneth D. Merry * "lower priority item" is the end of the list. 1372a888f93SKenneth D. Merry */ 1382a888f93SKenneth D. Merry if ((priority <= ds_tmp->priority) 1392a888f93SKenneth D. Merry && ((ds_next == NULL) 1402a888f93SKenneth D. Merry || (priority > ds_next->priority))) { 1412a888f93SKenneth D. Merry STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, 1422a888f93SKenneth D. Merry dev_links); 1432a888f93SKenneth D. Merry break; 1442a888f93SKenneth D. Merry } else if (priority > ds_tmp->priority) { 1452a888f93SKenneth D. Merry /* 1462a888f93SKenneth D. Merry * If this is the case, we should be able 1472a888f93SKenneth D. Merry * to insert ourselves at the head of the 1482a888f93SKenneth D. Merry * list. If we can't, something is wrong. 1492a888f93SKenneth D. Merry */ 1502a888f93SKenneth D. Merry if (ds_tmp == STAILQ_FIRST(devstat_head)) { 1512a888f93SKenneth D. Merry STAILQ_INSERT_HEAD(devstat_head, 1522a888f93SKenneth D. Merry ds, dev_links); 1532a888f93SKenneth D. Merry break; 1542a888f93SKenneth D. Merry } else { 1552a888f93SKenneth D. Merry STAILQ_INSERT_TAIL(devstat_head, 1562a888f93SKenneth D. Merry ds, dev_links); 1572a888f93SKenneth D. Merry printf("devstat_add_entry: HELP! " 1582a888f93SKenneth D. Merry "sorting problem detected " 159538aabaaSPoul-Henning Kamp "for name %p unit %d\n", 160538aabaaSPoul-Henning Kamp dev_name, unit_number); 1612a888f93SKenneth D. Merry break; 1622a888f93SKenneth D. Merry } 1632a888f93SKenneth D. Merry } 1642a888f93SKenneth D. Merry } 1652a888f93SKenneth D. Merry } 1667a59208dSJustin T. Gibbs 1677a59208dSJustin T. Gibbs ds->device_number = devstat_current_devnumber++; 1687a59208dSJustin T. Gibbs ds->unit_number = unit_number; 169e80fb434SRobert Drehmel strlcpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); 1707a59208dSJustin T. Gibbs ds->block_size = block_size; 1717a59208dSJustin T. Gibbs ds->flags = flags; 1727a59208dSJustin T. Gibbs ds->device_type = device_type; 1732a888f93SKenneth D. Merry ds->priority = priority; 1747194d335SPoul-Henning Kamp binuptime(&ds->creation_time); 175224d5539SPoul-Henning Kamp devstat_generation++; 1767a59208dSJustin T. Gibbs } 1777a59208dSJustin T. Gibbs 1787a59208dSJustin T. Gibbs /* 1797a59208dSJustin T. Gibbs * Remove a devstat structure from the list of devices. 1807a59208dSJustin T. Gibbs */ 1817a59208dSJustin T. Gibbs void 1827a59208dSJustin T. Gibbs devstat_remove_entry(struct devstat *ds) 1837a59208dSJustin T. Gibbs { 1847a59208dSJustin T. Gibbs struct devstatlist *devstat_head; 1857a59208dSJustin T. Gibbs 186224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 1877a59208dSJustin T. Gibbs if (ds == NULL) 1887a59208dSJustin T. Gibbs return; 1897a59208dSJustin T. Gibbs 190224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 1917a59208dSJustin T. Gibbs 1927a59208dSJustin T. Gibbs devstat_head = &device_statq; 1937a59208dSJustin T. Gibbs 1947a59208dSJustin T. Gibbs /* Remove this entry from the devstat queue */ 195224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 196c967bee7SPoul-Henning Kamp if (ds->id == NULL) { 197224d5539SPoul-Henning Kamp devstat_num_devs--; 198e3975643SJake Burkholder STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); 199c967bee7SPoul-Henning Kamp } 200c7e73d59SPoul-Henning Kamp devstat_free(ds); 201224d5539SPoul-Henning Kamp devstat_generation++; 202224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 2037a59208dSJustin T. Gibbs } 2047a59208dSJustin T. Gibbs 2057a59208dSJustin T. Gibbs /* 2067a59208dSJustin T. Gibbs * Record a transaction start. 2077194d335SPoul-Henning Kamp * 2087194d335SPoul-Henning Kamp * See comments for devstat_end_transaction(). Ordering is very important 2097194d335SPoul-Henning Kamp * here. 2107a59208dSJustin T. Gibbs */ 2117a59208dSJustin T. Gibbs void 2127194d335SPoul-Henning Kamp devstat_start_transaction(struct devstat *ds, struct bintime *now) 2137a59208dSJustin T. Gibbs { 214224d5539SPoul-Henning Kamp 215224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 216224d5539SPoul-Henning Kamp 2177a59208dSJustin T. Gibbs /* sanity check */ 2187a59208dSJustin T. Gibbs if (ds == NULL) 2197a59208dSJustin T. Gibbs return; 2207a59208dSJustin T. Gibbs 221224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 2227a59208dSJustin T. Gibbs /* 2237a59208dSJustin T. Gibbs * We only want to set the start time when we are going from idle 2247a59208dSJustin T. Gibbs * to busy. The start time is really the start of the latest busy 2257a59208dSJustin T. Gibbs * period. 2267a59208dSJustin T. Gibbs */ 2277194d335SPoul-Henning Kamp if (ds->start_count == ds->end_count) { 2287194d335SPoul-Henning Kamp if (now != NULL) 2297194d335SPoul-Henning Kamp ds->busy_from = *now; 2307194d335SPoul-Henning Kamp else 2317194d335SPoul-Henning Kamp binuptime(&ds->busy_from); 2327194d335SPoul-Henning Kamp } 2337194d335SPoul-Henning Kamp ds->start_count++; 234224d5539SPoul-Henning Kamp atomic_add_rel_int(&ds->sequence0, 1); 2357194d335SPoul-Henning Kamp } 2367194d335SPoul-Henning Kamp 2377194d335SPoul-Henning Kamp void 2387194d335SPoul-Henning Kamp devstat_start_transaction_bio(struct devstat *ds, struct bio *bp) 2397194d335SPoul-Henning Kamp { 2407194d335SPoul-Henning Kamp 241224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 242224d5539SPoul-Henning Kamp 243224d5539SPoul-Henning Kamp /* sanity check */ 244224d5539SPoul-Henning Kamp if (ds == NULL) 245224d5539SPoul-Henning Kamp return; 246224d5539SPoul-Henning Kamp 2477194d335SPoul-Henning Kamp binuptime(&bp->bio_t0); 2487194d335SPoul-Henning Kamp devstat_start_transaction(ds, &bp->bio_t0); 2497a59208dSJustin T. Gibbs } 2507a59208dSJustin T. Gibbs 2517a59208dSJustin T. Gibbs /* 2527a59208dSJustin T. Gibbs * Record the ending of a transaction, and incrment the various counters. 2537194d335SPoul-Henning Kamp * 2547194d335SPoul-Henning Kamp * Ordering in this function, and in devstat_start_transaction() is VERY 2557194d335SPoul-Henning Kamp * important. The idea here is to run without locks, so we are very 2567194d335SPoul-Henning Kamp * careful to only modify some fields on the way "down" (i.e. at 2577194d335SPoul-Henning Kamp * transaction start) and some fields on the way "up" (i.e. at transaction 2587194d335SPoul-Henning Kamp * completion). One exception is busy_from, which we only modify in 2597194d335SPoul-Henning Kamp * devstat_start_transaction() when there are no outstanding transactions, 2607194d335SPoul-Henning Kamp * and thus it can't be modified in devstat_end_transaction() 2617194d335SPoul-Henning Kamp * simultaneously. 262224d5539SPoul-Henning Kamp * 263224d5539SPoul-Henning Kamp * The sequence0 and sequence1 fields are provided to enable an application 264224d5539SPoul-Henning Kamp * spying on the structures with mmap(2) to tell when a structure is in a 265224d5539SPoul-Henning Kamp * consistent state or not. 266224d5539SPoul-Henning Kamp * 267224d5539SPoul-Henning Kamp * For this to work 100% reliably, it is important that the two fields 268224d5539SPoul-Henning Kamp * are at opposite ends of the structure and that they are incremented 269224d5539SPoul-Henning Kamp * in the opposite order of how a memcpy(3) in userland would copy them. 270224d5539SPoul-Henning Kamp * We assume that the copying happens front to back, but there is actually 271224d5539SPoul-Henning Kamp * no way short of writing your own memcpy(3) replacement to guarantee 272224d5539SPoul-Henning Kamp * this will be the case. 273224d5539SPoul-Henning Kamp * 274224d5539SPoul-Henning Kamp * In addition to this, being a kind of locks, they must be updated with 275224d5539SPoul-Henning Kamp * atomic instructions using appropriate memory barriers. 2767a59208dSJustin T. Gibbs */ 2777a59208dSJustin T. Gibbs void 2787a59208dSJustin T. Gibbs devstat_end_transaction(struct devstat *ds, u_int32_t bytes, 2797194d335SPoul-Henning Kamp devstat_tag_type tag_type, devstat_trans_flags flags, 2807194d335SPoul-Henning Kamp struct bintime *now, struct bintime *then) 2817a59208dSJustin T. Gibbs { 2827194d335SPoul-Henning Kamp struct bintime dt, lnow; 2837a59208dSJustin T. Gibbs 284224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 285224d5539SPoul-Henning Kamp 2867a59208dSJustin T. Gibbs /* sanity check */ 2877a59208dSJustin T. Gibbs if (ds == NULL) 2887a59208dSJustin T. Gibbs return; 2897a59208dSJustin T. Gibbs 2907194d335SPoul-Henning Kamp if (now == NULL) { 2917194d335SPoul-Henning Kamp now = &lnow; 2927194d335SPoul-Henning Kamp binuptime(now); 2937194d335SPoul-Henning Kamp } 2947a59208dSJustin T. Gibbs 295224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 2967194d335SPoul-Henning Kamp /* Update byte and operations counts */ 2977194d335SPoul-Henning Kamp ds->bytes[flags] += bytes; 2987194d335SPoul-Henning Kamp ds->operations[flags]++; 2997a59208dSJustin T. Gibbs 3007a59208dSJustin T. Gibbs /* 3017a59208dSJustin T. Gibbs * Keep a count of the various tag types sent. 3027a59208dSJustin T. Gibbs */ 3038db3b947SPoul-Henning Kamp if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && 304f80d57eeSPoul-Henning Kamp tag_type != DEVSTAT_TAG_NONE) 3057a59208dSJustin T. Gibbs ds->tag_types[tag_type]++; 3067a59208dSJustin T. Gibbs 3077194d335SPoul-Henning Kamp if (then != NULL) { 3087194d335SPoul-Henning Kamp /* Update duration of operations */ 3097194d335SPoul-Henning Kamp dt = *now; 3107194d335SPoul-Henning Kamp bintime_sub(&dt, then); 3117194d335SPoul-Henning Kamp bintime_add(&ds->duration[flags], &dt); 3127194d335SPoul-Henning Kamp } 3137a59208dSJustin T. Gibbs 3147194d335SPoul-Henning Kamp /* Accumulate busy time */ 3157194d335SPoul-Henning Kamp dt = *now; 3167194d335SPoul-Henning Kamp bintime_sub(&dt, &ds->busy_from); 3177194d335SPoul-Henning Kamp bintime_add(&ds->busy_time, &dt); 3187194d335SPoul-Henning Kamp ds->busy_from = *now; 3197194d335SPoul-Henning Kamp 3207194d335SPoul-Henning Kamp ds->end_count++; 321224d5539SPoul-Henning Kamp atomic_add_rel_int(&ds->sequence0, 1); 3227a59208dSJustin T. Gibbs } 3237a59208dSJustin T. Gibbs 324f80d57eeSPoul-Henning Kamp void 325282ac69eSPoul-Henning Kamp devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) 326282ac69eSPoul-Henning Kamp { 327282ac69eSPoul-Henning Kamp devstat_trans_flags flg; 328282ac69eSPoul-Henning Kamp 329224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 330224d5539SPoul-Henning Kamp 331224d5539SPoul-Henning Kamp /* sanity check */ 332224d5539SPoul-Henning Kamp if (ds == NULL) 333224d5539SPoul-Henning Kamp return; 334224d5539SPoul-Henning Kamp 335282ac69eSPoul-Henning Kamp if (bp->bio_cmd == BIO_DELETE) 336282ac69eSPoul-Henning Kamp flg = DEVSTAT_FREE; 337282ac69eSPoul-Henning Kamp else if (bp->bio_cmd == BIO_READ) 338282ac69eSPoul-Henning Kamp flg = DEVSTAT_READ; 339224d5539SPoul-Henning Kamp else if (bp->bio_cmd == BIO_WRITE) 340282ac69eSPoul-Henning Kamp flg = DEVSTAT_WRITE; 341224d5539SPoul-Henning Kamp else 342224d5539SPoul-Henning Kamp flg = DEVSTAT_NO_DATA; 343282ac69eSPoul-Henning Kamp 344282ac69eSPoul-Henning Kamp devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, 3457194d335SPoul-Henning Kamp DEVSTAT_TAG_SIMPLE, flg, NULL, &bp->bio_t0); 346282ac69eSPoul-Henning Kamp } 347282ac69eSPoul-Henning Kamp 3487a59208dSJustin T. Gibbs /* 3497a59208dSJustin T. Gibbs * This is the sysctl handler for the devstat package. The data pushed out 3507a59208dSJustin T. Gibbs * on the kern.devstat.all sysctl variable consists of the current devstat 3517a59208dSJustin T. Gibbs * generation number, and then an array of devstat structures, one for each 3527a59208dSJustin T. Gibbs * device in the system. 3537a59208dSJustin T. Gibbs * 354224d5539SPoul-Henning Kamp * This is more cryptic that obvious, but basically we neither can nor 355224d5539SPoul-Henning Kamp * want to hold the devstat_mutex for any amount of time, so we grab it 356224d5539SPoul-Henning Kamp * only when we need to and keep an eye on devstat_generation all the time. 3577a59208dSJustin T. Gibbs */ 3587a59208dSJustin T. Gibbs static int 35982d9ae4eSPoul-Henning Kamp sysctl_devstat(SYSCTL_HANDLER_ARGS) 3607a59208dSJustin T. Gibbs { 361224d5539SPoul-Henning Kamp int error; 3626e17a0d7SHartmut Brandt long mygen; 3637a59208dSJustin T. Gibbs struct devstat *nds; 364224d5539SPoul-Henning Kamp 365224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 3667a59208dSJustin T. Gibbs 3677a59208dSJustin T. Gibbs /* 368224d5539SPoul-Henning Kamp * XXX devstat_generation should really be "volatile" but that 369224d5539SPoul-Henning Kamp * XXX freaks out the sysctl macro below. The places where we 370224d5539SPoul-Henning Kamp * XXX change it and inspect it are bracketed in the mutex which 371224d5539SPoul-Henning Kamp * XXX guarantees us proper write barriers. I don't belive the 372224d5539SPoul-Henning Kamp * XXX compiler is allowed to optimize mygen away across calls 373224d5539SPoul-Henning Kamp * XXX to other functions, so the following is belived to be safe. 3747a59208dSJustin T. Gibbs */ 375224d5539SPoul-Henning Kamp mygen = devstat_generation; 3767a59208dSJustin T. Gibbs 377224d5539SPoul-Henning Kamp error = SYSCTL_OUT(req, &mygen, sizeof(mygen)); 378224d5539SPoul-Henning Kamp 379037c3d0fSPoul-Henning Kamp if (devstat_num_devs == 0) 380037c3d0fSPoul-Henning Kamp return(0); 381037c3d0fSPoul-Henning Kamp 382224d5539SPoul-Henning Kamp if (error != 0) 383224d5539SPoul-Henning Kamp return (error); 384224d5539SPoul-Henning Kamp 385224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 386224d5539SPoul-Henning Kamp nds = STAILQ_FIRST(&device_statq); 387224d5539SPoul-Henning Kamp if (mygen != devstat_generation) 388224d5539SPoul-Henning Kamp error = EBUSY; 389224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 390224d5539SPoul-Henning Kamp 391224d5539SPoul-Henning Kamp if (error != 0) 392224d5539SPoul-Henning Kamp return (error); 393224d5539SPoul-Henning Kamp 394224d5539SPoul-Henning Kamp for (;nds != NULL;) { 3957a59208dSJustin T. Gibbs error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); 396224d5539SPoul-Henning Kamp if (error != 0) 397224d5539SPoul-Henning Kamp return (error); 398224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 399224d5539SPoul-Henning Kamp if (mygen != devstat_generation) 400224d5539SPoul-Henning Kamp error = EBUSY; 401224d5539SPoul-Henning Kamp else 402224d5539SPoul-Henning Kamp nds = STAILQ_NEXT(nds, dev_links); 403224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 404224d5539SPoul-Henning Kamp if (error != 0) 405224d5539SPoul-Henning Kamp return (error); 406224d5539SPoul-Henning Kamp } 4077a59208dSJustin T. Gibbs return(error); 4087a59208dSJustin T. Gibbs } 4097a59208dSJustin T. Gibbs 4107a59208dSJustin T. Gibbs /* 4117a59208dSJustin T. Gibbs * Sysctl entries for devstat. The first one is a node that all the rest 4127a59208dSJustin T. Gibbs * hang off of. 4137a59208dSJustin T. Gibbs */ 4147a59208dSJustin T. Gibbs SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD, 0, "Device Statistics"); 4157a59208dSJustin T. Gibbs 4167a59208dSJustin T. Gibbs SYSCTL_PROC(_kern_devstat, OID_AUTO, all, CTLFLAG_RD|CTLTYPE_OPAQUE, 4173d177f46SBill Fumerola 0, 0, sysctl_devstat, "S,devstat", "All devices in the devstat list"); 4187a59208dSJustin T. Gibbs /* 4197a59208dSJustin T. Gibbs * Export the number of devices in the system so that userland utilities 4207a59208dSJustin T. Gibbs * can determine how much memory to allocate to hold all the devices. 4217a59208dSJustin T. Gibbs */ 4223d177f46SBill Fumerola SYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, 4233d177f46SBill Fumerola &devstat_num_devs, 0, "Number of devices in the devstat list"); 4246e17a0d7SHartmut Brandt SYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, 4259701cd40SJohn Baldwin &devstat_generation, 0, "Devstat list generation"); 4263d177f46SBill Fumerola SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, 4273d177f46SBill Fumerola &devstat_version, 0, "Devstat list version number"); 428c7e73d59SPoul-Henning Kamp 429224d5539SPoul-Henning Kamp /* 430224d5539SPoul-Henning Kamp * Allocator for struct devstat structures. We sub-allocate these from pages 431224d5539SPoul-Henning Kamp * which we get from malloc. These pages are exported for mmap(2)'ing through 432224d5539SPoul-Henning Kamp * a miniature device driver 433224d5539SPoul-Henning Kamp */ 434224d5539SPoul-Henning Kamp 435c7e73d59SPoul-Henning Kamp #define statsperpage (PAGE_SIZE / sizeof(struct devstat)) 436c7e73d59SPoul-Henning Kamp 437c7e73d59SPoul-Henning Kamp static d_mmap_t devstat_mmap; 438c7e73d59SPoul-Henning Kamp 439c7e73d59SPoul-Henning Kamp static struct cdevsw devstat_cdevsw = { 440c7e73d59SPoul-Henning Kamp .d_open = nullopen, 441c7e73d59SPoul-Henning Kamp .d_close = nullclose, 442c7e73d59SPoul-Henning Kamp .d_mmap = devstat_mmap, 443c7e73d59SPoul-Henning Kamp .d_name = "devstat", 444c7e73d59SPoul-Henning Kamp }; 445c7e73d59SPoul-Henning Kamp 446c7e73d59SPoul-Henning Kamp struct statspage { 447c7e73d59SPoul-Henning Kamp TAILQ_ENTRY(statspage) list; 448c7e73d59SPoul-Henning Kamp struct devstat *stat; 449c7e73d59SPoul-Henning Kamp u_int nfree; 450c7e73d59SPoul-Henning Kamp }; 451c7e73d59SPoul-Henning Kamp 452c7e73d59SPoul-Henning Kamp static TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist); 453c7e73d59SPoul-Henning Kamp static MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics"); 454c7e73d59SPoul-Henning Kamp 455c7e73d59SPoul-Henning Kamp static int 456227f9a1cSJake Burkholder devstat_mmap(dev_t dev, vm_offset_t offset, vm_paddr_t *paddr, int nprot) 457c7e73d59SPoul-Henning Kamp { 458c7e73d59SPoul-Henning Kamp struct statspage *spp; 459c7e73d59SPoul-Henning Kamp 460c7e73d59SPoul-Henning Kamp if (nprot != VM_PROT_READ) 461c7e73d59SPoul-Henning Kamp return (-1); 462c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 463c7e73d59SPoul-Henning Kamp if (offset == 0) { 464c7e73d59SPoul-Henning Kamp *paddr = vtophys(spp->stat); 465c7e73d59SPoul-Henning Kamp return (0); 466c7e73d59SPoul-Henning Kamp } 467c7e73d59SPoul-Henning Kamp offset -= PAGE_SIZE; 468c7e73d59SPoul-Henning Kamp } 469c7e73d59SPoul-Henning Kamp return (-1); 470c7e73d59SPoul-Henning Kamp } 471c7e73d59SPoul-Henning Kamp 472c7e73d59SPoul-Henning Kamp static struct devstat * 473c7e73d59SPoul-Henning Kamp devstat_alloc(void) 474c7e73d59SPoul-Henning Kamp { 475c7e73d59SPoul-Henning Kamp struct devstat *dsp; 476c7e73d59SPoul-Henning Kamp struct statspage *spp; 477c7e73d59SPoul-Henning Kamp u_int u; 478c7e73d59SPoul-Henning Kamp static int once; 479c7e73d59SPoul-Henning Kamp 480224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 481c7e73d59SPoul-Henning Kamp if (!once) { 482c7e73d59SPoul-Henning Kamp make_dev(&devstat_cdevsw, 0, 4835fa5746dSPoul-Henning Kamp UID_ROOT, GID_WHEEL, 0400, DEVSTAT_DEVICE_NAME); 484224d5539SPoul-Henning Kamp once = 1; 485c7e73d59SPoul-Henning Kamp } 486224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 487224d5539SPoul-Henning Kamp for (;;) { 488c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 489c7e73d59SPoul-Henning Kamp if (spp->nfree > 0) 490c7e73d59SPoul-Henning Kamp break; 491c7e73d59SPoul-Henning Kamp } 492224d5539SPoul-Henning Kamp if (spp != NULL) 493224d5539SPoul-Henning Kamp break; 494224d5539SPoul-Henning Kamp /* 495224d5539SPoul-Henning Kamp * We had no free slot in any of our pages, drop the mutex 496224d5539SPoul-Henning Kamp * and get another page. In theory we could have more than 497224d5539SPoul-Henning Kamp * one process doing this at the same time and consequently 498224d5539SPoul-Henning Kamp * we may allocate more pages than we will need. That is 499224d5539SPoul-Henning Kamp * Just Too Bad[tm], we can live with that. 500224d5539SPoul-Henning Kamp */ 501224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 502c7e73d59SPoul-Henning Kamp spp = malloc(sizeof *spp, M_DEVSTAT, M_ZERO | M_WAITOK); 503c7e73d59SPoul-Henning Kamp spp->stat = malloc(PAGE_SIZE, M_DEVSTAT, M_ZERO | M_WAITOK); 504c7e73d59SPoul-Henning Kamp spp->nfree = statsperpage; 505224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 506224d5539SPoul-Henning Kamp /* 507224d5539SPoul-Henning Kamp * It would make more sense to add the new page at the head 508224d5539SPoul-Henning Kamp * but the order on the list determine the sequence of the 509224d5539SPoul-Henning Kamp * mapping so we can't do that. 510224d5539SPoul-Henning Kamp */ 511224d5539SPoul-Henning Kamp TAILQ_INSERT_TAIL(&pagelist, spp, list); 512c7e73d59SPoul-Henning Kamp } 513c7e73d59SPoul-Henning Kamp dsp = spp->stat; 514c7e73d59SPoul-Henning Kamp for (u = 0; u < statsperpage; u++) { 515c7e73d59SPoul-Henning Kamp if (dsp->allocated == 0) 516c7e73d59SPoul-Henning Kamp break; 517c7e73d59SPoul-Henning Kamp dsp++; 518c7e73d59SPoul-Henning Kamp } 519c7e73d59SPoul-Henning Kamp spp->nfree--; 520c7e73d59SPoul-Henning Kamp dsp->allocated = 1; 521224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 522c7e73d59SPoul-Henning Kamp return (dsp); 523c7e73d59SPoul-Henning Kamp } 524c7e73d59SPoul-Henning Kamp 525c7e73d59SPoul-Henning Kamp static void 526c7e73d59SPoul-Henning Kamp devstat_free(struct devstat *dsp) 527c7e73d59SPoul-Henning Kamp { 528c7e73d59SPoul-Henning Kamp struct statspage *spp; 529c7e73d59SPoul-Henning Kamp 530224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_OWNED); 531c7e73d59SPoul-Henning Kamp bzero(dsp, sizeof *dsp); 532c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 533c7e73d59SPoul-Henning Kamp if (dsp >= spp->stat && dsp < (spp->stat + statsperpage)) { 534c7e73d59SPoul-Henning Kamp spp->nfree++; 535c7e73d59SPoul-Henning Kamp return; 536c7e73d59SPoul-Henning Kamp } 537c7e73d59SPoul-Henning Kamp } 538c7e73d59SPoul-Henning Kamp } 5397194d335SPoul-Henning Kamp 5407194d335SPoul-Henning Kamp SYSCTL_INT(_debug_sizeof, OID_AUTO, devstat, CTLFLAG_RD, 5417194d335SPoul-Henning Kamp 0, sizeof(struct devstat), "sizeof(struct devstat)"); 542