19454b2d8SWarner Losh /*- 28a36da99SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 38a36da99SPedro F. Giffuni * 42a888f93SKenneth D. Merry * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. 57a59208dSJustin T. Gibbs * All rights reserved. 67a59208dSJustin T. Gibbs * 77a59208dSJustin T. Gibbs * Redistribution and use in source and binary forms, with or without 87a59208dSJustin T. Gibbs * modification, are permitted provided that the following conditions 97a59208dSJustin T. Gibbs * are met: 107a59208dSJustin T. Gibbs * 1. Redistributions of source code must retain the above copyright 117a59208dSJustin T. Gibbs * notice, this list of conditions and the following disclaimer. 127a59208dSJustin T. Gibbs * 2. Redistributions in binary form must reproduce the above copyright 137a59208dSJustin T. Gibbs * notice, this list of conditions and the following disclaimer in the 147a59208dSJustin T. Gibbs * documentation and/or other materials provided with the distribution. 157a59208dSJustin T. Gibbs * 3. The name of the author may not be used to endorse or promote products 167a59208dSJustin T. Gibbs * derived from this software without specific prior written permission. 177a59208dSJustin T. Gibbs * 187a59208dSJustin T. Gibbs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 197a59208dSJustin T. Gibbs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 207a59208dSJustin T. Gibbs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 217a59208dSJustin T. Gibbs * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 227a59208dSJustin T. Gibbs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 237a59208dSJustin T. Gibbs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 247a59208dSJustin T. Gibbs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 257a59208dSJustin T. Gibbs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 267a59208dSJustin T. Gibbs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 277a59208dSJustin T. Gibbs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 287a59208dSJustin T. Gibbs * SUCH DAMAGE. 297a59208dSJustin T. Gibbs */ 307a59208dSJustin T. Gibbs 31677b542eSDavid E. O'Brien #include <sys/cdefs.h> 32677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 33677b542eSDavid E. O'Brien 347a59208dSJustin T. Gibbs #include <sys/param.h> 357a59208dSJustin T. Gibbs #include <sys/kernel.h> 367a59208dSJustin T. Gibbs #include <sys/systm.h> 379626b608SPoul-Henning Kamp #include <sys/bio.h> 38224d5539SPoul-Henning Kamp #include <sys/devicestat.h> 392e1ae0b3SMark Johnston #include <sys/sdt.h> 407a59208dSJustin T. Gibbs #include <sys/sysctl.h> 41c7e73d59SPoul-Henning Kamp #include <sys/malloc.h> 42224d5539SPoul-Henning Kamp #include <sys/lock.h> 43224d5539SPoul-Henning Kamp #include <sys/mutex.h> 44c7e73d59SPoul-Henning Kamp #include <sys/conf.h> 45c7e73d59SPoul-Henning Kamp #include <vm/vm.h> 46c7e73d59SPoul-Henning Kamp #include <vm/pmap.h> 477a59208dSJustin T. Gibbs 48224d5539SPoul-Henning Kamp #include <machine/atomic.h> 497a59208dSJustin T. Gibbs 502e1ae0b3SMark Johnston SDT_PROVIDER_DEFINE(io); 513fac94baSGeorge V. Neville-Neil 52d9fae5abSAndriy Gapon SDT_PROBE_DEFINE2(io, , , start, "struct bio *", "struct devstat *"); 53d9fae5abSAndriy Gapon SDT_PROBE_DEFINE2(io, , , done, "struct bio *", "struct devstat *"); 54d9fae5abSAndriy Gapon SDT_PROBE_DEFINE2(io, , , wait__start, "struct bio *", 552e1ae0b3SMark Johnston "struct devstat *"); 56d9fae5abSAndriy Gapon SDT_PROBE_DEFINE2(io, , , wait__done, "struct bio *", 572e1ae0b3SMark Johnston "struct devstat *"); 583fac94baSGeorge V. Neville-Neil 592e1ae0b3SMark Johnston #define DTRACE_DEVSTAT_START() SDT_PROBE2(io, , , start, NULL, ds) 602e1ae0b3SMark Johnston #define DTRACE_DEVSTAT_BIO_START() SDT_PROBE2(io, , , start, bp, ds) 612e1ae0b3SMark Johnston #define DTRACE_DEVSTAT_DONE() SDT_PROBE2(io, , , done, NULL, ds) 622e1ae0b3SMark Johnston #define DTRACE_DEVSTAT_BIO_DONE() SDT_PROBE2(io, , , done, bp, ds) 63d9fae5abSAndriy Gapon #define DTRACE_DEVSTAT_WAIT_START() SDT_PROBE2(io, , , wait__start, NULL, ds) 64d9fae5abSAndriy Gapon #define DTRACE_DEVSTAT_WAIT_DONE() SDT_PROBE2(io, , , wait__done, NULL, ds) 653fac94baSGeorge V. Neville-Neil 667a59208dSJustin T. Gibbs static int devstat_num_devs; 67037c3d0fSPoul-Henning Kamp static long devstat_generation = 1; 687a59208dSJustin T. Gibbs static int devstat_version = DEVSTAT_VERSION; 697a59208dSJustin T. Gibbs static int devstat_current_devnumber; 70224d5539SPoul-Henning Kamp static struct mtx devstat_mutex; 7127c959cfSJustin T. Gibbs MTX_SYSINIT(devstat_mutex, &devstat_mutex, "devstat", MTX_DEF); 727a59208dSJustin T. Gibbs 7327c959cfSJustin T. Gibbs static struct devstatlist device_statq = STAILQ_HEAD_INITIALIZER(device_statq); 74c7e73d59SPoul-Henning Kamp static struct devstat *devstat_alloc(void); 75c7e73d59SPoul-Henning Kamp static void devstat_free(struct devstat *); 76538aabaaSPoul-Henning Kamp static void devstat_add_entry(struct devstat *ds, const void *dev_name, 7760ae52f7SEd Schouten int unit_number, uint32_t block_size, 78f37de122SPoul-Henning Kamp devstat_support_flags flags, 79f37de122SPoul-Henning Kamp devstat_type_flags device_type, 80f37de122SPoul-Henning Kamp devstat_priority priority); 81c7e73d59SPoul-Henning Kamp 82c7e73d59SPoul-Henning Kamp /* 83c7e73d59SPoul-Henning Kamp * Allocate a devstat and initialize it 84c7e73d59SPoul-Henning Kamp */ 85c7e73d59SPoul-Henning Kamp struct devstat * 86538aabaaSPoul-Henning Kamp devstat_new_entry(const void *dev_name, 8760ae52f7SEd Schouten int unit_number, uint32_t block_size, 88c7e73d59SPoul-Henning Kamp devstat_support_flags flags, 89c7e73d59SPoul-Henning Kamp devstat_type_flags device_type, 90c7e73d59SPoul-Henning Kamp devstat_priority priority) 91c7e73d59SPoul-Henning Kamp { 92c7e73d59SPoul-Henning Kamp struct devstat *ds; 93224d5539SPoul-Henning Kamp 94224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 95c7e73d59SPoul-Henning Kamp 96c7e73d59SPoul-Henning Kamp ds = devstat_alloc(); 97224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 98c967bee7SPoul-Henning Kamp if (unit_number == -1) { 9940ea77a0SAlexander Motin ds->unit_number = unit_number; 100c967bee7SPoul-Henning Kamp ds->id = dev_name; 101c967bee7SPoul-Henning Kamp binuptime(&ds->creation_time); 102c967bee7SPoul-Henning Kamp devstat_generation++; 103c967bee7SPoul-Henning Kamp } else { 104c7e73d59SPoul-Henning Kamp devstat_add_entry(ds, dev_name, unit_number, block_size, 105c7e73d59SPoul-Henning Kamp flags, device_type, priority); 106c967bee7SPoul-Henning Kamp } 107224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 108c7e73d59SPoul-Henning Kamp return (ds); 109c7e73d59SPoul-Henning Kamp } 1107a59208dSJustin T. Gibbs 1117a59208dSJustin T. Gibbs /* 1127a59208dSJustin T. Gibbs * Take a malloced and zeroed devstat structure given to us, fill it in 1137a59208dSJustin T. Gibbs * and add it to the queue of devices. 1147a59208dSJustin T. Gibbs */ 115f37de122SPoul-Henning Kamp static void 116538aabaaSPoul-Henning Kamp devstat_add_entry(struct devstat *ds, const void *dev_name, 11760ae52f7SEd Schouten int unit_number, uint32_t block_size, 1187a59208dSJustin T. Gibbs devstat_support_flags flags, 1192a888f93SKenneth D. Merry devstat_type_flags device_type, 1202a888f93SKenneth D. Merry devstat_priority priority) 1217a59208dSJustin T. Gibbs { 1227a59208dSJustin T. Gibbs struct devstatlist *devstat_head; 1232a888f93SKenneth D. Merry struct devstat *ds_tmp; 1247a59208dSJustin T. Gibbs 125224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_OWNED); 1267a59208dSJustin T. Gibbs devstat_num_devs++; 1277a59208dSJustin T. Gibbs 1287a59208dSJustin T. Gibbs devstat_head = &device_statq; 1297a59208dSJustin T. Gibbs 1302a888f93SKenneth D. Merry /* 1312a888f93SKenneth D. Merry * Priority sort. Each driver passes in its priority when it adds 1322a888f93SKenneth D. Merry * its devstat entry. Drivers are sorted first by priority, and 1332a888f93SKenneth D. Merry * then by probe order. 1342a888f93SKenneth D. Merry * 1352a888f93SKenneth D. Merry * For the first device, we just insert it, since the priority 1362a888f93SKenneth D. Merry * doesn't really matter yet. Subsequent devices are inserted into 1372a888f93SKenneth D. Merry * the list using the order outlined above. 1382a888f93SKenneth D. Merry */ 1392a888f93SKenneth D. Merry if (devstat_num_devs == 1) 1407a59208dSJustin T. Gibbs STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); 1412a888f93SKenneth D. Merry else { 14237d40066SPoul-Henning Kamp STAILQ_FOREACH(ds_tmp, devstat_head, dev_links) { 1432a888f93SKenneth D. Merry struct devstat *ds_next; 1442a888f93SKenneth D. Merry 1452a888f93SKenneth D. Merry ds_next = STAILQ_NEXT(ds_tmp, dev_links); 1462a888f93SKenneth D. Merry 1472a888f93SKenneth D. Merry /* 1482a888f93SKenneth D. Merry * If we find a break between higher and lower 1492a888f93SKenneth D. Merry * priority items, and if this item fits in the 1502a888f93SKenneth D. Merry * break, insert it. This also applies if the 1512a888f93SKenneth D. Merry * "lower priority item" is the end of the list. 1522a888f93SKenneth D. Merry */ 1532a888f93SKenneth D. Merry if ((priority <= ds_tmp->priority) 1542a888f93SKenneth D. Merry && ((ds_next == NULL) 1552a888f93SKenneth D. Merry || (priority > ds_next->priority))) { 1562a888f93SKenneth D. Merry STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, 1572a888f93SKenneth D. Merry dev_links); 1582a888f93SKenneth D. Merry break; 1592a888f93SKenneth D. Merry } else if (priority > ds_tmp->priority) { 1602a888f93SKenneth D. Merry /* 1612a888f93SKenneth D. Merry * If this is the case, we should be able 1622a888f93SKenneth D. Merry * to insert ourselves at the head of the 1632a888f93SKenneth D. Merry * list. If we can't, something is wrong. 1642a888f93SKenneth D. Merry */ 1652a888f93SKenneth D. Merry if (ds_tmp == STAILQ_FIRST(devstat_head)) { 1662a888f93SKenneth D. Merry STAILQ_INSERT_HEAD(devstat_head, 1672a888f93SKenneth D. Merry ds, dev_links); 1682a888f93SKenneth D. Merry break; 1692a888f93SKenneth D. Merry } else { 1702a888f93SKenneth D. Merry STAILQ_INSERT_TAIL(devstat_head, 1712a888f93SKenneth D. Merry ds, dev_links); 1722a888f93SKenneth D. Merry printf("devstat_add_entry: HELP! " 1732a888f93SKenneth D. Merry "sorting problem detected " 174538aabaaSPoul-Henning Kamp "for name %p unit %d\n", 175538aabaaSPoul-Henning Kamp dev_name, unit_number); 1762a888f93SKenneth D. Merry break; 1772a888f93SKenneth D. Merry } 1782a888f93SKenneth D. Merry } 1792a888f93SKenneth D. Merry } 1802a888f93SKenneth D. Merry } 1817a59208dSJustin T. Gibbs 1827a59208dSJustin T. Gibbs ds->device_number = devstat_current_devnumber++; 1837a59208dSJustin T. Gibbs ds->unit_number = unit_number; 184e80fb434SRobert Drehmel strlcpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); 1857a59208dSJustin T. Gibbs ds->block_size = block_size; 1867a59208dSJustin T. Gibbs ds->flags = flags; 1877a59208dSJustin T. Gibbs ds->device_type = device_type; 1882a888f93SKenneth D. Merry ds->priority = priority; 1897194d335SPoul-Henning Kamp binuptime(&ds->creation_time); 190224d5539SPoul-Henning Kamp devstat_generation++; 1917a59208dSJustin T. Gibbs } 1927a59208dSJustin T. Gibbs 1937a59208dSJustin T. Gibbs /* 1947a59208dSJustin T. Gibbs * Remove a devstat structure from the list of devices. 1957a59208dSJustin T. Gibbs */ 1967a59208dSJustin T. Gibbs void 1977a59208dSJustin T. Gibbs devstat_remove_entry(struct devstat *ds) 1987a59208dSJustin T. Gibbs { 1997a59208dSJustin T. Gibbs struct devstatlist *devstat_head; 2007a59208dSJustin T. Gibbs 201224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 2027a59208dSJustin T. Gibbs if (ds == NULL) 2037a59208dSJustin T. Gibbs return; 2047a59208dSJustin T. Gibbs 205224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 2067a59208dSJustin T. Gibbs 2077a59208dSJustin T. Gibbs devstat_head = &device_statq; 2087a59208dSJustin T. Gibbs 2097a59208dSJustin T. Gibbs /* Remove this entry from the devstat queue */ 210224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 21140ea77a0SAlexander Motin if (ds->unit_number != -1) { 212224d5539SPoul-Henning Kamp devstat_num_devs--; 213e3975643SJake Burkholder STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); 214c967bee7SPoul-Henning Kamp } 215c7e73d59SPoul-Henning Kamp devstat_free(ds); 216224d5539SPoul-Henning Kamp devstat_generation++; 217224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 2187a59208dSJustin T. Gibbs } 2197a59208dSJustin T. Gibbs 2207a59208dSJustin T. Gibbs /* 2217a59208dSJustin T. Gibbs * Record a transaction start. 2227194d335SPoul-Henning Kamp * 2237194d335SPoul-Henning Kamp * See comments for devstat_end_transaction(). Ordering is very important 2247194d335SPoul-Henning Kamp * here. 2257a59208dSJustin T. Gibbs */ 2267a59208dSJustin T. Gibbs void 227b6c7d9c3SConrad Meyer devstat_start_transaction(struct devstat *ds, const struct bintime *now) 2287a59208dSJustin T. Gibbs { 229224d5539SPoul-Henning Kamp 2307a59208dSJustin T. Gibbs /* sanity check */ 2317a59208dSJustin T. Gibbs if (ds == NULL) 2327a59208dSJustin T. Gibbs return; 2337a59208dSJustin T. Gibbs 234224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 2357a59208dSJustin T. Gibbs /* 2367a59208dSJustin T. Gibbs * We only want to set the start time when we are going from idle 2377a59208dSJustin T. Gibbs * to busy. The start time is really the start of the latest busy 2387a59208dSJustin T. Gibbs * period. 2397a59208dSJustin T. Gibbs */ 240024932aaSAlexander Motin if (atomic_fetchadd_int(&ds->start_count, 1) == ds->end_count) { 2417194d335SPoul-Henning Kamp if (now != NULL) 2427194d335SPoul-Henning Kamp ds->busy_from = *now; 2437194d335SPoul-Henning Kamp else 2447194d335SPoul-Henning Kamp binuptime(&ds->busy_from); 2457194d335SPoul-Henning Kamp } 246224d5539SPoul-Henning Kamp atomic_add_rel_int(&ds->sequence0, 1); 2473fac94baSGeorge V. Neville-Neil DTRACE_DEVSTAT_START(); 2487194d335SPoul-Henning Kamp } 2497194d335SPoul-Henning Kamp 2507194d335SPoul-Henning Kamp void 2517194d335SPoul-Henning Kamp devstat_start_transaction_bio(struct devstat *ds, struct bio *bp) 2527194d335SPoul-Henning Kamp { 2537194d335SPoul-Henning Kamp 254224d5539SPoul-Henning Kamp /* sanity check */ 255224d5539SPoul-Henning Kamp if (ds == NULL) 256224d5539SPoul-Henning Kamp return; 257224d5539SPoul-Henning Kamp 2587194d335SPoul-Henning Kamp binuptime(&bp->bio_t0); 259*8b220f89SAlexander Motin devstat_start_transaction_bio_t0(ds, bp); 260*8b220f89SAlexander Motin } 261*8b220f89SAlexander Motin 262*8b220f89SAlexander Motin void 263*8b220f89SAlexander Motin devstat_start_transaction_bio_t0(struct devstat *ds, struct bio *bp) 264*8b220f89SAlexander Motin { 265*8b220f89SAlexander Motin 266*8b220f89SAlexander Motin /* sanity check */ 267*8b220f89SAlexander Motin if (ds == NULL) 268*8b220f89SAlexander Motin return; 269*8b220f89SAlexander Motin 2707194d335SPoul-Henning Kamp devstat_start_transaction(ds, &bp->bio_t0); 2713fac94baSGeorge V. Neville-Neil DTRACE_DEVSTAT_BIO_START(); 2727a59208dSJustin T. Gibbs } 2737a59208dSJustin T. Gibbs 2747a59208dSJustin T. Gibbs /* 2757a59208dSJustin T. Gibbs * Record the ending of a transaction, and incrment the various counters. 2767194d335SPoul-Henning Kamp * 2777194d335SPoul-Henning Kamp * Ordering in this function, and in devstat_start_transaction() is VERY 2787194d335SPoul-Henning Kamp * important. The idea here is to run without locks, so we are very 2797194d335SPoul-Henning Kamp * careful to only modify some fields on the way "down" (i.e. at 2807194d335SPoul-Henning Kamp * transaction start) and some fields on the way "up" (i.e. at transaction 2817194d335SPoul-Henning Kamp * completion). One exception is busy_from, which we only modify in 2827194d335SPoul-Henning Kamp * devstat_start_transaction() when there are no outstanding transactions, 2837194d335SPoul-Henning Kamp * and thus it can't be modified in devstat_end_transaction() 2847194d335SPoul-Henning Kamp * simultaneously. 285224d5539SPoul-Henning Kamp * 286224d5539SPoul-Henning Kamp * The sequence0 and sequence1 fields are provided to enable an application 287224d5539SPoul-Henning Kamp * spying on the structures with mmap(2) to tell when a structure is in a 288224d5539SPoul-Henning Kamp * consistent state or not. 289224d5539SPoul-Henning Kamp * 290224d5539SPoul-Henning Kamp * For this to work 100% reliably, it is important that the two fields 291224d5539SPoul-Henning Kamp * are at opposite ends of the structure and that they are incremented 292224d5539SPoul-Henning Kamp * in the opposite order of how a memcpy(3) in userland would copy them. 293224d5539SPoul-Henning Kamp * We assume that the copying happens front to back, but there is actually 294224d5539SPoul-Henning Kamp * no way short of writing your own memcpy(3) replacement to guarantee 295224d5539SPoul-Henning Kamp * this will be the case. 296224d5539SPoul-Henning Kamp * 297224d5539SPoul-Henning Kamp * In addition to this, being a kind of locks, they must be updated with 298224d5539SPoul-Henning Kamp * atomic instructions using appropriate memory barriers. 2997a59208dSJustin T. Gibbs */ 3007a59208dSJustin T. Gibbs void 30160ae52f7SEd Schouten devstat_end_transaction(struct devstat *ds, uint32_t bytes, 3027194d335SPoul-Henning Kamp devstat_tag_type tag_type, devstat_trans_flags flags, 303b6c7d9c3SConrad Meyer const struct bintime *now, const struct bintime *then) 3047a59208dSJustin T. Gibbs { 3057194d335SPoul-Henning Kamp struct bintime dt, lnow; 3067a59208dSJustin T. Gibbs 3077a59208dSJustin T. Gibbs /* sanity check */ 3087a59208dSJustin T. Gibbs if (ds == NULL) 3097a59208dSJustin T. Gibbs return; 3107a59208dSJustin T. Gibbs 3117194d335SPoul-Henning Kamp if (now == NULL) { 312b6c7d9c3SConrad Meyer binuptime(&lnow); 3137194d335SPoul-Henning Kamp now = &lnow; 3147194d335SPoul-Henning Kamp } 3157a59208dSJustin T. Gibbs 316224d5539SPoul-Henning Kamp atomic_add_acq_int(&ds->sequence1, 1); 3177194d335SPoul-Henning Kamp /* Update byte and operations counts */ 3187194d335SPoul-Henning Kamp ds->bytes[flags] += bytes; 3197194d335SPoul-Henning Kamp ds->operations[flags]++; 3207a59208dSJustin T. Gibbs 3217a59208dSJustin T. Gibbs /* 3227a59208dSJustin T. Gibbs * Keep a count of the various tag types sent. 3237a59208dSJustin T. Gibbs */ 3248db3b947SPoul-Henning Kamp if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && 325f80d57eeSPoul-Henning Kamp tag_type != DEVSTAT_TAG_NONE) 3267a59208dSJustin T. Gibbs ds->tag_types[tag_type]++; 3277a59208dSJustin T. Gibbs 3287194d335SPoul-Henning Kamp if (then != NULL) { 3297194d335SPoul-Henning Kamp /* Update duration of operations */ 3307194d335SPoul-Henning Kamp dt = *now; 3317194d335SPoul-Henning Kamp bintime_sub(&dt, then); 3327194d335SPoul-Henning Kamp bintime_add(&ds->duration[flags], &dt); 3337194d335SPoul-Henning Kamp } 3347a59208dSJustin T. Gibbs 3357194d335SPoul-Henning Kamp /* Accumulate busy time */ 3367194d335SPoul-Henning Kamp dt = *now; 3377194d335SPoul-Henning Kamp bintime_sub(&dt, &ds->busy_from); 3387194d335SPoul-Henning Kamp bintime_add(&ds->busy_time, &dt); 3397194d335SPoul-Henning Kamp ds->busy_from = *now; 3407194d335SPoul-Henning Kamp 3417194d335SPoul-Henning Kamp ds->end_count++; 342224d5539SPoul-Henning Kamp atomic_add_rel_int(&ds->sequence0, 1); 3433fac94baSGeorge V. Neville-Neil DTRACE_DEVSTAT_DONE(); 3447a59208dSJustin T. Gibbs } 3457a59208dSJustin T. Gibbs 346f80d57eeSPoul-Henning Kamp void 347b6c7d9c3SConrad Meyer devstat_end_transaction_bio(struct devstat *ds, const struct bio *bp) 348282ac69eSPoul-Henning Kamp { 349e431d66cSAlexander Motin 350e431d66cSAlexander Motin devstat_end_transaction_bio_bt(ds, bp, NULL); 351e431d66cSAlexander Motin } 352e431d66cSAlexander Motin 353e431d66cSAlexander Motin void 354b6c7d9c3SConrad Meyer devstat_end_transaction_bio_bt(struct devstat *ds, const struct bio *bp, 355b6c7d9c3SConrad Meyer const struct bintime *now) 356e431d66cSAlexander Motin { 357282ac69eSPoul-Henning Kamp devstat_trans_flags flg; 358cb847b81SAlexander Motin devstat_tag_type tag; 359282ac69eSPoul-Henning Kamp 360224d5539SPoul-Henning Kamp /* sanity check */ 361224d5539SPoul-Henning Kamp if (ds == NULL) 362224d5539SPoul-Henning Kamp return; 363224d5539SPoul-Henning Kamp 364cb847b81SAlexander Motin if (bp->bio_flags & BIO_ORDERED) 365cb847b81SAlexander Motin tag = DEVSTAT_TAG_ORDERED; 366cb847b81SAlexander Motin else 367cb847b81SAlexander Motin tag = DEVSTAT_TAG_SIMPLE; 368282ac69eSPoul-Henning Kamp if (bp->bio_cmd == BIO_DELETE) 369282ac69eSPoul-Henning Kamp flg = DEVSTAT_FREE; 3709a6844d5SKenneth D. Merry else if ((bp->bio_cmd == BIO_READ) 3719a6844d5SKenneth D. Merry || ((bp->bio_cmd == BIO_ZONE) 3729a6844d5SKenneth D. Merry && (bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES))) 373282ac69eSPoul-Henning Kamp flg = DEVSTAT_READ; 374224d5539SPoul-Henning Kamp else if (bp->bio_cmd == BIO_WRITE) 375282ac69eSPoul-Henning Kamp flg = DEVSTAT_WRITE; 376224d5539SPoul-Henning Kamp else 377224d5539SPoul-Henning Kamp flg = DEVSTAT_NO_DATA; 378282ac69eSPoul-Henning Kamp 379282ac69eSPoul-Henning Kamp devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, 380cb847b81SAlexander Motin tag, flg, now, &bp->bio_t0); 3813fac94baSGeorge V. Neville-Neil DTRACE_DEVSTAT_BIO_DONE(); 382282ac69eSPoul-Henning Kamp } 383282ac69eSPoul-Henning Kamp 3847a59208dSJustin T. Gibbs /* 3857a59208dSJustin T. Gibbs * This is the sysctl handler for the devstat package. The data pushed out 3867a59208dSJustin T. Gibbs * on the kern.devstat.all sysctl variable consists of the current devstat 3877a59208dSJustin T. Gibbs * generation number, and then an array of devstat structures, one for each 3887a59208dSJustin T. Gibbs * device in the system. 3897a59208dSJustin T. Gibbs * 390224d5539SPoul-Henning Kamp * This is more cryptic that obvious, but basically we neither can nor 391224d5539SPoul-Henning Kamp * want to hold the devstat_mutex for any amount of time, so we grab it 392224d5539SPoul-Henning Kamp * only when we need to and keep an eye on devstat_generation all the time. 3937a59208dSJustin T. Gibbs */ 3947a59208dSJustin T. Gibbs static int 39582d9ae4eSPoul-Henning Kamp sysctl_devstat(SYSCTL_HANDLER_ARGS) 3967a59208dSJustin T. Gibbs { 397224d5539SPoul-Henning Kamp int error; 3986e17a0d7SHartmut Brandt long mygen; 3997a59208dSJustin T. Gibbs struct devstat *nds; 400224d5539SPoul-Henning Kamp 401224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 4027a59208dSJustin T. Gibbs 4037a59208dSJustin T. Gibbs /* 404224d5539SPoul-Henning Kamp * XXX devstat_generation should really be "volatile" but that 405224d5539SPoul-Henning Kamp * XXX freaks out the sysctl macro below. The places where we 406224d5539SPoul-Henning Kamp * XXX change it and inspect it are bracketed in the mutex which 407e3043798SPedro F. Giffuni * XXX guarantees us proper write barriers. I don't believe the 408224d5539SPoul-Henning Kamp * XXX compiler is allowed to optimize mygen away across calls 409224d5539SPoul-Henning Kamp * XXX to other functions, so the following is belived to be safe. 4107a59208dSJustin T. Gibbs */ 411224d5539SPoul-Henning Kamp mygen = devstat_generation; 4127a59208dSJustin T. Gibbs 413224d5539SPoul-Henning Kamp error = SYSCTL_OUT(req, &mygen, sizeof(mygen)); 414224d5539SPoul-Henning Kamp 415037c3d0fSPoul-Henning Kamp if (devstat_num_devs == 0) 416037c3d0fSPoul-Henning Kamp return(0); 417037c3d0fSPoul-Henning Kamp 418224d5539SPoul-Henning Kamp if (error != 0) 419224d5539SPoul-Henning Kamp return (error); 420224d5539SPoul-Henning Kamp 421224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 422224d5539SPoul-Henning Kamp nds = STAILQ_FIRST(&device_statq); 423224d5539SPoul-Henning Kamp if (mygen != devstat_generation) 424224d5539SPoul-Henning Kamp error = EBUSY; 425224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 426224d5539SPoul-Henning Kamp 427224d5539SPoul-Henning Kamp if (error != 0) 428224d5539SPoul-Henning Kamp return (error); 429224d5539SPoul-Henning Kamp 430224d5539SPoul-Henning Kamp for (;nds != NULL;) { 4317a59208dSJustin T. Gibbs error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); 432224d5539SPoul-Henning Kamp if (error != 0) 433224d5539SPoul-Henning Kamp return (error); 434224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 435224d5539SPoul-Henning Kamp if (mygen != devstat_generation) 436224d5539SPoul-Henning Kamp error = EBUSY; 437224d5539SPoul-Henning Kamp else 438224d5539SPoul-Henning Kamp nds = STAILQ_NEXT(nds, dev_links); 439224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 440224d5539SPoul-Henning Kamp if (error != 0) 441224d5539SPoul-Henning Kamp return (error); 442224d5539SPoul-Henning Kamp } 4437a59208dSJustin T. Gibbs return(error); 4447a59208dSJustin T. Gibbs } 4457a59208dSJustin T. Gibbs 4467a59208dSJustin T. Gibbs /* 4477a59208dSJustin T. Gibbs * Sysctl entries for devstat. The first one is a node that all the rest 4487a59208dSJustin T. Gibbs * hang off of. 4497a59208dSJustin T. Gibbs */ 4507029da5cSPawel Biernacki static SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 4516472ac3dSEd Schouten "Device Statistics"); 4527a59208dSJustin T. Gibbs 4537029da5cSPawel Biernacki SYSCTL_PROC(_kern_devstat, OID_AUTO, all, 4547029da5cSPawel Biernacki CTLFLAG_RD | CTLTYPE_OPAQUE | CTLFLAG_MPSAFE, NULL, 0, 4557029da5cSPawel Biernacki sysctl_devstat, "S,devstat", 4567029da5cSPawel Biernacki "All devices in the devstat list"); 4577a59208dSJustin T. Gibbs /* 4587a59208dSJustin T. Gibbs * Export the number of devices in the system so that userland utilities 4597a59208dSJustin T. Gibbs * can determine how much memory to allocate to hold all the devices. 4607a59208dSJustin T. Gibbs */ 4613d177f46SBill Fumerola SYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, 4623d177f46SBill Fumerola &devstat_num_devs, 0, "Number of devices in the devstat list"); 4636e17a0d7SHartmut Brandt SYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, 4649701cd40SJohn Baldwin &devstat_generation, 0, "Devstat list generation"); 4653d177f46SBill Fumerola SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, 4663d177f46SBill Fumerola &devstat_version, 0, "Devstat list version number"); 467c7e73d59SPoul-Henning Kamp 468224d5539SPoul-Henning Kamp /* 469224d5539SPoul-Henning Kamp * Allocator for struct devstat structures. We sub-allocate these from pages 470224d5539SPoul-Henning Kamp * which we get from malloc. These pages are exported for mmap(2)'ing through 471224d5539SPoul-Henning Kamp * a miniature device driver 472224d5539SPoul-Henning Kamp */ 473224d5539SPoul-Henning Kamp 474c7e73d59SPoul-Henning Kamp #define statsperpage (PAGE_SIZE / sizeof(struct devstat)) 475c7e73d59SPoul-Henning Kamp 476c7e73d59SPoul-Henning Kamp static d_mmap_t devstat_mmap; 477c7e73d59SPoul-Henning Kamp 478c7e73d59SPoul-Henning Kamp static struct cdevsw devstat_cdevsw = { 479dc08ffecSPoul-Henning Kamp .d_version = D_VERSION, 480c7e73d59SPoul-Henning Kamp .d_mmap = devstat_mmap, 481c7e73d59SPoul-Henning Kamp .d_name = "devstat", 482c7e73d59SPoul-Henning Kamp }; 483c7e73d59SPoul-Henning Kamp 484c7e73d59SPoul-Henning Kamp struct statspage { 485c7e73d59SPoul-Henning Kamp TAILQ_ENTRY(statspage) list; 486c7e73d59SPoul-Henning Kamp struct devstat *stat; 487c7e73d59SPoul-Henning Kamp u_int nfree; 488c7e73d59SPoul-Henning Kamp }; 489c7e73d59SPoul-Henning Kamp 490c7e73d59SPoul-Henning Kamp static TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist); 491c7e73d59SPoul-Henning Kamp static MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics"); 492c7e73d59SPoul-Henning Kamp 493c7e73d59SPoul-Henning Kamp static int 494cfd7baceSRobert Noland devstat_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 495cfd7baceSRobert Noland int nprot, vm_memattr_t *memattr) 496c7e73d59SPoul-Henning Kamp { 497c7e73d59SPoul-Henning Kamp struct statspage *spp; 498c7e73d59SPoul-Henning Kamp 499c7e73d59SPoul-Henning Kamp if (nprot != VM_PROT_READ) 500c7e73d59SPoul-Henning Kamp return (-1); 501b646225aSMaksim Yevmenkin mtx_lock(&devstat_mutex); 502c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 503c7e73d59SPoul-Henning Kamp if (offset == 0) { 504c7e73d59SPoul-Henning Kamp *paddr = vtophys(spp->stat); 505b646225aSMaksim Yevmenkin mtx_unlock(&devstat_mutex); 506c7e73d59SPoul-Henning Kamp return (0); 507c7e73d59SPoul-Henning Kamp } 508c7e73d59SPoul-Henning Kamp offset -= PAGE_SIZE; 509c7e73d59SPoul-Henning Kamp } 510b646225aSMaksim Yevmenkin mtx_unlock(&devstat_mutex); 511c7e73d59SPoul-Henning Kamp return (-1); 512c7e73d59SPoul-Henning Kamp } 513c7e73d59SPoul-Henning Kamp 514c7e73d59SPoul-Henning Kamp static struct devstat * 515c7e73d59SPoul-Henning Kamp devstat_alloc(void) 516c7e73d59SPoul-Henning Kamp { 517c7e73d59SPoul-Henning Kamp struct devstat *dsp; 51839df6da8SAttilio Rao struct statspage *spp, *spp2; 519c7e73d59SPoul-Henning Kamp u_int u; 520c7e73d59SPoul-Henning Kamp static int once; 521c7e73d59SPoul-Henning Kamp 522224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_NOTOWNED); 523094efe75SKenneth D. Merry if (!once) { 524094efe75SKenneth D. Merry make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME, 525b646225aSMaksim Yevmenkin &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0444, 526094efe75SKenneth D. Merry DEVSTAT_DEVICE_NAME); 527094efe75SKenneth D. Merry once = 1; 528c7e73d59SPoul-Henning Kamp } 52939df6da8SAttilio Rao spp2 = NULL; 530224d5539SPoul-Henning Kamp mtx_lock(&devstat_mutex); 531224d5539SPoul-Henning Kamp for (;;) { 532c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 533c7e73d59SPoul-Henning Kamp if (spp->nfree > 0) 534c7e73d59SPoul-Henning Kamp break; 535c7e73d59SPoul-Henning Kamp } 536224d5539SPoul-Henning Kamp if (spp != NULL) 537224d5539SPoul-Henning Kamp break; 538224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 53939df6da8SAttilio Rao spp2 = malloc(sizeof *spp, M_DEVSTAT, M_ZERO | M_WAITOK); 54039df6da8SAttilio Rao spp2->stat = malloc(PAGE_SIZE, M_DEVSTAT, M_ZERO | M_WAITOK); 54139df6da8SAttilio Rao spp2->nfree = statsperpage; 54239df6da8SAttilio Rao 543224d5539SPoul-Henning Kamp /* 54439df6da8SAttilio Rao * If free statspages were added while the lock was released 54539df6da8SAttilio Rao * just reuse them. 54639df6da8SAttilio Rao */ 54739df6da8SAttilio Rao mtx_lock(&devstat_mutex); 54839df6da8SAttilio Rao TAILQ_FOREACH(spp, &pagelist, list) 54939df6da8SAttilio Rao if (spp->nfree > 0) 55039df6da8SAttilio Rao break; 55139df6da8SAttilio Rao if (spp == NULL) { 55239df6da8SAttilio Rao spp = spp2; 55339df6da8SAttilio Rao 55439df6da8SAttilio Rao /* 55539df6da8SAttilio Rao * It would make more sense to add the new page at the 55639df6da8SAttilio Rao * head but the order on the list determine the 55739df6da8SAttilio Rao * sequence of the mapping so we can't do that. 558224d5539SPoul-Henning Kamp */ 559224d5539SPoul-Henning Kamp TAILQ_INSERT_TAIL(&pagelist, spp, list); 56039df6da8SAttilio Rao } else 56139df6da8SAttilio Rao break; 562c7e73d59SPoul-Henning Kamp } 563c7e73d59SPoul-Henning Kamp dsp = spp->stat; 564c7e73d59SPoul-Henning Kamp for (u = 0; u < statsperpage; u++) { 565c7e73d59SPoul-Henning Kamp if (dsp->allocated == 0) 566c7e73d59SPoul-Henning Kamp break; 567c7e73d59SPoul-Henning Kamp dsp++; 568c7e73d59SPoul-Henning Kamp } 569c7e73d59SPoul-Henning Kamp spp->nfree--; 570c7e73d59SPoul-Henning Kamp dsp->allocated = 1; 571224d5539SPoul-Henning Kamp mtx_unlock(&devstat_mutex); 57239df6da8SAttilio Rao if (spp2 != NULL && spp2 != spp) { 57339df6da8SAttilio Rao free(spp2->stat, M_DEVSTAT); 57439df6da8SAttilio Rao free(spp2, M_DEVSTAT); 57539df6da8SAttilio Rao } 576c7e73d59SPoul-Henning Kamp return (dsp); 577c7e73d59SPoul-Henning Kamp } 578c7e73d59SPoul-Henning Kamp 579c7e73d59SPoul-Henning Kamp static void 580c7e73d59SPoul-Henning Kamp devstat_free(struct devstat *dsp) 581c7e73d59SPoul-Henning Kamp { 582c7e73d59SPoul-Henning Kamp struct statspage *spp; 583c7e73d59SPoul-Henning Kamp 584224d5539SPoul-Henning Kamp mtx_assert(&devstat_mutex, MA_OWNED); 585c7e73d59SPoul-Henning Kamp bzero(dsp, sizeof *dsp); 586c7e73d59SPoul-Henning Kamp TAILQ_FOREACH(spp, &pagelist, list) { 587c7e73d59SPoul-Henning Kamp if (dsp >= spp->stat && dsp < (spp->stat + statsperpage)) { 588c7e73d59SPoul-Henning Kamp spp->nfree++; 589c7e73d59SPoul-Henning Kamp return; 590c7e73d59SPoul-Henning Kamp } 591c7e73d59SPoul-Henning Kamp } 592c7e73d59SPoul-Henning Kamp } 5937194d335SPoul-Henning Kamp 5947194d335SPoul-Henning Kamp SYSCTL_INT(_debug_sizeof, OID_AUTO, devstat, CTLFLAG_RD, 595f0188618SHans Petter Selasky SYSCTL_NULL_INT_PTR, sizeof(struct devstat), "sizeof(struct devstat)"); 596