1 /* 2 * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/kernel.h> 33 #include <sys/systm.h> 34 #include <sys/bio.h> 35 #include <sys/sysctl.h> 36 #include <sys/malloc.h> 37 #include <sys/conf.h> 38 #include <vm/vm.h> 39 #include <vm/pmap.h> 40 41 #include <sys/devicestat.h> 42 43 static int devstat_num_devs; 44 static long devstat_generation; 45 static int devstat_version = DEVSTAT_VERSION; 46 static int devstat_current_devnumber; 47 48 static struct devstatlist device_statq; 49 static struct devstat *devstat_alloc(void); 50 static void devstat_free(struct devstat *); 51 static void devstat_add_entry(struct devstat *ds, const char *dev_name, 52 int unit_number, u_int32_t block_size, 53 devstat_support_flags flags, 54 devstat_type_flags device_type, 55 devstat_priority priority); 56 57 /* 58 * Allocate a devstat and initialize it 59 */ 60 struct devstat * 61 devstat_new_entry(const char *dev_name, 62 int unit_number, u_int32_t block_size, 63 devstat_support_flags flags, 64 devstat_type_flags device_type, 65 devstat_priority priority) 66 { 67 struct devstat *ds; 68 69 ds = devstat_alloc(); 70 devstat_add_entry(ds, dev_name, unit_number, block_size, 71 flags, device_type, priority); 72 return (ds); 73 } 74 75 /* 76 * Take a malloced and zeroed devstat structure given to us, fill it in 77 * and add it to the queue of devices. 78 */ 79 static void 80 devstat_add_entry(struct devstat *ds, const char *dev_name, 81 int unit_number, u_int32_t block_size, 82 devstat_support_flags flags, 83 devstat_type_flags device_type, 84 devstat_priority priority) 85 { 86 struct devstatlist *devstat_head; 87 struct devstat *ds_tmp; 88 89 if (ds == NULL) 90 return; 91 92 if (devstat_num_devs == 0) 93 STAILQ_INIT(&device_statq); 94 95 devstat_generation++; 96 devstat_num_devs++; 97 98 devstat_head = &device_statq; 99 100 /* 101 * Priority sort. Each driver passes in its priority when it adds 102 * its devstat entry. Drivers are sorted first by priority, and 103 * then by probe order. 104 * 105 * For the first device, we just insert it, since the priority 106 * doesn't really matter yet. Subsequent devices are inserted into 107 * the list using the order outlined above. 108 */ 109 if (devstat_num_devs == 1) 110 STAILQ_INSERT_TAIL(devstat_head, ds, dev_links); 111 else { 112 STAILQ_FOREACH(ds_tmp, devstat_head, dev_links) { 113 struct devstat *ds_next; 114 115 ds_next = STAILQ_NEXT(ds_tmp, dev_links); 116 117 /* 118 * If we find a break between higher and lower 119 * priority items, and if this item fits in the 120 * break, insert it. This also applies if the 121 * "lower priority item" is the end of the list. 122 */ 123 if ((priority <= ds_tmp->priority) 124 && ((ds_next == NULL) 125 || (priority > ds_next->priority))) { 126 STAILQ_INSERT_AFTER(devstat_head, ds_tmp, ds, 127 dev_links); 128 break; 129 } else if (priority > ds_tmp->priority) { 130 /* 131 * If this is the case, we should be able 132 * to insert ourselves at the head of the 133 * list. If we can't, something is wrong. 134 */ 135 if (ds_tmp == STAILQ_FIRST(devstat_head)) { 136 STAILQ_INSERT_HEAD(devstat_head, 137 ds, dev_links); 138 break; 139 } else { 140 STAILQ_INSERT_TAIL(devstat_head, 141 ds, dev_links); 142 printf("devstat_add_entry: HELP! " 143 "sorting problem detected " 144 "for %s%d\n", dev_name, 145 unit_number); 146 break; 147 } 148 } 149 } 150 } 151 152 ds->device_number = devstat_current_devnumber++; 153 ds->unit_number = unit_number; 154 strlcpy(ds->device_name, dev_name, DEVSTAT_NAME_LEN); 155 ds->block_size = block_size; 156 ds->flags = flags; 157 ds->device_type = device_type; 158 ds->priority = priority; 159 getmicrotime(&ds->dev_creation_time); 160 } 161 162 /* 163 * Remove a devstat structure from the list of devices. 164 */ 165 void 166 devstat_remove_entry(struct devstat *ds) 167 { 168 struct devstatlist *devstat_head; 169 170 if (ds == NULL) 171 return; 172 173 devstat_generation++; 174 devstat_num_devs--; 175 176 devstat_head = &device_statq; 177 178 /* Remove this entry from the devstat queue */ 179 STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); 180 if (ds->allocated) 181 devstat_free(ds); 182 } 183 184 /* 185 * Record a transaction start. 186 */ 187 void 188 devstat_start_transaction(struct devstat *ds) 189 { 190 /* sanity check */ 191 if (ds == NULL) 192 return; 193 194 /* 195 * We only want to set the start time when we are going from idle 196 * to busy. The start time is really the start of the latest busy 197 * period. 198 */ 199 if (ds->busy_count == 0) 200 getmicrouptime(&ds->start_time); 201 ds->busy_count++; 202 } 203 204 /* 205 * Record the ending of a transaction, and incrment the various counters. 206 */ 207 void 208 devstat_end_transaction(struct devstat *ds, u_int32_t bytes, 209 devstat_tag_type tag_type, devstat_trans_flags flags) 210 { 211 struct timeval busy_time; 212 213 /* sanity check */ 214 if (ds == NULL) 215 return; 216 217 getmicrouptime(&ds->last_comp_time); 218 ds->busy_count--; 219 220 /* 221 * There might be some transactions (DEVSTAT_NO_DATA) that don't 222 * transfer any data. 223 */ 224 if (flags == DEVSTAT_READ) { 225 ds->bytes_read += bytes; 226 ds->num_reads++; 227 } else if (flags == DEVSTAT_WRITE) { 228 ds->bytes_written += bytes; 229 ds->num_writes++; 230 } else if (flags == DEVSTAT_FREE) { 231 ds->bytes_freed += bytes; 232 ds->num_frees++; 233 } else 234 ds->num_other++; 235 236 /* 237 * Keep a count of the various tag types sent. 238 */ 239 if ((ds->flags & DEVSTAT_NO_ORDERED_TAGS) == 0 && 240 tag_type != DEVSTAT_TAG_NONE) 241 ds->tag_types[tag_type]++; 242 243 /* 244 * We only update the busy time when we go idle. Otherwise, this 245 * calculation would require many more clock cycles. 246 */ 247 if (ds->busy_count == 0) { 248 /* Calculate how long we were busy */ 249 busy_time = ds->last_comp_time; 250 timevalsub(&busy_time, &ds->start_time); 251 252 /* Add our busy time to the total busy time. */ 253 timevaladd(&ds->busy_time, &busy_time); 254 } else if (ds->busy_count < 0) 255 printf("devstat_end_transaction: HELP!! busy_count " 256 "for %s%d is < 0 (%d)!\n", ds->device_name, 257 ds->unit_number, ds->busy_count); 258 } 259 260 void 261 devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) 262 { 263 devstat_trans_flags flg; 264 265 if (bp->bio_cmd == BIO_DELETE) 266 flg = DEVSTAT_FREE; 267 else if (bp->bio_cmd == BIO_READ) 268 flg = DEVSTAT_READ; 269 else 270 flg = DEVSTAT_WRITE; 271 272 devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, 273 DEVSTAT_TAG_SIMPLE, flg); 274 } 275 276 /* 277 * This is the sysctl handler for the devstat package. The data pushed out 278 * on the kern.devstat.all sysctl variable consists of the current devstat 279 * generation number, and then an array of devstat structures, one for each 280 * device in the system. 281 * 282 * I'm really not too fond of this method of doing things, but there really 283 * aren't that many alternatives. We must have some method of making sure 284 * that the generation number the user gets corresponds with the data the 285 * user gets. If the user makes a separate sysctl call to get the 286 * generation, and then a sysctl call to get the device statistics, the 287 * device list could have changed in that brief period of time. By 288 * supplying the generation number along with the statistics output, we can 289 * guarantee that the generation number and the statistics match up. 290 */ 291 static int 292 sysctl_devstat(SYSCTL_HANDLER_ARGS) 293 { 294 int error, i; 295 struct devstat *nds; 296 struct devstatlist *devstat_head; 297 298 if (devstat_num_devs == 0) 299 return(EINVAL); 300 301 error = 0; 302 devstat_head = &device_statq; 303 304 /* 305 * First push out the generation number. 306 */ 307 error = SYSCTL_OUT(req, &devstat_generation, sizeof(long)); 308 309 /* 310 * Now push out all the devices. 311 */ 312 for (i = 0, nds = STAILQ_FIRST(devstat_head); 313 (nds != NULL) && (i < devstat_num_devs) && (error == 0); 314 nds = STAILQ_NEXT(nds, dev_links), i++) 315 error = SYSCTL_OUT(req, nds, sizeof(struct devstat)); 316 317 return(error); 318 } 319 320 /* 321 * Sysctl entries for devstat. The first one is a node that all the rest 322 * hang off of. 323 */ 324 SYSCTL_NODE(_kern, OID_AUTO, devstat, CTLFLAG_RD, 0, "Device Statistics"); 325 326 SYSCTL_PROC(_kern_devstat, OID_AUTO, all, CTLFLAG_RD|CTLTYPE_OPAQUE, 327 0, 0, sysctl_devstat, "S,devstat", "All devices in the devstat list"); 328 /* 329 * Export the number of devices in the system so that userland utilities 330 * can determine how much memory to allocate to hold all the devices. 331 */ 332 SYSCTL_INT(_kern_devstat, OID_AUTO, numdevs, CTLFLAG_RD, 333 &devstat_num_devs, 0, "Number of devices in the devstat list"); 334 SYSCTL_LONG(_kern_devstat, OID_AUTO, generation, CTLFLAG_RD, 335 &devstat_generation, 0, "Devstat list generation"); 336 SYSCTL_INT(_kern_devstat, OID_AUTO, version, CTLFLAG_RD, 337 &devstat_version, 0, "Devstat list version number"); 338 339 #define statsperpage (PAGE_SIZE / sizeof(struct devstat)) 340 341 static d_mmap_t devstat_mmap; 342 343 static struct cdevsw devstat_cdevsw = { 344 .d_open = nullopen, 345 .d_close = nullclose, 346 .d_mmap = devstat_mmap, 347 .d_name = "devstat", 348 }; 349 350 struct statspage { 351 TAILQ_ENTRY(statspage) list; 352 struct devstat *stat; 353 u_int nfree; 354 }; 355 356 static TAILQ_HEAD(, statspage) pagelist = TAILQ_HEAD_INITIALIZER(pagelist); 357 static MALLOC_DEFINE(M_DEVSTAT, "devstat", "Device statistics"); 358 359 static int 360 devstat_mmap(dev_t dev, vm_offset_t offset, vm_offset_t *paddr, int nprot) 361 { 362 struct statspage *spp; 363 364 if (nprot != VM_PROT_READ) 365 return (-1); 366 TAILQ_FOREACH(spp, &pagelist, list) { 367 if (offset == 0) { 368 *paddr = vtophys(spp->stat); 369 return (0); 370 } 371 offset -= PAGE_SIZE; 372 } 373 return (-1); 374 } 375 376 static struct devstat * 377 devstat_alloc(void) 378 { 379 struct devstat *dsp; 380 struct statspage *spp; 381 u_int u; 382 static int once; 383 384 if (!once) { 385 make_dev(&devstat_cdevsw, 0, 386 UID_ROOT, GID_WHEEL, 0400, "devstat"); 387 once++; 388 } 389 TAILQ_FOREACH(spp, &pagelist, list) { 390 if (spp->nfree > 0) 391 break; 392 } 393 if (spp == NULL) { 394 spp = malloc(sizeof *spp, M_DEVSTAT, M_ZERO | M_WAITOK); 395 TAILQ_INSERT_TAIL(&pagelist, spp, list); 396 spp->stat = malloc(PAGE_SIZE, M_DEVSTAT, M_ZERO | M_WAITOK); 397 spp->nfree = statsperpage; 398 } 399 dsp = spp->stat; 400 for (u = 0; u < statsperpage; u++) { 401 if (dsp->allocated == 0) 402 break; 403 dsp++; 404 } 405 spp->nfree--; 406 dsp->allocated = 1; 407 return (dsp); 408 } 409 410 static void 411 devstat_free(struct devstat *dsp) 412 { 413 struct statspage *spp; 414 415 bzero(dsp, sizeof *dsp); 416 TAILQ_FOREACH(spp, &pagelist, list) { 417 if (dsp >= spp->stat && dsp < (spp->stat + statsperpage)) { 418 spp->nfree++; 419 return; 420 } 421 } 422 } 423