xref: /freebsd/stand/i386/libi386/biosdisk.c (revision fe2494903422ba3b924eba82cb63a6a9188fad7a)
1 /*-
2  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  * BIOS disk device handling.
33  *
34  * Ideas and algorithms from:
35  *
36  * - NetBSD libi386/biosdisk.c
37  * - FreeBSD biosboot/disk.c
38  *
39  */
40 
41 #include <sys/disk.h>
42 #include <sys/limits.h>
43 #include <stand.h>
44 #include <machine/bootinfo.h>
45 #include <stdarg.h>
46 
47 #include <bootstrap.h>
48 #include <btxv86.h>
49 #include <edd.h>
50 #include "disk.h"
51 #include "libi386.h"
52 
/* Address of the BIOS byte consulted for the number of fixed disks. */
#define	BIOS_NUMDRIVES		0x475
/* Unit in which callers express block numbers and I/O sizes. */
#define	BIOSDISK_SECSIZE	512
#define	BUFSIZE			(1 * BIOSDISK_SECSIZE)

/* BIOS disk type reported for ATAPI floppies. */
#define	DT_ATAPI	0x10

/* Major numbers for the devices we frontend for. */
#define	WDMAJOR		0
#define	WFDMAJOR	1
#define	FDMAJOR		2
#define	DAMAJOR		4

/*
 * DEBUG() prints a message prefixed with the name of the calling function
 * when DISK_DEBUG is defined, and expands to nothing otherwise.
 */
#ifndef DISK_DEBUG
#define	DEBUG(fmt, args...)
#else
#define	DEBUG(fmt, args...)	printf("%s: " fmt "\n", __func__, ## args)
#endif
68 
69 /*
70  * List of BIOS devices, translation from disk unit number to
71  * BIOS unit number.
72  */
73 static struct bdinfo
74 {
75 	int		bd_unit;	/* BIOS unit number */
76 	int		bd_cyl;		/* BIOS geometry */
77 	int		bd_hds;
78 	int		bd_sec;
79 	int		bd_flags;
80 #define	BD_MODEINT13	0x0000
81 #define	BD_MODEEDD1	0x0001
82 #define	BD_MODEEDD3	0x0002
83 #define	BD_MODEEDD	(BD_MODEEDD1 | BD_MODEEDD3)
84 #define	BD_MODEMASK	0x0003
85 #define	BD_FLOPPY	0x0004
86 #define	BD_NO_MEDIA	0x0008
87 	int		bd_type;	/* BIOS 'drive type' (floppy only) */
88 	uint16_t	bd_sectorsize;	/* Sector size */
89 	uint64_t	bd_sectors;	/* Disk size */
90 	int		bd_open;	/* reference counter */
91 	void		*bd_bcache;	/* buffer cache data */
92 } bdinfo [MAXBDDEV];
93 static int nbdinfo = 0;
94 
95 #define	BD(dev)		(bdinfo[(dev)->dd.d_unit])
96 #define	BD_RD		0
97 #define	BD_WR		1
98 
99 static void bd_io_workaround(struct disk_devdesc *dev);
100 
101 static int bd_io(struct disk_devdesc *, daddr_t, int, caddr_t, int);
102 static int bd_int13probe(struct bdinfo *bd);
103 
104 static int bd_init(void);
105 static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
106     char *buf, size_t *rsize);
107 static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
108     char *buf, size_t *rsize);
109 static int bd_open(struct open_file *f, ...);
110 static int bd_close(struct open_file *f);
111 static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
112 static int bd_print(int verbose);
113 
114 struct devsw biosdisk = {
115 	"disk",
116 	DEVT_DISK,
117 	bd_init,
118 	bd_strategy,
119 	bd_open,
120 	bd_close,
121 	bd_ioctl,
122 	bd_print,
123 	NULL
124 };
125 
126 /*
127  * Translate between BIOS device numbers and our private unit numbers.
128  */
129 int
130 bd_bios2unit(int biosdev)
131 {
132 	int i;
133 
134 	DEBUG("looking for bios device 0x%x", biosdev);
135 	for (i = 0; i < nbdinfo; i++) {
136 		DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit);
137 		if (bdinfo[i].bd_unit == biosdev)
138 			return (i);
139 	}
140 	return (-1);
141 }
142 
143 int
144 bd_unit2bios(int unit)
145 {
146 
147 	if ((unit >= 0) && (unit < nbdinfo))
148 		return (bdinfo[unit].bd_unit);
149 	return (-1);
150 }
151 
152 /*
153  * Quiz the BIOS for disk devices, save a little info about them.
154  */
155 static int
156 bd_init(void)
157 {
158 	int base, unit, nfd = 0;
159 
160 	/* sequence 0, 0x80 */
161 	for (base = 0; base <= 0x80; base += 0x80) {
162 		for (unit = base; (nbdinfo < MAXBDDEV); unit++) {
163 #ifndef VIRTUALBOX
164 			/*
165 			 * Check the BIOS equipment list for number
166 			 * of fixed disks.
167 			 */
168 			if (base == 0x80 &&
169 			    (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES)))
170 				break;
171 #endif
172 			bdinfo[nbdinfo].bd_open = 0;
173 			bdinfo[nbdinfo].bd_bcache = NULL;
174 			bdinfo[nbdinfo].bd_unit = unit;
175 			bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0;
176 			if (!bd_int13probe(&bdinfo[nbdinfo]))
177 				break;
178 
179 			/* XXX we need "disk aliases" to make this simpler */
180 			printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ?
181 			    ('A' + unit): ('C' + unit - 0x80), nbdinfo);
182 			nbdinfo++;
183 			if (base == 0x80)
184 				nfd++;
185 		}
186 	}
187 	bcache_add_dev(nbdinfo);
188 	return (0);
189 }
190 
191 /*
192  * Return EDD version or 0 if EDD is not supported on this drive.
193  */
194 static int
195 bd_check_extensions(int unit)
196 {
197 	/* Determine if we can use EDD with this device. */
198 	v86.ctl = V86_FLAGS;
199 	v86.addr = 0x13;
200 	v86.eax = 0x4100;
201 	v86.edx = unit;
202 	v86.ebx = 0x55aa;
203 	v86int();
204 
205 	if (V86_CY(v86.efl) ||			/* carry set */
206 	    (v86.ebx & 0xffff) != 0xaa55)	/* signature */
207 		return (0);
208 
209 	/* extended disk access functions (AH=42h-44h,47h,48h) supported */
210 	if ((v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
211 		return (0);
212 
213 	return ((v86.eax >> 8) & 0xff);
214 }
215 
216 static void
217 bd_reset_disk(int unit)
218 {
219 	/* reset disk */
220 	v86.ctl = V86_FLAGS;
221 	v86.addr = 0x13;
222 	v86.eax = 0;
223 	v86.edx = unit;
224 	v86int();
225 }
226 
227 /*
228  * Read CHS info. Return 0 on success, error otherwise.
229  */
230 static int
231 bd_get_diskinfo_std(struct bdinfo *bd)
232 {
233 	bzero(&v86, sizeof(v86));
234 	v86.ctl = V86_FLAGS;
235 	v86.addr = 0x13;
236 	v86.eax = 0x800;
237 	v86.edx = bd->bd_unit;
238 	v86int();
239 
240 	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
241 		return ((v86.eax & 0xff00) >> 8);
242 
243 	/* return custom error on absurd sector number */
244 	if ((v86.ecx & 0x3f) == 0)
245 		return (0x60);
246 
247 	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
248 	/* Convert max head # -> # of heads */
249 	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
250 	bd->bd_sec = v86.ecx & 0x3f;
251 	bd->bd_type = v86.ebx;
252 	bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
253 
254 	return (0);
255 }
256 
257 /*
258  * Read EDD info. Return 0 on success, error otherwise.
259  */
260 static int
261 bd_get_diskinfo_ext(struct bdinfo *bd)
262 {
263 	struct edd_params params;
264 	uint64_t total;
265 
266 	/* Get disk params */
267 	bzero(&params, sizeof(params));
268 	params.len = sizeof(params);
269 	v86.ctl = V86_FLAGS;
270 	v86.addr = 0x13;
271 	v86.eax = 0x4800;
272 	v86.edx = bd->bd_unit;
273 	v86.ds = VTOPSEG(&params);
274 	v86.esi = VTOPOFF(&params);
275 	v86int();
276 
277 	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
278 		return ((v86.eax & 0xff00) >> 8);
279 
280 	/*
281 	 * Sector size must be a multiple of 512 bytes.
282 	 * An alternate test would be to check power of 2,
283 	 * powerof2(params.sector_size).
284 	 * 4K is largest read buffer we can use at this time.
285 	 */
286 	if (params.sector_size >= 512 &&
287 	    params.sector_size <= 4096 &&
288 	    (params.sector_size % BIOSDISK_SECSIZE) == 0)
289 		bd->bd_sectorsize = params.sector_size;
290 
291 	bd->bd_cyl = params.cylinders;
292 	bd->bd_hds = params.heads;
293 	bd->bd_sec = params.sectors_per_track;
294 
295 	if (params.sectors != 0) {
296 		total = params.sectors;
297 	} else {
298 		total = (uint64_t)params.cylinders *
299 		    params.heads * params.sectors_per_track;
300 	}
301 	bd->bd_sectors = total;
302 
303 	return (0);
304 }
305 
306 /*
307  * Try to detect a device supported by the legacy int13 BIOS
308  */
309 static int
310 bd_int13probe(struct bdinfo *bd)
311 {
312 	int edd;
313 	int ret;
314 
315 	bd->bd_flags &= ~BD_NO_MEDIA;
316 
317 	edd = bd_check_extensions(bd->bd_unit);
318 	if (edd == 0)
319 		bd->bd_flags |= BD_MODEINT13;
320 	else if (edd < 0x30)
321 		bd->bd_flags |= BD_MODEEDD1;
322 	else
323 		bd->bd_flags |= BD_MODEEDD3;
324 
325 	/* Default sector size */
326 	bd->bd_sectorsize = BIOSDISK_SECSIZE;
327 
328 	/*
329 	 * Test if the floppy device is present, so we can avoid receiving
330 	 * bogus information from bd_get_diskinfo_std().
331 	 */
332 	if (bd->bd_unit < 0x80) {
333 		/* reset disk */
334 		bd_reset_disk(bd->bd_unit);
335 
336 		/* Get disk type */
337 		v86.ctl = V86_FLAGS;
338 		v86.addr = 0x13;
339 		v86.eax = 0x1500;
340 		v86.edx = bd->bd_unit;
341 		v86int();
342 		if (V86_CY(v86.efl) || (v86.eax & 0x300) == 0)
343 			return (0);
344 	}
345 
346 	ret = 1;
347 	if (edd != 0)
348 		ret = bd_get_diskinfo_ext(bd);
349 	if (ret != 0 || bd->bd_sectors == 0)
350 		ret = bd_get_diskinfo_std(bd);
351 
352 	if (ret != 0 && bd->bd_unit < 0x80) {
353 		/* Set defaults for 1.44 floppy */
354 		bd->bd_cyl = 80;
355 		bd->bd_hds = 2;
356 		bd->bd_sec = 18;
357 		bd->bd_type = 4;
358 		bd->bd_sectors = 2880;
359 		/* Since we are there, there most likely is no media */
360 		bd->bd_flags |= BD_NO_MEDIA;
361 		ret = 0;
362 	}
363 
364 	if (ret != 0) {
365 		if (bd->bd_sectors != 0 && edd != 0) {
366 			bd->bd_sec = 63;
367 			bd->bd_hds = 255;
368 			bd->bd_cyl =
369 			    (bd->bd_sectors + bd->bd_sec * bd->bd_hds - 1) /
370 			    bd->bd_sec * bd->bd_hds;
371 		} else {
372 			printf("Can not get information about %s unit %#x\n",
373 			    biosdisk.dv_name, bd->bd_unit);
374 			return (0);
375 		}
376 	}
377 
378 	if (bd->bd_sec == 0)
379 		bd->bd_sec = 63;
380 	if (bd->bd_hds == 0)
381 		bd->bd_hds = 255;
382 
383 	if (bd->bd_sectors == 0)
384 		bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
385 
386 	DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl,
387 	    bd->bd_hds, bd->bd_sec);
388 
389 	return (1);
390 }
391 
392 /*
393  * Print information about disks
394  */
395 static int
396 bd_print(int verbose)
397 {
398 	static char line[80];
399 	struct disk_devdesc dev;
400 	int i, ret = 0;
401 
402 	if (nbdinfo == 0)
403 		return (0);
404 
405 	printf("%s devices:", biosdisk.dv_name);
406 	if ((ret = pager_output("\n")) != 0)
407 		return (ret);
408 
409 	for (i = 0; i < nbdinfo; i++) {
410 		snprintf(line, sizeof(line),
411 		    "    disk%d:   BIOS drive %c (%s%ju X %u):\n", i,
412 		    (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit):
413 		    ('C' + bdinfo[i].bd_unit - 0x80),
414 		    (bdinfo[i].bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA ?
415 		    "no media, " : "",
416 		    (uintmax_t)bdinfo[i].bd_sectors,
417 		    bdinfo[i].bd_sectorsize);
418 		if ((ret = pager_output(line)) != 0)
419 			break;
420 
421 		if ((bdinfo[i].bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
422 			continue;
423 
424 		dev.dd.d_dev = &biosdisk;
425 		dev.dd.d_unit = i;
426 		dev.d_slice = -1;
427 		dev.d_partition = -1;
428 		if (disk_open(&dev,
429 		    bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors,
430 		    bdinfo[i].bd_sectorsize) == 0) {
431 			snprintf(line, sizeof(line), "    disk%d", i);
432 			ret = disk_print(&dev, line, verbose);
433 			disk_close(&dev);
434 			if (ret != 0)
435 				break;
436 		}
437 	}
438 	return (ret);
439 }
440 
441 /*
442  * Attempt to open the disk described by (dev) for use by (f).
443  *
444  * Note that the philosophy here is "give them exactly what
445  * they ask for".  This is necessary because being too "smart"
446  * about what the user might want leads to complications.
447  * (eg. given no slice or partition value, with a disk that is
448  *  sliced - are they after the first BSD slice, or the DOS
449  *  slice before it?)
450  */
451 static int
452 bd_open(struct open_file *f, ...)
453 {
454 	struct disk_devdesc *dev;
455 	struct disk_devdesc disk;
456 	va_list ap;
457 	uint64_t size;
458 	int rc;
459 
460 	va_start(ap, f);
461 	dev = va_arg(ap, struct disk_devdesc *);
462 	va_end(ap);
463 
464 	if (dev->dd.d_unit < 0 || dev->dd.d_unit >= nbdinfo)
465 		return (EIO);
466 
467 	if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) {
468 		if (!bd_int13probe(&BD(dev)))
469 			return (EIO);
470 		if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
471 			return (EIO);
472 	}
473 	BD(dev).bd_open++;
474 	if (BD(dev).bd_bcache == NULL)
475 	    BD(dev).bd_bcache = bcache_allocate();
476 
477 	/*
478 	 * Read disk size from partition.
479 	 * This is needed to work around buggy BIOS systems returning
480 	 * wrong (truncated) disk media size.
481 	 * During bd_probe() we tested if the mulitplication of bd_sectors
482 	 * would overflow so it should be safe to perform here.
483 	 */
484 	disk.dd.d_dev = dev->dd.d_dev;
485 	disk.dd.d_unit = dev->dd.d_unit;
486 	disk.d_slice = -1;
487 	disk.d_partition = -1;
488 	disk.d_offset = 0;
489 
490 	if (disk_open(&disk, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
491 	    BD(dev).bd_sectorsize) == 0) {
492 
493 		if (disk_ioctl(&disk, DIOCGMEDIASIZE, &size) == 0) {
494 			size /= BD(dev).bd_sectorsize;
495 			if (size > BD(dev).bd_sectors)
496 				BD(dev).bd_sectors = size;
497 		}
498 		disk_close(&disk);
499 	}
500 
501 	rc = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
502 	    BD(dev).bd_sectorsize);
503 	if (rc != 0) {
504 		BD(dev).bd_open--;
505 		if (BD(dev).bd_open == 0) {
506 			bcache_free(BD(dev).bd_bcache);
507 			BD(dev).bd_bcache = NULL;
508 		}
509 	}
510 	return (rc);
511 }
512 
513 static int
514 bd_close(struct open_file *f)
515 {
516 	struct disk_devdesc *dev;
517 
518 	dev = (struct disk_devdesc *)f->f_devdata;
519 	BD(dev).bd_open--;
520 	if (BD(dev).bd_open == 0) {
521 	    bcache_free(BD(dev).bd_bcache);
522 	    BD(dev).bd_bcache = NULL;
523 	}
524 	return (disk_close(dev));
525 }
526 
527 static int
528 bd_ioctl(struct open_file *f, u_long cmd, void *data)
529 {
530 	struct disk_devdesc *dev;
531 	int rc;
532 
533 	dev = (struct disk_devdesc *)f->f_devdata;
534 
535 	rc = disk_ioctl(dev, cmd, data);
536 	if (rc != ENOTTY)
537 		return (rc);
538 
539 	switch (cmd) {
540 	case DIOCGSECTORSIZE:
541 		*(uint32_t *)data = BD(dev).bd_sectorsize;
542 		break;
543 	case DIOCGMEDIASIZE:
544 		*(uint64_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
545 		break;
546 	default:
547 		return (ENOTTY);
548 	}
549 	return (0);
550 }
551 
552 static int
553 bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
554     char *buf, size_t *rsize)
555 {
556 	struct bcache_devdata bcd;
557 	struct disk_devdesc *dev;
558 
559 	dev = (struct disk_devdesc *)devdata;
560 	bcd.dv_strategy = bd_realstrategy;
561 	bcd.dv_devdata = devdata;
562 	bcd.dv_cache = BD(dev).bd_bcache;
563 	return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, size,
564 	    buf, rsize));
565 }
566 
567 static int
568 bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
569     char *buf, size_t *rsize)
570 {
571 	struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
572 	uint64_t disk_blocks, offset;
573 	size_t blks, blkoff, bsize, rest;
574 	caddr_t bbuf;
575 	int rc;
576 
577 	if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
578 		return (EIO);
579 
580 	/*
581 	 * First make sure the IO size is a multiple of 512 bytes. While we do
582 	 * process partial reads below, the strategy mechanism is built
583 	 * assuming IO is a multiple of 512B blocks. If the request is not
584 	 * a multiple of 512B blocks, it has to be some sort of bug.
585 	 */
586 	if (size == 0 || (size % BIOSDISK_SECSIZE) != 0) {
587 		printf("bd_strategy: %d bytes I/O not multiple of %d\n",
588 		    size, BIOSDISK_SECSIZE);
589 		return (EIO);
590 	}
591 
592 	DEBUG("open_disk %p", dev);
593 
594 	offset = dblk * BIOSDISK_SECSIZE;
595 	dblk = offset / BD(dev).bd_sectorsize;
596 	blkoff = offset % BD(dev).bd_sectorsize;
597 
598 	/*
599 	 * Check the value of the size argument. We do have quite small
600 	 * heap (64MB), but we do not know good upper limit, so we check against
601 	 * INT_MAX here. This will also protect us against possible overflows
602 	 * while translating block count to bytes.
603 	 */
604 	if (size > INT_MAX) {
605 		DEBUG("too large I/O: %zu bytes", size);
606 		return (EIO);
607 	}
608 
609 	blks = size / BD(dev).bd_sectorsize;
610 	if (blks == 0 || (size % BD(dev).bd_sectorsize) != 0)
611 		blks++;
612 
613 	if (dblk > dblk + blks)
614 		return (EIO);
615 
616 	if (rsize)
617 		*rsize = 0;
618 
619 	/*
620 	 * Get disk blocks, this value is either for whole disk or for
621 	 * partition.
622 	 */
623 	if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
624 		/* DIOCGMEDIASIZE does return bytes. */
625 		disk_blocks /= BD(dev).bd_sectorsize;
626 	} else {
627 		/* We should not get here. Just try to survive. */
628 		disk_blocks = BD(dev).bd_sectors - dev->d_offset;
629 	}
630 
631 	/* Validate source block address. */
632 	if (dblk < dev->d_offset || dblk >= dev->d_offset + disk_blocks)
633 		return (EIO);
634 
635 	/*
636 	 * Truncate if we are crossing disk or partition end.
637 	 */
638 	if (dblk + blks >= dev->d_offset + disk_blocks) {
639 		blks = dev->d_offset + disk_blocks - dblk;
640 		size = blks * BD(dev).bd_sectorsize;
641 		DEBUG("short I/O %d", blks);
642 	}
643 
644 	if (V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize == 0)
645 		panic("BUG: Real mode buffer is too small\n");
646 
647 	bbuf = PTOV(V86_IO_BUFFER);
648 	rest = size;
649 
650 	while (blks > 0) {
651 		int x = min(blks, V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize);
652 
653 		switch (rw & F_MASK) {
654 		case F_READ:
655 			DEBUG("read %d from %lld to %p", x, dblk, buf);
656 			bsize = BD(dev).bd_sectorsize * x - blkoff;
657 			if (rest < bsize)
658 				bsize = rest;
659 
660 			if ((rc = bd_io(dev, dblk, x, bbuf, BD_RD)) != 0)
661 				return (EIO);
662 
663 			bcopy(bbuf + blkoff, buf, bsize);
664 			break;
665 		case F_WRITE :
666 			DEBUG("write %d from %lld to %p", x, dblk, buf);
667 			if (blkoff != 0) {
668 				/*
669 				 * We got offset to sector, read 1 sector to
670 				 * bbuf.
671 				 */
672 				x = 1;
673 				bsize = BD(dev).bd_sectorsize - blkoff;
674 				bsize = min(bsize, rest);
675 				rc = bd_io(dev, dblk, x, bbuf, BD_RD);
676 			} else if (rest < BD(dev).bd_sectorsize) {
677 				/*
678 				 * The remaining block is not full
679 				 * sector. Read 1 sector to bbuf.
680 				 */
681 				x = 1;
682 				bsize = rest;
683 				rc = bd_io(dev, dblk, x, bbuf, BD_RD);
684 			} else {
685 				/* We can write full sector(s). */
686 				bsize = BD(dev).bd_sectorsize * x;
687 			}
688 			/*
689 			 * Put your Data In, Put your Data out,
690 			 * Put your Data In, and shake it all about
691 			 */
692 			bcopy(buf, bbuf + blkoff, bsize);
693 			if ((rc = bd_io(dev, dblk, x, bbuf, BD_WR)) != 0)
694 				return (EIO);
695 
696 			break;
697 		default:
698 			/* DO NOTHING */
699 			return (EROFS);
700 		}
701 
702 		blkoff = 0;
703 		buf += bsize;
704 		rest -= bsize;
705 		blks -= x;
706 		dblk += x;
707 	}
708 
709 	if (rsize != NULL)
710 		*rsize = size;
711 	return (0);
712 }
713 
714 static int
715 bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
716     int dowrite)
717 {
718 	static struct edd_packet packet;
719 
720 	packet.len = sizeof(struct edd_packet);
721 	packet.count = blks;
722 	packet.off = VTOPOFF(dest);
723 	packet.seg = VTOPSEG(dest);
724 	packet.lba = dblk;
725 	v86.ctl = V86_FLAGS;
726 	v86.addr = 0x13;
727 	/* Should we Write with verify ?? 0x4302 ? */
728 	if (dowrite == BD_WR)
729 		v86.eax = 0x4300;
730 	else
731 		v86.eax = 0x4200;
732 	v86.edx = BD(dev).bd_unit;
733 	v86.ds = VTOPSEG(&packet);
734 	v86.esi = VTOPOFF(&packet);
735 	v86int();
736 	if (V86_CY(v86.efl))
737 		return (v86.eax >> 8);
738 	return (0);
739 }
740 
741 static int
742 bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
743     int dowrite)
744 {
745 	uint32_t x, bpc, cyl, hd, sec;
746 
747 	bpc = BD(dev).bd_sec * BD(dev).bd_hds;	/* blocks per cylinder */
748 	x = dblk;
749 	cyl = x / bpc;			/* block # / blocks per cylinder */
750 	x %= bpc;				/* block offset into cylinder */
751 	hd = x / BD(dev).bd_sec;		/* offset / blocks per track */
752 	sec = x % BD(dev).bd_sec;		/* offset into track */
753 
754 	/* correct sector number for 1-based BIOS numbering */
755 	sec++;
756 
757 	if (cyl > 1023) {
758 		/* CHS doesn't support cylinders > 1023. */
759 		return (1);
760 	}
761 
762 	v86.ctl = V86_FLAGS;
763 	v86.addr = 0x13;
764 	if (dowrite == BD_WR)
765 		v86.eax = 0x300 | blks;
766 	else
767 		v86.eax = 0x200 | blks;
768 	v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
769 	v86.edx = (hd << 8) | BD(dev).bd_unit;
770 	v86.es = VTOPSEG(dest);
771 	v86.ebx = VTOPOFF(dest);
772 	v86int();
773 	if (V86_CY(v86.efl))
774 		return (v86.eax >> 8);
775 	return (0);
776 }
777 
778 static void
779 bd_io_workaround(struct disk_devdesc *dev)
780 {
781 	uint8_t buf[8 * 1024];
782 
783 	bd_edd_io(dev, 0xffffffff, 1, (caddr_t)buf, BD_RD);
784 }
785 
786 static int
787 bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
788     int dowrite)
789 {
790 	int result, retry;
791 
792 	/* Just in case some idiot actually tries to read/write -1 blocks... */
793 	if (blks < 0)
794 		return (-1);
795 
796 	/*
797 	 * Workaround for a problem with some HP ProLiant BIOS failing to work
798 	 * out the boot disk after installation. hrs and kuriyama discovered
799 	 * this problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and
800 	 * discovered that an int13h call seems to cause a buffer overrun in
801 	 * the bios. The problem is alleviated by doing an extra read before
802 	 * the buggy read. It is not immediately known whether other models
803 	 * are similarly affected.
804 	 * Loop retrying the operation a couple of times.  The BIOS
805 	 * may also retry.
806 	 */
807 	if (dowrite == BD_RD && dblk >= 0x100000000)
808 		bd_io_workaround(dev);
809 	for (retry = 0; retry < 3; retry++) {
810 		if (BD(dev).bd_flags & BD_MODEEDD)
811 			result = bd_edd_io(dev, dblk, blks, dest, dowrite);
812 		else
813 			result = bd_chs_io(dev, dblk, blks, dest, dowrite);
814 
815 		if (result == 0) {
816 			if (BD(dev).bd_flags & BD_NO_MEDIA)
817 				BD(dev).bd_flags &= ~BD_NO_MEDIA;
818 			break;
819 		}
820 
821 		bd_reset_disk(BD(dev).bd_unit);
822 
823 		/*
824 		 * Error codes:
825 		 * 20h	controller failure
826 		 * 31h	no media in drive (IBM/MS INT 13 extensions)
827 		 * 80h	no media in drive, VMWare (Fusion)
828 		 * There is no reason to repeat the IO with errors above.
829 		 */
830 		if (result == 0x20 || result == 0x31 || result == 0x80) {
831 			BD(dev).bd_flags |= BD_NO_MEDIA;
832 			break;
833 		}
834 	}
835 
836 	if (result != 0 && (BD(dev).bd_flags & BD_NO_MEDIA) == 0) {
837 		if (dowrite == BD_WR) {
838 			printf("%s%d: Write %d sector(s) from %p (0x%x) "
839 			    "to %lld: 0x%x\n", dev->dd.d_dev->dv_name,
840 			    dev->dd.d_unit, blks, dest, VTOP(dest), dblk,
841 			    result);
842 		} else {
843 			printf("%s%d: Read %d sector(s) from %lld to %p "
844 			    "(0x%x): 0x%x\n", dev->dd.d_dev->dv_name,
845 			    dev->dd.d_unit, blks, dblk, dest, VTOP(dest),
846 			    result);
847 		}
848 	}
849 
850 	return (result);
851 }
852 
853 /*
854  * Return the BIOS geometry of a given "fixed drive" in a format
855  * suitable for the legacy bootinfo structure.  Since the kernel is
856  * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
857  * prefer to get the information directly, rather than rely on being
858  * able to put it together from information already maintained for
859  * different purposes and for a probably different number of drives.
860  *
861  * For valid drives, the geometry is expected in the format (31..0)
862  * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
863  * indicated by returning the geometry of a "1.2M" PC-format floppy
864  * disk.  And, incidentally, what is returned is not the geometry as
865  * such but the highest valid cylinder, head, and sector numbers.
866  */
867 uint32_t
868 bd_getbigeom(int bunit)
869 {
870 
871 	v86.ctl = V86_FLAGS;
872 	v86.addr = 0x13;
873 	v86.eax = 0x800;
874 	v86.edx = 0x80 + bunit;
875 	v86int();
876 	if (V86_CY(v86.efl))
877 		return (0x4f010f);
878 	return (((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
879 	    (v86.edx & 0xff00) | (v86.ecx & 0x3f));
880 }
881 
882 /*
883  * Return a suitable dev_t value for (dev).
884  *
885  * In the case where it looks like (dev) is a SCSI disk, we allow the number of
886  * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
887  */
888 int
889 bd_getdev(struct i386_devdesc *d)
890 {
891 	struct disk_devdesc *dev;
892 	int	biosdev;
893 	int	major;
894 	int	rootdev;
895 	char	*nip, *cp;
896 	int	i, unit;
897 
898 	dev = (struct disk_devdesc *)d;
899 	biosdev = bd_unit2bios(dev->dd.d_unit);
900 	DEBUG("unit %d BIOS device %d", dev->dd.d_unit, biosdev);
901 	if (biosdev == -1)			/* not a BIOS device */
902 		return (-1);
903 	if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
904 	    BD(dev).bd_sectorsize) != 0)	/* oops, not a viable device */
905 		return (-1);
906 	else
907 		disk_close(dev);
908 
909 	if (biosdev < 0x80) {
910 		/* floppy (or emulated floppy) or ATAPI device */
911 		if (bdinfo[dev->dd.d_unit].bd_type == DT_ATAPI) {
912 			/* is an ATAPI disk */
913 			major = WFDMAJOR;
914 		} else {
915 			/* is a floppy disk */
916 			major = FDMAJOR;
917 		}
918 	} else {
919 		/* assume an IDE disk */
920 		major = WDMAJOR;
921 	}
922 	/* default root disk unit number */
923 	unit = biosdev & 0x7f;
924 
925 	/* XXX a better kludge to set the root disk unit number */
926 	if ((nip = getenv("root_disk_unit")) != NULL) {
927 		i = strtol(nip, &cp, 0);
928 		/* check for parse error */
929 		if ((cp != nip) && (*cp == 0))
930 			unit = i;
931 	}
932 
933 	rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition);
934 	DEBUG("dev is 0x%x\n", rootdev);
935 	return (rootdev);
936 }
937