xref: /freebsd/stand/i386/libi386/biosdisk.c (revision 752d135e0dacd9a463d24ffb89779b67ce0a7ea0)
1 /*-
2  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  * BIOS disk device handling.
33  *
34  * Ideas and algorithms from:
35  *
36  * - NetBSD libi386/biosdisk.c
37  * - FreeBSD biosboot/disk.c
38  *
39  */
40 
41 #include <sys/disk.h>
42 #include <sys/limits.h>
43 #include <stand.h>
44 #include <machine/bootinfo.h>
45 #include <stdarg.h>
46 
47 #include <bootstrap.h>
48 #include <btxv86.h>
49 #include <edd.h>
50 #include "disk.h"
51 #include "libi386.h"
52 
/* Address read (via PTOV) by bd_init() for the BIOS count of fixed disks. */
#define	BIOS_NUMDRIVES		0x475
/* Unit in which the strategy routines express block numbers and sizes. */
#define	BIOSDISK_SECSIZE	512
#define	BUFSIZE			(1 * BIOSDISK_SECSIZE)

/* disk type for ATAPI floppies */
#define	DT_ATAPI	0x10

/* major numbers for devices we frontend for */
#define	WDMAJOR		0
#define	WFDMAJOR	1
#define	FDMAJOR		2
#define	DAMAJOR		4

/*
 * DEBUG() prints a message prefixed with the calling function's name and
 * terminated by a newline; it expands to nothing unless DISK_DEBUG is set.
 */
#ifdef DISK_DEBUG
#define	DEBUG(fmt, ...)	printf("%s: " fmt "\n", __func__, ##__VA_ARGS__)
#else
#define	DEBUG(fmt, ...)
#endif
68 
/*
 * Bits stored in bdinfo.bd_flags.  The low two bits (BD_MODEMASK) record
 * which BIOS interface is used for I/O; the remaining bits are state flags.
 */
#define	BD_MODEINT13	0x0000
#define	BD_MODEEDD1	0x0001
#define	BD_MODEEDD3	0x0002
#define	BD_MODEEDD	(BD_MODEEDD1 | BD_MODEEDD3)
#define	BD_MODEMASK	0x0003
#define	BD_FLOPPY	0x0004
#define	BD_NO_MEDIA	0x0008

/*
 * List of BIOS devices, translation from disk unit number to
 * BIOS unit number.  The array index is our private unit number;
 * only the first nbdinfo entries are valid.
 */
static struct bdinfo
{
	int		bd_unit;	/* BIOS unit number */
	int		bd_cyl;		/* BIOS geometry: cylinders, */
	int		bd_hds;		/* heads, */
	int		bd_sec;		/* and sectors per track */
	int		bd_flags;	/* BD_* bits defined above */
	int		bd_type;	/* BIOS 'drive type' (floppy only) */
	uint16_t	bd_sectorsize;	/* Sector size */
	uint64_t	bd_sectors;	/* Disk size */
	int		bd_open;	/* reference counter */
	void		*bd_bcache;	/* buffer cache data */
} bdinfo [MAXBDDEV];
static int nbdinfo = 0;

/* Look up the bdinfo entry for a disk_devdesc by its unit number. */
#define	BD(dev)		(bdinfo[(dev)->dd.d_unit])
/* Direction argument of bd_io() and its helpers. */
#define	BD_RD		0
#define	BD_WR		1
98 
99 static void bd_io_workaround(struct disk_devdesc *dev);
100 
101 static int bd_io(struct disk_devdesc *, daddr_t, int, caddr_t, int);
102 static int bd_int13probe(struct bdinfo *bd);
103 
104 static int bd_init(void);
105 static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
106     char *buf, size_t *rsize);
107 static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
108     char *buf, size_t *rsize);
109 static int bd_open(struct open_file *f, ...);
110 static int bd_close(struct open_file *f);
111 static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
112 static int bd_print(int verbose);
113 
114 struct devsw biosdisk = {
115 	"disk",
116 	DEVT_DISK,
117 	bd_init,
118 	bd_strategy,
119 	bd_open,
120 	bd_close,
121 	bd_ioctl,
122 	bd_print,
123 	NULL
124 };
125 
126 /*
127  * Translate between BIOS device numbers and our private unit numbers.
128  */
129 int
130 bd_bios2unit(int biosdev)
131 {
132 	int i;
133 
134 	DEBUG("looking for bios device 0x%x", biosdev);
135 	for (i = 0; i < nbdinfo; i++) {
136 		DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit);
137 		if (bdinfo[i].bd_unit == biosdev)
138 			return (i);
139 	}
140 	return (-1);
141 }
142 
143 int
144 bd_unit2bios(int unit)
145 {
146 
147 	if ((unit >= 0) && (unit < nbdinfo))
148 		return (bdinfo[unit].bd_unit);
149 	return (-1);
150 }
151 
152 /*
153  * Quiz the BIOS for disk devices, save a little info about them.
154  */
155 static int
156 bd_init(void)
157 {
158 	int base, unit, nfd = 0;
159 
160 	/* sequence 0, 0x80 */
161 	for (base = 0; base <= 0x80; base += 0x80) {
162 		for (unit = base; (nbdinfo < MAXBDDEV); unit++) {
163 #ifndef VIRTUALBOX
164 			/*
165 			 * Check the BIOS equipment list for number
166 			 * of fixed disks.
167 			 */
168 			if (base == 0x80 &&
169 			    (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES)))
170 				break;
171 #endif
172 			bdinfo[nbdinfo].bd_open = 0;
173 			bdinfo[nbdinfo].bd_bcache = NULL;
174 			bdinfo[nbdinfo].bd_unit = unit;
175 			bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0;
176 			if (!bd_int13probe(&bdinfo[nbdinfo]))
177 				break;
178 
179 			/* XXX we need "disk aliases" to make this simpler */
180 			printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ?
181 			    ('A' + unit): ('C' + unit - 0x80), nbdinfo);
182 			nbdinfo++;
183 			if (base == 0x80)
184 				nfd++;
185 		}
186 	}
187 	bcache_add_dev(nbdinfo);
188 	return (0);
189 }
190 
191 /*
192  * Return EDD version or 0 if EDD is not supported on this drive.
193  */
194 static int
195 bd_check_extensions(int unit)
196 {
197 	/* Determine if we can use EDD with this device. */
198 	v86.ctl = V86_FLAGS;
199 	v86.addr = 0x13;
200 	v86.eax = 0x4100;
201 	v86.edx = unit;
202 	v86.ebx = 0x55aa;
203 	v86int();
204 
205 	if (V86_CY(v86.efl) ||			/* carry set */
206 	    (v86.ebx & 0xffff) != 0xaa55)	/* signature */
207 		return (0);
208 
209 	/* extended disk access functions (AH=42h-44h,47h,48h) supported */
210 	if ((v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
211 		return (0);
212 
213 	return ((v86.eax >> 8) & 0xff);
214 }
215 
216 static void
217 bd_reset_disk(int unit)
218 {
219 	/* reset disk */
220 	v86.ctl = V86_FLAGS;
221 	v86.addr = 0x13;
222 	v86.eax = 0;
223 	v86.edx = unit;
224 	v86int();
225 }
226 
227 /*
228  * Read CHS info. Return 0 on success, error otherwise.
229  */
230 static int
231 bd_get_diskinfo_std(struct bdinfo *bd)
232 {
233 	bzero(&v86, sizeof(v86));
234 	v86.ctl = V86_FLAGS;
235 	v86.addr = 0x13;
236 	v86.eax = 0x800;
237 	v86.edx = bd->bd_unit;
238 	v86int();
239 
240 	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
241 		return ((v86.eax & 0xff00) >> 8);
242 
243 	/* return custom error on absurd sector number */
244 	if ((v86.ecx & 0x3f) == 0)
245 		return (0x60);
246 
247 	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
248 	/* Convert max head # -> # of heads */
249 	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
250 	bd->bd_sec = v86.ecx & 0x3f;
251 	bd->bd_type = v86.ebx;
252 	bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
253 
254 	return (0);
255 }
256 
257 /*
258  * Read EDD info. Return 0 on success, error otherwise.
259  */
260 static int
261 bd_get_diskinfo_ext(struct bdinfo *bd)
262 {
263 	struct edd_params params;
264 	uint64_t total;
265 
266 	/* Get disk params */
267 	bzero(&params, sizeof(params));
268 	params.len = sizeof(params);
269 	v86.ctl = V86_FLAGS;
270 	v86.addr = 0x13;
271 	v86.eax = 0x4800;
272 	v86.edx = bd->bd_unit;
273 	v86.ds = VTOPSEG(&params);
274 	v86.esi = VTOPOFF(&params);
275 	v86int();
276 
277 	if (V86_CY(v86.efl) && ((v86.eax & 0xff00) != 0))
278 		return ((v86.eax & 0xff00) >> 8);
279 
280 	/*
281 	 * Sector size must be a multiple of 512 bytes.
282 	 * An alternate test would be to check power of 2,
283 	 * powerof2(params.sector_size).
284 	 * 4K is largest read buffer we can use at this time.
285 	 */
286 	if (params.sector_size >= 512 &&
287 	    params.sector_size <= 4096 &&
288 	    (params.sector_size % BIOSDISK_SECSIZE) == 0)
289 		bd->bd_sectorsize = params.sector_size;
290 
291 	bd->bd_cyl = params.cylinders;
292 	bd->bd_hds = params.heads;
293 	bd->bd_sec = params.sectors_per_track;
294 
295 	if (params.sectors != 0) {
296 		total = params.sectors;
297 	} else {
298 		total = (uint64_t)params.cylinders *
299 		    params.heads * params.sectors_per_track;
300 	}
301 	bd->bd_sectors = total;
302 
303 	return (0);
304 }
305 
306 /*
307  * Try to detect a device supported by the legacy int13 BIOS
308  */
309 static int
310 bd_int13probe(struct bdinfo *bd)
311 {
312 	int edd;
313 	int ret;
314 
315 	bd->bd_flags &= ~BD_NO_MEDIA;
316 
317 	edd = bd_check_extensions(bd->bd_unit);
318 	if (edd == 0)
319 		bd->bd_flags |= BD_MODEINT13;
320 	else if (edd < 0x30)
321 		bd->bd_flags |= BD_MODEEDD1;
322 	else
323 		bd->bd_flags |= BD_MODEEDD3;
324 
325 	/* Default sector size */
326 	bd->bd_sectorsize = BIOSDISK_SECSIZE;
327 
328 	/*
329 	 * Test if the floppy device is present, so we can avoid receiving
330 	 * bogus information from bd_get_diskinfo_std().
331 	 */
332 	if (bd->bd_unit < 0x80) {
333 		/* reset disk */
334 		bd_reset_disk(bd->bd_unit);
335 
336 		/* Get disk type */
337 		v86.ctl = V86_FLAGS;
338 		v86.addr = 0x13;
339 		v86.eax = 0x1500;
340 		v86.edx = bd->bd_unit;
341 		v86int();
342 		if (V86_CY(v86.efl) || (v86.eax & 0x300) == 0)
343 			return (0);
344 	}
345 
346 	ret = 1;
347 	if (edd != 0)
348 		ret = bd_get_diskinfo_ext(bd);
349 	if (ret != 0 || bd->bd_sectors == 0)
350 		ret = bd_get_diskinfo_std(bd);
351 
352 	if (ret != 0 && bd->bd_unit < 0x80) {
353 		/* Set defaults for 1.44 floppy */
354 		bd->bd_cyl = 80;
355 		bd->bd_hds = 2;
356 		bd->bd_sec = 18;
357 		bd->bd_type = 4;
358 		bd->bd_sectors = 2880;
359 		/* Since we are there, there most likely is no media */
360 		bd->bd_flags |= BD_NO_MEDIA;
361 		ret = 0;
362 	}
363 
364 	if (ret != 0) {
365 		if (bd->bd_sectors != 0 && edd != 0) {
366 			bd->bd_sec = 63;
367 			bd->bd_hds = 255;
368 			bd->bd_cyl =
369 			    (bd->bd_sectors + bd->bd_sec * bd->bd_hds - 1) /
370 			    bd->bd_sec * bd->bd_hds;
371 		} else {
372 			printf("Can not get information about %s unit %#x\n",
373 			    biosdisk.dv_name, bd->bd_unit);
374 			return (0);
375 		}
376 	}
377 
378 	if (bd->bd_sec == 0)
379 		bd->bd_sec = 63;
380 	if (bd->bd_hds == 0)
381 		bd->bd_hds = 255;
382 
383 	if (bd->bd_sectors == 0)
384 		bd->bd_sectors = (uint64_t)bd->bd_cyl * bd->bd_hds * bd->bd_sec;
385 
386 	DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl,
387 	    bd->bd_hds, bd->bd_sec);
388 
389 	return (1);
390 }
391 
392 /*
393  * Print information about disks
394  */
395 static int
396 bd_print(int verbose)
397 {
398 	static char line[80];
399 	struct disk_devdesc dev;
400 	int i, ret = 0;
401 
402 	if (nbdinfo == 0)
403 		return (0);
404 
405 	printf("%s devices:", biosdisk.dv_name);
406 	if ((ret = pager_output("\n")) != 0)
407 		return (ret);
408 
409 	for (i = 0; i < nbdinfo; i++) {
410 		snprintf(line, sizeof(line),
411 		    "    disk%d:   BIOS drive %c (%s%ju X %u):\n", i,
412 		    (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit):
413 		    ('C' + bdinfo[i].bd_unit - 0x80),
414 		    (bdinfo[i].bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA ?
415 		    "no media, " : "",
416 		    (uintmax_t)bdinfo[i].bd_sectors,
417 		    bdinfo[i].bd_sectorsize);
418 		if ((ret = pager_output(line)) != 0)
419 			break;
420 
421 		if ((bdinfo[i].bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
422 			continue;
423 
424 		dev.dd.d_dev = &biosdisk;
425 		dev.dd.d_unit = i;
426 		dev.d_slice = -1;
427 		dev.d_partition = -1;
428 		if (disk_open(&dev,
429 		    bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors,
430 		    bdinfo[i].bd_sectorsize) == 0) {
431 			snprintf(line, sizeof(line), "    disk%d", i);
432 			ret = disk_print(&dev, line, verbose);
433 			disk_close(&dev);
434 			if (ret != 0)
435 				break;
436 		}
437 	}
438 	return (ret);
439 }
440 
441 /*
442  * Read disk size from partition.
443  * This is needed to work around buggy BIOS systems returning
444  * wrong (truncated) disk media size.
445  * During bd_probe() we tested if the multiplication of bd_sectors
446  * would overflow so it should be safe to perform here.
447  */
448 static uint64_t
449 bd_disk_get_sectors(struct disk_devdesc *dev)
450 {
451 	struct disk_devdesc disk;
452 	uint64_t size;
453 
454 	disk.dd.d_dev = dev->dd.d_dev;
455 	disk.dd.d_unit = dev->dd.d_unit;
456 	disk.d_slice = -1;
457 	disk.d_partition = -1;
458 	disk.d_offset = 0;
459 
460 	size = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
461 	if (disk_open(&disk, size, BD(dev).bd_sectorsize) == 0) {
462 		(void) disk_ioctl(&disk, DIOCGMEDIASIZE, &size);
463 		disk_close(&disk);
464 	}
465 	return (size / BD(dev).bd_sectorsize);
466 }
467 
468 /*
469  * Attempt to open the disk described by (dev) for use by (f).
470  *
471  * Note that the philosophy here is "give them exactly what
472  * they ask for".  This is necessary because being too "smart"
473  * about what the user might want leads to complications.
474  * (eg. given no slice or partition value, with a disk that is
475  *  sliced - are they after the first BSD slice, or the DOS
476  *  slice before it?)
477  */
478 static int
479 bd_open(struct open_file *f, ...)
480 {
481 	struct disk_devdesc *dev;
482 	va_list ap;
483 	int rc;
484 
485 	va_start(ap, f);
486 	dev = va_arg(ap, struct disk_devdesc *);
487 	va_end(ap);
488 
489 	if (dev->dd.d_unit < 0 || dev->dd.d_unit >= nbdinfo)
490 		return (EIO);
491 
492 	if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA) {
493 		if (!bd_int13probe(&BD(dev)))
494 			return (EIO);
495 		if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
496 			return (EIO);
497 	}
498 	if (BD(dev).bd_bcache == NULL)
499 	    BD(dev).bd_bcache = bcache_allocate();
500 
501 	if (BD(dev).bd_open == 0)
502 		BD(dev).bd_sectors = bd_disk_get_sectors(dev);
503 	BD(dev).bd_open++;
504 
505 	rc = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
506 	    BD(dev).bd_sectorsize);
507 	if (rc != 0) {
508 		BD(dev).bd_open--;
509 		if (BD(dev).bd_open == 0) {
510 			bcache_free(BD(dev).bd_bcache);
511 			BD(dev).bd_bcache = NULL;
512 		}
513 	}
514 	return (rc);
515 }
516 
517 static int
518 bd_close(struct open_file *f)
519 {
520 	struct disk_devdesc *dev;
521 
522 	dev = (struct disk_devdesc *)f->f_devdata;
523 	BD(dev).bd_open--;
524 	if (BD(dev).bd_open == 0) {
525 	    bcache_free(BD(dev).bd_bcache);
526 	    BD(dev).bd_bcache = NULL;
527 	}
528 	return (disk_close(dev));
529 }
530 
531 static int
532 bd_ioctl(struct open_file *f, u_long cmd, void *data)
533 {
534 	struct disk_devdesc *dev;
535 	int rc;
536 
537 	dev = (struct disk_devdesc *)f->f_devdata;
538 
539 	rc = disk_ioctl(dev, cmd, data);
540 	if (rc != ENOTTY)
541 		return (rc);
542 
543 	switch (cmd) {
544 	case DIOCGSECTORSIZE:
545 		*(uint32_t *)data = BD(dev).bd_sectorsize;
546 		break;
547 	case DIOCGMEDIASIZE:
548 		*(uint64_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
549 		break;
550 	default:
551 		return (ENOTTY);
552 	}
553 	return (0);
554 }
555 
556 static int
557 bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
558     char *buf, size_t *rsize)
559 {
560 	struct bcache_devdata bcd;
561 	struct disk_devdesc *dev;
562 
563 	dev = (struct disk_devdesc *)devdata;
564 	bcd.dv_strategy = bd_realstrategy;
565 	bcd.dv_devdata = devdata;
566 	bcd.dv_cache = BD(dev).bd_bcache;
567 	return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, size,
568 	    buf, rsize));
569 }
570 
571 static int
572 bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
573     char *buf, size_t *rsize)
574 {
575 	struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
576 	uint64_t disk_blocks, offset;
577 	size_t blks, blkoff, bsize, rest;
578 	caddr_t bbuf;
579 	int rc;
580 
581 	if ((BD(dev).bd_flags & BD_NO_MEDIA) == BD_NO_MEDIA)
582 		return (EIO);
583 
584 	/*
585 	 * First make sure the IO size is a multiple of 512 bytes. While we do
586 	 * process partial reads below, the strategy mechanism is built
587 	 * assuming IO is a multiple of 512B blocks. If the request is not
588 	 * a multiple of 512B blocks, it has to be some sort of bug.
589 	 */
590 	if (size == 0 || (size % BIOSDISK_SECSIZE) != 0) {
591 		printf("bd_strategy: %d bytes I/O not multiple of %d\n",
592 		    size, BIOSDISK_SECSIZE);
593 		return (EIO);
594 	}
595 
596 	DEBUG("open_disk %p", dev);
597 
598 	offset = dblk * BIOSDISK_SECSIZE;
599 	dblk = offset / BD(dev).bd_sectorsize;
600 	blkoff = offset % BD(dev).bd_sectorsize;
601 
602 	/*
603 	 * Check the value of the size argument. We do have quite small
604 	 * heap (64MB), but we do not know good upper limit, so we check against
605 	 * INT_MAX here. This will also protect us against possible overflows
606 	 * while translating block count to bytes.
607 	 */
608 	if (size > INT_MAX) {
609 		DEBUG("too large I/O: %zu bytes", size);
610 		return (EIO);
611 	}
612 
613 	blks = size / BD(dev).bd_sectorsize;
614 	if (blks == 0 || (size % BD(dev).bd_sectorsize) != 0)
615 		blks++;
616 
617 	if (dblk > dblk + blks)
618 		return (EIO);
619 
620 	if (rsize)
621 		*rsize = 0;
622 
623 	/*
624 	 * Get disk blocks, this value is either for whole disk or for
625 	 * partition.
626 	 */
627 	if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
628 		/* DIOCGMEDIASIZE does return bytes. */
629 		disk_blocks /= BD(dev).bd_sectorsize;
630 	} else {
631 		/* We should not get here. Just try to survive. */
632 		disk_blocks = BD(dev).bd_sectors - dev->d_offset;
633 	}
634 
635 	/* Validate source block address. */
636 	if (dblk < dev->d_offset || dblk >= dev->d_offset + disk_blocks)
637 		return (EIO);
638 
639 	/*
640 	 * Truncate if we are crossing disk or partition end.
641 	 */
642 	if (dblk + blks >= dev->d_offset + disk_blocks) {
643 		blks = dev->d_offset + disk_blocks - dblk;
644 		size = blks * BD(dev).bd_sectorsize;
645 		DEBUG("short I/O %d", blks);
646 	}
647 
648 	if (V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize == 0)
649 		panic("BUG: Real mode buffer is too small\n");
650 
651 	bbuf = PTOV(V86_IO_BUFFER);
652 	rest = size;
653 
654 	while (blks > 0) {
655 		int x = min(blks, V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize);
656 
657 		switch (rw & F_MASK) {
658 		case F_READ:
659 			DEBUG("read %d from %lld to %p", x, dblk, buf);
660 			bsize = BD(dev).bd_sectorsize * x - blkoff;
661 			if (rest < bsize)
662 				bsize = rest;
663 
664 			if ((rc = bd_io(dev, dblk, x, bbuf, BD_RD)) != 0)
665 				return (EIO);
666 
667 			bcopy(bbuf + blkoff, buf, bsize);
668 			break;
669 		case F_WRITE :
670 			DEBUG("write %d from %lld to %p", x, dblk, buf);
671 			if (blkoff != 0) {
672 				/*
673 				 * We got offset to sector, read 1 sector to
674 				 * bbuf.
675 				 */
676 				x = 1;
677 				bsize = BD(dev).bd_sectorsize - blkoff;
678 				bsize = min(bsize, rest);
679 				rc = bd_io(dev, dblk, x, bbuf, BD_RD);
680 			} else if (rest < BD(dev).bd_sectorsize) {
681 				/*
682 				 * The remaining block is not full
683 				 * sector. Read 1 sector to bbuf.
684 				 */
685 				x = 1;
686 				bsize = rest;
687 				rc = bd_io(dev, dblk, x, bbuf, BD_RD);
688 			} else {
689 				/* We can write full sector(s). */
690 				bsize = BD(dev).bd_sectorsize * x;
691 			}
692 			/*
693 			 * Put your Data In, Put your Data out,
694 			 * Put your Data In, and shake it all about
695 			 */
696 			bcopy(buf, bbuf + blkoff, bsize);
697 			if ((rc = bd_io(dev, dblk, x, bbuf, BD_WR)) != 0)
698 				return (EIO);
699 
700 			break;
701 		default:
702 			/* DO NOTHING */
703 			return (EROFS);
704 		}
705 
706 		blkoff = 0;
707 		buf += bsize;
708 		rest -= bsize;
709 		blks -= x;
710 		dblk += x;
711 	}
712 
713 	if (rsize != NULL)
714 		*rsize = size;
715 	return (0);
716 }
717 
718 static int
719 bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
720     int dowrite)
721 {
722 	static struct edd_packet packet;
723 
724 	packet.len = sizeof(struct edd_packet);
725 	packet.count = blks;
726 	packet.off = VTOPOFF(dest);
727 	packet.seg = VTOPSEG(dest);
728 	packet.lba = dblk;
729 	v86.ctl = V86_FLAGS;
730 	v86.addr = 0x13;
731 	/* Should we Write with verify ?? 0x4302 ? */
732 	if (dowrite == BD_WR)
733 		v86.eax = 0x4300;
734 	else
735 		v86.eax = 0x4200;
736 	v86.edx = BD(dev).bd_unit;
737 	v86.ds = VTOPSEG(&packet);
738 	v86.esi = VTOPOFF(&packet);
739 	v86int();
740 	if (V86_CY(v86.efl))
741 		return (v86.eax >> 8);
742 	return (0);
743 }
744 
745 static int
746 bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
747     int dowrite)
748 {
749 	uint32_t x, bpc, cyl, hd, sec;
750 
751 	bpc = BD(dev).bd_sec * BD(dev).bd_hds;	/* blocks per cylinder */
752 	x = dblk;
753 	cyl = x / bpc;			/* block # / blocks per cylinder */
754 	x %= bpc;				/* block offset into cylinder */
755 	hd = x / BD(dev).bd_sec;		/* offset / blocks per track */
756 	sec = x % BD(dev).bd_sec;		/* offset into track */
757 
758 	/* correct sector number for 1-based BIOS numbering */
759 	sec++;
760 
761 	if (cyl > 1023) {
762 		/* CHS doesn't support cylinders > 1023. */
763 		return (1);
764 	}
765 
766 	v86.ctl = V86_FLAGS;
767 	v86.addr = 0x13;
768 	if (dowrite == BD_WR)
769 		v86.eax = 0x300 | blks;
770 	else
771 		v86.eax = 0x200 | blks;
772 	v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
773 	v86.edx = (hd << 8) | BD(dev).bd_unit;
774 	v86.es = VTOPSEG(dest);
775 	v86.ebx = VTOPOFF(dest);
776 	v86int();
777 	if (V86_CY(v86.efl))
778 		return (v86.eax >> 8);
779 	return (0);
780 }
781 
782 static void
783 bd_io_workaround(struct disk_devdesc *dev)
784 {
785 	uint8_t buf[8 * 1024];
786 
787 	bd_edd_io(dev, 0xffffffff, 1, (caddr_t)buf, BD_RD);
788 }
789 
790 static int
791 bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
792     int dowrite)
793 {
794 	int result, retry;
795 
796 	/* Just in case some idiot actually tries to read/write -1 blocks... */
797 	if (blks < 0)
798 		return (-1);
799 
800 	/*
801 	 * Workaround for a problem with some HP ProLiant BIOS failing to work
802 	 * out the boot disk after installation. hrs and kuriyama discovered
803 	 * this problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and
804 	 * discovered that an int13h call seems to cause a buffer overrun in
805 	 * the bios. The problem is alleviated by doing an extra read before
806 	 * the buggy read. It is not immediately known whether other models
807 	 * are similarly affected.
808 	 * Loop retrying the operation a couple of times.  The BIOS
809 	 * may also retry.
810 	 */
811 	if (dowrite == BD_RD && dblk >= 0x100000000)
812 		bd_io_workaround(dev);
813 	for (retry = 0; retry < 3; retry++) {
814 		if (BD(dev).bd_flags & BD_MODEEDD)
815 			result = bd_edd_io(dev, dblk, blks, dest, dowrite);
816 		else
817 			result = bd_chs_io(dev, dblk, blks, dest, dowrite);
818 
819 		if (result == 0) {
820 			if (BD(dev).bd_flags & BD_NO_MEDIA)
821 				BD(dev).bd_flags &= ~BD_NO_MEDIA;
822 			break;
823 		}
824 
825 		bd_reset_disk(BD(dev).bd_unit);
826 
827 		/*
828 		 * Error codes:
829 		 * 20h	controller failure
830 		 * 31h	no media in drive (IBM/MS INT 13 extensions)
831 		 * 80h	no media in drive, VMWare (Fusion)
832 		 * There is no reason to repeat the IO with errors above.
833 		 */
834 		if (result == 0x20 || result == 0x31 || result == 0x80) {
835 			BD(dev).bd_flags |= BD_NO_MEDIA;
836 			break;
837 		}
838 	}
839 
840 	if (result != 0 && (BD(dev).bd_flags & BD_NO_MEDIA) == 0) {
841 		if (dowrite == BD_WR) {
842 			printf("%s%d: Write %d sector(s) from %p (0x%x) "
843 			    "to %lld: 0x%x\n", dev->dd.d_dev->dv_name,
844 			    dev->dd.d_unit, blks, dest, VTOP(dest), dblk,
845 			    result);
846 		} else {
847 			printf("%s%d: Read %d sector(s) from %lld to %p "
848 			    "(0x%x): 0x%x\n", dev->dd.d_dev->dv_name,
849 			    dev->dd.d_unit, blks, dblk, dest, VTOP(dest),
850 			    result);
851 		}
852 	}
853 
854 	return (result);
855 }
856 
857 /*
858  * Return the BIOS geometry of a given "fixed drive" in a format
859  * suitable for the legacy bootinfo structure.  Since the kernel is
860  * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
861  * prefer to get the information directly, rather than rely on being
862  * able to put it together from information already maintained for
863  * different purposes and for a probably different number of drives.
864  *
865  * For valid drives, the geometry is expected in the format (31..0)
866  * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
867  * indicated by returning the geometry of a "1.2M" PC-format floppy
868  * disk.  And, incidentally, what is returned is not the geometry as
869  * such but the highest valid cylinder, head, and sector numbers.
870  */
871 uint32_t
872 bd_getbigeom(int bunit)
873 {
874 
875 	v86.ctl = V86_FLAGS;
876 	v86.addr = 0x13;
877 	v86.eax = 0x800;
878 	v86.edx = 0x80 + bunit;
879 	v86int();
880 	if (V86_CY(v86.efl))
881 		return (0x4f010f);
882 	return (((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
883 	    (v86.edx & 0xff00) | (v86.ecx & 0x3f));
884 }
885 
886 /*
887  * Return a suitable dev_t value for (dev).
888  *
889  * In the case where it looks like (dev) is a SCSI disk, we allow the number of
890  * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
891  */
892 int
893 bd_getdev(struct i386_devdesc *d)
894 {
895 	struct disk_devdesc *dev;
896 	int	biosdev;
897 	int	major;
898 	int	rootdev;
899 	char	*nip, *cp;
900 	int	i, unit;
901 
902 	dev = (struct disk_devdesc *)d;
903 	biosdev = bd_unit2bios(dev->dd.d_unit);
904 	DEBUG("unit %d BIOS device %d", dev->dd.d_unit, biosdev);
905 	if (biosdev == -1)			/* not a BIOS device */
906 		return (-1);
907 	if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
908 	    BD(dev).bd_sectorsize) != 0)	/* oops, not a viable device */
909 		return (-1);
910 	else
911 		disk_close(dev);
912 
913 	if (biosdev < 0x80) {
914 		/* floppy (or emulated floppy) or ATAPI device */
915 		if (bdinfo[dev->dd.d_unit].bd_type == DT_ATAPI) {
916 			/* is an ATAPI disk */
917 			major = WFDMAJOR;
918 		} else {
919 			/* is a floppy disk */
920 			major = FDMAJOR;
921 		}
922 	} else {
923 		/* assume an IDE disk */
924 		major = WDMAJOR;
925 	}
926 	/* default root disk unit number */
927 	unit = biosdev & 0x7f;
928 
929 	/* XXX a better kludge to set the root disk unit number */
930 	if ((nip = getenv("root_disk_unit")) != NULL) {
931 		i = strtol(nip, &cp, 0);
932 		/* check for parse error */
933 		if ((cp != nip) && (*cp == 0))
934 			unit = i;
935 	}
936 
937 	rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition);
938 	DEBUG("dev is 0x%x\n", rootdev);
939 	return (rootdev);
940 }
941