xref: /freebsd/stand/i386/libi386/biosdisk.c (revision cd9cc48b9affbd920825c1b2d26a1eae151249b8)
1 /*-
2  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 /*
32  * BIOS disk device handling.
33  *
34  * Ideas and algorithms from:
35  *
36  * - NetBSD libi386/biosdisk.c
37  * - FreeBSD biosboot/disk.c
38  *
39  */
40 
41 #include <sys/disk.h>
42 #include <sys/limits.h>
43 #include <stand.h>
44 #include <machine/bootinfo.h>
45 #include <stdarg.h>
46 
47 #include <bootstrap.h>
48 #include <btxv86.h>
49 #include <edd.h>
50 #include "disk.h"
51 #include "libi386.h"
52 
53 #ifdef LOADER_GELI_SUPPORT
54 #include "cons.h"
55 #include "drv.h"
56 #include "gpt.h"
57 #include "part.h"
58 #include <uuid.h>
59 struct pentry {
60 	struct ptable_entry	part;
61 	uint64_t		flags;
62 	union {
63 		uint8_t bsd;
64 		uint8_t	mbr;
65 		uuid_t	gpt;
66 		uint16_t vtoc8;
67 	} type;
68 	STAILQ_ENTRY(pentry)	entry;
69 };
70 struct ptable {
71 	enum ptable_type	type;
72 	uint16_t		sectorsize;
73 	uint64_t		sectors;
74 
75 	STAILQ_HEAD(, pentry)	entries;
76 };
77 
78 #include "geliboot.c"
79 #endif /* LOADER_GELI_SUPPORT */
80 
/* Physical address of the BIOS fixed-disk count byte (read by bd_init()). */
#define BIOS_NUMDRIVES		0x475
/* Sector size assumed unless EDD reports a usable one. */
#define BIOSDISK_SECSIZE	512
#define BUFSIZE			(1 * BIOSDISK_SECSIZE)

/* Disk type for ATAPI floppies. */
#define DT_ATAPI		0x10

/* Major numbers for devices we frontend for. */
#define WDMAJOR			0
#define WFDMAJOR		1
#define FDMAJOR			2
#define DAMAJOR			4

/*
 * DEBUG() prints its message prefixed with the calling function's name when
 * DISK_DEBUG is defined and expands to nothing otherwise.
 */
#ifdef DISK_DEBUG
# define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
#else
# define DEBUG(fmt, args...)
#endif
96 
97 /*
98  * List of BIOS devices, translation from disk unit number to
99  * BIOS unit number.
100  */
static struct bdinfo
{
	int		bd_unit;	/* BIOS unit number */

	/* Geometry as reported by the legacy int 13h parameter query. */
	int		bd_cyl;		/* number of cylinders */
	int		bd_hds;		/* number of heads */
	int		bd_sec;		/* sectors per track */

	int		bd_flags;	/* access mode bits plus BD_FLOPPY */
#define	BD_MODEINT13	0x0000
#define	BD_MODEEDD1	0x0001
#define	BD_MODEEDD3	0x0002
#define	BD_MODEMASK	0x0003
#define	BD_FLOPPY	0x0004
	int		bd_type;	/* BIOS 'drive type' (floppy only) */
	uint16_t	bd_sectorsize;	/* Sector size */
	uint64_t	bd_sectors;	/* Disk size */
	int		bd_open;	/* reference counter */
	void		*bd_bcache;	/* buffer cache data */
} bdinfo [MAXBDDEV];

/* Number of valid entries at the front of bdinfo[]. */
static int nbdinfo = 0;
120 
121 #define	BD(dev)		(bdinfo[(dev)->dd.d_unit])
122 
123 static void bd_io_workaround(struct disk_devdesc *dev);
124 
125 static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks,
126     caddr_t dest);
127 static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks,
128     caddr_t dest);
129 static int bd_int13probe(struct bdinfo *bd);
130 
131 static int bd_init(void);
132 static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
133     char *buf, size_t *rsize);
134 static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
135     char *buf, size_t *rsize);
136 static int bd_open(struct open_file *f, ...);
137 static int bd_close(struct open_file *f);
138 static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
139 static int bd_print(int verbose);
140 
141 #ifdef LOADER_GELI_SUPPORT
142 enum isgeli {
143 	ISGELI_UNKNOWN,
144 	ISGELI_NO,
145 	ISGELI_YES
146 };
147 static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS];
148 
149 int bios_read(void *, void *, off_t off, void *buf, size_t bytes);
150 #endif /* LOADER_GELI_SUPPORT */
151 
152 struct devsw biosdisk = {
153 	"disk",
154 	DEVT_DISK,
155 	bd_init,
156 	bd_strategy,
157 	bd_open,
158 	bd_close,
159 	bd_ioctl,
160 	bd_print,
161 	NULL
162 };
163 
164 /*
165  * Translate between BIOS device numbers and our private unit numbers.
166  */
167 int
168 bd_bios2unit(int biosdev)
169 {
170 	int i;
171 
172 	DEBUG("looking for bios device 0x%x", biosdev);
173 	for (i = 0; i < nbdinfo; i++) {
174 		DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit);
175 		if (bdinfo[i].bd_unit == biosdev)
176 			return (i);
177 	}
178 	return (-1);
179 }
180 
181 int
182 bd_unit2bios(int unit)
183 {
184 
185 	if ((unit >= 0) && (unit < nbdinfo))
186 		return (bdinfo[unit].bd_unit);
187 	return (-1);
188 }
189 
190 /*
191  * Quiz the BIOS for disk devices, save a little info about them.
192  */
193 static int
194 bd_init(void)
195 {
196 	int base, unit, nfd = 0;
197 
198 #ifdef LOADER_GELI_SUPPORT
199 	geli_init();
200 #endif
201 	/* sequence 0, 0x80 */
202 	for (base = 0; base <= 0x80; base += 0x80) {
203 		for (unit = base; (nbdinfo < MAXBDDEV); unit++) {
204 #ifndef VIRTUALBOX
205 			/*
206 			 * Check the BIOS equipment list for number
207 			 * of fixed disks.
208 			 */
209 			if(base == 0x80 &&
210 			    (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES)))
211 				break;
212 #endif
213 			bdinfo[nbdinfo].bd_open = 0;
214 			bdinfo[nbdinfo].bd_bcache = NULL;
215 			bdinfo[nbdinfo].bd_unit = unit;
216 			bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0;
217 			if (!bd_int13probe(&bdinfo[nbdinfo]))
218 				break;
219 
220 			/* XXX we need "disk aliases" to make this simpler */
221 			printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ?
222 			    ('A' + unit): ('C' + unit - 0x80), nbdinfo);
223 			nbdinfo++;
224 			if (base == 0x80)
225 				nfd++;
226 		}
227 	}
228 	bcache_add_dev(nbdinfo);
229 	return(0);
230 }
231 
232 /*
233  * Try to detect a device supported by the legacy int13 BIOS
234  */
235 static int
236 bd_int13probe(struct bdinfo *bd)
237 {
238 	struct edd_params params;
239 	int ret = 1;	/* assume success */
240 
241 	v86.ctl = V86_FLAGS;
242 	v86.addr = 0x13;
243 	v86.eax = 0x800;
244 	v86.edx = bd->bd_unit;
245 	v86int();
246 
247 	/* Don't error out if we get bad sector number, try EDD as well */
248 	if (V86_CY(v86.efl) ||	/* carry set */
249 	    (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f))	/* unit # bad */
250 		return (0);	/* skip device */
251 
252 	if ((v86.ecx & 0x3f) == 0) /* absurd sector number */
253 		ret = 0;	/* set error */
254 
255 	/* Convert max cyl # -> # of cylinders */
256 	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
257 	/* Convert max head # -> # of heads */
258 	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
259 	bd->bd_sec = v86.ecx & 0x3f;
260 	bd->bd_type = v86.ebx & 0xff;
261 	bd->bd_flags |= BD_MODEINT13;
262 
263 	/* Calculate sectors count from the geometry */
264 	bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec;
265 	bd->bd_sectorsize = BIOSDISK_SECSIZE;
266 	DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl,
267 	    bd->bd_hds, bd->bd_sec);
268 
269 	/* Determine if we can use EDD with this device. */
270 	v86.ctl = V86_FLAGS;
271 	v86.addr = 0x13;
272 	v86.eax = 0x4100;
273 	v86.edx = bd->bd_unit;
274 	v86.ebx = 0x55aa;
275 	v86int();
276 	if (V86_CY(v86.efl) ||	/* carry set */
277 	    (v86.ebx & 0xffff) != 0xaa55 || /* signature */
278 	    (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
279 		return (ret);	/* return code from int13 AH=08 */
280 
281 	/* EDD supported */
282 	bd->bd_flags |= BD_MODEEDD1;
283 	if ((v86.eax & 0xff00) >= 0x3000)
284 		bd->bd_flags |= BD_MODEEDD3;
285 	/* Get disk params */
286 	params.len = sizeof(struct edd_params);
287 	v86.ctl = V86_FLAGS;
288 	v86.addr = 0x13;
289 	v86.eax = 0x4800;
290 	v86.edx = bd->bd_unit;
291 	v86.ds = VTOPSEG(&params);
292 	v86.esi = VTOPOFF(&params);
293 	v86int();
294 	if (!V86_CY(v86.efl)) {
295 		uint64_t total;
296 
297 		/*
298 		 * Sector size must be a multiple of 512 bytes.
299 		 * An alternate test would be to check power of 2,
300 		 * powerof2(params.sector_size).
301 		 */
302 		if (params.sector_size % BIOSDISK_SECSIZE)
303 			bd->bd_sectorsize = BIOSDISK_SECSIZE;
304 		else
305 			bd->bd_sectorsize = params.sector_size;
306 
307 		total = bd->bd_sectorsize * params.sectors;
308 		if (params.sectors != 0) {
309 			/* Only update if we did not overflow. */
310 			if (total > params.sectors)
311 				bd->bd_sectors = params.sectors;
312 		}
313 
314 		total = (uint64_t)params.cylinders *
315 		    params.heads * params.sectors_per_track;
316 		if (bd->bd_sectors < total)
317 			bd->bd_sectors = total;
318 
319 		ret = 1;
320 	}
321 	DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u",
322 	    bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize);
323 	return (ret);
324 }
325 
326 /*
327  * Print information about disks
328  */
329 static int
330 bd_print(int verbose)
331 {
332 	static char line[80];
333 	struct disk_devdesc dev;
334 	int i, ret = 0;
335 
336 	if (nbdinfo == 0)
337 		return (0);
338 
339 	printf("%s devices:", biosdisk.dv_name);
340 	if ((ret = pager_output("\n")) != 0)
341 		return (ret);
342 
343 	for (i = 0; i < nbdinfo; i++) {
344 		snprintf(line, sizeof(line),
345 		    "    disk%d:   BIOS drive %c (%ju X %u):\n", i,
346 		    (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit):
347 		    ('C' + bdinfo[i].bd_unit - 0x80),
348 		    (uintmax_t)bdinfo[i].bd_sectors,
349 		    bdinfo[i].bd_sectorsize);
350 		if ((ret = pager_output(line)) != 0)
351 			break;
352 		dev.dd.d_dev = &biosdisk;
353 		dev.dd.d_unit = i;
354 		dev.d_slice = -1;
355 		dev.d_partition = -1;
356 		if (disk_open(&dev,
357 		    bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors,
358 		    bdinfo[i].bd_sectorsize) == 0) {
359 			snprintf(line, sizeof(line), "    disk%d", i);
360 			ret = disk_print(&dev, line, verbose);
361 			disk_close(&dev);
362 			if (ret != 0)
363 			    return (ret);
364 		}
365 	}
366 	return (ret);
367 }
368 
369 /*
370  * Attempt to open the disk described by (dev) for use by (f).
371  *
372  * Note that the philosophy here is "give them exactly what
373  * they ask for".  This is necessary because being too "smart"
374  * about what the user might want leads to complications.
375  * (eg. given no slice or partition value, with a disk that is
376  *  sliced - are they after the first BSD slice, or the DOS
377  *  slice before it?)
378  */
379 static int
380 bd_open(struct open_file *f, ...)
381 {
382 	struct disk_devdesc *dev;
383 	struct disk_devdesc disk;
384 	int err, g_err;
385 	va_list ap;
386 	uint64_t size;
387 
388 	va_start(ap, f);
389 	dev = va_arg(ap, struct disk_devdesc *);
390 	va_end(ap);
391 
392 	if (dev->dd.d_unit < 0 || dev->dd.d_unit >= nbdinfo)
393 		return (EIO);
394 	BD(dev).bd_open++;
395 	if (BD(dev).bd_bcache == NULL)
396 	    BD(dev).bd_bcache = bcache_allocate();
397 
398 	/*
399 	 * Read disk size from partition.
400 	 * This is needed to work around buggy BIOS systems returning
401 	 * wrong (truncated) disk media size.
402 	 * During bd_probe() we tested if the mulitplication of bd_sectors
403 	 * would overflow so it should be safe to perform here.
404 	 */
405 	disk.dd.d_dev = dev->dd.d_dev;
406 	disk.dd.d_unit = dev->dd.d_unit;
407 	disk.d_slice = -1;
408 	disk.d_partition = -1;
409 	disk.d_offset = 0;
410 	if (disk_open(&disk, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
411 	    BD(dev).bd_sectorsize) == 0) {
412 
413 		if (disk_ioctl(&disk, DIOCGMEDIASIZE, &size) == 0) {
414 			size /= BD(dev).bd_sectorsize;
415 			if (size > BD(dev).bd_sectors)
416 				BD(dev).bd_sectors = size;
417 		}
418 		disk_close(&disk);
419 	}
420 
421 	err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
422 	    BD(dev).bd_sectorsize);
423 
424 #ifdef LOADER_GELI_SUPPORT
425 	static char gelipw[GELI_PW_MAXLEN];
426 	char *passphrase;
427 
428 	if (err)
429 		return (err);
430 
431 	/* if we already know there is no GELI, skip the rest */
432 	if (geli_status[dev->dd.d_unit][dev->d_slice] != ISGELI_UNKNOWN)
433 		return (err);
434 
435 	struct dsk dskp;
436 	struct ptable *table = NULL;
437 	struct ptable_entry part;
438 	struct pentry *entry;
439 	int geli_part = 0;
440 
441 	dskp.drive = bd_unit2bios(dev->dd.d_unit);
442 	dskp.type = dev->dd.d_dev->dv_type;
443 	dskp.unit = dev->dd.d_unit;
444 	dskp.slice = dev->d_slice;
445 	dskp.part = dev->d_partition;
446 	dskp.start = dev->d_offset;
447 
448 	/* We need the LBA of the end of the partition */
449 	table = ptable_open(&disk, BD(dev).bd_sectors,
450 	    BD(dev).bd_sectorsize, ptblread);
451 	if (table == NULL) {
452 		DEBUG("Can't read partition table");
453 		/* soft failure, return the exit status of disk_open */
454 		return (err);
455 	}
456 
457 	if (table->type == PTABLE_GPT)
458 		dskp.part = 255;
459 
460 	STAILQ_FOREACH(entry, &table->entries, entry) {
461 		dskp.slice = entry->part.index;
462 		dskp.start = entry->part.start;
463 		if (is_geli(&dskp) == 0) {
464 			geli_status[dev->dd.d_unit][dskp.slice] = ISGELI_YES;
465 			return (0);
466 		}
467 		if (geli_taste(bios_read, &dskp,
468 		    entry->part.end - entry->part.start) == 0) {
469 			if (geli_havekey(&dskp) == 0) {
470 				geli_status[dev->dd.d_unit][dskp.slice] = ISGELI_YES;
471 				geli_part++;
472 				continue;
473 			}
474 			if ((passphrase = getenv("kern.geom.eli.passphrase"))
475 			    != NULL) {
476 				/* Use the cached passphrase */
477 				bcopy(passphrase, &gelipw, GELI_PW_MAXLEN);
478 			}
479 			if (geli_passphrase(gelipw, dskp.unit, 'p',
480 				    (dskp.slice > 0 ? dskp.slice : dskp.part),
481 				    &dskp) == 0) {
482 				setenv("kern.geom.eli.passphrase", gelipw, 1);
483 				bzero(gelipw, sizeof(gelipw));
484 				geli_status[dev->dd.d_unit][dskp.slice] = ISGELI_YES;
485 				geli_part++;
486 				continue;
487 			}
488 		} else
489 			geli_status[dev->dd.d_unit][dskp.slice] = ISGELI_NO;
490 	}
491 
492 	/* none of the partitions on this disk have GELI */
493 	if (geli_part == 0) {
494 		/* found no GELI */
495 		geli_status[dev->dd.d_unit][dev->d_slice] = ISGELI_NO;
496 	}
497 #endif /* LOADER_GELI_SUPPORT */
498 
499 	return (err);
500 }
501 
502 static int
503 bd_close(struct open_file *f)
504 {
505 	struct disk_devdesc *dev;
506 
507 	dev = (struct disk_devdesc *)f->f_devdata;
508 	BD(dev).bd_open--;
509 	if (BD(dev).bd_open == 0) {
510 	    bcache_free(BD(dev).bd_bcache);
511 	    BD(dev).bd_bcache = NULL;
512 	}
513 	return (disk_close(dev));
514 }
515 
516 static int
517 bd_ioctl(struct open_file *f, u_long cmd, void *data)
518 {
519 	struct disk_devdesc *dev;
520 	int rc;
521 
522 	dev = (struct disk_devdesc *)f->f_devdata;
523 
524 	rc = disk_ioctl(dev, cmd, data);
525 	if (rc != ENOTTY)
526 		return (rc);
527 
528 	switch (cmd) {
529 	case DIOCGSECTORSIZE:
530 		*(u_int *)data = BD(dev).bd_sectorsize;
531 		break;
532 	case DIOCGMEDIASIZE:
533 		*(uint64_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
534 		break;
535 	default:
536 		return (ENOTTY);
537 	}
538 	return (0);
539 }
540 
541 static int
542 bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
543     char *buf, size_t *rsize)
544 {
545 	struct bcache_devdata bcd;
546 	struct disk_devdesc *dev;
547 
548 	dev = (struct disk_devdesc *)devdata;
549 	bcd.dv_strategy = bd_realstrategy;
550 	bcd.dv_devdata = devdata;
551 	bcd.dv_cache = BD(dev).bd_bcache;
552 	return (bcache_strategy(&bcd, rw, dblk + dev->d_offset,
553 	    size, buf, rsize));
554 }
555 
556 static int
557 bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
558     char *buf, size_t *rsize)
559 {
560     struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
561     uint64_t		disk_blocks;
562     int			blks, rc;
563 #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
564     char		fragbuf[BIOSDISK_SECSIZE];
565     size_t		fragsize;
566 
567     fragsize = size % BIOSDISK_SECSIZE;
568 #else
569     if (size % BD(dev).bd_sectorsize)
570 	panic("bd_strategy: %d bytes I/O not multiple of block size", size);
571 #endif
572 
573     DEBUG("open_disk %p", dev);
574 
575     /*
576      * Check the value of the size argument. We do have quite small
577      * heap (64MB), but we do not know good upper limit, so we check against
578      * INT_MAX here. This will also protect us against possible overflows
579      * while translating block count to bytes.
580      */
581     if (size > INT_MAX) {
582 	DEBUG("too large read: %zu bytes", size);
583 	return (EIO);
584     }
585 
586     blks = size / BD(dev).bd_sectorsize;
587     if (dblk > dblk + blks)
588 	return (EIO);
589 
590     if (rsize)
591 	*rsize = 0;
592 
593     /* Get disk blocks, this value is either for whole disk or for partition */
594     if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
595 	/* DIOCGMEDIASIZE returns bytes. */
596         disk_blocks /= BD(dev).bd_sectorsize;
597     } else {
598 	/* We should not get here. Just try to survive. */
599 	disk_blocks = BD(dev).bd_sectors - dev->d_offset;
600     }
601 
602     /* Validate source block address. */
603     if (dblk < dev->d_offset || dblk >= dev->d_offset + disk_blocks)
604 	return (EIO);
605 
606     /*
607      * Truncate if we are crossing disk or partition end.
608      */
609     if (dblk + blks >= dev->d_offset + disk_blocks) {
610 	blks = dev->d_offset + disk_blocks - dblk;
611 	size = blks * BD(dev).bd_sectorsize;
612 	DEBUG("short read %d", blks);
613     }
614 
615     switch (rw & F_MASK) {
616     case F_READ:
617 	DEBUG("read %d from %lld to %p", blks, dblk, buf);
618 
619 	if (blks && (rc = bd_read(dev, dblk, blks, buf))) {
620 	    /* Filter out floppy controller errors */
621 	    if (BD(dev).bd_flags != BD_FLOPPY || rc != 0x20) {
622 		printf("read %d from %lld to %p, error: 0x%x\n", blks, dblk,
623 		    buf, rc);
624 	    }
625 	    return (EIO);
626 	}
627 #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
628 	DEBUG("bd_strategy: frag read %d from %d+%d to %p",
629 	    fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE));
630 	if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) {
631 	    DEBUG("frag read error");
632 	    return(EIO);
633 	}
634 	bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize);
635 #endif
636 	break;
637     case F_WRITE :
638 	DEBUG("write %d from %lld to %p", blks, dblk, buf);
639 
640 	if (blks && bd_write(dev, dblk, blks, buf)) {
641 	    DEBUG("write error");
642 	    return (EIO);
643 	}
644 #ifdef BD_SUPPORT_FRAGS
645 	if(fragsize) {
646 	    DEBUG("Attempted to write a frag");
647 	    return (EIO);
648 	}
649 #endif
650 	break;
651     default:
652 	/* DO NOTHING */
653 	return (EROFS);
654     }
655 
656     if (rsize)
657 	*rsize = size;
658     return (0);
659 }
660 
661 static int
662 bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
663     int write)
664 {
665     static struct edd_packet packet;
666 
667     packet.len = sizeof(struct edd_packet);
668     packet.count = blks;
669     packet.off = VTOPOFF(dest);
670     packet.seg = VTOPSEG(dest);
671     packet.lba = dblk;
672     v86.ctl = V86_FLAGS;
673     v86.addr = 0x13;
674     if (write)
675 	/* Should we Write with verify ?? 0x4302 ? */
676 	v86.eax = 0x4300;
677     else
678 	v86.eax = 0x4200;
679     v86.edx = BD(dev).bd_unit;
680     v86.ds = VTOPSEG(&packet);
681     v86.esi = VTOPOFF(&packet);
682     v86int();
683     if (V86_CY(v86.efl))
684 	return (v86.eax >> 8);
685     return (0);
686 }
687 
688 static int
689 bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
690     int write)
691 {
692     u_int	x, bpc, cyl, hd, sec;
693 
694     bpc = BD(dev).bd_sec * BD(dev).bd_hds;	/* blocks per cylinder */
695     x = dblk;
696     cyl = x / bpc;			/* block # / blocks per cylinder */
697     x %= bpc;				/* block offset into cylinder */
698     hd = x / BD(dev).bd_sec;		/* offset / blocks per track */
699     sec = x % BD(dev).bd_sec;		/* offset into track */
700 
701     /* correct sector number for 1-based BIOS numbering */
702     sec++;
703 
704     if (cyl > 1023)
705 	/* CHS doesn't support cylinders > 1023. */
706 	return (1);
707 
708     v86.ctl = V86_FLAGS;
709     v86.addr = 0x13;
710     if (write)
711 	v86.eax = 0x300 | blks;
712     else
713 	v86.eax = 0x200 | blks;
714     v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
715     v86.edx = (hd << 8) | BD(dev).bd_unit;
716     v86.es = VTOPSEG(dest);
717     v86.ebx = VTOPOFF(dest);
718     v86int();
719     if (V86_CY(v86.efl))
720 	return (v86.eax >> 8);
721     return (0);
722 }
723 
724 static void
725 bd_io_workaround(struct disk_devdesc *dev)
726 {
727 	uint8_t buf[8 * 1024];
728 
729 	bd_edd_io(dev, 0xffffffff, 1, (caddr_t)buf, 0);
730 }
731 
732 
733 static int
734 bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write)
735 {
736     u_int	x, sec, result, resid, retry, maxfer;
737     caddr_t	p, xp, bbuf;
738 
739     /* Just in case some idiot actually tries to read/write -1 blocks... */
740     if (blks < 0)
741 	return (-1);
742 
743     resid = blks;
744     p = dest;
745 
746     /*
747      * Workaround for a problem with some HP ProLiant BIOS failing to work out
748      * the boot disk after installation. hrs and kuriyama discovered this
749      * problem with an HP ProLiant DL320e Gen 8 with a 3TB HDD, and discovered
750      * that an int13h call seems to cause a buffer overrun in the bios. The
751      * problem is alleviated by doing an extra read before the buggy read. It
752      * is not immediately known whether other models are similarly affected.
753      */
754     if (dblk >= 0x100000000)
755 	bd_io_workaround(dev);
756 
757     /* Decide whether we have to bounce */
758     if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 &&
759 	(VTOP(dest) >> 16) != (VTOP(dest +
760 	blks * BD(dev).bd_sectorsize) >> 16))) {
761 
762 	/*
763 	 * There is a 64k physical boundary somewhere in the
764 	 * destination buffer, or the destination buffer is above
765 	 * first 1MB of physical memory so we have to arrange a
766 	 * suitable bounce buffer.  Allocate a buffer twice as large
767 	 * as we need to.  Use the bottom half unless there is a break
768 	 * there, in which case we use the top half.
769 	 */
770 	x = V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize;
771 	x = min(x, (unsigned)blks);
772 	bbuf = PTOV(V86_IO_BUFFER);
773 	maxfer = x;		/* limit transfers to bounce region size */
774     } else {
775 	bbuf = NULL;
776 	maxfer = 0;
777     }
778 
779     while (resid > 0) {
780 	/*
781 	 * Play it safe and don't cross track boundaries.
782 	 * (XXX this is probably unnecessary)
783 	 */
784 	sec = dblk % BD(dev).bd_sec;	/* offset into track */
785 	x = min(BD(dev).bd_sec - sec, resid);
786 	if (maxfer > 0)
787 	    x = min(x, maxfer);		/* fit bounce buffer */
788 
789 	/* where do we transfer to? */
790 	xp = bbuf == NULL ? p : bbuf;
791 
792 	/*
793 	 * Put your Data In, Put your Data out,
794 	 * Put your Data In, and shake it all about
795 	 */
796 	if (write && bbuf != NULL)
797 	    bcopy(p, bbuf, x * BD(dev).bd_sectorsize);
798 
799 	/*
800 	 * Loop retrying the operation a couple of times.  The BIOS
801 	 * may also retry.
802 	 */
803 	for (retry = 0; retry < 3; retry++) {
804 	    /* if retrying, reset the drive */
805 	    if (retry > 0) {
806 		v86.ctl = V86_FLAGS;
807 		v86.addr = 0x13;
808 		v86.eax = 0;
809 		v86.edx = BD(dev).bd_unit;
810 		v86int();
811 	    }
812 
813 	    if (BD(dev).bd_flags & BD_MODEEDD1)
814 		result = bd_edd_io(dev, dblk, x, xp, write);
815 	    else
816 		result = bd_chs_io(dev, dblk, x, xp, write);
817 	    if (result == 0)
818 		break;
819 	}
820 
821 	if (write)
822 	    DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x,
823 		p, VTOP(p), dblk, result ? "failed" : "ok");
824 	else
825 	    DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x,
826 		dblk, p, VTOP(p), result ? "failed" : "ok");
827 	if (result) {
828 	    return (result);
829 	}
830 	if (!write && bbuf != NULL)
831 	    bcopy(bbuf, p, x * BD(dev).bd_sectorsize);
832 	p += (x * BD(dev).bd_sectorsize);
833 	dblk += x;
834 	resid -= x;
835     }
836 
837 /*    hexdump(dest, (blks * BD(dev).bd_sectorsize)); */
838     return(0);
839 }
840 
841 static int
842 bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
843 {
844 #ifdef LOADER_GELI_SUPPORT
845 	struct dsk dskp;
846 	off_t p_off, diff;
847 	daddr_t alignlba;
848 	int err, n, alignblks;
849 	char *tmpbuf;
850 
851 	/* if we already know there is no GELI, skip the rest */
852 	if (geli_status[dev->dd.d_unit][dev->d_slice] != ISGELI_YES)
853 		return (bd_io(dev, dblk, blks, dest, 0));
854 
855 	if (geli_status[dev->dd.d_unit][dev->d_slice] == ISGELI_YES) {
856 		/*
857 		 * Align reads to DEV_GELIBOOT_BSIZE bytes because partial
858 		 * sectors cannot be decrypted. Round the requested LBA down to
859 		 * nearest multiple of DEV_GELIBOOT_BSIZE bytes.
860 		 */
861 		alignlba = rounddown2(dblk * BD(dev).bd_sectorsize,
862 		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
863 		/*
864 		 * Round number of blocks to read up to nearest multiple of
865 		 * DEV_GELIBOOT_BSIZE
866 		 */
867 		diff = (dblk - alignlba) * BD(dev).bd_sectorsize;
868 		alignblks = roundup2(blks * BD(dev).bd_sectorsize + diff,
869 		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
870 
871 		/*
872 		 * If the read is rounded up to a larger size, use a temporary
873 		 * buffer here because the buffer provided by the caller may be
874 		 * too small.
875 		 */
876 		if (diff == 0) {
877 			tmpbuf = dest;
878 		} else {
879 			tmpbuf = malloc(alignblks * BD(dev).bd_sectorsize);
880 			if (tmpbuf == NULL) {
881 				return (-1);
882 			}
883 		}
884 
885 		if (alignlba + alignblks > BD(dev).bd_sectors) {
886 			DEBUG("Shorted read at %llu from %d to %llu blocks",
887 			    alignlba, alignblks, BD(dev).bd_sectors - alignlba);
888 			alignblks = BD(dev).bd_sectors - alignlba;
889 		}
890 
891 		err = bd_io(dev, alignlba, alignblks, tmpbuf, 0);
892 		if (err)
893 			return (err);
894 
895 		dskp.drive = bd_unit2bios(dev->dd.d_unit);
896 		dskp.type = dev->dd.d_dev->dv_type;
897 		dskp.unit = dev->dd.d_unit;
898 		dskp.slice = dev->d_slice;
899 		dskp.part = dev->d_partition;
900 		dskp.start = dev->d_offset;
901 
902 		/* GELI needs the offset relative to the partition start */
903 		p_off = alignlba - dskp.start;
904 
905 		err = geli_read(&dskp, p_off * BD(dev).bd_sectorsize, (u_char *)tmpbuf,
906 		    alignblks * BD(dev).bd_sectorsize);
907 		if (err)
908 			return (err);
909 
910 		if (tmpbuf != dest) {
911 			bcopy(tmpbuf + diff, dest, blks * BD(dev).bd_sectorsize);
912 			free(tmpbuf);
913 		}
914 		return (0);
915 	}
916 #endif /* LOADER_GELI_SUPPORT */
917 
918 	return (bd_io(dev, dblk, blks, dest, 0));
919 }
920 
921 static int
922 bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
923 {
924 
925 	return (bd_io(dev, dblk, blks, dest, 1));
926 }
927 
928 /*
929  * Return the BIOS geometry of a given "fixed drive" in a format
930  * suitable for the legacy bootinfo structure.  Since the kernel is
931  * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
932  * prefer to get the information directly, rather than rely on being
933  * able to put it together from information already maintained for
934  * different purposes and for a probably different number of drives.
935  *
936  * For valid drives, the geometry is expected in the format (31..0)
937  * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
938  * indicated by returning the geometry of a "1.2M" PC-format floppy
939  * disk.  And, incidentally, what is returned is not the geometry as
940  * such but the highest valid cylinder, head, and sector numbers.
941  */
942 uint32_t
943 bd_getbigeom(int bunit)
944 {
945 
946     v86.ctl = V86_FLAGS;
947     v86.addr = 0x13;
948     v86.eax = 0x800;
949     v86.edx = 0x80 + bunit;
950     v86int();
951     if (V86_CY(v86.efl))
952 	return 0x4f010f;
953     return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
954 	   (v86.edx & 0xff00) | (v86.ecx & 0x3f);
955 }
956 
957 /*
958  * Return a suitable dev_t value for (dev).
959  *
960  * In the case where it looks like (dev) is a SCSI disk, we allow the number of
961  * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
962  */
963 int
964 bd_getdev(struct i386_devdesc *d)
965 {
966     struct disk_devdesc		*dev;
967     int				biosdev;
968     int 			major;
969     int				rootdev;
970     char			*nip, *cp;
971     int				i, unit;
972 
973     dev = (struct disk_devdesc *)d;
974     biosdev = bd_unit2bios(dev->dd.d_unit);
975     DEBUG("unit %d BIOS device %d", dev->dd.d_unit, biosdev);
976     if (biosdev == -1)				/* not a BIOS device */
977 	return(-1);
978     if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
979 	BD(dev).bd_sectorsize) != 0)		/* oops, not a viable device */
980 	    return (-1);
981     else
982 	disk_close(dev);
983 
984     if (biosdev < 0x80) {
985 	/* floppy (or emulated floppy) or ATAPI device */
986 	if (bdinfo[dev->dd.d_unit].bd_type == DT_ATAPI) {
987 	    /* is an ATAPI disk */
988 	    major = WFDMAJOR;
989 	} else {
990 	    /* is a floppy disk */
991 	    major = FDMAJOR;
992 	}
993     } else {
994 	    /* assume an IDE disk */
995 	    major = WDMAJOR;
996     }
997     /* default root disk unit number */
998     unit = biosdev & 0x7f;
999 
1000     /* XXX a better kludge to set the root disk unit number */
1001     if ((nip = getenv("root_disk_unit")) != NULL) {
1002 	i = strtol(nip, &cp, 0);
1003 	/* check for parse error */
1004 	if ((cp != nip) && (*cp == 0))
1005 	    unit = i;
1006     }
1007 
1008     rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition);
1009     DEBUG("dev is 0x%x\n", rootdev);
1010     return(rootdev);
1011 }
1012 
1013 #ifdef LOADER_GELI_SUPPORT
1014 int
1015 bios_read(void *vdev __unused, void *xpriv, off_t off, void *buf, size_t bytes)
1016 {
1017 	struct disk_devdesc dev;
1018 	struct dsk *priv = xpriv;
1019 
1020 	dev.dd.d_dev = &biosdisk;
1021 	dev.dd.d_unit = priv->unit;
1022 	dev.d_slice = priv->slice;
1023 	dev.d_partition = priv->part;
1024 	dev.d_offset = priv->start;
1025 
1026 	off = off / BD(&dev).bd_sectorsize;
1027 	/* GELI gives us the offset relative to the partition start */
1028 	off += dev.d_offset;
1029 	bytes = bytes / BD(&dev).bd_sectorsize;
1030 
1031 	return (bd_io(&dev, off, bytes, buf, 0));
1032 }
1033 #endif /* LOADER_GELI_SUPPORT */
1034