/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#else	/* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <assert.h>
#include <vmmapi.h>
#define	KASSERT(exp,msg)	assert((exp))
#endif	/* _KERNEL */

#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_MOVSX,
	VIE_OP_TYPE_MOVZX,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_TWO_BYTE,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)	/* 16/32-bit immediate operand */
#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */
#define	VIE_OP_F_MOFFSET	(1 << 2)	/* 16/32/64-bit immediate moffset */

static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xBE] = {
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,
	},
};

static const struct vie_op one_byte_opcodes[256] = {
	[0x0F] = {
		.op_byte = 0x0F,
		.op_type = VIE_OP_TYPE_TWO_BYTE
	},
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8A] = {
		.op_byte = 0x8A,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xC6] = {
		/* XXX Group 11 extended opcode - not just MOV */
		.op_byte = 0xC6,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM8,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
};

/* struct vie.mod */
#define	VIE_MOD_INDIRECT	0
#define	VIE_MOD_INDIRECT_DISP8	1
#define	VIE_MOD_INDIRECT_DISP32	2
#define	VIE_MOD_DIRECT		3

/* struct vie.rm */
#define	VIE_RM_SIB		4
#define	VIE_RM_DISP32		5

#define	GB			(1024 * 1024 * 1024)

static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};

static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
	int error;

	error = vm_get_register(vm, vcpuid, reg, rval);

	return (error);
}

static void
vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
{
	*lhbr = 0;
	*reg = gpr_map[vie->reg];

	/*
	 * 64-bit mode imposes limitations on accessing legacy high byte
	 * registers (lhbr).
	 *
	 * The legacy high-byte registers cannot be addressed if the REX
	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
	 *
	 * If the REX prefix is not present then the values 4, 5, 6 and 7
	 * of the 'ModRM:reg' field address the legacy high-byte registers,
	 * %ah, %ch, %dh and %bh respectively.
	 */
	if (!vie->rex_present) {
		if (vie->reg & 0x4) {
			*lhbr = 1;
			*reg = gpr_map[vie->reg & 0x3];
		}
	}
}

static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
	uint64_t val;
	int error, lhbr;
	enum vm_reg_name reg;

	vie_calc_bytereg(vie, &reg, &lhbr);
	error = vm_get_register(vm, vcpuid, reg, &val);

	/*
	 * To obtain the value of a legacy high byte register shift the
	 * base register right by 8 bits (%ah = %rax >> 8).
	 */
	if (lhbr)
		*rval = val >> 8;
	else
		*rval = val;
	return (error);
}

static int
vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
{
	uint64_t origval, val, mask;
	int error, lhbr;
	enum vm_reg_name reg;

	vie_calc_bytereg(vie, &reg, &lhbr);
	error = vm_get_register(vm, vcpuid, reg, &origval);
	if (error == 0) {
		val = byte;
		mask = 0xff;
		if (lhbr) {
			/*
			 * Shift left by 8 to store 'byte' in a legacy high
			 * byte register.
			 */
			val <<= 8;
			mask <<= 8;
		}
		val |= origval & ~mask;
		error = vm_set_register(vm, vcpuid, reg, val);
	}
	return (error);
}

int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
		    uint64_t val, int size)
{
	int error;
	uint64_t origval;

	switch (size) {
	case 1:
	case 2:
		error = vie_read_register(vm, vcpuid, reg, &origval);
		if (error)
			return (error);
		val &= size2mask[size];
		val |= origval & ~size2mask[size];
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	error = vm_set_register(vm, vcpuid, reg, val);
	return (error);
}

static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;	/* override for byte operation */
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m16, r16
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r	mov r/m64, r64
		 */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
		/*
		 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r8, r/m8
		 * REX + 8A/r:	mov r8, r/m8
		 */
		size = 1;	/* override for byte operation */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0)
			error = vie_write_bytereg(vm, vcpuid, vie, val);
		break;
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8B/r:	mov r16, r/m16
		 * 8B/r:	mov r32, r/m32
		 * REX.W 8B/r:	mov r64, r/m64
		 */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xC6:
		/*
		 * MOV from imm8 to mem (ModRM:r/m)
		 * C6/0		mov r/m8, imm8
		 * REX + C6/0	mov r/m8, imm8
		 */
		size = 1;	/* override for byte operation */
		error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
		break;
	case 0xC7:
		/*
		 * MOV from imm16/imm32 to mem (ModRM:r/m)
		 * C7/0		mov r/m16, imm16
		 * C7/0		mov r/m32, imm32
		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate & size2mask[size];
		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}

static int
emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	     mem_region_read_t memread, mem_region_write_t memwrite,
	     void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0xB6:
		/*
		 * MOV and zero extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B6/r		movzx r16, r/m8
		 * 0F B6/r		movzx r32, r/m8
		 * REX.W + 0F B6/r	movzx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* zero-extend byte */
		val = (uint8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xBE:
		/*
		 * MOV and sign extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F BE/r		movsx r16, r/m8
		 * 0F BE/r		movsx r32, r/m8
		 * REX.W + 0F BE/r	movsx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* sign extend byte */
		val = (int8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	default:
		break;
	}
	return (error);
}

static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val1, val2;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x23:
		/*
		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
		 * result in reg.
		 *
		 * 23/r		and r16, r/m16
		 * 23/r		and r32, r/m32
		 * REX.W + 23/r	and r64, r/m64
		 */

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		val1 &= val2;
		error = vie_update_register(vm, vcpuid, reg, val1, size);
		break;
	case 0x81:
		/*
		 * AND mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 81 /4		and r/m16, imm16
		 * 81 /4		and r/m32, imm32
		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
		 *
		 * Currently, only the AND operation of the 0x81 opcode
		 * is implemented (ModRM:reg = b100).
		 */
		if ((vie->reg & 7) != 4)
			break;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 &= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t val1;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x83:
		/*
		 * OR mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 83 /1		OR r/m16, imm8 sign-extended to 16
		 * 83 /1		OR r/m32, imm8 sign-extended to 32
		 * REX.W + 83/1		OR r/m64, imm8 sign-extended to 64
		 *
		 * Currently, only the OR operation of the 0x83 opcode
		 * is implemented (ModRM:reg = b001).
		 */
		if ((vie->reg & 7) != 1)
			break;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 |= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
			mem_region_read_t memread, mem_region_write_t memwrite,
			void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVSX:
	case VIE_OP_TYPE_MOVZX:
		error = emulate_movx(vm, vcpuid, gpa, vie,
				     memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				   memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
{
	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
	    ("%s: invalid size %d", __func__, size));
	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));

	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
		return (0);

	return ((gla & (size - 1)) ? 1 : 0);
}

int
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
{
	uint64_t mask;

	if (cpu_mode != CPU_MODE_64BIT)
		return (0);

	/*
	 * The value of the bit 47 in the 'gla' should be replicated in the
	 * most significant 16 bits.
	 */
	mask = ~((1UL << 48) - 1);
	if (gla & (1UL << 47))
		return ((gla & mask) != mask);
	else
		return ((gla & mask) != 0);
}

uint64_t
vie_size2mask(int size)
{
	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
	    ("vie_size2mask: invalid size %d", size));
	return (size2mask[size]);
}

int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
    int prot, uint64_t *gla)
{
	uint64_t firstoff, low_limit, high_limit, segbase;
	int glasize, type;

	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
	    ("%s: invalid segment %d", __func__, seg));
	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
	    ("%s: invalid operand size %d", __func__, length));
	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
	    ("%s: invalid prot %#x", __func__, prot));

	firstoff = offset;
	if (cpu_mode == CPU_MODE_64BIT) {
		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
		glasize = 8;
	} else {
		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
		glasize = 4;
		/*
		 * If the segment selector is loaded with a NULL selector
		 * then the descriptor is unusable and attempting to use
		 * it results in a #GP(0).
		 */
		if (SEG_DESC_UNUSABLE(desc->access))
			return (-1);

		/*
		 * The processor generates a #NP exception when a segment
		 * register is loaded with a selector that points to a
		 * descriptor that is not present. If this was the case then
		 * it would have been checked before the VM-exit.
		 */
		KASSERT(SEG_DESC_PRESENT(desc->access),
		    ("segment %d not present: %#x", seg, desc->access));

		/*
		 * The descriptor type must indicate a code/data segment.
		 */
		type = SEG_DESC_TYPE(desc->access);
		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
		    "descriptor type %#x", seg, type));

		if (prot & PROT_READ) {
			/* #GP on a read access to an exec-only code segment */
			if ((type & 0xA) == 0x8)
				return (-1);
		}

		if (prot & PROT_WRITE) {
			/*
			 * #GP on a write access to a code segment or a
			 * read-only data segment.
			 */
			if (type & 0x8)			/* code segment */
				return (-1);

			if ((type & 0xA) == 0)		/* read-only data seg */
				return (-1);
		}

		/*
		 * 'desc->limit' is fully expanded taking granularity into
		 * account.
		 */
		if ((type & 0xC) == 0x4) {
			/* expand-down data segment */
			low_limit = desc->limit + 1;
			high_limit = SEG_DESC_DEF32(desc->access) ?
			    0xffffffff : 0xffff;
		} else {
			/* code segment or expand-up data segment */
			low_limit = 0;
			high_limit = desc->limit;
		}

		while (length > 0) {
			offset &= vie_size2mask(addrsize);
			if (offset < low_limit || offset > high_limit)
				return (-1);
			offset++;
			length--;
		}
	}

	/*
	 * In 64-bit mode all segments except %fs and %gs have a segment
	 * base address of 0.
	 */
	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
	    seg != VM_REG_GUEST_GS) {
		segbase = 0;
	} else {
		segbase = desc->base;
	}

	/*
	 * Truncate 'firstoff' to the effective address size before adding
	 * it to the segment base.
	 */
	firstoff &= vie_size2mask(addrsize);
	*gla = (segbase + firstoff) & vie_size2mask(glasize);
	return (0);
}

#ifdef _KERNEL
void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}

static int
pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
{
	int error_code = 0;

	if (pte & PG_V)
		error_code |= PGEX_P;
	if (prot & VM_PROT_WRITE)
		error_code |= PGEX_W;
	if (usermode)
		error_code |= PGEX_U;
	if (rsvd)
		error_code |= PGEX_RSV;
	if (prot & VM_PROT_EXECUTE)
		error_code |= PGEX_I;

	return (error_code);
}

static void
ptp_release(void **cookie)
{
	if (*cookie != NULL) {
		vm_gpa_release(*cookie);
		*cookie = NULL;
	}
}

static void *
ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
{
	void *ptr;

	ptp_release(cookie);
	ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
	return (ptr);
}

int
vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa)
{
	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
	u_int retries;
	uint64_t *ptpbase, ptpphys, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	usermode = (paging->cpl == 3 ? 1 : 0);
	writable = prot & VM_PROT_WRITE;
	cookie = NULL;
	retval = 0;
	retries = 0;
restart:
	ptpphys = paging->cr3;		/* root of the page tables */
	ptp_release(&cookie);
	if (retries++ > 0)
		maybe_yield();

	if (vie_canonical_check(paging->cpu_mode, gla)) {
		/*
		 * XXX assuming a non-stack reference otherwise a stack fault
		 * should be generated.
		 */
		vm_inject_gp(vm, vcpuid);
		goto fault;
	}

	if (paging->paging_mode == PAGING_MODE_FLAT) {
		*gpa = gla;
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_32) {
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			if ((pte32 & PG_V) == 0 ||
			    (usermode && (pte32 & PG_U) == 0) ||
			    (writable && (pte32 & PG_RW) == 0)) {
				pfcode = pf_error_code(usermode, prot, 0,
				    pte32);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}

			/*
			 * Emulate the x86 MMU's management of the accessed
			 * and dirty flags. While the accessed flag is set
			 * at every level of the page table, the dirty flag
			 * is only set at the last level providing the guest
			 * physical address.
			 */
			if ((pte32 & PG_A) == 0) {
				if (atomic_cmpset_32(&ptpbase32[ptpindex],
				    pte32, pte32 | PG_A) == 0) {
					goto restart;
				}
			}

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Set the dirty bit in the page table entry if necessary */
		if (writable && (pte32 & PG_M) == 0) {
			if (atomic_cmpset_32(&ptpbase32[ptpindex],
			    pte32, pte32 | PG_M) == 0) {
				goto restart;
			}
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_PAE) {
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		ptpphys = pte;

		nlevels = 2;
	} else
		nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0 ||
		    (usermode && (pte & PG_U) == 0) ||
		    (writable && (pte & PG_RW) == 0)) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		/* Set the accessed bit in the page table entry */
		if ((pte & PG_A) == 0) {
			if (atomic_cmpset_64(&ptpbase[ptpindex],
			    pte, pte | PG_A) == 0) {
				goto restart;
			}
		}

		if (nlevels > 0 && (pte & PG_PS) != 0) {
			if (pgsize > 1 * GB) {
				pfcode = pf_error_code(usermode, prot, 1, pte);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}
			break;
		}

		ptpphys = pte;
	}

	/* Set the dirty bit in the page table entry if necessary */
	if (writable && (pte & PG_M) == 0) {
		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
			goto restart;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
done:
	ptp_release(&cookie);
	return (retval);
error:
	retval = -1;
	goto done;
fault:
	retval = 1;
	goto done;
}

int
vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
    uint64_t rip, int inst_length, struct vie *vie)
{
	int n, error, prot;
	uint64_t gpa, off;
	void *hpa, *cookie;

	/*
	 * XXX cache previously fetched instructions using 'rip' as the tag
	 */

	prot = VM_PROT_READ | VM_PROT_EXECUTE;
	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	/* Copy the instruction into 'vie' */
	while (vie->num_valid < inst_length) {
		error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
		if (error)
			return (error);

		off = gpa & PAGE_MASK;
		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
			break;

		bcopy(hpa, &vie->inst[vie->num_valid], n);

		vm_gpa_release(cookie);

		rip += n;
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
		return (0);
	else
		return (-1);
}

static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid) {
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
	uint8_t x;

	while (1) {
		if (vie_peek(vie, &x))
			return (-1);

		if (x == 0x66)
			vie->opsize_override = 1;
		else if (x == 0x67)
			vie->addrsize_override = 1;
		else
			break;

		vie_advance(vie);
	}

	/*
	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
	 * - Only one REX prefix is allowed per instruction.
	 * - The REX prefix must immediately precede the opcode byte or the
	 *   escape opcode byte.
	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
	 *   the mandatory prefix must come before the REX prefix.
	 */
	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;
		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;
		vie_advance(vie);
	}

	/*
	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
	 */
	if (cpu_mode == CPU_MODE_64BIT) {
		/*
		 * Default address size is 64-bits and default operand size
		 * is 32-bits.
		 */
		vie->addrsize = vie->addrsize_override ? 4 : 8;
		if (vie->rex_w)
			vie->opsize = 8;
		else if (vie->opsize_override)
			vie->opsize = 2;
		else
			vie->opsize = 4;
	} else if (cs_d) {
		/* Default address and operand sizes are 32-bits */
		vie->addrsize = vie->addrsize_override ? 2 : 4;
		vie->opsize = vie->opsize_override ? 2 : 4;
	} else {
		/* Default address and operand sizes are 16-bits */
		vie->addrsize = vie->addrsize_override ? 4 : 2;
		vie->opsize = vie->opsize_override ? 4 : 2;
	}
	return (0);
}

static int
decode_two_byte_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = two_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);

	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
		return (decode_two_byte_opcode(vie));

	return (0);
}

static int
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
	uint8_t x;

	if (cpu_mode == CPU_MODE_REAL)
		return (-1);

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm = (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[8];
		int8_t	signed8;
		int16_t	signed16;
		int32_t	signed32;
		int64_t	signed64;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
		/*
		 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
		 * The memory offset size follows the address-size of the
		 * instruction. Although this is treated as an immediate
		 * value during instruction decoding it is interpreted as
		 * a segment offset by the instruction emulation.
		 */
		vie->imm_bytes = vie->addrsize;
	} else if (vie->op.op_flags & VIE_OP_F_IMM) {
		/*
		 * Section 2.2.1.5 "Immediates", Intel SDM:
		 * In 64-bit mode the typical size of immediate operands
		 * remains 32-bits. When the operand size is 64-bits, the
		 * processor sign-extends all immediates to 64-bits prior
		 * to their use.
		 */
		if (vie->opsize == 4 || vie->opsize == 8)
			vie->imm_bytes = 4;
		else
			vie->imm_bytes = 2;
	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
		vie->imm_bytes = 1;
	}

	if ((n = vie->imm_bytes) == 0)
		return (0);

	KASSERT(n == 1 || n == 2 || n == 4 || n == 8,
	    ("%s: invalid number of immediate bytes: %d", __func__, n));

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	/* sign-extend the immediate value before use */
	if (n == 1)
		vie->immediate = u.signed8;
	else if (n == 2)
		vie->immediate = u.signed16;
	else if (n == 4)
		vie->immediate = u.signed32;
	else
		vie->immediate = u.signed64;

	if (vie->op.op_flags & VIE_OP_F_MOFFSET) {
		/*
		 * If the immediate value is going to be interpreted as a
		 * segment offset then undo the sign-extension above.
		 */
		vie->immediate &= size2mask[n];
	}

	return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx, gla2;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
			    error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
			    error, vie->index_register);
			return (-1);
		}
	}

	/* XXX assuming that the base address of the segment is 0 */
	gla2 = base + vie->scale * idx + vie->displacement;
	gla2 &= size2mask[vie->addrsize];
	if (gla != gla2) {
		printf("verify_gla mismatch: "
		    "base(0x%0lx), scale(%d), index(0x%0lx), "
		    "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
		    base, vie->scale, idx, vie->displacement, gla, gla2);
		return (-1);
	}

	return (0);
}

int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
    enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
{

	if (decode_prefixes(vie, cpu_mode, cs_d))
		return (-1);

	if (decode_opcode(vie))
		return (-1);

	if (decode_modrm(vie, cpu_mode))
		return (-1);

	if (decode_sib(vie))
		return (-1);

	if (decode_displacement(vie))
		return (-1);

	if (decode_immediate(vie))
		return (-1);

	if (verify_inst_length(vie))
		return (-1);

	if (verify_gla(vm, cpuid, gla, vie))
		return (-1);

	vie->decoded = 1;	/* success */

	return (0);
}
#endif	/* _KERNEL */