/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 by Delphix. All rights reserved. * Copyright 2019 Joyent, Inc. */ /* * Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. * Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T * All Rights Reserved */ /* * Copyright (c) 2009, Intel Corporation. * All rights reserved. */ /* * General assembly language routines. * It is the intent of this file to contain routines that are * independent of the specific kernel architecture, and those that are * common across kernel architectures. * As architectures diverge, and implementations of specific * architecture-dependent routines change, the routines should be moved * from this file into the respective ../`arch -k`/subr.s file. */ #include #include #include #include #include #include #include #include #include #include "assym.h" #include /* * on_fault() * * Catch lofault faults. Like setjmp except it returns one * if code following causes uncorrectable fault. Turned off * by calling no_fault(). Note that while under on_fault(), * SMAP is disabled. 
 * For more information see uts/intel/ml/copy.s.
 */

	ENTRY(on_fault)
	movq	%gs:CPU_THREAD, %rsi
	leaq	catch_fault(%rip), %rdx
	movq	%rdi, T_ONFAULT(%rsi)		/* jumpbuf in t_onfault */
	movq	%rdx, T_LOFAULT(%rsi)		/* catch_fault in t_lofault */
	call	smap_disable			/* allow user accesses */
	jmp	setjmp				/* let setjmp do the rest */

catch_fault:
	/*
	 * Control arrives here (via t_lofault) if a protected access
	 * faulted: clear both handlers, re-enable SMAP, and longjmp back
	 * to the buffer saved above so on_fault() returns non-zero.
	 */
	movq	%gs:CPU_THREAD, %rsi
	movq	T_ONFAULT(%rsi), %rdi		/* address of save area */
	xorl	%eax, %eax
	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
	call	smap_enable			/* disallow user accesses */
	jmp	longjmp				/* let longjmp do the rest */
	SET_SIZE(on_fault)

	/*
	 * no_fault(): disarm the handlers installed by on_fault() and
	 * restore SMAP protection.
	 */
	ENTRY(no_fault)
	movq	%gs:CPU_THREAD, %rsi
	xorl	%eax, %eax
	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
	call	smap_enable			/* disallow user accesses */
	ret
	SET_SIZE(no_fault)

/*
 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
 */

	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi
	movq	T_ONTRAP(%rsi), %rdi
	addq	$OT_JMPBUF, %rdi
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

/*
 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
 * more information about the on_trap() mechanism.  If the on_trap_data is the
 * same as the topmost stack element, we just modify that element.
 */

	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx) */
	je	0f				/*	don't modify t_ontrap */
	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */
0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp
	SET_SIZE(on_trap)

/*
 * Setjmp and longjmp implement non-local gotos using state vectors
 * type label_t.
 */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

	/*
	 * setjmp(label_t *lp): save the callee-saved registers, %rsp and
	 * the return address into *lp; returns 0 to the direct caller.
	 */
	ENTRY(setjmp)
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	movq	(%rsp), %rdx		/* return address */
	movq	%rdx, (%rdi)		/* LABEL_PC is 0 */
	xorl	%eax, %eax		/* return 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * longjmp(label_t *lp): restore the state saved by setjmp() and
	 * resume at the saved PC, making setjmp() appear to return 1.
	 */
	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R15(%rdi), %r15
	movq	(%rdi), %rdx		/* return address; LABEL_PC is 0 */
	movq	%rdx, (%rsp)		/* overwrite return address on stack */
	xorl	%eax, %eax
	incl	%eax			/* return 1 */
	ret
	SET_SIZE(longjmp)

/*
 * if a() calls b() calls caller(),
 * caller() returns return address in a().
 * (Note: We assume a() and b() are C routines which do the normal entry/exit
 *  sequence.)
 */

	ENTRY(caller)
	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
	ret
	SET_SIZE(caller)

/*
 * if a() calls callee(), callee() returns the
 * return address in a();
 */

	ENTRY(callee)
	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
	ret
	SET_SIZE(callee)

/*
 * return the current frame pointer
 */

	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

/*
 * Invalidate a single page table entry in the TLB
 */

	ENTRY(mmu_invlpg)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_invlpg)

/*
 * Get/Set the value of various control registers
 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	ENTRY(getcr2)
#if defined(__xpv)
	/* under the hypervisor, read the shadow %cr2 from the vcpu info */
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* rewrite %cr3 with its current value (flushes non-global TLB entries) */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

	/*
	 * __cpuid_insn(struct cpuid_regs *regs): execute cpuid with the
	 * input values from *regs and store the results back into *regs.
	 * %rbx/%rcx/%rdx are preserved around the instruction.
	 */
	ENTRY(__cpuid_insn)
	movq	%rbx, %r8
	movq	%rcx, %r9
	movq	%rdx, %r11
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	cpuid
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movq	%r8, %rbx
	movq	%r9, %rcx
	movq	%r11, %rdx
	ret
	SET_SIZE(__cpuid_insn)

	/*
	 * i86_monitor(addr, extensions, hints): arm a monitor on addr.
	 * The monitor instruction is emitted as raw bytes for toolchains
	 * that do not know the mnemonic.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rdi, %rax		/* addr */
	movq	%rsi, %rcx		/* extensions */
	/* rdx contains input arg3: hints */
	clflush	(%rax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

	ENTRY_NP(i86_mwait)
	pushq	%rbp
	call	x86_md_clear	/* clear CPU buffers before going idle */
	movq	%rsp, %rbp
	movq	%rdi, %rax		/* data */
	movq	%rsi, %rcx		/* extensions */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#if defined(__xpv)
	/*
	 * Defined in C
	 */
#else

	/*
	 * hrtime_t tsc_read(void): return the 64-bit TSC.  The default
	 * body serializes with cpuid; the alternative sequences delimited
	 * by the _start/_end label pairs below are patched over it at boot
	 * depending on CPU capabilities (mfence/lfence serialization,
	 * rdtscp, or no usable TSC) — do not change their sizes or labels.
	 */
	ENTRY_NP(tsc_read)
	movq	%rbx, %r11
	movl	$0, %eax
	cpuid			/* serialize ahead of rdtsc */
	rdtsc
	movq	%r11, %rbx
	shlq	$32, %rdx
	orq	%rdx, %rax	/* %rax = (%edx << 32) | %eax */
	ret
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:
	.globl _tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tscp_end
_tscp_end:
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#endif	/* __xpv */

	/*
	 * randtick(): raw rdtsc with no serializing fence (cheaper than
	 * tsc_read(); callers tolerate out-of-order sampling).
	 */
	ENTRY_NP(randtick)
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	SET_SIZE(randtick)

/*
 * Insert entryp after predp in a doubly linked list.
 */

	ENTRY(_insque)
	movq	(%rsi), %rax		/* predp->forw */
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp */
	movq	%rax, (%rdi)		/* entryp->forw = predp->forw */
	movq	%rdi, (%rsi)		/* predp->forw = entryp */
	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp */
	ret
	SET_SIZE(_insque)

/*
 * Remove entryp from a doubly linked list
 */

	ENTRY(_remque)
	movq	(%rdi), %rax		/* entry->forw */
	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

/*
 * Returns the number of
 * non-NULL bytes in string argument.
 */

/*
 * This is close to a simple transliteration of a C version of this
 * routine.  We should either just -make- this be a C version, or
 * justify having it in assembler by making it significantly faster.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < KERNELBASE)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	/* DEBUG kernels panic if the argument lies below kernelbase */
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jae	str_valid
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	cmpb	$0, (%rdi)
	movq	%rdi, %rax		/* %rax = s0 (start) */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi
	cmpb	$0, (%rdi)
	jne	.strlen_loop
.null_found:
	subq	%rax, %rdi		/* length = s - s0 */
	movq	%rdi, %rax
	ret
	SET_SIZE(strlen)

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

/*
 * Berkeley 4.3 introduced symbolically named interrupt levels
 * as a way deal with priority in a machine independent fashion.
 * Numbered priorities are machine specific, and should be
 * discouraged where possible.
 *
 * Note, for the machine specific priorities there are
 * examples listed for devices that use a particular priority.
 * It should not be construed that all devices of that
 * type should be at that priority.  It is currently where
 * the current devices fit into the priority scheme based
 * upon time criticalness.
 *
 * The underlying assumption of these assignments is that
 * IPL 10 is the highest level from which a device
 * routine can call wakeup.  Devices that interrupt from higher
 * levels are restricted in what they can do.  If they need
 * kernel services they should schedule a routine at a lower
 * level (via software interrupt) to do the required
 * processing.
 *
 * Examples of this higher usage:
 *	Level	Usage
 *	14	Profiling clock (and PROM uart polling clock)
 *	12	Serial ports
 *
 * The serial ports request lower level processing on level 6.
 *
 * Also, almost all splN routines (where N is a number or a
 * mnemonic) will do a RAISE(), on the assumption that they are
 * never used to lower our priority.
 * The exceptions are:
 *	spl8()		Because you can't be above 15 to begin with!
 *	splzs()		Because this is used at boot time to lower our
 *			priority, to allow the PROM to poll the uart.
 *	spl0()		Used to lower priority to 0.
 */

/* set the new priority unconditionally (can lower as well as raise) */
#define	SETPRI(level) \
	movl	$##level, %edi;	/* new priority */ \
	jmp	do_splx			/* redirect to do_splx */

/* raise-only: never drops the current priority */
#define	RAISE(level) \
	movl	$##level, %edi;	/* new priority */ \
	jmp	splr			/* redirect to splr */

	/* locks out all interrupts, including memory errors */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* just below the level that profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/* sun specific - highest priority onboard serial i/o asy ports */
	ENTRY(splzs)
	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
	SET_SIZE(splzs)

	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* allow all interrupts */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)

	/* splx implementation */
	ENTRY(splx)
	jmp	do_splx		/* redirect to common splx code */
	SET_SIZE(splx)

	/*
	 * wait_500ms(): busy-wait roughly 500ms by calling tenmicrosec()
	 * 50000 times.  Preserves %rbx for the caller.
	 */
	ENTRY(wait_500ms)
	pushq	%rbx
	movl	$50000, %ebx
1:
	call	tenmicrosec
	decl	%ebx
	jnz	1b
	popq	%rbx
	ret
	SET_SIZE(wait_500ms)

#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define	RESET_METHOD_PCI	4

	/* bitmask of reset methods pc_reset is allowed to attempt */
	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	ENTRY(pc_reset)

	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
	jz	1f

	/
	/ Try the classic keyboard controller-triggered reset.
	/
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)

	/ Wait up to 500 milliseconds here for the keyboard controller
	/ to pull the reset line.  On some systems where the keyboard
	/ controller is slow to pull the reset line, the next reset method
	/ may be executed (which may be bad if those systems hang when the
	/ next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
	/ and Ferrari 4000 (doesn't like the cf9 reset method))

	call	wait_500ms

1:
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
	jz	3f

	/
	/ Try port 0x92 fast reset
	/
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/ If port's not there, we should get back 0xFF
	je	1f
	testb	$1, %al		/ If bit 0
	jz	2f		/ is clear, jump to perform the reset
	andb	$0xfe, %al	/ otherwise,
	outb	(%dx)		/ clear bit 0 first, then
2:
	orb	$1, %al		/ Set bit 0
	outb	(%dx)		/ and reset the system
1:

	call	wait_500ms

3:
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
	jz	4f

	/ Try the PCI (soft) reset vector (should work on all modern systems,
	/ but has been shown to cause problems on 450NX systems, and some newer
	/ systems (e.g. ATI IXP400-equipped systems))
	/ When resetting via this method, 2 writes are required.  The first
	/ targets bit 1 (0=hard reset without power cycle, 1=hard reset with
	/ power cycle).
	/ The reset occurs on the second write, during bit 2's transition from
	/ 0->1.
	movw	$0xcf9, %dx
	movb	$0x2, %al	/ Reset mode = hard, no power cycle
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/
	/ port 0xcf9 failed also.  Last-ditch effort is to
	/ triple-fault the CPU.
	/ Also, use triple fault for EFI firmware
	/
	ENTRY(efi_reset)
	pushq	$0x0
	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
	lidt	(%rsp)
	int	$0x0		/ Trigger interrupt, generate triple-fault

	cli
	hlt			/ Wait forever
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)

/*
 * C callable in and out routines
 */

	ENTRY(outl)
	movw	%di, %dx
	movl	%esi, %eax
	outl	(%dx)
	ret
	SET_SIZE(outl)

	ENTRY(outw)
	movw	%di, %dx
	movw	%si, %ax
	D16 outl (%dx)		/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

	ENTRY(outb)
	movw	%di, %dx
	movb	%sil, %al
	outb	(%dx)
	ret
	SET_SIZE(outb)

	ENTRY(inl)
	xorl	%eax, %eax
	movw	%di, %dx
	inl	(%dx)
	ret
	SET_SIZE(inl)

	ENTRY(inw)
	xorl	%eax, %eax
	movw	%di, %dx
	D16 inl (%dx)
	ret
	SET_SIZE(inw)

	ENTRY(inb)
	xorl	%eax, %eax
	movw	%di, %dx
	inb	(%dx)
	ret
	SET_SIZE(inb)

/*
 * void int3(void)
 * void int18(void)
 * void int20(void)
 * void int_cmci(void)
 */

	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	/* enter the kernel debugger, but only if booted with -d */
	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax
	jz	1f
	int	$T_DBGENTR
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

	ENTRY(int_cmci)
	int	$T_ENOEXTFLT
	ret
	SET_SIZE(int_cmci)

	/*
	 * scanc(size, cp, table, mask): scan cp[0..size-1] and stop at the
	 * first byte whose table[] entry has a bit of mask set; returns the
	 * count of bytes left (including the matching one), 0 if none match.
	 */
	ENTRY(scanc)
					/* rdi == size */
					/* rsi == cp */
					/* rdx == table */
					/* rcx == mask */
	addq	%rsi, %rdi		/* end = &cp[size] */
.scanloop:
	cmpq	%rdi, %rsi		/* while (cp < end */
	jnb	.scandone
	movzbq	(%rsi), %r8		/* %r8 = *cp */
	incq	%rsi			/* cp++ */
	testb	%cl, (%r8, %rdx)
	jz	.scanloop		/*   && (table[*cp] & mask) == 0) */
	decq	%rsi			/* (fix post-increment) */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

/*
 * Replacement functions for ones that are normally inlined.
 * In addition to the inline copies, they are defined here just in case.
 */

	/*
	 * intr_clear()/clear_int_flag(): disable interrupts and return the
	 * previous %rflags so the caller can later hand it to
	 * intr_restore().
	 */
	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	cmpl	$0, %edi
	jne	2f
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	andq	$_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

	/* return a pointer to this CPU's cpu_t */
	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax
	ret
	SET_SIZE(curcpup)

/* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
 * These functions reverse the byte order of the input parameter and returns
 * the result.
 * This is to convert the byte order from host byte order
 * (little endian) to network byte order (big endian), or vice versa.
 */

	ENTRY(htonll)
	ALTENTRY(ntohll)
	movq	%rdi, %rax
	bswapq	%rax
	ret
	SET_SIZE(ntohll)
	SET_SIZE(htonll)

	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

	/* XX64 there must be better sequences for this */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax	/* keep the swapped low 16 bits */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

	/*
	 * intr_restore(flags)/restore_int_flag(flags): re-enable
	 * interrupts iff PS_IE is set in the saved flags argument.
	 */
	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi
	jz	1f
#if defined(__xpv)
	leaq	xpv_panicking, %rsi
	movl	(%rsi), %esi
	cmpl	$0, %esi
	jne	1f
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored.
	 * The virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	ENTRY(cli)
	CLI(%rax)
	ret
	SET_SIZE(cli)

	/*
	 * dtrace_interrupt_disable(): like intr_clear(), with private
	 * labels so DTrace can use it from probe context.
	 */
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	cmpl	$0, %edi
	jne	.dtrace_interrupt_disable_done
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	andq	$_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	.dtrace_interrupt_disable_done
	orq	$PS_IE, %rax
#else
	CLI(%rdx)
#endif
.dtrace_interrupt_disable_done:
	ret
	SET_SIZE(dtrace_interrupt_disable)

	/* restore the %rflags value returned by dtrace_interrupt_disable() */
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored.  The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

	/* return the current kernel thread pointer */
	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax
	ret
	SET_SIZE(threadp)

/*
 *   Checksum routine for Internet Protocol Headers
 */

	/*
	 * ip_ocsum(address, halfword_count, sum): one's-complement
	 * checksum.  The unrolled loop folds 32 halfwords per iteration,
	 * alternating accumulation between %edx and %eax; the jump table
	 * below dispatches into the middle of the unrolled body for the
	 * tail.  Returns a 16-bit folded checksum in %eax.
	 */
	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string "ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx	/* halfword_count */
	movq	%rdi, %rsi	/* address */
				/* partial sum in %edx */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done
	testq	$3, %rsi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx
	jl	.less_than_32

	addl	0(%rsi), %edx
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi
	testl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/* consume one halfword to reach 4-byte alignment, then retry */
	xorl	%edi, %edi
	movw	(%rsi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx	/* %ecx = remaining halfwords (0..31) */
	testl	$1, %ecx
	jz	.size_aligned
	/* odd count: fold the last halfword here, leave an even count */
	andl	$0xfe, %ecx
	movzwl	(%rsi, %rcx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	/* bias %rsi so the .onlyN entry points see the right offsets */
	movl	%ecx, %edi
	shrl	$1, %ecx
	shl	$1, %edi
	subq	$64, %rdi
	addq	%rdi, %rsi
	leaq	.ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi
	xorl	%ecx, %ecx
	clc
	movq	(%rdi), %rdi
	INDIRECT_JMP_REG(rdi)

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)

/*
 * multiply two long numbers and yield a u_longlong_t result, callable from C.
 * Provided to manipulate hrtime_t values.
 */

	ENTRY(mul32)
	xorl	%edx, %edx	/* XX64 joe, paranoia? */
	movl	%edi, %eax
	mull	%esi
	shlq	$32, %rdx
	orq	%rdx, %rax	/* %rax = %edx:%eax from mull */
	ret
	SET_SIZE(mul32)

	/*
	 * scan_memory(addr, size): read every quadword in the range;
	 * any sub-quadword tail is ignored.
	 */
	ENTRY(scan_memory)
	shrq	$3, %rsi	/* convert %rsi from byte to quadword count */
	jz	.scanm_done
	movq	%rsi, %rcx	/* move count into rep control register */
	movq	%rdi, %rsi	/* move addr into lodsq control reg. */
	rep lodsq		/* scan the memory range */
.scanm_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

	/*
	 * lowbit(i): 1 + index of the least significant set bit, or 0 if
	 * i == 0 (bsf leaves ZF set and the cmov is skipped).
	 */
	ENTRY(lowbit)
	movl	$-1, %eax
	bsfq	%rdi, %rdi
	cmovnz	%edi, %eax
	incl	%eax
	ret
	SET_SIZE(lowbit)

	/*
	 * highbit(i)/highbit64(i): 1 + index of the most significant set
	 * bit, or 0 if i == 0.
	 */
	ENTRY(highbit)
	ALTENTRY(highbit64)
	movl	$-1, %eax
	bsrq	%rdi, %rdi
	cmovnz	%edi, %eax
	incl	%eax
	ret
	SET_SIZE(highbit64)
	SET_SIZE(highbit)

#define	XMSR_ACCESS_VAL		$0x9c5a203a

	ENTRY(rdmsr)
	movl	%edi, %ecx
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax	/* combine %edx:%eax into 64-bit result */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movq	%rsi, %rdx
	shrq	$32, %rdx	/* high half of value */
	movl	%esi, %eax	/* low half of value */
	movl	%edi, %ecx	/* msr number */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	/* rdmsr with the magic %edi unlock value some BIOSes require */
	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	movl	%esi, %eax
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

	ENTRY(get_xcr)
	movl	%edi, %ecx
	#xgetbv
	.byte	0x0f,0x01,0xd0
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	SET_SIZE(get_xcr)

	ENTRY(set_xcr)
	movq	%rsi, %rdx
	shrq	$32, %rdx
	movl	%esi, %eax
	movl	%edi, %ecx
	#xsetbv
	.byte	0x0f,0x01,0xd1
	ret
	SET_SIZE(set_xcr)

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

	/* getcregs(struct cregs *crp): snapshot control regs/descriptor tables */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Only a few of the hardware control registers or descriptor tables
	 * are directly accessible to us, so just zero the structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushq	%rdi		/* save *crp */
	movq	$CREGSZ, %rsi
	call	bzero
	popq	%rdi

	/*
	 * Dump what limited information we can
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */

#else	/* __xpv */

#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	xorl	%eax, %eax
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* 2 bytes */

	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

/*
 * A panic trigger is a word which is updated atomically and can only be set
 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
 * previous value was 0, we succeed and return 1; otherwise return 0.
 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
 * has its own version of this function to allow it to panic correctly from
 * probe context.
 */

	ENTRY_NP(panic_trigger)
	xorl	%eax, %eax
	movl	$0xdefacedd, %edx
	lock
	xchgl	%edx, (%rdi)
	cmpl	$0, %edx
	je	0f
	movl	$0, %eax
	ret
0:	movl	$1, %eax
	ret
	SET_SIZE(panic_trigger)

	ENTRY_NP(dtrace_panic_trigger)
	xorl	%eax, %eax
	movl	$0xdefacedd, %edx
	lock
	xchgl	%edx, (%rdi)
	cmpl	$0, %edx
	je	0f
	movl	$0, %eax
	ret
0:	movl	$1, %eax
	ret
	SET_SIZE(dtrace_panic_trigger)

/*
 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
 * into the panic code implemented in panicsys().
 * vpanic() is responsible
 * for passing through the format string and arguments, and constructing a
 * regs structure on the stack into which it saves the current register
 * values.  If we are not dying due to a fatal trap, these registers will
 * then be preserved in panicbuf as the current processor state.  Before
 * invoking panicsys(), vpanic() activates the first panic trigger (see
 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
 * DTrace takes a slightly different panic path if it must panic from probe
 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
 * branches back into vpanic().
 */

	ENTRY_NP(vpanic)			/* Initial stack layout: */

	pushq	%rbp				/* | %rip |	0x60 */
	movq	%rsp, %rbp			/* | %rbp |	0x58 */
	pushfq					/* | rfl  |	0x50 */
	pushq	%r11				/* | %r11 |	0x48 */
	pushq	%r10				/* | %r10 |	0x40 */
	pushq	%rbx				/* | %rbx |	0x38 */
	pushq	%rax				/* | %rax |	0x30 */
	pushq	%r9				/* | %r9  |	0x28 */
	pushq	%r8				/* | %r8  |	0x20 */
	pushq	%rcx				/* | %rcx |	0x18 */
	pushq	%rdx				/* | %rdx |	0x10 */
	pushq	%rsi				/* | %rsi |	0x8 alist */
	pushq	%rdi				/* | %rdi |	0x0 format */

	movq	%rsp, %rbx			/* %rbx = current %rsp */

	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
	call	panic_trigger			/* %eax = panic_trigger() */

vpanic_common:
	/*
	 * The panic_trigger result is in %eax from the call above, and
	 * dtrace_panic places it in %eax before branching here.
	 * The rdmsr instructions that follow below will clobber %eax so
	 * we stash the panic_trigger result in %r11d.
	 */
	movl	%eax, %r11d
	cmpl	$0, %r11d
	je	0f

	/*
	 * If panic_trigger() was successful, we are the first to initiate a
	 * panic: we now switch to the reserved panic_stack before continuing.
	 */
	leaq	panic_stack(%rip), %rsp
	addq	$PANICSTKSIZE, %rsp
0:	subq	$REGSIZE, %rsp
	/*
	 * Now that we've got everything set up, store the register values as
	 * they were when we entered vpanic() to the designated location in
	 * the regs structure we allocated on the stack.
	 */
	movq	0x0(%rbx), %rcx
	movq	%rcx, REGOFF_RDI(%rsp)
	movq	0x8(%rbx), %rcx
	movq	%rcx, REGOFF_RSI(%rsp)
	movq	0x10(%rbx), %rcx
	movq	%rcx, REGOFF_RDX(%rsp)
	movq	0x18(%rbx), %rcx
	movq	%rcx, REGOFF_RCX(%rsp)
	movq	0x20(%rbx), %rcx
	movq	%rcx, REGOFF_R8(%rsp)
	movq	0x28(%rbx), %rcx
	movq	%rcx, REGOFF_R9(%rsp)
	movq	0x30(%rbx), %rcx
	movq	%rcx, REGOFF_RAX(%rsp)
	movq	0x38(%rbx), %rcx
	movq	%rcx, REGOFF_RBX(%rsp)
	movq	0x58(%rbx), %rcx
	movq	%rcx, REGOFF_RBP(%rsp)
	movq	0x40(%rbx), %rcx
	movq	%rcx, REGOFF_R10(%rsp)
	movq	0x48(%rbx), %rcx
	movq	%rcx, REGOFF_R11(%rsp)
	movq	%r12, REGOFF_R12(%rsp)
	movq	%r13, REGOFF_R13(%rsp)
	movq	%r14, REGOFF_R14(%rsp)
	movq	%r15, REGOFF_R15(%rsp)

	xorl	%ecx, %ecx
	movw	%ds, %cx
	movq	%rcx, REGOFF_DS(%rsp)
	movw	%es, %cx
	movq	%rcx, REGOFF_ES(%rsp)
	movw	%fs, %cx
	movq	%rcx, REGOFF_FS(%rsp)
	movw	%gs, %cx
	movq	%rcx, REGOFF_GS(%rsp)

	movq	$0, REGOFF_TRAPNO(%rsp)

	movq	$0, REGOFF_ERR(%rsp)
	leaq	vpanic(%rip), %rcx
	movq	%rcx, REGOFF_RIP(%rsp)
	movw	%cs, %cx
	movzwq	%cx, %rcx
	movq	%rcx, REGOFF_CS(%rsp)
	movq	0x50(%rbx), %rcx
	movq	%rcx, REGOFF_RFL(%rsp)
	movq	%rbx, %rcx
	addq	$0x60, %rcx
	movq	%rcx, REGOFF_RSP(%rsp)
	movw	%ss, %cx
	movzwq	%cx, %rcx
	movq	%rcx, REGOFF_SS(%rsp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack)
	 */
	movq	REGOFF_RDI(%rsp), %rdi		/* format */
	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
	movq	%rsp, %rdx			/* struct regs */
	movl	%r11d, %ecx			/* on_panic_stack */
	call	panicsys
	addq	$REGSIZE, %rsp
	popq	%rdi
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%r8
	popq	%r9
	popq	%rax
	popq	%rbx
	popq	%r10
	popq	%r11
	popfq
	leave
	ret
	SET_SIZE(vpanic)

	/* DTrace's variant of vpanic(); see the block comment above */
	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */

	pushq	%rbp				/* | %rip |	0x60 */
	movq	%rsp, %rbp			/* | %rbp |	0x58 */
	pushfq					/* | rfl  |	0x50 */
	pushq	%r11				/* | %r11 |	0x48 */
	pushq	%r10				/* | %r10 |	0x40 */
	pushq	%rbx				/* | %rbx |	0x38 */
	pushq	%rax				/* | %rax |	0x30 */
	pushq	%r9				/* | %r9  |	0x28 */
	pushq	%r8				/* | %r8  |	0x20 */
	pushq	%rcx				/* | %rcx |	0x18 */
	pushq	%rdx				/* | %rdx |	0x10 */
	pushq	%rsi				/* | %rsi |	0x8 alist */
	pushq	%rdi				/* | %rdi |	0x0 format */

	movq	%rsp, %rbx			/* %rbx = current %rsp */

	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
	jmp	vpanic_common
	SET_SIZE(dtrace_vpanic)

	DGDEF3(timedelta, 8, 8)
	.long	0, 0

/*
 * initialized to a non zero value to make pc_gethrtime()
 * work correctly even before clock is initialized
 */
	DGDEF3(hrtime_base, 8, 8)
	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0

	DGDEF3(adj_shift, 4, 4)
	.long	ADJ_SHIFT

	/*
	 * hres_tick(): called from the clock interrupt to advance
	 * hrtime_base and hrestime by the interval since the previous
	 * tick, under CLOCK_LOCK (hres_lock).
	 */
	ENTRY_NP(hres_tick)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
	 * At worst, performing this now instead of under CLOCK_LOCK may
	 * introduce some jitter in pc_gethrestime().
	 */
	movq	gethrtimef(%rip), %rsi
	INDIRECT_CALL_REG(rsi)
	movq	%rax, %r8		/* %r8 = current hrtime */

	leaq	hres_lock(%rip), %rax
	movb	$-1, %dl
.CL1:
	xchgb	%dl, (%rax)
	testb	%dl, %dl
	jz	.CL3			/* got it */
.CL2:
	cmpb	$0, (%rax)		/* possible to get lock? */
	pause
	jne	.CL2
	jmp	.CL1			/* yes, try again */
.CL3:
	/*
	 * compute the interval since last time hres_tick was called
	 * and adjust hrtime_base and hrestime accordingly
	 * hrtime_base is an 8 byte value (in nsec), hrestime is
	 * a timestruc_t (sec, nsec)
	 */
	leaq	hres_last_tick(%rip), %rax
	movq	%r8, %r11
	subq	(%rax), %r8
	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */

	/*
	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
	 */
	movq	%r11, (%rax)

	call	__adj_hrestime

	/*
	 * release the hres_lock
	 */
	incl	hres_lock(%rip)
	leave
	ret
	SET_SIZE(hres_tick)

/*
 * void prefetch_smap_w(void *)
 *
 * Prefetch ahead within a linear list of smap structures.
 * Not implemented for ia32.
 * Stub for compatibility.
 */

	ENTRY(prefetch_smap_w)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_smap_w)

/*
 * prefetch_page_r(page_t *)
 * issue prefetch instructions for a page_t
 */

	ENTRY(prefetch_page_r)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_page_r)

	/*
	 * bcmp(s1, s2, n): memcmp() collapsed to 0 (equal) / 1 (differ).
	 * DEBUG kernels panic if either address is below kernelbase
	 * (when n != 0).
	 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	testq	%rdx,%rdx
	je	1f
	movq	postbootkernelbase(%rip), %r11
	cmpq	%r11, %rdi
	jb	0f
	cmpq	%r11, %rsi
	jnb	1f
0:	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp
	testl	%eax, %eax
	setne	%dl
	leave
	movzbl	%dl, %eax
	ret
	SET_SIZE(bcmp)

#ifdef DEBUG
	.text
.bcmp_panic_msg:
	.string "bcmp: arguments below kernelbase"
#endif	/* DEBUG */

	/* bsrw_insn(uint16_t): index of the highest set bit (via bsrw) */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	%di, %ax
	ret
	SET_SIZE(bsrw_insn)

	/*
	 * switch_sp_and_call(sp, func, arg1, arg2): call func(arg1, arg2)
	 * on the stack `sp'; the original stack is restored via the saved
	 * frame pointer on return.
	 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* set up stack frame */
	movq	%rdi, %rsp		/* switch stack pointer */
	movq	%rdx, %rdi		/* pass func arg 1 */
	movq	%rsi, %r11		/* save function to call */
	movq	%rcx, %rsi		/* pass func arg 2 */
	INDIRECT_CALL_REG(r11)		/* call function */
	leave				/* restore stack */
	ret
	SET_SIZE(switch_sp_and_call)

	/* enter kmdb via its trap vector, with interrupts disabled */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * Save flags, do a 'cli' then return the saved flags
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Restore the saved flags
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)

	ENTRY_NP(return_instr)
	rep;	ret	/* use 2 byte instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(return_instr)

	/* return %rflags (with PS_IE synthesized from the Xen event mask) */
	ENTRY(getflags)
	pushfq
	popq	%rax
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURVCPU(%r11)
	andq	$_BITNOT(PS_IE), %rax
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)

	/* save %rflags and disable interrupts (for ftrace) */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax
	CLI(%rdx)
	ret
	SET_SIZE(ftrace_interrupt_disable)

	/* restore the %rflags value saved by ftrace_interrupt_disable() */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	SET_SIZE(ftrace_interrupt_enable)

	ENTRY(clflush_insn)
	clflush	(%rdi)
	ret
	SET_SIZE(clflush_insn)

	ENTRY(mfence_insn)
	mfence
	ret
	SET_SIZE(mfence_insn)

/*
 * VMware implements an I/O port that programs can query to detect if software
 * is running in a VMware hypervisor.  This hypervisor port behaves differently
 * depending on magic values in certain registers and modifies some registers
 * as a side effect.
 *
 * References: http://kb.vmware.com/kb/1009458
 */

	/*
	 * vmware_port(int cmd, uint32_t *regs): issue the hypervisor-port
	 * inl with command %ecx = cmd and store the resulting
	 * %eax/%ebx/%ecx/%edx into regs[0..3].
	 */
	ENTRY(vmware_port)
	pushq	%rbx
	movl	$VMWARE_HVMAGIC, %eax
	movl	$0xffffffff, %ebx
	movl	%edi, %ecx
	movl	$VMWARE_HVPORT, %edx
	inl	(%dx)
	movl	%eax, (%rsi)
	movl	%ebx, 4(%rsi)
	movl	%ecx, 8(%rsi)
	movl	%edx, 12(%rsi)
	popq	%rbx
	ret
	SET_SIZE(vmware_port)