xref: /titanic_41/usr/src/uts/intel/ia32/ml/lock_prim.s (revision 25351652d920ae27c5a56c199da581033ce763f6)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if defined(lint) || defined(__lint)
29#include <sys/types.h>
30#include <sys/thread.h>
31#include <sys/cpuvar.h>
32#include <vm/page.h>
33#else	/* __lint */
34#include "assym.h"
35#endif	/* __lint */
36
37#include <sys/mutex_impl.h>
38#include <sys/asm_linkage.h>
39#include <sys/asm_misc.h>
40#include <sys/regset.h>
41#include <sys/rwlock_impl.h>
42#include <sys/lockstat.h>
43
44/*
45 * lock_try(lp), ulock_try(lp)
46 *	- returns non-zero on success.
47 *	- doesn't block interrupts so don't use this to spin on a lock.
48 *
49 * ulock_try() is for a lock in the user address space.
50 */
51
52#if defined(lint) || defined(__lint)
53
/*
 * Lint-only C stubs for lock_try(), lock_spin_try() and ulock_try().
 * They exist only in the lint branch of the surrounding #if; the
 * assembly implementations live in the #else branch.
 */
54/* ARGSUSED */
55int
56lock_try(lock_t *lp)
57{ return (0); }
58
59/* ARGSUSED */
60int
61lock_spin_try(lock_t *lp)
62{ return (0); }
63
64/* ARGSUSED */
65int
66ulock_try(lock_t *lp)
67{ return (0); }
68
69#else	/* __lint */
70	.globl	kernelbase
71
72#if defined(__amd64)
73
/*
 * lock_try (amd64)
 * In:	%rdi = address of the byte lock
 * Out:	%rax = 0xff ^ (previous lock byte); zero only when the byte
 *	already held 0xff, i.e. the lock was not obtained.
 *
 * The xchgb unconditionally stores 0xff into the lock byte.  The first
 * `ret' carries the lockstat patch-point label; the instructions after
 * it are reached only if that `ret' is replaced (presumably by the
 * lockstat hot-patch code).  In that case a failed try still returns
 * directly, while a successful one tail-jumps to lockstat_wrapper with
 * %rdx = thread, %rsi = lock and %edi = LS_LOCK_TRY_ACQUIRE.
 */
74	ENTRY(lock_try)
75	movb	$-1, %dl
76	movzbq	%dl, %rax
77	xchgb	%dl, (%rdi)
78	xorb	%dl, %al
79.lock_try_lockstat_patch_point:
80	ret
81	testb	%al, %al
82	jnz	0f
83	ret
840:
85	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
86	movq	%rdi, %rsi		/* rsi = lock addr */
87	movl	$LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
88	jmp	lockstat_wrapper
89	SET_SIZE(lock_try)
90
/*
 * lock_spin_try (amd64)
 * In:	%rdi = address of the byte lock
 * Out:	%rax = 0xff ^ (previous lock byte); zero when the byte was
 *	already 0xff.
 *
 * Same exchange sequence as lock_try, but with no lockstat patch point.
 */
91	ENTRY(lock_spin_try)
92	movb	$-1, %dl
93	movzbq	%dl, %rax
94	xchgb	%dl, (%rdi)
95	xorb	%dl, %al
96	ret
97	SET_SIZE(lock_spin_try)
98
/*
 * ulock_try (amd64)
 * In:	%rdi = address of a byte lock (expected to be a user address)
 * Out:	%eax = 1 ^ (previous lock byte); 1 if the byte was 0, 0 if it
 *	was already 1.
 *
 * The xchgb stores 1 into the lock byte.  On DEBUG builds the address is
 * first compared (unsigned) against kernelbase; an address that is not
 * below kernelbase calls panic() with .ulock_panic_msg.
 */
99	ENTRY(ulock_try)
100#ifdef DEBUG
101	movq	kernelbase(%rip), %rax
102	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
103	jb	ulock_pass		/*	uaddr < kernelbase, proceed */
104
105	movq	%rdi, %r12		/* preserve lock ptr for debugging */
106	leaq	.ulock_panic_msg(%rip), %rdi
107	pushq	%rbp			/* align stack properly */
108	movq	%rsp, %rbp
109	xorl	%eax, %eax		/* clear for varargs */
110	call	panic
111
112#endif /* DEBUG */
113
114ulock_pass:
115	movl	$1, %eax
116	xchgb	%al, (%rdi)
117	xorb	$1, %al
118	ret
119	SET_SIZE(ulock_try)
120
121#else
122
/*
 * lock_try (i386)
 * In:	4(%esp) = address of the byte lock
 * Out:	%eax = 1 if the previous lock byte was zero, else 0
 *
 * The xchgb stores 1 into the lock byte.  The first `ret' carries the
 * lockstat patch-point label; the code after it runs only if that `ret'
 * is replaced (presumably by lockstat hot-patching).  It then reports a
 * successful acquire through lockstat_wrapper with %edx = thread,
 * %ecx = lock and %eax = LS_LOCK_TRY_ACQUIRE, and returns directly on
 * failure.
 */
123	ENTRY(lock_try)
124	movl	$1,%edx
125	movl	4(%esp),%ecx		/* ecx = lock addr */
126	xorl	%eax,%eax
127	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
128	testb	%dl,%dl			/* stalls on P6 ? */
129	setz	%al
130.lock_try_lockstat_patch_point:
131	ret
132	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
133	testl	%eax, %eax
134	jz	0f
135	movl	$LS_LOCK_TRY_ACQUIRE, %eax
136	jmp	lockstat_wrapper
1370:
138	ret
139	SET_SIZE(lock_try)
140
/*
 * lock_spin_try (i386)
 * In:	4(%esp) = address of the byte lock
 * Out:	%eax = 1 if the previous lock byte was zero, else 0
 *
 * The xchgb stores 0xff into the lock byte; there is no lockstat patch
 * point in this routine.
 */
141	ENTRY(lock_spin_try)
142	movl	$-1,%edx
143	movl	4(%esp),%ecx		/* ecx = lock addr */
144	xorl	%eax,%eax
145	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
146	testb	%dl,%dl			/* stalls on P6 ? */
147	setz	%al
148	ret
149	SET_SIZE(lock_spin_try)
150
/*
 * ulock_try (i386)
 * In:	4(%esp) = address of a byte lock (expected to be a user address)
 * Out:	%eax = 1 ^ (previous lock byte); 1 if the byte was 0, 0 if it
 *	was already 1.
 *
 * The xchgb stores 1 into the lock byte.  On DEBUG builds an address
 * that is not below kernelbase (unsigned compare) calls panic() with
 * .ulock_panic_msg.
 */
151	ENTRY(ulock_try)
152#ifdef DEBUG
153	movl	kernelbase, %eax
154	cmpl	%eax, 4(%esp)		/* test uaddr < kernelbase */
155	jb	ulock_pass		/* uaddr < kernelbase, proceed */
156
157	pushl	$.ulock_panic_msg
158	call	panic
159
160#endif /* DEBUG */
161
162ulock_pass:
163	movl	$1,%eax
164	movl	4(%esp),%ecx
165	xchgb	%al, (%ecx)
166	xorb	$1, %al
167	ret
168	SET_SIZE(ulock_try)
169
170#endif	/* !__amd64 */
171
172#ifdef DEBUG
173	.data
174.ulock_panic_msg:
175	.string "ulock_try: Argument is above kernelbase"
176	.text
177#endif	/* DEBUG */
178
179#endif	/* __lint */
180
181/*
182 * lock_clear(lp)
183 *	- unlock lock without changing interrupt priority level.
184 */
185
186#if defined(lint) || defined(__lint)
187
/*
 * Lint-only C stubs for lock_clear() and ulock_clear(); the assembly
 * implementations are in the #else branch of the surrounding #if.
 */
188/* ARGSUSED */
189void
190lock_clear(lock_t *lp)
191{}
192
193/* ARGSUSED */
194void
195ulock_clear(lock_t *lp)
196{}
197
198#else	/* __lint */
199
200#if defined(__amd64)
201
/*
 * lock_clear (amd64)
 * In:	%rdi = address of the byte lock
 *
 * Stores 0 into the lock byte and returns.  The `ret' carries the
 * lockstat patch-point label; the trailing code runs only if that `ret'
 * is replaced, and then tail-jumps to lockstat_wrapper with
 * %rdx = thread, %rsi = lock and %edi = LS_LOCK_CLEAR_RELEASE.
 */
202	ENTRY(lock_clear)
203	movb	$0, (%rdi)
204.lock_clear_lockstat_patch_point:
205	ret
206	movq	%rdi, %rsi			/* rsi = lock addr */
207	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread addr */
208	movl	$LS_LOCK_CLEAR_RELEASE, %edi	/* edi = event */
209	jmp	lockstat_wrapper
210	SET_SIZE(lock_clear)
211
/*
 * ulock_clear (amd64)
 * In:	%rdi = address of a byte lock (expected to be a user address)
 *
 * Stores 0 into the lock byte.  On DEBUG builds an address that is not
 * below kernelbase (unsigned compare) calls panic() with
 * .ulock_clear_msg.
 */
212	ENTRY(ulock_clear)
213#ifdef DEBUG
214	movq	kernelbase(%rip), %rcx
215	cmpq	%rcx, %rdi		/* test uaddr < kernelbase */
216	jb	ulock_clr		/*	 uaddr < kernelbase, proceed */
217
218	leaq	.ulock_clear_msg(%rip), %rdi
219	pushq	%rbp			/* align stack properly */
220	movq	%rsp, %rbp
221	xorl	%eax, %eax		/* clear for varargs */
222	call	panic
223#endif
224
225ulock_clr:
226	movb	$0, (%rdi)
227	ret
228	SET_SIZE(ulock_clear)
229
230#else
231
/*
 * lock_clear (i386)
 * In:	4(%esp) = address of the byte lock
 *
 * Stores 0 into the lock byte and returns.  The code after the
 * patch-point `ret' runs only if that `ret' is replaced; it hands
 * %edx = thread, %ecx = lock and %eax = LS_LOCK_CLEAR_RELEASE to
 * lockstat_wrapper.
 */
232	ENTRY(lock_clear)
233	movl	4(%esp), %eax
234	movb	$0, (%eax)
235.lock_clear_lockstat_patch_point:
236	ret
237	movl	%gs:CPU_THREAD, %edx		/* edx = thread addr */
238	movl	%eax, %ecx			/* ecx = lock pointer */
239	movl	$LS_LOCK_CLEAR_RELEASE, %eax
240	jmp	lockstat_wrapper
241	SET_SIZE(lock_clear)
242
/*
 * ulock_clear (i386)
 * In:	4(%esp) = address of a byte lock (expected to be a user address)
 *
 * Stores 0 into the lock byte.  On DEBUG builds an address that is not
 * below kernelbase (unsigned compare) calls panic() with
 * .ulock_clear_msg.
 */
243	ENTRY(ulock_clear)
244#ifdef DEBUG
245	movl	kernelbase, %ecx
246	cmpl	%ecx, 4(%esp)		/* test uaddr < kernelbase */
247	jb	ulock_clr		/* uaddr < kernelbase, proceed */
248
249	pushl	$.ulock_clear_msg
250	call	panic
251#endif
252
253ulock_clr:
254	movl	4(%esp),%eax
255	xorl	%ecx,%ecx
256	movb	%cl, (%eax)
257	ret
258	SET_SIZE(ulock_clear)
259
260#endif	/* !__amd64 */
261
262#ifdef DEBUG
263	.data
264.ulock_clear_msg:
265	.string "ulock_clear: Argument is above kernelbase"
266	.text
267#endif	/* DEBUG */
268
269
270#endif	/* __lint */
271
272/*
273 * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
274 * Raises pil to new_pil, acquires lp, stores old pil in *old_pil.
275 */
276
277#if defined(lint) || defined(__lint)
278
/*
 * Lint-only C stub for lock_set_spl(); the assembly implementation is
 * in the #else branch of the surrounding #if.
 */
279/* ARGSUSED */
280void
281lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
282{}
283
284#else	/* __lint */
285
286#if defined(__amd64)
287
/*
 * lock_set_spl (amd64)
 * In:	%rdi = lock address, %esi = new pil, %rdx = address for old pil
 *
 * Saves the three arguments in a 32-byte frame, calls splr(new_pil),
 * then tries to take the lock by exchanging 0xff into the lock byte.
 * The value splr left in %eax is treated as the previous pil.
 *
 * Got the lock:	store %ax through the old-pil pointer, drop the
 *		frame and return.  The code after the patch-point
 *		`ret' runs only if that `ret' is replaced; it reports
 *		LS_LOCK_SET_SPL_ACQUIRE through lockstat_wrapper.
 * Missed:	drop the frame and tail-jump to lock_set_spl_spin with
 *		%rdi = lock, %esi = new pil, %rdx = old-pil address and
 *		%ecx = the pil value returned by splr.
 */
288	ENTRY(lock_set_spl)
289	pushq	%rbp
290	movq	%rsp, %rbp
291	subq	$32, %rsp
292	movl	%esi, 8(%rsp)		/* save priority level */
293	movq	%rdx, 16(%rsp)		/* save old pil ptr */
294	movq	%rdi, 24(%rsp)		/* save lock pointer */
295	movl	%esi, %edi		/* pass priority level */
296	call	splr			/* raise priority level */
297	movq	24(%rsp), %rdi		/* rdi = lock addr */
298	movb	$-1, %dl
299	xchgb	%dl, (%rdi)		/* try to set lock */
300	testb	%dl, %dl		/* did we get the lock? ... */
301	jnz	.lss_miss		/* ... no, go to C for the hard case */
302	movq	16(%rsp), %rdx		/* rdx = old pil addr */
303	movw	%ax, (%rdx)		/* store old pil */
304	leave
305.lock_set_spl_lockstat_patch_point:
306	ret
307	movq	%rdi, %rsi		/* rsi = lock addr */
308	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
309	movl	$LS_LOCK_SET_SPL_ACQUIRE, %edi
310	jmp	lockstat_wrapper
311.lss_miss:
312	movl	8(%rsp), %esi		/* new_pil */
313	movq	16(%rsp), %rdx		/* old_pil_addr */
314	movl	%eax, %ecx		/* original pil */
315	leave				/* unwind stack */
316	jmp	lock_set_spl_spin
317	SET_SIZE(lock_set_spl)
318
319#else
320
/*
 * lock_set_spl (i386)
 * In:	4(%esp) = lock address, 8(%esp) = new pil,
 *	12(%esp) = address for old pil
 *
 * Calls splr(new_pil), then tries to take the lock by exchanging 0xff
 * into the lock byte.  The value splr left in %eax is treated as the
 * previous pil.  The old-pil pointer is loaded between the testb and
 * the jnz; movl does not modify flags, so the branch still sees the
 * testb result.
 *
 * Got the lock:	store %ax through the old-pil pointer and return.
 *		The code after the patch-point `ret' runs only if that
 *		`ret' is replaced; it reports LS_LOCK_SET_SPL_ACQUIRE
 *		through lockstat_wrapper (%ecx still holds the lock).
 * Missed:	call lock_set_spl_spin(lock, new_pil, old_pil addr,
 *		original pil), pop the four arguments and return.
 */
321	ENTRY(lock_set_spl)
322	movl	8(%esp), %eax		/* get priority level */
323	pushl	%eax
324	call	splr			/* raise priority level */
325	movl 	8(%esp), %ecx		/* ecx = lock addr */
326	movl	$-1, %edx
327	addl	$4, %esp
328	xchgb	%dl, (%ecx)		/* try to set lock */
329	testb	%dl, %dl		/* did we get the lock? ... */
330	movl	12(%esp), %edx		/* edx = old pil addr (ZF unaffected) */
331	jnz	.lss_miss		/* ... no, go to C for the hard case */
332	movw	%ax, (%edx)		/* store old pil */
333.lock_set_spl_lockstat_patch_point:
334	ret
335	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr*/
336	movl	$LS_LOCK_SET_SPL_ACQUIRE, %eax
337	jmp	lockstat_wrapper
338.lss_miss:
339	pushl	%eax			/* original pil */
340	pushl	%edx			/* old_pil addr */
341	pushl	16(%esp)		/* new_pil */
342	pushl	%ecx			/* lock addr */
343	call	lock_set_spl_spin
344	addl	$16, %esp
345	ret
346	SET_SIZE(lock_set_spl)
347
348#endif	/* !__amd64 */
349
350#endif	/* __lint */
351
352/*
353 * void
354 * lock_init(lp)
355 */
356
357#if defined(__lint)
358
/*
 * Lint-only C stub for lock_init(); the assembly implementation is in
 * the #else branch of the surrounding #if.
 */
359/* ARGSUSED */
360void
361lock_init(lock_t *lp)
362{}
363
364#else	/* __lint */
365
366#if defined(__amd64)
367
/*
 * lock_init (amd64)
 * In:	%rdi = address of the byte lock
 *
 * Initializes the lock by storing 0 into the lock byte.
 */
368	ENTRY(lock_init)
369	movb	$0, (%rdi)
370	ret
371	SET_SIZE(lock_init)
372
373#else
374
/*
 * lock_init (i386)
 * In:	4(%esp) = address of the byte lock
 *
 * Initializes the lock by storing 0 into the lock byte.
 */
375	ENTRY(lock_init)
376	movl	4(%esp), %eax
377	movb	$0, (%eax)
378	ret
379	SET_SIZE(lock_init)
380
381#endif	/* !__amd64 */
382
383#endif	/* __lint */
384
385/*
386 * void
387 * lock_set(lp)
388 */
389
390#if defined(lint) || defined(__lint)
391
/*
 * Lint-only C stub for lock_set(); the assembly implementation is in
 * the #else branch of the surrounding #if.
 */
392/* ARGSUSED */
393void
394lock_set(lock_t *lp)
395{}
396
397#else	/* __lint */
398
399#if defined(__amd64)
400
/*
 * lock_set (amd64)
 * In:	%rdi = address of the byte lock
 *
 * Exchanges 0xff into the lock byte.  If the previous byte was non-zero
 * the lock was already held, and the routine tail-jumps to
 * lock_set_spin with %rdi still holding the lock address.  Otherwise it
 * returns.  The code after the patch-point `ret' runs only if that
 * `ret' is replaced; it reports LS_LOCK_SET_ACQUIRE through
 * lockstat_wrapper.
 */
401	ENTRY(lock_set)
402	movb	$-1, %dl
403	xchgb	%dl, (%rdi)		/* try to set lock */
404	testb	%dl, %dl		/* did we get it? */
405	jnz	lock_set_spin		/* no, go to C for the hard case */
406.lock_set_lockstat_patch_point:
407	ret
408	movq	%rdi, %rsi		/* rsi = lock addr */
409	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
410	movl	$LS_LOCK_SET_ACQUIRE, %edi
411	jmp	lockstat_wrapper
412	SET_SIZE(lock_set)
413
414#else
415
/*
 * lock_set (i386)
 * In:	4(%esp) = address of the byte lock
 *
 * Exchanges 0xff into the lock byte.  If the previous byte was non-zero
 * the routine tail-jumps to lock_set_spin with the caller's stack
 * arguments untouched.  Otherwise it returns.  The code after the
 * patch-point `ret' runs only if that `ret' is replaced; it reports
 * LS_LOCK_SET_ACQUIRE through lockstat_wrapper (%ecx = lock).
 */
416	ENTRY(lock_set)
417	movl	4(%esp), %ecx		/* ecx = lock addr */
418	movl	$-1, %edx
419	xchgb	%dl, (%ecx)		/* try to set lock */
420	testb	%dl, %dl		/* did we get it? */
421	jnz	lock_set_spin		/* no, go to C for the hard case */
422.lock_set_lockstat_patch_point:
423	ret
424	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
425	movl	$LS_LOCK_SET_ACQUIRE, %eax
426	jmp	lockstat_wrapper
427	SET_SIZE(lock_set)
428
429#endif	/* !__amd64 */
430
431#endif	/* __lint */
432
433/*
434 * lock_clear_splx(lp, s)
435 */
436
437#if defined(lint) || defined(__lint)
438
/*
 * Lint-only C stub for lock_clear_splx(); the assembly implementation
 * is in the #else branch of the surrounding #if.
 */
439/* ARGSUSED */
440void
441lock_clear_splx(lock_t *lp, int s)
442{}
443
444#else	/* __lint */
445
446#if defined(__amd64)
447
/*
 * lock_clear_splx (amd64)
 * In:	%rdi = address of the byte lock, %esi = pil value to pass to splx
 *
 * Stores 0 into the lock byte, then tail-jumps to splx(s).
 *
 * The patch point here is a `jmp 0f' to the very next instruction
 * rather than a `ret'.  The LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_* macros
 * defined after this routine describe a replacement displacement that
 * targets .lock_clear_splx_lockstat instead.  That path calls splx
 * with the lock pointer saved across the call, then reports
 * LS_LOCK_CLEAR_SPLX_RELEASE through lockstat_wrapper.
 */
448	ENTRY(lock_clear_splx)
449	movb	$0, (%rdi)		/* clear lock */
450.lock_clear_splx_lockstat_patch_point:
451	jmp	0f
4520:
453	movl	%esi, %edi		/* arg for splx */
454	jmp	splx			/* let splx do its thing */
455.lock_clear_splx_lockstat:
456	pushq	%rbp			/* align stack properly */
457	movq	%rsp, %rbp
458	subq	$16, %rsp		/* space to save args across splx */
459	movq	%rdi, 8(%rsp)		/* save lock ptr across splx call */
460	movl	%esi, %edi		/* arg for splx */
461	call	splx			/* lower the priority */
462	movq	8(%rsp), %rsi		/* rsi = lock ptr */
463	leave				/* unwind stack */
464	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
465	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %edi
466	jmp	lockstat_wrapper
467	SET_SIZE(lock_clear_splx)
468
469#else
470
/*
 * lock_clear_splx (i386)
 * In:	4(%esp) = address of the byte lock, 8(%esp) = pil for splx
 *
 * Stores 0 into the lock byte, overwrites the first stack argument with
 * the pil value and tail-jumps to splx so that splx sees it as its own
 * argument.
 *
 * The patch point is a `jmp 0f' to the next instruction.  The
 * LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_* macros defined after this routine
 * describe a replacement displacement that targets
 * .lock_clear_splx_lockstat.  That path calls splx inside its own
 * frame, reloads the lock pointer from the still-intact first argument
 * and reports LS_LOCK_CLEAR_SPLX_RELEASE through lockstat_wrapper.
 */
471	ENTRY(lock_clear_splx)
472	movl	4(%esp), %eax		/* eax = lock addr */
473	movb	$0, (%eax)		/* clear lock */
474.lock_clear_splx_lockstat_patch_point:
475	jmp	0f
4760:
477	movl	8(%esp), %edx		/* edx = desired pil */
478	movl	%edx, 4(%esp)		/* set spl arg up for splx */
479	jmp	splx			/* let splx do its thing */
480.lock_clear_splx_lockstat:
481	movl	8(%esp), %edx		/* edx = desired pil */
482	pushl	%ebp			/* set up stack frame */
483	movl	%esp, %ebp
484	pushl	%edx
485	call	splx
486	leave				/* unwind stack */
487	movl	4(%esp), %ecx		/* ecx = lock pointer */
488	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
489	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %eax
490	jmp	lockstat_wrapper
491	SET_SIZE(lock_clear_splx)
492
493#endif	/* !__amd64 */
494
/*
 * Patch constants for lock_clear_splx's `jmp 0f' patch point.
 *
 * LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL is the distance from the end of a
 * 2-byte instruction at the patch point to .lock_clear_splx_lockstat.
 * LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT is the address one byte past the
 * patch-point label -- presumably the displacement byte of the short
 * jmp; confirm against the code that consumes these macros.
 *
 * The two variants differ only in the expression grouping characters:
 * parentheses when __GNUC_AS__ is defined, square brackets otherwise.
 */
495#if defined(__GNUC_AS__)
496#define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
497	(.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)
498
499#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
500	(.lock_clear_splx_lockstat_patch_point + 1)
501#else
502#define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
503	[.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]
504
505#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
506	[.lock_clear_splx_lockstat_patch_point + 1]
507#endif
508
509#endif	/* __lint */
510
511/*
512 * mutex_enter() and mutex_exit().
513 *
514 * These routines handle the simple cases of mutex_enter() (adaptive
515 * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
516 * If anything complicated is going on we punt to mutex_vector_enter().
517 *
518 * mutex_tryenter() is similar to mutex_enter() but returns zero if
519 * the lock cannot be acquired, nonzero on success.
520 *
521 * If mutex_exit() gets preempted in the window between checking waiters
522 * and clearing the lock, we can miss wakeups.  Disabling preemption
523 * in the mutex code is prohibitively expensive, so instead we detect
524 * mutex preemption by examining the trapped PC in the interrupt path.
525 * If we interrupt a thread in mutex_exit() that has not yet cleared
526 * the lock, cmnint() resets its PC back to the beginning of
527 * mutex_exit() so it will check again for waiters when it resumes.
528 *
529 * The lockstat code below is activated when the lockstat driver
530 * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
531 * Note that we don't need to test lockstat_event_mask here -- we won't
532 * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
533 */
534#if defined(lint) || defined(__lint)
535
/*
 * Lint-only C stubs for mutex_enter(), mutex_tryenter(),
 * mutex_adaptive_tryenter() and mutex_exit(); the assembly
 * implementations are in the #else branch of the surrounding #if.
 */
536/* ARGSUSED */
537void
538mutex_enter(kmutex_t *lp)
539{}
540
541/* ARGSUSED */
542int
543mutex_tryenter(kmutex_t *lp)
544{ return (0); }
545
546/* ARGSUSED */
547int
548mutex_adaptive_tryenter(mutex_impl_t *lp)
549{ return (0); }
550
551/* ARGSUSED */
552void
553mutex_exit(kmutex_t *lp)
554{}
555
556#else
557
558#if defined(__amd64)
559
/*
 * mutex_enter (amd64), with lockstat_wrapper as an alternate entry.
 * In:	%rdi = mutex address
 *
 * Fast path: atomically replace a zero mutex word with the current
 * thread pointer.  If the word was not zero, tail-jump to
 * mutex_vector_enter with %rdi untouched.
 *
 * With OPTERON_WORKAROUND_6323525 the `ret' is followed by three nops,
 * reserving four bytes that patch_workaround_6323525 can overwrite with
 * lfence + ret.  The second label marks where the `ret' would then sit.
 *
 * If the `ret' at the lockstat patch point is replaced, execution falls
 * into lockstat_wrapper with %rdx = thread, %rsi = lock and
 * %edi = LS_MUTEX_ENTER_ACQUIRE.
 *
 * lockstat_wrapper increments the thread's T_LOCKSTAT byte and looks up
 * lockstat_probemap[event].  If the entry is non-zero it calls
 * *lockstat_probe with that id in %edi (%rsi still holds the lock).  It
 * then decrements T_LOCKSTAT and returns 1 in %eax.
 */
560	ENTRY_NP(mutex_enter)
561	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
562	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
563	lock
564	cmpxchgq %rdx, (%rdi)
565	jnz	mutex_vector_enter
566.mutex_enter_lockstat_patch_point:
567#if defined(OPTERON_WORKAROUND_6323525)
568.mutex_enter_6323525_patch_point:
569	ret					/* nop space for lfence */
570	nop
571	nop
572.mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
573	nop
574#else	/* OPTERON_WORKAROUND_6323525 */
575	ret
576#endif	/* OPTERON_WORKAROUND_6323525 */
577	movq	%rdi, %rsi
578	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi
579/*
580 * expects %rdx=thread, %rsi=lock, %edi=lockstat event
581 */
582	ALTENTRY(lockstat_wrapper)
583	incb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat++ */
584	leaq	lockstat_probemap(%rip), %rax
585	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax
586	testl	%eax, %eax			/* check for non-zero probe */
587	jz	1f
588	pushq	%rbp				/* align stack properly */
589	movq	%rsp, %rbp
590	movl	%eax, %edi
591	call	*lockstat_probe
592	leave					/* unwind stack */
5931:
594	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
595	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
596	movl	$1, %eax			/* return success if tryenter */
597	ret
598	SET_SIZE(lockstat_wrapper)
599	SET_SIZE(mutex_enter)
600
601/*
602 * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
603 */
/*
 * lockstat_wrapper_arg (amd64)
 * In:	%rcx = thread, %rdx = extra probe argument, %rsi = lock,
 *	%edi = lockstat event
 * Out:	%eax = 1
 *
 * Same sequence as lockstat_wrapper, except that the thread pointer
 * arrives in %rcx so that %rdx can carry an extra argument through to
 * *lockstat_probe.  The thread pointer is reloaded from %gs:CPU_THREAD
 * after the probe call rather than being preserved across it.
 */
604	ENTRY(lockstat_wrapper_arg)
605	incb	T_LOCKSTAT(%rcx)		/* curthread->t_lockstat++ */
606	leaq	lockstat_probemap(%rip), %rax
607	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax
608	testl	%eax, %eax			/* check for non-zero probe */
609	jz	1f
610	pushq	%rbp				/* align stack properly */
611	movq	%rsp, %rbp
612	movl	%eax, %edi
613	call	*lockstat_probe
614	leave					/* unwind stack */
6151:
616	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
617	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
618	movl	$1, %eax			/* return success if tryenter */
619	ret
620	SET_SIZE(lockstat_wrapper_arg)
621
622
/*
 * mutex_tryenter (amd64)
 * In:	%rdi = mutex address
 * Out:	%eax non-zero on success
 *
 * Fast path: atomically replace a zero mutex word with the current
 * thread pointer.  On success %eax is still 0 from the xorl, so the
 * `not' turns it into 0xffffffff.  If the word was not zero, tail-jump
 * to mutex_vector_tryenter.
 *
 * The ret/nop layout and patch labels follow the same scheme as
 * mutex_enter.  On the lockstat path the event reported is
 * LS_MUTEX_ENTER_ACQUIRE, and the return value becomes the 1 that
 * lockstat_wrapper loads into %eax.
 */
623	ENTRY(mutex_tryenter)
624	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
625	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
626	lock
627	cmpxchgq %rdx, (%rdi)
628	jnz	mutex_vector_tryenter
629	not	%eax				/* return success (nonzero) */
630#if defined(OPTERON_WORKAROUND_6323525)
631.mutex_tryenter_lockstat_patch_point:
632.mutex_tryenter_6323525_patch_point:
633	ret					/* nop space for lfence */
634	nop
635	nop
636.mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
637	nop
638#else	/* OPTERON_WORKAROUND_6323525 */
639.mutex_tryenter_lockstat_patch_point:
640	ret
641#endif	/* OPTERON_WORKAROUND_6323525 */
642	movq	%rdi, %rsi
643	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi
644	jmp	lockstat_wrapper
645	SET_SIZE(mutex_tryenter)
646
/*
 * mutex_adaptive_tryenter (amd64)
 * In:	%rdi = mutex address
 * Out:	%eax = 0xffffffff if the mutex word was zero and is now the
 *	current thread pointer; 0 otherwise.
 *
 * Unlike mutex_tryenter, a failed compare-and-swap returns 0 directly
 * instead of jumping to a slow path, and there is no lockstat patch
 * point.  With OPTERON_WORKAROUND_6323525 the `ret' is followed by
 * three nops, reserving four bytes for the lfence + ret patch.
 */
647	ENTRY(mutex_adaptive_tryenter)
648	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
649	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
650	lock
651	cmpxchgq %rdx, (%rdi)
652	jnz	0f
653	not	%eax				/* return success (nonzero) */
654#if defined(OPTERON_WORKAROUND_6323525)
655.mutex_atryenter_6323525_patch_point:
656	ret					/* nop space for lfence */
657	nop
658	nop
659	nop
660#else	/* OPTERON_WORKAROUND_6323525 */
661	ret
662#endif	/* OPTERON_WORKAROUND_6323525 */
6630:
664	xorl	%eax, %eax			/* return failure */
665	ret
666	SET_SIZE(mutex_adaptive_tryenter)
667
668	.globl	mutex_owner_running_critical_start
669
/*
 * mutex_owner_running (amd64)
 * In:	%rdi = mutex address
 * Out:	%rax = the owner's t_cpu pointer if that cpu's cpu_thread is the
 *	owner; 0 if the mutex has no owner or the owner is not that
 *	cpu's current thread.
 *
 * The owner is the mutex word masked with MUTEX_THREAD.  The loads that
 * chase owner->t_cpu->cpu_thread sit between
 * mutex_owner_running_critical_start and
 * .mutex_owner_running_critical_end.  The byte length of that region is
 * exported in mutex_owner_running_critical_size, presumably so that
 * interrupt code can recognise a PC inside it (TODO confirm against the
 * consumer of these symbols).
 */
670	ENTRY(mutex_owner_running)
671mutex_owner_running_critical_start:
672	movq	(%rdi), %r11		/* get owner field */
673	andq	$MUTEX_THREAD, %r11	/* remove waiters bit */
674	cmpq	$0, %r11		/* if free, skip */
675	je	1f			/* go return 0 */
676	movq	T_CPU(%r11), %r8	/* get owner->t_cpu */
677	movq	CPU_THREAD(%r8), %r9	/* get t_cpu->cpu_thread */
678.mutex_owner_running_critical_end:
679	cmpq	%r11, %r9	/* owner == running thread? */
680	je	2f		/* yes, go return cpu */
6811:
682	xorq	%rax, %rax	/* return 0 */
683	ret
6842:
685	movq	%r8, %rax		/* return cpu */
686	ret
687	SET_SIZE(mutex_owner_running)
688
689	.globl	mutex_owner_running_critical_size
690	.type	mutex_owner_running_critical_size, @object
691	.align	CPTRSIZE
692mutex_owner_running_critical_size:
693	.quad	.mutex_owner_running_critical_end - mutex_owner_running_critical_start
694	SET_SIZE(mutex_owner_running_critical_size)
695
696	.globl	mutex_exit_critical_start
697
/*
 * mutex_exit (amd64)
 * In:	%rdi = mutex address
 *
 * Fast path: if the mutex word equals the current thread pointer
 * exactly, store 0 to release it.  Any other value tail-jumps to
 * mutex_vector_exit.
 *
 * The compare-then-store window lies between mutex_exit_critical_start
 * and .mutex_exit_critical_end; its byte length is exported in
 * mutex_exit_critical_size.  Per the block comment introducing the
 * mutex routines, a thread interrupted inside this window is restarted
 * at mutex_exit_critical_start.
 *
 * If the `ret' at the lockstat patch point is replaced, the release is
 * reported as LS_MUTEX_EXIT_RELEASE through lockstat_wrapper (%rdx
 * still holds the thread pointer).
 */
698	ENTRY(mutex_exit)
699mutex_exit_critical_start:		/* If interrupted, restart here */
700	movq	%gs:CPU_THREAD, %rdx
701	cmpq	%rdx, (%rdi)
702	jne	mutex_vector_exit		/* wrong type or wrong owner */
703	movq	$0, (%rdi)			/* clear owner AND lock */
704.mutex_exit_critical_end:
705.mutex_exit_lockstat_patch_point:
706	ret
707	movq	%rdi, %rsi
708	movl	$LS_MUTEX_EXIT_RELEASE, %edi
709	jmp	lockstat_wrapper
710	SET_SIZE(mutex_exit)
711
712	.globl	mutex_exit_critical_size
713	.type	mutex_exit_critical_size, @object
714	.align	CPTRSIZE
715mutex_exit_critical_size:
716	.quad	.mutex_exit_critical_end - mutex_exit_critical_start
717	SET_SIZE(mutex_exit_critical_size)
718
719#else
720
/*
 * mutex_enter (i386), with lockstat_wrapper as an alternate entry.
 * In:	4(%esp) = mutex address
 *
 * Fast path: atomically replace a zero mutex word with the current
 * thread pointer.  If the word was not zero, tail-jump to
 * mutex_vector_enter with the stack untouched.
 *
 * With OPTERON_WORKAROUND_6323525 the `ret' is followed by three nops,
 * reserving four bytes for the lfence + ret patch.
 *
 * If the `ret' at the lockstat patch point is replaced, execution falls
 * into lockstat_wrapper with %edx = thread, %ecx = lock and
 * %eax = LS_MUTEX_ENTER_ACQUIRE.
 *
 * lockstat_wrapper builds a frame, increments the thread's T_LOCKSTAT
 * byte, saves the thread pointer on the stack and looks up
 * lockstat_probemap[event].  If the entry is non-zero it calls
 * *lockstat_probe(probe id, lock).  It then restores the thread
 * pointer, decrements T_LOCKSTAT and returns 1 in %eax.
 */
721	ENTRY_NP(mutex_enter)
722	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
723	movl	4(%esp), %ecx			/* ecx = lock ptr */
724	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
725	lock
726	cmpxchgl %edx, (%ecx)
727	jnz	mutex_vector_enter
728#if defined(OPTERON_WORKAROUND_6323525)
729.mutex_enter_lockstat_patch_point:
730.mutex_enter_6323525_patch_point:
731	ret					/* nop space for lfence */
732	nop
733	nop
734.mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
735	nop
736#else	/* OPTERON_WORKAROUND_6323525 */
737.mutex_enter_lockstat_patch_point:
738	ret
739#endif	/* OPTERON_WORKAROUND_6323525 */
740	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax
741	ALTENTRY(lockstat_wrapper)	/* expects edx=thread, ecx=lock, */
742					/*   eax=lockstat event */
743	pushl	%ebp				/* buy a frame */
744	movl	%esp, %ebp
745	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
746	pushl	%edx				/* save thread pointer	 */
747	movl	$lockstat_probemap, %edx
748	movl	(%edx, %eax, DTRACE_IDSIZE), %eax
749	testl	%eax, %eax			/* check for non-zero probe */
750	jz	1f
751	pushl	%ecx				/* push lock */
752	pushl	%eax				/* push probe ID */
753	call	*lockstat_probe
754	addl	$8, %esp
7551:
756	popl	%edx				/* restore thread pointer */
757	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
758	movl	$1, %eax			/* return success if tryenter */
759	popl	%ebp				/* pop off frame */
760	ret
761	SET_SIZE(lockstat_wrapper)
762	SET_SIZE(mutex_enter)
763
/*
 * lockstat_wrapper_arg (i386)
 * In:	%edx = thread, %ecx = lock, %eax = lockstat event, and one extra
 *	argument pushed on the stack by the jumping routine, so that on
 *	entry (%esp) = arg and 4(%esp) = the original caller's return
 *	address.
 * Out:	%eax = 1; the pushed argument is popped before returning.
 *
 * When the probe id is non-zero, the slot that held the pushed argument
 * is reused to build a frame.  %ebp is saved, the argument is copied
 * lower on the stack as the third probe argument, and the original slot
 * is overwritten with the old %ebp.  %ebp is then pointed at that slot,
 * which sits directly below the return address.  After the call, the
 * three probe arguments are dropped and the saved %ebp is restored.
 */
764	ENTRY(lockstat_wrapper_arg)	/* expects edx=thread, ecx=lock, */
765					/* eax=lockstat event, pushed arg */
766	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
767	pushl	%edx				/* save thread pointer	 */
768	movl	$lockstat_probemap, %edx
769	movl	(%edx, %eax, DTRACE_IDSIZE), %eax
770	testl	%eax, %eax			/* check for non-zero probe */
771	jz	1f
772	pushl	%ebp				/* save %ebp */
773	pushl	8(%esp)				/* push arg1 */
774	movl	%ebp, 12(%esp)			/* fake up the stack frame */
775	movl	%esp, %ebp			/* fake up base pointer */
776	addl	$12, %ebp			/* adjust faked base pointer */
777	pushl	%ecx				/* push lock */
778	pushl	%eax				/* push probe ID */
779	call	*lockstat_probe
780	addl	$12, %esp			/* adjust for arguments */
781	popl	%ebp				/* pop frame */
7821:
783	popl	%edx				/* restore thread pointer */
784	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
785	movl	$1, %eax			/* return success if tryenter */
786	addl	$4, %esp			/* pop argument */
787	ret
788	SET_SIZE(lockstat_wrapper_arg)
789
790
/*
 * mutex_tryenter (i386)
 * In:	4(%esp) = mutex address
 * Out:	%eax non-zero on success
 *
 * Fast path: atomically replace a zero mutex word with the current
 * thread pointer.  On success the lock pointer itself is copied into
 * %eax to serve as the non-zero return value.  If the word was not
 * zero, tail-jump to mutex_vector_tryenter.
 *
 * The ret/nop layout and patch labels follow the same scheme as
 * mutex_enter.  On the lockstat path the event reported is
 * LS_MUTEX_ENTER_ACQUIRE, and the return value becomes the 1 that
 * lockstat_wrapper loads into %eax.
 */
791	ENTRY(mutex_tryenter)
792	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
793	movl	4(%esp), %ecx			/* ecx = lock ptr */
794	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
795	lock
796	cmpxchgl %edx, (%ecx)
797	jnz	mutex_vector_tryenter
798	movl	%ecx, %eax
799#if defined(OPTERON_WORKAROUND_6323525)
800.mutex_tryenter_lockstat_patch_point:
801.mutex_tryenter_6323525_patch_point:
802	ret					/* nop space for lfence */
803	nop
804	nop
805.mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
806	nop
807#else	/* OPTERON_WORKAROUND_6323525 */
808.mutex_tryenter_lockstat_patch_point:
809	ret
810#endif	/* OPTERON_WORKAROUND_6323525 */
811	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax
812	jmp	lockstat_wrapper
813	SET_SIZE(mutex_tryenter)
814
/*
 * mutex_adaptive_tryenter (i386)
 * In:	4(%esp) = mutex address
 * Out:	%eax = the lock pointer (non-zero) if the mutex word was zero
 *	and is now the current thread pointer; 0 otherwise.
 *
 * A failed compare-and-swap returns 0 directly; there is no slow path
 * and no lockstat patch point.  With OPTERON_WORKAROUND_6323525 the
 * `ret' is followed by three nops, reserving four bytes for the
 * lfence + ret patch.
 */
815	ENTRY(mutex_adaptive_tryenter)
816	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
817	movl	4(%esp), %ecx			/* ecx = lock ptr */
818	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
819	lock
820	cmpxchgl %edx, (%ecx)
821	jnz	0f
822	movl	%ecx, %eax
823#if defined(OPTERON_WORKAROUND_6323525)
824.mutex_atryenter_6323525_patch_point:
825	ret					/* nop space for lfence */
826	nop
827	nop
828	nop
829#else	/* OPTERON_WORKAROUND_6323525 */
830	ret
831#endif	/* OPTERON_WORKAROUND_6323525 */
8320:
833	xorl	%eax, %eax
834	ret
835	SET_SIZE(mutex_adaptive_tryenter)
836
837	.globl	mutex_owner_running_critical_start
838
/*
 * mutex_owner_running (i386)
 * In:	4(%esp) = mutex address
 * Out:	%eax = the owner's t_cpu pointer if that cpu's cpu_thread is the
 *	owner; 0 if the mutex has no owner or the owner is not that
 *	cpu's current thread.
 *
 * The owner is the mutex word masked with MUTEX_THREAD.  The loads that
 * chase owner->t_cpu->cpu_thread sit between
 * mutex_owner_running_critical_start and
 * .mutex_owner_running_critical_end.  The byte length of that region is
 * exported in mutex_owner_running_critical_size, presumably so that
 * interrupt code can recognise a PC inside it (TODO confirm against the
 * consumer of these symbols).
 */
839	ENTRY(mutex_owner_running)
840mutex_owner_running_critical_start:
841	movl	4(%esp), %eax		/* get owner field */
842	movl	(%eax), %eax
843	andl	$MUTEX_THREAD, %eax	/* remove waiters bit */
844	cmpl	$0, %eax		/* if free, skip */
845	je	1f			/* go return 0 */
846	movl	T_CPU(%eax), %ecx	/* get owner->t_cpu */
847	movl	CPU_THREAD(%ecx), %edx	/* get t_cpu->cpu_thread */
848.mutex_owner_running_critical_end:
849	cmpl	%eax, %edx	/* owner == running thread? */
850	je	2f		/* yes, go return cpu */
8511:
852	xorl	%eax, %eax	/* return 0 */
853	ret
8542:
855	movl	%ecx, %eax	/* return cpu */
856	ret
857
858	SET_SIZE(mutex_owner_running)
859
860	.globl	mutex_owner_running_critical_size
861	.type	mutex_owner_running_critical_size, @object
862	.align	CPTRSIZE
863mutex_owner_running_critical_size:
864	.long	.mutex_owner_running_critical_end - mutex_owner_running_critical_start
865	SET_SIZE(mutex_owner_running_critical_size)
866
867	.globl	mutex_exit_critical_start
868
/*
 * mutex_exit (i386)
 * In:	4(%esp) = mutex address
 *
 * Fast path: if the mutex word equals the current thread pointer
 * exactly, store 0 to release it.  Any other value tail-jumps to
 * mutex_vector_exit.
 *
 * The compare-then-store window lies between mutex_exit_critical_start
 * and .mutex_exit_critical_end; its byte length is exported in
 * mutex_exit_critical_size.  Per the block comment introducing the
 * mutex routines, a thread interrupted inside this window is restarted
 * at mutex_exit_critical_start.
 *
 * If the `ret' at the lockstat patch point is replaced, the release is
 * reported as LS_MUTEX_EXIT_RELEASE through lockstat_wrapper (%edx and
 * %ecx still hold the thread and lock pointers).
 */
869	ENTRY(mutex_exit)
870mutex_exit_critical_start:		/* If interrupted, restart here */
871	movl	%gs:CPU_THREAD, %edx
872	movl	4(%esp), %ecx
873	cmpl	%edx, (%ecx)
874	jne	mutex_vector_exit		/* wrong type or wrong owner */
875	movl	$0, (%ecx)			/* clear owner AND lock */
876.mutex_exit_critical_end:
877.mutex_exit_lockstat_patch_point:
878	ret
879	movl	$LS_MUTEX_EXIT_RELEASE, %eax
880	jmp	lockstat_wrapper
881	SET_SIZE(mutex_exit)
882
883	.globl	mutex_exit_critical_size
884	.type	mutex_exit_critical_size, @object
885	.align	CPTRSIZE
886mutex_exit_critical_size:
887	.long	.mutex_exit_critical_end - mutex_exit_critical_start
888	SET_SIZE(mutex_exit_critical_size)
889
890#endif	/* !__amd64 */
891
892#endif	/* __lint */
893
894/*
895 * rw_enter() and rw_exit().
896 *
897 * These routines handle the simple cases of rw_enter (write-locking an unheld
898 * lock or read-locking a lock that's neither write-locked nor write-wanted)
899 * and rw_exit (no waiters or not the last reader).  If anything complicated
900 * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
901 */
902#if defined(lint) || defined(__lint)
903
/*
 * Lint-only C stubs for rw_enter() and rw_exit(); the assembly
 * implementations are in the #else branch of the surrounding #if.
 */
904/* ARGSUSED */
905void
906rw_enter(krwlock_t *lp, krw_t rw)
907{}
908
909/* ARGSUSED */
910void
911rw_exit(krwlock_t *lp)
912{}
913
914#else	/* __lint */
915
916#if defined(__amd64)
917
/*
 * rw_enter (amd64)
 * In:	%rdi = rwlock address, %esi = RW_WRITER or a reader request
 *
 * Reader path:	increment the thread's T_KPRI_REQ, then load the lock
 *		word.  If it has RW_WRITE_LOCKED or RW_WRITE_WANTED set,
 *		go to rw_enter_sleep.  Otherwise try to compare-and-swap
 *		in (old + RW_READ_LOCK); a lost race also goes to
 *		rw_enter_sleep.  Note that T_KPRI_REQ has already been
 *		incremented when rw_enter_sleep is entered from here.
 * Writer path:	try to compare-and-swap the word from 0 to
 *		(thread pointer | RW_WRITE_LOCKED); failure goes to
 *		rw_enter_sleep.
 *
 * Every jump to rw_enter_sleep is a tail-jump with %rdi/%esi untouched.
 *
 * If a patch-point `ret' is replaced, the acquire is reported as
 * LS_RW_ENTER_ACQUIRE through lockstat_wrapper_arg, with %edx carrying
 * RW_READER or RW_WRITER as the extra argument.  The writer `ret' has
 * the same OPTERON_WORKAROUND_6323525 nop padding as mutex_enter.
 */
918	ENTRY(rw_enter)
919	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
920	cmpl	$RW_WRITER, %esi
921	je	.rw_write_enter
922	incl	T_KPRI_REQ(%rdx)		/* THREAD_KPRI_REQUEST() */
923	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
924	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
925	jnz	rw_enter_sleep
926	leaq	RW_READ_LOCK(%rax), %rdx	/* rdx = new rw_wwwh value */
927	lock
928	cmpxchgq %rdx, (%rdi)			/* try to grab read lock */
929	jnz	rw_enter_sleep
930.rw_read_enter_lockstat_patch_point:
931	ret
932	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
933	movq	%rdi, %rsi			/* rsi = lock ptr */
934	movl	$LS_RW_ENTER_ACQUIRE, %edi
935	movl	$RW_READER, %edx
936	jmp	lockstat_wrapper_arg
937.rw_write_enter:
938	orq	$RW_WRITE_LOCKED, %rdx		/* rdx = write-locked value */
939	xorl	%eax, %eax			/* rax = unheld value */
940	lock
941	cmpxchgq %rdx, (%rdi)			/* try to grab write lock */
942	jnz	rw_enter_sleep
943
944#if defined(OPTERON_WORKAROUND_6323525)
945.rw_write_enter_lockstat_patch_point:
946.rw_write_enter_6323525_patch_point:
947	ret
948	nop
949	nop
950.rw_write_enter_lockstat_6323525_patch_point:
951	nop
952#else	/* OPTERON_WORKAROUND_6323525 */
953.rw_write_enter_lockstat_patch_point:
954	ret
955#endif	/* OPTERON_WORKAROUND_6323525 */
956
957	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
958	movq	%rdi, %rsi			/* rsi = lock ptr */
959	movl	$LS_RW_ENTER_ACQUIRE, %edi
960	movl	$RW_WRITER, %edx
961	jmp	lockstat_wrapper_arg
962	SET_SIZE(rw_enter)
963
/*
 * rw_exit (amd64)
 * In:	%rdi = rwlock address
 *
 * Loads the lock word and picks one of three fast paths:
 *
 * Single reader:	the low 32 bits equal RW_READ_LOCK.  The new
 *		value is 0.
 * Other readers:	RW_WRITE_LOCKED is clear.  The new value is
 *		(old - RW_READ_LOCK); if its low 32 bits are still
 *		>= RW_READ_LOCK (signed compare), join the reader drop,
 *		otherwise go to rw_exit_wakeup.
 * Writer:	RW_WRITE_LOCKED is set.  The expected old value is
 *		(thread pointer | RW_WRITE_LOCKED) and the new value
 *		is 0.
 *
 * Each drop is a compare-and-swap; if it fails the routine tail-jumps
 * to rw_exit_wakeup.  A successful reader drop also decrements the
 * thread's T_KPRI_REQ.
 *
 * If a patch-point `ret' is replaced, the release is reported as
 * LS_RW_EXIT_RELEASE through lockstat_wrapper_arg, with %edx carrying
 * RW_READER or RW_WRITER.  On the reader path %rcx already holds the
 * thread pointer from the T_KPRI_REQ update.
 */
964	ENTRY(rw_exit)
965	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
966	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
967	jne	.rw_not_single_reader
968	xorl	%edx, %edx			/* rdx = new value (unheld) */
969.rw_read_exit:
970	lock
971	cmpxchgq %rdx, (%rdi)			/* try to drop read lock */
972	jnz	rw_exit_wakeup
973	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
974	decl	T_KPRI_REQ(%rcx)		/* THREAD_KPRI_RELEASE() */
975.rw_read_exit_lockstat_patch_point:
976	ret
977	movq	%rdi, %rsi			/* rsi = lock ptr */
978	movl	$LS_RW_EXIT_RELEASE, %edi
979	movl	$RW_READER, %edx
980	jmp	lockstat_wrapper_arg
981.rw_not_single_reader:
982	testl	$RW_WRITE_LOCKED, %eax	/* write-locked? */
983	jnz	.rw_write_exit
984	leaq	-RW_READ_LOCK(%rax), %rdx	/* rdx = new value */
985	cmpl	$RW_READ_LOCK, %edx
986	jge	.rw_read_exit		/* not last reader, safe to drop */
987	jmp	rw_exit_wakeup			/* last reader with waiters */
988.rw_write_exit:
989	movq	%gs:CPU_THREAD, %rax		/* rax = thread ptr */
990	xorl	%edx, %edx			/* rdx = new value (unheld) */
991	orq	$RW_WRITE_LOCKED, %rax		/* rax = write-locked value */
992	lock
993	cmpxchgq %rdx, (%rdi)			/* try to drop write lock */
994	jnz	rw_exit_wakeup
995.rw_write_exit_lockstat_patch_point:
996	ret
997	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
998	movq	%rdi, %rsi			/* rsi - lock ptr */
999	movl	$LS_RW_EXIT_RELEASE, %edi
1000	movl	$RW_WRITER, %edx
1001	jmp	lockstat_wrapper_arg
1002	SET_SIZE(rw_exit)
1003
1004#else
1005
/*
 * rw_enter (i386)
 * In:	4(%esp) = rwlock address, 8(%esp) = RW_WRITER or a reader request
 *
 * Reader path:	increment the thread's T_KPRI_REQ, then load the lock
 *		word.  If it has RW_WRITE_LOCKED or RW_WRITE_WANTED set,
 *		go to rw_enter_sleep.  Otherwise try to compare-and-swap
 *		in (old + RW_READ_LOCK); a lost race also goes to
 *		rw_enter_sleep.  Note that T_KPRI_REQ has already been
 *		incremented when rw_enter_sleep is entered from here.
 * Writer path:	try to compare-and-swap the word from 0 to
 *		(thread pointer | RW_WRITE_LOCKED); failure goes to
 *		rw_enter_sleep.
 *
 * Every jump to rw_enter_sleep is a tail-jump with the caller's stack
 * arguments untouched.
 *
 * If a patch-point `ret' is replaced, RW_READER or RW_WRITER is pushed
 * as the extra argument and the acquire is reported as
 * LS_RW_ENTER_ACQUIRE through lockstat_wrapper_arg, which pops that
 * argument itself.  The writer `ret' has the same
 * OPTERON_WORKAROUND_6323525 nop padding as mutex_enter.
 */
1006	ENTRY(rw_enter)
1007	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1008	movl	4(%esp), %ecx			/* ecx = lock ptr */
1009	cmpl	$RW_WRITER, 8(%esp)
1010	je	.rw_write_enter
1011	incl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_REQUEST() */
1012	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
1013	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
1014	jnz	rw_enter_sleep
1015	leal	RW_READ_LOCK(%eax), %edx	/* edx = new rw_wwwh value */
1016	lock
1017	cmpxchgl %edx, (%ecx)			/* try to grab read lock */
1018	jnz	rw_enter_sleep
1019.rw_read_enter_lockstat_patch_point:
1020	ret
1021	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1022	movl	$LS_RW_ENTER_ACQUIRE, %eax
1023	pushl	$RW_READER
1024	jmp	lockstat_wrapper_arg
1025.rw_write_enter:
1026	orl	$RW_WRITE_LOCKED, %edx		/* edx = write-locked value */
1027	xorl	%eax, %eax			/* eax = unheld value */
1028	lock
1029	cmpxchgl %edx, (%ecx)			/* try to grab write lock */
1030	jnz	rw_enter_sleep
1031
1032#if defined(OPTERON_WORKAROUND_6323525)
1033.rw_write_enter_lockstat_patch_point:
1034.rw_write_enter_6323525_patch_point:
1035	ret
1036	nop
1037	nop
1038.rw_write_enter_lockstat_6323525_patch_point:
1039	nop
1040#else	/* OPTERON_WORKAROUND_6323525 */
1041.rw_write_enter_lockstat_patch_point:
1042	ret
1043#endif	/* OPTERON_WORKAROUND_6323525 */
1044
1045	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1046	movl	$LS_RW_ENTER_ACQUIRE, %eax
1047	pushl	$RW_WRITER
1048	jmp	lockstat_wrapper_arg
1049	SET_SIZE(rw_enter)
1050
/*
 * rw_exit (i386)
 * In:	4(%esp) = rwlock address
 *
 * Loads the lock word and picks one of three fast paths:
 *
 * Single reader:	the word equals RW_READ_LOCK.  The new value
 *		is 0.
 * Other readers:	RW_WRITE_LOCKED is clear.  The new value is
 *		(old - RW_READ_LOCK); if it is still >= RW_READ_LOCK
 *		(signed compare), join the reader drop, otherwise go to
 *		rw_exit_wakeup.
 * Writer:	RW_WRITE_LOCKED is set.  The expected old value is
 *		(thread pointer | RW_WRITE_LOCKED) and the new value
 *		is 0.
 *
 * Each drop is a compare-and-swap; if it fails the routine tail-jumps
 * to rw_exit_wakeup.  A successful reader drop also decrements the
 * thread's T_KPRI_REQ.
 *
 * If a patch-point `ret' is replaced, RW_READER or RW_WRITER is pushed
 * and the release is reported as LS_RW_EXIT_RELEASE through
 * lockstat_wrapper_arg.  On the reader path %edx already holds the
 * thread pointer from the T_KPRI_REQ update.
 */
1051	ENTRY(rw_exit)
1052	movl	4(%esp), %ecx			/* ecx = lock ptr */
1053	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
1054	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
1055	jne	.rw_not_single_reader
1056	xorl	%edx, %edx			/* edx = new value (unheld) */
1057.rw_read_exit:
1058	lock
1059	cmpxchgl %edx, (%ecx)			/* try to drop read lock */
1060	jnz	rw_exit_wakeup
1061	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1062	decl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_RELEASE() */
1063.rw_read_exit_lockstat_patch_point:
1064	ret
1065	movl	$LS_RW_EXIT_RELEASE, %eax
1066	pushl	$RW_READER
1067	jmp	lockstat_wrapper_arg
1068.rw_not_single_reader:
1069	testl	$RW_WRITE_LOCKED, %eax	/* write-locked? */
1070	jnz	.rw_write_exit
1071	leal	-RW_READ_LOCK(%eax), %edx	/* edx = new value */
1072	cmpl	$RW_READ_LOCK, %edx
1073	jge	.rw_read_exit		/* not last reader, safe to drop */
1074	jmp	rw_exit_wakeup			/* last reader with waiters */
1075.rw_write_exit:
1076	movl	%gs:CPU_THREAD, %eax		/* eax = thread ptr */
1077	xorl	%edx, %edx			/* edx = new value (unheld) */
1078	orl	$RW_WRITE_LOCKED, %eax		/* eax = write-locked value */
1079	lock
1080	cmpxchgl %edx, (%ecx)			/* try to drop write lock */
1081	jnz	rw_exit_wakeup
1082.rw_write_exit_lockstat_patch_point:
1083	ret
1084	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1085	movl	$LS_RW_EXIT_RELEASE, %eax
1086	pushl	$RW_WRITER
1087	jmp	lockstat_wrapper_arg
1088	SET_SIZE(rw_exit)
1089
1090#endif	/* !__amd64 */
1091
1092#endif	/* __lint */
1093
1094#if defined(OPTERON_WORKAROUND_6323525)
1095#if defined(lint) || defined(__lint)
1096
/*
 * Lint-only declarations for workaround_6323525_patched and
 * patch_workaround_6323525(); the real definitions are in the #else
 * branch of the surrounding #if.
 */
1097int	workaround_6323525_patched;
1098
1099void
1100patch_workaround_6323525(void)
1101{}
1102
1103#else	/* lint */
1104
1105/*
1106 * If it is necessary to patch the lock enter routines with the lfence
1107 * workaround, workaround_6323525_patched is set to a non-zero value so that
1108 * the lockstat_hot_patch routine can patch to the new location of the 'ret'
1109 * instruction.
1110 */
1111	DGDEF3(workaround_6323525_patched, 4, 4)
1112	.long	0
1113
1114#if defined(__amd64)
1115
/*
 * HOT_MUTEX_PATCH(srcaddr, dstaddr, size) (amd64)
 *
 * Copies `size' bytes from srcaddr over dstaddr one byte at a time,
 * starting with the last byte and finishing with the first.  Each byte
 * is written by calling hot_patch_kernel_text with %rdi = destination
 * address, %esi = the source byte (zero-extended) and %rdx = 1.
 *
 * Uses %rbx as the remaining-byte counter and %r12/%r13 as the source
 * and destination cursors, so the expanding routine must preserve those
 * callee-saved registers itself.  The macro defines the numeric local
 * label 0.
 */
1116#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
1117	movq	$size, %rbx;			\
1118	movq	$dstaddr, %r13;			\
1119	addq	%rbx, %r13;			\
1120	movq	$srcaddr, %r12;			\
1121	addq	%rbx, %r12;			\
11220:						\
1123	decq	%r13;				\
1124	decq	%r12;				\
1125	movzbl	(%r12), %esi;			\
1126	movq	$1, %rdx;			\
1127	movq	%r13, %rdi;			\
1128	call	hot_patch_kernel_text;		\
1129	decq	%rbx;				\
1130	testq	%rbx, %rbx;			\
1131	jg	0b;
1132
1133/*
1134 * patch_workaround_6323525: provide workaround for 6323525
1135 *
1136 * The workaround is to place a fencing instruction (lfence) between the
1137 * mutex operation and the subsequent read-modify-write instruction.
1138 *
1139 * This routine hot patches the lfence instruction on top of the space
1140 * reserved by nops in the lock enter routines.
 *
 * Takes no arguments and returns no value.  %rbx, %r12 and %r13 are
 * overwritten by HOT_MUTEX_PATCH, so they are saved on entry and restored
 * before returning.
1141 */
1142	ENTRY_NP(patch_workaround_6323525)
1143	pushq	%rbp
1144	movq	%rsp, %rbp
1145	pushq	%r12
1146	pushq	%r13
1147	pushq	%rbx
	/*
	 * The return address plus the four pushes above leave %rsp at
	 * 8 mod 16.  Pad by 8 bytes so that %rsp is 16-byte aligned at
	 * every call made by HOT_MUTEX_PATCH, as the amd64 ABI requires.
	 */
	subq	$8, %rsp
1148
1149	/*
1150	 * Set workaround_6323525_patched so that lockstat_hot_patch() uses
1151	 * the alternate 6323525 lockstat patch points (which sit past the
1152	 * lfence instruction, at the new ret).
1153	 */
1154	movl	$1, workaround_6323525_patched
1155
1156	/*
1157	 * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
1158	 * routines. The 4 bytes are patched in reverse order so that the
1159	 * existing ret is overwritten last. This provides lock enter
1160	 * sanity during the intermediate patching stages.
1161	 */
1162	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
1163	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
1164	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
1165	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
1166
	addq	$8, %rsp		/* drop the alignment padding */
1167	popq	%rbx
1168	popq	%r13
1169	popq	%r12
1170	movq	%rbp, %rsp
1171	popq	%rbp
1172	ret
	/*
	 * Template copied over each patch point above; it is data for
	 * HOT_MUTEX_PATCH and is never reached by falling through the ret.
	 */
1173_lfence_insn:
1174	lfence
1175	ret
1176	SET_SIZE(patch_workaround_6323525)
1177
1177
1178
1179#else	/* __amd64 */
1180
/*
 * HOT_MUTEX_PATCH(srcaddr, dstaddr, size) -- 32-bit version
 *
 * Copies `size' bytes from srcaddr to dstaddr one byte at a time, starting
 * with the last byte and working down to the first.  For each byte it
 * pushes, in this order, the constant 1, the zero-extended source byte and
 * the destination byte address, calls hot_patch_kernel_text, and then
 * removes the 12 bytes of arguments from the stack.
 *
 * Register use: %ebx = bytes still to copy, %esi = source cursor,
 * %edi = destination cursor, %eax = scratch for the byte being copied.
 * The macro does not save these; the code that expands it must.
 * Local label 0 marks the top of the loop.
 */
1181#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
1182	movl	$size, %ebx;			\
1183	movl	$srcaddr, %esi;			\
1184	addl	%ebx, %esi;			\
1185	movl	$dstaddr, %edi;			\
1186	addl	%ebx, %edi;			\
11870:      					\
1188	decl	%esi;				\
1189	decl	%edi;				\
1190	pushl	$1;				\
1191	movzbl	(%esi), %eax;			\
1192	pushl	%eax;				\
1193	pushl	%edi;				\
1194	call	hot_patch_kernel_text;		\
1195	addl	$12, %esp;			\
1196	decl	%ebx;				\
1197	testl	%ebx, %ebx;			\
1198	jg	0b;
1199
1200
1201	/*
	 * 32-bit patch_workaround_6323525; see the comments on the amd64
	 * version above.  Sets workaround_6323525_patched to 1 and then
	 * copies the 4-byte _lfence_insn template over each of the four
	 * lock enter patch points.
	 */
1202	ENTRY_NP(patch_workaround_6323525)
1203	pushl	%ebp
1204	movl	%esp, %ebp
1205	pushl	%ebx			/* %ebx, %esi, %edi are overwritten */
1206	pushl	%esi			/* by HOT_MUTEX_PATCH, so save them */
1207	pushl	%edi
1208
1209	movl	$1, workaround_6323525_patched
1210
1211	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
1212	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
1213	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
1214	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
1215
1216	popl	%edi
1217	popl	%esi
1218	popl	%ebx
1219	movl	%ebp, %esp
1220	popl	%ebp
1221	ret
	/*
	 * Template copied over each patch point above: three hand-encoded
	 * bytes followed by a one-byte ret.
	 */
1222_lfence_insn:
1223	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
1224	ret
1225	SET_SIZE(patch_workaround_6323525)
1226
1227#endif	/* !__amd64 */
1228#endif	/* !lint */
1229#endif	/* OPTERON_WORKAROUND_6323525 */
1230
1231
1232#if defined(lint) || defined(__lint)
1233
/* lint-only empty stub: no arguments, no return value */
1234void
1235lockstat_hot_patch(void)
1236{}
1237
1238#else
1239
1240#if defined(__amd64)
1241
/*
 * HOT_PATCH(addr, event, active_instr, normal_instr, len) -- amd64 version
 *
 * Reads the 32-bit lockstat_probemap entry at offset
 * _MUL(event, DTRACE_IDSIZE).  If that entry is non-zero the value written
 * is active_instr, otherwise normal_instr.  The chosen value is passed to
 * hot_patch_kernel_text with
 *	%rdi = addr
 *	%rsi = chosen instruction value
 *	%rdx = len
 *
 * Overwrites %rax, %rdx, %rsi and %rdi.  Local label 9 is the join point
 * of the two cases.  The expansion does not end with a `;'.
 */
1242#define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
1243	movq	$normal_instr, %rsi;		\
1244	movq	$active_instr, %rdi;		\
1245	leaq	lockstat_probemap(%rip), %rax;	\
1246	movl 	_MUL(event, DTRACE_IDSIZE)(%rax), %eax;	\
1247	testl	%eax, %eax;			\
1248	jz	9f;				\
1249	movq	%rdi, %rsi;			\
12509:						\
1251	movq	$len, %rdx;			\
1252	movq	$addr, %rdi;			\
1253	call	hot_patch_kernel_text
1254
1255#else
1256
/*
 * HOT_PATCH(addr, event, active_instr, normal_instr, len) -- 32-bit version
 *
 * Reads the 32-bit lockstat_probemap entry at offset
 * _MUL(event, DTRACE_IDSIZE).  If that entry is non-zero the value written
 * is active_instr, otherwise normal_instr.  It then pushes len, the chosen
 * value and addr (in that order), calls hot_patch_kernel_text and removes
 * the 12 bytes of arguments from the stack.
 *
 * Overwrites %eax, %ecx and %edx.
 *
 * `jz . + 4' is meant to skip only the `movl %edx, %ecx' that follows it,
 * so it depends on the jz and that movl together occupying exactly 4 bytes.
 * Do not insert or change instructions between them.
 */
1257#define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
1258	movl	$normal_instr, %ecx;		\
1259	movl	$active_instr, %edx;		\
1260	movl	$lockstat_probemap, %eax;	\
1261	movl	_MUL(event, DTRACE_IDSIZE)(%eax), %eax;	\
1262	testl	%eax, %eax;			\
1263	jz	. + 4;				\
1264	movl	%edx, %ecx;			\
1265	pushl	$len;				\
1266	pushl	%ecx;				\
1267	pushl	$addr;				\
1268	call	hot_patch_kernel_text;		\
1269	addl	$12, %esp;
1270
1271#endif	/* !__amd64 */
1272
	/*
	 * lockstat_hot_patch()
	 *
	 * Walks every lockstat patch point and, through HOT_PATCH, writes
	 * one byte there: the "active" value when the lockstat_probemap
	 * entry for the given event is non-zero, the "normal" value
	 * otherwise.  For all but the last patch point the active value is
	 * NOP_INSTR and the normal value is RET_INSTR.
	 *
	 * Takes no arguments and returns no value.
	 */
1273	ENTRY(lockstat_hot_patch)
1274#if defined(__amd64)
1275	pushq	%rbp			/* align stack properly */
1276	movq	%rsp, %rbp
1277#endif	/* __amd64 */
1278
1279#if defined(OPTERON_WORKAROUND_6323525)
	/*
	 * If patch_workaround_6323525 has run (flag non-zero), patch the
	 * *_lockstat_6323525_patch_point labels for mutex_enter,
	 * mutex_tryenter and rw_write_enter; otherwise patch the regular
	 * labels at 1: below.
	 */
1280	cmpl	$0, workaround_6323525_patched
1281	je	1f
1282	HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
1283		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1284	HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
1285		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1286	HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
1287		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1288	jmp	2f
12891:
1290	HOT_PATCH(.mutex_enter_lockstat_patch_point,
1291		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1292	HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
1293		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1294	HOT_PATCH(.rw_write_enter_lockstat_patch_point,
1295		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
12962:
1297#else	/* OPTERON_WORKAROUND_6323525 */
1298	HOT_PATCH(.mutex_enter_lockstat_patch_point,
1299		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1300	HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
1301		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1302	HOT_PATCH(.rw_write_enter_lockstat_patch_point,
1303		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1304#endif	/* !OPTERON_WORKAROUND_6323525 */
	/* the remaining patch points are the same in both configurations */
1305	HOT_PATCH(.mutex_exit_lockstat_patch_point,
1306		LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
1307	HOT_PATCH(.rw_read_enter_lockstat_patch_point,
1308		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1309	HOT_PATCH(.rw_write_exit_lockstat_patch_point,
1310		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
1311	HOT_PATCH(.rw_read_exit_lockstat_patch_point,
1312		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
1313	HOT_PATCH(.lock_set_lockstat_patch_point,
1314		LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1315	HOT_PATCH(.lock_try_lockstat_patch_point,
1316		LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1317	HOT_PATCH(.lock_clear_lockstat_patch_point,
1318		LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
1319	HOT_PATCH(.lock_set_spl_lockstat_patch_point,
1320		LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
1321
	/*
	 * Unlike the points above, this one is written with
	 * LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL when active and 0 otherwise.
	 */
1322	HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
1323		LS_LOCK_CLEAR_SPLX_RELEASE,
1324		LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
1325#if defined(__amd64)
1326	leave			/* unwind stack */
1327#endif	/* __amd64 */
1328	ret
1329	SET_SIZE(lockstat_hot_patch)
1330
1330
1331#endif	/* __lint */
1332
1333#if defined(lint) || defined(__lint)
1334
1335/* XX64 membar_*() should be inlines */
1336
/*
 * lint-only empty stubs for the five memory barrier routines; each takes
 * no arguments and returns no value.  The assembly build supplies the
 * real implementations.
 */
1337void
1338membar_sync(void)
1339{}
1340
1341void
1342membar_enter(void)
1343{}
1344
1345void
1346membar_exit(void)
1347{}
1348
1349void
1350membar_producer(void)
1351{}
1352
1353void
1354membar_consumer(void)
1355{}
1356
1357#else	/* __lint */
1358
1359#if defined(__amd64)
1360
	/*
	 * amd64: membar_enter(), membar_exit() and membar_sync() are three
	 * entry points to the same code, a single mfence followed by ret.
	 */
1361	ENTRY(membar_enter)
1362	ALTENTRY(membar_exit)
1363	ALTENTRY(membar_sync)
1364	mfence			/* lighter weight than lock; xorq $0,(%rsp) */
1365	ret
1366	SET_SIZE(membar_sync)
1367	SET_SIZE(membar_exit)
1368	SET_SIZE(membar_enter)
1369
	/* amd64 membar_producer(): executes sfence and returns */
1370	ENTRY(membar_producer)
1371	sfence
1372	ret
1373	SET_SIZE(membar_producer)
1374
	/* amd64 membar_consumer(): executes lfence and returns */
1375	ENTRY(membar_consumer)
1376	lfence
1377	ret
1378	SET_SIZE(membar_consumer)
1379
1380#else
1381
	/*
	 * 32-bit: membar_enter(), membar_exit() and membar_sync() are three
	 * entry points to the same code.  Instead of mfence it uses a locked
	 * xor of 0 into the word at the top of the stack, which leaves that
	 * word unchanged.
	 */
1382	ENTRY(membar_enter)
1383	ALTENTRY(membar_exit)
1384	ALTENTRY(membar_sync)
1385	lock
1386	xorl	$0, (%esp)
1387	ret
1388	SET_SIZE(membar_sync)
1389	SET_SIZE(membar_exit)
1390	SET_SIZE(membar_enter)
1391
1392/*
1393 * On machines that support sfence and lfence, these
1394 * memory barriers can be more precisely implemented
1395 * without causing the whole world to stop.
 *
 * 32-bit membar_producer(): as assembled, a locked xor of 0 into the word
 * at the top of the stack, followed by ret.  The body is exported under
 * the global label _patch_sfence_ret -- presumably so that code elsewhere
 * can overwrite it with sfence/ret; confirm against the patching code.
1396 */
1397	ENTRY(membar_producer)
1398	.globl	_patch_sfence_ret
1399_patch_sfence_ret:			/* c.f. membar #StoreStore */
1400	lock
1401	xorl	$0, (%esp)
1402	ret
1403	SET_SIZE(membar_producer)
1404
	/*
	 * 32-bit membar_consumer(): as assembled, a locked xor of 0 into the
	 * word at the top of the stack, followed by ret.  The body is
	 * exported under the global label _patch_lfence_ret -- presumably so
	 * that code elsewhere can overwrite it with lfence/ret; confirm
	 * against the patching code.
	 */
1405	ENTRY(membar_consumer)
1406	.globl	_patch_lfence_ret
1407_patch_lfence_ret:			/* c.f. membar #LoadLoad */
1408	lock
1409	xorl	$0, (%esp)
1410	ret
1411	SET_SIZE(membar_consumer)
1412
1413#endif	/* !__amd64 */
1414
1415#endif	/* __lint */
1416
1417/*
1418 * thread_onproc()
1419 * Set thread in onproc state for the specified CPU.
1420 * Also set the thread lock pointer to the CPU's onproc lock.
1421 * Since the new lock isn't held, the store ordering is important.
1422 * If not done in assembler, the compiler could reorder the stores.
1423 */
1424#if defined(lint) || defined(__lint)
1425
/*
 * lint-only C rendering of thread_onproc(): marks thread t as TS_ONPROC and
 * then points its lock pointer at cp's cpu_thread_lock.  The state is
 * written first, the lock pointer second.
 */
1426void
1427thread_onproc(kthread_id_t t, cpu_t *cp)
1428{
1429	t->t_state = TS_ONPROC;
1430	t->t_lockp = &cp->cpu_thread_lock;
1431}
1432
1433#else	/* __lint */
1434
1435#if defined(__amd64)
1436
	/*
	 * amd64 thread_onproc(t, cp): %rdi = t, %rsi = cp.
	 * The state store must stay ahead of the lock pointer store; do not
	 * reorder the two stores below.  %rsi is modified in place.
	 */
1437	ENTRY(thread_onproc)
1438	addq	$CPU_THREAD_LOCK, %rsi	/* pointer to disp_lock while running */
1439	movl	$ONPROC_THREAD, T_STATE(%rdi)	/* set state to TS_ONPROC */
1440	movq	%rsi, T_LOCKP(%rdi)	/* store new lock pointer */
1441	ret
1442	SET_SIZE(thread_onproc)
1443
1444#else
1445
	/*
	 * 32-bit thread_onproc(t, cp): both arguments are read from the stack.
	 * The state store must stay ahead of the lock pointer store; do not
	 * reorder the two stores below.  Uses %eax and %ecx as scratch.
	 */
1446	ENTRY(thread_onproc)
1447	movl	4(%esp), %eax		/* eax = t (first argument) */
1448	movl	8(%esp), %ecx		/* ecx = cp (second argument) */
1449	addl	$CPU_THREAD_LOCK, %ecx	/* pointer to disp_lock while running */
1450	movl	$ONPROC_THREAD, T_STATE(%eax)	/* set state to TS_ONPROC */
1451	movl	%ecx, T_LOCKP(%eax)	/* store new lock pointer */
1452	ret
1453	SET_SIZE(thread_onproc)
1454
1455#endif	/* !__amd64 */
1456
1457#endif	/* __lint */
1458
1459/*
1460 * mutex_delay_default(void)
1461 * Spins for approx a few hundred processor cycles and returns to caller.
1462 */
1463
1464#if defined(lint) || defined(__lint)
1465
/* lint-only empty stub: no arguments, no return value */
1466void
1467mutex_delay_default(void)
1468{}
1469
1470#else	/* __lint */
1471
1472#if defined(__amd64)
1473
	/*
	 * amd64 mutex_delay_default(): busy-waits by counting %r11 down from
	 * 92 to 0, then returns.  Touches only %r11 and the flags.  The
	 * count sets the length of the delay, so do not change it casually.
	 */
1474	ENTRY(mutex_delay_default)
1475	movq	$92,%r11		/* r11 = loop count */
14760:	decq	%r11
1477	jg	0b			/* repeat while r11 > 0 */
1478	ret
1479	SET_SIZE(mutex_delay_default)
1480
1481#else
1482
	/*
	 * 32-bit mutex_delay_default(): sets up a frame, busy-waits by
	 * counting %ebx down from 93 to 0, then restores %ebx and the frame
	 * and returns.  The count sets the length of the delay, so do not
	 * change it casually.
	 * NOTE(review): the reason %esp is rounded down to a 16-byte boundary
	 * is not evident from this file.
	 */
1483	ENTRY(mutex_delay_default)
1484	push	%ebp
1485	movl	%esp,%ebp
1486	andl	$-16,%esp		/* round %esp down to a multiple of 16 */
1487	push	%ebx			/* %ebx is the loop counter; save it */
1488	movl	$93,%ebx		/* ebx = loop count */
14890:	decl	%ebx
1490	jg	0b			/* repeat while ebx > 0 */
1491	pop	%ebx
1492	leave				/* restore %esp and %ebp from the frame */
1493	ret
1494	SET_SIZE(mutex_delay_default)
1495
1496#endif	/* !__amd64 */
1497#endif	/* __lint */
1498