xref: /titanic_50/usr/src/uts/intel/ia32/ml/lock_prim.s (revision c77a61a72b5ecdc507d6cf104142edd371a16c84)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if defined(lint) || defined(__lint)
29#include <sys/types.h>
30#include <sys/thread.h>
31#include <sys/cpuvar.h>
32#include <vm/page.h>
33#include <sys/mutex_impl.h>
34#else	/* __lint */
35#include "assym.h"
36#endif	/* __lint */
37
38#include <sys/asm_linkage.h>
39#include <sys/asm_misc.h>
40#include <sys/regset.h>
41#include <sys/rwlock_impl.h>
42#include <sys/lockstat.h>
43
44/*
45 * lock_try(lp), ulock_try(lp)
46 *	- returns non-zero on success.
47 *	- doesn't block interrupts so don't use this to spin on a lock.
48 *
49 * ulock_try() is for a lock in the user address space.
50 */
51
52#if defined(lint) || defined(__lint)
53
/*
 * C stubs built only when lint/__lint is defined (see the enclosing #if);
 * each one simply returns 0.  The real routines are the assembly versions
 * in the #else branch.
 */
54/* ARGSUSED */
55int
56lock_try(lock_t *lp)
57{ return (0); }
58
59/* ARGSUSED */
60int
61lock_spin_try(lock_t *lp)
62{ return (0); }
63
64/* ARGSUSED */
65int
66ulock_try(lock_t *lp)
67{ return (0); }
68
69#else	/* __lint */
70	.globl	kernelbase
71
72#if defined(__amd64)
73
/*
 * lock_try (amd64): %rdi = lock address.
 * Atomically swaps 0xff into the lock byte and returns, in %al,
 * 0xff ^ (previous lock byte): zero if the byte was already 0xff, 0xff if
 * it was 0.  The code after the first ret is only reached if that ret is
 * patched out (NOTE(review): presumably by lockstat_hot_patch(), whose
 * body is not visible here); on success it tail-jumps to lockstat_wrapper,
 * which returns 1.
 */
74	ENTRY(lock_try)
75	movb	$-1, %dl		/* dl = 0xff, value to store */
76	movzbq	%dl, %rax		/* rax = 0xff (zero-extended) */
77	xchgb	%dl, (%rdi)		/* swap; dl = previous lock byte */
78	xorb	%dl, %al		/* al = 0xff ^ previous byte */
79.lock_try_lockstat_patch_point:
80	ret
81	testb	%al, %al		/* lockstat path: did we get it? */
82	jnz	0f			/* yes, go record the event */
83	ret				/* no, return 0 without an event */
840:
85	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
86	movq	%rdi, %rsi		/* rsi = lock addr */
87	movl	$LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
88	jmp	lockstat_wrapper
89	SET_SIZE(lock_try)
90
/*
 * lock_spin_try (amd64): %rdi = lock address.
 * Same xchg sequence as lock_try, but with no lockstat patch point: it
 * always returns immediately with %al = 0xff ^ (previous lock byte).
 */
91	ENTRY(lock_spin_try)
92	movb	$-1, %dl		/* dl = 0xff, value to store */
93	movzbq	%dl, %rax		/* rax = 0xff (zero-extended) */
94	xchgb	%dl, (%rdi)		/* swap; dl = previous lock byte */
95	xorb	%dl, %al		/* al = 0xff ^ previous byte */
96	ret
97	SET_SIZE(lock_spin_try)
98
/*
 * ulock_try (amd64): %rdi = address of a lock byte in user space.
 * Atomically swaps 1 into the byte and returns 1 ^ (previous byte) in %eax:
 * 1 if the byte was 0, 0 if it was already 1.
 * DEBUG builds panic with .ulock_panic_msg when %rdi is not below
 * kernelbase.
 */
99	ENTRY(ulock_try)
100#ifdef DEBUG
101	movq	kernelbase(%rip), %rax
102	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
103	jb	ulock_pass		/*	uaddr < kernelbase, proceed */
104
105	movq	%rdi, %r12		/* preserve lock ptr for debugging */
106	leaq	.ulock_panic_msg(%rip), %rdi
107	pushq	%rbp			/* align stack properly */
108	movq	%rsp, %rbp
109	xorl	%eax, %eax		/* clear for varargs */
110	call	panic
111
112#endif /* DEBUG */
113
114ulock_pass:
115	movl	$1, %eax		/* eax = 1, value to store */
116	xchgb	%al, (%rdi)		/* swap; al = previous lock byte */
117	xorb	$1, %al			/* al = 1 ^ previous byte */
118	ret
119	SET_SIZE(ulock_try)
120
121#else
122
/*
 * lock_try (i386): 4(%esp) = lock address.
 * Atomically swaps 1 into the lock byte (lock_spin_try below stores 0xff
 * instead) and returns 1 in %eax if the previous byte was zero, else 0.
 * The code after the first ret is only reached if that ret is patched out
 * (NOTE(review): presumably by lockstat_hot_patch(), not visible here); on
 * success it tail-jumps to lockstat_wrapper with edx = thread, ecx = lock,
 * eax = event.
 */
123	ENTRY(lock_try)
124	movl	$1,%edx
125	movl	4(%esp),%ecx		/* ecx = lock addr */
126	xorl	%eax,%eax
127	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
128	testb	%dl,%dl			/* stalls on P6 ? */
129	setz	%al			/* al = 1 iff previous byte was 0 */
130.lock_try_lockstat_patch_point:
131	ret
132	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
133	testl	%eax, %eax		/* did we get the lock? */
134	jz	0f			/* no, return 0 without an event */
135	movl	$LS_LOCK_TRY_ACQUIRE, %eax
136	jmp	lockstat_wrapper
1370:
138	ret
139	SET_SIZE(lock_try)
140
/*
 * lock_spin_try (i386): 4(%esp) = lock address.
 * Atomically swaps 0xff into the lock byte and returns 1 in %eax if the
 * previous byte was zero, else 0.  No lockstat patch point.
 */
141	ENTRY(lock_spin_try)
142	movl	$-1,%edx
143	movl	4(%esp),%ecx		/* ecx = lock addr */
144	xorl	%eax,%eax
145	xchgb	%dl, (%ecx)		/* using dl will avoid partial */
146	testb	%dl,%dl			/* stalls on P6 ? */
147	setz	%al			/* al = 1 iff previous byte was 0 */
148	ret
149	SET_SIZE(lock_spin_try)
150
/*
 * ulock_try (i386): 4(%esp) = address of a lock byte in user space.
 * Atomically swaps 1 into the byte and returns 1 ^ (previous byte) in %eax.
 * DEBUG builds panic with .ulock_panic_msg when the address is not below
 * kernelbase.
 */
151	ENTRY(ulock_try)
152#ifdef DEBUG
153	movl	kernelbase, %eax
154	cmpl	%eax, 4(%esp)		/* test uaddr < kernelbase */
155	jb	ulock_pass		/* uaddr < kernelbase, proceed */
156
157	pushl	$.ulock_panic_msg
158	call	panic
159
160#endif /* DEBUG */
161
162ulock_pass:
163	movl	$1,%eax			/* eax = 1, value to store */
164	movl	4(%esp),%ecx		/* ecx = lock addr */
165	xchgb	%al, (%ecx)		/* swap; al = previous lock byte */
166	xorb	$1, %al			/* al = 1 ^ previous byte */
167	ret
168	SET_SIZE(ulock_try)
169
170#endif	/* !__amd64 */
171
172#ifdef DEBUG
173	.data
174.ulock_panic_msg:
175	.string "ulock_try: Argument is above kernelbase"
176	.text
177#endif	/* DEBUG */
178
179#endif	/* __lint */
180
181/*
182 * lock_clear(lp)
183 *	- unlock lock without changing interrupt priority level.
184 */
185
186#if defined(lint) || defined(__lint)
187
/*
 * Empty C stubs built only when lint/__lint is defined; the real routines
 * are the assembly versions in the #else branch.
 */
188/* ARGSUSED */
189void
190lock_clear(lock_t *lp)
191{}
192
193/* ARGSUSED */
194void
195ulock_clear(lock_t *lp)
196{}
197
198#else	/* __lint */
199
200#if defined(__amd64)
201
/*
 * lock_clear (amd64): %rdi = lock address.  Stores 0 into the lock byte.
 * The code after the ret is only reached if the ret is patched out
 * (NOTE(review): presumably by lockstat_hot_patch(), not visible here) and
 * reports LS_LOCK_CLEAR_RELEASE through lockstat_wrapper.
 */
202	ENTRY(lock_clear)
203	movb	$0, (%rdi)			/* release the lock */
204.lock_clear_lockstat_patch_point:
205	ret
206	movq	%rdi, %rsi			/* rsi = lock addr */
207	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread addr */
208	movl	$LS_LOCK_CLEAR_RELEASE, %edi	/* edi = event */
209	jmp	lockstat_wrapper
210	SET_SIZE(lock_clear)
211
/*
 * ulock_clear (amd64): %rdi = address of a lock byte in user space.
 * Stores 0 into the byte.  DEBUG builds panic with .ulock_clear_msg when
 * %rdi is not below kernelbase.
 */
212	ENTRY(ulock_clear)
213#ifdef DEBUG
214	movq	kernelbase(%rip), %rcx
215	cmpq	%rcx, %rdi		/* test uaddr < kernelbase */
216	jb	ulock_clr		/*	 uaddr < kernelbase, proceed */
217
218	leaq	.ulock_clear_msg(%rip), %rdi
219	pushq	%rbp			/* align stack properly */
220	movq	%rsp, %rbp
221	xorl	%eax, %eax		/* clear for varargs */
222	call	panic
223#endif
224
225ulock_clr:
226	movb	$0, (%rdi)		/* release the lock */
227	ret
228	SET_SIZE(ulock_clear)
229
230#else
231
/*
 * lock_clear (i386): 4(%esp) = lock address.  Stores 0 into the lock byte.
 * The code after the ret is only reached if the ret is patched out
 * (NOTE(review): presumably by lockstat_hot_patch(), not visible here) and
 * reports LS_LOCK_CLEAR_RELEASE through lockstat_wrapper.
 */
232	ENTRY(lock_clear)
233	movl	4(%esp), %eax			/* eax = lock addr */
234	movb	$0, (%eax)			/* release the lock */
235.lock_clear_lockstat_patch_point:
236	ret
237	movl	%gs:CPU_THREAD, %edx		/* edx = thread addr */
238	movl	%eax, %ecx			/* ecx = lock pointer */
239	movl	$LS_LOCK_CLEAR_RELEASE, %eax
240	jmp	lockstat_wrapper
241	SET_SIZE(lock_clear)
242
/*
 * ulock_clear (i386): 4(%esp) = address of a lock byte in user space.
 * Stores 0 into the byte.  DEBUG builds panic with .ulock_clear_msg when
 * the address is not below kernelbase.
 */
243	ENTRY(ulock_clear)
244#ifdef DEBUG
245	movl	kernelbase, %ecx
246	cmpl	%ecx, 4(%esp)		/* test uaddr < kernelbase */
247	jb	ulock_clr		/* uaddr < kernelbase, proceed */
248
249	pushl	$.ulock_clear_msg
250	call	panic
251#endif
252
253ulock_clr:
254	movl	4(%esp),%eax		/* eax = lock addr */
255	xorl	%ecx,%ecx		/* cl = 0 */
256	movb	%cl, (%eax)		/* release the lock */
257	ret
258	SET_SIZE(ulock_clear)
259
260#endif	/* !__amd64 */
261
262#ifdef DEBUG
263	.data
264.ulock_clear_msg:
265	.string "ulock_clear: Argument is above kernelbase"
266	.text
267#endif	/* DEBUG */
268
269
270#endif	/* __lint */
271
272/*
273 * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
274 * Raises pil to new_pil (via splr), acquires lp, stores old pil in *old_pil.
275 */
276
277#if defined(lint) || defined(__lint)
278
/* Empty C stub built only for lint; the real routine is the assembly below. */
279/* ARGSUSED */
280void
281lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
282{}
283
284#else	/* __lint */
285
286#if defined(__amd64)
287
/*
 * lock_set_spl (amd64): %rdi = lock addr, %esi = new pil, %rdx = old pil ptr.
 * Calls splr(new_pil), then tries once to swap 0xff into the lock byte.
 * On success the value splr returned in %eax is stored through the old-pil
 * pointer as a 16-bit value.  On failure it tail-jumps to
 * lock_set_spl_spin(lock, new_pil, old_pil ptr, value returned by splr).
 * The three arguments are spilled to a 32-byte frame across the splr call.
 */
288	ENTRY(lock_set_spl)
289	pushq	%rbp
290	movq	%rsp, %rbp
291	subq	$32, %rsp
292	movl	%esi, 8(%rsp)		/* save priority level */
293	movq	%rdx, 16(%rsp)		/* save old pil ptr */
294	movq	%rdi, 24(%rsp)		/* save lock pointer */
295	movl	%esi, %edi		/* pass priority level */
296	call	splr			/* raise priority level */
297	movq	24(%rsp), %rdi		/* rdi = lock addr */
298	movb	$-1, %dl
299	xchgb	%dl, (%rdi)		/* try to set lock */
300	testb	%dl, %dl		/* did we get the lock? ... */
301	jnz	.lss_miss		/* ... no, go to C for the hard case */
302	movq	16(%rsp), %rdx		/* rdx = old pil addr */
303	movw	%ax, (%rdx)		/* store old pil */
304	leave
305.lock_set_spl_lockstat_patch_point:
306	ret
307	movq	%rdi, %rsi		/* rsi = lock addr */
308	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
309	movl	$LS_LOCK_SET_SPL_ACQUIRE, %edi
310	jmp	lockstat_wrapper
311.lss_miss:
312	movl	8(%rsp), %esi		/* new_pil */
313	movq	16(%rsp), %rdx		/* old_pil_addr */
314	movl	%eax, %ecx		/* original pil */
315	leave				/* unwind stack */
316	jmp	lock_set_spl_spin
317	SET_SIZE(lock_set_spl)
318
319#else
320
/*
 * lock_set_spl (i386): 4(%esp) = lock addr, 8(%esp) = new pil,
 * 12(%esp) = old pil ptr.
 * Calls splr(new_pil), then tries once to swap 0xff into the lock byte.
 * On success the value splr returned in %eax is stored through the old-pil
 * pointer as a 16-bit value.  On failure it calls
 * lock_set_spl_spin(lock, new_pil, old_pil ptr, value returned by splr).
 */
321	ENTRY(lock_set_spl)
322	movl	8(%esp), %eax		/* get priority level */
323	pushl	%eax
324	call	splr			/* raise priority level */
325	movl 	8(%esp), %ecx		/* ecx = lock addr */
326	movl	$-1, %edx
327	addl	$4, %esp
328	xchgb	%dl, (%ecx)		/* try to set lock */
329	testb	%dl, %dl		/* did we get the lock? ... */
330	movl	12(%esp), %edx		/* edx = old pil addr (ZF unaffected) */
331	jnz	.lss_miss		/* ... no, go to C for the hard case */
332	movw	%ax, (%edx)		/* store old pil */
333.lock_set_spl_lockstat_patch_point:
334	ret
335	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr*/
336	movl	$LS_LOCK_SET_SPL_ACQUIRE, %eax
337	jmp	lockstat_wrapper
338.lss_miss:
339	pushl	%eax			/* original pil */
340	pushl	%edx			/* old_pil addr */
341	pushl	16(%esp)		/* new_pil */
342	pushl	%ecx			/* lock addr */
343	call	lock_set_spl_spin
344	addl	$16, %esp
345	ret
346	SET_SIZE(lock_set_spl)
347
348#endif	/* !__amd64 */
349
350#endif	/* __lint */
351
352/*
353 * void
354 * lock_init(lp)
355 */
356
357#if defined(__lint)
358
/* Empty C stub built only for lint; the real routine is the assembly below. */
359/* ARGSUSED */
360void
361lock_init(lock_t *lp)
362{}
363
364#else	/* __lint */
365
366#if defined(__amd64)
367
/* lock_init (amd64): %rdi = lock address.  Stores 0 into the lock byte. */
368	ENTRY(lock_init)
369	movb	$0, (%rdi)		/* lock byte = 0 */
370	ret
371	SET_SIZE(lock_init)
372
373#else
374
/* lock_init (i386): 4(%esp) = lock address.  Stores 0 into the lock byte. */
375	ENTRY(lock_init)
376	movl	4(%esp), %eax		/* eax = lock addr */
377	movb	$0, (%eax)		/* lock byte = 0 */
378	ret
379	SET_SIZE(lock_init)
380
381#endif	/* !__amd64 */
382
383#endif	/* __lint */
384
385/*
386 * void
387 * lock_set(lp)
388 */
389
390#if defined(lint) || defined(__lint)
391
/* Empty C stub built only for lint; the real routine is the assembly below. */
392/* ARGSUSED */
393void
394lock_set(lock_t *lp)
395{}
396
397#else	/* __lint */
398
399#if defined(__amd64)
400
/*
 * lock_set (amd64): %rdi = lock address.
 * Tries once to swap 0xff into the lock byte; if the previous byte was
 * non-zero it tail-jumps to lock_set_spin with %rdi unchanged.  The code
 * after the ret is only reached if the ret is patched out (NOTE(review):
 * presumably by lockstat_hot_patch(), not visible here).
 */
401	ENTRY(lock_set)
402	movb	$-1, %dl
403	xchgb	%dl, (%rdi)		/* try to set lock */
404	testb	%dl, %dl		/* did we get it? */
405	jnz	lock_set_spin		/* no, go to C for the hard case */
406.lock_set_lockstat_patch_point:
407	ret
408	movq	%rdi, %rsi		/* rsi = lock addr */
409	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
410	movl	$LS_LOCK_SET_ACQUIRE, %edi
411	jmp	lockstat_wrapper
412	SET_SIZE(lock_set)
413
414#else
415
/*
 * lock_set (i386): 4(%esp) = lock address.
 * Tries once to swap 0xff into the lock byte; if the previous byte was
 * non-zero it tail-jumps to lock_set_spin with the stack untouched.  The
 * code after the ret is only reached if the ret is patched out
 * (NOTE(review): presumably by lockstat_hot_patch(), not visible here).
 */
416	ENTRY(lock_set)
417	movl	4(%esp), %ecx		/* ecx = lock addr */
418	movl	$-1, %edx
419	xchgb	%dl, (%ecx)		/* try to set lock */
420	testb	%dl, %dl		/* did we get it? */
421	jnz	lock_set_spin		/* no, go to C for the hard case */
422.lock_set_lockstat_patch_point:
423	ret
424	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
425	movl	$LS_LOCK_SET_ACQUIRE, %eax
426	jmp	lockstat_wrapper
427	SET_SIZE(lock_set)
428
429#endif	/* !__amd64 */
430
431#endif	/* __lint */
432
433/*
434 * lock_clear_splx(lp, s)
435 */
436
437#if defined(lint) || defined(__lint)
438
/* Empty C stub built only for lint; the real routine is the assembly below. */
439/* ARGSUSED */
440void
441lock_clear_splx(lock_t *lp, int s)
442{}
443
444#else	/* __lint */
445
446#if defined(__amd64)
447
/*
 * lock_clear_splx (amd64): %rdi = lock address, %esi = argument for splx.
 * Stores 0 into the lock byte, then tail-jumps to splx(%esi).
 * Unlike the other routines, the patch point here is a "jmp 0f" to the very
 * next instruction; the LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_* macros that follow
 * this routine describe how to retarget it at .lock_clear_splx_lockstat,
 * which calls splx and then reports LS_LOCK_CLEAR_SPLX_RELEASE through
 * lockstat_wrapper.
 */
448	ENTRY(lock_clear_splx)
449	movb	$0, (%rdi)		/* clear lock */
450.lock_clear_splx_lockstat_patch_point:
451	jmp	0f
4520:
453	movl	%esi, %edi		/* arg for splx */
454	jmp	splx			/* let splx do its thing */
455.lock_clear_splx_lockstat:
456	pushq	%rbp			/* align stack properly */
457	movq	%rsp, %rbp
458	subq	$16, %rsp		/* space to save args across splx */
459	movq	%rdi, 8(%rsp)		/* save lock ptr across splx call */
460	movl	%esi, %edi		/* arg for splx */
461	call	splx			/* lower the priority */
462	movq	8(%rsp), %rsi		/* rsi = lock ptr */
463	leave				/* unwind stack */
464	movq	%gs:CPU_THREAD, %rdx	/* rdx = thread addr */
465	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %edi
466	jmp	lockstat_wrapper
467	SET_SIZE(lock_clear_splx)
468
/*
 * Patch constants for the "jmp 0f" at .lock_clear_splx_lockstat_patch_point.
 * PATCH_POINT is the address one byte past the start of that jmp, and
 * PATCH_VAL is the distance from the end of a 2-byte jmp to
 * .lock_clear_splx_lockstat (assumes the jmp assembles to the 2-byte short
 * form -- TODO confirm).  The two variants differ only in the grouping
 * characters used for the expression: () under __GNUC_AS__, [] otherwise.
 */
469#if defined(__GNUC_AS__)
470#define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
471	(.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)
472
473#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
474	(.lock_clear_splx_lockstat_patch_point + 1)
475#else
476#define	LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL	\
477	[.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]
478
479#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT	\
480	[.lock_clear_splx_lockstat_patch_point + 1]
481#endif
482
483#else
484
/*
 * lock_clear_splx (i386): 4(%esp) = lock address, 8(%esp) = desired pil.
 * Stores 0 into the lock byte, disables interrupts and calls spl with
 * ecx = cpu pointer and edx = desired pil (a register-based convention;
 * spl itself is defined elsewhere).  The code after the ret is only reached
 * if the ret is patched out (NOTE(review): presumably by
 * lockstat_hot_patch(), not visible here).
 */
485	ENTRY(lock_clear_splx)
486	LOADCPU(%ecx)			/* ecx = cpu pointer */
487	movl	4(%esp), %eax		/* eax = lock addr */
488	movl	8(%esp), %edx		/* edx = desired pil */
489	movb	$0, (%eax)		/* clear lock */
490	cli				/* disable interrupts */
491	call	spl			/* magic calling sequence */
492.lock_clear_splx_lockstat_patch_point:
493	ret
494	movl	4(%esp), %ecx		/* ecx = lock pointer */
495	movl	%gs:CPU_THREAD, %edx	/* edx = thread addr */
496	movl	$LS_LOCK_CLEAR_SPLX_RELEASE, %eax
497	jmp	lockstat_wrapper
498	SET_SIZE(lock_clear_splx)
499
500#endif	/* !__amd64 */
501
502#endif	/* __lint */
503
504/*
505 * mutex_enter() and mutex_exit().
506 *
507 * These routines handle the simple cases of mutex_enter() (adaptive
508 * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
509 * If anything complicated is going on we punt to mutex_vector_enter().
510 *
511 * mutex_tryenter() is similar to mutex_enter() but returns zero if
512 * the lock cannot be acquired, nonzero on success.
513 *
514 * If mutex_exit() gets preempted in the window between checking waiters
515 * and clearing the lock, we can miss wakeups.  Disabling preemption
516 * in the mutex code is prohibitively expensive, so instead we detect
517 * mutex preemption by examining the trapped PC in the interrupt path.
518 * If we interrupt a thread in mutex_exit() that has not yet cleared
519 * the lock, cmnint() resets its PC back to the beginning of
520 * mutex_exit() so it will check again for waiters when it resumes.
521 *
522 * The lockstat code below is activated when the lockstat driver
523 * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
524 * Note that we don't need to test lockstat_event_mask here -- we won't
525 * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
526 */
527#if defined(lint) || defined(__lint)
528
/*
 * C stubs built only when lint/__lint is defined; the void ones are empty
 * and the int ones return 0.  The real routines are the assembly versions
 * in the #else branch.
 */
529/* ARGSUSED */
530void
531mutex_enter(kmutex_t *lp)
532{}
533
534/* ARGSUSED */
535int
536mutex_tryenter(kmutex_t *lp)
537{ return (0); }
538
539/* ARGSUSED */
540int
541mutex_adaptive_tryenter(mutex_impl_t *lp)
542{ return (0); }
543
544/* ARGSUSED */
545void
546mutex_exit(kmutex_t *lp)
547{}
548
549#else
550
551#if defined(__amd64)
552
/*
 * mutex_enter (amd64): %rdi = mutex address.
 * Fast path: compare-and-swap the first 8 bytes of the mutex from 0 to the
 * current thread pointer; on failure tail-jump to mutex_vector_enter.
 * With OPTERON_WORKAROUND_6323525 the ret is followed by three nops so that
 * patch_workaround_6323525() can overwrite the 4 bytes with lfence/ret.
 * When the ret is patched out, execution falls through into
 * lockstat_wrapper, which is also an entry point for the other routines in
 * this file.
 *
 * lockstat_wrapper bumps curthread->t_lockstat, calls *lockstat_probe with
 * the probe id from lockstat_probemap[event] in %edi if that id is non-zero
 * (%rsi still holds the lock), then undoes the increment and returns 1.
 */
553	ENTRY_NP(mutex_enter)
554	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
555	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
556	lock
557	cmpxchgq %rdx, (%rdi)
558	jnz	mutex_vector_enter
559.mutex_enter_lockstat_patch_point:
560#if defined(OPTERON_WORKAROUND_6323525)
561.mutex_enter_6323525_patch_point:
562	ret					/* nop space for lfence */
563	nop
564	nop
565.mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
566	nop
567#else	/* OPTERON_WORKAROUND_6323525 */
568	ret
569#endif	/* OPTERON_WORKAROUND_6323525 */
570	movq	%rdi, %rsi
571	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi
572/*
573 * expects %rdx=thread, %rsi=lock, %edi=lockstat event
574 */
575	ALTENTRY(lockstat_wrapper)
576	incb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat++ */
577	leaq	lockstat_probemap(%rip), %rax
578	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax
579	testl	%eax, %eax			/* check for non-zero probe */
580	jz	1f
581	pushq	%rbp				/* align stack properly */
582	movq	%rsp, %rbp
583	movl	%eax, %edi
584	call	*lockstat_probe
585	leave					/* unwind stack */
5861:
587	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
588	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
589	movl	$1, %eax			/* return success if tryenter */
590	ret
591	SET_SIZE(lockstat_wrapper)
592	SET_SIZE(mutex_enter)
593
594/*
595 * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
596 *
 * Same as lockstat_wrapper, except that the thread pointer arrives in %rcx
 * so that %rdx can carry an extra argument, which is left untouched for
 * the *lockstat_probe call.  Always returns 1 in %eax.
 */
597	ENTRY(lockstat_wrapper_arg)
598	incb	T_LOCKSTAT(%rcx)		/* curthread->t_lockstat++ */
599	leaq	lockstat_probemap(%rip), %rax
600	movl	(%rax, %rdi, DTRACE_IDSIZE), %eax
601	testl	%eax, %eax			/* check for non-zero probe */
602	jz	1f
603	pushq	%rbp				/* align stack properly */
604	movq	%rsp, %rbp
605	movl	%eax, %edi
606	call	*lockstat_probe
607	leave					/* unwind stack */
6081:
609	movq	%gs:CPU_THREAD, %rdx		/* reload thread ptr */
610	decb	T_LOCKSTAT(%rdx)		/* curthread->t_lockstat-- */
611	movl	$1, %eax			/* return success if tryenter */
612	ret
613	SET_SIZE(lockstat_wrapper_arg)
614
615
/*
 * mutex_tryenter (amd64): %rdi = mutex address.
 * Compare-and-swap the first 8 bytes of the mutex from 0 to the current
 * thread pointer.  On success returns non-zero (%eax = ~0); on failure
 * tail-jumps to mutex_vector_tryenter.  The lockstat path reports
 * LS_MUTEX_ENTER_ACQUIRE and returns lockstat_wrapper's 1.
 */
616	ENTRY(mutex_tryenter)
617	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
618	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
619	lock
620	cmpxchgq %rdx, (%rdi)
621	jnz	mutex_vector_tryenter
622	not	%eax				/* return success (nonzero) */
623#if defined(OPTERON_WORKAROUND_6323525)
624.mutex_tryenter_lockstat_patch_point:
625.mutex_tryenter_6323525_patch_point:
626	ret					/* nop space for lfence */
627	nop
628	nop
629.mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
630	nop
631#else	/* OPTERON_WORKAROUND_6323525 */
632.mutex_tryenter_lockstat_patch_point:
633	ret
634#endif	/* OPTERON_WORKAROUND_6323525 */
635	movq	%rdi, %rsi			/* rsi = lock addr */
636	movl	$LS_MUTEX_ENTER_ACQUIRE, %edi	/* edi = event */
637	jmp	lockstat_wrapper
638	SET_SIZE(mutex_tryenter)
639
/*
 * mutex_adaptive_tryenter (amd64): %rdi = mutex address.
 * Compare-and-swap the first 8 bytes of the mutex from 0 to the current
 * thread pointer.  Returns non-zero (%eax = ~0) on success and 0 on
 * failure; unlike mutex_tryenter it never leaves this routine on failure
 * and has no lockstat patch point.
 */
640	ENTRY(mutex_adaptive_tryenter)
641	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
642	xorl	%eax, %eax			/* rax = 0 (unheld adaptive) */
643	lock
644	cmpxchgq %rdx, (%rdi)
645	jnz	0f
646	not	%eax				/* return success (nonzero) */
647#if defined(OPTERON_WORKAROUND_6323525)
648.mutex_atryenter_6323525_patch_point:
649	ret					/* nop space for lfence */
650	nop
651	nop
652	nop
653#else	/* OPTERON_WORKAROUND_6323525 */
654	ret
655#endif	/* OPTERON_WORKAROUND_6323525 */
6560:
657	xorl	%eax, %eax			/* return failure */
658	ret
659	SET_SIZE(mutex_adaptive_tryenter)
660
/*
 * mutex_exit (amd64): %rdi = mutex address.
 * If the first 8 bytes of the mutex equal the current thread pointer, store
 * 0 there; otherwise tail-jump to mutex_vector_exit.  The span from
 * mutex_exit_critical_start to .mutex_exit_critical_end is the restartable
 * window described in the block comment above the mutex routines; do not
 * add or reorder instructions inside it without accounting for that.
 */
661	.globl	mutex_exit_critical_start
662
663	ENTRY(mutex_exit)
664mutex_exit_critical_start:		/* If interrupted, restart here */
665	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
666	cmpq	%rdx, (%rdi)
667	jne	mutex_vector_exit		/* wrong type or wrong owner */
668	movq	$0, (%rdi)			/* clear owner AND lock */
669.mutex_exit_critical_end:
670.mutex_exit_lockstat_patch_point:
671	ret
672	movq	%rdi, %rsi			/* rsi = lock addr */
673	movl	$LS_MUTEX_EXIT_RELEASE, %edi	/* edi = event */
674	jmp	lockstat_wrapper
675	SET_SIZE(mutex_exit)
676
/*
 * Exported 8-byte object holding the byte length of the mutex_exit
 * critical window (.mutex_exit_critical_end - mutex_exit_critical_start).
 */
677	.globl	mutex_exit_critical_size
678	.type	mutex_exit_critical_size, @object
679	.align	CPTRSIZE
680mutex_exit_critical_size:
681	.quad	.mutex_exit_critical_end - mutex_exit_critical_start
682	SET_SIZE(mutex_exit_critical_size)
683
684#else
685
/*
 * mutex_enter (i386): 4(%esp) = mutex address.
 * Fast path: compare-and-swap the first 4 bytes of the mutex from 0 to the
 * current thread pointer; on failure tail-jump to mutex_vector_enter.
 * With OPTERON_WORKAROUND_6323525 the ret is followed by three nops so that
 * patch_workaround_6323525() can overwrite the 4 bytes with lfence/ret.
 * When the ret is patched out, execution falls through into
 * lockstat_wrapper, which is also an entry point for the other routines in
 * this file.
 *
 * lockstat_wrapper builds a frame, bumps curthread->t_lockstat, calls
 * (*lockstat_probe)(probe id, lock) if lockstat_probemap[event] is
 * non-zero, then undoes the increment and returns 1.
 */
686	ENTRY_NP(mutex_enter)
687	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
688	movl	4(%esp), %ecx			/* ecx = lock ptr */
689	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
690	lock
691	cmpxchgl %edx, (%ecx)
692	jnz	mutex_vector_enter
693#if defined(OPTERON_WORKAROUND_6323525)
694.mutex_enter_lockstat_patch_point:
695.mutex_enter_6323525_patch_point:
696	ret					/* nop space for lfence */
697	nop
698	nop
699.mutex_enter_lockstat_6323525_patch_point:	/* new patch point if lfence */
700	nop
701#else	/* OPTERON_WORKAROUND_6323525 */
702.mutex_enter_lockstat_patch_point:
703	ret
704#endif	/* OPTERON_WORKAROUND_6323525 */
705	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax
706	ALTENTRY(lockstat_wrapper)	/* expects edx=thread, ecx=lock, */
707					/*   eax=lockstat event */
708	pushl	%ebp				/* buy a frame */
709	movl	%esp, %ebp
710	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
711	pushl	%edx				/* save thread pointer	 */
712	movl	$lockstat_probemap, %edx
713	movl	(%edx, %eax, DTRACE_IDSIZE), %eax
714	testl	%eax, %eax			/* check for non-zero probe */
715	jz	1f
716	pushl	%ecx				/* push lock */
717	pushl	%eax				/* push probe ID */
718	call	*lockstat_probe
719	addl	$8, %esp
7201:
721	popl	%edx				/* restore thread pointer */
722	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
723	movl	$1, %eax			/* return success if tryenter */
724	popl	%ebp				/* pop off frame */
725	ret
726	SET_SIZE(lockstat_wrapper)
727	SET_SIZE(mutex_enter)
728
/*
 * lockstat_wrapper_arg (i386): like lockstat_wrapper, but the caller has
 * pushed one extra argument on the stack (just above the return address of
 * the routine that jumped here).  If the probe is enabled, that slot is
 * reused as the saved-%ebp slot of a fake frame and a copy of the argument
 * is passed as the third parameter: (*lockstat_probe)(probe id, lock, arg).
 * The pushed argument is popped before returning 1.
 */
729	ENTRY(lockstat_wrapper_arg)	/* expects edx=thread, ecx=lock, */
730					/* eax=lockstat event, pushed arg */
731	incb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat++ */
732	pushl	%edx				/* save thread pointer	 */
733	movl	$lockstat_probemap, %edx
734	movl	(%edx, %eax, DTRACE_IDSIZE), %eax
735	testl	%eax, %eax			/* check for non-zero probe */
736	jz	1f
737	pushl	%ebp				/* save %ebp */
738	pushl	8(%esp)				/* push arg1 */
739	movl	%ebp, 12(%esp)			/* fake up the stack frame */
740	movl	%esp, %ebp			/* fake up base pointer */
741	addl	$12, %ebp			/* adjust faked base pointer */
742	pushl	%ecx				/* push lock */
743	pushl	%eax				/* push probe ID */
744	call	*lockstat_probe
745	addl	$12, %esp			/* adjust for arguments */
746	popl	%ebp				/* pop frame */
7471:
748	popl	%edx				/* restore thread pointer */
749	decb	T_LOCKSTAT(%edx)		/* curthread->t_lockstat-- */
750	movl	$1, %eax			/* return success if tryenter */
751	addl	$4, %esp			/* pop argument */
752	ret
753	SET_SIZE(lockstat_wrapper_arg)
754
755
/*
 * mutex_tryenter (i386): 4(%esp) = mutex address.
 * Compare-and-swap the first 4 bytes of the mutex from 0 to the current
 * thread pointer.  On success returns the lock address in %eax as the
 * "non-zero" result; on failure tail-jumps to mutex_vector_tryenter.  The
 * lockstat path reports LS_MUTEX_ENTER_ACQUIRE and returns
 * lockstat_wrapper's 1.
 */
756	ENTRY(mutex_tryenter)
757	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
758	movl	4(%esp), %ecx			/* ecx = lock ptr */
759	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
760	lock
761	cmpxchgl %edx, (%ecx)
762	jnz	mutex_vector_tryenter
763	movl	%ecx, %eax			/* return lock ptr as success */
764#if defined(OPTERON_WORKAROUND_6323525)
765.mutex_tryenter_lockstat_patch_point:
766.mutex_tryenter_6323525_patch_point:
767	ret					/* nop space for lfence */
768	nop
769	nop
770.mutex_tryenter_lockstat_6323525_patch_point:	/* new patch point if lfence */
771	nop
772#else	/* OPTERON_WORKAROUND_6323525 */
773.mutex_tryenter_lockstat_patch_point:
774	ret
775#endif	/* OPTERON_WORKAROUND_6323525 */
776	movl	$LS_MUTEX_ENTER_ACQUIRE, %eax
777	jmp	lockstat_wrapper
778	SET_SIZE(mutex_tryenter)
779
/*
 * mutex_adaptive_tryenter (i386): 4(%esp) = mutex address.
 * Compare-and-swap the first 4 bytes of the mutex from 0 to the current
 * thread pointer.  Returns the lock address (non-zero) on success and 0 on
 * failure.  No lockstat patch point.
 */
780	ENTRY(mutex_adaptive_tryenter)
781	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
782	movl	4(%esp), %ecx			/* ecx = lock ptr */
783	xorl	%eax, %eax			/* eax = 0 (unheld adaptive) */
784	lock
785	cmpxchgl %edx, (%ecx)
786	jnz	0f
787	movl	%ecx, %eax			/* return lock ptr as success */
788#if defined(OPTERON_WORKAROUND_6323525)
789.mutex_atryenter_6323525_patch_point:
790	ret					/* nop space for lfence */
791	nop
792	nop
793	nop
794#else	/* OPTERON_WORKAROUND_6323525 */
795	ret
796#endif	/* OPTERON_WORKAROUND_6323525 */
7970:
798	xorl	%eax, %eax			/* return failure */
799	ret
800	SET_SIZE(mutex_adaptive_tryenter)
801
/*
 * mutex_exit (i386): 4(%esp) = mutex address.
 * If the first 4 bytes of the mutex equal the current thread pointer, store
 * 0 there; otherwise tail-jump to mutex_vector_exit.  The span from
 * mutex_exit_critical_start to .mutex_exit_critical_end is the restartable
 * window described in the block comment above the mutex routines; do not
 * add or reorder instructions inside it without accounting for that.
 */
802	.globl	mutex_exit_critical_size
803	.globl	mutex_exit_critical_start
804
805	ENTRY(mutex_exit)
806mutex_exit_critical_start:		/* If interrupted, restart here */
807	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
808	movl	4(%esp), %ecx			/* ecx = lock ptr */
809	cmpl	%edx, (%ecx)
810	jne	mutex_vector_exit		/* wrong type or wrong owner */
811	movl	$0, (%ecx)			/* clear owner AND lock */
812.mutex_exit_critical_end:
813.mutex_exit_lockstat_patch_point:
814	ret
815	movl	$LS_MUTEX_EXIT_RELEASE, %eax
816	jmp	lockstat_wrapper
817	SET_SIZE(mutex_exit)
818
/*
 * Exported 4-byte object holding the byte length of the mutex_exit
 * critical window (.mutex_exit_critical_end - mutex_exit_critical_start).
 */
819	.globl	mutex_exit_critical_size
820	.type	mutex_exit_critical_size, @object
821	.align	CPTRSIZE
822mutex_exit_critical_size:
823	.long	.mutex_exit_critical_end - mutex_exit_critical_start
824	SET_SIZE(mutex_exit_critical_size)
825
826#endif	/* !__amd64 */
827
828#endif	/* __lint */
829
830/*
831 * rw_enter() and rw_exit().
832 *
833 * These routines handle the simple cases of rw_enter (write-locking an unheld
834 * lock or read-locking a lock that's neither write-locked nor write-wanted)
835 * and rw_exit (no waiters or not the last reader).  If anything complicated
836 * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
837 */
838#if defined(lint) || defined(__lint)
839
/*
 * Empty C stubs built only when lint/__lint is defined; the real routines
 * are the assembly versions in the #else branch.
 */
840/* ARGSUSED */
841void
842rw_enter(krwlock_t *lp, krw_t rw)
843{}
844
845/* ARGSUSED */
846void
847rw_exit(krwlock_t *lp)
848{}
849
850#else	/* __lint */
851
852#if defined(__amd64)
853
/*
 * rw_enter (amd64): %rdi = rwlock address, %esi = RW_WRITER or a reader
 * request.
 * Reader: bump curthread's t_kpri_req, then, provided neither
 *	RW_WRITE_LOCKED nor RW_WRITE_WANTED is set, compare-and-swap the lock
 *	word from its current value to that value + RW_READ_LOCK.
 * Writer: compare-and-swap the lock word from 0 to
 *	(curthread | RW_WRITE_LOCKED).
 * Any failed test or cmpxchg tail-jumps to rw_enter_sleep with %rdi/%esi
 * still holding the original arguments.  Each lockstat path (reached only
 * if the preceding ret is patched out) reports LS_RW_ENTER_ACQUIRE through
 * lockstat_wrapper_arg with %edx = RW_READER or RW_WRITER.
 */
854	ENTRY(rw_enter)
855	movq	%gs:CPU_THREAD, %rdx		/* rdx = thread ptr */
856	cmpl	$RW_WRITER, %esi
857	je	.rw_write_enter
858	incl	T_KPRI_REQ(%rdx)		/* THREAD_KPRI_REQUEST() */
859	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
860	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
861	jnz	rw_enter_sleep
862	leaq	RW_READ_LOCK(%rax), %rdx	/* rdx = new rw_wwwh value */
863	lock
864	cmpxchgq %rdx, (%rdi)			/* try to grab read lock */
865	jnz	rw_enter_sleep
866.rw_read_enter_lockstat_patch_point:
867	ret
868	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
869	movq	%rdi, %rsi			/* rsi = lock ptr */
870	movl	$LS_RW_ENTER_ACQUIRE, %edi
871	movl	$RW_READER, %edx
872	jmp	lockstat_wrapper_arg
873.rw_write_enter:
874	orq	$RW_WRITE_LOCKED, %rdx		/* rdx = write-locked value */
875	xorl	%eax, %eax			/* rax = unheld value */
876	lock
877	cmpxchgq %rdx, (%rdi)			/* try to grab write lock */
878	jnz	rw_enter_sleep
879
880#if defined(OPTERON_WORKAROUND_6323525)
881.rw_write_enter_lockstat_patch_point:
882.rw_write_enter_6323525_patch_point:
883	ret
884	nop
885	nop
886.rw_write_enter_lockstat_6323525_patch_point:
887	nop
888#else	/* OPTERON_WORKAROUND_6323525 */
889.rw_write_enter_lockstat_patch_point:
890	ret
891#endif	/* OPTERON_WORKAROUND_6323525 */
892
893	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
894	movq	%rdi, %rsi			/* rsi = lock ptr */
895	movl	$LS_RW_ENTER_ACQUIRE, %edi
896	movl	$RW_WRITER, %edx
897	jmp	lockstat_wrapper_arg
898	SET_SIZE(rw_enter)
899
/*
 * rw_exit (amd64): %rdi = rwlock address.
 * - Lock word == RW_READ_LOCK (one reader, no other bits): cmpxchg it to 0.
 * - RW_WRITE_LOCKED clear: subtract RW_READ_LOCK; if the low 32 bits of the
 *   result are still >= RW_READ_LOCK (signed compare) cmpxchg to the new
 *   value, otherwise go to rw_exit_wakeup.
 * - RW_WRITE_LOCKED set: cmpxchg from (curthread | RW_WRITE_LOCKED) to 0.
 * Any failed cmpxchg tail-jumps to rw_exit_wakeup.  A successful read exit
 * also decrements curthread's t_kpri_req.  The lockstat paths report
 * LS_RW_EXIT_RELEASE through lockstat_wrapper_arg.
 */
900	ENTRY(rw_exit)
901	movq	(%rdi), %rax			/* rax = old rw_wwwh value */
902	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
903	jne	.rw_not_single_reader
904	xorl	%edx, %edx			/* rdx = new value (unheld) */
905.rw_read_exit:
906	lock
907	cmpxchgq %rdx, (%rdi)			/* try to drop read lock */
908	jnz	rw_exit_wakeup
909	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
910	decl	T_KPRI_REQ(%rcx)		/* THREAD_KPRI_RELEASE() */
911.rw_read_exit_lockstat_patch_point:
912	ret
913	movq	%rdi, %rsi			/* rsi = lock ptr */
914	movl	$LS_RW_EXIT_RELEASE, %edi
915	movl	$RW_READER, %edx
916	jmp	lockstat_wrapper_arg
917.rw_not_single_reader:
918	testl	$RW_WRITE_LOCKED, %eax	/* write-locked? */
919	jnz	.rw_write_exit
920	leaq	-RW_READ_LOCK(%rax), %rdx	/* rdx = new value */
921	cmpl	$RW_READ_LOCK, %edx
922	jge	.rw_read_exit		/* not last reader, safe to drop */
923	jmp	rw_exit_wakeup			/* last reader with waiters */
924.rw_write_exit:
925	movq	%gs:CPU_THREAD, %rax		/* rax = thread ptr */
926	xorl	%edx, %edx			/* rdx = new value (unheld) */
927	orq	$RW_WRITE_LOCKED, %rax		/* rax = write-locked value */
928	lock
929	cmpxchgq %rdx, (%rdi)			/* try to drop write lock */
930	jnz	rw_exit_wakeup
931.rw_write_exit_lockstat_patch_point:
932	ret
933	movq	%gs:CPU_THREAD, %rcx		/* rcx = thread ptr */
934	movq	%rdi, %rsi			/* rsi - lock ptr */
935	movl	$LS_RW_EXIT_RELEASE, %edi
936	movl	$RW_WRITER, %edx
937	jmp	lockstat_wrapper_arg
938	SET_SIZE(rw_exit)
939
940#else
941
/*
 * rw_enter (i386): 4(%esp) = rwlock address, 8(%esp) = RW_WRITER or a
 * reader request.
 * Reader: bump curthread's t_kpri_req, then, provided neither
 *	RW_WRITE_LOCKED nor RW_WRITE_WANTED is set, compare-and-swap the lock
 *	word from its current value to that value + RW_READ_LOCK.
 * Writer: compare-and-swap the lock word from 0 to
 *	(curthread | RW_WRITE_LOCKED).
 * Any failed test or cmpxchg tail-jumps to rw_enter_sleep with the stack
 * untouched.  Each lockstat path (reached only if the preceding ret is
 * patched out) pushes RW_READER or RW_WRITER and jumps to
 * lockstat_wrapper_arg, which pops it again.
 */
942	ENTRY(rw_enter)
943	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
944	movl	4(%esp), %ecx			/* ecx = lock ptr */
945	cmpl	$RW_WRITER, 8(%esp)
946	je	.rw_write_enter
947	incl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_REQUEST() */
948	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
949	testl	$RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
950	jnz	rw_enter_sleep
951	leal	RW_READ_LOCK(%eax), %edx	/* edx = new rw_wwwh value */
952	lock
953	cmpxchgl %edx, (%ecx)			/* try to grab read lock */
954	jnz	rw_enter_sleep
955.rw_read_enter_lockstat_patch_point:
956	ret
957	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
958	movl	$LS_RW_ENTER_ACQUIRE, %eax
959	pushl	$RW_READER
960	jmp	lockstat_wrapper_arg
961.rw_write_enter:
962	orl	$RW_WRITE_LOCKED, %edx		/* edx = write-locked value */
963	xorl	%eax, %eax			/* eax = unheld value */
964	lock
965	cmpxchgl %edx, (%ecx)			/* try to grab write lock */
966	jnz	rw_enter_sleep
967
968#if defined(OPTERON_WORKAROUND_6323525)
969.rw_write_enter_lockstat_patch_point:
970.rw_write_enter_6323525_patch_point:
971	ret
972	nop
973	nop
974.rw_write_enter_lockstat_6323525_patch_point:
975	nop
976#else	/* OPTERON_WORKAROUND_6323525 */
977.rw_write_enter_lockstat_patch_point:
978	ret
979#endif	/* OPTERON_WORKAROUND_6323525 */
980
981	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
982	movl	$LS_RW_ENTER_ACQUIRE, %eax
983	pushl	$RW_WRITER
984	jmp	lockstat_wrapper_arg
985	SET_SIZE(rw_enter)
986
/*
 * rw_exit (i386): 4(%esp) = rwlock address.
 * - Lock word == RW_READ_LOCK (one reader, no other bits): cmpxchg it to 0.
 * - RW_WRITE_LOCKED clear: subtract RW_READ_LOCK; if the result is still
 *   >= RW_READ_LOCK (signed compare) cmpxchg to the new value, otherwise go
 *   to rw_exit_wakeup.
 * - RW_WRITE_LOCKED set: cmpxchg from (curthread | RW_WRITE_LOCKED) to 0.
 * Any failed cmpxchg tail-jumps to rw_exit_wakeup.  A successful read exit
 * also decrements curthread's t_kpri_req.  The lockstat paths push
 * RW_READER or RW_WRITER and jump to lockstat_wrapper_arg.
 */
987	ENTRY(rw_exit)
988	movl	4(%esp), %ecx			/* ecx = lock ptr */
989	movl	(%ecx), %eax			/* eax = old rw_wwwh value */
990	cmpl	$RW_READ_LOCK, %eax		/* single-reader, no waiters? */
991	jne	.rw_not_single_reader
992	xorl	%edx, %edx			/* edx = new value (unheld) */
993.rw_read_exit:
994	lock
995	cmpxchgl %edx, (%ecx)			/* try to drop read lock */
996	jnz	rw_exit_wakeup
997	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
998	decl	T_KPRI_REQ(%edx)		/* THREAD_KPRI_RELEASE() */
999.rw_read_exit_lockstat_patch_point:
1000	ret
1001	movl	$LS_RW_EXIT_RELEASE, %eax
1002	pushl	$RW_READER
1003	jmp	lockstat_wrapper_arg
1004.rw_not_single_reader:
1005	testl	$RW_WRITE_LOCKED, %eax	/* write-locked? */
1006	jnz	.rw_write_exit
1007	leal	-RW_READ_LOCK(%eax), %edx	/* edx = new value */
1008	cmpl	$RW_READ_LOCK, %edx
1009	jge	.rw_read_exit		/* not last reader, safe to drop */
1010	jmp	rw_exit_wakeup			/* last reader with waiters */
1011.rw_write_exit:
1012	movl	%gs:CPU_THREAD, %eax		/* eax = thread ptr */
1013	xorl	%edx, %edx			/* edx = new value (unheld) */
1014	orl	$RW_WRITE_LOCKED, %eax		/* eax = write-locked value */
1015	lock
1016	cmpxchgl %edx, (%ecx)			/* try to drop write lock */
1017	jnz	rw_exit_wakeup
1018.rw_write_exit_lockstat_patch_point:
1019	ret
1020	movl	%gs:CPU_THREAD, %edx		/* edx = thread ptr */
1021	movl	$LS_RW_EXIT_RELEASE, %eax
1022	pushl	$RW_WRITER
1023	jmp	lockstat_wrapper_arg
1024	SET_SIZE(rw_exit)
1025
1026#endif	/* !__amd64 */
1027
1028#endif	/* __lint */
1029
1030#if defined(OPTERON_WORKAROUND_6323525)
1031#if defined(lint) || defined(__lint)
1032
/*
 * Lint-only C declarations: the flag and an empty stub for the patch
 * routine; the real definitions are in the #else branch.
 */
1033int	workaround_6323525_patched;
1034
1035void
1036patch_workaround_6323525(void)
1037{}
1038
1039#else	/* lint */
1040
1041/*
1042 * If it is necessary to patch the lock enter routines with the lfence
1043 * workaround, workaround_6323525_patched is set to a non-zero value so that
1044 * the lockstat_hot_patch routine can patch to the new location of the 'ret'
1045 * instruction.
1046 */
/*
 * Flag word, initially 0; patch_workaround_6323525() stores 1 into it.
 * NOTE(review): the two 4s are presumably the object's size and alignment;
 * DGDEF3 is defined outside this file.
 */
1047	DGDEF3(workaround_6323525_patched, 4, 4)
1048	.long	0
1049
1050#if defined(__amd64)
1051
/*
 * HOT_MUTEX_PATCH(srcaddr, dstaddr, size) (amd64)
 * Copies `size' bytes from srcaddr over dstaddr one byte at a time, last
 * byte first, by calling hot_patch_kernel_text with %rdi = destination
 * byte address, %esi = the source byte (zero-extended) and %rdx = 1.
 * Loop state lives in %rbx (bytes remaining), %r12 (source cursor) and
 * %r13 (destination cursor), so the enclosing routine must preserve those
 * registers for its own caller.  Uses the local label 0.
 */
1052#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
1053	movq	$size, %rbx;			\
1054	movq	$dstaddr, %r13;			\
1055	addq	%rbx, %r13;			\
1056	movq	$srcaddr, %r12;			\
1057	addq	%rbx, %r12;			\
10580:						\
1059	decq	%r13;				\
1060	decq	%r12;				\
1061	movzbl	(%r12), %esi;			\
1062	movq	$1, %rdx;			\
1063	movq	%r13, %rdi;			\
1064	call	hot_patch_kernel_text;		\
1065	decq	%rbx;				\
1066	testq	%rbx, %rbx;			\
1067	jg	0b;
1068
1069/*
1070 * patch_workaround_6323525: provide workaround for 6323525
1071 *
1072 * The workaround is to place a fencing instruction (lfence) between the
1073 * mutex operation and the subsequent read-modify-write instruction.
1074 *
1075 * This routine hot patches the lfence instruction on top of the space
1076 * reserved by nops in the lock enter routines.
1077 */
/*
 * patch_workaround_6323525 (amd64): no arguments, no return value.
 * Sets workaround_6323525_patched to 1 and then overwrites the 4-byte
 * ret/nop/nop/nop sequence at each *_6323525_patch_point with the 4 bytes
 * at _lfence_insn (lfence; ret), using HOT_MUTEX_PATCH.
 *
 * Stack layout: the return address, %rbp and the three callee-saved
 * registers used by HOT_MUTEX_PATCH total 40 bytes, which would leave %rsp
 * 8 bytes off a 16-byte boundary at every call to hot_patch_kernel_text.
 * An 8-byte pad restores the alignment the amd64 ABI requires at call sites.
 */
1078	ENTRY_NP(patch_workaround_6323525)
1079	pushq	%rbp
1080	movq	%rsp, %rbp
1081	pushq	%r12
1082	pushq	%r13
1083	pushq	%rbx
	subq	$8, %rsp		/* pad: keep %rsp 16-byte aligned at calls */
1084
1085	/*
1086	 * lockstat_hot_patch() to use the alternate lockstat workaround
1087	 * 6323525 patch points (points past the lfence instruction to the
1088	 * new ret) when workaround_6323525_patched is set.
1089	 */
1090	movl	$1, workaround_6323525_patched
1091
1092	/*
1093	 * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
1094	 * routines. The 4 bytes are patched in reverse order so that
1095	 * the existing ret is overwritten last. This provides lock enter
1096	 * sanity during the intermediate patching stages.
1097	 */
1098	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
1099	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
1100	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
1101	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
1102
	addq	$8, %rsp		/* drop the alignment pad */
1103	popq	%rbx
1104	popq	%r13
1105	popq	%r12
1106	movq	%rbp, %rsp
1107	popq	%rbp
1108	ret
1109_lfence_insn:			/* 4-byte patch source, never executed here */
1110	lfence
1111	ret
1112	SET_SIZE(patch_workaround_6323525)
1113
1114
1115#else	/* __amd64 */
1116
/*
 * HOT_MUTEX_PATCH(srcaddr, dstaddr, size) (i386)
 * Copies `size' bytes from srcaddr over dstaddr one byte at a time, last
 * byte first, by calling hot_patch_kernel_text with three stack arguments:
 * destination byte address, the source byte (zero-extended) and 1.
 * Loop state lives in %ebx (bytes remaining), %esi (source cursor) and
 * %edi (destination cursor), so the enclosing routine must preserve those
 * registers for its own caller.  Uses the local label 0.
 */
1117#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size)	\
1118	movl	$size, %ebx;			\
1119	movl	$srcaddr, %esi;			\
1120	addl	%ebx, %esi;			\
1121	movl	$dstaddr, %edi;			\
1122	addl	%ebx, %edi;			\
11230:      					\
1124	decl	%esi;				\
1125	decl	%edi;				\
1126	pushl	$1;				\
1127	movzbl	(%esi), %eax;			\
1128	pushl	%eax;				\
1129	pushl	%edi;				\
1130	call	hot_patch_kernel_text;		\
1131	addl	$12, %esp;			\
1132	decl	%ebx;				\
1133	testl	%ebx, %ebx;			\
1134	jg	0b;
1135
1136
/*
 * void patch_workaround_6323525(void)		[i386]
 *
 * Sets workaround_6323525_patched, then uses HOT_MUTEX_PATCH to copy the
 * 4 bytes at _lfence_insn (lfence followed by ret) over each of the lock
 * enter 6323525 patch points.  HOT_MUTEX_PATCH writes the bytes last to
 * first.
 */
	ENTRY_NP(patch_workaround_6323525)
	pushl	%ebp
	movl	%esp, %ebp
	/* callee-saved registers that HOT_MUTEX_PATCH uses as scratch */
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* lockstat_hot_patch() tests this flag to pick its patch points */
	movl	$1, workaround_6323525_patched

	HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
	HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
	HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
	HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)

	popl	%edi
	popl	%esi
	popl	%ebx
	leave
	ret
_lfence_insn:				/* patch source bytes */
	.byte	0xf, 0xae, 0xe8		/* lfence instruction */
	ret
	SET_SIZE(patch_workaround_6323525)
1162
1163#endif	/* !__amd64 */
1164#endif	/* !lint */
1165#endif	/* OPTERON_WORKAROUND_6323525 */
1166
1167
1168#if defined(lint) || defined(__lint)
1169
/*
 * Lint-only stub for lockstat_hot_patch().  It takes no arguments, returns
 * nothing and has an empty body; the real routine is the assembly in the
 * non-lint branch of this conditional.
 */
1170void
1171lockstat_hot_patch(void)
1172{}
1173
1174#else
1175
1176#if defined(__amd64)
1177
/*
 * HOT_PATCH(addr, event, active_instr, normal_instr, len)	[amd64]
 *
 * Calls hot_patch_kernel_text(addr, instr, len).  instr is active_instr
 * when the 32-bit entry at lockstat_probemap + event * DTRACE_IDSIZE is
 * non-zero, and normal_instr when that entry is zero.
 *
 * Arguments are passed in %rdi (addr), %rsi (instr) and %rdx (len);
 * %rax is used as scratch.
 */
#define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
	leaq	lockstat_probemap(%rip), %rax;	\
	movq	$normal_instr, %rsi;		\
	cmpl	$0, _MUL(event, DTRACE_IDSIZE)(%rax);	\
	je	9f;				\
	movq	$active_instr, %rsi;		\
9:						\
	movq	$addr, %rdi;			\
	movq	$len, %rdx;			\
	call	hot_patch_kernel_text
1190
1191#else
1192
/*
 * HOT_PATCH(addr, event, active_instr, normal_instr, len)	[i386]
 *
 * Calls hot_patch_kernel_text(addr, instr, len) with the arguments pushed
 * on the stack and popped again afterwards.  instr is active_instr when
 * the 32-bit entry at lockstat_probemap + event * DTRACE_IDSIZE is
 * non-zero, and normal_instr when that entry is zero.
 *
 * Scratch registers: %eax, %ecx, %edx.
 *
 * The skip over the "select active_instr" move uses local label 9, as the
 * amd64 variant of this macro does, rather than a hand-counted byte
 * displacement that depends on how the instructions are encoded.
 */
#define	HOT_PATCH(addr, event, active_instr, normal_instr, len)	\
	movl	$normal_instr, %ecx;		\
	movl	$active_instr, %edx;		\
	movl	$lockstat_probemap, %eax;	\
	movl	_MUL(event, DTRACE_IDSIZE)(%eax), %eax;	\
	testl	%eax, %eax;			\
	jz	9f;				\
	movl	%edx, %ecx;			\
9:						\
	pushl	$len;				\
	pushl	%ecx;				\
	pushl	$addr;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;
1206
1207#endif	/* !__amd64 */
1208
/*
 * void lockstat_hot_patch(void)
 *
 * Runs HOT_PATCH once for every lockstat patch point in the lock
 * primitives.  Each invocation names the patch point, the lockstat event
 * whose lockstat_probemap entry is consulted, the instruction installed
 * when that entry is non-zero, the instruction installed when it is
 * zero, and the patch length in bytes.
 */
	ENTRY(lockstat_hot_patch)
#if defined(__amd64)
	pushq	%rbp			/* align stack properly */
	movq	%rsp, %rbp
#endif	/* __amd64 */

#if defined(OPTERON_WORKAROUND_6323525)
	/*
	 * When workaround_6323525_patched is non-zero (it is set by
	 * patch_workaround_6323525()), the mutex_enter, mutex_tryenter and
	 * rw_write_enter probes are patched at their alternate 6323525
	 * patch points and the standard ones below are skipped.
	 */
	cmpl	$0, workaround_6323525_patched
	je	1f
	HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	jmp	2f
1:
#endif	/* OPTERON_WORKAROUND_6323525 */

	/* standard enter patch points */
	HOT_PATCH(.mutex_enter_lockstat_patch_point,
		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
		LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.rw_write_enter_lockstat_patch_point,
		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)

#if defined(OPTERON_WORKAROUND_6323525)
2:
#endif	/* OPTERON_WORKAROUND_6323525 */

	/* patch points that do not depend on the workaround */
	HOT_PATCH(.mutex_exit_lockstat_patch_point,
		LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.rw_read_enter_lockstat_patch_point,
		LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.rw_write_exit_lockstat_patch_point,
		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.rw_read_exit_lockstat_patch_point,
		LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.lock_set_lockstat_patch_point,
		LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.lock_try_lockstat_patch_point,
		LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.lock_clear_lockstat_patch_point,
		LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
	HOT_PATCH(.lock_set_spl_lockstat_patch_point,
		LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)

	/*
	 * lock_clear_splx: amd64 patches in a computed value (or 0 when the
	 * probe is off); i386 uses the usual nop/ret pair.
	 */
#if defined(__amd64)
	HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
		LS_LOCK_CLEAR_SPLX_RELEASE,
		LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
	leave			/* unwind stack */
#else
	HOT_PATCH(.lock_clear_splx_lockstat_patch_point,
		LS_LOCK_CLEAR_SPLX_RELEASE, NOP_INSTR, RET_INSTR, 1)
#endif	/* !__amd64 */
	ret
	SET_SIZE(lockstat_hot_patch)
1272
1273#endif	/* __lint */
1274
1275#if defined(lint) || defined(__lint)
1276
1277/* XX64 membar_*() should be inlines */
1278
/*
 * Lint-only stub for membar_enter(): no arguments, no return value, empty
 * body.  The real routine is the assembly in the non-lint branch.
 */
1279void
1280membar_enter(void)
1281{}
1282
/*
 * Lint-only stub for membar_exit(): no arguments, no return value, empty
 * body.  The real routine is the assembly in the non-lint branch, where
 * it is an alternate entry point of membar_enter.
 */
1283void
1284membar_exit(void)
1285{}
1286
/*
 * Lint-only stub for membar_producer(): no arguments, no return value,
 * empty body.  The real routine is the assembly in the non-lint branch.
 */
1287void
1288membar_producer(void)
1289{}
1290
/*
 * Lint-only stub for membar_consumer(): no arguments, no return value,
 * empty body.  The real routine is the assembly in the non-lint branch.
 */
1291void
1292membar_consumer(void)
1293{}
1294
1295#else	/* __lint */
1296
1297#if defined(__amd64)
1298
/*
 * membar_enter() / membar_exit()		[amd64]
 * One routine with two entry names (membar_exit is an ALTENTRY of
 * membar_enter): it executes a single mfence and returns.
 */
1299	ENTRY(membar_enter)
1300	ALTENTRY(membar_exit)
1301	mfence			/* lighter weight than lock; xorq $0,(%rsp) */
1302	ret
1303	SET_SIZE(membar_exit)
1304	SET_SIZE(membar_enter)
1305
/*
 * membar_producer()				[amd64]
 * Executes a single sfence (store fence) and returns.
 */
1306	ENTRY(membar_producer)
1307	sfence
1308	ret
1309	SET_SIZE(membar_producer)
1310
/*
 * membar_consumer()				[amd64]
 * Executes a single lfence (load fence) and returns.
 */
1311	ENTRY(membar_consumer)
1312	lfence
1313	ret
1314	SET_SIZE(membar_consumer)
1315
1316#else
1317
/*
 * membar_enter() / membar_exit()		[i386]
 * One routine with two entry names (membar_exit is an ALTENTRY of
 * membar_enter).  It performs a lock-prefixed xorl of 0 into the word at
 * the top of the stack: the memory contents are unchanged, and the locked
 * read-modify-write is what provides the barrier.
 */
1318	ENTRY(membar_enter)
1319	ALTENTRY(membar_exit)
1320	lock
1321	xorl	$0, (%esp)
1322	ret
1323	SET_SIZE(membar_exit)
1324	SET_SIZE(membar_enter)
1325
1326/*
1327 * On machines that support sfence and lfence, these
1328 * memory barriers can be more precisely implemented
1329 * without causing the whole world to stop
1330 */
/*
 * membar_producer()				[i386]
 * Default body is a lock-prefixed xorl of 0 into the word at the top of
 * the stack (memory contents unchanged), followed by ret.
 *
 * The body is also exported under the global label _patch_sfence_ret.
 * NOTE(review): presumably code elsewhere overwrites these bytes with
 * sfence/ret on CPUs that support sfence -- confirm against the patching
 * code before changing the instructions or their encoding here.
 */
1331	ENTRY(membar_producer)
1332	.globl	_patch_sfence_ret
1333_patch_sfence_ret:			/* c.f. membar #StoreStore */
1334	lock
1335	xorl	$0, (%esp)
1336	ret
1337	SET_SIZE(membar_producer)
1338
/*
 * membar_consumer()				[i386]
 * Default body is a lock-prefixed xorl of 0 into the word at the top of
 * the stack (memory contents unchanged), followed by ret.
 *
 * The body is also exported under the global label _patch_lfence_ret.
 * NOTE(review): presumably code elsewhere overwrites these bytes with
 * lfence/ret on CPUs that support lfence -- confirm against the patching
 * code before changing the instructions or their encoding here.
 */
1339	ENTRY(membar_consumer)
1340	.globl	_patch_lfence_ret
1341_patch_lfence_ret:			/* c.f. membar #LoadLoad */
1342	lock
1343	xorl	$0, (%esp)
1344	ret
1345	SET_SIZE(membar_consumer)
1346
1347#endif	/* !__amd64 */
1348
1349#endif	/* __lint */
1350
1351/*
1352 * thread_onproc()
1353 * Set thread in onproc state for the specified CPU.
1354 * Also set the thread lock pointer to the CPU's onproc lock.
1355 * Since the new lock isn't held, the store ordering is important.
1356 * If not done in assembler, the compiler could reorder the stores.
1357 */
1358#if defined(lint) || defined(__lint)
1359
/*
 * Lint-only C rendering of thread_onproc(); the real routine is the
 * assembly in the non-lint branch.
 *
 *	t	thread whose state and lock pointer are updated
 *	cp	CPU whose cpu_thread_lock becomes the thread lock
 */
1360void
1361thread_onproc(kthread_id_t t, cpu_t *cp)
1362{
1363	t->t_state = TS_ONPROC;		/* state is written first */
1364	t->t_lockp = &cp->cpu_thread_lock;	/* then the new lock pointer */
1365}
1366
1367#else	/* __lint */
1368
1369#if defined(__amd64)
1370
/*
 * void thread_onproc(kthread_id_t t, cpu_t *cp)	[amd64]
 *
 * In:	%rdi = t, %rsi = cp
 * Uses %rax as scratch.
 *
 * Writes ONPROC_THREAD to T_STATE(t) and then cp + CPU_THREAD_LOCK to
 * T_LOCKP(t).  The state store must be issued before the lock pointer
 * store; do not reorder the two.
 */
	ENTRY(thread_onproc)
	leaq	CPU_THREAD_LOCK(%rsi), %rax	/* disp_lock used while running */
	movl	$ONPROC_THREAD, T_STATE(%rdi)	/* first: state = TS_ONPROC */
	movq	%rax, T_LOCKP(%rdi)		/* second: new lock pointer */
	ret
	SET_SIZE(thread_onproc)
1377
1378#else
1379
/*
 * void thread_onproc(kthread_id_t t, cpu_t *cp)	[i386]
 *
 * In:	4(%esp) = t, 8(%esp) = cp
 * Uses %eax and %ecx as scratch.
 *
 * Writes ONPROC_THREAD to T_STATE(t) and then cp + CPU_THREAD_LOCK to
 * T_LOCKP(t).  The state store must be issued before the lock pointer
 * store; do not reorder the two.
 */
	ENTRY(thread_onproc)
	movl	8(%esp), %ecx			/* %ecx = cp */
	movl	4(%esp), %eax			/* %eax = t */
	leal	CPU_THREAD_LOCK(%ecx), %ecx	/* disp_lock used while running */
	movl	$ONPROC_THREAD, T_STATE(%eax)	/* first: state = TS_ONPROC */
	movl	%ecx, T_LOCKP(%eax)		/* second: new lock pointer */
	ret
	SET_SIZE(thread_onproc)
1388
1389#endif	/* !__amd64 */
1390
1391#endif	/* __lint */
1392