xref: /linux/rust/zerocopy/benches/extend_vec_zeroed.x86-64.mca (revision b079329b8691768962aa514b8f8c9077ca352459)
1*c3739801SMiguel OjedaIterations:        100
2*c3739801SMiguel OjedaInstructions:      5400
3*c3739801SMiguel OjedaTotal Cycles:      6595
4*c3739801SMiguel OjedaTotal uOps:        6800
5*c3739801SMiguel Ojeda
6*c3739801SMiguel OjedaDispatch Width:    4
7*c3739801SMiguel OjedauOps Per Cycle:    1.03
8*c3739801SMiguel OjedaIPC:               0.82
9*c3739801SMiguel OjedaBlock RThroughput: 17.0
10*c3739801SMiguel Ojeda
11*c3739801SMiguel Ojeda
12*c3739801SMiguel OjedaInstruction Info:
13*c3739801SMiguel Ojeda[1]: #uOps
14*c3739801SMiguel Ojeda[2]: Latency
15*c3739801SMiguel Ojeda[3]: RThroughput
16*c3739801SMiguel Ojeda[4]: MayLoad
17*c3739801SMiguel Ojeda[5]: MayStore
18*c3739801SMiguel Ojeda[6]: HasSideEffects (U)
19*c3739801SMiguel Ojeda
20*c3739801SMiguel Ojeda[1]    [2]    [3]    [4]    [5]    [6]    Instructions:
21*c3739801SMiguel Ojeda 2      5     1.00           *            push	r15
22*c3739801SMiguel Ojeda 2      5     1.00           *            push	r14
23*c3739801SMiguel Ojeda 2      5     1.00           *            push	r13
24*c3739801SMiguel Ojeda 2      5     1.00           *            push	r12
25*c3739801SMiguel Ojeda 2      5     1.00           *            push	rbx
26*c3739801SMiguel Ojeda 1      1     0.33                        sub	rsp, 32
27*c3739801SMiguel Ojeda 1      1     0.33                        mov	rbx, rdi
28*c3739801SMiguel Ojeda 1      5     0.50    *                   mov	rax, qword ptr [rdi]
29*c3739801SMiguel Ojeda 1      5     0.50    *                   mov	r12, qword ptr [rdi + 16]
30*c3739801SMiguel Ojeda 1      1     0.33                        mov	rcx, rax
31*c3739801SMiguel Ojeda 1      1     0.33                        sub	rcx, r12
32*c3739801SMiguel Ojeda 1      1     0.33                        cmp	rsi, rcx
33*c3739801SMiguel Ojeda 1      1     1.00                        jbe	.LBB6_3
34*c3739801SMiguel Ojeda 1      1     0.33                        mov	r15, r12
35*c3739801SMiguel Ojeda 1      1     0.33                        add	r15, rsi
36*c3739801SMiguel Ojeda 1      1     1.00                        jae	.LBB6_6
37*c3739801SMiguel Ojeda 1      0     0.25                        xor	eax, eax
38*c3739801SMiguel Ojeda 1      1     1.00                        jmp	.LBB6_5
39*c3739801SMiguel Ojeda 1      5     0.50    *                   mov	rax, qword ptr [rbx + 8]
40*c3739801SMiguel Ojeda 1      1     0.50                        lea	r15, [r12 + rsi]
41*c3739801SMiguel Ojeda 1      1     0.50                        lea	rcx, [r12 + 2*r12]
42*c3739801SMiguel Ojeda 1      1     0.50                        lea	rdi, [rax + 2*rcx]
43*c3739801SMiguel Ojeda 1      1     0.33                        add	rsi, rsi
44*c3739801SMiguel Ojeda 1      1     0.50                        lea	rdx, [rsi + 2*rsi]
45*c3739801SMiguel Ojeda 1      0     0.25                        xor	esi, esi
46*c3739801SMiguel Ojeda 4      7     1.00    *                   call	qword ptr [rip + memset@GOTPCREL]
47*c3739801SMiguel Ojeda 1      1     1.00           *            mov	qword ptr [rbx + 16], r15
48*c3739801SMiguel Ojeda 1      1     0.33                        mov	al, 1
49*c3739801SMiguel Ojeda 1      1     0.33                        add	rsp, 32
50*c3739801SMiguel Ojeda 1      6     0.50    *                   pop	rbx
51*c3739801SMiguel Ojeda 1      6     0.50    *                   pop	r12
52*c3739801SMiguel Ojeda 1      6     0.50    *                   pop	r13
53*c3739801SMiguel Ojeda 1      6     0.50    *                   pop	r14
54*c3739801SMiguel Ojeda 1      6     0.50    *                   pop	r15
55*c3739801SMiguel Ojeda 1      1     1.00                  U     ret
56*c3739801SMiguel Ojeda 1      1     0.33                        mov	r13, rsi
57*c3739801SMiguel Ojeda 1      1     0.50                        lea	rcx, [rax + rax]
58*c3739801SMiguel Ojeda 1      1     0.33                        cmp	r15, rcx
59*c3739801SMiguel Ojeda 3      3     1.00                        cmova	rcx, r15
60*c3739801SMiguel Ojeda 1      1     0.33                        cmp	rcx, 5
61*c3739801SMiguel Ojeda 1      1     0.33                        mov	r14d, 4
62*c3739801SMiguel Ojeda 2      2     0.67                        cmovae	r14, rcx
63*c3739801SMiguel Ojeda 1      5     0.50    *                   mov	rdx, qword ptr [rbx + 8]
64*c3739801SMiguel Ojeda 1      1     0.50                        lea	rdi, [rsp + 8]
65*c3739801SMiguel Ojeda 1      1     0.33                        mov	rsi, rax
66*c3739801SMiguel Ojeda 1      1     0.33                        mov	rcx, r14
67*c3739801SMiguel Ojeda 3      5     1.00                        call	<alloc::raw_vec::RawVecInner>::finish_grow
68*c3739801SMiguel Ojeda 2      6     0.50    *                   cmp	dword ptr [rsp + 8], 1
69*c3739801SMiguel Ojeda 1      1     1.00                        je	.LBB6_2
70*c3739801SMiguel Ojeda 1      5     0.50    *                   mov	rax, qword ptr [rsp + 16]
71*c3739801SMiguel Ojeda 1      1     1.00           *            mov	qword ptr [rbx + 8], rax
72*c3739801SMiguel Ojeda 1      1     1.00           *            mov	qword ptr [rbx], r14
73*c3739801SMiguel Ojeda 1      1     0.33                        mov	rsi, r13
74*c3739801SMiguel Ojeda 1      1     1.00                        jmp	.LBB6_4
75*c3739801SMiguel Ojeda
76*c3739801SMiguel Ojeda
77*c3739801SMiguel OjedaResources:
78*c3739801SMiguel Ojeda[0]   - SBDivider
79*c3739801SMiguel Ojeda[1]   - SBFPDivider
80*c3739801SMiguel Ojeda[2]   - SBPort0
81*c3739801SMiguel Ojeda[3]   - SBPort1
82*c3739801SMiguel Ojeda[4]   - SBPort4
83*c3739801SMiguel Ojeda[5]   - SBPort5
84*c3739801SMiguel Ojeda[6.0] - SBPort23
85*c3739801SMiguel Ojeda[6.1] - SBPort23
86*c3739801SMiguel Ojeda
87*c3739801SMiguel Ojeda
88*c3739801SMiguel OjedaResource pressure per iteration:
89*c3739801SMiguel Ojeda[0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
90*c3739801SMiguel Ojeda -      -     12.00  12.00  10.00  13.00  11.00  11.00
91*c3739801SMiguel Ojeda
92*c3739801SMiguel OjedaResource pressure by instruction:
93*c3739801SMiguel Ojeda[0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
94*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.49   0.51   push	r15
95*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.51   0.49   push	r14
96*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.50   0.50   push	r13
97*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.50   0.50   push	r12
98*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.50   0.50   push	rbx
99*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     sub	rsp, 32
100*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     mov	rbx, rdi
101*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   mov	rax, qword ptr [rdi]
102*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   mov	r12, qword ptr [rdi + 16]
103*c3739801SMiguel Ojeda -      -      -     1.00    -      -      -      -     mov	rcx, rax
104*c3739801SMiguel Ojeda -      -      -     0.99    -     0.01    -      -     sub	rcx, r12
105*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     cmp	rsi, rcx
106*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     jbe	.LBB6_3
107*c3739801SMiguel Ojeda -      -     0.01   0.98    -     0.01    -      -     mov	r15, r12
108*c3739801SMiguel Ojeda -      -     0.99   0.01    -      -      -      -     add	r15, rsi
109*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     jae	.LBB6_6
110*c3739801SMiguel Ojeda -      -      -      -      -      -      -      -     xor	eax, eax
111*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     jmp	.LBB6_5
112*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   mov	rax, qword ptr [rbx + 8]
113*c3739801SMiguel Ojeda -      -     1.00    -      -      -      -      -     lea	r15, [r12 + rsi]
114*c3739801SMiguel Ojeda -      -     0.98   0.02    -      -      -      -     lea	rcx, [r12 + 2*r12]
115*c3739801SMiguel Ojeda -      -     0.99   0.01    -      -      -      -     lea	rdi, [rax + 2*rcx]
116*c3739801SMiguel Ojeda -      -      -     1.00    -      -      -      -     add	rsi, rsi
117*c3739801SMiguel Ojeda -      -     0.99   0.01    -      -      -      -     lea	rdx, [rsi + 2*rsi]
118*c3739801SMiguel Ojeda -      -      -      -      -      -      -      -     xor	esi, esi
119*c3739801SMiguel Ojeda -      -      -      -     1.00   1.00   1.00   1.00   call	qword ptr [rip + memset@GOTPCREL]
120*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.50   0.50   mov	qword ptr [rbx + 16], r15
121*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     mov	al, 1
122*c3739801SMiguel Ojeda -      -     1.00    -      -      -      -      -     add	rsp, 32
123*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   pop	rbx
124*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   pop	r12
125*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   pop	r13
126*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   pop	r14
127*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   pop	r15
128*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     ret
129*c3739801SMiguel Ojeda -      -     1.00    -      -      -      -      -     mov	r13, rsi
130*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     lea	rcx, [rax + rax]
131*c3739801SMiguel Ojeda -      -     0.99   0.01    -      -      -      -     cmp	r15, rcx
132*c3739801SMiguel Ojeda -      -     2.00   0.01    -     0.99    -      -     cmova	rcx, r15
133*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     cmp	rcx, 5
134*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     mov	r14d, 4
135*c3739801SMiguel Ojeda -      -     1.00   0.01    -     0.99    -      -     cmovae	r14, rcx
136*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   mov	rdx, qword ptr [rbx + 8]
137*c3739801SMiguel Ojeda -      -     0.01   0.99    -      -      -      -     lea	rdi, [rsp + 8]
138*c3739801SMiguel Ojeda -      -      -     1.00    -      -      -      -     mov	rsi, rax
139*c3739801SMiguel Ojeda -      -      -     0.01    -     0.99    -      -     mov	rcx, r14
140*c3739801SMiguel Ojeda -      -      -      -     1.00   1.00   0.50   0.50   call	<alloc::raw_vec::RawVecInner>::finish_grow
141*c3739801SMiguel Ojeda -      -      -     0.99    -     0.01   0.50   0.50   cmp	dword ptr [rsp + 8], 1
142*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     je	.LBB6_2
143*c3739801SMiguel Ojeda -      -      -      -      -      -     0.50   0.50   mov	rax, qword ptr [rsp + 16]
144*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.49   0.51   mov	qword ptr [rbx + 8], rax
145*c3739801SMiguel Ojeda -      -      -      -     1.00    -     0.51   0.49   mov	qword ptr [rbx], r14
146*c3739801SMiguel Ojeda -      -     0.99   0.01    -      -      -      -     mov	rsi, r13
147*c3739801SMiguel Ojeda -      -      -      -      -     1.00    -      -     jmp	.LBB6_4
148