#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

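/*
 * Optimized RAID-5 checksumming functions for AVX.
 *
 * Each xor_avx_N() routine below XORs N - 1 source buffers into the
 * destination buffer p0, 512 bytes per loop iteration, using ymm0-ymm3.
 */
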
#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/i387.h>	/* kernel_fpu_begin()/kernel_fpu_end() */

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)

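/*
 * BLOCK(offset, reg) is redefined inside each routine below; BLOCK16()
 * therefore expands to sixteen 32-byte operations covering one 512-byte
 * chunk per loop pass, which is why 'lines' is computed as bytes >> 9.
 */
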
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}

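/*
 * xor_avx_3/4/5 below follow the same structure as xor_avx_2: each
 * additional source buffer adds one more vxorps to the per-block sequence.
 */
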
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

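/*
 * AVX_XOR_SPEED benchmarks the AVX routines during xor template
 * calibration; AVX_SELECT() picks the AVX template whenever the CPU
 * advertises AVX support and otherwise keeps the FASTEST one found.
 */
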
#define AVX_XOR_SPEED \
do { \
	if (cpu_has_avx) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(cpu_has_avx ? &xor_block_avx : FASTEST)

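/*
 * Usage sketch (an assumption for illustration, not part of this file):
 * the x86 <asm/xor.h> wrappers are expected to hook these macros into
 * the generic xor template selection roughly as follows:
 *
 *	#undef XOR_TRY_TEMPLATES
 *	#define XOR_TRY_TEMPLATES	\
 *	do {				\
 *		AVX_XOR_SPEED;		\
 *		xor_speed(&xor_block_sse); \
 *	} while (0)
 *
 *	#define XOR_SELECT_TEMPLATE(FASTEST)	AVX_SELECT(FASTEST)
 *
 * The exact list of templates tried varies by kernel version and
 * configuration; this is only an illustration.
 */
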
#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif /* _ASM_X86_XOR_AVX_H */