Go to the documentation of this file. 1 #ifndef _ASM_X86_XOR_64_H
2 #define _ASM_X86_XOR_64_H
/*
 * String-building helpers for GCC extended-asm templates.
 *
 * Every macro expands to one or more adjacent string literals, which the
 * compiler concatenates into a single template string.  The instruction
 * macros each end in " ;\n" so they can be written back to back.
 *
 *   x - stringified with #x into an offset expression
 *   y - stringified with #y and appended to "%%xmm" to name an XMM
 *       register ("%%" is how an extended-asm template spells a literal '%')
 *
 * %[p1] .. %[p6] are named asm operands; the asm statement that uses a
 * macro must bind the operand that macro refers to.
 * NOTE(review): presumably these are consumed by the xor_sse_* routines
 * in this file, but their asm bodies are not visible here - confirm.
 */

/* Offset text "16*(x)", i.e. x counted in 16-byte units. */
39 #define OFFS(x) "16*("#x")"
/*
 * Offset text "256+16*(x)": the same slot, 256 further on.  Used only by
 * the PFn prefetch macros below.
 * NOTE(review): 256 matches the "bytes >> 8" line count in the xor_sse_*
 * routines, so this presumably targets the next loop iteration - confirm.
 */
40 #define PF_OFFS(x) "256+16*("#x")"
/* prefetchnta (non-temporal prefetch hint) at PF_OFFS(x) from %[p1]. */
41 #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
/* movaps load: memory at OFFS(x) from %[p1] -> register %xmm<y>. */
42 #define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
/* movaps store: register %xmm<y> -> memory at OFFS(x) from %[p1]. */
43 #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
/*
 * PF1..PF5: prefetchnta at PF_OFFS(x) from %[p2]..%[p6] respectively
 * (PFn addresses operand p<n+1>).
 */
44 #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
45 #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
46 #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
47 #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
48 #define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
/*
 * XO1..XO5: xorps the memory at OFFS(x) from %[p2]..%[p6] respectively
 * into register %xmm<y> (XOn addresses operand p<n+1>); the result stays
 * in the register until an ST() writes it out.
 */
49 #define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
50 #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
51 #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
52 #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
53 #define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
57 xor_sse_2(
unsigned long bytes,
unsigned long *
p1,
unsigned long *p2)
59 unsigned int lines = bytes >> 8;
95 " addq %[inc], %[p1] ;\n"
96 " addq %[inc], %[p2] ;\n"
97 " decl %[cnt] ; jnz 1b"
98 : [
p1]
"+r" (
p1), [p2]
"+r" (p2), [
cnt]
"+r" (lines)
106 xor_sse_3(
unsigned long bytes,
unsigned long *
p1,
unsigned long *p2,
109 unsigned int lines = bytes >> 8;
150 " addq %[
inc], %[p1] ;\
n"
151 " addq %[inc], %[p2] ;\
n"
152 " addq %[inc], %[p3] ;\
n"
153 " decl %[
cnt] ; jnz 1
b"
154 : [cnt] "+
r" (lines),
155 [p1] "+
r" (p1), [p2] "+
r" (p2), [p3] "+
r" (p3)
162 xor_sse_4(
unsigned long bytes,
unsigned long *p1,
unsigned long *p2,
163 unsigned long *p3,
unsigned long *p4)
165 unsigned int lines = bytes >> 8;
213 " addq %[inc], %[p1] ;\
n"
214 " addq %[inc], %[p2] ;\
n"
215 " addq %[inc], %[p3] ;\
n"
216 " addq %[inc], %[p4] ;\
n"
217 " decl %[cnt] ; jnz 1
b"
218 : [cnt] "+
c" (lines),
219 [p1] "+
r" (p1), [p2] "+
r" (p2), [p3] "+
r" (p3), [p4] "+
r" (p4)
227 xor_sse_5(
unsigned long bytes,
unsigned long *p1,
unsigned long *p2,
228 unsigned long *p3,
unsigned long *p4,
unsigned long *p5)
230 unsigned int lines = bytes >> 8;
284 " addq %[inc], %[p1] ;\
n"
285 " addq %[inc], %[p2] ;\
n"
286 " addq %[inc], %[p3] ;\
n"
287 " addq %[inc], %[p4] ;\
n"
288 " addq %[inc], %[p5] ;\
n"
289 " decl %[cnt] ; jnz 1
b"
290 : [cnt] "+
c" (lines),
291 [p1] "+
r" (p1), [p2] "+
r" (p2), [p3] "+
r" (p3), [p4] "+
r" (p4),
300 .
name =
"generic_sse",
311 #undef XOR_TRY_TEMPLATES
312 #define XOR_TRY_TEMPLATES \
315 xor_speed(&xor_block_sse); \
/*
 * Template selection hook.  The FASTEST argument is accepted but never
 * referenced in the expansion; the macro always expands to
 * AVX_SELECT(&xor_block_sse).
 * NOTE(review): AVX_SELECT and xor_block_sse are defined outside this
 * view - presumably AVX_SELECT may substitute an AVX template when one is
 * usable; confirm at its definition.
 */
321 #define XOR_SELECT_TEMPLATE(FASTEST) \
322 AVX_SELECT(&xor_block_sse)