|
#define | LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" |
|
#define | ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" |
|
#define | XO1(x, y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" |
|
#define | XO2(x, y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" |
|
#define | XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" |
|
#define | XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | OFFS(x) "16*("#x")" |
|
#define | PF_OFFS(x) "256+16*("#x")" |
|
#define | PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" |
|
#define | LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" |
|
#define | ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" |
|
#define | PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" |
|
#define | PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" |
|
#define | PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" |
|
#define | PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" |
|
#define | PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" |
|
#define | XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" |
|
#define | XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" |
|
#define | XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" |
|
#define | XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" |
|
#define | XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | BLOCK(i) |
|
#define | XOR_TRY_TEMPLATES |
|
#define | XOR_SELECT_TEMPLATE(FASTEST) AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) |
|