#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants  __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};
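
/*
 * Illustrative sketch (not in the original file): the scalar equivalent of
 * the pcmpgtb/paddb/pand/pxor sequence used below.  Each byte of the Q
 * accumulator is doubled in GF(2^8); bytes whose top bit was set are reduced
 * with 0x1d (polynomial 0x11d), the value replicated in x1d above.  The
 * helper name gf2_mul2 is hypothetical.
 */
static inline u8 gf2_mul2(u8 v)
{
	return (u8)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}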
static int raid6_have_sse2(void)
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
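		/*
		 * Added note: the loop above loads each data block one
		 * iteration ahead (into xmm6), so one more double-and-fold
		 * step is peeled off here to absorb the last loaded block
		 * (disk 0).
		 */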
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
	raid6_sse21_gen_syndrome,
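
/*
 * Added note: the line above is the gen_syndrome member of a
 * struct raid6_calls descriptor (the rest of the initializer is not shown
 * here).  The raid6 core benchmarks every registered descriptor at init
 * time and selects the fastest available implementation.
 */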
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
	raid6_sse22_gen_syndrome,
#if defined(__x86_64__) && !defined(__arch_um__)
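
/*
 * Added note: the four-way unrolled variant below also uses xmm8-xmm15,
 * which are only architecturally available in 64-bit mode, hence the extra
 * __x86_64__ guard.
 */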
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */
	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
	raid6_sse24_gen_syndrome,