Linux Kernel 3.7.1
recov_ssse3.c
/*
 * Copyright (C) 2012 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_ssse3(void)
{
	return boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2) &&
		boot_cpu_has(X86_FEATURE_SSSE3);
}

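/*
 * Recover two failed data blocks.  The approach matches the generic C
 * recovery in lib/raid6/recov.c: substitute the zero page for both failed
 * blocks and regenerate the syndromes, so the dead pages hold Pxy and Qxy,
 * then solve the two GF(256) equations for the missing data with
 * table-driven multiplies, vectorized here with SSSE3 pshufb.
 */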
static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	static const u8 __aligned(16) x0f[16] = {
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

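	/*
	 * pbmul indexes the nibble tables for 1/(g^(failb-faila) + 1) and
	 * qmul those for 1/(g^faila + g^failb).  For each byte the loop
	 * below computes
	 *
	 *	data[failb] = pbmul[P ^ Pxy] ^ qmul[Q ^ Qxy]
	 *	data[faila] = data[failb] ^ (P ^ Pxy)
	 *
	 * the same solution the byte-wise loop in recov.c produces.
	 */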
	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));

#ifdef CONFIG_X86_64
	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

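	/*
	 * On x86_64 the low-nibble qmul table and both pbmul tables stay
	 * resident in xmm6/xmm14/xmm15 (registers that do not exist in
	 * 32-bit mode) and each iteration handles 32 bytes; the 32-bit
	 * path reloads the tables and handles 16 bytes per iteration.
	 */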
	/* Now do it... */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* xmm6, xmm14, xmm15 */

		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
		asm volatile("pxor %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor %0,%%xmm9" : : "m" (dq[16]));
		asm volatile("pxor %0,%%xmm0" : : "m" (dp[0]));
		asm volatile("pxor %0,%%xmm8" : : "m" (dp[16]));

		/* xmm0/8 = px */

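		/*
		 * GF(256) multiply via nibble splitting: pshufb treats the
		 * table register as sixteen byte-wide entries and each index
		 * byte's low four bits select one, so one lookup on the
		 * masked low nibbles and one on the shifted high nibbles,
		 * XORed together, multiply every byte of the register by the
		 * table constant.  psraw shifts across byte boundaries, but
		 * the following pand with 0x0f discards the stray bits.
		 */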
		asm volatile("movdqa %xmm6,%xmm4");
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
		asm volatile("movdqa %xmm6,%xmm12");
		asm volatile("movdqa %xmm5,%xmm13");
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("movdqa %xmm9,%xmm11");
		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
		asm volatile("movdqa %xmm8,%xmm10");
		asm volatile("psraw $4,%xmm1");
		asm volatile("psraw $4,%xmm9");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pand %xmm7,%xmm9");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pshufb %xmm9,%xmm13");
		asm volatile("pxor %xmm4,%xmm5");
		asm volatile("pxor %xmm12,%xmm13");

		/* xmm5/13 = qx */

		asm volatile("movdqa %xmm14,%xmm4");
		asm volatile("movdqa %xmm15,%xmm1");
		asm volatile("movdqa %xmm14,%xmm12");
		asm volatile("movdqa %xmm15,%xmm9");
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("movdqa %xmm10,%xmm11");
		asm volatile("psraw $4,%xmm2");
		asm volatile("psraw $4,%xmm10");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pand %xmm7,%xmm10");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pshufb %xmm10,%xmm9");
		asm volatile("pxor %xmm4,%xmm1");
		asm volatile("pxor %xmm12,%xmm9");

		/* xmm1/9 = pbmul[px] */
		asm volatile("pxor %xmm5,%xmm1");
		asm volatile("pxor %xmm13,%xmm9");
		/* xmm1/9 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("pxor %xmm9,%xmm8");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#else
		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
		asm volatile("pxor %0,%%xmm1" : : "m" (*dq));
		asm volatile("pxor %0,%%xmm0" : : "m" (*dp));

		/* 1 = dq ^ q
		 * 0 = dp ^ p
		 */
		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("psraw $4,%xmm1");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pxor %xmm4,%xmm5");

		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */

		/* xmm5 = qx */

		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("psraw $4,%xmm2");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pxor %xmm4,%xmm1");

		/* xmm1 = pbmul[px] */
		asm volatile("pxor %xmm5,%xmm1");
		/* xmm1 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));

		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}

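/*
 * Recover one failed data block plus the P block.  The data block is
 * rebuilt from Q alone: regenerate the syndromes with the zero page
 * substituted, multiply (Q ^ Qx) by the inverse of g^faila via the qmul
 * nibble tables, then fold the recovered data back into the P page
 * (which gen_syndrome has just rewritten without the failed block).
 */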
static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
		void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	static const u8 __aligned(16) x0f[16] = {
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

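	/*
	 * Since Q ^ Qx = g^faila * D for the missing block D, multiplying
	 * by 1/g^faila (the table selected above) recovers D directly.
	 */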
	kernel_fpu_begin();

	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

		/* xmm3 = q[0] ^ dq[0] */

		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm4 = q[16] ^ dq[16] */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %xmm4, %xmm8");

		/* xmm4 = xmm8 = q[16] ^ dq[16] */

		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
		asm volatile("pxor %xmm0, %xmm1");
		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

		/* xmm1 = qmul[q[0] ^ dq[0]] */

		asm volatile("psraw $4, %xmm4");
		asm volatile("pand %xmm7, %xmm8");
		asm volatile("pand %xmm7, %xmm4");
		asm volatile("pshufb %xmm8, %xmm10");
		asm volatile("pshufb %xmm4, %xmm11");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("pxor %xmm10, %xmm11");
		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

		/* xmm11 = qmul[q[16] ^ dq[16]] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */

		asm volatile("pxor %xmm11, %xmm12");

		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;

#else
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm3 = *q ^ *dq */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("pxor %xmm0, %xmm1");

		/* xmm1 = qmul[*q ^ *dq] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = *p ^ qmul[*q ^ *dq] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}

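/*
 * Registered with the RAID-6 core alongside the other recovery
 * implementations; at boot the valid entry with the highest priority is
 * chosen, so this SSSE3 version is preferred over the byte-wise generic
 * recovery whenever raid6_has_ssse3() succeeds.
 */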
const struct raid6_recov_calls raid6_recov_ssse3 = {
	.data2 = raid6_2data_recov_ssse3,
	.datap = raid6_datap_recov_ssse3,
	.valid = raid6_has_ssse3,
#ifdef CONFIG_X86_64
	.name = "ssse3x2",
#else
	.name = "ssse3x1",
#endif
	.priority = 1,
};

#endif
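
For reference, here is a minimal user-space sketch, not part of the kernel file, that demonstrates the identity the pshufb kernels rely on. It assumes the RAID-6 field GF(2^8) with polynomial 0x11d and a nibble-table layout mirroring raid6_vgfmul (first 16 entries: products of the low nibbles; next 16: products of the high nibbles); the names gfmul, lo and hi are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* Multiply in GF(2^8) with the RAID-6 polynomial x^8+x^4+x^3+x^2+1 (0x11d). */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t p = 0;

	while (b) {
		if (b & 1)
			p ^= a;
		a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);	/* reduce mod 0x11d */
		b >>= 1;
	}
	return p;
}

int main(void)
{
	uint8_t c = 0xd3;		/* arbitrary multiplier constant */
	uint8_t lo[16], hi[16];		/* the two 16-entry nibble tables */
	int b;

	for (b = 0; b < 16; b++) {
		lo[b] = gfmul(c, b);		/* products of the low nibbles  */
		hi[b] = gfmul(c, b << 4);	/* products of the high nibbles */
	}

	for (b = 0; b < 256; b++) {
		uint8_t split = lo[b & 0x0f] ^ hi[b >> 4];

		if (split != gfmul(c, b)) {
			printf("mismatch at byte 0x%02x\n", b);
			return 1;
		}
	}
	printf("nibble-split lookups match gfmul() for all 256 bytes\n");
	return 0;
}

The identity holds because multiplication by a fixed constant is linear over GF(2): the product of a byte is the XOR of the products of its low and high nibbles, which is what lets two 16-entry pshufb lookups replace a 256-entry table.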