Linux Kernel 3.7.1
xor_avx.h
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <[email protected]>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/i387.h>

#define BLOCK4(i) \
	BLOCK(32 * i, 0) \
	BLOCK(32 * (i + 1), 1) \
	BLOCK(32 * (i + 2), 2) \
	BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
	BLOCK4(0) \
	BLOCK4(4) \
	BLOCK4(8) \
	BLOCK4(12)
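
/*
 * BLOCK16() expands BLOCK(i, reg) sixteen times, at byte offsets 0, 32, ...,
 * 480, cycling through ymm0-ymm3. Each BLOCK moves 32 bytes, so one
 * BLOCK16() covers the 512-byte chunk implied by the "bytes >> 9" line
 * count and the 512-byte pointer advances in the routines below.
 */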

/* p0 ^= p1, 512 bytes per iteration, inside a kernel_fpu_begin()/end() pair. */
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}

/* p0 ^= p1 ^ p2 */
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

/* p0 ^= p1 ^ p2 ^ p3 */
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

/* p0 ^= p1 ^ p2 ^ p3 ^ p4 */
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

/* xor_block_template describing the AVX routines above. */
static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

/*
 * Hooks for the generic xor template code: add the AVX template to the
 * speed test when the CPU supports AVX, and prefer it over the otherwise
 * fastest template when selecting.
 */
#define AVX_XOR_SPEED \
do { \
	if (cpu_has_avx) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(cpu_has_avx ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif
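
For context, a minimal usage sketch follows. It drives the do_3 callback of xor_block_avx on three page-sized buffers; it is not taken from the kernel source, the demo function name is hypothetical, and the include of <asm/xor.h> assumes the usual x86 path through which this header is pulled in. The listing above implies two constraints the sketch respects: the byte count must be a multiple of 512 (the loop runs bytes >> 9 times over 512-byte chunks), and the buffers must be 32-byte aligned because vmovdqa faults on unaligned addresses.

/*
 * Illustrative sketch only: XOR three page-sized, page-aligned buffers
 * through the AVX template's do_3 hook. xor_avx_demo is a hypothetical
 * name; error handling is kept minimal.
 */
#include <linux/gfp.h>
#include <linux/string.h>
#include <asm/xor.h>

static void xor_avx_demo(void)
{
	/* __get_free_page() returns page-aligned memory, so vmovdqa is safe */
	unsigned long *a = (unsigned long *)__get_free_page(GFP_KERNEL);
	unsigned long *b = (unsigned long *)__get_free_page(GFP_KERNEL);
	unsigned long *c = (unsigned long *)__get_free_page(GFP_KERNEL);

	if (!a || !b || !c)
		goto out;

	memset(a, 0xaa, PAGE_SIZE);
	memset(b, 0x55, PAGE_SIZE);
	memset(c, 0x0f, PAGE_SIZE);

	/* a[i] ^= b[i] ^ c[i]; PAGE_SIZE is a multiple of the 512-byte stride */
	if (cpu_has_avx)
		xor_block_avx.do_3(PAGE_SIZE, a, b, c);
out:
	free_page((unsigned long)a);
	free_page((unsigned long)b);
	free_page((unsigned long)c);
}

In the kernel itself these routines are not called directly: they are reached through the xor block template machinery, with AVX_XOR_SPEED adding the template to the speed test and AVX_SELECT steering the final choice toward AVX when the CPU supports it.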