Linux Kernel 3.7.1
intel_cacheinfo.c
1 /*
2  * Routines to identify caches on Intel CPUs.
3  *
4  * Changes:
5  * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6  * Ashok Raj <[email protected]>: Work with CPU hotplug infrastructure.
7  * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8  */
9 
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/device.h>
13 #include <linux/compiler.h>
14 #include <linux/cpu.h>
15 #include <linux/sched.h>
16 #include <linux/pci.h>
17 
18 #include <asm/processor.h>
19 #include <linux/smp.h>
20 #include <asm/amd_nb.h>
21 #include <asm/smp.h>
22 
23 #define LVL_1_INST 1
24 #define LVL_1_DATA 2
25 #define LVL_2 3
26 #define LVL_3 4
27 #define LVL_TRACE 5
28 
29 struct _cache_table {
30  unsigned char descriptor;
31  char cache_type;
32  short size;
33 };
34 
35 #define MB(x) ((x) * 1024)
36 
37 /* All the cache descriptor types we care about (no TLB or
38  trace cache entries) */
39 
40 static const struct _cache_table __cpuinitconst cache_table[] =
41 {
42  { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
43  { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
44  { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
45  { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
46  { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
47  { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
48  { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
49  { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
50  { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
51  { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
52  { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
53  { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
54  { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
55  { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
56  { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
57  { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
58  { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
59  { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
60  { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
61  { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
62  { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
63  { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
64  { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
65  { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
66  { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
67  { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
68  { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
69  { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
70  { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
71  { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
72  { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
73  { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
74  { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
75  { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
76  { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
77  { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
78  { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
79  { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
80  { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
81  { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
82  { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
83  { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
84  { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
85  { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
86  { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
87  { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
88  { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
89  { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
90  { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
91  { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
92  { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
93  { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
94  { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
95  { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
96  { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
97  { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
98  { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
99  { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
100  { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
101  { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
102  { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
103  { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
104  { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
105  { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
106  { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
107  { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
108  { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
109  { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
110  { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
111  { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
112  { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
113  { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
114  { 0x00, 0, 0}
115 };
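The table above is consulted by the legacy cpuid(2) path in init_intel_cacheinfo() further down: every non-zero descriptor byte returned by leaf 2 is matched against cache_table[] and its size accumulated per cache level. A minimal user-space sketch of that descriptor walk, assuming GCC/clang's <cpuid.h> (the table lookup itself is omitted):

	/* User-space sketch (not part of this file): walk CPUID leaf 2
	 * descriptors the same way init_intel_cacheinfo() does before
	 * looking them up in cache_table[]. */
	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int i, j, n;

		__cpuid(2, regs[0], regs[1], regs[2], regs[3]);
		n = regs[0] & 0xFF;		/* number of iterations required */

		for (i = 0; i < n; i++) {
			__cpuid(2, regs[0], regs[1], regs[2], regs[3]);

			/* bit 31 set means the register holds no valid descriptors */
			for (j = 0; j < 4; j++)
				if (regs[j] & (1u << 31))
					regs[j] = 0;

			/* byte 0 of EAX is the iteration count, not a descriptor */
			for (j = 1; j < 16; j++)
				if (dp[j])
					printf("descriptor 0x%02x\n", dp[j]);
		}
		return 0;
	}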
116 
117 
118 enum _cache_type {
119  CACHE_TYPE_NULL = 0,
120  CACHE_TYPE_DATA = 1,
121  CACHE_TYPE_INST = 2,
122  CACHE_TYPE_UNIFIED = 3
123 };
124 
125 union _cpuid4_leaf_eax {
126  struct {
127  enum _cache_type type:5;
128  unsigned int level:3;
129  unsigned int is_self_initializing:1;
130  unsigned int is_fully_associative:1;
131  unsigned int reserved:4;
132  unsigned int num_threads_sharing:12;
133  unsigned int num_cores_on_die:6;
134  } split;
135  u32 full;
136 };
137 
138 union _cpuid4_leaf_ebx {
139  struct {
140  unsigned int coherency_line_size:12;
141  unsigned int physical_line_partition:10;
142  unsigned int ways_of_associativity:10;
143  } split;
144  u32 full;
145 };
146 
147 union _cpuid4_leaf_ecx {
148  struct {
149  unsigned int number_of_sets:32;
150  } split;
151  u32 full;
152 };
153 
154 struct _cpuid4_info_regs {
155  union _cpuid4_leaf_eax eax;
156  union _cpuid4_leaf_ebx ebx;
157  union _cpuid4_leaf_ecx ecx;
158  unsigned long size;
159  struct amd_northbridge *nb;
160 };
161 
162 struct _cpuid4_info {
163  struct _cpuid4_info_regs base;
164  DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
165 };
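cpuid4_cache_lookup_regs() later fills these unions directly from the EAX/EBX/ECX output of CPUID leaf 4 and computes the cache size as ways x partitions x line size x sets. A minimal user-space sketch of the same decode, using explicit shifts that mirror the bitfield layout above (assumes GCC/clang's <cpuid.h>):

	/* User-space sketch (not part of this file): decode CPUID leaf 4,
	 * subleaf 0, with shifts/masks equivalent to the bitfields above. */
	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;
		unsigned int type, level, line, parts, ways, sets;

		__cpuid_count(4, 0, eax, ebx, ecx, edx);

		type  = eax & 0x1f;			/* _cache_type             */
		level = (eax >> 5) & 0x7;		/* level                   */
		line  = (ebx & 0xfff) + 1;		/* coherency_line_size     */
		parts = ((ebx >> 12) & 0x3ff) + 1;	/* physical_line_partition */
		ways  = ((ebx >> 22) & 0x3ff) + 1;	/* ways_of_associativity   */
		sets  = ecx + 1;			/* number_of_sets          */

		printf("L%u type %u: %u bytes\n",
		       level, type, ways * parts * line * sets);
		return 0;
	}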
166 
167 unsigned short num_cache_leaves;
168 
169 /* AMD doesn't have CPUID4. Emulate it here to report the same
170  information to the user. This makes some assumptions about the machine:
171  L2 not shared, no SMT etc., which is currently true on AMD CPUs.
172 
173  In theory the TLBs could be reported as a fake type (they are in "dummy").
174  Maybe later. */
175 union l1_cache {
176  struct {
177  unsigned line_size:8;
178  unsigned lines_per_tag:8;
179  unsigned assoc:8;
180  unsigned size_in_kb:8;
181  };
182  unsigned val;
183 };
184 
185 union l2_cache {
186  struct {
187  unsigned line_size:8;
188  unsigned lines_per_tag:4;
189  unsigned assoc:4;
190  unsigned size_in_kb:16;
191  };
192  unsigned val;
193 };
194 
195 union l3_cache {
196  struct {
197  unsigned line_size:8;
198  unsigned lines_per_tag:4;
199  unsigned assoc:4;
200  unsigned res:2;
201  unsigned size_encoded:14;
202  };
203  unsigned val;
204 };
205 
206 static const unsigned short __cpuinitconst assocs[] = {
207  [1] = 1,
208  [2] = 2,
209  [4] = 4,
210  [6] = 8,
211  [8] = 16,
212  [0xa] = 32,
213  [0xb] = 48,
214  [0xc] = 64,
215  [0xd] = 96,
216  [0xe] = 128,
217  [0xf] = 0xffff /* fully associative - no way to show this currently */
218 };
219 
220 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
221 static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
222 
223 static void __cpuinit
224 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
225  union _cpuid4_leaf_ebx *ebx,
226  union _cpuid4_leaf_ecx *ecx)
227 {
228  unsigned dummy;
229  unsigned line_size, lines_per_tag, assoc, size_in_kb;
230  union l1_cache l1i, l1d;
231  union l2_cache l2;
232  union l3_cache l3;
233  union l1_cache *l1 = &l1d;
234 
235  eax->full = 0;
236  ebx->full = 0;
237  ecx->full = 0;
238 
239  cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
240  cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
241 
242  switch (leaf) {
243  case 1:
244  l1 = &l1i;
245  case 0:
246  if (!l1->val)
247  return;
248  assoc = assocs[l1->assoc];
249  line_size = l1->line_size;
250  lines_per_tag = l1->lines_per_tag;
251  size_in_kb = l1->size_in_kb;
252  break;
253  case 2:
254  if (!l2.val)
255  return;
256  assoc = assocs[l2.assoc];
257  line_size = l2.line_size;
258  lines_per_tag = l2.lines_per_tag;
259  /* cpu_data has errata corrections for K7 applied */
260  size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
261  break;
262  case 3:
263  if (!l3.val)
264  return;
265  assoc = assocs[l3.assoc];
266  line_size = l3.line_size;
267  lines_per_tag = l3.lines_per_tag;
268  size_in_kb = l3.size_encoded * 512;
269  if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
270  size_in_kb = size_in_kb >> 1;
271  assoc = assoc >> 1;
272  }
273  break;
274  default:
275  return;
276  }
277 
278  eax->split.is_self_initializing = 1;
279  eax->split.type = types[leaf];
280  eax->split.level = levels[leaf];
281  eax->split.num_threads_sharing = 0;
282  eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
283 
284 
285  if (assoc == 0xffff)
286  eax->split.is_fully_associative = 1;
287  ebx->split.coherency_line_size = line_size - 1;
288  ebx->split.ways_of_associativity = assoc - 1;
289  ebx->split.physical_line_partition = lines_per_tag - 1;
290  ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
291  (ebx->split.ways_of_associativity + 1) - 1;
292 }
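A worked check of the emulation arithmetic above, using hypothetical values rather than output from a real CPU: a 512 KB L2 with 64-byte lines and AMD associativity code 0x6 maps to 8 ways via assocs[], so number_of_sets is stored as 512*1024/64/8 - 1 = 1023, and multiplying the (field + 1) values back together recovers 512 KB:

	/* Worked check of the emulation math (hypothetical cache geometry). */
	#include <assert.h>

	int main(void)
	{
		unsigned int size_in_kb = 512, line_size = 64, assoc = 8;
		unsigned int ways_m1 = assoc - 1;	/* ebx.ways_of_associativity */
		unsigned int sets_m1 = (size_in_kb * 1024) / line_size /
				       (ways_m1 + 1) - 1;

		assert(sets_m1 == 1023);
		/* recovered size: (sets+1) * line size * ways == 512 KB */
		assert((sets_m1 + 1) * line_size * (ways_m1 + 1) == 512 * 1024);
		return 0;
	}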
293 
294 struct _cache_attr {
295  struct attribute attr;
296  ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
297  ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
298  unsigned int);
299 };
300 
301 #ifdef CONFIG_AMD_NB
302 
303 /*
304  * L3 cache descriptors
305  */
306 static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
307 {
308  struct amd_l3_cache *l3 = &nb->l3_cache;
309  unsigned int sc0, sc1, sc2, sc3;
310  u32 val = 0;
311 
312  pci_read_config_dword(nb->misc, 0x1C4, &val);
313 
314  /* calculate subcache sizes */
315  l3->subcaches[0] = sc0 = !(val & BIT(0));
316  l3->subcaches[1] = sc1 = !(val & BIT(4));
317 
318  if (boot_cpu_data.x86 == 0x15) {
319  l3->subcaches[0] = sc0 += !(val & BIT(1));
320  l3->subcaches[1] = sc1 += !(val & BIT(5));
321  }
322 
323  l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
324  l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
325 
326  l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
327 }
328 
329 static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
330 {
331  int node;
332 
333  /* only for L3, and not in virtualized environments */
334  if (index < 3)
335  return;
336 
337  node = amd_get_nb_id(smp_processor_id());
338  this_leaf->nb = node_to_amd_nb(node);
339  if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
340  amd_calc_l3_indices(this_leaf->nb);
341 }
342 
343 /*
344  * check whether a slot used for disabling an L3 index is occupied.
345  * @nb: northbridge descriptor for the node containing the L3 cache
346  * @slot: slot number (0..1)
347  *
348  * @returns: the disabled index if used or a negative value if the slot is free.
349  */
350 int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
351 {
352  unsigned int reg = 0;
353 
354  pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
355 
356  /* check whether this slot is activated already */
357  if (reg & (3UL << 30))
358  return reg & 0xfff;
359 
360  return -1;
361 }
362 
363 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
364  unsigned int slot)
365 {
366  int index;
367 
368  if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
369  return -EINVAL;
370 
371  index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
372  if (index >= 0)
373  return sprintf(buf, "%d\n", index);
374 
375  return sprintf(buf, "FREE\n");
376 }
377 
378 #define SHOW_CACHE_DISABLE(slot) \
379 static ssize_t \
380 show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
381  unsigned int cpu) \
382 { \
383  return show_cache_disable(this_leaf, buf, slot); \
384 }
385 SHOW_CACHE_DISABLE(0)
386 SHOW_CACHE_DISABLE(1)
387 
388 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
389  unsigned slot, unsigned long idx)
390 {
391  int i;
392 
393  idx |= BIT(30);
394 
395  /*
396  * disable index in all 4 subcaches
397  */
398  for (i = 0; i < 4; i++) {
399  u32 reg = idx | (i << 20);
400 
401  if (!nb->l3_cache.subcaches[i])
402  continue;
403 
404  pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
405 
406  /*
407  * We need to WBINVD on a core on the node containing the L3
408  * cache whose indices we disable; a simple wbinvd() is therefore
409  * not sufficient.
410  */
411  wbinvd_on_cpu(cpu);
412 
413  reg |= BIT(31);
414  pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
415  }
416 }
417 
418 /*
419  * disable an L3 cache index by using a disable-slot
420  *
421  * @nb: northbridge descriptor for the node containing the L3 cache
422  * @cpu: A CPU on the node containing the L3 cache
423  * @slot: slot number (0..1)
424  * @index: index to disable
425  *
426  * @return: 0 on success, error status on failure
427  */
428 int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
429  unsigned long index)
430 {
431  int ret = 0;
432 
433  /* check if @slot is already used or the index is already disabled */
434  ret = amd_get_l3_disable_slot(nb, slot);
435  if (ret >= 0)
436  return -EEXIST;
437 
438  if (index > nb->l3_cache.indices)
439  return -EINVAL;
440 
441  /* check whether the other slot has disabled the same index already */
442  if (index == amd_get_l3_disable_slot(nb, !slot))
443  return -EEXIST;
444 
445  amd_l3_disable_index(nb, cpu, slot, index);
446 
447  return 0;
448 }
449 
450 static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
451  const char *buf, size_t count,
452  unsigned int slot)
453 {
454  unsigned long val = 0;
455  int cpu, err = 0;
456 
457  if (!capable(CAP_SYS_ADMIN))
458  return -EPERM;
459 
460  if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
461  return -EINVAL;
462 
463  cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
464 
465  if (strict_strtoul(buf, 10, &val) < 0)
466  return -EINVAL;
467 
468  err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
469  if (err) {
470  if (err == -EEXIST)
471  pr_warning("L3 slot %d in use/index already disabled!\n",
472  slot);
473  return err;
474  }
475  return count;
476 }
477 
478 #define STORE_CACHE_DISABLE(slot) \
479 static ssize_t \
480 store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
481  const char *buf, size_t count, \
482  unsigned int cpu) \
483 { \
484  return store_cache_disable(this_leaf, buf, count, slot); \
485 }
486 STORE_CACHE_DISABLE(0)
487 STORE_CACHE_DISABLE(1)
488 
489 static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
490  show_cache_disable_0, store_cache_disable_0);
491 static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
492  show_cache_disable_1, store_cache_disable_1);
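cache_disable_0 and cache_disable_1 are exposed through sysfs under each CPU's cache/index3 directory (conventionally /sys/devices/system/cpu/cpuN/cache/index3/cache_disable_<slot>). A user-space sketch that writes a hypothetical index into slot 0 of cpu0 and reads the slot back; the path and the index value are illustrative only, and the write needs CAP_SYS_ADMIN:

	/* User-space sketch: disable an example L3 index via slot 0 of cpu0. */
	#include <stdio.h>

	int main(void)
	{
		const char *path =
			"/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0";
		char buf[32];
		FILE *f;

		f = fopen(path, "w");
		if (!f || fprintf(f, "12\n") < 0)	/* 12 is an arbitrary example index */
			perror("write cache_disable_0");
		if (f)
			fclose(f);

		f = fopen(path, "r");
		if (f && fgets(buf, sizeof(buf), f))
			printf("slot 0: %s", buf);	/* prints the index, or "FREE" */
		if (f)
			fclose(f);
		return 0;
	}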
493 
494 static ssize_t
495 show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
496 {
497  if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
498  return -EINVAL;
499 
500  return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
501 }
502 
503 static ssize_t
504 store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
505  unsigned int cpu)
506 {
507  unsigned long val;
508 
509  if (!capable(CAP_SYS_ADMIN))
510  return -EPERM;
511 
512  if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
513  return -EINVAL;
514 
515  if (strict_strtoul(buf, 16, &val) < 0)
516  return -EINVAL;
517 
518  if (amd_set_subcaches(cpu, val))
519  return -EINVAL;
520 
521  return count;
522 }
523 
524 static struct _cache_attr subcaches =
525  __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
526 
527 #else /* CONFIG_AMD_NB */
528 #define amd_init_l3_cache(x, y)
529 #endif /* CONFIG_AMD_NB */
530 
531 static int
532 __cpuinit cpuid4_cache_lookup_regs(int index,
533  struct _cpuid4_info_regs *this_leaf)
534 {
535  union _cpuid4_leaf_eax eax;
536  union _cpuid4_leaf_ebx ebx;
537  union _cpuid4_leaf_ecx ecx;
538  unsigned edx;
539 
540  if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
541  amd_cpuid4(index, &eax, &ebx, &ecx);
542  amd_init_l3_cache(this_leaf, index);
543  } else {
544  cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
545  }
546 
547  if (eax.split.type == CACHE_TYPE_NULL)
548  return -EIO; /* better error ? */
549 
550  this_leaf->eax = eax;
551  this_leaf->ebx = ebx;
552  this_leaf->ecx = ecx;
553  this_leaf->size = (ecx.split.number_of_sets + 1) *
554  (ebx.split.coherency_line_size + 1) *
555  (ebx.split.physical_line_partition + 1) *
556  (ebx.split.ways_of_associativity + 1);
557  return 0;
558 }
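The size product above simply multiplies the decoded fields back together. For example, a 32 KB, 8-way L1 data cache with 64-byte lines and a single line partition reports ways_of_associativity = 7, coherency_line_size = 63, physical_line_partition = 0 and number_of_sets = 63, so (63 + 1) * (63 + 1) * (0 + 1) * (7 + 1) = 32768 bytes.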
559 
560 static int __cpuinit find_num_cache_leaves(void)
561 {
562  unsigned int eax, ebx, ecx, edx;
563  union _cpuid4_leaf_eax cache_eax;
564  int i = -1;
565 
566  do {
567  ++i;
568  /* Do cpuid(4) loop to find out num_cache_leaves */
569  cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
570  cache_eax.full = eax;
571  } while (cache_eax.split.type != CACHE_TYPE_NULL);
572  return i;
573 }
574 
575 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
576 {
577  /* Cache sizes */
578  unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
579  unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
580  unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
581  unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
582 #ifdef CONFIG_X86_HT
583  unsigned int cpu = c->cpu_index;
584 #endif
585 
586  if (c->cpuid_level > 3) {
587  static int is_initialized;
588 
589  if (is_initialized == 0) {
590  /* Init num_cache_leaves from boot CPU */
591  num_cache_leaves = find_num_cache_leaves();
592  is_initialized++;
593  }
594 
595  /*
596  * Whenever possible use cpuid(4), deterministic cache
597  * parameters cpuid leaf to find the cache details
598  */
599  for (i = 0; i < num_cache_leaves; i++) {
600  struct _cpuid4_info_regs this_leaf;
601  int retval;
602 
603  retval = cpuid4_cache_lookup_regs(i, &this_leaf);
604  if (retval >= 0) {
605  switch (this_leaf.eax.split.level) {
606  case 1:
607  if (this_leaf.eax.split.type ==
608  CACHE_TYPE_DATA)
609  new_l1d = this_leaf.size/1024;
610  else if (this_leaf.eax.split.type ==
611  CACHE_TYPE_INST)
612  new_l1i = this_leaf.size/1024;
613  break;
614  case 2:
615  new_l2 = this_leaf.size/1024;
616  num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
617  index_msb = get_count_order(num_threads_sharing);
618  l2_id = c->apicid & ~((1 << index_msb) - 1);
619  break;
620  case 3:
621  new_l3 = this_leaf.size/1024;
622  num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
623  index_msb = get_count_order(
624  num_threads_sharing);
625  l3_id = c->apicid & ~((1 << index_msb) - 1);
626  break;
627  default:
628  break;
629  }
630  }
631  }
632  }
633  /*
634  * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
635  * the trace cache only.
636  */
637  if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
638  /* supports eax=2 call */
639  int j, n;
640  unsigned int regs[4];
641  unsigned char *dp = (unsigned char *)regs;
642  int only_trace = 0;
643 
644  if (num_cache_leaves != 0 && c->x86 == 15)
645  only_trace = 1;
646 
647  /* Number of times to iterate */
648  n = cpuid_eax(2) & 0xFF;
649 
650  for (i = 0 ; i < n ; i++) {
651  cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
652 
653  /* If bit 31 is set, this is an unknown format */
654  for (j = 0 ; j < 3 ; j++)
655  if (regs[j] & (1 << 31))
656  regs[j] = 0;
657 
658  /* Byte 0 is level count, not a descriptor */
659  for (j = 1 ; j < 16 ; j++) {
660  unsigned char des = dp[j];
661  unsigned char k = 0;
662 
663  /* look up this descriptor in the table */
664  while (cache_table[k].descriptor != 0) {
665  if (cache_table[k].descriptor == des) {
666  if (only_trace && cache_table[k].cache_type != LVL_TRACE)
667  break;
668  switch (cache_table[k].cache_type) {
669  case LVL_1_INST:
670  l1i += cache_table[k].size;
671  break;
672  case LVL_1_DATA:
673  l1d += cache_table[k].size;
674  break;
675  case LVL_2:
676  l2 += cache_table[k].size;
677  break;
678  case LVL_3:
679  l3 += cache_table[k].size;
680  break;
681  case LVL_TRACE:
682  trace += cache_table[k].size;
683  break;
684  }
685 
686  break;
687  }
688 
689  k++;
690  }
691  }
692  }
693  }
694 
695  if (new_l1d)
696  l1d = new_l1d;
697 
698  if (new_l1i)
699  l1i = new_l1i;
700 
701  if (new_l2) {
702  l2 = new_l2;
703 #ifdef CONFIG_X86_HT
704  per_cpu(cpu_llc_id, cpu) = l2_id;
705 #endif
706  }
707 
708  if (new_l3) {
709  l3 = new_l3;
710 #ifdef CONFIG_X86_HT
711  per_cpu(cpu_llc_id, cpu) = l3_id;
712 #endif
713  }
714 
715  c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
716 
717  return l2;
718 }
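The l2_id/l3_id values computed above identify the group of logical CPUs that share a cache: the low index_msb bits of the APIC ID are masked off, where index_msb = get_count_order(num_threads_sharing). A small sketch of that masking with hypothetical APIC IDs:

	/* Sketch of the shared-cache ID masking used above (hypothetical APIC IDs). */
	#include <stdio.h>

	static unsigned int count_order(unsigned int n)	/* like get_count_order() */
	{
		unsigned int order = 0;

		while ((1u << order) < n)
			order++;
		return order;
	}

	int main(void)
	{
		unsigned int num_threads_sharing = 2;	/* 1 + CPUID field */
		unsigned int index_msb = count_order(num_threads_sharing);
		unsigned int apicid;

		/* APIC IDs 4 and 5 map to ID 4, 6 and 7 map to ID 6 */
		for (apicid = 4; apicid < 8; apicid++)
			printf("apicid %u -> l3_id %u\n",
			       apicid, apicid & ~((1u << index_msb) - 1));
		return 0;
	}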
719 
720 #ifdef CONFIG_SYSFS
721 
722 /* pointer to _cpuid4_info array (for each cache leaf) */
723 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
724 #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
725 
726 #ifdef CONFIG_SMP
727 
728 static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
729 {
730  struct _cpuid4_info *this_leaf;
731  int ret, i, sibling;
732  struct cpuinfo_x86 *c = &cpu_data(cpu);
733 
734  ret = 0;
735  if (index == 3) {
736  ret = 1;
737  for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
738  if (!per_cpu(ici_cpuid4_info, i))
739  continue;
740  this_leaf = CPUID4_INFO_IDX(i, index);
741  for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
742  if (!cpu_online(sibling))
743  continue;
744  set_bit(sibling, this_leaf->shared_cpu_map);
745  }
746  }
747  } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
748  ret = 1;
749  for_each_cpu(i, cpu_sibling_mask(cpu)) {
750  if (!per_cpu(ici_cpuid4_info, i))
751  continue;
752  this_leaf = CPUID4_INFO_IDX(i, index);
753  for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
754  if (!cpu_online(sibling))
755  continue;
756  set_bit(sibling, this_leaf->shared_cpu_map);
757  }
758  }
759  }
760 
761  return ret;
762 }
763 
764 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
765 {
766  struct _cpuid4_info *this_leaf, *sibling_leaf;
767  unsigned long num_threads_sharing;
768  int index_msb, i;
769  struct cpuinfo_x86 *c = &cpu_data(cpu);
770 
771  if (c->x86_vendor == X86_VENDOR_AMD) {
772  if (cache_shared_amd_cpu_map_setup(cpu, index))
773  return;
774  }
775 
776  this_leaf = CPUID4_INFO_IDX(cpu, index);
777  num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
778 
779  if (num_threads_sharing == 1)
780  cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
781  else {
782  index_msb = get_count_order(num_threads_sharing);
783 
784  for_each_online_cpu(i)
785  if (cpu_data(i).apicid >> index_msb ==
786  c->apicid >> index_msb) {
787  cpumask_set_cpu(i,
788  to_cpumask(this_leaf->shared_cpu_map));
789  if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
790  sibling_leaf =
791  CPUID4_INFO_IDX(i, index);
792  cpumask_set_cpu(cpu, to_cpumask(
793  sibling_leaf->shared_cpu_map));
794  }
795  }
796  }
797  }
798 }
799 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
800 {
801  struct _cpuid4_info *this_leaf, *sibling_leaf;
802  int sibling;
803 
804  this_leaf = CPUID4_INFO_IDX(cpu, index);
805  for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
806  sibling_leaf = CPUID4_INFO_IDX(sibling, index);
807  cpumask_clear_cpu(cpu,
808  to_cpumask(sibling_leaf->shared_cpu_map));
809  }
810 }
811 #else
812 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
813 {
814 }
815 
816 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
817 {
818 }
819 #endif
820 
821 static void __cpuinit free_cache_attributes(unsigned int cpu)
822 {
823  int i;
824 
825  for (i = 0; i < num_cache_leaves; i++)
826  cache_remove_shared_cpu_map(cpu, i);
827 
828  kfree(per_cpu(ici_cpuid4_info, cpu));
829  per_cpu(ici_cpuid4_info, cpu) = NULL;
830 }
831 
832 static void __cpuinit get_cpu_leaves(void *_retval)
833 {
834  int j, *retval = _retval, cpu = smp_processor_id();
835 
836  /* Do cpuid and store the results */
837  for (j = 0; j < num_cache_leaves; j++) {
838  struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
839 
840  *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
841  if (unlikely(*retval < 0)) {
842  int i;
843 
844  for (i = 0; i < j; i++)
845  cache_remove_shared_cpu_map(cpu, i);
846  break;
847  }
848  cache_shared_cpu_map_setup(cpu, j);
849  }
850 }
851 
852 static int __cpuinit detect_cache_attributes(unsigned int cpu)
853 {
854  int retval;
855 
856  if (num_cache_leaves == 0)
857  return -ENOENT;
858 
859  per_cpu(ici_cpuid4_info, cpu) = kzalloc(
860  sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
861  if (per_cpu(ici_cpuid4_info, cpu) == NULL)
862  return -ENOMEM;
863 
864  smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
865  if (retval) {
866  kfree(per_cpu(ici_cpuid4_info, cpu));
867  per_cpu(ici_cpuid4_info, cpu) = NULL;
868  }
869 
870  return retval;
871 }
872 
873 #include <linux/kobject.h>
874 #include <linux/sysfs.h>
875 #include <linux/cpu.h>
876 
877 /* pointer to kobject for cpuX/cache */
878 static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
879 
880 struct _index_kobject {
881  struct kobject kobj;
882  unsigned int cpu;
883  unsigned short index;
884 };
885 
886 /* pointer to array of kobjects for cpuX/cache/indexY */
887 static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
888 #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
889 
890 #define show_one_plus(file_name, object, val) \
891 static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
892  unsigned int cpu) \
893 { \
894  return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
895 }
896 
897 show_one_plus(level, base.eax.split.level, 0);
898 show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
899 show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
900 show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
901 show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
902 
903 static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
904  unsigned int cpu)
905 {
906  return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
907 }
908 
909 static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
910  int type, char *buf)
911 {
912  ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
913  int n = 0;
914 
915  if (len > 1) {
916  const struct cpumask *mask;
917 
918  mask = to_cpumask(this_leaf->shared_cpu_map);
919  n = type ?
920  cpulist_scnprintf(buf, len-2, mask) :
921  cpumask_scnprintf(buf, len-2, mask);
922  buf[n++] = '\n';
923  buf[n] = '\0';
924  }
925  return n;
926 }
927 
928 static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
929  unsigned int cpu)
930 {
931  return show_shared_cpu_map_func(leaf, 0, buf);
932 }
933 
934 static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
935  unsigned int cpu)
936 {
937  return show_shared_cpu_map_func(leaf, 1, buf);
938 }
939 
940 static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
941  unsigned int cpu)
942 {
943  switch (this_leaf->base.eax.split.type) {
944  case CACHE_TYPE_DATA:
945  return sprintf(buf, "Data\n");
946  case CACHE_TYPE_INST:
947  return sprintf(buf, "Instruction\n");
948  case CACHE_TYPE_UNIFIED:
949  return sprintf(buf, "Unified\n");
950  default:
951  return sprintf(buf, "Unknown\n");
952  }
953 }
954 
955 #define to_object(k) container_of(k, struct _index_kobject, kobj)
956 #define to_attr(a) container_of(a, struct _cache_attr, attr)
957 
958 #define define_one_ro(_name) \
959 static struct _cache_attr _name = \
960  __ATTR(_name, 0444, show_##_name, NULL)
961 
962 define_one_ro(level);
963 define_one_ro(type);
964 define_one_ro(coherency_line_size);
965 define_one_ro(physical_line_partition);
966 define_one_ro(ways_of_associativity);
967 define_one_ro(number_of_sets);
968 define_one_ro(size);
969 define_one_ro(shared_cpu_map);
970 define_one_ro(shared_cpu_list);
971 
972 static struct attribute *default_attrs[] = {
973  &type.attr,
974  &level.attr,
975  &coherency_line_size.attr,
976  &physical_line_partition.attr,
977  &ways_of_associativity.attr,
978  &number_of_sets.attr,
979  &size.attr,
980  &shared_cpu_map.attr,
981  &shared_cpu_list.attr,
982  NULL
983 };
984 
985 #ifdef CONFIG_AMD_NB
986 static struct attribute ** __cpuinit amd_l3_attrs(void)
987 {
988  static struct attribute **attrs;
989  int n;
990 
991  if (attrs)
992  return attrs;
993 
994  n = ARRAY_SIZE(default_attrs);
995 
996  if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
997  n += 2;
998 
999  if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1000  n += 1;
1001 
1002  attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1003  if (attrs == NULL)
1004  return attrs = default_attrs;
1005 
1006  for (n = 0; default_attrs[n]; n++)
1007  attrs[n] = default_attrs[n];
1008 
1009  if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1010  attrs[n++] = &cache_disable_0.attr;
1011  attrs[n++] = &cache_disable_1.attr;
1012  }
1013 
1014  if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1015  attrs[n++] = &subcaches.attr;
1016 
1017  return attrs;
1018 }
1019 #endif
1020 
1021 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1022 {
1023  struct _cache_attr *fattr = to_attr(attr);
1024  struct _index_kobject *this_leaf = to_object(kobj);
1025  ssize_t ret;
1026 
1027  ret = fattr->show ?
1028  fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1029  buf, this_leaf->cpu) :
1030  0;
1031  return ret;
1032 }
1033 
1034 static ssize_t store(struct kobject *kobj, struct attribute *attr,
1035  const char *buf, size_t count)
1036 {
1037  struct _cache_attr *fattr = to_attr(attr);
1038  struct _index_kobject *this_leaf = to_object(kobj);
1039  ssize_t ret;
1040 
1041  ret = fattr->store ?
1042  fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1043  buf, count, this_leaf->cpu) :
1044  0;
1045  return ret;
1046 }
1047 
1048 static const struct sysfs_ops sysfs_ops = {
1049  .show = show,
1050  .store = store,
1051 };
1052 
1053 static struct kobj_type ktype_cache = {
1054  .sysfs_ops = &sysfs_ops,
1055  .default_attrs = default_attrs,
1056 };
1057 
1058 static struct kobj_type ktype_percpu_entry = {
1059  .sysfs_ops = &sysfs_ops,
1060 };
1061 
1062 static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1063 {
1064  kfree(per_cpu(ici_cache_kobject, cpu));
1065  kfree(per_cpu(ici_index_kobject, cpu));
1066  per_cpu(ici_cache_kobject, cpu) = NULL;
1067  per_cpu(ici_index_kobject, cpu) = NULL;
1068  free_cache_attributes(cpu);
1069 }
1070 
1071 static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1072 {
1073  int err;
1074 
1075  if (num_cache_leaves == 0)
1076  return -ENOENT;
1077 
1078  err = detect_cache_attributes(cpu);
1079  if (err)
1080  return err;
1081 
1082  /* Allocate all required memory */
1083  per_cpu(ici_cache_kobject, cpu) =
1084  kzalloc(sizeof(struct kobject), GFP_KERNEL);
1085  if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1086  goto err_out;
1087 
1088  per_cpu(ici_index_kobject, cpu) = kzalloc(
1089  sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1090  if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1091  goto err_out;
1092 
1093  return 0;
1094 
1095 err_out:
1096  cpuid4_cache_sysfs_exit(cpu);
1097  return -ENOMEM;
1098 }
1099 
1100 static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1101 
1102 /* Add/Remove cache interface for CPU device */
1103 static int __cpuinit cache_add_dev(struct device *dev)
1104 {
1105  unsigned int cpu = dev->id;
1106  unsigned long i, j;
1107  struct _index_kobject *this_object;
1108  struct _cpuid4_info *this_leaf;
1109  int retval;
1110 
1111  retval = cpuid4_cache_sysfs_init(cpu);
1112  if (unlikely(retval < 0))
1113  return retval;
1114 
1115  retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1116  &ktype_percpu_entry,
1117  &dev->kobj, "%s", "cache");
1118  if (retval < 0) {
1119  cpuid4_cache_sysfs_exit(cpu);
1120  return retval;
1121  }
1122 
1123  for (i = 0; i < num_cache_leaves; i++) {
1124  this_object = INDEX_KOBJECT_PTR(cpu, i);
1125  this_object->cpu = cpu;
1126  this_object->index = i;
1127 
1128  this_leaf = CPUID4_INFO_IDX(cpu, i);
1129 
1130  ktype_cache.default_attrs = default_attrs;
1131 #ifdef CONFIG_AMD_NB
1132  if (this_leaf->base.nb)
1133  ktype_cache.default_attrs = amd_l3_attrs();
1134 #endif
1135  retval = kobject_init_and_add(&(this_object->kobj),
1136  &ktype_cache,
1137  per_cpu(ici_cache_kobject, cpu),
1138  "index%1lu", i);
1139  if (unlikely(retval)) {
1140  for (j = 0; j < i; j++)
1141  kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1142  kobject_put(per_cpu(ici_cache_kobject, cpu));
1143  cpuid4_cache_sysfs_exit(cpu);
1144  return retval;
1145  }
1146  kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1147  }
1148  cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1149 
1150  kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1151  return 0;
1152 }
1153 
1154 static void __cpuinit cache_remove_dev(struct device *dev)
1155 {
1156  unsigned int cpu = dev->id;
1157  unsigned long i;
1158 
1159  if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1160  return;
1161  if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1162  return;
1163  cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1164 
1165  for (i = 0; i < num_cache_leaves; i++)
1166  kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1167  kobject_put(per_cpu(ici_cache_kobject, cpu));
1168  cpuid4_cache_sysfs_exit(cpu);
1169 }
1170 
1171 static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1172  unsigned long action, void *hcpu)
1173 {
1174  unsigned int cpu = (unsigned long)hcpu;
1175  struct device *dev;
1176 
1177  dev = get_cpu_device(cpu);
1178  switch (action) {
1179  case CPU_ONLINE:
1180  case CPU_ONLINE_FROZEN:
1181  cache_add_dev(dev);
1182  break;
1183  case CPU_DEAD:
1184  case CPU_DEAD_FROZEN:
1185  cache_remove_dev(dev);
1186  break;
1187  }
1188  return NOTIFY_OK;
1189 }
1190 
1191 static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1192  .notifier_call = cacheinfo_cpu_callback,
1193 };
1194 
1195 static int __cpuinit cache_sysfs_init(void)
1196 {
1197  int i;
1198 
1199  if (num_cache_leaves == 0)
1200  return 0;
1201 
1202  for_each_online_cpu(i) {
1203  int err;
1204  struct device *dev = get_cpu_device(i);
1205 
1206  err = cache_add_dev(dev);
1207  if (err)
1208  return err;
1209  }
1210  register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1211  return 0;
1212 }
1213 
1214 device_initcall(cache_sysfs_init);
1215 
1216 #endif
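Once cache_sysfs_init() has registered everything, each online CPU exports one cache/indexN directory per cache leaf, whose files correspond one-to-one to the show_* handlers above. A user-space sketch that dumps a few of those attributes for cpu0 (the sysfs paths are the conventional ones; treat them as an assumption):

	/* User-space sketch: read the sysfs files created by this driver for cpu0. */
	#include <stdio.h>

	int main(void)
	{
		const char *attrs[] = { "level", "type", "size",
					"ways_of_associativity", "shared_cpu_list" };
		const int nattrs = sizeof(attrs) / sizeof(attrs[0]);
		char path[128], buf[128];
		int index, i;

		for (index = 0; index < 4; index++) {	/* index0..index3 */
			for (i = 0; i < nattrs; i++) {
				FILE *f;

				snprintf(path, sizeof(path),
					 "/sys/devices/system/cpu/cpu0/cache/index%d/%s",
					 index, attrs[i]);
				f = fopen(path, "r");
				if (!f)
					continue;	/* this leaf may not exist */
				if (fgets(buf, sizeof(buf), f))
					printf("index%d %s: %s", index, attrs[i], buf);
				fclose(f);
			}
		}
		return 0;
	}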