arbiter.c
/*
 * Memory arbiter functions. Allocates bandwidth through the
 * arbiter and sets up arbiter breakpoints.
 *
 * The algorithm first assigns slots to the clients that have specified
 * bandwidth (e.g. ethernet) and then the remaining slots are divided
 * among all the active clients.
 *
 * Copyright (c) 2004-2007 Axis Communications AB.
 *
 * The artpec-3 has two arbiters. The memory hierarchy looks like this:
 *
 *
 *        CPU          DMAs
 *         |            |
 *         |            |
 *        --------------    ------------------
 *        | foo arbiter|----| Internal memory|
 *        --------------    ------------------
 *              |
 *        --------------
 *        |  L2 cache  |
 *        --------------
 *              |
 * h264 etc     |
 *    |         |
 *    |         |
 *        --------------
 *        | bar arbiter|
 *        --------------
 *              |
 *          ---------
 *          | SDRAM |
 *          ---------
 *
 */

#include <hwregs/reg_map.h>
#include <hwregs/reg_rdwr.h>
#include <hwregs/marb_foo_defs.h>
#include <hwregs/marb_bar_defs.h>
#include <arbiter.h>
#include <hwregs/intr_vect.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <asm/io.h>
#include <asm/irq_regs.h>

#define D(x)

struct crisv32_watch_entry {
	unsigned long instance;
	watch_callback *cb;
	unsigned long start;
	unsigned long end;
	int used;
};

#define NUMBER_OF_BP 4
#define SDRAM_BANDWIDTH 400000000
#define INTMEM_BANDWIDTH 400000000
#define NBR_OF_SLOTS 64
#define NBR_OF_REGIONS 2
#define NBR_OF_CLIENTS 15
#define ARBITERS 2
#define UNASSIGNED 100
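
/*
 * Rough numbers for orientation (derived from the defines above, not from
 * the hardware documentation): with SDRAM_BANDWIDTH = 400000000 and
 * NBR_OF_SLOTS = 64, one arbiter slot corresponds to about
 * 400000000 / 64 = 6250000 bandwidth units, so a client asking for
 * 100000000 ends up with 16 of the 64 slots. The code never fixes a unit;
 * only the ratio of the request to SDRAM_BANDWIDTH/INTMEM_BANDWIDTH matters.
 */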

struct arbiter {
	unsigned long instance;
	int nbr_regions;
	int nbr_clients;
	int requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
	int active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
};

static struct crisv32_watch_entry watches[ARBITERS][NUMBER_OF_BP] =
{
	{
		{regi_marb_foo_bp0},
		{regi_marb_foo_bp1},
		{regi_marb_foo_bp2},
		{regi_marb_foo_bp3}
	},
	{
		{regi_marb_bar_bp0},
		{regi_marb_bar_bp1},
		{regi_marb_bar_bp2},
		{regi_marb_bar_bp3}
	}
};

static struct arbiter arbiters[ARBITERS] =
{
	{ /* L2 cache arbiter */
		.instance = regi_marb_foo,
		.nbr_regions = 2,
		.nbr_clients = 15
	},
	{ /* DDR2 arbiter */
		.instance = regi_marb_bar,
		.nbr_regions = 1,
		.nbr_clients = 9
	}
};

static int max_bandwidth[NBR_OF_REGIONS] = {SDRAM_BANDWIDTH, INTMEM_BANDWIDTH};

DEFINE_SPINLOCK(arbiter_lock);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id);
static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id);

/*
 * "I'm the arbiter, I know the score.
 * From square one I'll be watching all 64."
 * (memory arbiter slots, that is)
 *
 * Or in other words:
 * Program the memory arbiter slots for "region" according to what's
 * in requested_slots[] and active_clients[], while minimizing
 * latency. A caller may pass a non-zero positive amount for
 * "unused_slots", which must then be the unallocated, remaining
 * number of slots, free to hand out to any client.
 */

static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
{
	int slot;
	int client;
	int interval = 0;

	/*
	 * This vector corresponds to the hardware arbiter slots (see
	 * the hardware documentation for semantics). We initialize
	 * each slot with a suitable sentinel value outside the valid
	 * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
	 * client indexes. Then it's fed to the hardware.
	 */
	s8 val[NBR_OF_SLOTS];

	for (slot = 0; slot < NBR_OF_SLOTS; slot++)
		val[slot] = -1;

	for (client = 0; client < arbiters[arbiter].nbr_clients; client++) {
		int pos;
		/* Allocate the requested non-zero number of slots, but
		 * also give clients with zero-requests one slot each
		 * while stocks last. We do the latter here, in client
		 * order. This makes sure zero-request clients are the
		 * first to get to any spare slots, else those slots
		 * could, when bandwidth is allocated close to the limit,
		 * all be allocated to low-index non-zero-request clients
		 * in the default-fill loop below. Another positive but
		 * secondary effect is a somewhat better spread of the
		 * zero-bandwidth clients in the vector, avoiding some of
		 * the latency that could otherwise be caused by the
		 * partitioning of non-zero-bandwidth clients at low
		 * indexes and zero-bandwidth clients at high
		 * indexes. (Note that this spreading can only affect the
		 * unallocated bandwidth.) All the above only matters for
		 * memory-intensive situations, of course.
		 */
		if (!arbiters[arbiter].requested_slots[region][client]) {
			/*
			 * Skip inactive clients. Also skip zero-slot
			 * allocations in this pass when there are no known
			 * free slots.
			 */
			if (!arbiters[arbiter].active_clients[region][client] ||
			    unused_slots <= 0)
				continue;

			unused_slots--;

			/* Only allocate one slot for this client. */
			interval = NBR_OF_SLOTS;
		} else
			interval = NBR_OF_SLOTS /
				arbiters[arbiter].requested_slots[region][client];

		pos = 0;
		while (pos < NBR_OF_SLOTS) {
			if (val[pos] >= 0)
				pos++;
			else {
				val[pos] = client;
				pos += interval;
			}
		}
	}

	client = 0;
	for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
		/*
		 * Allocate remaining slots in round-robin
		 * client-number order for active clients. For this
		 * pass, we ignore requested bandwidth and previous
		 * allocations.
		 */
		if (val[slot] < 0) {
			int first = client;
			while (!arbiters[arbiter].active_clients[region][client]) {
				client = (client + 1) %
					arbiters[arbiter].nbr_clients;
				if (client == first)
					break;
			}
			val[slot] = client;
			client = (client + 1) % arbiters[arbiter].nbr_clients;
		}
		if (arbiter == 0) {
			if (region == EXT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_l2_slots, slot, val[slot]);
			else if (region == INT_REGION)
				REG_WR_INT_VECT(marb_foo, regi_marb_foo,
					rw_intm_slots, slot, val[slot]);
		} else {
			REG_WR_INT_VECT(marb_bar, regi_marb_bar,
				rw_ddr2_slots, slot, val[slot]);
		}
	}
}
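
/*
 * Example of the slot placement above (illustrative numbers, not taken
 * from any particular product configuration): a client that was granted
 * 16 of the 64 slots gets interval = 64 / 16 = 4 and is therefore placed
 * in every fourth still-free slot, which keeps its worst-case wait short.
 * Active clients with a zero request get a single slot each, and whatever
 * is still unclaimed is handed out round-robin in the second pass.
 */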

extern char _stext, _etext;

static void crisv32_arbiter_init(void)
{
	static int initialized;

	if (initialized)
		return;

	initialized = 1;

	/*
	 * CPU caches are always set to active, but with zero
	 * bandwidth allocated. It should be ok to allocate zero
	 * bandwidth for the caches, because DMA for other channels
	 * will supposedly finish, once their programmed amount is
	 * done, and then the caches will get access according to the
	 * "fixed scheme" for unclaimed slots. Though, if for some
	 * use-case somewhere, there's a maximum CPU latency for
	 * e.g. some interrupt, we have to start allocating specific
	 * bandwidth for the CPU caches too.
	 */
	arbiters[0].active_clients[EXT_REGION][11] = 1;
	arbiters[0].active_clients[EXT_REGION][12] = 1;
	crisv32_arbiter_config(0, EXT_REGION, 0);
	crisv32_arbiter_config(0, INT_REGION, 0);
	crisv32_arbiter_config(1, EXT_REGION, 0);

	if (request_irq(MEMARB_FOO_INTR_VECT, crisv32_foo_arbiter_irq,
			IRQF_DISABLED, "arbiter", NULL))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

	if (request_irq(MEMARB_BAR_INTR_VECT, crisv32_bar_arbiter_irq,
			IRQF_DISABLED, "arbiter", NULL))
		printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

#ifndef CONFIG_ETRAX_KGDB
	/* Global watch for writes to kernel text segment. */
	crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
		MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
		arbiter_all_write, NULL);
#endif

	/* Set up max burst sizes by default */
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_h264_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_ccd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_wr_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vin_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_rd_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_vout_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_sclr_fifo_burst, 3);
	REG_WR_INT(marb_bar, regi_marb_bar, rw_l2cache_burst, 3);
}

int crisv32_arbiter_allocate_bandwidth(int client, int region,
				       unsigned long bandwidth)
{
	int i;
	int total_assigned = 0;
	int total_clients = 0;
	int req;
	int arbiter = 0;

	crisv32_arbiter_init();

	if (client & 0xffff0000) {
		arbiter = 1;
		client >>= 16;
	}

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++) {
		total_assigned += arbiters[arbiter].requested_slots[region][i];
		total_clients += arbiters[arbiter].active_clients[region][i];
	}

	/* Avoid division by 0 for 0-bandwidth requests. */
	req = bandwidth == 0
		? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);
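	/*
	 * For example (numbers purely illustrative): asking for
	 * bandwidth = 100000000 out of max_bandwidth[region] = 400000000
	 * gives req = 64 / (400000000 / 100000000) = 16 slots. The integer
	 * division rounds down, so very small non-zero requests may end up
	 * with 0 dedicated slots.
	 */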
309 
310  /*
311  * We make sure that there are enough slots only for non-zero
312  * requests. Requesting 0 bandwidth *may* allocate slots,
313  * though if all bandwidth is allocated, such a client won't
314  * get any and will have to rely on getting memory access
315  * according to the fixed scheme that's the default when one
316  * of the slot-allocated clients doesn't claim their slot.
317  */
318  if (total_assigned + req > NBR_OF_SLOTS)
319  return -ENOMEM;
320 
321  arbiters[arbiter].active_clients[region][client] = 1;
322  arbiters[arbiter].requested_slots[region][client] = req;
323  crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
324 
325  /* Propagate allocation from foo to bar */
326  if (arbiter == 0)
328  EXT_REGION, bandwidth);
329  return 0;
330 }

/*
 * Main entry for bandwidth deallocation.
 *
 * Strictly speaking, for a somewhat constant set of clients where
 * each client gets a constant bandwidth and is just enabled or
 * disabled (somewhat dynamically), no action is necessary here to
 * avoid starvation for non-zero-allocation clients, as the allocated
 * slots will just be unused. However, handing out those unused slots
 * to active clients avoids needless latency if the "fixed scheme"
 * would give unclaimed slots to an eager low-index client.
 */

void crisv32_arbiter_deallocate_bandwidth(int client, int region)
{
	int i;
	int total_assigned = 0;
	int arbiter = 0;

	if (client & 0xffff0000) {
		arbiter = 1;
		client >>= 16;
	}

	arbiters[arbiter].requested_slots[region][client] = 0;
	arbiters[arbiter].active_clients[region][client] = 0;

	for (i = 0; i < arbiters[arbiter].nbr_clients; i++)
		total_assigned += arbiters[arbiter].requested_slots[region][i];

	crisv32_arbiter_config(arbiter, region, NBR_OF_SLOTS - total_assigned);
}
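
/*
 * Usage sketch (illustrative only; my_client and the bandwidth figure are
 * made-up examples, real client ids come from the arbiter client defines):
 *
 *	if (crisv32_arbiter_allocate_bandwidth(my_client, EXT_REGION,
 *					       20000000))
 *		return -ENOMEM;
 *	... do the memory-intensive work ...
 *	crisv32_arbiter_deallocate_bandwidth(my_client, EXT_REGION);
 *
 * A client id with any of the upper 16 bits set addresses the bar (DDR2)
 * arbiter; an id in the lower 16 bits addresses the foo (L2) arbiter.
 */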

int crisv32_arbiter_watch(unsigned long start, unsigned long size,
			  unsigned long clients, unsigned long accesses,
			  watch_callback *cb)
{
	int i;
	int arbiter;
	int used[2] = {0, 0};
	int ret = 0;

	crisv32_arbiter_init();

	if (start > 0x80000000) {
		printk(KERN_ERR "Arbiter: %lX doesn't look like a "
			"physical address", start);
		return -EFAULT;
	}

	spin_lock(&arbiter_lock);

	if (clients & 0xffff)
		used[0] = 1;
	if (clients & 0xffff0000)
		used[1] = 1;

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		if (!used[arbiter])
			continue;

		for (i = 0; i < NUMBER_OF_BP; i++) {
			if (!watches[arbiter][i].used) {
				unsigned intr_mask;
				if (arbiter)
					intr_mask = REG_RD_INT(marb_bar,
						regi_marb_bar, rw_intr_mask);
				else
					intr_mask = REG_RD_INT(marb_foo,
						regi_marb_foo, rw_intr_mask);

				watches[arbiter][i].used = 1;
				watches[arbiter][i].start = start;
				watches[arbiter][i].end = start + size;
				watches[arbiter][i].cb = cb;

				ret |= (i + 1) << (arbiter + 8);
				if (arbiter) {
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_bar_bp,
						watches[arbiter][i].instance,
						rw_clients,
						clients & 0xffff);
				} else {
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_first_addr,
						watches[arbiter][i].start);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_last_addr,
						watches[arbiter][i].end);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_op, accesses);
					REG_WR_INT(marb_foo_bp,
						watches[arbiter][i].instance,
						rw_clients, clients >> 16);
				}

				if (i == 0)
					intr_mask |= 1;
				else if (i == 1)
					intr_mask |= 2;
				else if (i == 2)
					intr_mask |= 4;
				else if (i == 3)
					intr_mask |= 8;

				if (arbiter)
					REG_WR_INT(marb_bar, regi_marb_bar,
						rw_intr_mask, intr_mask);
				else
					REG_WR_INT(marb_foo, regi_marb_foo,
						rw_intr_mask, intr_mask);

				break;
			}
		}
	}
	spin_unlock(&arbiter_lock);
	if (ret)
		return ret;
	else
		return -ENOMEM;
}
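
/*
 * The value returned by crisv32_arbiter_watch() encodes the claimed
 * breakpoint as (breakpoint index + 1) << (8 + arbiter), which is exactly
 * what crisv32_arbiter_unwatch() below decodes. A minimal caller sketch
 * (phys_base, region_size and my_callback are made-up names, and the
 * client/access masks are assumed to come from arbiter.h as in
 * crisv32_arbiter_init() above):
 *
 *	int id = crisv32_arbiter_watch(phys_base, region_size,
 *			MARB_CLIENTS(arbiter_all_clients,
 *				     arbiter_bar_all_clients),
 *			arbiter_all_write, my_callback);
 *	if (id < 0)
 *		return id;
 *	...
 *	crisv32_arbiter_unwatch(id);
 */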

int crisv32_arbiter_unwatch(int id)
{
	int arbiter;
	int intr_mask;

	crisv32_arbiter_init();

	spin_lock(&arbiter_lock);

	for (arbiter = 0; arbiter < ARBITERS; arbiter++) {
		int id2;

		if (arbiter)
			intr_mask = REG_RD_INT(marb_bar, regi_marb_bar,
				rw_intr_mask);
		else
			intr_mask = REG_RD_INT(marb_foo, regi_marb_foo,
				rw_intr_mask);

		id2 = (id & (0xff << (arbiter + 8))) >> (arbiter + 8);
		if (id2 == 0)
			continue;
		id2--;
		if ((id2 >= NUMBER_OF_BP) || (!watches[arbiter][id2].used)) {
			spin_unlock(&arbiter_lock);
			return -EINVAL;
		}

		memset(&watches[arbiter][id2], 0,
			sizeof(struct crisv32_watch_entry));

		if (id2 == 0)
			intr_mask &= ~1;
		else if (id2 == 1)
			intr_mask &= ~2;
		else if (id2 == 2)
			intr_mask &= ~4;
		else if (id2 == 3)
			intr_mask &= ~8;

		if (arbiter)
			REG_WR_INT(marb_bar, regi_marb_bar, rw_intr_mask,
				intr_mask);
		else
			REG_WR_INT(marb_foo, regi_marb_foo, rw_intr_mask,
				intr_mask);
	}

	spin_unlock(&arbiter_lock);
	return 0;
}

extern void show_registers(struct pt_regs *regs);

static irqreturn_t
crisv32_foo_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_foo_r_masked_intr masked_intr =
		REG_RD(marb_foo, regi_marb_foo, r_masked_intr);
	reg_marb_foo_bp_r_brk_clients r_clients;
	reg_marb_foo_bp_r_brk_addr r_addr;
	reg_marb_foo_bp_r_brk_op r_op;
	reg_marb_foo_bp_r_brk_first_client r_first;
	reg_marb_foo_bp_r_brk_size r_size;
	reg_marb_foo_bp_rw_ack ack = {0};
	reg_marb_foo_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned)dev_id;

	masked_intr = REG_RD(marb_foo, regi_marb_foo, r_masked_intr);

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_foo_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_foo_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_foo_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_foo_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_foo_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
		REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_clients, r_clients),
		REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_addr, r_addr),
		REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_op, r_op),
		REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_first_client, r_first),
		REG_TYPE_CONV(int, reg_marb_foo_bp_r_brk_size, r_size));

	REG_WR(marb_foo_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_foo, regi_marb_foo, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs());

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}

static irqreturn_t
crisv32_bar_arbiter_irq(int irq, void *dev_id)
{
	reg_marb_bar_r_masked_intr masked_intr =
		REG_RD(marb_bar, regi_marb_bar, r_masked_intr);
	reg_marb_bar_bp_r_brk_clients r_clients;
	reg_marb_bar_bp_r_brk_addr r_addr;
	reg_marb_bar_bp_r_brk_op r_op;
	reg_marb_bar_bp_r_brk_first_client r_first;
	reg_marb_bar_bp_r_brk_size r_size;
	reg_marb_bar_bp_rw_ack ack = {0};
	reg_marb_bar_rw_ack_intr ack_intr = {
		.bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
	};
	struct crisv32_watch_entry *watch;
	unsigned arbiter = (unsigned)dev_id;

	masked_intr = REG_RD(marb_bar, regi_marb_bar, r_masked_intr);

	if (masked_intr.bp0)
		watch = &watches[arbiter][0];
	else if (masked_intr.bp1)
		watch = &watches[arbiter][1];
	else if (masked_intr.bp2)
		watch = &watches[arbiter][2];
	else if (masked_intr.bp3)
		watch = &watches[arbiter][3];
	else
		return IRQ_NONE;

	/* Retrieve all useful information and print it. */
	r_clients = REG_RD(marb_bar_bp, watch->instance, r_brk_clients);
	r_addr = REG_RD(marb_bar_bp, watch->instance, r_brk_addr);
	r_op = REG_RD(marb_bar_bp, watch->instance, r_brk_op);
	r_first = REG_RD(marb_bar_bp, watch->instance, r_brk_first_client);
	r_size = REG_RD(marb_bar_bp, watch->instance, r_brk_size);

	printk(KERN_DEBUG "Arbiter IRQ\n");
	printk(KERN_DEBUG "Clients %X addr %X op %X first %X size %X\n",
		REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_clients, r_clients),
		REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_addr, r_addr),
		REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_op, r_op),
		REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_first_client, r_first),
		REG_TYPE_CONV(int, reg_marb_bar_bp_r_brk_size, r_size));

	REG_WR(marb_bar_bp, watch->instance, rw_ack, ack);
	REG_WR(marb_bar, regi_marb_bar, rw_ack_intr, ack_intr);

	printk(KERN_DEBUG "IRQ occurred at %X\n", (unsigned)get_irq_regs()->erp);

	if (watch->cb)
		watch->cb();

	return IRQ_HANDLED;
}