Linux Kernel 3.7.1
mpipe.h
1 /*
2  * Copyright 2012 Tilera Corporation. All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation, version 2.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT. See the GNU General Public License for
12  * more details.
13  */
14 
15 #ifndef _GXIO_MPIPE_H_
16 #define _GXIO_MPIPE_H_
17 
18 /*
19  *
20  * An API for allocating, configuring, and manipulating mPIPE hardware
21  * resources.
22  */
23 
24 #include <gxio/common.h>
25 #include <gxio/dma_queue.h>
26 
27 #include <linux/time.h>
28 
29 #include <arch/mpipe_def.h>
30 #include <arch/mpipe_shm.h>
31 
32 #include <hv/drv_mpipe_intf.h>
33 #include <hv/iorpc.h>
34 
35 /*
36  *
37  * The TILE-Gx mPIPE™ shim provides Ethernet connectivity, packet
38  * classification, and packet load balancing services. The
39  * gxio_mpipe_ API, declared in <gxio/mpipe.h>, allows applications to
40  * allocate mPIPE IO channels, configure packet distribution
41  * parameters, and send and receive Ethernet packets. The API is
42  * designed to be a minimal wrapper around the mPIPE hardware, making
43  * system calls only where necessary to preserve inter-process
44  * protection guarantees.
45  *
46  * The APIs described below allow the programmer to allocate and
47  * configure mPIPE resources. As described below, the mPIPE is a
48  * single shared hardware device that provides partitionable resources
49  * that are shared between all applications in the system. The
50  * gxio_mpipe_ API allows userspace code to make resource request
51  * calls to the hypervisor, which in turn keeps track of the
52  * resources in use by all applications, maintains protection
53  * guarantees, and resets resources upon application shutdown.
54  *
55  * We strongly recommend reading the mPIPE section of the IO Device
56  * Guide (UG404) before working with this API. Most functions in the
57  * gxio_mpipe_ API are directly analogous to hardware interfaces and
58  * the documentation assumes that the reader understands those
59  * hardware interfaces.
60  *
61  * @section mpipe__ingress mPIPE Ingress Hardware Resources
62  *
63  * The mPIPE ingress hardware provides extensive hardware offload for
64  * tasks like packet header parsing, load balancing, and memory
65  * management. This section provides a brief introduction to the
66  * hardware components and the gxio_mpipe_ calls used to manage them;
67  * see the IO Device Guide for a much more detailed description of the
68  * mPIPE's capabilities.
69  *
70  * When a packet arrives at one of the mPIPE's Ethernet MACs, it is
71  * assigned a channel number indicating which MAC received it. It
72  * then proceeds through the following hardware pipeline:
73  *
74  * @subsection mpipe__classification Classification
75  *
76  * A set of classification processors run header parsing code on each
77  * incoming packet, extracting information including the destination
78  * MAC address, VLAN, Ethernet type, and five-tuple hash. Some of
79  * this information is then used to choose which buffer stack will be
80  * used to hold the packet, and which bucket will be used by the load
81  * balancer to determine which application will receive the packet.
82  *
83  * The rules by which the buffer stack and bucket are chosen can be
84  * configured via the @ref gxio_mpipe_classifier API. A given app can
85  * specify multiple rules, each one specifying a bucket range, and a
86  * set of buffer stacks, to be used for packets matching the rule.
87  * Each rule can optionally specify a restricted set of channels,
88  * VLANs, and/or dMACs, in which it is interested. By default, a
89  * given rule starts out matching all channels associated with the
90  * mPIPE context's set of open links; all VLANs; and all dMACs.
91  * Subsequent restrictions can then be added.
92  *
93  * @subsection mpipe__load_balancing Load Balancing
94  *
95  * The mPIPE load balancer is responsible for choosing the NotifRing
96  * to which the packet will be delivered. This decision is based on
97  * the bucket number indicated by the classification program. In
98  * general, the bucket number is based on some number of low bits of
99  * the packet's flow hash (applications that aren't interested in flow
100  * hashing use a single bucket). Each load balancer bucket keeps a
101  * record of the NotifRing to which packets directed to that bucket
102  * are currently being delivered. Based on the bucket's load
103  * balancing mode (@ref gxio_mpipe_bucket_mode_t), the load balancer
104  * either forwards the packet to the previously assigned NotifRing or
105  * decides to choose a new NotifRing. If a new NotifRing is required,
106  * the load balancer chooses the least loaded ring in the NotifGroup
107  * associated with the bucket.
108  *
109  * The load balancer is a shared resource. Each application needs to
110  * explicitly allocate NotifRings, NotifGroups, and buckets, using
111  * gxio_mpipe_alloc_notif_rings(), gxio_mpipe_alloc_notif_groups(),
112  * and gxio_mpipe_alloc_buckets(). Then the application needs to
113  * configure them using gxio_mpipe_init_notif_ring() and
114  * gxio_mpipe_init_notif_group_and_buckets().
115  *
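 * For example, a minimal sketch (error checking omitted; the single
 * ring, group, and bucket, and the "ring_mem" region holding 128
 * idescs, are illustrative assumptions) of allocating and wiring up
 * the load balancer for one worker:
 *
 * @code
 * gxio_mpipe_context_t context;
 * gxio_mpipe_init(&context, 0);
 *
 * int ring = gxio_mpipe_alloc_notif_rings(&context, 1, 0, 0);
 * int group = gxio_mpipe_alloc_notif_groups(&context, 1, 0, 0);
 * int bucket = gxio_mpipe_alloc_buckets(&context, 1, 0, 0);
 *
 * // "ring_mem" must be a suitably aligned, physically contiguous
 * // region holding 128 idescs (allocation not shown).
 * gxio_mpipe_init_notif_ring(&context, ring, ring_mem,
 *                            128 * sizeof(gxio_mpipe_idesc_t), 0);
 *
 * gxio_mpipe_init_notif_group_and_buckets(&context, group, ring, 1,
 *                                         bucket, 1,
 *                                         GXIO_MPIPE_BUCKET_ROUND_ROBIN);
 * @endcode
 *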
116  * @subsection mpipe__buffers Buffer Selection and Packet Delivery
117  *
118  * Once the load balancer has chosen the destination NotifRing, the
119  * mPIPE DMA engine pops at least one buffer off of the 'buffer stack'
120  * chosen by the classification program and DMAs the packet data into
121  * that buffer. Each buffer stack provides a hardware-accelerated
122  * stack of data buffers with the same size. If the packet data is
123  * larger than the buffers provided by the chosen buffer stack, the
124  * mPIPE hardware pops off multiple buffers and chains the packet data
125  * through a multi-buffer linked list. Once the packet data is
126  * delivered to the buffer(s), the mPIPE hardware writes the
127  * ::gxio_mpipe_idesc_t metadata object (calculated by the classifier)
128  * into the NotifRing and increments the number of packets delivered
129  * to that ring.
130  *
131  * Applications can push buffers onto a buffer stack by calling
132  * gxio_mpipe_push_buffer() or by egressing a packet with the
133  * ::gxio_mpipe_edesc_t::hwb bit set, indicating that the egressed
134  * buffers should be returned to the stack.
135  *
136  * Applications can allocate and initialize buffer stacks with the
137  * gxio_mpipe_alloc_buffer_stacks() and gxio_mpipe_init_buffer_stack()
138  * APIs.
139  *
140  * The application must also register the memory pages that will hold
141  * packets. This requires calling gxio_mpipe_register_page() for each
142  * memory page that will hold packets allocated by the application for
143  * a given buffer stack. Since each buffer stack is limited to 16
144  * registered pages, it may be necessary to use huge pages, or even
145  * extremely huge pages, to hold all the buffers.
146  *
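 * For example, a minimal sketch (the buffer count of 1000, and the
 * "stack_mem" and "buffer_mem" regions, which must be registered and
 * suitably aligned, are illustrative assumptions) of setting up one
 * stack of 1664-byte buffers:
 *
 * @code
 * int stack = gxio_mpipe_alloc_buffer_stacks(&context, 1, 0, 0);
 *
 * // "stack_mem" is 64kB-aligned memory for the stack itself (not shown).
 * size_t stack_bytes = gxio_mpipe_calc_buffer_stack_bytes(1000);
 * gxio_mpipe_init_buffer_stack(&context, stack,
 *                              GXIO_MPIPE_BUFFER_SIZE_1664,
 *                              stack_mem, stack_bytes, 0);
 *
 * // "buffer_mem" is a char * into registered packet memory (not shown).
 * for (int i = 0; i < 1000; i++)
 *         gxio_mpipe_push_buffer(&context, stack,
 *                                buffer_mem + i * 1664);
 * @endcode
 *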
147  * @subsection mpipe__iqueue NotifRings
148  *
149  * Each NotifRing is a region of shared memory, allocated by the
150  * application, to which the mPIPE delivers packet descriptors
151  * (::gxio_mpipe_idesc_t). The application can allocate them via
152  * gxio_mpipe_alloc_notif_rings(). The application can then either
153  * explicitly initialize them with gxio_mpipe_init_notif_ring() and
154  * then read from them manually, or can make use of the convenience
155  * wrappers provided by @ref gxio_mpipe_wrappers.
156  *
157  * @section mpipe__egress mPIPE Egress Hardware
158  *
159  * Applications use eDMA rings to queue packets for egress. The
160  * application can allocate them via gxio_mpipe_alloc_edma_rings().
161  * The application can then either explicitly initialize them with
162  * gxio_mpipe_init_edma_ring() and then write to them manually, or
163  * can make use of the convenience wrappers provided by
164  * @ref gxio_mpipe_wrappers.
165  *
166  * @section gxio__shortcomings Plans for Future API Revisions
167  *
168  * The API defined here is only an initial version of the mPIPE API.
169  * Future plans include:
170  *
171  * - Higher level wrapper functions to provide common initialization
172  * patterns. This should help users start writing mPIPE programs
173  * without having to learn the details of the hardware.
174  *
175  * - Support for reset and deallocation of resources, including
176  * cleanup upon application shutdown.
177  *
178  * - Support for calling these APIs in the BME.
179  *
180  * - Support for IO interrupts.
181  *
182  * - Clearer definitions of thread safety guarantees.
183  *
184  * @section gxio__mpipe_examples Examples
185  *
186  * See the following mPIPE example programs for more information about
187  * allocating mPIPE resources and using them in real applications:
188  *
189  * - @ref mpipe/ingress/app.c : Receiving packets.
190  *
191  * - @ref mpipe/forward/app.c : Forwarding packets.
192  *
193  * Note that there are several more examples.
194  */
195 
196 /* Flags that can be passed to resource allocation functions. */
197 enum gxio_mpipe_alloc_flags_e {
198  /* Require an allocation to start at a specified resource index. */
199  GXIO_MPIPE_ALLOC_FIXED = HV_MPIPE_ALLOC_FIXED,
200 };
201 
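/* Example: a minimal sketch of requesting a specific resource index with
 * ::GXIO_MPIPE_ALLOC_FIXED (the index 5 and the choice of NotifRings as
 * the resource are illustrative only):
 *
 * @code
 * int ring = gxio_mpipe_alloc_notif_rings(&context, 1, 5,
 *                                         GXIO_MPIPE_ALLOC_FIXED);
 * @endcode
 */
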
202 /* Flags that can be passed to memory registration functions. */
203 enum gxio_mpipe_mem_flags_e {
204  /* Do not fill L3 when writing, and invalidate lines upon egress. */
205  GXIO_MPIPE_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT,
206 
207  /* L3 cache fills should only populate IO cache ways. */
208  GXIO_MPIPE_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN,
209 };
210 
211 /* An ingress packet descriptor. When a packet arrives, the mPIPE
212  * hardware generates this structure and writes it into a NotifRing.
213  */
214 typedef MPIPE_PDESC_t gxio_mpipe_idesc_t;
215 
216 /* An egress command descriptor. Applications write this structure
217  * into eDMA rings and the hardware performs the indicated operation
218  * (normally involving egressing some bytes). Note that egressing a
219  * single packet may involve multiple egress command descriptors.
220  */
221 typedef MPIPE_EDMA_DESC_t gxio_mpipe_edesc_t;
222 
223 /* Get the "va" field from an "idesc".
224  *
225  * This is the address at which the ingress hardware copied the first
226  * byte of the packet.
227  *
228  * If the classifier detected a custom header, then this will point to
229  * the custom header, and gxio_mpipe_idesc_get_l2_start() will point
230  * to the actual L2 header.
231  *
232  * Note that this value may be misleading if "idesc->be" is set.
233  *
234  * @param idesc An ingress packet descriptor.
235  */
236 static inline unsigned char *gxio_mpipe_idesc_get_va(gxio_mpipe_idesc_t *idesc)
237 {
238  return (unsigned char *)(long)idesc->va;
239 }
240 
241 /* Get the "xfer_size" from an "idesc".
242  *
243  * This is the actual number of packet bytes transferred into memory
244  * by the hardware.
245  *
246  * Note that this value may be misleading if "idesc->be" is set.
247  *
248  * @param idesc An ingress packet descriptor.
249  *
250  * ISSUE: Is this the best name for this?
251  * FIXME: Add more docs about chaining, clipping, etc.
252  */
253 static inline unsigned int gxio_mpipe_idesc_get_xfer_size(gxio_mpipe_idesc_t
254  *idesc)
255 {
256  return idesc->l2_size;
257 }
258 
259 /* Get the "l2_offset" from an "idesc".
260  *
261  * Extremely customized classifiers might not support this function.
262  *
263  * This is the number of bytes between the "va" and the L2 header.
264  *
265  * The L2 header consists of a destination mac address, a source mac
266  * address, and an initial ethertype. Various initial ethertypes
267  * allow encoding extra information in the L2 header, often including
268  * a vlan, and/or a new ethertype.
269  *
270  * Note that the "l2_offset" will be non-zero if (and only if) the
271  * classifier processed a custom header for the packet.
272  *
273  * @param idesc An ingress packet descriptor.
274  */
275 static inline uint8_t gxio_mpipe_idesc_get_l2_offset(gxio_mpipe_idesc_t *idesc)
276 {
277  return (idesc->custom1 >> 32) & 0xFF;
278 }
279 
280 /* Get the "l2_start" from an "idesc".
281  *
282  * This is simply gxio_mpipe_idesc_get_va() plus
283  * gxio_mpipe_idesc_get_l2_offset().
284  *
285  * @param idesc An ingress packet descriptor.
286  */
287 static inline unsigned char *gxio_mpipe_idesc_get_l2_start(gxio_mpipe_idesc_t
288  *idesc)
289 {
290  unsigned char *va = gxio_mpipe_idesc_get_va(idesc);
291  return va + gxio_mpipe_idesc_get_l2_offset(idesc);
292 }
293 
294 /* Get the "l2_length" from an "idesc".
295  *
296  * This is simply gxio_mpipe_idesc_get_xfer_size() minus
297  * gxio_mpipe_idesc_get_l2_offset().
298  *
299  * @param idesc An ingress packet descriptor.
300  */
301 static inline unsigned int gxio_mpipe_idesc_get_l2_length(gxio_mpipe_idesc_t
302  *idesc)
303 {
304  unsigned int xfer_size = idesc->l2_size;
305  return xfer_size - gxio_mpipe_idesc_get_l2_offset(idesc);
306 }
307 
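/* Example: a minimal sketch (assuming "idesc" came from an iqueue and
 * "handle_frame" is the application's own routine) of using the helpers
 * above to locate the L2 frame in memory:
 *
 * @code
 * if (!idesc->be)
 *         handle_frame(gxio_mpipe_idesc_get_l2_start(idesc),
 *                      gxio_mpipe_idesc_get_l2_length(idesc));
 * @endcode
 */
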
308 /* A context object used to manage mPIPE hardware resources. */
309 typedef struct {
310 
311  /* File descriptor for calling up to Linux (and thus the HV). */
312  int fd;
313 
314  /* The VA at which configuration registers are mapped. */
315  char *mmio_cfg_base;
316 
317  /* The VA at which IDMA, EDMA, and buffer manager are mapped. */
318  char *mmio_fast_base;
319 
320  /* The "initialized" buffer stacks. */
322 
323 } gxio_mpipe_context_t;
324 
325 /* This is only used internally, but it's most easily made visible here. */
327 
328 /* Initialize an mPIPE context.
329  *
330  * This function allocates an mPIPE "service domain" and maps the MMIO
331  * registers into the caller's VA space.
332  *
333  * @param context Context object to be initialized.
334  * @param mpipe_instance Instance number of mPIPE shim to be controlled via
335  * context.
336  */
337 extern int gxio_mpipe_init(gxio_mpipe_context_t *context,
338  unsigned int mpipe_instance);
339 
340 /* Destroy an mPIPE context.
341  *
342  * This function frees the mPIPE "service domain" and unmaps the MMIO
343  * registers from the caller's VA space.
344  *
345  * If a user process exits without calling this routine, the kernel
346  * will destroy the mPIPE context as part of process teardown.
347  *
348  * @param context Context object to be destroyed.
349  */
350 extern int gxio_mpipe_destroy(gxio_mpipe_context_t *context);
351 
352 /*****************************************************************
353  * Buffer Stacks *
354  ******************************************************************/
355 
356 /* Allocate a set of buffer stacks.
357  *
358  * The return value is NOT interesting if count is zero.
359  *
360  * @param context An initialized mPIPE context.
361  * @param count Number of stacks required.
362  * @param first Index of first stack if ::GXIO_MPIPE_ALLOC_FIXED flag is set,
363  * otherwise ignored.
364  * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
365  * @return Index of first allocated buffer stack, or
366  * ::GXIO_MPIPE_ERR_NO_BUFFER_STACK if allocation failed.
367  */
368 extern int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
369  unsigned int count,
370  unsigned int first,
371  unsigned int flags);
372 
373 /* Enum codes for buffer sizes supported by mPIPE. */
374 typedef enum {
375  /* 128 byte packet data buffer. */
376  GXIO_MPIPE_BUFFER_SIZE_128 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128,
377  /* 256 byte packet data buffer. */
378  GXIO_MPIPE_BUFFER_SIZE_256 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256,
379  /* 512 byte packet data buffer. */
380  GXIO_MPIPE_BUFFER_SIZE_512 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512,
381  /* 1024 byte packet data buffer. */
382  GXIO_MPIPE_BUFFER_SIZE_1024 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024,
383  /* 1664 byte packet data buffer. */
384  GXIO_MPIPE_BUFFER_SIZE_1664 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664,
385  /* 4096 byte packet data buffer. */
386  GXIO_MPIPE_BUFFER_SIZE_4096 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096,
387  /* 10368 byte packet data buffer. */
388  GXIO_MPIPE_BUFFER_SIZE_10368 =
389      MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368,
390  /* 16384 byte packet data buffer. */
391  GXIO_MPIPE_BUFFER_SIZE_16384 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384
392 } gxio_mpipe_buffer_size_enum_t;
393 
394 /* Convert a buffer size in bytes into a buffer size enum. */
395 extern gxio_mpipe_buffer_size_enum_t
396 gxio_mpipe_buffer_size_to_buffer_size_enum(size_t size);
397 
398 /* Convert a buffer size enum into a buffer size in bytes. */
399 extern size_t
400 gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t
401  buffer_size_enum);
402 
403 /* Calculate the number of bytes required to store a given number of
404  * buffers in the memory registered with a buffer stack via
405  * gxio_mpipe_init_buffer_stack().
406  */
407 extern size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers);
408 
409 /* Initialize a buffer stack. This function binds a region of memory
410  * to be used by the hardware for storing buffer addresses pushed via
411  * gxio_mpipe_push_buffer() or as the result of sending a buffer out
412  * the egress with the 'push to stack when done' bit set. Once this
413  * function returns, the memory region's contents may be arbitrarily
414  * modified by the hardware at any time and software should not access
415  * the memory region again.
416  *
417  * @param context An initialized mPIPE context.
418  * @param stack The buffer stack index.
419  * @param buffer_size_enum The size of each buffer in the buffer stack,
420  * as an enum.
421  * @param mem The address of the buffer stack. This memory must be
422  * physically contiguous and aligned to a 64kB boundary.
423  * @param mem_size The size of the buffer stack, in bytes.
424  * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
425  * @return Zero on success, ::GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE if
426  * buffer_size_enum is invalid, ::GXIO_MPIPE_ERR_BAD_BUFFER_STACK if
427  * stack has not been allocated.
428  */
429 extern int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context,
430  unsigned int stack,
431  gxio_mpipe_buffer_size_enum_t
432  buffer_size_enum, void *mem,
433  size_t mem_size,
434  unsigned int mem_flags);
435 
436 /* Push a buffer onto a previously initialized buffer stack.
437  *
438  * The size of the buffer being pushed must match the size that was
439  * registered with gxio_mpipe_init_buffer_stack(). All packet buffer
440  * addresses are 128-byte aligned; the low 7 bits of the specified
441  * buffer address will be ignored.
442  *
443  * @param context An initialized mPIPE context.
444  * @param stack The buffer stack index.
445  * @param buffer The buffer (the low seven bits are ignored).
446  */
447 static inline void gxio_mpipe_push_buffer(gxio_mpipe_context_t *context,
448  unsigned int stack, void *buffer)
449 {
450  MPIPE_BSM_REGION_ADDR_t offset = { {0} };
451  MPIPE_BSM_REGION_VAL_t val = { {0} };
452 
453  /*
454  * The mmio_fast_base region starts at the IDMA region, so subtract
455  * off that initial offset.
456  */
457  offset.region =
458      MPIPE_MMIO_ADDR__REGION_VAL_BSM -
459      MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
460  offset.stack = stack;
461 
462 #if __SIZEOF_POINTER__ == 4
463  val.va = ((ulong) buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT;
464 #else
465  val.va = ((long)buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT;
466 #endif
467 
468  __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word);
469 }
470 
471 /* Pop a buffer off of a previously initialized buffer stack.
472  *
473  * @param context An initialized mPIPE context.
474  * @param stack The buffer stack index.
475  * @return The buffer, or NULL if the stack is empty.
476  */
477 static inline void *gxio_mpipe_pop_buffer(gxio_mpipe_context_t *context,
478  unsigned int stack)
479 {
480  MPIPE_BSM_REGION_ADDR_t offset = { {0} };
481 
482  /*
483  * The mmio_fast_base region starts at the IDMA region, so subtract
484  * off that initial offset.
485  */
486  offset.region =
487      MPIPE_MMIO_ADDR__REGION_VAL_BSM -
488      MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
489  offset.stack = stack;
490 
491  while (1) {
492  /*
493  * Case 1: val.c == ..._UNCHAINED, va is non-zero.
494  * Case 2: val.c == ..._INVALID, va is zero.
495  * Case 3: val.c == ..._NOT_RDY, va is zero.
496  */
497  MPIPE_BSM_REGION_VAL_t val;
498  val.word =
499      __gxio_mmio_read(context->mmio_fast_base +
500      offset.word);
501 
502  /*
503  * Handle case 1 and 2 by returning the buffer (or NULL).
504  * Handle case 3 by waiting for the prefetch buffer to refill.
505  */
506  if (val.c != MPIPE_BSM_REGION_VAL__C_VAL_NOT_RDY)
507  return (void *)((unsigned long)val.
508      va << MPIPE_BSM_REGION_VAL__VA_SHIFT);
509  }
510 }
511 
512 /*****************************************************************
513  * NotifRings *
514  ******************************************************************/
515 
516 /* Allocate a set of NotifRings.
517  *
518  * The return value is NOT interesting if count is zero.
519  *
520  * Note that NotifRings are allocated in chunks, so allocating one at
521  * a time is much less efficient than allocating several at once.
522  *
523  * @param context An initialized mPIPE context.
524  * @param count Number of NotifRings required.
525  * @param first Index of first NotifRing if ::GXIO_MPIPE_ALLOC_FIXED flag
526  * is set, otherwise ignored.
527  * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
528  * @return Index of first allocated buffer NotifRing, or
529  * ::GXIO_MPIPE_ERR_NO_NOTIF_RING if allocation failed.
530  */
531 extern int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
532  unsigned int count, unsigned int first,
533  unsigned int flags);
534 
535 /* Initialize a NotifRing, using the given memory and size.
536  *
537  * @param context An initialized mPIPE context.
538  * @param ring The NotifRing index.
539  * @param mem A physically contiguous region of memory to be filled
540  * with a ring of ::gxio_mpipe_idesc_t structures.
541  * @param mem_size Number of bytes in the ring. Must be 128, 512,
542  * 2048, or 65536 * sizeof(gxio_mpipe_idesc_t).
543  * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
544  *
545  * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_NOTIF_RING or
546  * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
547  */
548 extern int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context,
549  unsigned int ring,
550  void *mem, size_t mem_size,
551  unsigned int mem_flags);
552 
553 /* Configure an interrupt to be sent to a tile on incoming NotifRing
554  * traffic. Once an interrupt is sent for a particular ring, no more
555  * will be sent until gxio_mpipe_enable_notif_ring_interrupt() is called.
556  *
557  * @param context An initialized mPIPE context.
558  * @param x X coordinate of interrupt target tile.
559  * @param y Y coordinate of interrupt target tile.
560  * @param i Index of the IPI register which will receive the interrupt.
561  * @param e Specific event which will be set in the target IPI register when
562  * the interrupt occurs.
563  * @param ring The NotifRing index.
564  * @return Zero on success, GXIO_ERR_INVAL if params are out of range.
565  */
566 extern int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t
567  *context, int x, int y,
568  int i, int e,
569  unsigned int ring);
570 
571 /* Enable an interrupt on incoming NotifRing traffic.
572  *
573  * @param context An initialized mPIPE context.
574  * @param ring The NotifRing index.
575  * @return Zero on success, GXIO_ERR_INVAL if params are out of range.
576  */
577 extern int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t
578  *context, unsigned int ring);
579 
580 /* Map all of a client's memory via the given IOTLB.
581  * @param context An initialized mPIPE context.
582  * @param iotlb IOTLB index.
583  * @param pte Page table entry.
584  * @param flags Flags.
585  * @return Zero on success, or a negative error code.
586  */
587 extern int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
588  unsigned int iotlb, HV_PTE pte,
589  unsigned int flags);
590 
591 /*****************************************************************
592  * Notif Groups *
593  ******************************************************************/
594 
595 /* Allocate a set of NotifGroups.
596  *
597  * The return value is NOT interesting if count is zero.
598  *
599  * @param context An initialized mPIPE context.
600  * @param count Number of NotifGroups required.
601  * @param first Index of first NotifGroup if ::GXIO_MPIPE_ALLOC_FIXED flag
602  * is set, otherwise ignored.
603  * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
604  * @return Index of first allocated buffer NotifGroup, or
605  * ::GXIO_MPIPE_ERR_NO_NOTIF_GROUP if allocation failed.
606  */
607 extern int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
608  unsigned int count,
609  unsigned int first,
610  unsigned int flags);
611 
612 /* Add a NotifRing to a NotifGroup. This only sets a bit in the
613  * application's 'group' object; the hardware NotifGroup can be
614  * initialized by passing 'group' to gxio_mpipe_init_notif_group() or
615  * gxio_mpipe_init_notif_group_and_buckets().
616  */
617 static inline void
618 gxio_mpipe_notif_group_add_ring(gxio_mpipe_notif_group_bits_t *bits, int ring)
619 {
620  bits->ring_mask[ring / 64] |= (1ull << (ring % 64));
621 }
622 
623 /* Set a particular NotifGroup bitmask. Since the load balancer
624  * makes decisions based on both bucket and NotifGroup state, most
625  * applications should use gxio_mpipe_init_notif_group_and_buckets()
626  * rather than using this function to configure just a NotifGroup.
627  */
628 extern int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
629  unsigned int group,
630  gxio_mpipe_notif_group_bits_t bits);
631 
632 /*****************************************************************
633  * Load Balancer *
634  ******************************************************************/
635 
636 /* Allocate a set of load balancer buckets.
637  *
638  * The return value is NOT interesting if count is zero.
639  *
640  * Note that buckets are allocated in chunks, so allocating one at
641  * a time is much less efficient than allocating several at once.
642  *
643  * Note that the buckets are actually divided into two sub-ranges, of
644  * different sizes, and different chunk sizes, and the range you get
645  * by default is determined by the size of the request. Allocations
646  * cannot span the two sub-ranges.
647  *
648  * @param context An initialized mPIPE context.
649  * @param count Number of buckets required.
650  * @param first Index of first bucket if ::GXIO_MPIPE_ALLOC_FIXED flag is set,
651  * otherwise ignored.
652  * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
653  * @return Index of first allocated buffer bucket, or
654  * ::GXIO_MPIPE_ERR_NO_BUCKET if allocation failed.
655  */
656 extern int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context,
657  unsigned int count, unsigned int first,
658  unsigned int flags);
659 
660 /* The legal modes for gxio_mpipe_bucket_info_t and
661  * gxio_mpipe_init_notif_group_and_buckets().
662  *
663  * All modes except ::GXIO_MPIPE_BUCKET_ROUND_ROBIN expect that the user
664  * will allocate a power-of-two number of buckets and initialize them
665  * to the same mode. The classifier program then uses the appropriate
666  * number of low bits from the incoming packet's flow hash to choose a
667  * load balancer bucket. Based on that bucket's load balancing mode,
668  * reference count, and currently active NotifRing, the load balancer
669  * chooses the NotifRing to which the packet will be delivered.
670  */
671 typedef enum {
672  /* All packets for a bucket go to the same NotifRing unless the
673  * NotifRing gets full, in which case packets will be dropped. If
674  * the bucket reference count ever reaches zero, a new NotifRing may
675  * be chosen.
676  */
677  GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY =
678      MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA,
679 
680  /* All packets for a bucket always go to the same NotifRing.
681  */
682  GXIO_MPIPE_BUCKET_STATIC_FLOW_AFFINITY =
683      MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED,
684 
685  /* All packets for a bucket go to the least full NotifRing in the
686  * group, providing load balancing round robin behavior.
687  */
688  GXIO_MPIPE_BUCKET_ROUND_ROBIN =
689      MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK,
690 
691  /* All packets for a bucket go to the same NotifRing unless the
692  * NotifRing gets full, at which point the bucket starts using the
693  * least full NotifRing in the group. If all NotifRings in the
694  * group are full, packets will be dropped.
695  */
696  GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY =
697      MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY,
698 
699  /* All packets for a bucket go to the same NotifRing unless the
700  * NotifRing gets full, or a random timer fires, at which point the
701  * bucket starts using the least full NotifRing in the group. If
702  * all NotifRings in the group are full, packets will be dropped.
703  * WARNING: This mode is BROKEN on chips with fewer than 64 tiles.
704  */
705  GXIO_MPIPE_BUCKET_PREFER_FLOW_LOCALITY =
706      MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND,
707 
708 } gxio_mpipe_bucket_mode_t;
709 
710 /* Copy a set of bucket initialization values into the mPIPE
711  * hardware. Since the load balancer makes decisions based on both
712  * bucket and NotifGroup state, most applications should use
713  * gxio_mpipe_init_notif_group_and_buckets() rather than using this
714  * function to configure a single bucket.
715  *
716  * @param context An initialized mPIPE context.
717  * @param bucket Bucket index to be initialized.
718  * @param bucket_info Initial reference count, NotifRing index, and mode.
719  * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET on failure.
720  */
721 extern int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context,
722  unsigned int bucket,
723  gxio_mpipe_bucket_info_t bucket_info);
724 
725 /* Initializes a group and range of buckets and range of rings such
726  * that the load balancer runs a particular load balancing function.
727  *
728  * First, the group is initialized with the given rings.
729  *
730  * Second, each bucket is initialized with the mode and group, and a
731  * ring chosen round-robin from the given rings.
732  *
733  * Normally, the classifier picks a bucket, and then the load balancer
734  * picks a ring, based on the bucket's mode, group, and current ring,
735  * possibly updating the bucket's ring.
736  *
737  * @param context An initialized mPIPE context.
738  * @param group The group.
739  * @param ring The first ring.
740  * @param num_rings The number of rings.
741  * @param bucket The first bucket.
742  * @param num_buckets The number of buckets.
743  * @param mode The load balancing mode.
744  *
745  * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET,
746  * ::GXIO_MPIPE_ERR_BAD_NOTIF_GROUP, or
747  * ::GXIO_MPIPE_ERR_BAD_NOTIF_RING on failure.
748  */
749 extern int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t
750  *context,
751  unsigned int group,
752  unsigned int ring,
753  unsigned int num_rings,
754  unsigned int bucket,
755  unsigned int num_buckets,
756  gxio_mpipe_bucket_mode_t
757  mode);
758 
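/* Example: a minimal sketch (assuming "group", "ring", and "bucket" are
 * the first indices returned by the allocation calls above) of spreading
 * flows across 4 rings and 256 buckets with dynamic flow affinity:
 *
 * @code
 * gxio_mpipe_init_notif_group_and_buckets(
 *         &context, group, ring, 4, bucket, 256,
 *         GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY);
 * @endcode
 */
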
759 /* Return credits to a NotifRing and/or bucket.
760  *
761  * @param context An initialized mPIPE context.
762  * @param ring The NotifRing index, or -1.
763  * @param bucket The bucket, or -1.
764  * @param count The number of credits to return.
765  */
766 static inline void gxio_mpipe_credit(gxio_mpipe_context_t *context,
767  int ring, int bucket, unsigned int count)
768 {
769  /* NOTE: Fancy struct initialization would break "C89" header test. */
770 
771  MPIPE_IDMA_RELEASE_REGION_ADDR_t offset = { {0} };
772  MPIPE_IDMA_RELEASE_REGION_VAL_t val = { {0} };
773 
774  /*
775  * The mmio_fast_base region starts at the IDMA region, so subtract
776  * off that initial offset.
777  */
778  offset.region =
779  MPIPE_MMIO_ADDR__REGION_VAL_IDMA -
780  MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
781  offset.ring = ring;
782  offset.bucket = bucket;
783  offset.ring_enable = (ring >= 0);
784  offset.bucket_enable = (bucket >= 0);
785  val.count = count;
786 
787  __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word);
788 }
789 
790 /*****************************************************************
791  * Egress Rings *
792  ******************************************************************/
793 
794 /* Allocate a set of eDMA rings.
795  *
796  * The return value is NOT interesting if count is zero.
797  *
798  * @param context An initialized mPIPE context.
799  * @param count Number of eDMA rings required.
800  * @param first Index of first eDMA ring if ::GXIO_MPIPE_ALLOC_FIXED flag
801  * is set, otherwise ignored.
802  * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
803  * @return Index of first allocated buffer eDMA ring, or
804  * ::GXIO_MPIPE_ERR_NO_EDMA_RING if allocation failed.
805  */
806 extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
807  unsigned int count, unsigned int first,
808  unsigned int flags);
809 
810 /* Initialize an eDMA ring, using the given memory and size.
811  *
812  * @param context An initialized mPIPE context.
813  * @param ring The eDMA ring index.
814  * @param channel The channel to use. This must be one of the channels
815  * associated with the context's set of open links.
816  * @param mem A physically contiguous region of memory to be filled
817  * with a ring of ::gxio_mpipe_edesc_t structures.
818  * @param mem_size Number of bytes in the ring. Must be 512, 2048,
819  * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)).
820  * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
821  *
822  * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or
823  * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
824  */
825 extern int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context,
826  unsigned int ring, unsigned int channel,
827  void *mem, size_t mem_size,
828  unsigned int mem_flags);
829 
830 /*****************************************************************
831  * Classifier Program *
832  ******************************************************************/
833 
834 /*
835  *
836  * Functions for loading or configuring the mPIPE classifier program.
837  *
838  * The mPIPE classification processors all run a special "classifier"
839  * program which, for each incoming packet, parses the packet headers,
840  * encodes some packet metadata in the "idesc", and either drops the
841  * packet, or picks a notif ring to handle the packet, and a buffer
842  * stack to contain the packet, usually based on the channel, VLAN,
843  * dMAC, flow hash, and packet size, under the guidance of the "rules"
844  * API described below.
845  *
846  * @section gxio_mpipe_classifier_default Default Classifier
847  *
848  * The MDE provides a simple "default" classifier program. It is
849  * shipped as source in "$TILERA_ROOT/src/sys/mpipe/classifier.c",
850  * which serves as its official documentation. It is shipped as a
851  * binary program in "$TILERA_ROOT/tile/boot/classifier", which is
852  * automatically included in bootroms created by "tile-monitor", and
853  * is automatically loaded by the hypervisor at boot time.
854  *
855  * The L2 analysis handles LLC packets, SNAP packets, and "VLAN
856  * wrappers" (keeping the outer VLAN).
857  *
858  * The L3 analysis handles IPv4 and IPv6, dropping packets with bad
859  * IPv4 header checksums, requesting computation of a TCP/UDP checksum
860  * if appropriate, and hashing the dest and src IP addresses, plus the
861  * ports for TCP/UDP packets, into the flow hash. No special analysis
862  * is done for "fragmented" packets or "tunneling" protocols. Thus,
863  * the first fragment of a fragmented TCP/UDP packet is hashed using
864  * src/dest IP address and ports and all subsequent fragments are only
865  * hashed according to src/dest IP address.
866  *
867  * The L3 analysis handles other packets too, hashing the dMAC and
868  * sMAC into a flow hash.
869  *
870  * The channel, VLAN, and dMAC are used to pick a "rule" (see the
871  * "rules" APIs below), which in turn is used to pick a buffer stack
872  * (based on the packet size) and a bucket (based on the flow hash).
873  *
874  * To receive traffic matching a particular channel/VLAN/dMAC
875  * pattern, an application should allocate its own buffer stacks and
876  * load balancer buckets, and map traffic to those stacks and buckets,
877  * as described by the "rules" API below.
878  *
879  * Various packet metadata is encoded in the idesc. The flow hash is
880  * four bytes at 0x0C. The VLAN is two bytes at 0x10. The ethtype is
881  * two bytes at 0x12. The l3 start is one byte at 0x14. The l4 start
882  * is one byte at 0x15 for IPv4 and IPv6 packets, and otherwise zero.
883  * The protocol is one byte at 0x16 for IPv4 and IPv6 packets, and
884  * otherwise zero.
885  *
886  * @section gxio_mpipe_classifier_custom Custom Classifiers.
887  *
888  * A custom classifier may be created using "tile-mpipe-cc" with a
889  * customized version of the default classifier sources.
890  *
891  * The custom classifier may be included in bootroms using the
892  * "--classifier" option to "tile-monitor", or loaded dynamically
893  * using gxio_mpipe_classifier_load_from_file().
894  *
895  * Be aware that "extreme" customizations may break the assumptions of
896  * the "rules" APIs described below, but simple customizations, such
897  * as adding new packet metadata, should be fine.
898  */
899 
900 /* A set of classifier rules, plus a context. */
901 typedef struct {
902 
903  /* The context. */
904  gxio_mpipe_context_t *context;
905 
906  /* The actual rules. */
907  gxio_mpipe_rules_list_t list;
908 
909 } gxio_mpipe_rules_t;
910 
911 /* Initialize a classifier program rules list.
912  *
913  * This function can be called on a previously initialized rules list
914  * to discard any previously added rules.
915  *
916  * @param rules Rules list to initialize.
917  * @param context An initialized mPIPE context.
918  */
919 extern void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules,
920  gxio_mpipe_context_t *context);
921 
922 /* Begin a new rule on the indicated rules list.
923  *
924  * Note that an empty rule matches all packets, but an empty rule list
925  * matches no packets.
926  *
927  * @param rules Rules list to which new rule is appended.
928  * @param bucket First load balancer bucket to which packets will be
929  * delivered.
930  * @param num_buckets Number of buckets (must be a power of two) across
931  * which packets will be distributed based on the "flow hash".
932  * @param stacks Either NULL, to assign each packet to the smallest
933  * initialized buffer stack which does not induce chaining (and to
934  * drop packets which exceed the largest initialized buffer stack
935  * buffer size), or an array, with each entry indicating which buffer
936  * stack should be used for packets up to that size (with 255
937  * indicating that those packets should be dropped).
938  * @return 0 on success, or a negative error code on failure.
939  */
940 extern int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules,
941  unsigned int bucket,
942  unsigned int num_buckets,
943  gxio_mpipe_rules_stacks_t *stacks);
944 
945 /* Set the headroom of the current rule.
946  *
947  * @param rules Rules list whose current rule will be modified.
948  * @param headroom The headroom.
949  * @return 0 on success, or a negative error code on failure.
950  */
951 extern int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules,
952  uint8_t headroom);
953 
954 /* Indicate that packets from a particular channel can be delivered
955  * to the buckets and buffer stacks associated with the current rule.
956  *
957  * Channels added must be associated with links opened by the mPIPE context
958  * used in gxio_mpipe_rules_init(). A rule with no channels is equivalent
959  * to a rule naming all such associated channels.
960  *
961  * @param rules Rules list whose current rule will be modified.
962  * @param channel The channel to add.
963  * @return 0 on success, or a negative error code on failure.
964  */
965 extern int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules,
966  unsigned int channel);
967 
968 /* Commit rules.
969  *
970  * The rules are sent to the hypervisor, where they are combined with
971  * the rules from other apps, and used to program the hardware classifier.
972  *
973  * Note that if this function returns an error, then the rules will NOT
974  * have been committed, even if the error is due to interactions with
975  * rules from another app.
976  *
977  * @param rules Rules list to commit.
978  * @return 0 on success, or a negative error code on failure.
979  */
980 extern int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules);
981 
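/* Example: a minimal sketch (assuming "channel" came from an open link
 * and "bucket"/"num_buckets" from gxio_mpipe_alloc_buckets()) of directing
 * one channel's traffic at a range of buckets using the default buffer
 * stack selection:
 *
 * @code
 * gxio_mpipe_rules_t rules;
 * gxio_mpipe_rules_init(&rules, &context);
 * gxio_mpipe_rules_begin(&rules, bucket, num_buckets, NULL);
 * gxio_mpipe_rules_add_channel(&rules, channel);
 * gxio_mpipe_rules_commit(&rules);
 * @endcode
 */
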
982 /*****************************************************************
983  * Ingress Queue Wrapper *
984  ******************************************************************/
985 
986 /*
987  *
988  * Convenience functions for receiving packets from a NotifRing and
989  * sending packets via an eDMA ring.
990  *
991  * The mpipe ingress and egress hardware uses shared memory packet
992  * descriptors to describe packets that have arrived on ingress or
993  * are destined for egress. These descriptors are stored in shared
994  * memory ring buffers and written or read by hardware as necessary.
995  * The gxio library provides wrapper functions that manage the head and
996  * tail pointers for these rings, allowing the user to easily read or
997  * write packet descriptors.
998  *
999  * The initialization interface for ingress and egress rings is quite
1000  * similar. For example, to create an ingress queue, the user passes
1001  * a ::gxio_mpipe_iqueue_t state object, a ring number from
1002  * gxio_mpipe_alloc_notif_rings(), and the address of memory to hold a
1003  * ring buffer to the gxio_mpipe_iqueue_init() function. The function
1004  * returns success when the state object has been initialized and the
1005  * hardware configured to deliver packets to the specified ring
1006  * buffer. Similarly, gxio_mpipe_equeue_init() takes a
1007  * ::gxio_mpipe_equeue_t state object, a ring number from
1008  * gxio_mpipe_alloc_edma_rings(), and a shared memory buffer.
1009  *
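 * For example, a minimal sketch (assuming "ring" and "ering" came from
 * the allocation calls above, "channel" from an open link, and that
 * "iq_mem" and "eq_mem" are suitably sized and aligned regions) of
 * initializing both wrappers:
 *
 * @code
 * gxio_mpipe_iqueue_t iqueue;
 * gxio_mpipe_iqueue_init(&iqueue, &context, ring, iq_mem,
 *                        512 * sizeof(gxio_mpipe_idesc_t), 0);
 *
 * gxio_mpipe_equeue_t equeue;
 * gxio_mpipe_equeue_init(&equeue, &context, ering, channel, eq_mem,
 *                        512 * sizeof(gxio_mpipe_edesc_t), 0);
 * @endcode
 *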
1010  * @section gxio_mpipe_iqueue Working with Ingress Queues
1011  *
1012  * Once initialized, the gxio_mpipe_iqueue_t API provides two flows
1013  * for getting the ::gxio_mpipe_idesc_t packet descriptor associated
1014  * with incoming packets. The simplest is to call
1015  * gxio_mpipe_iqueue_get() or gxio_mpipe_iqueue_try_get(). These
1016  * functions copy the oldest packet descriptor out of the NotifRing and
1017  * into a descriptor provided by the caller. They also immediately
1018  * inform the hardware that a descriptor has been processed.
1019  *
1020  * For applications with stringent performance requirements, higher
1021  * efficiency can be achieved by avoiding the packet descriptor copy
1022  * and processing multiple descriptors at once. The
1023  * gxio_mpipe_iqueue_peek() and gxio_mpipe_iqueue_try_peek() functions
1024  * allow such optimizations. These functions provide a pointer to the
1025  * next valid ingress descriptor in the NotifRing's shared memory ring
1026  * buffer, and a count of how many contiguous descriptors are ready to
1027  * be processed. The application can then process any number of those
1028  * descriptors in place, calling gxio_mpipe_iqueue_consume() to inform
1029  * the hardware after each one has been processed.
1030  *
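 * For example, a minimal sketch (assuming "iqueue" was set up with
 * gxio_mpipe_iqueue_init() and "process_packet" is the application's
 * own handler) of the peek/consume receive loop:
 *
 * @code
 * while (1) {
 *         gxio_mpipe_idesc_t *idesc;
 *         int n = gxio_mpipe_iqueue_try_peek(&iqueue, &idesc);
 *         if (n <= 0)
 *                 continue;
 *         for (int i = 0; i < n; i++) {
 *                 process_packet(&idesc[i]);
 *                 gxio_mpipe_iqueue_consume(&iqueue, &idesc[i]);
 *         }
 * }
 * @endcode
 *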
1031  * @section gxio_mpipe_equeue Working with Egress Queues
1032  *
1033  * Similarly, the egress queue API provides a high-performance
1034  * interface plus a simple wrapper for use in posting
1035  * ::gxio_mpipe_edesc_t egress packet descriptors. The simple
1036  * version, gxio_mpipe_equeue_put(), allows the programmer to wait for
1037  * an eDMA ring slot to become available and write a single descriptor
1038  * into the ring.
1039  *
1040  * Alternatively, you can reserve slots in the eDMA ring using
1041  * gxio_mpipe_equeue_reserve() or gxio_mpipe_equeue_try_reserve(), and
1042  * then fill in each slot using gxio_mpipe_equeue_put_at(). This
1043  * capability can be used to amortize the cost of reserving slots
1044  * across several packets. It also allows gather operations to be
1045  * performed on a shared equeue, by ensuring that the edescs for all
1046  * the fragments are all contiguous in the eDMA ring.
1047  *
1048  * The gxio_mpipe_equeue_reserve() and gxio_mpipe_equeue_try_reserve()
1049  * functions return a 63-bit "completion slot", which is actually a
1050  * sequence number, the low bits of which indicate the ring buffer
1051  * index and the high bits the number of times the application has
1052  * gone around the egress ring buffer. The extra bits allow an
1053  * application to check for egress completion by calling
1054  * gxio_mpipe_equeue_is_complete() to see whether a particular 'slot'
1055  * number has finished. Given the maximum packet rates of the Gx
1056  * processor, the 63-bit slot number will never wrap.
1057  *
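 * For example, a minimal sketch (assuming "equeue" is initialized and
 * "edescs" holds "n" prepared descriptors for one gather operation) of
 * reserving contiguous slots and then waiting for completion:
 *
 * @code
 * int64_t slot = gxio_mpipe_equeue_reserve(&equeue, n);
 * if (slot < 0)
 *         return slot;
 * for (unsigned int i = 0; i < n; i++)
 *         gxio_mpipe_equeue_put_at(&equeue, edescs[i], slot + i);
 * while (!gxio_mpipe_equeue_is_complete(&equeue, slot + n - 1, 1))
 *         ;
 * @endcode
 *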
1058  * In practice, most applications use the ::gxio_mpipe_edesc_t::hwb
1059  * bit to indicate that the buffers containing egress packet data
1060  * should be pushed onto a buffer stack when egress is complete. Such
1061  * applications generally do not need to know when an egress operation
1062  * completes (since there is no need to free a buffer post-egress),
1063  * and thus can use the optimized gxio_mpipe_equeue_reserve_fast() or
1064  * gxio_mpipe_equeue_try_reserve_fast() functions, which return a 24
1065  * bit "slot", instead of a 63-bit "completion slot".
1066  *
1067  * Once a slot has been "reserved", it MUST be filled. If the
1068  * application reserves a slot and then decides that it does not
1069  * actually need it, it can set the ::gxio_mpipe_edesc_t::ns (no send)
1070  * bit on the descriptor passed to gxio_mpipe_equeue_put_at() to
1071  * indicate that no data should be sent. This technique can also be
1072  * used to drop an incoming packet, instead of forwarding it, since
1073  * any buffer will still be pushed onto the buffer stack when the
1074  * egress descriptor is processed.
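 *
 * For example, a minimal sketch (the field assignments are illustrative;
 * "va", "len", and "stack" are assumed to describe a buffer that came
 * from a registered buffer stack) of egressing one packet and letting
 * the hardware return the buffer:
 *
 * @code
 * gxio_mpipe_edesc_t edesc = { { 0 } };
 * edesc.bound = 1;              // Last (and only) descriptor of packet.
 * edesc.xfer_size = len;        // Bytes to egress.
 * edesc.va = (unsigned long)va; // Start of packet data.
 * edesc.stack_idx = stack;      // Stack to which the buffer returns.
 * edesc.hwb = 1;                // Hardware buffer return.
 * gxio_mpipe_equeue_put(&equeue, edesc);
 * @endcode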
1075  */
1076 
1077 /* A convenient interface to a NotifRing, for use by a single thread.
1078  */
1079 typedef struct {
1080 
1081  /* The context. */
1082  gxio_mpipe_context_t *context;
1083 
1084  /* The actual NotifRing. */
1085  gxio_mpipe_idesc_t *idescs;
1086 
1087  /* The number of entries. */
1088  unsigned long num_entries;
1089 
1090  /* The number of entries minus one. */
1091  unsigned long mask_num_entries;
1092 
1093  /* The log2() of the number of entries. */
1094  unsigned long log2_num_entries;
1095 
1096  /* The next entry. */
1097  unsigned int head;
1098 
1099  /* The NotifRing id. */
1100  unsigned int ring;
1101 
1102 #ifdef __BIG_ENDIAN__
1103  /* The number of byteswapped entries. */
1104  unsigned int swapped;
1105 #endif
1106 
1107 } gxio_mpipe_iqueue_t;
1108 
1109 /* Initialize an "iqueue".
1110  *
1111  * Takes the iqueue plus the same args as gxio_mpipe_init_notif_ring().
1112  */
1113 extern int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue,
1114  gxio_mpipe_context_t *context,
1115  unsigned int ring,
1116  void *mem, size_t mem_size,
1117  unsigned int mem_flags);
1118 
1119 /* Advance over some old entries in an iqueue.
1120  *
1121  * Please see the documentation for gxio_mpipe_iqueue_consume().
1122  *
1123  * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
1124  * @param count The number of entries to advance over.
1125  */
1126 static inline void gxio_mpipe_iqueue_advance(gxio_mpipe_iqueue_t *iqueue,
1127  int count)
1128 {
1129  /* Advance with proper wrap. */
1130  int head = iqueue->head + count;
1131  iqueue->head =
1132  (head & iqueue->mask_num_entries) +
1133  (head >> iqueue->log2_num_entries);
1134 
1135 #ifdef __BIG_ENDIAN__
1136  /* HACK: Track swapped entries. */
1137  iqueue->swapped -= count;
1138 #endif
1139 }
1140 
1141 /* Release the ring and bucket for an old entry in an iqueue.
1142  *
1143  * Releasing the ring allows more packets to be delivered to the ring.
1144  *
1145  * Releasing the bucket allows flows using the bucket to be moved to a
1146  * new ring when using GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY.
1147  *
1148  * This function is shorthand for "gxio_mpipe_credit(iqueue->context,
1149  * iqueue->ring, idesc->bucket_id, 1)", and it may be more convenient
1150  * to make that underlying call, using those values, instead of
1151  * tracking the entire "idesc".
1152  *
1153  * If packet processing is deferred, optimal performance requires that
1154  * the releasing be deferred as well.
1155  *
1156  * Please see the documentation for gxio_mpipe_iqueue_consume().
1157  *
1158  * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
1159  * @param idesc The descriptor which was processed.
1160  */
1161 static inline void gxio_mpipe_iqueue_release(gxio_mpipe_iqueue_t *iqueue,
1162  gxio_mpipe_idesc_t *idesc)
1163 {
1164  gxio_mpipe_credit(iqueue->context, iqueue->ring, idesc->bucket_id, 1);
1165 }
1166 
1167 /* Consume a packet from an "iqueue".
1168  *
1169  * After processing packets peeked at via gxio_mpipe_iqueue_peek()
1170  * or gxio_mpipe_iqueue_try_peek(), you must call this function, or
1171  * gxio_mpipe_iqueue_advance() plus gxio_mpipe_iqueue_release(), to
1172  * advance over those entries, and release their rings and buckets.
1173  *
1174  * You may call this function as each packet is processed, or you can
1175  * wait until several packets have been processed.
1176  *
1177  * Note that if you are using a single bucket, and you are handling
1178  * batches of N packets, then you can replace several calls to this
1179  * function with calls to "gxio_mpipe_iqueue_advance(iqueue, N)" and
1180  * "gxio_mpipe_credit(iqueue->context, iqueue->ring, bucket, N)".
1181  *
1182  * Note that if your classifier sets "idesc->nr", then you should
1183  * explicitly call "gxio_mpipe_iqueue_advance(iqueue, 1)" plus
1184  * "gxio_mpipe_credit(iqueue->context, iqueue->ring, -1, 1)", to
1185  * avoid incorrectly crediting the (unused) bucket.
1186  *
1187  * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
1188  * @param idesc The descriptor which was processed.
1189  */
1190 static inline void gxio_mpipe_iqueue_consume(gxio_mpipe_iqueue_t *iqueue,
1191  gxio_mpipe_idesc_t *idesc)
1192 {
1193  gxio_mpipe_iqueue_advance(iqueue, 1);
1194  gxio_mpipe_iqueue_release(iqueue, idesc);
1195 }
1196 
1197 /* Peek at the next packet(s) in an "iqueue", without waiting.
1198  *
1199  * If no packets are available, fills idesc_ref with NULL, and then
1200  * returns ::GXIO_MPIPE_ERR_IQUEUE_EMPTY. Otherwise, fills idesc_ref
1201  * with the address of the next valid packet descriptor, and returns
1202  * the maximum number of valid descriptors which can be processed.
1203  * You may process fewer descriptors if desired.
1204  *
1205  * Call gxio_mpipe_iqueue_consume() on each packet once it has been
1206  * processed (or dropped), to allow more packets to be delivered.
1207  *
1208  * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
1209  * @param idesc_ref A pointer to a packet descriptor pointer.
1210  * @return The (positive) number of packets which can be processed,
1211  * or ::GXIO_MPIPE_ERR_IQUEUE_EMPTY if no packets are available.
1212  */
1213 static inline int gxio_mpipe_iqueue_try_peek(gxio_mpipe_iqueue_t *iqueue,
1214  gxio_mpipe_idesc_t **idesc_ref)
1215 {
1216  gxio_mpipe_idesc_t *next;
1217 
1218  uint64_t head = iqueue->head;
1219  uint64_t tail = __gxio_mmio_read(iqueue->idescs);
1220 
1221  /* Available entries. */
1222  uint64_t avail =
1223  (tail >= head) ? (tail - head) : (iqueue->num_entries - head);
1224 
1225  if (avail == 0) {
1226  *idesc_ref = NULL;
1227  return GXIO_MPIPE_ERR_IQUEUE_EMPTY;
1228  }
1229 
1230  next = &iqueue->idescs[head];
1231 
1232  /* ISSUE: Is this helpful? */
1233  __insn_prefetch(next);
1234 
1235 #ifdef __BIG_ENDIAN__
1236  /* HACK: Swap new entries directly in memory. */
1237  {
1238  int i, j;
1239  for (i = iqueue->swapped; i < avail; i++) {
1240  for (j = 0; j < 8; j++)
1241  next[i].words[j] =
1242  __builtin_bswap64(next[i].words[j]);
1243  }
1244  iqueue->swapped = avail;
1245  }
1246 #endif
1247 
1248  *idesc_ref = next;
1249 
1250  return avail;
1251 }
1252 
1253 /* Drop a packet by pushing its buffer (if appropriate).
1254  *
1255  * NOTE: The caller must still call gxio_mpipe_iqueue_consume() if idesc
1256  * came from gxio_mpipe_iqueue_try_peek() or gxio_mpipe_iqueue_peek().
1257  *
1258  * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
1259  * @param idesc A packet descriptor.
1260  */
1261 static inline void gxio_mpipe_iqueue_drop(gxio_mpipe_iqueue_t *iqueue,
1262  gxio_mpipe_idesc_t *idesc)
1263 {
1264  /* FIXME: Handle "chaining" properly. */
1265 
1266  if (!idesc->be) {
1267  unsigned char *va = gxio_mpipe_idesc_get_va(idesc);
1268  gxio_mpipe_push_buffer(iqueue->context, idesc->stack_idx, va);
1269  }
1270 }
1271 
1272 /*****************************************************************
1273  * Egress Queue Wrapper *
1274  ******************************************************************/
1275 
1276 /* A convenient, thread-safe interface to an eDMA ring. */
1277 typedef struct {
1278 
1279  /* State object for tracking head and tail pointers. */
1280  __gxio_dma_queue_t dma_queue;
1281 
1282  /* The ring entries. */
1283  gxio_mpipe_edesc_t *edescs;
1284 
1285  /* The number of entries minus one. */
1286  unsigned long mask_num_entries;
1287 
1288  /* The log2() of the number of entries. */
1289  unsigned long log2_num_entries;
1290 
1291 } gxio_mpipe_equeue_t;
1292 
1293 /* Initialize an "equeue".
1294  *
1295  * Takes the equeue plus the same args as gxio_mpipe_init_edma_ring().
1296  */
1297 extern int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
1298  gxio_mpipe_context_t *context,
1299  unsigned int edma_ring_id,
1300  unsigned int channel,
1301  void *mem, unsigned int mem_size,
1302  unsigned int mem_flags);
1303 
1304 /* Reserve completion slots for edescs.
1305  *
1306  * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
1307  *
1308  * This function is slower than gxio_mpipe_equeue_reserve_fast(), but
1309  * returns a full 64 bit completion slot, which can be used with
1310  * gxio_mpipe_equeue_is_complete().
1311  *
1312  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1313  * @param num Number of slots to reserve (must be non-zero).
1314  * @return The first reserved completion slot, or a negative error code.
1315  */
1316 static inline int64_t gxio_mpipe_equeue_reserve(gxio_mpipe_equeue_t *equeue,
1317  unsigned int num)
1318 {
1319  return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, true);
1320 }
1321 
1322 /* Reserve completion slots for edescs, if possible.
1323  *
1324  * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
1325  *
1326  * This function is slower than gxio_mpipe_equeue_try_reserve_fast(),
1327  * but returns a full 64 bit completion slot, which can be used with
1328  * gxio_mpipe_equeue_is_complete().
1329  *
1330  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1331  * @param num Number of slots to reserve (must be non-zero).
1332  * @return The first reserved completion slot, or a negative error code.
1333  */
1334 static inline int64_t gxio_mpipe_equeue_try_reserve(gxio_mpipe_equeue_t
1335  *equeue, unsigned int num)
1336 {
1337  return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, false);
1338 }
1339 
1340 /* Reserve slots for edescs.
1341  *
1342  * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
1343  *
1344  * This function is faster than gxio_mpipe_equeue_reserve(), but
1345  * returns a 24 bit slot (instead of a 64 bit completion slot), which
1346  * thus cannot be used with gxio_mpipe_equeue_is_complete().
1347  *
1348  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1349  * @param num Number of slots to reserve (should be non-zero).
1350  * @return The first reserved slot, or a negative error code.
1351  */
1352 static inline int64_t gxio_mpipe_equeue_reserve_fast(gxio_mpipe_equeue_t
1353  *equeue, unsigned int num)
1354 {
1355  return __gxio_dma_queue_reserve(&equeue->dma_queue, num, true, false);
1356 }
1357 
1358 /* Reserve slots for edescs, if possible.
1359  *
1360  * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
1361  *
1362  * This function is faster than gxio_mpipe_equeue_try_reserve(), but
1363  * returns a 24 bit slot (instead of a 64 bit completion slot), which
1364  * thus cannot be used with gxio_mpipe_equeue_is_complete().
1365  *
1366  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1367  * @param num Number of slots to reserve (should be non-zero).
1368  * @return The first reserved slot, or a negative error code.
1369  */
1370 static inline int64_t gxio_mpipe_equeue_try_reserve_fast(gxio_mpipe_equeue_t
1371  *equeue,
1372  unsigned int num)
1373 {
1374  return __gxio_dma_queue_reserve(&equeue->dma_queue, num, false, false);
1375 }
1376 
1377 /*
1378  * HACK: This helper function tricks gcc 4.6 into avoiding saving
1379  * a copy of "edesc->words[0]" on the stack for no obvious reason.
1380  */
1381 
1382 static inline void gxio_mpipe_equeue_put_at_aux(gxio_mpipe_equeue_t *equeue,
1383  uint_reg_t ew[2],
1384  unsigned long slot)
1385 {
1386  unsigned long edma_slot = slot & equeue->mask_num_entries;
1387  gxio_mpipe_edesc_t *edesc_p = &equeue->edescs[edma_slot];
1388 
1389  /*
1390  * ISSUE: Could set eDMA ring to be on generation 1 at start, which
1391  * would avoid the negation here, perhaps allowing "__insn_bfins()".
1392  */
1393  ew[0] |= !((slot >> equeue->log2_num_entries) & 1);
1394 
1395  /*
1396  * NOTE: We use "__gxio_mmio_write64()", plus the fact that the eDMA
1397  * queue alignment restrictions ensure that these two words are on
1398  * the same cacheline, to force proper ordering between the stores.
1399  */
1400  __gxio_mmio_write64(&edesc_p->words[1], ew[1]);
1401  __gxio_mmio_write64(&edesc_p->words[0], ew[0]);
1402 }
1403 
1404 /* Post an edesc to a given slot in an equeue.
1405  *
1406  * This function copies the supplied edesc into entry "slot mod N" in
1407  * the underlying ring, setting the "gen" bit to the appropriate value
1408  * based on "(slot mod N*2)", where "N" is the size of the ring. Note
1409  * that the higher bits of slot are unused, and thus, this function
1410  * can handle "slots" as well as "completion slots".
1411  *
1412  * Normally this function is used to fill in slots reserved by
1413  * gxio_mpipe_equeue_try_reserve(), gxio_mpipe_equeue_reserve(),
1414  * gxio_mpipe_equeue_try_reserve_fast(), or
1415  * gxio_mpipe_equeue_reserve_fast().
1416  *
1417  * This function can also be used without "reserving" slots, if the
1418  * application KNOWS that the ring can never overflow, for example, by
1419  * pushing fewer buffers into the buffer stacks than there are total
1420  * slots in the equeue, but this is NOT recommended.
1421  *
1422  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1423  * @param edesc The egress descriptor to be posted.
1424  * @param slot An egress slot (only the low bits are actually used).
1425  */
1426 static inline void gxio_mpipe_equeue_put_at(gxio_mpipe_equeue_t *equeue,
1427  gxio_mpipe_edesc_t edesc,
1428  unsigned long slot)
1429 {
1430  gxio_mpipe_equeue_put_at_aux(equeue, edesc.words, slot);
1431 }
1432 
1433 /* Post an edesc to the next slot in an equeue.
1434  *
1435  * This is a convenience wrapper around
1436  * gxio_mpipe_equeue_reserve_fast() and gxio_mpipe_equeue_put_at().
1437  *
1438  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1439  * @param edesc The egress descriptor to be posted.
1440  * @return 0 on success.
1441  */
1442 static inline int gxio_mpipe_equeue_put(gxio_mpipe_equeue_t *equeue,
1443  gxio_mpipe_edesc_t edesc)
1444 {
1445  int64_t slot = gxio_mpipe_equeue_reserve_fast(equeue, 1);
1446  if (slot < 0)
1447  return (int)slot;
1448 
1449  gxio_mpipe_equeue_put_at(equeue, edesc, slot);
1450 
1451  return 0;
1452 }
1453 
1454 /* Ask the mPIPE hardware to egress outstanding packets immediately.
1455  *
1456  * This call is not necessary, but may slightly reduce overall latency.
1457  *
1458  * Technically, you should flush all gxio_mpipe_equeue_put_at() writes
1459  * to memory before calling this function, to ensure the descriptors
1460  * are visible in memory before the mPIPE hardware actually looks for
1461  * them. But such a race should be very rare, and the only side effect would
1462  * be increased latency, so it is up to the caller to decide whether
1463  * or not to flush memory.
1464  *
1465  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1466  */
1467 static inline void gxio_mpipe_equeue_flush(gxio_mpipe_equeue_t *equeue)
1468 {
1469  /* Use "ring_idx = 0" and "count = 0" to "wake up" the eDMA ring. */
1470  MPIPE_EDMA_POST_REGION_VAL_t val = { {0} };
1471  /* Flush the write buffers. */
1472  __insn_flushwb();
1473  __gxio_mmio_write(equeue->dma_queue.post_region_addr, val.word);
1474 }
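
/* Illustrative sketch (not part of this header): egress a single packet via
 * the gxio_mpipe_equeue_put() convenience wrapper, then nudge the hardware
 * with gxio_mpipe_equeue_flush().  The edesc field names are assumptions,
 * as in the previous example. */
static int example_send_one(gxio_mpipe_equeue_t *equeue, void *buf, size_t len)
{
	gxio_mpipe_edesc_t edesc = { { 0 } };
	int rc;

	edesc.bound = 1;
	edesc.xfer_size = len;
	edesc.va = (unsigned long)buf;

	rc = gxio_mpipe_equeue_put(equeue, edesc);
	if (rc < 0)
		return rc;	/* e.g. the ring was full. */

	/* Optional: ask the hardware to start egressing immediately. */
	gxio_mpipe_equeue_flush(equeue);
	return 0;
}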
1475 
1476 /* Determine if a given edesc has been completed.
1477  *
1478  * Note that this function requires a "completion slot", and thus may
1479  * NOT be used with a "slot" from gxio_mpipe_equeue_reserve_fast() or
1480  * gxio_mpipe_equeue_try_reserve_fast().
1481  *
1482  * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
1483  * @param completion_slot The completion slot used by the edesc.
1484  * @param update If true, and the desc does not appear to have completed
1485  * yet, then update any software cache of the hardware completion counter,
1486  * and check again. This should normally be true.
1487  * @return True iff the given edesc has been completed.
1488  */
1489 static inline int gxio_mpipe_equeue_is_complete(gxio_mpipe_equeue_t *equeue,
1490  int64_t completion_slot,
1491  int update)
1492 {
1493  return __gxio_dma_queue_is_complete(&equeue->dma_queue,
1494  completion_slot, update);
1495 }
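
/* Illustrative sketch (not part of this header): egress one descriptor and
 * busy-wait for its completion.  Completion slots must come from the
 * non-"fast" reserve calls; gxio_mpipe_equeue_try_reserve() is assumed to
 * take the same (equeue, num) arguments as the fast variant but to return a
 * 64-bit completion slot, matching the contrast drawn above. */
static int example_wait_for_egress(gxio_mpipe_equeue_t *equeue,
				   gxio_mpipe_edesc_t edesc)
{
	int64_t cslot = gxio_mpipe_equeue_try_reserve(equeue, 1);
	if (cslot < 0)
		return (int)cslot;

	gxio_mpipe_equeue_put_at(equeue, edesc, cslot);

	/* "update = 1" refreshes any cached copy of the hardware
	 * completion counter before re-checking. */
	while (!gxio_mpipe_equeue_is_complete(equeue, cslot, 1))
		;

	return 0;
}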
1496 
1497 /*****************************************************************
1498  *                        Link Management                        *
1499  ******************************************************************/
1500 
1501 /*
1502  *
1503  * Functions for manipulating and sensing the state and configuration
1504  * of physical network links.
1505  *
1506  * @section gxio_mpipe_link_perm Link Permissions
1507  *
1508  * Opening a link (with gxio_mpipe_link_open()) requests a set of link
1509  * permissions, which control what may be done with the link, and potentially
1510  * what permissions may be granted to other processes.
1511  *
1512  * Data permission allows the process to receive packets from the link by
1513  * specifying the link's channel number in mPIPE packet distribution rules,
1514  * and to send packets to the link by using the link's channel number as
1515  * the target for an eDMA ring.
1516  *
1517  * Stats permission allows the process to retrieve link attributes (such as
1518  * the speeds it is capable of running at, or whether it is currently up), and
1519  * to read and write certain statistics-related registers in the link's MAC.
1520  *
1521  * Control permission allows the process to retrieve and modify link attributes
1522  * (so that it may, for example, bring the link up and take it down), and
1523  * read and write many registers in the link's MAC and PHY.
1524  *
1525  * Any permission may be requested as shared, which allows other processes
1526  * to also request shared permission, or exclusive, which prevents other
1527  * processes from requesting it. In keeping with GXIO's typical usage in
1528  * an embedded environment, the defaults for all permissions are shared.
1529  *
1530  * Permissions are granted on a first-come, first-served basis, so if two
1531  * applications request an exclusive permission on the same link, the one
1532  * to run first will win. Note, however, that some system components, like
1533  * the kernel Ethernet driver, may get an opportunity to open links before
1534  * any applications run.
1535  *
1536  * @section gxio_mpipe_link_names Link Names
1537  *
1538  * Link names are of the form gbe<em>number</em> (for Gigabit Ethernet),
1539  * xgbe<em>number</em> (for 10 Gigabit Ethernet), loop<em>number</em> (for
1540  * internal mPIPE loopback), or ilk<em>number</em>/<em>channel</em>
1541  * (for Interlaken links); for instance, gbe0, xgbe1, loop3, and
1542  * ilk0/12 are all possible link names. The correspondence between
1543  * the link name and an mPIPE instance number or mPIPE channel number is
1544  * system-dependent; not all links will exist on all systems, and the set
1545  * of numbers used for a particular link type may not start at zero and may
1546  * not be contiguous. Use gxio_mpipe_link_enumerate() to retrieve the set of
1547  * links which exist on a system, and always use gxio_mpipe_link_instance()
1548  * to determine which mPIPE controls a particular link.
1549  *
1550  * Note that in some cases, links may share hardware, such as PHYs, or
1551  * internal mPIPE buffers; in these cases, only one of the links may be
1552  * opened at a time. This is especially common with xgbe and gbe ports,
1553  * since each xgbe port uses 4 SERDES lanes, each of which may also be
1554  * configured as one gbe port.
1555  *
1556  * @section gxio_mpipe_link_states Link States
1557  *
1558  * The mPIPE link management model revolves around three different states,
1559  * which are maintained for each link:
1560  *
1561  * 1. The <em>current</em> link state: is the link up now, and if so, at
1562  * what speed?
1563  *
1564  * 2. The <em>desired</em> link state: what do we want the link state to be?
1565  * The system is always working to make this state the current state;
1566  * thus, if the desired state is up, and the link is down, we'll be
1567  * constantly trying to bring it up, automatically.
1568  *
1569  * 3. The <em>possible</em> link state: what speeds are valid for this
1570  * particular link? Or, in other words, what are the capabilities of
1571  * the link hardware?
1572  *
1573  * These link states are not, strictly speaking, related to application
1574  * state; they may be manipulated at any time, whether or not the link
1575  * is currently being used for data transfer. However, for convenience,
1576  * gxio_mpipe_link_open() and gxio_mpipe_link_close() (or application exit)
1577  * can affect the link state. These implicit link management operations
1578  * may be modified or disabled by the use of link open flags.
1579  *
1580  * From an application, you can use gxio_mpipe_link_get_attr()
1581  * and gxio_mpipe_link_set_attr() to manipulate the link states.
1582  * gxio_mpipe_link_get_attr() with ::GXIO_MPIPE_LINK_POSSIBLE_STATE
1583  * gets you the possible link state. gxio_mpipe_link_get_attr() with
1584  * ::GXIO_MPIPE_LINK_CURRENT_STATE gets you the current link state.
1585  * Finally, gxio_mpipe_link_set_attr() and gxio_mpipe_link_get_attr()
1586  * with ::GXIO_MPIPE_LINK_DESIRED_STATE allow you to modify or retrieve
1587  * the desired link state.
1588  *
1589  * If you want to manage a link from a part of your application which isn't
1590  * involved in packet processing, you can use the ::GXIO_MPIPE_LINK_NO_DATA
1591  * flag on a gxio_mpipe_link_open() call. This opens the link, but does
1592  * not request data permission, so it does not conflict with any exclusive
1593  * permissions which may be held by other processes. You can then use
1594  * gxio_mpipe_link_get_attr() and gxio_mpipe_link_set_attr() on this link
1595  * object to bring up or take down the link.
1596  *
1597  * Some links support link state bits which enable various loopback
1598  * modes. ::GXIO_MPIPE_LINK_LOOP_MAC tests datapaths within the Tile
1599  * Processor itself; ::GXIO_MPIPE_LINK_LOOP_PHY tests the datapath between
1600  * the Tile Processor and the external physical layer interface chip; and
1601  * ::GXIO_MPIPE_LINK_LOOP_EXT tests the entire network datapath with the
1602  * aid of an external loopback connector. In addition to enabling hardware
1603  * testing, such configuration can be useful for software testing, as well.
1604  *
1605  * When LOOP_MAC or LOOP_PHY is enabled, packets transmitted on a channel
1606  * will be received by that channel, instead of being emitted on the
1607  * physical link, and packets received on the physical link will be ignored.
1608  * Other than that, all standard GXIO operations work as you might expect.
1609  * Note that loopback operation requires that the link be brought up using
1610  * one or more of the GXIO_MPIPE_LINK_SPEED_xxx link state bits.
1611  *
1612  * Those familiar with previous versions of the MDE on TILEPro hardware
1613  * will notice significant similarities between the NetIO link management
1614  * model and the mPIPE link management model. However, the NetIO model
1615  * was developed in stages, and some of its features -- for instance,
1616  * the default setting of certain flags -- were shaped by the need to be
1617  * compatible with previous versions of NetIO. Since the features provided
1618  * by the mPIPE hardware and the mPIPE GXIO library are significantly
1619  * different than those provided by NetIO, in some cases, we have made
1620  * different choices in the mPIPE link management API. Thus, please read
1621  * this documentation carefully before assuming that mPIPE link management
1622  * operations are exactly equivalent to their NetIO counterparts.
1623  */
1624 
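
/* Illustrative sketch (not part of this header): bring a link up from a
 * management-only process, per the ::GXIO_MPIPE_LINK_NO_DATA discussion
 * above.  The gxio_mpipe_link_set_attr() signature and the
 * GXIO_MPIPE_LINK_SPEED_1G state bit used here are assumptions for the sake
 * of the example; see <hv/drv_mpipe_intf.h> for the authoritative names. */
static int example_force_link_up(gxio_mpipe_context_t *context,
				 const char *name, gxio_mpipe_link_t *link)
{
	int rc;

	/* Open without data permission, so processes holding exclusive
	 * data permission on this link are not disturbed. */
	rc = gxio_mpipe_link_open(link, context, name, GXIO_MPIPE_LINK_NO_DATA);
	if (rc < 0)
		return rc;

	/* Ask the system to keep trying to bring the link up at 1 Gb/s.
	 * The caller keeps "link" open; closing it (or exiting) may
	 * implicitly change the link state again, as described above. */
	return gxio_mpipe_link_set_attr(link, GXIO_MPIPE_LINK_DESIRED_STATE,
					GXIO_MPIPE_LINK_SPEED_1G);
}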
1625 /* An object used to manage mPIPE link state and resources. */
1626 typedef struct {
1627  /* The overall mPIPE context. */
1628  gxio_mpipe_context_t *context;
1629 
1630  /* The channel number used by this link. */
1631  int channel;
1632 
1633  /* The MAC index used by this link. */
1634  int mac;
1635 } gxio_mpipe_link_t;
1636 
1637 /* Retrieve one of this system's legal link names, and its MAC address.
1638  *
1639  * @param index Link name index. If a system supports N legal link names,
1640  * then indices between 0 and N - 1, inclusive, each correspond to one of
1641  * those names. Thus, to retrieve all of a system's legal link names,
1642  * call this function in a loop, starting with an index of zero, and
1643  * incrementing it once per iteration until -1 is returned.
1644  * @param link_name Pointer to the buffer which will receive the retrieved
1645  * link name. The buffer should contain space for at least
1646  * ::GXIO_MPIPE_LINK_NAME_LEN bytes; the returned name, including the
1647  * terminating null byte, will be no longer than that.
1648  * @param mac_addr Pointer to the buffer which will receive the retrieved
1649  * MAC address. The buffer should contain space for at least 6 bytes.
1650  * @return Zero if a link name was successfully retrieved; -1 if one was
1651  * not.
1652  */
1653 extern int gxio_mpipe_link_enumerate_mac(int index, char *link_name,
1654  uint8_t *mac_addr);
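
/* Illustrative sketch (not part of this header): walk all of this system's
 * legal link names, as described above.  The buffer sizes follow the
 * documented minimums (::GXIO_MPIPE_LINK_NAME_LEN bytes for the name, six
 * bytes for the MAC address). */
static void example_list_links(void)
{
	char name[GXIO_MPIPE_LINK_NAME_LEN];
	uint8_t mac[6];
	int i;

	for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) == 0; i++) {
		/* "name" and "mac" now describe the i-th link; a real
		 * application would record or display them here. */
	}
}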
1655 
1656 /* Open an mPIPE link.
1657  *
1658  * A link must be opened before it may be used to send or receive packets,
1659  * and before its state may be examined or changed. Depending upon the
1660  * link's intended use, one or more link permissions may be requested via
1661  * the flags parameter; see @ref gxio_mpipe_link_perm. In addition, flags
1662  * may request that the link's state be modified at open time. See @ref
1663  * gxio_mpipe_link_states and @ref gxio_mpipe_link_open_flags for more detail.
1664  *
1665  * @param link A link state object, which will be initialized if this
1666  * function completes successfully.
1667  * @param context An initialized mPIPE context.
1668  * @param link_name Name of the link.
1669  * @param flags Zero or more @ref gxio_mpipe_link_open_flags, ORed together.
1670  * @return 0 if the link was successfully opened, or a negative error code.
1671  *
1672  */
1673 extern int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
1674  gxio_mpipe_context_t *context,
1675  const char *link_name, unsigned int flags);
1676 
1677 /* Close an mPIPE link.
1678  *
1679  * Closing a link makes it available for use by other processes. Once
1680  * a link has been closed, packets may no longer be sent on or received
1681  * from the link, and its state may not be examined or changed.
1682  *
1683  * @param link A link state object, which will no longer be initialized
1684  * if this function completes successfully.
1685  * @return 0 if the link was successfully closed, or a negative error code.
1686  *
1687  */
1688 extern int gxio_mpipe_link_close(gxio_mpipe_link_t *link);
1689 
1690 /* Return a link's channel number.
1691  *
1692  * @param link A properly initialized link state object.
1693  * @return The channel number for the link.
1694  */
1695 static inline int gxio_mpipe_link_channel(gxio_mpipe_link_t *link)
1696 {
1697  return link->channel;
1698 }
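
/* Illustrative sketch (not part of this header): open a link with the
 * default (shared) permissions, learn its channel number for later use in
 * classifier rules or as an eDMA ring target, and close it again.  Passing
 * zero for the flags requests the defaults described above. */
static int example_query_channel(gxio_mpipe_context_t *context,
				 const char *name, int *channel)
{
	gxio_mpipe_link_t link;
	int rc = gxio_mpipe_link_open(&link, context, name, 0);
	if (rc < 0)
		return rc;

	*channel = gxio_mpipe_link_channel(&link);

	/* A real application would normally keep the link open while the
	 * channel is in use; closing releases it for other processes. */
	return gxio_mpipe_link_close(&link);
}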
1699 
1700 ///////////////////////////////////////////////////////////////////
1701 //                            Timestamp                           //
1702 ///////////////////////////////////////////////////////////////////
1703 
1704 /* Get the mPIPE hardware timestamp at the time this routine is called.
1705  *
1706  * @param context An initialized mPIPE context.
1707  * @param ts A timespec structure to store the current clock.
1708  * @return If the call was successful, zero; otherwise, a negative error
1709  * code.
1710  */
1711 extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
1712  struct timespec *ts);
1713 
1714 /* Set the timestamp of mPIPE.
1715  *
1716  * @param context An initialized mPIPE context.
1717  * @param ts A timespec structure containing the requested clock value.
1718  * @return If the call was successful, zero; otherwise, a negative error
1719  * code.
1720  */
1721 extern int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
1722  const struct timespec *ts);
1723 
1724 /* Adjust the timestamp of mPIPE.
1725  *
1726  * @param context An initialized mPIPE context.
1727  * @param delta A signed time offset to adjust, in nanoseconds.
1728  * The absolute value of this parameter must be less than or
1729  * equal to 1000000000.
1730  * @return If the call was successful, zero; otherwise, a negative error
1731  * code.
1732  */
1733 extern int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context,
1734  int64_t delta);
1735 
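
/* Illustrative sketch (not part of this header): step the mPIPE clock
 * forward by one microsecond and read back the result, using the timestamp
 * routines declared above.  A PTP daemon would normally compute "delta"
 * from an external time reference rather than hard-coding it. */
static int example_step_clock(gxio_mpipe_context_t *context)
{
	struct timespec ts;
	int rc;

	/* Adjust by +1000 ns; |delta| must not exceed 1000000000. */
	rc = gxio_mpipe_adjust_timestamp(context, 1000);
	if (rc < 0)
		return rc;

	/* Read back the (now adjusted) hardware timestamp. */
	return gxio_mpipe_get_timestamp(context, &ts);
}
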
1736 #endif /* !_GXIO_MPIPE_H_ */