myri10ge.c
/*************************************************************************
 * myri10ge.c: Myricom Myri-10G Ethernet driver.
 *
 * Copyright (C) 2005 - 2011 Myricom, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Myricom, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * If the eeprom on your board is not recent enough, you will need to get a
 * newer firmware image at:
 * http://www.myri.com/scs/download-Myri10GE.html
 *
 * Contact Information:
 * Myricom, Inc., 325 N. Santa Anita Avenue, Arcadia, CA 91006
 *************************************************************************/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/tcp.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>
#include <linux/dca.h>
#include <linux/ip.h>
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/firmware.h>
#include <linux/delay.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/crc32.h>
#include <linux/moduleparam.h>
#include <linux/io.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <asm/byteorder.h>
#include <asm/io.h>
#include <asm/processor.h>
#ifdef CONFIG_MTRR
#include <asm/mtrr.h>
#endif

#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"

#define MYRI10GE_VERSION_STR "1.5.3-1.534"

MODULE_DESCRIPTION("Myricom 10G driver (10GbE)");
MODULE_AUTHOR("Maintainer: help@myri.com");
MODULE_VERSION(MYRI10GE_VERSION_STR);
MODULE_LICENSE("Dual BSD/GPL");

#define MYRI10GE_MAX_ETHER_MTU 9014

#define MYRI10GE_ETH_STOPPED 0
#define MYRI10GE_ETH_STOPPING 1
#define MYRI10GE_ETH_STARTING 2
#define MYRI10GE_ETH_RUNNING 3
#define MYRI10GE_ETH_OPEN_FAILED 4

#define MYRI10GE_EEPROM_STRINGS_SIZE 256
#define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2)
#define MYRI10GE_MAX_LRO_DESCRIPTORS 8
#define MYRI10GE_LRO_MAX_PKTS 64

#define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff)
#define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff

#define MYRI10GE_ALLOC_ORDER 0
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
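/*
 * For example, with 4 KB pages and MYRI10GE_ALLOC_ORDER == 0,
 * MYRI10GE_ALLOC_SIZE is 4096, so a maximal 9014-byte frame spans
 * 9014/4096 + 1 = 3 fragments.
 */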

#define MYRI10GE_MAX_SLICES 32

struct myri10ge_rx_buffer_state {
	struct page *page;
	int page_offset;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_tx_buffer_state {
	struct sk_buff *skb;
	int last;
	DEFINE_DMA_UNMAP_ADDR(bus);
	DEFINE_DMA_UNMAP_LEN(len);
};

struct myri10ge_cmd {
	u32 data0;
	u32 data1;
	u32 data2;
};

struct myri10ge_rx_buf {
	struct mcp_kreq_ether_recv __iomem *lanai;	/* lanai ptr for recv ring */
	struct mcp_kreq_ether_recv *shadow;	/* host shadow of recv ring */
	struct myri10ge_rx_buffer_state *info;
	struct page *page;
	dma_addr_t bus;
	int page_offset;
	int cnt;
	int fill_cnt;
	int alloc_fail;
	int mask;		/* number of rx slots -1 */
	int watchdog_needed;
};

struct myri10ge_tx_buf {
	struct mcp_kreq_ether_send __iomem *lanai;	/* lanai ptr for sendq */
	__be32 __iomem *send_go;	/* "go" doorbell ptr */
	__be32 __iomem *send_stop;	/* "stop" doorbell ptr */
	struct mcp_kreq_ether_send *req_list;	/* host shadow of sendq */
	char *req_bytes;
	struct myri10ge_tx_buffer_state *info;
	int mask;		/* number of transmit slots -1 */
	int req ____cacheline_aligned;	/* transmit slots submitted */
	int pkt_start;		/* packets started */
	int stop_queue;
	int linearized;
	int done ____cacheline_aligned;	/* transmit slots completed */
	int pkt_done;		/* packets completed */
	int wake_queue;
	int queue_active;
};

struct myri10ge_rx_done {
	struct mcp_slot *entry;
	dma_addr_t bus;
	int cnt;
	int idx;
	struct net_lro_mgr lro_mgr;
	struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS];
};

struct myri10ge_slice_netstats {
	unsigned long rx_packets;
	unsigned long tx_packets;
	unsigned long rx_bytes;
	unsigned long tx_bytes;
	unsigned long rx_dropped;
	unsigned long tx_dropped;
};

struct myri10ge_slice_state {
	struct myri10ge_tx_buf tx;	/* transmit ring */
	struct myri10ge_rx_buf rx_small;
	struct myri10ge_rx_buf rx_big;
	struct myri10ge_rx_done rx_done;
	struct net_device *dev;
	struct napi_struct napi;
	struct myri10ge_priv *mgp;
	struct myri10ge_slice_netstats stats;
	__be32 __iomem *irq_claim;
	struct mcp_irq_data *fw_stats;
	dma_addr_t fw_stats_bus;
	int watchdog_tx_done;
	int watchdog_tx_req;
	int watchdog_rx_done;
	int stuck;
#ifdef CONFIG_MYRI10GE_DCA
	int cached_dca_tag;
	int cpu;
	__be32 __iomem *dca_tag;
#endif
	char irq_desc[32];
};

struct myri10ge_priv {
	struct myri10ge_slice_state *ss;
	int tx_boundary;	/* boundary transmits cannot cross */
	int num_slices;
	int running;		/* running? */
	int small_bytes;
	int big_bytes;
	int max_intr_slots;
	struct net_device *dev;
	u8 __iomem *sram;
	int sram_size;
	unsigned long board_span;
	unsigned long iomem_base;
	__be32 __iomem *irq_deassert;
	char *mac_addr_string;
	struct mcp_cmd_response *cmd;
	dma_addr_t cmd_bus;
	struct pci_dev *pdev;
	int msi_enabled;
	int msix_enabled;
	struct msix_entry *msix_vectors;
#ifdef CONFIG_MYRI10GE_DCA
	int dca_enabled;
	int relaxed_order;
#endif
	u32 link_state;
	unsigned int rdma_tags_available;
	int intr_coal_delay;
	__be32 __iomem *intr_coal_delay_ptr;
	int mtrr;
	int wc_enabled;
	int down_cnt;
	wait_queue_head_t down_wq;
	struct work_struct watchdog_work;
	struct timer_list watchdog_timer;
	int watchdog_resets;
	int watchdog_pause;
	int pause;
	bool fw_name_allocated;
	char *fw_name;
	char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE];
	char *product_code_string;
	char fw_version[128];
	int fw_ver_major;
	int fw_ver_minor;
	int fw_ver_tiny;
	int adopted_rx_filter_bug;
	u8 mac_addr[6];		/* eeprom mac address */
	unsigned long serial_number;
	int vendor_specific_offset;
	int fw_multicast_support;
	u32 features;
	u32 max_tso6;
	u32 read_dma;
	u32 write_dma;
	u32 read_write_dma;
	u32 link_changes;
	u32 msg_enable;
	unsigned int board_number;
	int rebooted;
};

static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat";
static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat";
static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat";
static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat";
MODULE_FIRMWARE("myri10ge_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_eth_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat");
MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");

/* Careful: must be accessed under kparam_block_sysfs_write */
static char *myri10ge_fw_name = NULL;
module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");

#define MYRI10GE_MAX_BOARDS 8
static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] =
    {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL };
module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
			 0444);
MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");

static int myri10ge_ecrc_enable = 1;
module_param(myri10ge_ecrc_enable, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");

static int myri10ge_small_bytes = -1;	/* -1 == auto */
module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");

static int myri10ge_msi = 1;	/* enable msi by default */
module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");

static int myri10ge_intr_coal_delay = 75;
module_param(myri10ge_intr_coal_delay, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");

static int myri10ge_flow_control = 1;
module_param(myri10ge_flow_control, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");

static int myri10ge_deassert_wait = 1;
module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_deassert_wait,
		 "Wait when deasserting legacy interrupts");

static int myri10ge_force_firmware = 0;
module_param(myri10ge_force_firmware, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_force_firmware,
		 "Force firmware to assume aligned completions");

static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
module_param(myri10ge_initial_mtu, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");

static int myri10ge_napi_weight = 64;
module_param(myri10ge_napi_weight, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");

static int myri10ge_watchdog_timeout = 1;
module_param(myri10ge_watchdog_timeout, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");

static int myri10ge_max_irq_loops = 1048576;
module_param(myri10ge_max_irq_loops, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_max_irq_loops,
		 "Set stuck legacy IRQ detection threshold");

#define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK

static int myri10ge_debug = -1;	/* defaults above */
module_param(myri10ge_debug, int, 0);
MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");

static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS;
module_param(myri10ge_lro_max_pkts, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_lro_max_pkts,
		 "Number of LRO packets to be aggregated");

static int myri10ge_fill_thresh = 256;
module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");

static int myri10ge_reset_recover = 1;

static int myri10ge_max_slices = 1;
module_param(myri10ge_max_slices, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues");

static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
module_param(myri10ge_rss_hash, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do");

static int myri10ge_dca = 1;
module_param(myri10ge_dca, int, S_IRUGO);
MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible");
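
/*
 * All of the above are standard module parameters: they can be set at
 * load time (for example "modprobe myri10ge myri10ge_msi=0"), and the
 * ones declared with S_IWUSR may also be changed at runtime through
 * /sys/module/myri10ge/parameters/.
 */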

#define MYRI10GE_FW_OFFSET 1024*1024
#define MYRI10GE_HIGHPART_TO_U32(X) \
(sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0)
#define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X))

#define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8)

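/*
 * For example, a 64-bit bus address 0x123456789a splits into a high part
 * of 0x00000012 and a low part of 0x3456789a; on a platform with a
 * 32-bit dma_addr_t the high part is simply 0.
 */
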
static void myri10ge_set_multicast_list(struct net_device *dev);
static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
				   struct net_device *dev);

static inline void put_be32(__be32 val, __be32 __iomem * p)
{
	__raw_writel((__force __u32) val, (__force void __iomem *)p);
}

static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev,
						    struct rtnl_link_stats64 *stats);

static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated)
{
	if (mgp->fw_name_allocated)
		kfree(mgp->fw_name);
	mgp->fw_name = name;
	mgp->fw_name_allocated = allocated;
}

static int
myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
		  struct myri10ge_cmd *data, int atomic)
{
	struct mcp_cmd *buf;
	char buf_bytes[sizeof(*buf) + 8];
	struct mcp_cmd_response *response = mgp->cmd;
	char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD;
	u32 dma_low, dma_high, result, value;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8);

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf->response_addr.low = htonl(dma_low);
	buf->response_addr.high = htonl(dma_high);
	response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT);
	mb();
	myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf));

	/* wait up to 15ms. Longest command is the DMA benchmark,
	 * which is capped at 5ms, but runs from a timeout handler
	 * that runs every 7.8ms. So a 15ms timeout leaves us with
	 * a 2.2ms margin
	 */
	if (atomic) {
		/* if atomic is set, do not sleep,
		 * and try to get the completion quickly
		 * (1ms will be enough for those commands) */
		for (sleep_total = 0;
		     sleep_total < 1000 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total += 10) {
			udelay(10);
			mb();
		}
	} else {
		/* use msleep for most commands */
		for (sleep_total = 0;
		     sleep_total < 15 &&
		     response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT);
		     sleep_total++)
			msleep(1);
	}

	result = ntohl(response->result);
	value = ntohl(response->data);
	if (result != MYRI10GE_NO_RESPONSE_RESULT) {
		if (result == 0) {
			data->data0 = value;
			return 0;
		} else if (result == MXGEFW_CMD_UNKNOWN) {
			return -ENOSYS;
		} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
			return -E2BIG;
		} else if (result == MXGEFW_CMD_ERROR_RANGE &&
			   cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
			   (data->data1 &
			    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) != 0) {
			return -ERANGE;
		} else {
			dev_err(&mgp->pdev->dev,
				"command %d failed, result = %d\n",
				cmd, result);
			return -ENXIO;
		}
	}

	dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n",
		cmd, result);
	return -EAGAIN;
}

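/*
 * Typical caller pattern (a usage sketch; "ring_bytes" is illustrative):
 * fill in cmd.data0..data2 as inputs, issue the command, and read any
 * result back out of cmd.data0:
 *
 *	struct myri10ge_cmd cmd;
 *	int status;
 *
 *	cmd.data0 = 0;
 *	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
 *	if (status == 0)
 *		ring_bytes = cmd.data0;	(result returned in data0)
 */
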
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
	char *ptr, *limit;
	int i;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;
				mgp->mac_addr[i] =
				    simple_strtoul(ptr, &ptr, 16);
				ptr += 1;
			}
		}
		if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->product_code_string = ptr;
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->serial_number = simple_strtoul(ptr, &ptr, 10);
		}
		while (ptr < limit && *ptr++) ;
	}

	return 0;

abort:
	dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n");
	return -ENXIO;
}
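
/*
 * Illustration (hypothetical contents, not from a real board): an
 * eeprom_strings blob such as
 *	"SN=381299\0MAC=00:60:dd:47:87:2d\0PC=10G-PCIE-8B-S\0\0"
 * would yield serial_number 381299, the six mac_addr bytes, and
 * product_code_string pointing at "10G-PCIE-8B-S".
 */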

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high;
	int i;

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */
	buf[3] = htonl(dma_high);	/* dummy addr MSW */
	buf[4] = htonl(dma_low);	/* dummy addr LSW */
	buf[5] = htonl(enable);	/* enable? */

	submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++)
		msleep(1);
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA)
		dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n",
			(enable ? "enable" : "disable"));
}

static int
myri10ge_validate_firmware(struct myri10ge_priv *mgp,
			   struct mcp_gen_header *hdr)
{
	struct device *dev = &mgp->pdev->dev;

	/* check firmware type */
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type));
		return -EINVAL;
	}

	/* save firmware version for ethtool */
	strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version));

	sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major,
	       &mgp->fw_ver_minor, &mgp->fw_ver_tiny);

	if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR &&
	      mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		dev_err(dev, "Found firmware version %s\n", mgp->fw_version);
		dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR,
			MXGEFW_VERSION_MINOR);
		return -EINVAL;
	}
	return 0;
}

static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size)
{
	unsigned crc, reread_crc;
	const struct firmware *fw;
	struct device *dev = &mgp->pdev->dev;
	unsigned char *fw_readback;
	struct mcp_gen_header *hdr;
	size_t hdr_offset;
	int status;
	unsigned i;

	if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) {
		dev_err(dev, "Unable to load %s firmware image via hotplug\n",
			mgp->fw_name);
		status = -EINVAL;
		goto abort_with_nothing;
	}

	/* check size */

	if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET ||
	    fw->size < MCP_HEADER_PTR_OFFSET + 4) {
		dev_err(dev, "Firmware size invalid:%d\n", (int)fw->size);
		status = -EINVAL;
		goto abort_with_fw;
	}

	/* check id */
	hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) {
		dev_err(dev, "Bad firmware file\n");
		status = -EINVAL;
		goto abort_with_fw;
	}
	hdr = (void *)(fw->data + hdr_offset);

	status = myri10ge_validate_firmware(mgp, hdr);
	if (status != 0)
		goto abort_with_fw;

	crc = crc32(~0, fw->data, fw->size);
	for (i = 0; i < fw->size; i += 256) {
		myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i,
				  fw->data + i,
				  min(256U, (unsigned)(fw->size - i)));
		mb();
		readb(mgp->sram);
	}
	fw_readback = vmalloc(fw->size);
	if (!fw_readback) {
		status = -ENOMEM;
		goto abort_with_fw;
	}
	/* corruption checking is good for parity recovery and buggy chipset */
	memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size);
	reread_crc = crc32(~0, fw_readback, fw->size);
	vfree(fw_readback);
	if (crc != reread_crc) {
		dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n",
			(unsigned)fw->size, reread_crc, crc);
		status = -EIO;
		goto abort_with_fw;
	}
	*size = (u32) fw->size;

abort_with_fw:
	release_firmware(fw);

abort_with_nothing:
	return status;
}

static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp)
{
	struct mcp_gen_header *hdr;
	struct device *dev = &mgp->pdev->dev;
	const size_t bytes = sizeof(struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) {
		dev_err(dev, "Running firmware has bad header offset (%d)\n",
			(int)hdr_offset);
		return -EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, GFP_KERNEL);
	if (hdr == NULL) {
		dev_err(dev, "could not malloc firmware hdr\n");
		return -ENOMEM;
	}
	memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes);
	status = myri10ge_validate_firmware(mgp, hdr);
	kfree(hdr);

	/* check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode */
	if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 &&
	    mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) {
		mgp->adopted_rx_filter_bug = 1;
		dev_warn(dev, "Adopting fw %d.%d.%d: "
			 "working around rx filter bug\n",
			 mgp->fw_ver_major, mgp->fw_ver_minor,
			 mgp->fw_ver_tiny);
	}
	return status;
}

static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	int status;

	/* probe for IPv6 TSO support */
	mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE,
				   &cmd, 0);
	if (status == 0) {
		mgp->max_tso6 = cmd.data0;
		mgp->features |= NETIF_F_TSO6;
	}

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev,
			"failed MXGEFW_CMD_GET_RX_RING_SIZE\n");
		return -ENXIO;
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr));

	return 0;
}

static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
{
	char __iomem *submit;
	__be32 buf[16] __attribute__ ((__aligned__(8)));
	u32 dma_low, dma_high, size;
	int status, i;

	size = 0;
	status = myri10ge_load_hotplug_firmware(mgp, &size);
	if (status) {
		if (!adopt)
			return status;
		dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n");

		/* Do not attempt to adopt firmware if there
		 * was a bad crc */
		if (status == -EIO)
			return status;

		status = myri10ge_adopt_running_firmware(mgp);
		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to adopt running firmware\n");
			return status;
		}
		dev_info(&mgp->pdev->dev,
			 "Successfully adopted running firmware\n");
		if (mgp->tx_boundary == 4096) {
			dev_warn(&mgp->pdev->dev,
				 "Using firmware currently running on NIC"
				 ". For optimal\n");
			dev_warn(&mgp->pdev->dev,
				 "performance consider loading optimized "
				 "firmware\n");
			dev_warn(&mgp->pdev->dev, "via hotplug\n");
		}

		set_fw_name(mgp, "adopted", false);
		mgp->tx_boundary = 2048;
		myri10ge_dummy_rdma(mgp, 1);
		status = myri10ge_get_firmware_capabilities(mgp);
		return status;
	}

	/* clear confirmation addr */
	mgp->cmd->data = 0;
	mb();

	/* send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus);
	dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus);

	buf[0] = htonl(dma_high);	/* confirm addr MSW */
	buf[1] = htonl(dma_low);	/* confirm addr LSW */
	buf[2] = MYRI10GE_NO_CONFIRM_DATA;	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);	/* where to copy to */
	buf[6] = htonl(0);	/* where to jump to */

	submit = mgp->sram + MXGEFW_BOOT_HANDOFF;

	myri10ge_pio_copy(submit, &buf, sizeof(buf));
	mb();
	msleep(1);
	mb();
	i = 0;
	while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) {
		msleep(1 << i);
		i++;
	}
	if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) {
		dev_err(&mgp->pdev->dev, "handoff failed\n");
		return -ENXIO;
	}
	myri10ge_dummy_rdma(mgp, 1);
	status = myri10ge_get_firmware_capabilities(mgp);

	return status;
}

static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr)
{
	struct myri10ge_cmd cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0);
	return status;
}

static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, 0);

	if (status) {
		netdev_err(mgp->dev, "Failed to set flow control mode\n");
		return status;
	}
	mgp->pause = pause;
	return 0;
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic)
{
	struct myri10ge_cmd cmd;
	int status, ctl;

	ctl = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC;
	status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic);
	if (status)
		netdev_err(mgp->dev, "Failed to set promisc mode\n");
}

static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	struct myri10ge_cmd cmd;
	int status;
	u32 len;
	struct page *dmatest_page;
	dma_addr_t dmatest_bus;
	char *test = " ";

	dmatest_page = alloc_page(GFP_KERNEL);
	if (!dmatest_page)
		return -ENOMEM;
	dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
				   DMA_BIDIRECTIONAL);

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
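	/* Worked example with illustrative numbers: if len is 4096 and the
	 * firmware returns cmd.data0 == 0x00640100 (100 transfers in 256
	 * half-microsecond ticks), the read computation below gives
	 * (100 * 4096 * 2) / 256 = 3200 MB/s, which is what the
	 * "read_dma_bw_MBs" ethtool statistic reports.
	 */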

	len = mgp->tx_boundary;

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);
	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd, 0);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
	put_page(dmatest_page);

	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n",
			 test, status);

	return status;
}

static int myri10ge_reset(struct myri10ge_priv *mgp)
{
	struct myri10ge_cmd cmd;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;
#ifdef CONFIG_MYRI10GE_DCA
	unsigned long dca_tag_off;
#endif

	/* try to send a reset command to the card to see if it
	 * is alive */
	memset(&cmd, 0, sizeof(cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev, "failed reset\n");
		return -ENXIO;
	}

	(void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);
	/*
	 * Use non-ndis mcp_slot (e.g. 4 bytes total,
	 * no toeplitz hash value returned). Older firmware will
	 * not understand this command, but will use the correct
	 * sized mcp_slot, so we ignore error returns
	 */
	cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN;
	(void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0);

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry);
	cmd.data0 = (u32) bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
					   &cmd, 0);
		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to get number of slices\n");
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (mgp->dev->real_num_tx_queues > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
					   &cmd, 0);

		/* Firmware older than 1.4.32 only supports multiple
		 * RX queues, so if we get an error, first retry using a
		 * single TX queue before giving up */
		if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
			netif_set_real_num_tx_queues(mgp->dev, 1);
			cmd.data0 = mgp->num_slices;
			cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
			status = myri10ge_send_cmd(mgp,
						   MXGEFW_CMD_ENABLE_RSS_QUEUES,
						   &cmd, 0);
		}

		if (status != 0) {
			dev_err(&mgp->pdev->dev,
				"failed to set number of slices\n");

			return status;
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus);
		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
					    &cmd, 0);
	}

	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim =
		    (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i);
	}
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				    &cmd, 0);
	mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0);

	status |= myri10ge_send_cmd
	    (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0);
	mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0);
	if (status != 0) {
		dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n");
		return status;
	}
	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);

#ifdef CONFIG_MYRI10GE_DCA
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0);
	dca_tag_off = cmd.data0;
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		if (status == 0) {
			ss->dca_tag = (__iomem __be32 *)
			    (mgp->sram + dca_tag_off + 4 * i);
		} else {
			ss->dca_tag = NULL;
		}
	}
#endif				/* CONFIG_MYRI10GE_DCA */

	/* reset mcp/driver shared state back to 0 */

	mgp->link_changes = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];

		memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_start = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.wake_queue = 0;
		ss->tx.stop_queue = 0;
	}

	status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr);
	myri10ge_change_pause(mgp, mgp->pause);
	myri10ge_set_multicast_list(mgp->dev);
	return status;
}

#ifdef CONFIG_MYRI10GE_DCA
static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on)
{
	int ret;
	u16 ctl;

	pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl);

	ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4;
	if (ret != on) {
		ctl &= ~PCI_EXP_DEVCTL_RELAX_EN;
		ctl |= (on << 4);
		pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, ctl);
	}
	return ret;
}

static void
myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag)
{
	ss->cached_dca_tag = tag;
	put_be32(htonl(tag), ss->dca_tag);
}

static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss)
{
	int cpu = get_cpu();
	int tag;

	if (cpu != ss->cpu) {
		tag = dca3_get_tag(&ss->mgp->pdev->dev, cpu);
		if (ss->cached_dca_tag != tag)
			myri10ge_write_dca(ss, cpu, tag);
		ss->cpu = cpu;
	}
	put_cpu();
}

static void myri10ge_setup_dca(struct myri10ge_priv *mgp)
{
	int err, i;
	struct pci_dev *pdev = mgp->pdev;

	if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled)
		return;
	if (!myri10ge_dca) {
		dev_err(&pdev->dev, "dca disabled by administrator\n");
		return;
	}
	err = dca_add_requester(&pdev->dev);
	if (err) {
		if (err != -ENODEV)
			dev_err(&pdev->dev,
				"dca_add_requester() failed, err=%d\n", err);
		return;
	}
	mgp->relaxed_order = myri10ge_toggle_relaxed(pdev, 0);
	mgp->dca_enabled = 1;
	for (i = 0; i < mgp->num_slices; i++) {
		mgp->ss[i].cpu = -1;
		mgp->ss[i].cached_dca_tag = -1;
		myri10ge_update_dca(&mgp->ss[i]);
	}
}

static void myri10ge_teardown_dca(struct myri10ge_priv *mgp)
{
	struct pci_dev *pdev = mgp->pdev;

	if (!mgp->dca_enabled)
		return;
	mgp->dca_enabled = 0;
	if (mgp->relaxed_order)
		myri10ge_toggle_relaxed(pdev, 1);
	dca_remove_requester(&pdev->dev);
}

static int myri10ge_notify_dca_device(struct device *dev, void *data)
{
	struct myri10ge_priv *mgp;
	unsigned long event;

	mgp = dev_get_drvdata(dev);
	event = *(unsigned long *)data;

	if (event == DCA_PROVIDER_ADD)
		myri10ge_setup_dca(mgp);
	else if (event == DCA_PROVIDER_REMOVE)
		myri10ge_teardown_dca(mgp);
	return 0;
}
#endif				/* CONFIG_MYRI10GE_DCA */

static inline void
myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst,
		    struct mcp_kreq_ether_recv *src)
{
	__be32 low;

	low = src->addr_low;
	src->addr_low = htonl(DMA_BIT_MASK(32));
	myri10ge_pio_copy(dst, src, 4 * sizeof(*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src));
	mb();
	src->addr_low = low;
	put_be32(low, &dst->addr_low);
	mb();
}
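
/*
 * Note on the ordering above: the first descriptor's addr_low is
 * temporarily poisoned with an all-ones value while the block of eight
 * descriptors is PIO-copied, and the real addr_low is written only as
 * the final step, so the NIC cannot consume a partially written block.
 */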

static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum)
{
	struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data);

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
		skb->csum = hw_csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
	}
}

static inline void
myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va,
		      struct skb_frag_struct *rx_frags, int len, int hlen)
{
	struct skb_frag_struct *skb_frags;

	skb->len = skb->data_len = len;
	/* attach the page(s) */

	skb_frags = skb_shinfo(skb)->frags;
	while (len > 0) {
		memcpy(skb_frags, rx_frags, sizeof(*skb_frags));
		len -= skb_frag_size(rx_frags);
		skb_frags++;
		rx_frags++;
		skb_shinfo(skb)->nr_frags++;
	}

	/* pskb_may_pull is not available in irq context, but
	 * skb_pull() (for ether_pad and eth_type_trans()) requires
	 * the beginning of the packet in skb_headlen(), move it
	 * manually */
	skb_copy_to_linear_data(skb, va, hlen);
	skb_shinfo(skb)->frags[0].page_offset += hlen;
	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hlen);
	skb->data_len -= hlen;
	skb->tail += hlen;
	skb_pull(skb, MXGEFW_PAD);
}

static void
myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
			int bytes, int watchdog)
{
	struct page *page;
	int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
	int end_offset;
#endif

	if (unlikely(rx->watchdog_needed && !watchdog))
		return;

	/* try to refill entire ring */
	while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) {
		idx = rx->fill_cnt & rx->mask;
		if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) {
			/* we can use part of previous page */
			get_page(rx->page);
		} else {
			/* we need a new page */
			page =
			    alloc_pages(GFP_ATOMIC | __GFP_COMP,
					MYRI10GE_ALLOC_ORDER);
			if (unlikely(page == NULL)) {
				if (rx->fill_cnt - rx->cnt < 16)
					rx->watchdog_needed = 1;
				return;
			}
			rx->page = page;
			rx->page_offset = 0;
			rx->bus = pci_map_page(mgp->pdev, page, 0,
					       MYRI10GE_ALLOC_SIZE,
					       PCI_DMA_FROMDEVICE);
		}
		rx->info[idx].page = rx->page;
		rx->info[idx].page_offset = rx->page_offset;
		/* note that this is the address of the start of the
		 * page */
		dma_unmap_addr_set(&rx->info[idx], bus, rx->bus);
		rx->shadow[idx].addr_low =
		    htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset);
		rx->shadow[idx].addr_high =
		    htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus));

		/* start next packet on a cacheline boundary */
		rx->page_offset += SKB_DATA_ALIGN(bytes);

#if MYRI10GE_ALLOC_SIZE > 4096
		/* don't cross a 4KB boundary */
		end_offset = rx->page_offset + bytes - 1;
		if ((unsigned)(rx->page_offset ^ end_offset) > 4095)
			rx->page_offset = end_offset & ~4095;
#endif
		rx->fill_cnt++;

		/* copy 8 descriptors to the firmware at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
					    &rx->shadow[idx - 7]);
		}
	}
}
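
/*
 * The refill loop above carves each MYRI10GE_ALLOC_SIZE page into
 * SKB_DATA_ALIGN()ed receive buffers, taking one extra page reference
 * (get_page) per buffer handed out, so the page is freed only after
 * every buffer carved from it has been consumed and unreferenced.
 */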

static inline void
myri10ge_unmap_rx_page(struct pci_dev *pdev,
		       struct myri10ge_rx_buffer_state *info, int bytes)
{
	/* unmap the recvd page if we're the only or last user of it */
	if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
	    (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
		pci_unmap_page(pdev, (dma_unmap_addr(info, bus)
				      & ~(MYRI10GE_ALLOC_SIZE - 1)),
			       MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
	}
}

#define MYRI10GE_HLEN 64	/* The number of bytes to copy from a
				 * page into an skb */

static inline int
myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum,
		 bool lro_enabled)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct sk_buff *skb;
	struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME];
	struct myri10ge_rx_buf *rx;
	int i, idx, hlen, remainder, bytes;
	struct pci_dev *pdev = mgp->pdev;
	struct net_device *dev = mgp->dev;
	u8 *va;

	if (len <= mgp->small_bytes) {
		rx = &ss->rx_small;
		bytes = mgp->small_bytes;
	} else {
		rx = &ss->rx_big;
		bytes = mgp->big_bytes;
	}

	len += MXGEFW_PAD;
	idx = rx->cnt & rx->mask;
	va = page_address(rx->info[idx].page) + rx->info[idx].page_offset;
	prefetch(va);
	/* Fill skb_frag_struct(s) with data from our receive */
	for (i = 0, remainder = len; remainder > 0; i++) {
		myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes);
		__skb_frag_set_page(&rx_frags[i], rx->info[idx].page);
		rx_frags[i].page_offset = rx->info[idx].page_offset;
		if (remainder < MYRI10GE_ALLOC_SIZE)
			skb_frag_size_set(&rx_frags[i], remainder);
		else
			skb_frag_size_set(&rx_frags[i], MYRI10GE_ALLOC_SIZE);
		rx->cnt++;
		idx = rx->cnt & rx->mask;
		remainder -= MYRI10GE_ALLOC_SIZE;
	}

	if (lro_enabled) {
		rx_frags[0].page_offset += MXGEFW_PAD;
		skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
		len -= MXGEFW_PAD;
		lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags,
				  /* opaque, will come back in get_frag_header */
				  len, len,
				  (void *)(__force unsigned long)csum, csum);

		return 1;
	}

	hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN;

	/* allocate an skb to attach the page(s) to. This is done
	 * after trying LRO, so as to avoid skb allocation overheads */

	skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16);
	if (unlikely(skb == NULL)) {
		ss->stats.rx_dropped++;
		do {
			i--;
			__skb_frag_unref(&rx_frags[i]);
		} while (i != 0);
		return 0;
	}

	/* Attach the pages to the skb, and trim off any padding */
	myri10ge_rx_skb_build(skb, va, rx_frags, len, hlen);
	if (skb_frag_size(&skb_shinfo(skb)->frags[0]) <= 0) {
		skb_frag_unref(skb, 0);
		skb_shinfo(skb)->nr_frags = 0;
	} else {
		skb->truesize += bytes * skb_shinfo(skb)->nr_frags;
	}
	skb->protocol = eth_type_trans(skb, dev);
	skb_record_rx_queue(skb, ss - &mgp->ss[0]);

	if (dev->features & NETIF_F_RXCSUM) {
		if ((skb->protocol == htons(ETH_P_IP)) ||
		    (skb->protocol == htons(ETH_P_IPV6))) {
			skb->csum = csum;
			skb->ip_summed = CHECKSUM_COMPLETE;
		} else
			myri10ge_vlan_ip_csum(skb, csum);
	}
	netif_receive_skb(skb);
	return 1;
}

static inline void
myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
{
	struct pci_dev *pdev = ss->mgp->pdev;
	struct myri10ge_tx_buf *tx = &ss->tx;
	struct netdev_queue *dev_queue;
	struct sk_buff *skb;
	int idx, len;

	while (tx->pkt_done != mcp_index) {
		idx = tx->done & tx->mask;
		skb = tx->info[idx].skb;

		/* Mark as free */
		tx->info[idx].skb = NULL;
		if (tx->info[idx].last) {
			tx->pkt_done++;
			tx->info[idx].last = 0;
		}
		tx->done++;
		len = dma_unmap_len(&tx->info[idx], len);
		dma_unmap_len_set(&tx->info[idx], len, 0);
		if (skb) {
			ss->stats.tx_bytes += skb->len;
			ss->stats.tx_packets++;
			dev_kfree_skb_irq(skb);
			if (len)
				pci_unmap_single(pdev,
						 dma_unmap_addr(&tx->info[idx],
								bus), len,
						 PCI_DMA_TODEVICE);
		} else {
			if (len)
				pci_unmap_page(pdev,
					       dma_unmap_addr(&tx->info[idx],
							      bus), len,
					       PCI_DMA_TODEVICE);
		}
	}

	dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
	/*
	 * Make a minimal effort to prevent the NIC from polling an
	 * idle tx queue. If we can't get the lock we leave the queue
	 * active. In this case, either a thread was about to start
	 * using the queue anyway, or we lost a race and the NIC will
	 * waste some of its resources polling an inactive queue for a
	 * while.
	 */

	if ((ss->mgp->dev->real_num_tx_queues > 1) &&
	    __netif_tx_trylock(dev_queue)) {
		if (tx->req == tx->done) {
			tx->queue_active = 0;
			put_be32(htonl(1), tx->send_stop);
			mb();
			mmiowb();
		}
		__netif_tx_unlock(dev_queue);
	}

	/* start the queue if we've stopped it */
	if (netif_tx_queue_stopped(dev_queue) &&
	    tx->req - tx->done < (tx->mask >> 1) &&
	    ss->mgp->running == MYRI10GE_ETH_RUNNING) {
		tx->wake_queue++;
		netif_tx_wake_queue(dev_queue);
	}
}

static inline int
myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
{
	struct myri10ge_rx_done *rx_done = &ss->rx_done;
	struct myri10ge_priv *mgp = ss->mgp;
	unsigned long rx_bytes = 0;
	unsigned long rx_packets = 0;
	unsigned long rx_ok;
	int idx = rx_done->idx;
	int cnt = rx_done->cnt;
	int work_done = 0;
	u16 length;
	__wsum checksum;

	/*
	 * Prevent compiler from generating more than one ->features memory
	 * access to avoid theoretical race condition with functions that
	 * change NETIF_F_LRO flag at runtime.
	 */
	bool lro_enabled = !!(ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO);

	while (rx_done->entry[idx].length != 0 && work_done < budget) {
		length = ntohs(rx_done->entry[idx].length);
		rx_done->entry[idx].length = 0;
		checksum = csum_unfold(rx_done->entry[idx].checksum);
		rx_ok = myri10ge_rx_done(ss, length, checksum, lro_enabled);
		rx_packets += rx_ok;
		rx_bytes += rx_ok * (unsigned long)length;
		cnt++;
		idx = cnt & (mgp->max_intr_slots - 1);
		work_done++;
	}
	rx_done->idx = idx;
	rx_done->cnt = cnt;
	ss->stats.rx_packets += rx_packets;
	ss->stats.rx_bytes += rx_bytes;

	if (lro_enabled)
		lro_flush_all(&rx_done->lro_mgr);

	/* restock receive rings if needed */
	if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
					mgp->small_bytes + MXGEFW_PAD, 0);
	if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh)
		myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);

	return work_done;
}

static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
{
	struct mcp_irq_data *stats = mgp->ss[0].fw_stats;

	if (unlikely(stats->stats_updated)) {
		unsigned link_up = ntohl(stats->link_up);
		if (mgp->link_state != link_up) {
			mgp->link_state = link_up;

			if (mgp->link_state == MXGEFW_LINK_UP) {
				netif_info(mgp, link, mgp->dev, "link up\n");
				netif_carrier_on(mgp->dev);
				mgp->link_changes++;
			} else {
				netif_info(mgp, link, mgp->dev, "link %s\n",
					   (link_up == MXGEFW_LINK_MYRINET ?
					    "mismatch (Myrinet detected)" :
					    "down"));
				netif_carrier_off(mgp->dev);
				mgp->link_changes++;
			}
		}
		if (mgp->rdma_tags_available !=
		    ntohl(stats->rdma_tags_available)) {
			mgp->rdma_tags_available =
			    ntohl(stats->rdma_tags_available);
			netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n",
				    mgp->rdma_tags_available);
		}
		mgp->down_cnt += stats->link_down;
		if (stats->link_down)
			wake_up(&mgp->down_wq);
	}
}

static int myri10ge_poll(struct napi_struct *napi, int budget)
{
	struct myri10ge_slice_state *ss =
	    container_of(napi, struct myri10ge_slice_state, napi);
	int work_done;

#ifdef CONFIG_MYRI10GE_DCA
	if (ss->mgp->dca_enabled)
		myri10ge_update_dca(ss);
#endif

	/* process as many rx events as NAPI will allow */
	work_done = myri10ge_clean_rx_done(ss, budget);

	if (work_done < budget) {
		napi_complete(napi);
		put_be32(htonl(3), ss->irq_claim);
	}
	return work_done;
}

static irqreturn_t myri10ge_intr(int irq, void *arg)
{
	struct myri10ge_slice_state *ss = arg;
	struct myri10ge_priv *mgp = ss->mgp;
	struct mcp_irq_data *stats = ss->fw_stats;
	struct myri10ge_tx_buf *tx = &ss->tx;
	u32 send_done_count;
	int i;

	/* an interrupt on a non-zero receive-only slice is implicitly
	 * valid since MSI-X irqs are not shared */
	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
		napi_schedule(&ss->napi);
		return IRQ_HANDLED;
	}

	/* make sure it is our IRQ, and that the DMA has finished */
	if (unlikely(!stats->valid))
		return IRQ_NONE;

	/* low bit indicates receives are present, so schedule
	 * napi poll handler */
	if (stats->valid & 1)
		napi_schedule(&ss->napi);

	if (!mgp->msi_enabled && !mgp->msix_enabled) {
		put_be32(0, mgp->irq_deassert);
		if (!myri10ge_deassert_wait)
			stats->valid = 0;
		mb();
	} else
		stats->valid = 0;

	/* Wait for IRQ line to go low, if using INTx */
	i = 0;
	while (1) {
		i++;
		/* check for transmit completes and receives */
		send_done_count = ntohl(stats->send_done_count);
		if (send_done_count != tx->pkt_done)
			myri10ge_tx_done(ss, (int)send_done_count);
		if (unlikely(i > myri10ge_max_irq_loops)) {
			netdev_warn(mgp->dev, "irq stuck?\n");
			stats->valid = 0;
			schedule_work(&mgp->watchdog_work);
		}
		if (likely(stats->valid == 0))
			break;
		cpu_relax();
		barrier();
	}

	/* Only slice 0 updates stats */
	if (ss == mgp->ss)
		myri10ge_check_statblock(mgp);

	put_be32(htonl(3), ss->irq_claim + 1);
	return IRQ_HANDLED;
}

static int
myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	char *ptr;
	int i;

	cmd->autoneg = AUTONEG_DISABLE;
	ethtool_cmd_speed_set(cmd, SPEED_10000);
	cmd->duplex = DUPLEX_FULL;

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = mgp->product_code_string;
	if (ptr == NULL) {
		netdev_err(netdev, "Missing product code\n");
		return 0;
	}
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			netdev_err(netdev, "Invalid product code %s\n",
				   mgp->product_code_string);
			return 0;
		}
	}
	if (*ptr == '2')
		ptr++;
	if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') {
		/* We've found either an XFP, quad ribbon fiber, or SFP+ */
		cmd->port = PORT_FIBRE;
		cmd->supported |= SUPPORTED_FIBRE;
		cmd->advertising |= ADVERTISED_FIBRE;
	} else {
		cmd->port = PORT_OTHER;
	}
	if (*ptr == 'R' || *ptr == 'S')
		cmd->transceiver = XCVR_EXTERNAL;
	else
		cmd->transceiver = XCVR_INTERNAL;

	return 0;
}

static void
myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	strlcpy(info->driver, "myri10ge", sizeof(info->driver));
	strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version));
	strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version));
	strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info));
}

static int
myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	coal->rx_coalesce_usecs = mgp->intr_coal_delay;
	return 0;
}

static int
myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	mgp->intr_coal_delay = coal->rx_coalesce_usecs;
	put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr);
	return 0;
}

static void
myri10ge_get_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	pause->autoneg = 0;
	pause->rx_pause = mgp->pause;
	pause->tx_pause = mgp->pause;
}

static int
myri10ge_set_pauseparam(struct net_device *netdev,
			struct ethtool_pauseparam *pause)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	if (pause->tx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->tx_pause);
	if (pause->rx_pause != mgp->pause)
		return myri10ge_change_pause(mgp, pause->rx_pause);
	if (pause->autoneg != 0)
		return -EINVAL;
	return 0;
}

static void
myri10ge_get_ringparam(struct net_device *netdev,
		       struct ethtool_ringparam *ring)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);

	ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1;
	ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1;
	ring->rx_jumbo_max_pending = 0;
	ring->tx_max_pending = mgp->ss[0].tx.mask + 1;
	ring->rx_mini_pending = ring->rx_mini_max_pending;
	ring->rx_pending = ring->rx_max_pending;
	ring->rx_jumbo_pending = ring->rx_jumbo_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}

static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = {
	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
	"rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
	"tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
	"tx_heartbeat_errors", "tx_window_errors",
	/* device-specific stats */
	"tx_boundary", "WC", "irq", "MSI", "MSIX",
	"read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs",
	"serial_number", "watchdog_resets",
#ifdef CONFIG_MYRI10GE_DCA
	"dca_capable_firmware", "dca_device_present",
#endif
	"link_changes", "link_up", "dropped_link_overflow",
	"dropped_link_error_or_filtered",
	"dropped_pause", "dropped_bad_phy", "dropped_bad_crc32",
	"dropped_unicast_filtered", "dropped_multicast_filtered",
	"dropped_runt", "dropped_overrun", "dropped_no_small_buffer",
	"dropped_no_big_buffer"
};

static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = {
	"----------- slice ---------",
	"tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done",
	"rx_small_cnt", "rx_big_cnt",
	"wake_queue", "stop_queue", "tx_linearized",
	"LRO aggregated", "LRO flushed", "LRO avg aggr", "LRO no_desc",
};

#define MYRI10GE_NET_STATS_LEN 21
#define MYRI10GE_MAIN_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_main_stats)
#define MYRI10GE_SLICE_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_slice_stats)
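
/*
 * MYRI10GE_NET_STATS_LEN (21) is the number of generic rtnl_link_stats64
 * counters named at the start of myri10ge_gstrings_main_stats;
 * myri10ge_get_ethtool_stats() below copies exactly that many u64 fields
 * out of struct rtnl_link_stats64 before the device-specific stats.
 */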

static void
myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	int i;

	switch (stringset) {
	case ETH_SS_STATS:
		memcpy(data, *myri10ge_gstrings_main_stats,
		       sizeof(myri10ge_gstrings_main_stats));
		data += sizeof(myri10ge_gstrings_main_stats);
		for (i = 0; i < mgp->num_slices; i++) {
			memcpy(data, *myri10ge_gstrings_slice_stats,
			       sizeof(myri10ge_gstrings_slice_stats));
			data += sizeof(myri10ge_gstrings_slice_stats);
		}
		break;
	}
}
1809 static int myri10ge_get_sset_count(struct net_device *netdev, int sset)
1810 {
1811  struct myri10ge_priv *mgp = netdev_priv(netdev);
1812 
1813  switch (sset) {
1814  case ETH_SS_STATS:
1815  return MYRI10GE_MAIN_STATS_LEN +
1817  default:
1818  return -EOPNOTSUPP;
1819  }
1820 }

static void
myri10ge_get_ethtool_stats(struct net_device *netdev,
			   struct ethtool_stats *stats, u64 * data)
{
	struct myri10ge_priv *mgp = netdev_priv(netdev);
	struct myri10ge_slice_state *ss;
	struct rtnl_link_stats64 link_stats;
	int slice;
	int i;

	/* force stats update */
	memset(&link_stats, 0, sizeof(link_stats));
	(void)myri10ge_get_stats(netdev, &link_stats);
	for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++)
		data[i] = ((u64 *)&link_stats)[i];

	data[i++] = (unsigned int)mgp->tx_boundary;
	data[i++] = (unsigned int)mgp->wc_enabled;
	data[i++] = (unsigned int)mgp->pdev->irq;
	data[i++] = (unsigned int)mgp->msi_enabled;
	data[i++] = (unsigned int)mgp->msix_enabled;
	data[i++] = (unsigned int)mgp->read_dma;
	data[i++] = (unsigned int)mgp->write_dma;
	data[i++] = (unsigned int)mgp->read_write_dma;
	data[i++] = (unsigned int)mgp->serial_number;
	data[i++] = (unsigned int)mgp->watchdog_resets;
#ifdef CONFIG_MYRI10GE_DCA
	data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL);
	data[i++] = (unsigned int)(mgp->dca_enabled);
#endif
	data[i++] = (unsigned int)mgp->link_changes;

	/* firmware stats are useful only in the first slice */
	ss = &mgp->ss[0];
	data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow);
	data[i++] =
	    (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered);
	data[i++] =
	    (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer);
	data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer);

	for (slice = 0; slice < mgp->num_slices; slice++) {
		ss = &mgp->ss[slice];
		data[i++] = slice;
		data[i++] = (unsigned int)ss->tx.pkt_start;
		data[i++] = (unsigned int)ss->tx.pkt_done;
		data[i++] = (unsigned int)ss->tx.req;
		data[i++] = (unsigned int)ss->tx.done;
		data[i++] = (unsigned int)ss->rx_small.cnt;
		data[i++] = (unsigned int)ss->rx_big.cnt;
		data[i++] = (unsigned int)ss->tx.wake_queue;
		data[i++] = (unsigned int)ss->tx.stop_queue;
		data[i++] = (unsigned int)ss->tx.linearized;
		data[i++] = ss->rx_done.lro_mgr.stats.aggregated;
		data[i++] = ss->rx_done.lro_mgr.stats.flushed;
		if (ss->rx_done.lro_mgr.stats.flushed)
			data[i++] = ss->rx_done.lro_mgr.stats.aggregated /
			    ss->rx_done.lro_mgr.stats.flushed;
		else
			data[i++] = 0;
		data[i++] = ss->rx_done.lro_mgr.stats.no_desc;
	}
}
1893 
1894 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value)
1895 {
1896  struct myri10ge_priv *mgp = netdev_priv(netdev);
1897  mgp->msg_enable = value;
1898 }
1899 
1900 static u32 myri10ge_get_msglevel(struct net_device *netdev)
1901 {
1902  struct myri10ge_priv *mgp = netdev_priv(netdev);
1903  return mgp->msg_enable;
1904 }
1905 
1906 /*
1907  * Use a low-level command to change the LED behavior. Rather than
1908  * blinking (which is the normal case), when identify is used, the
1909  * yellow LED turns solid.
1910  */
1911 static int myri10ge_led(struct myri10ge_priv *mgp, int on)
1912 {
1913  struct mcp_gen_header *hdr;
1914  struct device *dev = &mgp->pdev->dev;
1915  size_t hdr_off, pattern_off, hdr_len;
1916  u32 pattern = 0xfffffffe;
1917 
1918  /* find running firmware header */
1919  hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET));
1920  if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) {
1921  dev_err(dev, "Running firmware has bad header offset (%d)\n",
1922  (int)hdr_off);
1923  return -EIO;
1924  }
1925  hdr_len = swab32(readl(mgp->sram + hdr_off +
1926  offsetof(struct mcp_gen_header, header_length)));
1927  pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern);
1928  if (pattern_off >= (hdr_len + hdr_off)) {
1929  dev_info(dev, "Firmware does not support LED identification\n");
1930  return -EINVAL;
1931  }
1932  if (!on)
1933  pattern = swab32(readl(mgp->sram + pattern_off + 4));
1934  writel(htonl(pattern), mgp->sram + pattern_off);
1935  return 0;
1936 }
1937 
1938 static int
1939 myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
1940 {
1941  struct myri10ge_priv *mgp = netdev_priv(netdev);
1942  int rc;
1943 
1944  switch (state) {
1945  case ETHTOOL_ID_ACTIVE:
1946  rc = myri10ge_led(mgp, 1);
1947  break;
1948 
1949  case ETHTOOL_ID_INACTIVE:
1950  rc = myri10ge_led(mgp, 0);
1951  break;
1952 
1953  default:
1954  rc = -EINVAL;
1955  }
1956 
1957  return rc;
1958 }
1959 
1960 static const struct ethtool_ops myri10ge_ethtool_ops = {
1961  .get_settings = myri10ge_get_settings,
1962  .get_drvinfo = myri10ge_get_drvinfo,
1963  .get_coalesce = myri10ge_get_coalesce,
1964  .set_coalesce = myri10ge_set_coalesce,
1965  .get_pauseparam = myri10ge_get_pauseparam,
1966  .set_pauseparam = myri10ge_set_pauseparam,
1967  .get_ringparam = myri10ge_get_ringparam,
1968  .get_link = ethtool_op_get_link,
1969  .get_strings = myri10ge_get_strings,
1970  .get_sset_count = myri10ge_get_sset_count,
1971  .get_ethtool_stats = myri10ge_get_ethtool_stats,
1972  .set_msglevel = myri10ge_set_msglevel,
1973  .get_msglevel = myri10ge_get_msglevel,
1974  .set_phys_id = myri10ge_phys_id,
1975 };
1976 
1977 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
1978 {
1979  struct myri10ge_priv *mgp = ss->mgp;
1980  struct myri10ge_cmd cmd;
1981  struct net_device *dev = mgp->dev;
1982  int tx_ring_size, rx_ring_size;
1983  int tx_ring_entries, rx_ring_entries;
1984  int i, slice, status;
1985  size_t bytes;
1986 
1987  /* get ring sizes */
1988  slice = ss - mgp->ss;
1989  cmd.data0 = slice;
1990  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0);
1991  tx_ring_size = cmd.data0;
1992  cmd.data0 = slice;
1993  status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
1994  if (status != 0)
1995  return status;
1996  rx_ring_size = cmd.data0;
1997 
1998  tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send);
1999  rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr);
2000  ss->tx.mask = tx_ring_entries - 1;
2001  ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
2002 
2003  status = -ENOMEM;
2004 
2005  /* allocate the host shadow rings */
2006 
2007  bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4)
2008  * sizeof(*ss->tx.req_list);
2009  ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL);
2010  if (ss->tx.req_bytes == NULL)
2011  goto abort_with_nothing;
2012 
2013  /* ensure req_list entries are aligned to 8 bytes */
2014  ss->tx.req_list = (struct mcp_kreq_ether_send *)
2015  ALIGN((unsigned long)ss->tx.req_bytes, 8);
2016  ss->tx.queue_active = 0;
2017 
2018  bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
2019  ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
2020  if (ss->rx_small.shadow == NULL)
2021  goto abort_with_tx_req_bytes;
2022 
2023  bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow);
2024  ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL);
2025  if (ss->rx_big.shadow == NULL)
2026  goto abort_with_rx_small_shadow;
2027 
2028  /* allocate the host info rings */
2029 
2030  bytes = tx_ring_entries * sizeof(*ss->tx.info);
2031  ss->tx.info = kzalloc(bytes, GFP_KERNEL);
2032  if (ss->tx.info == NULL)
2033  goto abort_with_rx_big_shadow;
2034 
2035  bytes = rx_ring_entries * sizeof(*ss->rx_small.info);
2036  ss->rx_small.info = kzalloc(bytes, GFP_KERNEL);
2037  if (ss->rx_small.info == NULL)
2038  goto abort_with_tx_info;
2039 
2040  bytes = rx_ring_entries * sizeof(*ss->rx_big.info);
2041  ss->rx_big.info = kzalloc(bytes, GFP_KERNEL);
2042  if (ss->rx_big.info == NULL)
2043  goto abort_with_rx_small_info;
2044 
2045  /* Fill the receive rings */
2046  ss->rx_big.cnt = 0;
2047  ss->rx_small.cnt = 0;
2048  ss->rx_big.fill_cnt = 0;
2049  ss->rx_small.fill_cnt = 0;
2050  ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE;
2051  ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE;
2052  ss->rx_small.watchdog_needed = 0;
2053  ss->rx_big.watchdog_needed = 0;
2054  if (mgp->small_bytes == 0) {
2055  ss->rx_small.fill_cnt = ss->rx_small.mask + 1;
2056  } else {
2057  myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
2058  mgp->small_bytes + MXGEFW_PAD, 0);
2059  }
2060 
2061  if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) {
2062  netdev_err(dev, "slice-%d: alloced only %d small bufs\n",
2063  slice, ss->rx_small.fill_cnt);
2064  goto abort_with_rx_small_ring;
2065  }
2066 
2067  myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
2068  if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) {
2069  netdev_err(dev, "slice-%d: alloced only %d big bufs\n",
2070  slice, ss->rx_big.fill_cnt);
2071  goto abort_with_rx_big_ring;
2072  }
2073 
2074  return 0;
2075 
2076 abort_with_rx_big_ring:
2077  for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
2078  int idx = i & ss->rx_big.mask;
2079  myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
2080  mgp->big_bytes);
2081  put_page(ss->rx_big.info[idx].page);
2082  }
2083 
2084 abort_with_rx_small_ring:
2085  if (mgp->small_bytes == 0)
2086  ss->rx_small.fill_cnt = ss->rx_small.cnt;
2087  for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
2088  int idx = i & ss->rx_small.mask;
2089  myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
2090  mgp->small_bytes + MXGEFW_PAD);
2091  put_page(ss->rx_small.info[idx].page);
2092  }
2093 
2094  kfree(ss->rx_big.info);
2095 
2096 abort_with_rx_small_info:
2097  kfree(ss->rx_small.info);
2098 
2099 abort_with_tx_info:
2100  kfree(ss->tx.info);
2101 
2102 abort_with_rx_big_shadow:
2103  kfree(ss->rx_big.shadow);
2104 
2105 abort_with_rx_small_shadow:
2106  kfree(ss->rx_small.shadow);
2107 
2108 abort_with_tx_req_bytes:
2109  kfree(ss->tx.req_bytes);
2110  ss->tx.req_bytes = NULL;
2111  ss->tx.req_list = NULL;
2112 
2113 abort_with_nothing:
2114  return status;
2115 }
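/*
 * Editor's note (a sketch, not driver code): the rings are sized to a
 * power of two, so "mask = entries - 1" lets free-running counters
 * index the ring without an explicit modulo. For example, with 256
 * entries:
 *
 *   unsigned int mask = 256 - 1;           // 0xff
 *   unsigned int idx  = cnt & mask;        // wraps 255 -> 0
 *   unsigned int used = tx_req - tx_done;  // correct across wrap
 *
 * This is why the code above divides the firmware-reported ring size
 * by the descriptor size and stores "entries - 1" rather than the
 * entry count itself.
 */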
2116 
2117 static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
2118 {
2119  struct myri10ge_priv *mgp = ss->mgp;
2120  struct sk_buff *skb;
2121  struct myri10ge_tx_buf *tx;
2122  int i, len, idx;
2123 
2124  /* If not allocated, skip it */
2125  if (ss->tx.req_list == NULL)
2126  return;
2127 
2128  for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
2129  idx = i & ss->rx_big.mask;
2130  if (i == ss->rx_big.fill_cnt - 1)
2131  ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
2132  myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
2133  mgp->big_bytes);
2134  put_page(ss->rx_big.info[idx].page);
2135  }
2136 
2137  if (mgp->small_bytes == 0)
2138  ss->rx_small.fill_cnt = ss->rx_small.cnt;
2139  for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
2140  idx = i & ss->rx_small.mask;
2141  if (i == ss->rx_small.fill_cnt - 1)
2142  ss->rx_small.info[idx].page_offset =
2143  MYRI10GE_ALLOC_SIZE;
2144  myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
2145  mgp->small_bytes + MXGEFW_PAD);
2146  put_page(ss->rx_small.info[idx].page);
2147  }
2148  tx = &ss->tx;
2149  while (tx->done != tx->req) {
2150  idx = tx->done & tx->mask;
2151  skb = tx->info[idx].skb;
2152 
2153  /* Mark as free */
2154  tx->info[idx].skb = NULL;
2155  tx->done++;
2156  len = dma_unmap_len(&tx->info[idx], len);
2157  dma_unmap_len_set(&tx->info[idx], len, 0);
2158  if (skb) {
2159  ss->stats.tx_dropped++;
2160  dev_kfree_skb_any(skb);
2161  if (len)
2162  pci_unmap_single(mgp->pdev,
2163  dma_unmap_addr(&tx->info[idx],
2164  bus), len,
2165  PCI_DMA_TODEVICE);
2166  } else {
2167  if (len)
2168  pci_unmap_page(mgp->pdev,
2169  dma_unmap_addr(&tx->info[idx],
2170  bus), len,
2171  PCI_DMA_TODEVICE);
2172  }
2173  }
2174  kfree(ss->rx_big.info);
2175 
2176  kfree(ss->rx_small.info);
2177 
2178  kfree(ss->tx.info);
2179 
2180  kfree(ss->rx_big.shadow);
2181 
2182  kfree(ss->rx_small.shadow);
2183 
2184  kfree(ss->tx.req_bytes);
2185  ss->tx.req_bytes = NULL;
2186  ss->tx.req_list = NULL;
2187 }
2188 
2189 static int myri10ge_request_irq(struct myri10ge_priv *mgp)
2190 {
2191  struct pci_dev *pdev = mgp->pdev;
2192  struct myri10ge_slice_state *ss;
2193  struct net_device *netdev = mgp->dev;
2194  int i;
2195  int status;
2196 
2197  mgp->msi_enabled = 0;
2198  mgp->msix_enabled = 0;
2199  status = 0;
2200  if (myri10ge_msi) {
2201  if (mgp->num_slices > 1) {
2202  status =
2203  pci_enable_msix(pdev, mgp->msix_vectors,
2204  mgp->num_slices);
2205  if (status == 0) {
2206  mgp->msix_enabled = 1;
2207  } else {
2208  dev_err(&pdev->dev,
2209  "Error %d setting up MSI-X\n", status);
2210  return status;
2211  }
2212  }
2213  if (mgp->msix_enabled == 0) {
2214  status = pci_enable_msi(pdev);
2215  if (status != 0) {
2216  dev_err(&pdev->dev,
2217  "Error %d setting up MSI; falling back to xPIC\n",
2218  status);
2219  } else {
2220  mgp->msi_enabled = 1;
2221  }
2222  }
2223  }
2224  if (mgp->msix_enabled) {
2225  for (i = 0; i < mgp->num_slices; i++) {
2226  ss = &mgp->ss[i];
2227  snprintf(ss->irq_desc, sizeof(ss->irq_desc),
2228  "%s:slice-%d", netdev->name, i);
2229  status = request_irq(mgp->msix_vectors[i].vector,
2230  myri10ge_intr, 0, ss->irq_desc,
2231  ss);
2232  if (status != 0) {
2233  dev_err(&pdev->dev,
2234  "slice %d failed to allocate IRQ\n", i);
2235  i--;
2236  while (i >= 0) {
2237  free_irq(mgp->msix_vectors[i].vector,
2238  &mgp->ss[i]);
2239  i--;
2240  }
2241  pci_disable_msix(pdev);
2242  return status;
2243  }
2244  }
2245  } else {
2246  status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED,
2247  mgp->dev->name, &mgp->ss[0]);
2248  if (status != 0) {
2249  dev_err(&pdev->dev, "failed to allocate IRQ\n");
2250  if (mgp->msi_enabled)
2251  pci_disable_msi(pdev);
2252  }
2253  }
2254  return status;
2255 }
2256 
2257 static void myri10ge_free_irq(struct myri10ge_priv *mgp)
2258 {
2259  struct pci_dev *pdev = mgp->pdev;
2260  int i;
2261 
2262  if (mgp->msix_enabled) {
2263  for (i = 0; i < mgp->num_slices; i++)
2264  free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]);
2265  } else {
2266  free_irq(pdev->irq, &mgp->ss[0]);
2267  }
2268  if (mgp->msi_enabled)
2269  pci_disable_msi(pdev);
2270  if (mgp->msix_enabled)
2271  pci_disable_msix(pdev);
2272 }
2273 
2274 static int
2275 myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr,
2276  void **ip_hdr, void **tcpudp_hdr,
2277  u64 * hdr_flags, void *priv)
2278 {
2279  struct ethhdr *eh;
2280  struct vlan_ethhdr *veh;
2281  struct iphdr *iph;
2282  u8 *va = skb_frag_address(frag);
2283  unsigned long ll_hlen;
2284  /* passed opaque through lro_receive_frags() */
2285  __wsum csum = (__force __wsum) (unsigned long)priv;
2286 
2287  /* find the mac header, aborting if not IPv4 */
2288 
2289  eh = (struct ethhdr *)va;
2290  *mac_hdr = eh;
2291  ll_hlen = ETH_HLEN;
2292  if (eh->h_proto != htons(ETH_P_IP)) {
2293  if (eh->h_proto == htons(ETH_P_8021Q)) {
2294  veh = (struct vlan_ethhdr *)va;
2295  if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
2296  return -1;
2297 
2298  ll_hlen += VLAN_HLEN;
2299 
2300  /*
2301  * HW checksum starts ETH_HLEN bytes into
2302  * frame, so we must subtract off the VLAN
2303  * header's checksum before csum can be used
2304  */
2305  csum = csum_sub(csum, csum_partial(va + ETH_HLEN,
2306  VLAN_HLEN, 0));
2307  } else {
2308  return -1;
2309  }
2310  }
2311  *hdr_flags = LRO_IPV4;
2312 
2313  iph = (struct iphdr *)(va + ll_hlen);
2314  *ip_hdr = iph;
2315  if (iph->protocol != IPPROTO_TCP)
2316  return -1;
2317  if (ip_is_fragment(iph))
2318  return -1;
2319  *hdr_flags |= LRO_TCP;
2320  *tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2);
2321 
2322  /* verify the IP checksum */
2323  if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl)))
2324  return -1;
2325 
2326  /* verify the checksum */
2327  if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr,
2328  ntohs(iph->tot_len) - (iph->ihl << 2),
2329  IPPROTO_TCP, csum)))
2330  return -1;
2331 
2332  return 0;
2333 }
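/*
 * Editor's note (worked example, not driver code): the NIC computes
 * its CHECKSUM_COMPLETE value starting ETH_HLEN bytes into the frame,
 * so on a VLAN-tagged packet the 4 tag bytes are folded into the
 * hardware checksum even though the stack treats them as part of the
 * link header. The csum_sub()/csum_partial() pair above removes them:
 *
 *   csum = csum_sub(csum, csum_partial(va + ETH_HLEN, VLAN_HLEN, 0));
 *
 * leaving a checksum over exactly the IP datagram, which is what
 * csum_tcpudp_magic() expects for the pseudo-header verification.
 */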
2334 
2335 static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
2336 {
2337  struct myri10ge_cmd cmd;
2338  struct myri10ge_slice_state *ss;
2339  int status;
2340 
2341  ss = &mgp->ss[slice];
2342  status = 0;
2343  if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
2344  cmd.data0 = slice;
2345  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
2346  &cmd, 0);
2347  ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
2348  (mgp->sram + cmd.data0);
2349  }
2350  cmd.data0 = slice;
2351  status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
2352  &cmd, 0);
2353  ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *)
2354  (mgp->sram + cmd.data0);
2355 
2356  cmd.data0 = slice;
2357  status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0);
2358  ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
2359  (mgp->sram + cmd.data0);
2360 
2361  ss->tx.send_go = (__iomem __be32 *)
2362  (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
2363  ss->tx.send_stop = (__iomem __be32 *)
2364  (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
2365  return status;
2366 
2367 }
2368 
2369 static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
2370 {
2371  struct myri10ge_cmd cmd;
2372  struct myri10ge_slice_state *ss;
2373  int status;
2374 
2375  ss = &mgp->ss[slice];
2376  cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
2377  cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
2378  cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
2379  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
2380  if (status == -ENOSYS) {
2381  dma_addr_t bus = ss->fw_stats_bus;
2382  if (slice != 0)
2383  return -EINVAL;
2384  bus += offsetof(struct mcp_irq_data, send_done_count);
2385  cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus);
2386  cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus);
2387  status = myri10ge_send_cmd(mgp,
2388  MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
2389  &cmd, 0);
2390  /* Firmware cannot support multicast without STATS_DMA_V2 */
2391  mgp->fw_multicast_support = 0;
2392  } else {
2393  mgp->fw_multicast_support = 1;
2394  }
2395  return 0;
2396 }
2397 
2398 static int myri10ge_open(struct net_device *dev)
2399 {
2400  struct myri10ge_slice_state *ss;
2401  struct myri10ge_priv *mgp = netdev_priv(dev);
2402  struct myri10ge_cmd cmd;
2403  int i, status, big_pow2, slice;
2404  u8 *itable;
2405  struct net_lro_mgr *lro_mgr;
2406 
2407  if (mgp->running != MYRI10GE_ETH_STOPPED)
2408  return -EBUSY;
2409 
2410  mgp->running = MYRI10GE_ETH_STARTING;
2411  status = myri10ge_reset(mgp);
2412  if (status != 0) {
2413  netdev_err(dev, "failed reset\n");
2414  goto abort_with_nothing;
2415  }
2416 
2417  if (mgp->num_slices > 1) {
2418  cmd.data0 = mgp->num_slices;
2419  cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
2420  if (mgp->dev->real_num_tx_queues > 1)
2421  cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
2422  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2423  &cmd, 0);
2424  if (status != 0) {
2425  netdev_err(dev, "failed to set number of slices\n");
2426  goto abort_with_nothing;
2427  }
2428  /* setup the indirection table */
2429  cmd.data0 = mgp->num_slices;
2430  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2431  &cmd, 0);
2432 
2433  status |= myri10ge_send_cmd(mgp,
2434  MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
2435  &cmd, 0);
2436  if (status != 0) {
2437  netdev_err(dev, "failed to setup rss tables\n");
2438  goto abort_with_nothing;
2439  }
2440 
2441  /* just enable an identity mapping */
2442  itable = mgp->sram + cmd.data0;
2443  for (i = 0; i < mgp->num_slices; i++)
2444  __raw_writeb(i, &itable[i]);
2445 
2446  cmd.data0 = 1;
2447  cmd.data1 = myri10ge_rss_hash;
2448  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2449  &cmd, 0);
2450  if (status != 0) {
2451  netdev_err(dev, "failed to enable slices\n");
2452  goto abort_with_nothing;
2453  }
2454  }
2455 
2456  status = myri10ge_request_irq(mgp);
2457  if (status != 0)
2458  goto abort_with_nothing;
2459 
2460  /* decide what small buffer size to use. For good TCP rx
2461  * performance, it is important to not receive 1514 byte
2462  * frames into jumbo buffers, as it confuses the socket buffer
2463  * accounting code, leading to drops and erratic performance.
2464  */
2465 
2466  if (dev->mtu <= ETH_DATA_LEN)
2467  /* enough for a TCP header */
2468  mgp->small_bytes = (128 > SMP_CACHE_BYTES)
2469  ? (128 - MXGEFW_PAD)
2470  : (SMP_CACHE_BYTES - MXGEFW_PAD);
2471  else
2472  /* enough for a vlan encapsulated ETH_DATA_LEN frame */
2473  mgp->small_bytes = VLAN_ETH_FRAME_LEN;
2474 
2475  /* Override the small buffer size? */
2476  if (myri10ge_small_bytes >= 0)
2477  mgp->small_bytes = myri10ge_small_bytes;
2478 
2479  /* Firmware needs the big buff size as a power of 2. Lie and
2480  * tell him the buffer is larger, because we only use 1
2481  * buffer/pkt, and the mtu will prevent overruns.
2482  */
2483  big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
2484  if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) {
2485  while (!is_power_of_2(big_pow2))
2486  big_pow2++;
2487  mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
2488  } else {
2489  big_pow2 = MYRI10GE_ALLOC_SIZE;
2490  mgp->big_bytes = big_pow2;
2491  }
2492 
2493  /* setup the per-slice data structures */
2494  for (slice = 0; slice < mgp->num_slices; slice++) {
2495  ss = &mgp->ss[slice];
2496 
2497  status = myri10ge_get_txrx(mgp, slice);
2498  if (status != 0) {
2499  netdev_err(dev, "failed to get ring sizes or locations\n");
2500  goto abort_with_rings;
2501  }
2502  status = myri10ge_allocate_rings(ss);
2503  if (status != 0)
2504  goto abort_with_rings;
2505 
2506  /* only firmware which supports multiple TX queues
2507  * supports setting up the tx stats on non-zero
2508  * slices */
2509  if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
2510  status = myri10ge_set_stats(mgp, slice);
2511  if (status) {
2512  netdev_err(dev, "Couldn't set stats DMA\n");
2513  goto abort_with_rings;
2514  }
2515 
2516  lro_mgr = &ss->rx_done.lro_mgr;
2517  lro_mgr->dev = dev;
2518  lro_mgr->features = LRO_F_NAPI;
2519  lro_mgr->ip_summed = CHECKSUM_COMPLETE;
2520  lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
2521  lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS;
2522  lro_mgr->lro_arr = ss->rx_done.lro_desc;
2523  lro_mgr->get_frag_header = myri10ge_get_frag_header;
2524  lro_mgr->max_aggr = myri10ge_lro_max_pkts;
2525  lro_mgr->frag_align_pad = 2;
2526  if (lro_mgr->max_aggr > MAX_SKB_FRAGS)
2527  lro_mgr->max_aggr = MAX_SKB_FRAGS;
2528 
2529  /* must happen prior to any irq */
2530  napi_enable(&(ss)->napi);
2531  }
2532 
2533  /* now give firmware buffers sizes, and MTU */
2534  cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
2535  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0);
2536  cmd.data0 = mgp->small_bytes;
2537  status |=
2538  myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0);
2539  cmd.data0 = big_pow2;
2540  status |=
2541  myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0);
2542  if (status) {
2543  netdev_err(dev, "Couldn't set buffer sizes\n");
2544  goto abort_with_rings;
2545  }
2546 
2547  /*
2548  * Set Linux style TSO mode; this is needed only on newer
2549  * firmware versions. Older versions default to Linux
2550  * style TSO
2551  */
2552  cmd.data0 = 0;
2553  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0);
2554  if (status && status != -ENOSYS) {
2555  netdev_err(dev, "Couldn't set TSO mode\n");
2556  goto abort_with_rings;
2557  }
2558 
2559  mgp->link_state = ~0U;
2560  mgp->rdma_tags_available = 15;
2561 
2562  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
2563  if (status) {
2564  netdev_err(dev, "Couldn't bring up link\n");
2565  goto abort_with_rings;
2566  }
2567 
2568  mgp->running = MYRI10GE_ETH_RUNNING;
2569  mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
2570  add_timer(&mgp->watchdog_timer);
2571  netif_tx_wake_all_queues(dev);
2572 
2573  return 0;
2574 
2575 abort_with_rings:
2576  while (slice) {
2577  slice--;
2578  napi_disable(&mgp->ss[slice].napi);
2579  }
2580  for (i = 0; i < mgp->num_slices; i++)
2581  myri10ge_free_rings(&mgp->ss[i]);
2582 
2583  myri10ge_free_irq(mgp);
2584 
2585 abort_with_nothing:
2586  mgp->running = MYRI10GE_ETH_STOPPED;
2587  return -ENOMEM;
2588 }
2589 
2590 static int myri10ge_close(struct net_device *dev)
2591 {
2592  struct myri10ge_priv *mgp = netdev_priv(dev);
2593  struct myri10ge_cmd cmd;
2594  int status, old_down_cnt;
2595  int i;
2596 
2597  if (mgp->running != MYRI10GE_ETH_RUNNING)
2598  return 0;
2599 
2600  if (mgp->ss[0].tx.req_bytes == NULL)
2601  return 0;
2602 
2603  del_timer_sync(&mgp->watchdog_timer);
2604  mgp->running = MYRI10GE_ETH_STOPPING;
2605  for (i = 0; i < mgp->num_slices; i++) {
2606  napi_disable(&mgp->ss[i].napi);
2607  }
2608  netif_carrier_off(dev);
2609 
2610  netif_tx_stop_all_queues(dev);
2611  if (mgp->rebooted == 0) {
2612  old_down_cnt = mgp->down_cnt;
2613  mb();
2614  status =
2615  myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
2616  if (status)
2617  netdev_err(dev, "Couldn't bring down link\n");
2618 
2619  wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt,
2620  HZ);
2621  if (old_down_cnt == mgp->down_cnt)
2622  netdev_err(dev, "never got down irq\n");
2623  }
2624  netif_tx_disable(dev);
2625  myri10ge_free_irq(mgp);
2626  for (i = 0; i < mgp->num_slices; i++)
2627  myri10ge_free_rings(&mgp->ss[i]);
2628 
2629  mgp->running = MYRI10GE_ETH_STOPPED;
2630  return 0;
2631 }
2632 
2633 /* copy an array of struct mcp_kreq_ether_send's to the mcp. Copy
2634  * backwards one at a time and handle ring wraps */
2635 
2636 static inline void
2637 myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx,
2638  struct mcp_kreq_ether_send *src, int cnt)
2639 {
2640  int idx, starting_slot;
2641  starting_slot = tx->req;
2642  while (cnt > 1) {
2643  cnt--;
2644  idx = (starting_slot + cnt) & tx->mask;
2645  myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
2646  mb();
2647  }
2648 }
2649 
2650 /*
2651  * copy an array of struct mcp_kreq_ether_send's to the mcp. Copy
2652  * at most 32 bytes at a time, so as to avoid involving the software
2653  * pio handler in the nic. We re-write the first segment's flags
2654  * to mark them valid only after writing the entire chain.
2655  */
2656 
2657 static inline void
2658 myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
2659  int cnt)
2660 {
2661  int idx, i;
2662  struct mcp_kreq_ether_send __iomem *dstp, *dst;
2663  struct mcp_kreq_ether_send *srcp;
2664  u8 last_flags;
2665 
2666  idx = tx->req & tx->mask;
2667 
2668  last_flags = src->flags;
2669  src->flags = 0;
2670  mb();
2671  dst = dstp = &tx->lanai[idx];
2672  srcp = src;
2673 
2674  if ((idx + cnt) < tx->mask) {
2675  for (i = 0; i < (cnt - 1); i += 2) {
2676  myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
2677  mb(); /* force write every 32 bytes */
2678  srcp += 2;
2679  dstp += 2;
2680  }
2681  } else {
2682  /* submit all but the first request, and ensure
2683  * that it is submitted below */
2684  myri10ge_submit_req_backwards(tx, src, cnt);
2685  i = 0;
2686  }
2687  if (i < cnt) {
2688  /* submit the first request */
2689  myri10ge_pio_copy(dstp, srcp, sizeof(*src));
2690  mb(); /* barrier before setting valid flag */
2691  }
2692 
2693  /* re-write the last 32-bits with the valid flags */
2694  src->flags = last_flags;
2695  put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3);
2696  tx->req += cnt;
2697  mb();
2698 }
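/*
 * Editor's note (a sketch of the ordering trick, not driver code):
 * each mcp_kreq_ether_send is 16 bytes, so the loop above copies two
 * descriptors (32 bytes) per myri10ge_pio_copy() to fill a write
 * burst, with mb() forcing each 32-byte chunk out. Because the first
 * descriptor was copied with flags = 0, the NIC ignores the chain
 * until the final store rewrites its last 32 bits with the saved
 * valid flags:
 *
 *   src->flags = last_flags;    // restore the real flags
 *   put_be32(*((__be32 *)src + 3), (__be32 __iomem *)dst + 3);
 *
 * so the device never sees a partially written request.
 */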
2699 
2700 /*
2701  * Transmit a packet. We need to split the packet so that a single
2702  * segment does not cross myri10ge->tx_boundary, so this makes segment
2703  * counting tricky. So rather than try to count segments up front, we
2704  * just give up if there are too few segments to hold a reasonably
2705  * fragmented packet currently available. If we run
2706  * out of segments while preparing a packet for DMA, we just linearize
2707  * it and try again.
2708  */
2709 
2710 static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
2711  struct net_device *dev)
2712 {
2713  struct myri10ge_priv *mgp = netdev_priv(dev);
2714  struct myri10ge_slice_state *ss;
2715  struct mcp_kreq_ether_send *req;
2716  struct myri10ge_tx_buf *tx;
2717  struct skb_frag_struct *frag;
2718  struct netdev_queue *netdev_queue;
2719  dma_addr_t bus;
2720  u32 low;
2721  __be32 high_swapped;
2722  unsigned int len;
2723  int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
2724  u16 pseudo_hdr_offset, cksum_offset, queue;
2725  int cum_len, seglen, boundary, rdma_count;
2726  u8 flags, odd_flag;
2727 
2728  queue = skb_get_queue_mapping(skb);
2729  ss = &mgp->ss[queue];
2730  netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
2731  tx = &ss->tx;
2732 
2733 again:
2734  req = tx->req_list;
2735  avail = tx->mask - 1 - (tx->req - tx->done);
2736 
2737  mss = 0;
2738  max_segments = MXGEFW_MAX_SEND_DESC;
2739 
2740  if (skb_is_gso(skb)) {
2741  mss = skb_shinfo(skb)->gso_size;
2742  max_segments = MYRI10GE_MAX_SEND_DESC_TSO;
2743  }
2744 
2745  if ((unlikely(avail < max_segments))) {
2746  /* we are out of transmit resources */
2747  tx->stop_queue++;
2748  netif_tx_stop_queue(netdev_queue);
2749  return NETDEV_TX_BUSY;
2750  }
2751 
2752  /* Setup checksum offloading, if needed */
2753  cksum_offset = 0;
2754  pseudo_hdr_offset = 0;
2755  odd_flag = 0;
2756  flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
2757  if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
2758  cksum_offset = skb_checksum_start_offset(skb);
2759  pseudo_hdr_offset = cksum_offset + skb->csum_offset;
2760  /* If the headers are excessively large, then we must
2761  * fall back to a software checksum */
2762  if (unlikely(!mss && (cksum_offset > 255 ||
2763  pseudo_hdr_offset > 127))) {
2764  if (skb_checksum_help(skb))
2765  goto drop;
2766  cksum_offset = 0;
2767  pseudo_hdr_offset = 0;
2768  } else {
2769  odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2770  flags |= MXGEFW_FLAGS_CKSUM;
2771  }
2772  }
2773 
2774  cum_len = 0;
2775 
2776  if (mss) { /* TSO */
2777  /* this removes any CKSUM flag from before */
2778  flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
2779 
2780  /* negative cum_len signifies to the
2781  * send loop that we are still in the
2782  * header portion of the TSO packet.
2783  * TSO header can be at most 1KB long */
2784  cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
2785 
2786  /* for IPv6 TSO, the checksum offset stores the
2787  * TCP header length, to save the firmware from
2788  * the need to parse the headers */
2789  if (skb_is_gso_v6(skb)) {
2790  cksum_offset = tcp_hdrlen(skb);
2791  /* Can only handle headers <= max_tso6 long */
2792  if (unlikely(-cum_len > mgp->max_tso6))
2793  return myri10ge_sw_tso(skb, dev);
2794  }
2795  /* for TSO, pseudo_hdr_offset holds mss.
2796  * The firmware figures out where to put
2797  * the checksum by parsing the header. */
2798  pseudo_hdr_offset = mss;
2799  } else
2800  /* Mark small packets, and pad out tiny packets */
2801  if (skb->len <= MXGEFW_SEND_SMALL_SIZE) {
2802  flags |= MXGEFW_FLAGS_SMALL;
2803 
2804  /* pad frames to at least ETH_ZLEN bytes */
2805  if (unlikely(skb->len < ETH_ZLEN)) {
2806  if (skb_padto(skb, ETH_ZLEN)) {
2807  /* The packet is gone, so we must
2808  * return 0 */
2809  ss->stats.tx_dropped += 1;
2810  return NETDEV_TX_OK;
2811  }
2812  /* adjust the len to account for the zero pad
2813  * so that the nic can know how long it is */
2814  skb->len = ETH_ZLEN;
2815  }
2816  }
2817 
2818  /* map the skb for DMA */
2819  len = skb_headlen(skb);
2820  idx = tx->req & tx->mask;
2821  tx->info[idx].skb = skb;
2822  bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
2823  dma_unmap_addr_set(&tx->info[idx], bus, bus);
2824  dma_unmap_len_set(&tx->info[idx], len, len);
2825 
2826  frag_cnt = skb_shinfo(skb)->nr_frags;
2827  frag_idx = 0;
2828  count = 0;
2829  rdma_count = 0;
2830 
2831  /* "rdma_count" is the number of RDMAs belonging to the
2832  * current packet BEFORE the current send request. For
2833  * non-TSO packets, this is equal to "count".
2834  * For TSO packets, rdma_count needs to be reset
2835  * to 0 after a segment cut.
2836  *
2837  * The rdma_count field of the send request is
2838  * the number of RDMAs of the packet starting at
2839  * that request. For TSO send requests with one or more cuts
2840  * in the middle, this is the number of RDMAs starting
2841  * after the last cut in the request. All previous
2842  * segments before the last cut implicitly have 1 RDMA.
2843  *
2844  * Since the number of RDMAs is not known beforehand,
2845  * it must be filled-in retroactively - after each
2846  * segmentation cut or at the end of the entire packet.
2847  */
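/*
 * Editor's note (illustration, not in the original comment): the
 * retroactive fill-in happens via the pointer arithmetic seen below,
 *
 *   (req - rdma_count)->rdma_count = rdma_count + 1;
 *
 * i.e. the code walks back over the descriptors accumulated since the
 * last cut and stores the count in the first of them, then resets the
 * running count for the next segment.
 */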
2848 
2849  while (1) {
2850  /* Break the SKB or Fragment up into pieces which
2851  * do not cross mgp->tx_boundary */
2852  low = MYRI10GE_LOWPART_TO_U32(bus);
2853  high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus));
2854  while (len) {
2855  u8 flags_next;
2856  int cum_len_next;
2857 
2858  if (unlikely(count == max_segments))
2859  goto abort_linearize;
2860 
2861  boundary =
2862  (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1);
2863  seglen = boundary - low;
2864  if (seglen > len)
2865  seglen = len;
2866  flags_next = flags & ~MXGEFW_FLAGS_FIRST;
2867  cum_len_next = cum_len + seglen;
2868  if (mss) { /* TSO */
2869  (req - rdma_count)->rdma_count = rdma_count + 1;
2870 
2871  if (likely(cum_len >= 0)) { /* payload */
2872  int next_is_first, chop;
2873 
2874  chop = (cum_len_next > mss);
2875  cum_len_next = cum_len_next % mss;
2876  next_is_first = (cum_len_next == 0);
2877  flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
2878  flags_next |= next_is_first *
2879  MXGEFW_FLAGS_FIRST;
2880  rdma_count |= -(chop | next_is_first);
2881  rdma_count += chop & !next_is_first;
2882  } else if (likely(cum_len_next >= 0)) { /* header ends */
2883  int small;
2884 
2885  rdma_count = -1;
2886  cum_len_next = 0;
2887  seglen = -cum_len;
2888  small = (mss <= MXGEFW_SEND_SMALL_SIZE);
2889  flags_next = MXGEFW_FLAGS_TSO_PLD |
2890  MXGEFW_FLAGS_FIRST |
2891  (small * MXGEFW_FLAGS_SMALL);
2892  }
2893  }
2894  req->addr_high = high_swapped;
2895  req->addr_low = htonl(low);
2896  req->pseudo_hdr_offset = htons(pseudo_hdr_offset);
2897  req->pad = 0; /* complete solid 16-byte block; does this matter? */
2898  req->rdma_count = 1;
2899  req->length = htons(seglen);
2900  req->cksum_offset = cksum_offset;
2901  req->flags = flags | ((cum_len & 1) * odd_flag);
2902 
2903  low += seglen;
2904  len -= seglen;
2905  cum_len = cum_len_next;
2906  flags = flags_next;
2907  req++;
2908  count++;
2909  rdma_count++;
2910  if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) {
2911  if (unlikely(cksum_offset > seglen))
2912  cksum_offset -= seglen;
2913  else
2914  cksum_offset = 0;
2915  }
2916  }
2917  if (frag_idx == frag_cnt)
2918  break;
2919 
2920  /* map next fragment for DMA */
2921  idx = (count + tx->req) & tx->mask;
2922  frag = &skb_shinfo(skb)->frags[frag_idx];
2923  frag_idx++;
2924  len = skb_frag_size(frag);
2925  bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
2926  DMA_TO_DEVICE);
2927  dma_unmap_addr_set(&tx->info[idx], bus, bus);
2928  dma_unmap_len_set(&tx->info[idx], len, len);
2929  }
2930 
2931  (req - rdma_count)->rdma_count = rdma_count;
2932  if (mss)
2933  do {
2934  req--;
2935  req->flags |= MXGEFW_FLAGS_TSO_LAST;
2936  } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
2937  MXGEFW_FLAGS_FIRST)));
2938  idx = ((count - 1) + tx->req) & tx->mask;
2939  tx->info[idx].last = 1;
2940  myri10ge_submit_req(tx, tx->req_list, count);
2941  /* if using multiple tx queues, make sure NIC polls the
2942  * current slice */
2943  if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
2944  tx->queue_active = 1;
2945  put_be32(htonl(1), tx->send_go);
2946  mb();
2947  mmiowb();
2948  }
2949  tx->pkt_start++;
2950  if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
2951  tx->stop_queue++;
2952  netif_tx_stop_queue(netdev_queue);
2953  }
2954  return NETDEV_TX_OK;
2955 
2956 abort_linearize:
2957  /* Free any DMA resources we've alloced and clear out the skb
2958  * slot so as to not trip up assertions, and to avoid a
2959  * double-free if linearizing fails */
2960 
2961  last_idx = (idx + 1) & tx->mask;
2962  idx = tx->req & tx->mask;
2963  tx->info[idx].skb = NULL;
2964  do {
2965  len = dma_unmap_len(&tx->info[idx], len);
2966  if (len) {
2967  if (tx->info[idx].skb != NULL)
2968  pci_unmap_single(mgp->pdev,
2969  dma_unmap_addr(&tx->info[idx],
2970  bus), len,
2971  PCI_DMA_TODEVICE);
2972  else
2973  pci_unmap_page(mgp->pdev,
2974  dma_unmap_addr(&tx->info[idx],
2975  bus), len,
2976  PCI_DMA_TODEVICE);
2977  dma_unmap_len_set(&tx->info[idx], len, 0);
2978  tx->info[idx].skb = NULL;
2979  }
2980  idx = (idx + 1) & tx->mask;
2981  } while (idx != last_idx);
2982  if (skb_is_gso(skb)) {
2983  netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
2984  goto drop;
2985  }
2986 
2987  if (skb_linearize(skb))
2988  goto drop;
2989 
2990  tx->linearized++;
2991  goto again;
2992 
2993 drop:
2994  dev_kfree_skb_any(skb);
2995  ss->stats.tx_dropped += 1;
2996  return NETDEV_TX_OK;
2997 
2998 }
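/*
 * Editor's note (worked arithmetic, not driver code): the segment
 * splitting in myri10ge_xmit() rounds the bus address up to the next
 * tx_boundary multiple; tx_boundary is a power of two, so:
 *
 *   boundary = (low + tx_boundary) & ~(tx_boundary - 1);
 *   seglen   = boundary - low;
 *
 * With low = 0x1f00 and tx_boundary = 0x1000, boundary = 0x2000 and
 * seglen = 0x100, so the first descriptor covers only the 256 bytes
 * up to the 4KB line and no single DMA ever crosses the boundary.
 */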
2999 
3000 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
3001  struct net_device *dev)
3002 {
3003  struct sk_buff *segs, *curr;
3004  struct myri10ge_priv *mgp = netdev_priv(dev);
3005  struct myri10ge_slice_state *ss;
3006  netdev_tx_t status;
3007 
3008  segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6);
3009  if (IS_ERR(segs))
3010  goto drop;
3011 
3012  while (segs) {
3013  curr = segs;
3014  segs = segs->next;
3015  curr->next = NULL;
3016  status = myri10ge_xmit(curr, dev);
3017  if (status != 0) {
3018  dev_kfree_skb_any(curr);
3019  if (segs != NULL) {
3020  curr = segs;
3021  segs = segs->next;
3022  curr->next = NULL;
3023  dev_kfree_skb_any(segs);
3024  }
3025  goto drop;
3026  }
3027  }
3028  dev_kfree_skb_any(skb);
3029  return NETDEV_TX_OK;
3030 
3031 drop:
3032  ss = &mgp->ss[skb_get_queue_mapping(skb)];
3033  dev_kfree_skb_any(skb);
3034  ss->stats.tx_dropped += 1;
3035  return NETDEV_TX_OK;
3036 }
3037 
3038 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev,
3039  struct rtnl_link_stats64 *stats)
3040 {
3041  const struct myri10ge_priv *mgp = netdev_priv(dev);
3042  const struct myri10ge_slice_netstats *slice_stats;
3043  int i;
3044 
3045  for (i = 0; i < mgp->num_slices; i++) {
3046  slice_stats = &mgp->ss[i].stats;
3047  stats->rx_packets += slice_stats->rx_packets;
3048  stats->tx_packets += slice_stats->tx_packets;
3049  stats->rx_bytes += slice_stats->rx_bytes;
3050  stats->tx_bytes += slice_stats->tx_bytes;
3051  stats->rx_dropped += slice_stats->rx_dropped;
3052  stats->tx_dropped += slice_stats->tx_dropped;
3053  }
3054  return stats;
3055 }
3056 
3057 static void myri10ge_set_multicast_list(struct net_device *dev)
3058 {
3059  struct myri10ge_priv *mgp = netdev_priv(dev);
3060  struct myri10ge_cmd cmd;
3061  struct netdev_hw_addr *ha;
3062  __be32 data[2] = { 0, 0 };
3063  int err;
3064 
3065  /* can be called from atomic contexts,
3066  * pass 1 to force atomicity in myri10ge_send_cmd() */
3067  myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1);
3068 
3069  /* This firmware is known to not support multicast */
3070  if (!mgp->fw_multicast_support)
3071  return;
3072 
3073  /* Disable multicast filtering */
3074 
3075  err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1);
3076  if (err != 0) {
3077  netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n",
3078  err);
3079  goto abort;
3080  }
3081 
3082  if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) {
3083  /* request to disable multicast filtering, so quit here */
3084  return;
3085  }
3086 
3087  /* Flush the filters */
3088 
3089  err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS,
3090  &cmd, 1);
3091  if (err != 0) {
3092  netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n",
3093  err);
3094  goto abort;
3095  }
3096 
3097  /* Walk the multicast list, and add each address */
3098  netdev_for_each_mc_addr(ha, dev) {
3099  memcpy(data, &ha->addr, 6);
3100  cmd.data0 = ntohl(data[0]);
3101  cmd.data1 = ntohl(data[1]);
3102  err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP,
3103  &cmd, 1);
3104 
3105  if (err != 0) {
3106  netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n",
3107  err, ha->addr);
3108  goto abort;
3109  }
3110  }
3111  /* Enable multicast filtering */
3112  err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1);
3113  if (err != 0) {
3114  netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n",
3115  err);
3116  goto abort;
3117  }
3118 
3119  return;
3120 
3121 abort:
3122  return;
3123 }
3124 
3125 static int myri10ge_set_mac_address(struct net_device *dev, void *addr)
3126 {
3127  struct sockaddr *sa = addr;
3128  struct myri10ge_priv *mgp = netdev_priv(dev);
3129  int status;
3130 
3131  if (!is_valid_ether_addr(sa->sa_data))
3132  return -EADDRNOTAVAIL;
3133 
3134  status = myri10ge_update_mac_address(mgp, sa->sa_data);
3135  if (status != 0) {
3136  netdev_err(dev, "changing mac address failed with %d\n",
3137  status);
3138  return status;
3139  }
3140 
3141  /* change the dev structure */
3142  memcpy(dev->dev_addr, sa->sa_data, 6);
3143  return 0;
3144 }
3145 
3146 static netdev_features_t myri10ge_fix_features(struct net_device *dev,
3147  netdev_features_t features)
3148 {
3149  if (!(features & NETIF_F_RXCSUM))
3150  features &= ~NETIF_F_LRO;
3151 
3152  return features;
3153 }
3154 
3155 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
3156 {
3157  struct myri10ge_priv *mgp = netdev_priv(dev);
3158  int error = 0;
3159 
3160  if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) {
3161  netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu);
3162  return -EINVAL;
3163  }
3164  netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
3165  if (mgp->running) {
3166  /* if we change the mtu on an active device, we must
3167  * reset the device so the firmware sees the change */
3168  myri10ge_close(dev);
3169  dev->mtu = new_mtu;
3170  myri10ge_open(dev);
3171  } else
3172  dev->mtu = new_mtu;
3173 
3174  return error;
3175 }
3176 
3177 /*
3178  * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary.
3179  * Only do it if the bridge is a root port since we don't want to disturb
3180  * any other device, except if forced with myri10ge_ecrc_enable > 1.
3181  */
3182 
3183 static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp)
3184 {
3185  struct pci_dev *bridge = mgp->pdev->bus->self;
3186  struct device *dev = &mgp->pdev->dev;
3187  int cap;
3188  unsigned err_cap;
3189  int ret;
3190 
3191  if (!myri10ge_ecrc_enable || !bridge)
3192  return;
3193 
3194  /* check that the bridge is a root port */
3195  if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) {
3196  if (myri10ge_ecrc_enable > 1) {
3197  struct pci_dev *prev_bridge, *old_bridge = bridge;
3198 
3199  /* Walk the hierarchy up to the root port
3200  * where ECRC has to be enabled */
3201  do {
3202  prev_bridge = bridge;
3203  bridge = bridge->bus->self;
3204  if (!bridge || prev_bridge == bridge) {
3205  dev_err(dev,
3206  "Failed to find root port"
3207  " to force ECRC\n");
3208  return;
3209  }
3210  } while (pci_pcie_type(bridge) !=
3211  PCI_EXP_TYPE_ROOT_PORT);
3212 
3213  dev_info(dev,
3214  "Forcing ECRC on non-root port %s"
3215  " (enabling on root port %s)\n",
3216  pci_name(old_bridge), pci_name(bridge));
3217  } else {
3218  dev_err(dev,
3219  "Not enabling ECRC on non-root port %s\n",
3220  pci_name(bridge));
3221  return;
3222  }
3223  }
3224 
3225  cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
3226  if (!cap)
3227  return;
3228 
3229  ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap);
3230  if (ret) {
3231  dev_err(dev, "failed reading ext-conf-space of %s\n",
3232  pci_name(bridge));
3233  dev_err(dev, "\t pci=nommconf in use? "
3234  "or buggy/incomplete/absent ACPI MCFG attr?\n");
3235  return;
3236  }
3237  if (!(err_cap & PCI_ERR_CAP_ECRC_GENC))
3238  return;
3239 
3240  err_cap |= PCI_ERR_CAP_ECRC_GENE;
3241  pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap);
3242  dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge));
3243 }
3244 
3245 /*
3246  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
3247  * when the PCI-E Completion packets are aligned on an 8-byte
3248  * boundary. Some PCI-E chip sets always align Completion packets; on
3249  * the ones that do not, the alignment can be enforced by enabling
3250  * ECRC generation (if supported).
3251  *
3252  * When PCI-E Completion packets are not aligned, it is actually more
3253  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
3254  *
3255  * If the driver can neither enable ECRC nor verify that it has
3256  * already been enabled, then it must use a firmware image which works
3257  * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it
3258  * should also ensure that it never gives the device a Read-DMA which is
3259  * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
3260  * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat)
3261  * firmware image, and set tx_boundary to 4KB.
3262  */
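/*
 * Editor's note (summary of the cases above, not part of the source):
 *
 *   completions aligned (or ECRC enabled)  -> aligned firmware image,
 *                                             tx_boundary = 4096
 *   completions possibly unaligned         -> unaligned (ethp) image,
 *                                             tx_boundary = 2048
 *
 * myri10ge_firmware_probe() below implements this decision by loading
 * the aligned image and running the MXGEFW_CMD_UNALIGNED_TEST DMA
 * test, falling back to the unaligned image if the test aborts.
 */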
3263 
3264 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp)
3265 {
3266  struct pci_dev *pdev = mgp->pdev;
3267  struct device *dev = &pdev->dev;
3268  int status;
3269 
3270  mgp->tx_boundary = 4096;
3271  /*
3272  * Verify the max read request size was set to 4KB
3273  * before trying the test with 4KB.
3274  */
3275  status = pcie_get_readrq(pdev);
3276  if (status < 0) {
3277  dev_err(dev, "Couldn't read max read req size: %d\n", status);
3278  goto abort;
3279  }
3280  if (status != 4096) {
3281  dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status);
3282  mgp->tx_boundary = 2048;
3283  }
3284  /*
3285  * load the optimized firmware (which assumes aligned PCIe
3286  * completions) in order to see if it works on this host.
3287  */
3288  set_fw_name(mgp, myri10ge_fw_aligned, false);
3289  status = myri10ge_load_firmware(mgp, 1);
3290  if (status != 0) {
3291  goto abort;
3292  }
3293 
3294  /*
3295  * Enable ECRC if possible
3296  */
3297  myri10ge_enable_ecrc(mgp);
3298 
3299  /*
3300  * Run a DMA test which watches for unaligned completions and
3301  * aborts on the first one seen.
3302  */
3303 
3304  status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
3305  if (status == 0)
3306  return; /* keep the aligned firmware */
3307 
3308  if (status != -E2BIG)
3309  dev_warn(dev, "DMA test failed: %d\n", status);
3310  if (status == -ENOSYS)
3311  dev_warn(dev, "Falling back to ethp! "
3312  "Please install up to date fw\n");
3313 abort:
3314  /* fall back to using the unaligned firmware */
3315  mgp->tx_boundary = 2048;
3316  set_fw_name(mgp, myri10ge_fw_unaligned, false);
3317 }
3318 
3319 static void myri10ge_select_firmware(struct myri10ge_priv *mgp)
3320 {
3321  int overridden = 0;
3322 
3323  if (myri10ge_force_firmware == 0) {
3324  int link_width;
3325  u16 lnk;
3326 
3327  pcie_capability_read_word(mgp->pdev, PCI_EXP_LNKSTA, &lnk);
3328  link_width = (lnk >> 4) & 0x3f;
3329 
3330  /* Check to see if Link is less than 8 or if the
3331  * upstream bridge is known to provide aligned
3332  * completions */
3333  if (link_width < 8) {
3334  dev_info(&mgp->pdev->dev, "PCIE x%d Link\n",
3335  link_width);
3336  mgp->tx_boundary = 4096;
3337  set_fw_name(mgp, myri10ge_fw_aligned, false);
3338  } else {
3339  myri10ge_firmware_probe(mgp);
3340  }
3341  } else {
3342  if (myri10ge_force_firmware == 1) {
3343  dev_info(&mgp->pdev->dev,
3344  "Assuming aligned completions (forced)\n");
3345  mgp->tx_boundary = 4096;
3346  set_fw_name(mgp, myri10ge_fw_aligned, false);
3347  } else {
3348  dev_info(&mgp->pdev->dev,
3349  "Assuming unaligned completions (forced)\n");
3350  mgp->tx_boundary = 2048;
3351  set_fw_name(mgp, myri10ge_fw_unaligned, false);
3352  }
3353  }
3354 
3355  kparam_block_sysfs_write(myri10ge_fw_name);
3356  if (myri10ge_fw_name != NULL) {
3357  char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL);
3358  if (fw_name) {
3359  overridden = 1;
3360  set_fw_name(mgp, fw_name, true);
3361  }
3362  }
3363  kparam_unblock_sysfs_write(myri10ge_fw_name);
3364 
3365  if (mgp->board_number < MYRI10GE_MAX_BOARDS &&
3366  myri10ge_fw_names[mgp->board_number] != NULL &&
3367  strlen(myri10ge_fw_names[mgp->board_number])) {
3368  set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false);
3369  overridden = 1;
3370  }
3371  if (overridden)
3372  dev_info(&mgp->pdev->dev, "overriding firmware to %s\n",
3373  mgp->fw_name);
3374 }
3375 
3376 static void myri10ge_mask_surprise_down(struct pci_dev *pdev)
3377 {
3378  struct pci_dev *bridge = pdev->bus->self;
3379  int cap;
3380  u32 mask;
3381 
3382  if (bridge == NULL)
3383  return;
3384 
3385  cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR);
3386  if (cap) {
3387  /* a sram parity error can cause a surprise link
3388  * down; since we expect and can recover from sram
3389  * parity errors, mask surprise link down events */
3390  pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask);
3391  mask |= 0x20;
3392  pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask);
3393  }
3394 }
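/*
 * Editor's note: the 0x20 ORed into the mask above is bit 5 of the
 * AER uncorrectable-error mask register, the Surprise Down Error bit
 * (PCI_ERR_UNC_SURPDN in later kernel headers), which matches the
 * comment's rationale: SRAM parity recovery makes surprise link-down
 * events expected rather than fatal.
 */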
3395 
3396 #ifdef CONFIG_PM
3397 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state)
3398 {
3399  struct myri10ge_priv *mgp;
3400  struct net_device *netdev;
3401 
3402  mgp = pci_get_drvdata(pdev);
3403  if (mgp == NULL)
3404  return -EINVAL;
3405  netdev = mgp->dev;
3406 
3407  netif_device_detach(netdev);
3408  if (netif_running(netdev)) {
3409  netdev_info(netdev, "closing\n");
3410  rtnl_lock();
3411  myri10ge_close(netdev);
3412  rtnl_unlock();
3413  }
3414  myri10ge_dummy_rdma(mgp, 0);
3415  pci_save_state(pdev);
3416  pci_disable_device(pdev);
3417 
3418  return pci_set_power_state(pdev, pci_choose_state(pdev, state));
3419 }
3420 
3421 static int myri10ge_resume(struct pci_dev *pdev)
3422 {
3423  struct myri10ge_priv *mgp;
3424  struct net_device *netdev;
3425  int status;
3426  u16 vendor;
3427 
3428  mgp = pci_get_drvdata(pdev);
3429  if (mgp == NULL)
3430  return -EINVAL;
3431  netdev = mgp->dev;
3432  pci_set_power_state(pdev, 0); /* zeros conf space as a side effect */
3433  msleep(5); /* give card time to respond */
3434  pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
3435  if (vendor == 0xffff) {
3436  netdev_err(mgp->dev, "device disappeared!\n");
3437  return -EIO;
3438  }
3439 
3440  pci_restore_state(pdev);
3441 
3442  status = pci_enable_device(pdev);
3443  if (status) {
3444  dev_err(&pdev->dev, "failed to enable device\n");
3445  return status;
3446  }
3447 
3448  pci_set_master(pdev);
3449 
3450  myri10ge_reset(mgp);
3451  myri10ge_dummy_rdma(mgp, 1);
3452 
3453  /* Save configuration space to be restored if the
3454  * nic resets due to a parity error */
3455  pci_save_state(pdev);
3456 
3457  if (netif_running(netdev)) {
3458  rtnl_lock();
3459  status = myri10ge_open(netdev);
3460  rtnl_unlock();
3461  if (status != 0)
3462  goto abort_with_enabled;
3463 
3464  }
3465  netif_device_attach(netdev);
3466 
3467  return 0;
3468 
3469 abort_with_enabled:
3470  pci_disable_device(pdev);
3471  return -EIO;
3472 
3473 }
3474 #endif /* CONFIG_PM */
3475 
3476 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp)
3477 {
3478  struct pci_dev *pdev = mgp->pdev;
3479  int vs = mgp->vendor_specific_offset;
3480  u32 reboot;
3481 
3482  /*enter read32 mode */
3483  pci_write_config_byte(pdev, vs + 0x10, 0x3);
3484 
3485  /*read REBOOT_STATUS (0xfffffff0) */
3486  pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0);
3487  pci_read_config_dword(pdev, vs + 0x14, &reboot);
3488  return reboot;
3489 }
3490 
3491 static void
3492 myri10ge_check_slice(struct myri10ge_slice_state *ss, int *reset_needed,
3493  int *busy_slice_cnt, u32 rx_pause_cnt)
3494 {
3495  struct myri10ge_priv *mgp = ss->mgp;
3496  int slice = ss - mgp->ss;
3497 
3498  if (ss->tx.req != ss->tx.done &&
3499  ss->tx.done == ss->watchdog_tx_done &&
3500  ss->watchdog_tx_req != ss->watchdog_tx_done) {
3501  /* nic seems like it might be stuck.. */
3502  if (rx_pause_cnt != mgp->watchdog_pause) {
3503  if (net_ratelimit())
3504  netdev_warn(mgp->dev, "slice %d: TX paused, "
3505  "check link partner\n", slice);
3506  } else {
3507  netdev_warn(mgp->dev,
3508  "slice %d: TX stuck %d %d %d %d %d %d\n",
3509  slice, ss->tx.queue_active, ss->tx.req,
3510  ss->tx.done, ss->tx.pkt_start,
3511  ss->tx.pkt_done,
3512  (int)ntohl(mgp->ss[slice].fw_stats->
3513  send_done_count));
3514  *reset_needed = 1;
3515  ss->stuck = 1;
3516  }
3517  }
3518  if (ss->watchdog_tx_done != ss->tx.done ||
3519  ss->watchdog_rx_done != ss->rx_done.cnt) {
3520  *busy_slice_cnt += 1;
3521  }
3522  ss->watchdog_tx_done = ss->tx.done;
3523  ss->watchdog_tx_req = ss->tx.req;
3524  ss->watchdog_rx_done = ss->rx_done.cnt;
3525 }
3526 
3527 /*
3528  * This watchdog is used to check whether the board has suffered
3529  * from a parity error and needs to be recovered.
3530  */
3531 static void myri10ge_watchdog(struct work_struct *work)
3532 {
3533  struct myri10ge_priv *mgp =
3534  container_of(work, struct myri10ge_priv, watchdog_work);
3535  struct myri10ge_slice_state *ss;
3536  u32 reboot, rx_pause_cnt;
3537  int status, rebooted;
3538  int i;
3539  int reset_needed = 0;
3540  int busy_slice_cnt = 0;
3541  u16 cmd, vendor;
3542 
3543  mgp->watchdog_resets++;
3544  pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
3545  rebooted = 0;
3546  if ((cmd & PCI_COMMAND_MASTER) == 0) {
3547  /* Bus master DMA disabled? Check to see
3548  * if the card rebooted due to a parity error
3549  * For now, just report it */
3550  reboot = myri10ge_read_reboot(mgp);
3551  netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n",
3552  reboot, myri10ge_reset_recover ? "" : " not");
3553  if (myri10ge_reset_recover == 0)
3554  return;
3555  rtnl_lock();
3556  mgp->rebooted = 1;
3557  rebooted = 1;
3558  myri10ge_close(mgp->dev);
3559  myri10ge_reset_recover--;
3560  mgp->rebooted = 0;
3561  /*
3562  * A rebooted nic will come back with config space as
3563  * it was after power was applied to PCIe bus.
3564  * Attempt to restore config space which was saved
3565  * when the driver was loaded, or the last time the
3566  * nic was resumed from power saving mode.
3567  */
3568  pci_restore_state(mgp->pdev);
3569 
3570  /* save state again for accounting reasons */
3571  pci_save_state(mgp->pdev);
3572 
3573  } else {
3574  /* if we get back -1's from our slot, perhaps somebody
3575  * powered off our card. Don't try to reset it in
3576  * this case */
3577  if (cmd == 0xffff) {
3578  pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor);
3579  if (vendor == 0xffff) {
3580  netdev_err(mgp->dev, "device disappeared!\n");
3581  return;
3582  }
3583  }
3584  /* Perhaps it is a software error. See if stuck slice
3585  * has recovered, reset if not */
3586  rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause);
3587  for (i = 0; i < mgp->num_slices; i++) {
3588  ss = &mgp->ss[i];	/* index each slice, not just slice 0 */
3589  if (ss->stuck) {
3590  myri10ge_check_slice(ss, &reset_needed,
3591  &busy_slice_cnt,
3592  rx_pause_cnt);
3593  ss->stuck = 0;
3594  }
3595  }
3596  if (!reset_needed) {
3597  netdev_dbg(mgp->dev, "not resetting\n");
3598  return;
3599  }
3600 
3601  netdev_err(mgp->dev, "device timeout, resetting\n");
3602  }
3603 
3604  if (!rebooted) {
3605  rtnl_lock();
3606  myri10ge_close(mgp->dev);
3607  }
3608  status = myri10ge_load_firmware(mgp, 1);
3609  if (status != 0)
3610  netdev_err(mgp->dev, "failed to load firmware\n");
3611  else
3612  myri10ge_open(mgp->dev);
3613  rtnl_unlock();
3614 }
3615 
3616 /*
3617  * We use our own timer routine rather than relying upon
3618  * netdev->tx_timeout because we have a very large hardware transmit
3619  * queue. Due to the large queue, the netdev->tx_timeout function
3620  * cannot detect a NIC with a parity error in a timely fashion if the
3621  * NIC is lightly loaded.
3622  */
3623 static void myri10ge_watchdog_timer(unsigned long arg)
3624 {
3625  struct myri10ge_priv *mgp;
3626  struct myri10ge_slice_state *ss;
3627  int i, reset_needed, busy_slice_cnt;
3628  u32 rx_pause_cnt;
3629  u16 cmd;
3630 
3631  mgp = (struct myri10ge_priv *)arg;
3632 
3633  rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause);
3634  busy_slice_cnt = 0;
3635  for (i = 0, reset_needed = 0;
3636  i < mgp->num_slices && reset_needed == 0; ++i) {
3637 
3638  ss = &mgp->ss[i];
3639  if (ss->rx_small.watchdog_needed) {
3640  myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
3641  mgp->small_bytes + MXGEFW_PAD,
3642  1);
3643  if (ss->rx_small.fill_cnt - ss->rx_small.cnt >=
3644  myri10ge_fill_thresh)
3645  ss->rx_small.watchdog_needed = 0;
3646  }
3647  if (ss->rx_big.watchdog_needed) {
3648  myri10ge_alloc_rx_pages(mgp, &ss->rx_big,
3649  mgp->big_bytes, 1);
3650  if (ss->rx_big.fill_cnt - ss->rx_big.cnt >=
3651  myri10ge_fill_thresh)
3652  ss->rx_big.watchdog_needed = 0;
3653  }
3654  myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt,
3655  rx_pause_cnt);
3656  }
3657  /* if we've sent or received no traffic, poll the NIC to
3658  * ensure it is still there. Otherwise, we risk not noticing
3659  * an error in a timely fashion */
3660  if (busy_slice_cnt == 0) {
3661  pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd);
3662  if ((cmd & PCI_COMMAND_MASTER) == 0) {
3663  reset_needed = 1;
3664  }
3665  }
3666  mgp->watchdog_pause = rx_pause_cnt;
3667 
3668  if (reset_needed) {
3669  schedule_work(&mgp->watchdog_work);
3670  } else {
3671  /* rearm timer */
3672  mod_timer(&mgp->watchdog_timer,
3673  jiffies + myri10ge_watchdog_timeout * HZ);
3674  }
3675 }
3676 
3677 static void myri10ge_free_slices(struct myri10ge_priv *mgp)
3678 {
3679  struct myri10ge_slice_state *ss;
3680  struct pci_dev *pdev = mgp->pdev;
3681  size_t bytes;
3682  int i;
3683 
3684  if (mgp->ss == NULL)
3685  return;
3686 
3687  for (i = 0; i < mgp->num_slices; i++) {
3688  ss = &mgp->ss[i];
3689  if (ss->rx_done.entry != NULL) {
3690  bytes = mgp->max_intr_slots *
3691  sizeof(*ss->rx_done.entry);
3692  dma_free_coherent(&pdev->dev, bytes,
3693  ss->rx_done.entry, ss->rx_done.bus);
3694  ss->rx_done.entry = NULL;
3695  }
3696  if (ss->fw_stats != NULL) {
3697  bytes = sizeof(*ss->fw_stats);
3698  dma_free_coherent(&pdev->dev, bytes,
3699  ss->fw_stats, ss->fw_stats_bus);
3700  ss->fw_stats = NULL;
3701  }
3702  netif_napi_del(&ss->napi);
3703  }
3704  kfree(mgp->ss);
3705  mgp->ss = NULL;
3706 }
3707 
3708 static int myri10ge_alloc_slices(struct myri10ge_priv *mgp)
3709 {
3710  struct myri10ge_slice_state *ss;
3711  struct pci_dev *pdev = mgp->pdev;
3712  size_t bytes;
3713  int i;
3714 
3715  bytes = sizeof(*mgp->ss) * mgp->num_slices;
3716  mgp->ss = kzalloc(bytes, GFP_KERNEL);
3717  if (mgp->ss == NULL) {
3718  return -ENOMEM;
3719  }
3720 
3721  for (i = 0; i < mgp->num_slices; i++) {
3722  ss = &mgp->ss[i];
3723  bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry);
3724  ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes,
3725  &ss->rx_done.bus,
3726  GFP_KERNEL);
3727  if (ss->rx_done.entry == NULL)
3728  goto abort;
3729  memset(ss->rx_done.entry, 0, bytes);
3730  bytes = sizeof(*ss->fw_stats);
3731  ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes,
3732  &ss->fw_stats_bus,
3733  GFP_KERNEL);
3734  if (ss->fw_stats == NULL)
3735  goto abort;
3736  ss->mgp = mgp;
3737  ss->dev = mgp->dev;
3738  netif_napi_add(ss->dev, &ss->napi, myri10ge_poll,
3739  myri10ge_napi_weight);
3740  }
3741  return 0;
3742 abort:
3743  myri10ge_free_slices(mgp);
3744  return -ENOMEM;
3745 }
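
/*
 * Editor's note -- illustrative sketch, not part of the driver.  The
 * allocator above uses the kernel's "allocate all or unwind all" idiom:
 * any failure jumps to a single abort path, which works because
 * myri10ge_free_slices() tolerates partially-initialized state (NULL
 * members are simply skipped).  Hypothetical "example_" names:
 */
#if 0
static int example_alloc(struct example_priv *priv)
{
	priv->ring = dma_alloc_coherent(&priv->pdev->dev, RING_BYTES,
					&priv->ring_bus, GFP_KERNEL);
	if (priv->ring == NULL)
		goto abort;
	priv->stats = dma_alloc_coherent(&priv->pdev->dev, STATS_BYTES,
					 &priv->stats_bus, GFP_KERNEL);
	if (priv->stats == NULL)
		goto abort;
	return 0;
abort:
	example_free(priv);	/* must be safe on partial state */
	return -ENOMEM;
}
#endif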
3746 
3747 /*
3748  * This function determines the number of slices supported.
3749  * The number of slices is the minimum of the number of CPUs,
3750  * the number of MSI-X IRQs supported, and the number of slices
3751  * supported by the firmware.
3752  */
3753 static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
3754 {
3755  struct myri10ge_cmd cmd;
3756  struct pci_dev *pdev = mgp->pdev;
3757  char *old_fw;
3758  bool old_allocated;
3759  int i, status, ncpus, msix_cap;
3760 
3761  mgp->num_slices = 1;
3762  msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3763  ncpus = netif_get_num_default_rss_queues();
3764 
3765  if (myri10ge_max_slices == 1 || msix_cap == 0 ||
3766  (myri10ge_max_slices == -1 && ncpus < 2))
3767  return;
3768 
3769  /* try to load the slice aware rss firmware */
3770  old_fw = mgp->fw_name;
3771  old_allocated = mgp->fw_name_allocated;
3772  /* don't free old_fw if we override it. */
3773  mgp->fw_name_allocated = false;
3774 
3775  if (myri10ge_fw_name != NULL) {
3776  dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n",
3777  myri10ge_fw_name);
3778  set_fw_name(mgp, myri10ge_fw_name, false);
3779  } else if (old_fw == myri10ge_fw_aligned)
3780  set_fw_name(mgp, myri10ge_fw_rss_aligned, false);
3781  else
3782  set_fw_name(mgp, myri10ge_fw_rss_unaligned, false);
3783  status = myri10ge_load_firmware(mgp, 0);
3784  if (status != 0) {
3785  dev_info(&pdev->dev, "Rss firmware not found\n");
3786  if (old_allocated)
3787  kfree(old_fw);
3788  return;
3789  }
3790 
3791  /* hit the board with a reset to ensure it is alive */
3792  memset(&cmd, 0, sizeof(cmd));
3793  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0);
3794  if (status != 0) {
3795  dev_err(&mgp->pdev->dev, "failed reset\n");
3796  goto abort_with_fw;
3797  }
3798 
3799  mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot);
3800 
3801  /* tell it the size of the interrupt queues */
3802  cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot);
3803  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0);
3804  if (status != 0) {
3805  dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
3806  goto abort_with_fw;
3807  }
3808 
3809  /* ask for the maximum number of slices it supports */
3810  status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0);
3811  if (status != 0)
3812  goto abort_with_fw;
3813  else
3814  mgp->num_slices = cmd.data0;
3815 
3816  /* Only allow multiple slices if MSI-X is usable */
3817  if (!myri10ge_msi) {
3818  goto abort_with_fw;
3819  }
3820 
3821  /* if the admin did not specify a limit to how many
3822  * slices we should use, cap it automatically to the
3823  * number of CPUs currently online */
3824  if (myri10ge_max_slices == -1)
3825  myri10ge_max_slices = ncpus;
3826 
3827  if (mgp->num_slices > myri10ge_max_slices)
3828  mgp->num_slices = myri10ge_max_slices;
3829 
3830  /* Now try to allocate as many MSI-X vectors as we have
3831  * slices. We give up on MSI-X if we can only get a single
3832  * vector. */
3833 
3834  mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors),
3835  GFP_KERNEL);
3836  if (mgp->msix_vectors == NULL)
3837  goto disable_msix;
3838  for (i = 0; i < mgp->num_slices; i++) {
3839  mgp->msix_vectors[i].entry = i;
3840  }
3841 
3842  while (mgp->num_slices > 1) {
3843  /* make sure it is a power of two */
3844  while (!is_power_of_2(mgp->num_slices))
3845  mgp->num_slices--;
3846  if (mgp->num_slices == 1)
3847  goto disable_msix;
3848  status = pci_enable_msix(pdev, mgp->msix_vectors,
3849  mgp->num_slices);
3850  if (status == 0) {
3851  pci_disable_msix(pdev);
3852  if (old_allocated)
3853  kfree(old_fw);
3854  return;
3855  }
3856  if (status > 0)
3857  mgp->num_slices = status;
3858  else
3859  goto disable_msix;
3860  }
3861 
3862 disable_msix:
3863  if (mgp->msix_vectors != NULL) {
3864  kfree(mgp->msix_vectors);
3865  mgp->msix_vectors = NULL;
3866  }
3867 
3868 abort_with_fw:
3869  mgp->num_slices = 1;
3870  set_fw_name(mgp, old_fw, old_allocated);
3871  myri10ge_load_firmware(mgp, 0);
3872 }
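
/*
 * Editor's note -- illustrative sketch, not part of the driver.  The
 * negotiation above relies on the pci_enable_msix() contract of this
 * kernel era: 0 means all requested vectors were allocated, a positive
 * return is the number of vectors actually available (the caller may
 * retry with fewer), and a negative value is a hard error.  The slice
 * count is also kept a power of two, presumably so the RSS firmware can
 * hash flows evenly across 2^n queues.  Condensed, hypothetical form:
 */
#if 0
	while (nvec > 1) {
		while (!is_power_of_2(nvec))
			nvec--;			/* round down to 2^n */
		ret = pci_enable_msix(pdev, entries, nvec);
		if (ret == 0)
			break;			/* got all nvec vectors */
		if (ret > 0)
			nvec = ret;		/* retry with what exists */
		else
			return ret;		/* hard failure */
	}
#endif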
3873 
3874 static const struct net_device_ops myri10ge_netdev_ops = {
3875  .ndo_open = myri10ge_open,
3876  .ndo_stop = myri10ge_close,
3877  .ndo_start_xmit = myri10ge_xmit,
3878  .ndo_get_stats64 = myri10ge_get_stats,
3879  .ndo_validate_addr = eth_validate_addr,
3880  .ndo_change_mtu = myri10ge_change_mtu,
3881  .ndo_fix_features = myri10ge_fix_features,
3882  .ndo_set_rx_mode = myri10ge_set_multicast_list,
3883  .ndo_set_mac_address = myri10ge_set_mac_address,
3884 };
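
/*
 * Editor's note: these hooks are invoked by the core network stack, not
 * by the driver itself -- e.g. ndo_open runs when the interface is
 * brought up ("ip link set <dev> up") and ndo_start_xmit once per
 * packet handed down from the queueing layer.
 */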
3885 
3886 static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3887 {
3888  struct net_device *netdev;
3889  struct myri10ge_priv *mgp;
3890  struct device *dev = &pdev->dev;
3891  int i;
3892  int status = -ENXIO;
3893  int dac_enabled;
3894  unsigned hdr_offset, ss_offset;
3895  static int board_number;
3896 
3897  netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
3898  if (netdev == NULL)
3899  return -ENOMEM;
3900 
3901  SET_NETDEV_DEV(netdev, &pdev->dev);
3902 
3903  mgp = netdev_priv(netdev);
3904  mgp->dev = netdev;
3905  mgp->pdev = pdev;
3906  mgp->pause = myri10ge_flow_control;
3907  mgp->intr_coal_delay = myri10ge_intr_coal_delay;
3908  mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT);
3909  mgp->board_number = board_number;
3910  init_waitqueue_head(&mgp->down_wq);
3911 
3912  if (pci_enable_device(pdev)) {
3913  dev_err(&pdev->dev, "pci_enable_device call failed\n");
3914  status = -ENODEV;
3915  goto abort_with_netdev;
3916  }
3917 
3918  /* Find the vendor-specific cap so we can check
3919  * the reboot register later on */
3920  mgp->vendor_specific_offset
3921  = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
3922 
3923  /* Set our max read request to 4KB */
3924  status = pcie_set_readrq(pdev, 4096);
3925  if (status != 0) {
3926  dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n",
3927  status);
3928  goto abort_with_enabled;
3929  }
3930 
3931  myri10ge_mask_surprise_down(pdev);
3932  pci_set_master(pdev);
3933  dac_enabled = 1;
3934  status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3935  if (status != 0) {
3936  dac_enabled = 0;
3937  dev_err(&pdev->dev,
3938  "64-bit pci address mask was refused, "
3939  "trying 32-bit\n");
3940  status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3941  }
3942  if (status != 0) {
3943  dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
3944  goto abort_with_enabled;
3945  }
3946  (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3947  mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
3948  &mgp->cmd_bus, GFP_KERNEL);
3949  if (mgp->cmd == NULL)
3950  goto abort_with_enabled;
3951 
3952  mgp->board_span = pci_resource_len(pdev, 0);
3953  mgp->iomem_base = pci_resource_start(pdev, 0);
3954  mgp->mtrr = -1;
3955  mgp->wc_enabled = 0;
3956 #ifdef CONFIG_MTRR
3957  mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span,
3958  MTRR_TYPE_WRCOMB, 1);
3959  if (mgp->mtrr >= 0)
3960  mgp->wc_enabled = 1;
3961 #endif
3962  mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span);
3963  if (mgp->sram == NULL) {
3964  dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n",
3965  mgp->board_span, mgp->iomem_base);
3966  status = -ENXIO;
3967  goto abort_with_mtrr;
3968  }
3969  hdr_offset =
3970  ntohl(__raw_readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc;
3971  ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs);
3972  mgp->sram_size = ntohl(__raw_readl(mgp->sram + ss_offset));
3973  if (mgp->sram_size > mgp->board_span ||
3974  mgp->sram_size <= MYRI10GE_FW_OFFSET) {
3975  dev_err(&pdev->dev,
3976  "invalid sram_size %dB or board span %ldB\n",
3977  mgp->sram_size, mgp->board_span);
3978  goto abort_with_ioremap;
3979  }
3980  memcpy_fromio(mgp->eeprom_strings,
3981  mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE);
3982  memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);
3983  status = myri10ge_read_mac_addr(mgp);
3984  if (status)
3985  goto abort_with_ioremap;
3986 
3987  for (i = 0; i < ETH_ALEN; i++)
3988  netdev->dev_addr[i] = mgp->mac_addr[i];
3989 
3990  myri10ge_select_firmware(mgp);
3991 
3992  status = myri10ge_load_firmware(mgp, 1);
3993  if (status != 0) {
3994  dev_err(&pdev->dev, "failed to load firmware\n");
3995  goto abort_with_ioremap;
3996  }
3997  myri10ge_probe_slices(mgp);
3998  status = myri10ge_alloc_slices(mgp);
3999  if (status != 0) {
4000  dev_err(&pdev->dev, "failed to alloc slice state\n");
4001  goto abort_with_firmware;
4002  }
4003  netif_set_real_num_tx_queues(netdev, mgp->num_slices);
4004  netif_set_real_num_rx_queues(netdev, mgp->num_slices);
4005  status = myri10ge_reset(mgp);
4006  if (status != 0) {
4007  dev_err(&pdev->dev, "failed reset\n");
4008  goto abort_with_slices;
4009  }
4010 #ifdef CONFIG_MYRI10GE_DCA
4011  myri10ge_setup_dca(mgp);
4012 #endif
4013  pci_set_drvdata(pdev, mgp);
4014  if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU)
4015  myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
4016  if ((myri10ge_initial_mtu + ETH_HLEN) < 68)
4017  myri10ge_initial_mtu = 68;
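 /*
  * Editor's note: with MYRI10GE_MAX_ETHER_MTU == 9014 and ETH_HLEN == 14,
  * the clamp above keeps the initial MTU in [68, 9000]; 9000 is the usual
  * jumbo-frame payload, and 68 is the minimum link MTU IPv4 requires
  * (RFC 791).
  */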
4018 
4019  netdev->netdev_ops = &myri10ge_netdev_ops;
4020  netdev->mtu = myri10ge_initial_mtu;
4021  netdev->hw_features = mgp->features | NETIF_F_LRO | NETIF_F_RXCSUM;
4022  netdev->features = netdev->hw_features;
4023 
4024  if (dac_enabled)
4025  netdev->features |= NETIF_F_HIGHDMA;
4026 
4027  netdev->vlan_features |= mgp->features;
4028  if (mgp->fw_ver_tiny < 37)
4029  netdev->vlan_features &= ~NETIF_F_TSO6;
4030  if (mgp->fw_ver_tiny < 32)
4031  netdev->vlan_features &= ~NETIF_F_TSO;
4032 
4033  /* make sure we can get an irq, and that MSI can be
4034  * setup (if available). */
4035  status = myri10ge_request_irq(mgp);
4036  if (status != 0)
4037  goto abort_with_firmware;
4038  myri10ge_free_irq(mgp);
4039 
4040  /* Save configuration space to be restored if the
4041  * nic resets due to a parity error */
4042  pci_save_state(pdev);
4043 
4044  /* Setup the watchdog timer */
4045  setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer,
4046  (unsigned long)mgp);
4047 
4048  SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
4049  INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
4050  status = register_netdev(netdev);
4051  if (status != 0) {
4052  dev_err(&pdev->dev, "register_netdev failed: %d\n", status);
4053  goto abort_with_state;
4054  }
4055  if (mgp->msix_enabled)
4056  dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n",
4057  mgp->num_slices, mgp->tx_boundary, mgp->fw_name,
4058  (mgp->wc_enabled ? "Enabled" : "Disabled"));
4059  else
4060  dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n",
4061  mgp->msi_enabled ? "MSI" : "xPIC",
4062  pdev->irq, mgp->tx_boundary, mgp->fw_name,
4063  (mgp->wc_enabled ? "Enabled" : "Disabled"));
4064 
4065  board_number++;
4066  return 0;
4067 
4068 abort_with_state:
4069  pci_restore_state(pdev);
4070 
4071 abort_with_slices:
4072  myri10ge_free_slices(mgp);
4073 
4074 abort_with_firmware:
4075  myri10ge_dummy_rdma(mgp, 0);
4076 
4077 abort_with_ioremap:
4078  if (mgp->mac_addr_string != NULL)
4079  dev_err(&pdev->dev,
4080  "myri10ge_probe() failed: MAC=%s, SN=%ld\n",
4081  mgp->mac_addr_string, mgp->serial_number);
4082  iounmap(mgp->sram);
4083 
4084 abort_with_mtrr:
4085 #ifdef CONFIG_MTRR
4086  if (mgp->mtrr >= 0)
4087  mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
4088 #endif
4089  dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
4090  mgp->cmd, mgp->cmd_bus);
4091 
4092 abort_with_enabled:
4093  pci_disable_device(pdev);
4094 
4095 abort_with_netdev:
4096  set_fw_name(mgp, NULL, false);
4097  free_netdev(netdev);
4098  return status;
4099 }
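
/*
 * Editor's note -- illustrative sketch, not part of the driver.  The
 * label ladder above is the standard kernel unwind idiom: labels appear
 * in the reverse order of the acquisitions, so a goto from any failure
 * point releases exactly what was obtained so far and falls through the
 * rest.  Condensed, hypothetical shape:
 */
#if 0
	a = acquire_a();
	if (a == NULL)
		return -ENOMEM;
	b = acquire_b();
	if (b == NULL)
		goto abort_with_a;
	return 0;		/* success: keep everything */

abort_with_a:
	release_a(a);
	return -ENOMEM;
#endif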
4100 
4101 /*
4102  * myri10ge_remove
4103  *
4104  * Does what is necessary to shutdown one Myrinet device. Called
4105  * once for each Myrinet card by the kernel when a module is
4106  * unloaded.
4107  */
4108 static void myri10ge_remove(struct pci_dev *pdev)
4109 {
4110  struct myri10ge_priv *mgp;
4111  struct net_device *netdev;
4112 
4113  mgp = pci_get_drvdata(pdev);
4114  if (mgp == NULL)
4115  return;
4116 
4117  cancel_work_sync(&mgp->watchdog_work);
4118  netdev = mgp->dev;
4119  unregister_netdev(netdev);
4120 
4121 #ifdef CONFIG_MYRI10GE_DCA
4122  myri10ge_teardown_dca(mgp);
4123 #endif
4124  myri10ge_dummy_rdma(mgp, 0);
4125 
4126  /* avoid a memory leak */
4127  pci_restore_state(pdev);
4128 
4129  iounmap(mgp->sram);
4130 
4131 #ifdef CONFIG_MTRR
4132  if (mgp->mtrr >= 0)
4133  mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span);
4134 #endif
4135  myri10ge_free_slices(mgp);
4136  if (mgp->msix_vectors != NULL)
4137  kfree(mgp->msix_vectors);
4138  dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd),
4139  mgp->cmd, mgp->cmd_bus);
4140 
4141  set_fw_name(mgp, NULL, false);
4142  free_netdev(netdev);
4143  pci_disable_device(pdev);
4144  pci_set_drvdata(pdev, NULL);
4145 }
4147 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 0x0008
4148 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9 0x0009
4149 
4150 static DEFINE_PCI_DEVICE_TABLE(myri10ge_pci_tbl) = {
4151  {PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)},
4152  {PCI_DEVICE
4153  (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)},
4154  {0},
4155 };
4156 
4157 MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl);
4158 
4159 static struct pci_driver myri10ge_driver = {
4160  .name = "myri10ge",
4161  .probe = myri10ge_probe,
4162  .remove = myri10ge_remove,
4163  .id_table = myri10ge_pci_tbl,
4164 #ifdef CONFIG_PM
4165  .suspend = myri10ge_suspend,
4166  .resume = myri10ge_resume,
4167 #endif
4168 };
4169 
4170 #ifdef CONFIG_MYRI10GE_DCA
4171 static int
4172 myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p)
4173 {
4174  int err = driver_for_each_device(&myri10ge_driver.driver,
4175  NULL, &event,
4176  myri10ge_notify_dca_device);
4177 
4178  if (err)
4179  return NOTIFY_BAD;
4180  return NOTIFY_DONE;
4181 }
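
/*
 * Editor's note: on a DCA provider add/remove event, the notifier fans
 * the event out to every device bound to this driver via
 * driver_for_each_device(), letting each port re-check whether direct
 * cache access is usable.
 */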
4182 
4183 static struct notifier_block myri10ge_dca_notifier = {
4184  .notifier_call = myri10ge_notify_dca,
4185  .next = NULL,
4186  .priority = 0,
4187 };
4188 #endif /* CONFIG_MYRI10GE_DCA */
4189 
4190 static __init int myri10ge_init_module(void)
4191 {
4192  pr_info("Version %s\n", MYRI10GE_VERSION_STR);
4193 
4194  if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
4195  pr_err("Illegal rssh hash type %d, defaulting to source port\n",
4196  myri10ge_rss_hash);
4197  myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4198  }
4199 #ifdef CONFIG_MYRI10GE_DCA
4200  dca_register_notify(&myri10ge_dca_notifier);
4201 #endif
4202  if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
4203  myri10ge_max_slices = MYRI10GE_MAX_SLICES;
4204 
4205  return pci_register_driver(&myri10ge_driver);
4206 }
4207 
4208 module_init(myri10ge_init_module);
4209 
4210 static __exit void myri10ge_cleanup_module(void)
4211 {
4212 #ifdef CONFIG_MYRI10GE_DCA
4213  dca_unregister_notify(&myri10ge_dca_notifier);
4214 #endif
4215  pci_unregister_driver(&myri10ge_driver);
4216 }
4217 
4218 module_exit(myri10ge_cleanup_module);