Linux Kernel 3.7.1
cxgb4vf_main.c
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses. You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  * Redistribution and use in source and binary forms, with or
14  * without modification, are permitted provided that the following
15  * conditions are met:
16  *
17  * - Redistributions of source code must retain the above
18  * copyright notice, this list of conditions and the following
19  * disclaimer.
20  *
21  * - Redistributions in binary form must reproduce the above
22  * copyright notice, this list of conditions and the following
23  * disclaimer in the documentation and/or other materials
24  * provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/module.h>
37 #include <linux/moduleparam.h>
38 #include <linux/init.h>
39 #include <linux/pci.h>
40 #include <linux/dma-mapping.h>
41 #include <linux/netdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/debugfs.h>
44 #include <linux/ethtool.h>
45 
46 #include "t4vf_common.h"
47 #include "t4vf_defs.h"
48 
49 #include "../cxgb4/t4_regs.h"
50 #include "../cxgb4/t4_msg.h"
51 
52 /*
53  * Generic information about the driver.
54  */
55 #define DRV_VERSION "1.0.0"
56 #define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
57 
58 /*
59  * Module Parameters.
60  * ==================
61  */
62 
63 /*
64  * Default ethtool "message level" for adapters.
65  */
66 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
67  NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
68  NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
69 
70 static int dflt_msg_enable = DFLT_MSG_ENABLE;
71 
72 module_param(dflt_msg_enable, int, 0644);
73 MODULE_PARM_DESC(dflt_msg_enable,
74  "default adapter ethtool message level bitmap");
75 
76 /*
77  * The driver uses the best interrupt scheme available on a platform in the
78  * order MSI-X then MSI. This parameter determines which of these schemes the
79  * driver may consider as follows:
80  *
81  * msi = 2: choose from among MSI-X and MSI
82  * msi = 1: only consider MSI interrupts
83  *
84  * Note that unlike the Physical Function driver, this Virtual Function driver
85  * does _not_ support legacy INTx interrupts (this limitation is mandated by
86  * the PCI-E SR-IOV standard).
87  */
88 #define MSI_MSIX 2
89 #define MSI_MSI 1
90 #define MSI_DEFAULT MSI_MSIX
91 
92 static int msi = MSI_DEFAULT;
93 
94 module_param(msi, int, 0644);
95 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
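/*
 * A minimal sketch (editorial, not from this file) of how probe-time code
 * would honor the "msi" parameter: try MSI-X when msi == MSI_MSIX, fall
 * back to MSI, and fail outright otherwise since INTx is unavailable to
 * SR-IOV VFs.  enable_msix() here is a hypothetical stand-in for the
 * driver's real vector-allocation logic.
 */
static int example_pick_intr_scheme(struct adapter *adapter)
{
	if (msi == MSI_MSIX && enable_msix(adapter) == 0) {
		adapter->flags |= USING_MSIX;	/* preferred scheme */
		return 0;
	}
	if (pci_enable_msi(adapter->pdev) == 0) {
		adapter->flags |= USING_MSI;	/* fallback scheme */
		return 0;
	}
	return -EINVAL;				/* no INTx fallback for a VF */
}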
96 
97 /*
98  * Fundamental constants.
99  * ======================
100  */
101 
102 enum {
103  MAX_TXQ_ENTRIES = 16384,
104  MAX_RSPQ_ENTRIES = 16384,
105  MAX_RX_BUFFERS = 16384,
106 
107  MIN_TXQ_ENTRIES = 32,
108  MIN_RSPQ_ENTRIES = 128,
109  MIN_FL_ENTRIES = 16,
110 
111  /*
112  * For purposes of manipulating the Free List size we need to
113  * recognize that Free Lists are actually Egress Queues (the host
114  * produces free buffers which the hardware consumes), Egress Queues
115  * indices are all in units of Egress Context Units bytes, and free
116  * list entries are 64-bit PCI DMA addresses. And since the state of
117  * the Producer Index == the Consumer Index implies an EMPTY list, we
118  * always have at least one Egress Unit's worth of Free List entries
119  * unused. See sge.c for more details ...
120  */
121  EQ_UNIT = SGE_EQ_IDXSIZE,
122  FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
123  MIN_FL_RESID = FL_PER_EQ_UNIT,
124 };
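/*
 * Worked example, assuming SGE_EQ_IDXSIZE is 64 bytes (the usual T4 Egress
 * Context Unit size): each Egress Unit covers 64 / sizeof(__be64) = 8 Free
 * List pointers, so FL_PER_EQ_UNIT = 8 and MIN_FL_RESID = 8.  Because
 * Producer Index == Consumer Index means "empty", a Free List meant to hold
 * N usable buffers is allocated with N + 8 entries; cxgb4vf_get_ringparam()
 * and cxgb4vf_set_ringparam() below subtract/add that residue so the user
 * never sees it.
 */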
125 
126 /*
127  * Global driver state.
128  * ====================
129  */
130 
131 static struct dentry *cxgb4vf_debugfs_root;
132 
133 /*
134  * OS "Callback" functions.
135  * ========================
136  */
137 
138 /*
139  * The link status has changed on the indicated "port" (Virtual Interface).
140  */
141 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
142 {
143  struct net_device *dev = adapter->port[pidx];
144 
145  /*
146  * If the port is disabled or the current recorded "link up"
147  * status matches the new status, just return.
148  */
149  if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
150  return;
151 
152  /*
153  * Tell the OS that the link status has changed and print a short
154  * informative message on the console about the event.
155  */
156  if (link_ok) {
157  const char *s;
158  const char *fc;
159  const struct port_info *pi = netdev_priv(dev);
160 
161  netif_carrier_on(dev);
162 
163  switch (pi->link_cfg.speed) {
164  case SPEED_10000:
165  s = "10Gbps";
166  break;
167 
168  case SPEED_1000:
169  s = "1000Mbps";
170  break;
171 
172  case SPEED_100:
173  s = "100Mbps";
174  break;
175 
176  default:
177  s = "unknown";
178  break;
179  }
180 
181  switch (pi->link_cfg.fc) {
182  case PAUSE_RX:
183  fc = "RX";
184  break;
185 
186  case PAUSE_TX:
187  fc = "TX";
188  break;
189 
190  case PAUSE_RX|PAUSE_TX:
191  fc = "RX/TX";
192  break;
193 
194  default:
195  fc = "no";
196  break;
197  }
198 
199  printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
200  dev->name, s, fc);
201  } else {
202  netif_carrier_off(dev);
203  printk(KERN_INFO "%s: link down\n", dev->name);
204  }
205 }
206 
207 /*
208  * Net device operations.
209  * ======================
210  */
211 
212 
213 
214 
215 /*
216  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
217  * Interface).
218  */
219 static int link_start(struct net_device *dev)
220 {
221  int ret;
222  struct port_info *pi = netdev_priv(dev);
223 
224  /*
225  * We do not set address filters and promiscuity here, the stack does
226  * that step explicitly. Enable vlan accel.
227  */
228  ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
229  true);
230  if (ret == 0) {
231  ret = t4vf_change_mac(pi->adapter, pi->viid,
232  pi->xact_addr_filt, dev->dev_addr, true);
233  if (ret >= 0) {
234  pi->xact_addr_filt = ret;
235  ret = 0;
236  }
237  }
238 
239  /*
240  * We don't need to actually "start the link" itself since the
241  * firmware will do that for us when the first Virtual Interface
242  * is enabled on a port.
243  */
244  if (ret == 0)
245  ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
246  return ret;
247 }
248 
249 /*
250  * Name the MSI-X interrupts.
251  */
252 static void name_msix_vecs(struct adapter *adapter)
253 {
254  int namelen = sizeof(adapter->msix_info[0].desc) - 1;
255  int pidx;
256 
257  /*
258  * Firmware events.
259  */
260  snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
261  "%s-FWeventq", adapter->name);
262  adapter->msix_info[MSIX_FW].desc[namelen] = 0;
263 
264  /*
265  * Ethernet queues.
266  */
267  for_each_port(adapter, pidx) {
268  struct net_device *dev = adapter->port[pidx];
269  const struct port_info *pi = netdev_priv(dev);
270  int qs, msi;
271 
272  for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
273  snprintf(adapter->msix_info[msi].desc, namelen,
274  "%s-%d", dev->name, qs);
275  adapter->msix_info[msi].desc[namelen] = 0;
276  }
277  }
278 }
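/*
 * Illustration (names are hypothetical): with adapter->name "cxgb4vf0" and
 * a single port "eth0" carrying two Queue Sets, the descriptions come out
 * as "cxgb4vf0-FWeventq", "eth0-0" and "eth0-1"; these are the strings
 * later shown in /proc/interrupts once request_msix_queue_irqs() attaches
 * the vectors.
 */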
279 
280 /*
281  * Request all of our MSI-X resources.
282  */
283 static int request_msix_queue_irqs(struct adapter *adapter)
284 {
285  struct sge *s = &adapter->sge;
286  int rxq, msi, err;
287 
288  /*
289  * Firmware events.
290  */
291  err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
292  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
293  if (err)
294  return err;
295 
296  /*
297  * Ethernet queues.
298  */
299  msi = MSIX_IQFLINT;
300  for_each_ethrxq(s, rxq) {
301  err = request_irq(adapter->msix_info[msi].vec,
302  t4vf_sge_intr_msix, 0,
303  adapter->msix_info[msi].desc,
304  &s->ethrxq[rxq].rspq);
305  if (err)
306  goto err_free_irqs;
307  msi++;
308  }
309  return 0;
310 
311 err_free_irqs:
312  while (--rxq >= 0)
313  free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
314  free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
315  return err;
316 }
317 
318 /*
319  * Free our MSI-X resources.
320  */
321 static void free_msix_queue_irqs(struct adapter *adapter)
322 {
323  struct sge *s = &adapter->sge;
324  int rxq, msi;
325 
326  free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
327  msi = MSIX_IQFLINT;
328  for_each_ethrxq(s, rxq)
329  free_irq(adapter->msix_info[msi++].vec,
330  &s->ethrxq[rxq].rspq);
331 }
332 
333 /*
334  * Turn on NAPI and start up interrupts on a response queue.
335  */
336 static void qenable(struct sge_rspq *rspq)
337 {
338  napi_enable(&rspq->napi);
339 
340  /*
341  * 0-increment the Going To Sleep register to start the timer and
342  * enable interrupts.
343  */
344  t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
345  CIDXINC(0) |
346  SEINTARM(rspq->intr_params) |
347  INGRESSQID(rspq->cntxt_id));
348 }
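/*
 * Decoding the write above: CIDXINC(0) is a no-op Consumer Index update
 * whose side effect is to re-arm the queue, SEINTARM() selects the holdoff
 * timer/packet-count behavior from rspq->intr_params, and INGRESSQID()
 * names the ingress queue being armed.
 */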
349 
350 /*
351  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
352  */
353 static void enable_rx(struct adapter *adapter)
354 {
355  int rxq;
356  struct sge *s = &adapter->sge;
357 
358  for_each_ethrxq(s, rxq)
359  qenable(&s->ethrxq[rxq].rspq);
360  qenable(&s->fw_evtq);
361 
362  /*
363  * The interrupt queue doesn't use NAPI so we do the 0-increment of
364  * its Going To Sleep register here to get it started.
365  */
366  if (adapter->flags & USING_MSI)
367  t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
368  CIDXINC(0) |
369  SEINTARM(s->intrq.intr_params) |
370  INGRESSQID(s->intrq.cntxt_id));
371 
372 }
373 
374 /*
375  * Wait until all NAPI handlers are descheduled.
376  */
377 static void quiesce_rx(struct adapter *adapter)
378 {
379  struct sge *s = &adapter->sge;
380  int rxq;
381 
382  for_each_ethrxq(s, rxq)
383  napi_disable(&s->ethrxq[rxq].rspq.napi);
384  napi_disable(&s->fw_evtq.napi);
385 }
386 
387 /*
388  * Response queue handler for the firmware event queue.
389  */
390 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
391  const struct pkt_gl *gl)
392 {
393  /*
394  * Extract response opcode and get pointer to CPL message body.
395  */
396  struct adapter *adapter = rspq->adapter;
397  u8 opcode = ((const struct rss_header *)rsp)->opcode;
398  void *cpl = (void *)(rsp + 1);
399 
400  switch (opcode) {
401  case CPL_FW6_MSG: {
402  /*
403  * We've received an asynchronous message from the firmware.
404  */
405  const struct cpl_fw6_msg *fw_msg = cpl;
406  if (fw_msg->type == FW6_TYPE_CMD_RPL)
407  t4vf_handle_fw_rpl(adapter, fw_msg->data);
408  break;
409  }
410 
411  case CPL_SGE_EGR_UPDATE: {
412  /*
413  * We've received an Egress Queue Status Update message. We
414  * get these, if the SGE is configured to send these when the
415  * firmware passes certain points in processing our TX
416  * Ethernet Queue or if we make an explicit request for one.
417  * We use these updates to determine when we may need to
418  * restart a TX Ethernet Queue which was stopped for lack of
419  * free TX Queue Descriptors ...
420  */
421  const struct cpl_sge_egr_update *p = cpl;
422  unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
423  struct sge *s = &adapter->sge;
424  struct sge_txq *tq;
425  struct sge_eth_txq *txq;
426  unsigned int eq_idx;
427 
428  /*
429  * Perform sanity checking on the Queue ID to make sure it
430  * really refers to one of our TX Ethernet Egress Queues which
431  * is active and matches the queue's ID. None of these error
432  * conditions should ever happen so we may want to either make
433  * them fatal and/or conditionalized under DEBUG.
434  */
435  eq_idx = EQ_IDX(s, qid);
436  if (unlikely(eq_idx >= MAX_EGRQ)) {
437  dev_err(adapter->pdev_dev,
438  "Egress Update QID %d out of range\n", qid);
439  break;
440  }
441  tq = s->egr_map[eq_idx];
442  if (unlikely(tq == NULL)) {
443  dev_err(adapter->pdev_dev,
444  "Egress Update QID %d TXQ=NULL\n", qid);
445  break;
446  }
447  txq = container_of(tq, struct sge_eth_txq, q);
448  if (unlikely(tq->abs_id != qid)) {
449  dev_err(adapter->pdev_dev,
450  "Egress Update QID %d refers to TXQ %d\n",
451  qid, tq->abs_id);
452  break;
453  }
454 
455  /*
456  * Restart a stopped TX Queue which has less than half of its
457  * TX ring in use ...
458  */
459  txq->q.restarts++;
460  netif_tx_wake_queue(txq->txq);
461  break;
462  }
463 
464  default:
465  dev_err(adapter->pdev_dev,
466  "unexpected CPL %#x on FW event queue\n", opcode);
467  }
468 
469  return 0;
470 }
471 
472 /*
473  * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues
474  * to use and initializes them. We support multiple "Queue Sets" per port if
475  * we have MSI-X, otherwise just one queue set per port.
476  */
477 static int setup_sge_queues(struct adapter *adapter)
478 {
479  struct sge *s = &adapter->sge;
480  int err, pidx, msix;
481 
482  /*
483  * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
484  * state.
485  */
486  bitmap_zero(s->starving_fl, MAX_EGRQ);
487 
488  /*
489  * If we're using MSI interrupt mode we need to set up a "forwarded
490  * interrupt" queue which we'll set up with our MSI vector. The rest
491  * of the ingress queues will be set up to forward their interrupts to
492  * this queue ... This must be first since t4vf_sge_alloc_rxq() uses
493  * the intrq's queue ID as the interrupt forwarding queue for the
494  * subsequent calls ...
495  */
496  if (adapter->flags & USING_MSI) {
497  err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
498  adapter->port[0], 0, NULL, NULL);
499  if (err)
500  goto err_free_queues;
501  }
502 
503  /*
504  * Allocate our ingress queue for asynchronous firmware messages.
505  */
506  err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
507  MSIX_FW, NULL, fwevtq_handler);
508  if (err)
509  goto err_free_queues;
510 
511  /*
512  * Allocate each "port"'s initial Queue Sets. These can be changed
513  * later on ... up to the point where any interface on the adapter is
514  * brought up at which point lots of things get nailed down
515  * permanently ...
516  */
517  msix = MSIX_IQFLINT;
518  for_each_port(adapter, pidx) {
519  struct net_device *dev = adapter->port[pidx];
520  struct port_info *pi = netdev_priv(dev);
521  struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
522  struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
523  int qs;
524 
525  for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
526  err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
527  dev, msix++,
528  &rxq->fl, t4vf_ethrx_handler);
529  if (err)
530  goto err_free_queues;
531 
532  err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
533  netdev_get_tx_queue(dev, qs),
534  s->fw_evtq.cntxt_id);
535  if (err)
536  goto err_free_queues;
537 
538  rxq->rspq.idx = qs;
539  memset(&rxq->stats, 0, sizeof(rxq->stats));
540  }
541  }
542 
543  /*
544  * Create the reverse mappings for the queues.
545  */
546  s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
547  s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
548  IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
549  for_each_port(adapter, pidx) {
550  struct net_device *dev = adapter->port[pidx];
551  struct port_info *pi = netdev_priv(dev);
552  struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
553  struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
554  int qs;
555 
556  for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
557  IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
558  EQ_MAP(s, txq->q.abs_id) = &txq->q;
559 
560  /*
561  * The FW_IQ_CMD doesn't return the Absolute Queue IDs
562  * for Free Lists but since all of the Egress Queues
563  * (including Free Lists) have Relative Queue IDs
564  * which are computed as Absolute - Base Queue ID, we
565  * can synthesize the Absolute Queue IDs for the Free
566  * Lists. This is useful for debugging purposes when
567  * we want to dump Queue Contexts via the PF Driver.
568  */
569  rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
570  EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
571  }
572  }
573  return 0;
574 
575 err_free_queues:
576  t4vf_free_sge_resources(adapter);
577  return err;
578 }
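/*
 * The IQ_MAP()/EQ_MAP()/EQ_IDX() helpers used above come from the
 * companion adapter.h.  A sketch of their presumed shape, given the
 * egr_base/ingr_base offsets computed in setup_sge_queues():
 *
 *	#define EQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->egr_base))
 *	#define IQ_IDX(s, abs_id) ((unsigned int)((abs_id) - (s)->ingr_base))
 *	#define EQ_MAP(s, abs_id) ((s)->egr_map[EQ_IDX(s, abs_id)])
 *	#define IQ_MAP(s, abs_id) ((s)->ingr_map[IQ_IDX(s, abs_id)])
 *
 * An Absolute Queue ID becomes a small array index by subtracting the
 * adapter's base ID, which is also what lets fwevtq_handler() bounds-check
 * with EQ_IDX(s, qid) >= MAX_EGRQ.
 */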
579 
580 /*
581  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
582  * queues. We configure the RSS CPU lookup table to distribute to the number
583  * of HW receive queues, and the response queue lookup table to narrow that
584  * down to the response queues actually configured for each "port" (Virtual
585  * Interface). We always configure the RSS mapping for all ports since the
586  * mapping table has plenty of entries.
587  */
588 static int setup_rss(struct adapter *adapter)
589 {
590  int pidx;
591 
592  for_each_port(adapter, pidx) {
593  struct port_info *pi = adap2pinfo(adapter, pidx);
594  struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
595  u16 rss[MAX_PORT_QSETS];
596  int qs, err;
597 
598  for (qs = 0; qs < pi->nqsets; qs++)
599  rss[qs] = rxq[qs].rspq.abs_id;
600 
601  err = t4vf_config_rss_range(adapter, pi->viid,
602  0, pi->rss_size, rss, pi->nqsets);
603  if (err)
604  return err;
605 
606  /*
607  * Perform Global RSS Mode-specific initialization.
608  */
609  switch (adapter->params.rss.mode) {
610  case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
611  /*
612  * If Tunnel All Lookup isn't specified in the global
613  * RSS Configuration, then we need to specify a
614  * default Ingress Queue for any ingress packets which
615  * aren't hashed. We'll use our first ingress queue
616  * ...
617  */
618  if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
619  union rss_vi_config config;
620  err = t4vf_read_rss_vi_config(adapter,
621  pi->viid,
622  &config);
623  if (err)
624  return err;
625  config.basicvirtual.defaultq =
626  rxq[0].rspq.abs_id;
627  err = t4vf_write_rss_vi_config(adapter,
628  pi->viid,
629  &config);
630  if (err)
631  return err;
632  }
633  break;
634  }
635  }
636 
637  return 0;
638 }
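/*
 * Illustrative slot layout (values hypothetical): with pi->nqsets = 4 and
 * pi->rss_size = 64, the four Absolute Queue IDs handed to
 * t4vf_config_rss_range() are presumably replicated round-robin across all
 * 64 slots, so hashed packets landing on slot i go to rss[i % 4]:
 *
 *	slot:  0   1   2   3   4   5  ...  63
 *	qset:  q0  q1  q2  q3  q0  q1 ...  q3
 *
 * Un-hashed ("tunnel") traffic falls through to the default queue
 * configured in the basicvirtual case above.
 */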
639 
640 /*
641  * Bring the adapter up. Called whenever we go from no "ports" open to having
642  * one open. This function performs the actions necessary to make an adapter
643  * operational, such as completing the initialization of HW modules, and
644  * enabling interrupts. Must be called with the rtnl lock held. (Note that
645  * this is called "cxgb_up" in the PF Driver.)
646  */
647 static int adapter_up(struct adapter *adapter)
648 {
649  int err;
650 
651  /*
652  * If this is the first time we've been called, perform basic
653  * adapter setup. Once we've done this, many of our adapter
654  * parameters can no longer be changed ...
655  */
656  if ((adapter->flags & FULL_INIT_DONE) == 0) {
657  err = setup_sge_queues(adapter);
658  if (err)
659  return err;
660  err = setup_rss(adapter);
661  if (err) {
662  t4vf_free_sge_resources(adapter);
663  return err;
664  }
665 
666  if (adapter->flags & USING_MSIX)
667  name_msix_vecs(adapter);
668  adapter->flags |= FULL_INIT_DONE;
669  }
670 
671  /*
672  * Acquire our interrupt resources. We only support MSI-X and MSI.
673  */
674  BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
675  if (adapter->flags & USING_MSIX)
676  err = request_msix_queue_irqs(adapter);
677  else
678  err = request_irq(adapter->pdev->irq,
679  t4vf_intr_handler(adapter), 0,
680  adapter->name, adapter);
681  if (err) {
682  dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
683  err);
684  return err;
685  }
686 
687  /*
688  * Enable NAPI ingress processing and return success.
689  */
690  enable_rx(adapter);
691  t4vf_sge_start(adapter);
692  return 0;
693 }
694 
695 /*
696  * Bring the adapter down. Called whenever the last "port" (Virtual
697  * Interface) closed. (Note that this routine is called "cxgb_down" in the PF
698  * Driver.)
699  */
700 static void adapter_down(struct adapter *adapter)
701 {
702  /*
703  * Free interrupt resources.
704  */
705  if (adapter->flags & USING_MSIX)
706  free_msix_queue_irqs(adapter);
707  else
708  free_irq(adapter->pdev->irq, adapter);
709 
710  /*
711  * Wait for NAPI handlers to finish.
712  */
713  quiesce_rx(adapter);
714 }
715 
716 /*
717  * Start up a net device.
718  */
719 static int cxgb4vf_open(struct net_device *dev)
720 {
721  int err;
722  struct port_info *pi = netdev_priv(dev);
723  struct adapter *adapter = pi->adapter;
724 
725  /*
726  * If this is the first interface that we're opening on the "adapter",
727  * bring the "adapter" up now.
728  */
729  if (adapter->open_device_map == 0) {
730  err = adapter_up(adapter);
731  if (err)
732  return err;
733  }
734 
735  /*
736  * Note that this interface is up and start everything up ...
737  */
738  netif_set_real_num_tx_queues(dev, pi->nqsets);
739  err = netif_set_real_num_rx_queues(dev, pi->nqsets);
740  if (err)
741  goto err_unwind;
742  err = link_start(dev);
743  if (err)
744  goto err_unwind;
745 
746  netif_tx_start_all_queues(dev);
747  set_bit(pi->port_id, &adapter->open_device_map);
748  return 0;
749 
750 err_unwind:
751  if (adapter->open_device_map == 0)
752  adapter_down(adapter);
753  return err;
754 }
755 
756 /*
757  * Shut down a net device. This routine is called "cxgb_close" in the PF
758  * Driver ...
759  */
760 static int cxgb4vf_stop(struct net_device *dev)
761 {
762  struct port_info *pi = netdev_priv(dev);
763  struct adapter *adapter = pi->adapter;
764 
765  netif_tx_stop_all_queues(dev);
766  netif_carrier_off(dev);
767  t4vf_enable_vi(adapter, pi->viid, false, false);
768  pi->link_cfg.link_ok = 0;
769 
770  clear_bit(pi->port_id, &adapter->open_device_map);
771  if (adapter->open_device_map == 0)
772  adapter_down(adapter);
773  return 0;
774 }
775 
776 /*
777  * Translate our basic statistics into the standard "ifconfig" statistics.
778  */
779 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
780 {
781  struct t4vf_port_stats stats;
782  struct port_info *pi = netdev2pinfo(dev);
783  struct adapter *adapter = pi->adapter;
784  struct net_device_stats *ns = &dev->stats;
785  int err;
786 
787  spin_lock(&adapter->stats_lock);
788  err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
789  spin_unlock(&adapter->stats_lock);
790 
791  memset(ns, 0, sizeof(*ns));
792  if (err)
793  return ns;
794 
795  ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
796  stats.tx_ucast_bytes + stats.tx_offload_bytes);
797  ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
798  stats.tx_ucast_frames + stats.tx_offload_frames);
799  ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
800  stats.rx_ucast_bytes);
801  ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
802  stats.rx_ucast_frames);
803  ns->multicast = stats.rx_mcast_frames;
804  ns->tx_errors = stats.tx_drop_frames;
805  ns->rx_errors = stats.rx_err_frames;
806 
807  return ns;
808 }
809 
810 /*
811  * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
812  * at a specified offset within the list, into an array of address pointers and
813  * return the number collected.
814  */
815 static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
816  const u8 **addr,
817  unsigned int offset,
818  unsigned int maxaddrs)
819 {
820  unsigned int index = 0;
821  unsigned int naddr = 0;
822  const struct netdev_hw_addr *ha;
823 
824  for_each_dev_addr(dev, ha)
825  if (index++ >= offset) {
826  addr[naddr++] = ha->addr;
827  if (naddr >= maxaddrs)
828  break;
829  }
830  return naddr;
831 }
832 
833 /*
834  * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
835  * at a specified offset within the list, into an array of address pointers and
836  * return the number collected.
837  */
838 static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
839  const u8 **addr,
840  unsigned int offset,
841  unsigned int maxaddrs)
842 {
843  unsigned int index = 0;
844  unsigned int naddr = 0;
845  const struct netdev_hw_addr *ha;
846 
847  netdev_for_each_mc_addr(ha, dev)
848  if (index++ >= offset) {
849  addr[naddr++] = ha->addr;
850  if (naddr >= maxaddrs)
851  break;
852  }
853  return naddr;
854 }
855 
856 /*
857  * Configure the exact and hash address filters to handle a port's multicast
858  * and secondary unicast MAC addresses.
859  */
860 static int set_addr_filters(const struct net_device *dev, bool sleep)
861 {
862  u64 mhash = 0;
863  u64 uhash = 0;
864  bool free = true;
865  unsigned int offset, naddr;
866  const u8 *addr[7];
867  int ret;
868  const struct port_info *pi = netdev_priv(dev);
869 
870  /* first do the secondary unicast addresses */
871  for (offset = 0; ; offset += naddr) {
872  naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
873  ARRAY_SIZE(addr));
874  if (naddr == 0)
875  break;
876 
877  ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
878  naddr, addr, NULL, &uhash, sleep);
879  if (ret < 0)
880  return ret;
881 
882  free = false;
883  }
884 
885  /* next set up the multicast addresses */
886  for (offset = 0; ; offset += naddr) {
887  naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
888  ARRAY_SIZE(addr));
889  if (naddr == 0)
890  break;
891 
892  ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
893  naddr, addr, NULL, &mhash, sleep);
894  if (ret < 0)
895  return ret;
896  free = false;
897  }
898 
899  return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
900  uhash | mhash, sleep);
901 }
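/*
 * Why the chunking: addr[] holds only 7 entries, matching (on the editor's
 * reading) the number of exact-match MAC entries one firmware mailbox
 * command can carry.  The list is therefore walked in batches, with "free"
 * true only for the first batch so the old filter set is replaced rather
 * than appended to; any addresses the firmware could not fit into exact
 * filters are folded into the uhash/mhash bits and programmed via
 * t4vf_set_addr_hash() as a catch-all.
 */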
902 
903 /*
904  * Set RX properties of a port, such as promiscuity, address filters, and MTU.
905  * If @mtu is -1 it is left unchanged.
906  */
907 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
908 {
909  int ret;
910  struct port_info *pi = netdev_priv(dev);
911 
912  ret = set_addr_filters(dev, sleep_ok);
913  if (ret == 0)
914  ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
915  (dev->flags & IFF_PROMISC) != 0,
916  (dev->flags & IFF_ALLMULTI) != 0,
917  1, -1, sleep_ok);
918  return ret;
919 }
920 
921 /*
922  * Set the current receive modes on the device.
923  */
924 static void cxgb4vf_set_rxmode(struct net_device *dev)
925 {
926  /* unfortunately we can't return errors to the stack */
927  set_rxmode(dev, -1, false);
928 }
929 
930 /*
931  * Find the entry in the interrupt holdoff timer value array which comes
932  * closest to the specified interrupt holdoff value.
933  */
934 static int closest_timer(const struct sge *s, int us)
935 {
936  int i, timer_idx = 0, min_delta = INT_MAX;
937 
938  for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
939  int delta = us - s->timer_val[i];
940  if (delta < 0)
941  delta = -delta;
942  if (delta < min_delta) {
943  min_delta = delta;
944  timer_idx = i;
945  }
946  }
947  return timer_idx;
948 }
949 
950 static int closest_thres(const struct sge *s, int thres)
951 {
952  int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
953 
954  for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
955  delta = thres - s->counter_val[i];
956  if (delta < 0)
957  delta = -delta;
958  if (delta < min_delta) {
959  min_delta = delta;
960  pktcnt_idx = i;
961  }
962  }
963  return pktcnt_idx;
964 }
965 
966 /*
967  * Return a queue's interrupt hold-off time in us. 0 means no timer.
968  */
969 static unsigned int qtimer_val(const struct adapter *adapter,
970  const struct sge_rspq *rspq)
971 {
972  unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
973 
974  return timer_idx < SGE_NTIMERS
975  ? adapter->sge.timer_val[timer_idx]
976  : 0;
977 }
978 
979 /**
980  * set_rxq_intr_params - set a queue's interrupt holdoff parameters
981  * @adapter: the adapter
982  * @rspq: the RX response queue
983  * @us: the hold-off time in us, or 0 to disable timer
984  * @cnt: the hold-off packet count, or 0 to disable counter
985  *
986  * Sets an RX response queue's interrupt hold-off time and packet count.
987  * At least one of the two needs to be enabled for the queue to generate
988  * interrupts.
989  */
990 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
991  unsigned int us, unsigned int cnt)
992 {
993  unsigned int timer_idx;
994 
995  /*
996  * If both the interrupt holdoff timer and count are specified as
997  * zero, default to a holdoff count of 1 ...
998  */
999  if ((us | cnt) == 0)
1000  cnt = 1;
1001 
1002  /*
1003  * If an interrupt holdoff count has been specified, then find the
1004  * closest configured holdoff count and use that. If the response
1005  * queue has already been created, then update its queue context
1006  * parameters ...
1007  */
1008  if (cnt) {
1009  int err;
1010  u32 v, pktcnt_idx;
1011 
1012  pktcnt_idx = closest_thres(&adapter->sge, cnt);
1013  if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1014  v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1015  FW_PARAMS_PARAM_X(
1016  FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1017  FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1018  err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1019  if (err)
1020  return err;
1021  }
1022  rspq->pktcnt_idx = pktcnt_idx;
1023  }
1024 
1025  /*
1026  * Compute the closest holdoff timer index from the supplied holdoff
1027  * timer value.
1028  */
1029  timer_idx = (us == 0
1030  ? SGE_TIMER_RSTRT_CNTR
1031  : closest_timer(&adapter->sge, us));
1032 
1033  /*
1034  * Update the response queue's interrupt coalescing parameters and
1035  * return success.
1036  */
1037  rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1038  (cnt > 0 ? QINTR_CNT_EN : 0));
1039  return 0;
1040 }
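/*
 * Usage example: "ethtool -C ethX rx-usecs 50 rx-frames 32" arrives here
 * (via cxgb4vf_set_coalesce() below) as us = 50, cnt = 32.  Both values
 * are snapped to the nearest hardware-supported entries in
 * sge.timer_val[] and sge.counter_val[], so what is actually programmed
 * may differ slightly from what was requested.
 */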
1041 
1042 /*
1043  * Return a version number to identify the type of adapter. The scheme is:
1044  * - bits 0..9: chip version
1045  * - bits 10..15: chip revision
1046  */
1047 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1048 {
1049  /*
1050  * Chip version 4, revision 0x3f (cxgb4vf).
1051  */
1052  return 4 | (0x3f << 10);
1053 }
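/*
 * Worked out: (0x3f << 10) = 0xfc00, so the function returns 0xfc04, i.e.
 * chip version 4 in bits 0..9 and the fixed revision 0x3f in bits 10..15.
 * This is the value reported in regs->version by cxgb4vf_get_regs() below.
 */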
1054 
1055 /*
1056  * Execute the specified ioctl command.
1057  */
1058 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1059 {
1060  int ret = 0;
1061 
1062  switch (cmd) {
1063  /*
1064  * The VF Driver doesn't have access to any of the other
1065  * common Ethernet device ioctl()'s (like reading/writing
1066  * PHY registers, etc.).
1067  */
1068 
1069  default:
1070  ret = -EOPNOTSUPP;
1071  break;
1072  }
1073  return ret;
1074 }
1075 
1076 /*
1077  * Change the device's MTU.
1078  */
1079 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1080 {
1081  int ret;
1082  struct port_info *pi = netdev_priv(dev);
1083 
1084  /* accommodate SACK */
1085  if (new_mtu < 81)
1086  return -EINVAL;
1087 
1088  ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1089  -1, -1, -1, -1, true);
1090  if (!ret)
1091  dev->mtu = new_mtu;
1092  return ret;
1093 }
1094 
1095 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1096  netdev_features_t features)
1097 {
1098  /*
1099  * Since there is no support for separate rx/tx vlan accel
1100  * enable/disable make sure tx flag is always in same state as rx.
1101  */
1102  if (features & NETIF_F_HW_VLAN_RX)
1103  features |= NETIF_F_HW_VLAN_TX;
1104  else
1105  features &= ~NETIF_F_HW_VLAN_TX;
1106 
1107  return features;
1108 }
1109 
1110 static int cxgb4vf_set_features(struct net_device *dev,
1111  netdev_features_t features)
1112 {
1113  struct port_info *pi = netdev_priv(dev);
1114  netdev_features_t changed = dev->features ^ features;
1115 
1116  if (changed & NETIF_F_HW_VLAN_RX)
1117  t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1118  features & NETIF_F_HW_VLAN_TX, 0);
1119 
1120  return 0;
1121 }
1122 
1123 /*
1124  * Change the device's MAC address.
1125  */
1126 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1127 {
1128  int ret;
1129  struct sockaddr *addr = _addr;
1130  struct port_info *pi = netdev_priv(dev);
1131 
1132  if (!is_valid_ether_addr(addr->sa_data))
1133  return -EADDRNOTAVAIL;
1134 
1135  ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1136  addr->sa_data, true);
1137  if (ret < 0)
1138  return ret;
1139 
1140  memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1141  pi->xact_addr_filt = ret;
1142  return 0;
1143 }
1144 
1145 #ifdef CONFIG_NET_POLL_CONTROLLER
1146 /*
1147  * Poll all of our receive queues. This is called outside of normal interrupt
1148  * context.
1149  */
1150 static void cxgb4vf_poll_controller(struct net_device *dev)
1151 {
1152  struct port_info *pi = netdev_priv(dev);
1153  struct adapter *adapter = pi->adapter;
1154 
1155  if (adapter->flags & USING_MSIX) {
1156  struct sge_eth_rxq *rxq;
1157  int nqsets;
1158 
1159  rxq = &adapter->sge.ethrxq[pi->first_qset];
1160  for (nqsets = pi->nqsets; nqsets; nqsets--) {
1161  t4vf_sge_intr_msix(0, &rxq->rspq);
1162  rxq++;
1163  }
1164  } else
1165  t4vf_intr_handler(adapter)(0, adapter);
1166 }
1167 #endif
1168 
1169 /*
1170  * Ethtool operations.
1171  * ===================
1172  *
1173  * Note that we don't support any ethtool operations which change the physical
1174  * state of the port to which we're linked.
1175  */
1176 
1177 /*
1178  * Return current port link settings.
1179  */
1180 static int cxgb4vf_get_settings(struct net_device *dev,
1181  struct ethtool_cmd *cmd)
1182 {
1183  const struct port_info *pi = netdev_priv(dev);
1184 
1185  cmd->supported = pi->link_cfg.supported;
1186  cmd->advertising = pi->link_cfg.advertising;
1187  ethtool_cmd_speed_set(cmd,
1188  netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1189  cmd->duplex = DUPLEX_FULL;
1190 
1191  cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1192  cmd->phy_address = pi->port_id;
1193  cmd->transceiver = XCVR_EXTERNAL;
1194  cmd->autoneg = pi->link_cfg.autoneg;
1195  cmd->maxtxpkt = 0;
1196  cmd->maxrxpkt = 0;
1197  return 0;
1198 }
1199 
1200 /*
1201  * Return our driver information.
1202  */
1203 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1204  struct ethtool_drvinfo *drvinfo)
1205 {
1206  struct adapter *adapter = netdev2adap(dev);
1207 
1208  strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1209  strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1210  strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1211  sizeof(drvinfo->bus_info));
1212  snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1213  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1214  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1215  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1216  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1217  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1218  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1219  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1220  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1221  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1222 }
1223 
1224 /*
1225  * Return current adapter message level.
1226  */
1227 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1228 {
1229  return netdev2adap(dev)->msg_enable;
1230 }
1231 
1232 /*
1233  * Set current adapter message level.
1234  */
1235 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1236 {
1237  netdev2adap(dev)->msg_enable = msglevel;
1238 }
1239 
1240 /*
1241  * Return the device's current Queue Set ring size parameters along with the
1242  * allowed maximum values. Since ethtool doesn't understand the concept of
1243  * multi-queue devices, we just return the current values associated with the
1244  * first Queue Set.
1245  */
1246 static void cxgb4vf_get_ringparam(struct net_device *dev,
1247  struct ethtool_ringparam *rp)
1248 {
1249  const struct port_info *pi = netdev_priv(dev);
1250  const struct sge *s = &pi->adapter->sge;
1251 
1252  rp->rx_max_pending = MAX_RX_BUFFERS;
1253  rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1254  rp->rx_jumbo_max_pending = 0;
1255  rp->tx_max_pending = MAX_TXQ_ENTRIES;
1256 
1257  rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1258  rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1259  rp->rx_jumbo_pending = 0;
1260  rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1261 }
1262 
1263 /*
1264  * Set the Queue Set ring size parameters for the device. Again, since
1265  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1266  * apply these new values across all of the Queue Sets associated with the
1267  * device -- after vetting them of course!
1268  */
1269 static int cxgb4vf_set_ringparam(struct net_device *dev,
1270  struct ethtool_ringparam *rp)
1271 {
1272  const struct port_info *pi = netdev_priv(dev);
1273  struct adapter *adapter = pi->adapter;
1274  struct sge *s = &adapter->sge;
1275  int qs;
1276 
1277  if (rp->rx_pending > MAX_RX_BUFFERS ||
1278  rp->rx_jumbo_pending ||
1279  rp->tx_pending > MAX_TXQ_ENTRIES ||
1280  rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1281  rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1282  rp->rx_pending < MIN_FL_ENTRIES ||
1283  rp->tx_pending < MIN_TXQ_ENTRIES)
1284  return -EINVAL;
1285 
1286  if (adapter->flags & FULL_INIT_DONE)
1287  return -EBUSY;
1288 
1289  for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1290  s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1291  s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1292  s->ethtxq[qs].q.size = rp->tx_pending;
1293  }
1294  return 0;
1295 }
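/*
 * Round-trip example (assuming the 8-entry MIN_FL_RESID worked out near
 * the "Fundamental constants" enum): a request of rx_pending = 1024 stores
 * a Free List of 1032 entries, and cxgb4vf_get_ringparam() reports
 * 1032 - MIN_FL_RESID = 1024 back, so the user-visible size is stable
 * across a set/get cycle.
 */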
1296 
1297 /*
1298  * Return the interrupt holdoff timer and count for the first Queue Set on the
1299  * device. Our extension ioctl() (the cxgbtool interface) allows the
1300  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1301  */
1302 static int cxgb4vf_get_coalesce(struct net_device *dev,
1303  struct ethtool_coalesce *coalesce)
1304 {
1305  const struct port_info *pi = netdev_priv(dev);
1306  const struct adapter *adapter = pi->adapter;
1307  const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1308 
1309  coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1310  coalesce->rx_max_coalesced_frames =
1311  ((rspq->intr_params & QINTR_CNT_EN)
1312  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1313  : 0);
1314  return 0;
1315 }
1316 
1317 /*
1318  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1319  * interface. Our extension ioctl() (the cxgbtool interface) allows us to set
1320  * the interrupt holdoff timer on any of the device's Queue Sets.
1321  */
1322 static int cxgb4vf_set_coalesce(struct net_device *dev,
1323  struct ethtool_coalesce *coalesce)
1324 {
1325  const struct port_info *pi = netdev_priv(dev);
1326  struct adapter *adapter = pi->adapter;
1327 
1328  return set_rxq_intr_params(adapter,
1329  &adapter->sge.ethrxq[pi->first_qset].rspq,
1330  coalesce->rx_coalesce_usecs,
1331  coalesce->rx_max_coalesced_frames);
1332 }
1333 
1334 /*
1335  * Report current port link pause parameter settings.
1336  */
1337 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1338  struct ethtool_pauseparam *pauseparam)
1339 {
1340  struct port_info *pi = netdev_priv(dev);
1341 
1342  pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1343  pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1344  pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1345 }
1346 
1347 /*
1348  * Identify the port by blinking the port's LED.
1349  */
1350 static int cxgb4vf_phys_id(struct net_device *dev,
1351  enum ethtool_phys_id_state state)
1352 {
1353  unsigned int val;
1354  struct port_info *pi = netdev_priv(dev);
1355 
1356  if (state == ETHTOOL_ID_ACTIVE)
1357  val = 0xffff;
1358  else if (state == ETHTOOL_ID_INACTIVE)
1359  val = 0;
1360  else
1361  return -EINVAL;
1362 
1363  return t4vf_identify_port(pi->adapter, pi->viid, val);
1364 }
1365 
1366 /*
1367  * Port stats maintained per queue of the port.
1368  */
1369 struct queue_port_stats {
1370  u64 tso;
1371  u64 tx_csum;
1372  u64 rx_csum;
1373  u64 vlan_ex;
1374  u64 vlan_ins;
1375  u64 lro_pkts;
1376  u64 lro_merged;
1377 };
1378 
1379 /*
1380  * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that
1381  * these need to match the order of statistics returned by
1382  * t4vf_get_port_stats().
1383  */
1384 static const char stats_strings[][ETH_GSTRING_LEN] = {
1385  /*
1386  * These must match the layout of the t4vf_port_stats structure.
1387  */
1388  "TxBroadcastBytes ",
1389  "TxBroadcastFrames ",
1390  "TxMulticastBytes ",
1391  "TxMulticastFrames ",
1392  "TxUnicastBytes ",
1393  "TxUnicastFrames ",
1394  "TxDroppedFrames ",
1395  "TxOffloadBytes ",
1396  "TxOffloadFrames ",
1397  "RxBroadcastBytes ",
1398  "RxBroadcastFrames ",
1399  "RxMulticastBytes ",
1400  "RxMulticastFrames ",
1401  "RxUnicastBytes ",
1402  "RxUnicastFrames ",
1403  "RxErrorFrames ",
1404 
1405  /*
1406  * These are accumulated per-queue statistics and must match the
1407  * order of the fields in the queue_port_stats structure.
1408  */
1409  "TSO ",
1410  "TxCsumOffload ",
1411  "RxCsumGood ",
1412  "VLANextractions ",
1413  "VLANinsertions ",
1414  "GROPackets ",
1415  "GROMerged ",
1416 };
1417 
1418 /*
1419  * Return the number of statistics in the specified statistics set.
1420  */
1421 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1422 {
1423  switch (sset) {
1424  case ETH_SS_STATS:
1425  return ARRAY_SIZE(stats_strings);
1426  default:
1427  return -EOPNOTSUPP;
1428  }
1429  /*NOTREACHED*/
1430 }
1431 
1432 /*
1433  * Return the strings for the specified statistics set.
1434  */
1435 static void cxgb4vf_get_strings(struct net_device *dev,
1436  u32 sset,
1437  u8 *data)
1438 {
1439  switch (sset) {
1440  case ETH_SS_STATS:
1441  memcpy(data, stats_strings, sizeof(stats_strings));
1442  break;
1443  }
1444 }
1445 
1446 /*
1447  * Small utility routine to accumulate queue statistics across the queues of
1448  * a "port".
1449  */
1450 static void collect_sge_port_stats(const struct adapter *adapter,
1451  const struct port_info *pi,
1452  struct queue_port_stats *stats)
1453 {
1454  const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1455  const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1456  int qs;
1457 
1458  memset(stats, 0, sizeof(*stats));
1459  for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1460  stats->tso += txq->tso;
1461  stats->tx_csum += txq->tx_cso;
1462  stats->rx_csum += rxq->stats.rx_cso;
1463  stats->vlan_ex += rxq->stats.vlan_ex;
1464  stats->vlan_ins += txq->vlan_ins;
1465  stats->lro_pkts += rxq->stats.lro_pkts;
1466  stats->lro_merged += rxq->stats.lro_merged;
1467  }
1468 }
1469 
1470 /*
1471  * Return the ETH_SS_STATS statistics set.
1472  */
1473 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1474  struct ethtool_stats *stats,
1475  u64 *data)
1476 {
1477  struct port_info *pi = netdev2pinfo(dev);
1478  struct adapter *adapter = pi->adapter;
1479  int err = t4vf_get_port_stats(adapter, pi->pidx,
1480  (struct t4vf_port_stats *)data);
1481  if (err)
1482  memset(data, 0, sizeof(struct t4vf_port_stats));
1483 
1484  data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1485  collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1486 }
1487 
1488 /*
1489  * Return the size of our register map.
1490  */
1491 static int cxgb4vf_get_regs_len(struct net_device *dev)
1492 {
1493  return T4VF_REGMAP_SIZE;
1494 }
1495 
1496 /*
1497  * Dump a block of registers, start to end inclusive, into a buffer.
1498  */
1499 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1500  unsigned int start, unsigned int end)
1501 {
1502  u32 *bp = regbuf + start - T4VF_REGMAP_START;
1503 
1504  for ( ; start <= end; start += sizeof(u32)) {
1505  /*
1506  * Avoid reading the Mailbox Control register since that
1507  * can trigger a Mailbox Ownership Arbitration cycle and
1508  * interfere with communication with the firmware.
1509  */
1510  if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1511  *bp++ = 0xffff;
1512  else
1513  *bp++ = t4_read_reg(adapter, start);
1514  }
1515 }
1516 
1517 /*
1518  * Copy our entire register map into the provided buffer.
1519  */
1520 static void cxgb4vf_get_regs(struct net_device *dev,
1521  struct ethtool_regs *regs,
1522  void *regbuf)
1523 {
1524  struct adapter *adapter = netdev2adap(dev);
1525 
1526  regs->version = mk_adap_vers(adapter);
1527 
1528  /*
1529  * Fill in register buffer with our register map.
1530  */
1531  memset(regbuf, 0, T4VF_REGMAP_SIZE);
1532 
1533  reg_block_dump(adapter, regbuf,
1534  T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1535  T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1536  reg_block_dump(adapter, regbuf,
1537  T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1538  T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1539  reg_block_dump(adapter, regbuf,
1540  T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1541  T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1542  reg_block_dump(adapter, regbuf,
1543  T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1544  T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1545 
1546  reg_block_dump(adapter, regbuf,
1547  T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1548  T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1549 }
1550 
1551 /*
1552  * Report current Wake On LAN settings.
1553  */
1554 static void cxgb4vf_get_wol(struct net_device *dev,
1555  struct ethtool_wolinfo *wol)
1556 {
1557  wol->supported = 0;
1558  wol->wolopts = 0;
1559  memset(&wol->sopass, 0, sizeof(wol->sopass));
1560 }
1561 
1562 /*
1563  * TCP Segmentation Offload flags which we support.
1564  */
1565 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1566 
1567 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1568  .get_settings = cxgb4vf_get_settings,
1569  .get_drvinfo = cxgb4vf_get_drvinfo,
1570  .get_msglevel = cxgb4vf_get_msglevel,
1571  .set_msglevel = cxgb4vf_set_msglevel,
1572  .get_ringparam = cxgb4vf_get_ringparam,
1573  .set_ringparam = cxgb4vf_set_ringparam,
1574  .get_coalesce = cxgb4vf_get_coalesce,
1575  .set_coalesce = cxgb4vf_set_coalesce,
1576  .get_pauseparam = cxgb4vf_get_pauseparam,
1577  .get_link = ethtool_op_get_link,
1578  .get_strings = cxgb4vf_get_strings,
1579  .set_phys_id = cxgb4vf_phys_id,
1580  .get_sset_count = cxgb4vf_get_sset_count,
1581  .get_ethtool_stats = cxgb4vf_get_ethtool_stats,
1582  .get_regs_len = cxgb4vf_get_regs_len,
1583  .get_regs = cxgb4vf_get_regs,
1584  .get_wol = cxgb4vf_get_wol,
1585 };
1586 
1587 /*
1588  * /sys/kernel/debug/cxgb4vf support code and data.
1589  * ================================================
1590  */
1591 
1592 /*
1593  * Show SGE Queue Set information. We display QPL Queue Sets per line.
1594  */
1595 #define QPL 4
1596 
1597 static int sge_qinfo_show(struct seq_file *seq, void *v)
1598 {
1599  struct adapter *adapter = seq->private;
1600  int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1601  int qs, r = (uintptr_t)v - 1;
1602 
1603  if (r)
1604  seq_putc(seq, '\n');
1605 
1606  #define S3(fmt_spec, s, v) \
1607  do {\
1608  seq_printf(seq, "%-12s", s); \
1609  for (qs = 0; qs < n; ++qs) \
1610  seq_printf(seq, " %16" fmt_spec, v); \
1611  seq_putc(seq, '\n'); \
1612  } while (0)
1613  #define S(s, v) S3("s", s, v)
1614  #define T(s, v) S3("u", s, txq[qs].v)
1615  #define R(s, v) S3("u", s, rxq[qs].v)
1616 
1617  if (r < eth_entries) {
1618  const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1619  const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1620  int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1621 
1622  S("QType:", "Ethernet");
1623  S("Interface:",
1624  (rxq[qs].rspq.netdev
1625  ? rxq[qs].rspq.netdev->name
1626  : "N/A"));
1627  S3("d", "Port:",
1628  (rxq[qs].rspq.netdev
1629  ? ((struct port_info *)
1630  netdev_priv(rxq[qs].rspq.netdev))->port_id
1631  : -1));
1632  T("TxQ ID:", q.abs_id);
1633  T("TxQ size:", q.size);
1634  T("TxQ inuse:", q.in_use);
1635  T("TxQ PIdx:", q.pidx);
1636  T("TxQ CIdx:", q.cidx);
1637  R("RspQ ID:", rspq.abs_id);
1638  R("RspQ size:", rspq.size);
1639  R("RspQE size:", rspq.iqe_len);
1640  S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1641  S3("u", "Intr pktcnt:",
1642  adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1643  R("RspQ CIdx:", rspq.cidx);
1644  R("RspQ Gen:", rspq.gen);
1645  R("FL ID:", fl.abs_id);
1646  R("FL size:", fl.size - MIN_FL_RESID);
1647  R("FL avail:", fl.avail);
1648  R("FL PIdx:", fl.pidx);
1649  R("FL CIdx:", fl.cidx);
1650  return 0;
1651  }
1652 
1653  r -= eth_entries;
1654  if (r == 0) {
1655  const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1656 
1657  seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1658  seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1659  seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1660  qtimer_val(adapter, evtq));
1661  seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1662  adapter->sge.counter_val[evtq->pktcnt_idx]);
1663  seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1664  seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1665  } else if (r == 1) {
1666  const struct sge_rspq *intrq = &adapter->sge.intrq;
1667 
1668  seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1669  seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1670  seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1671  qtimer_val(adapter, intrq));
1672  seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1673  adapter->sge.counter_val[intrq->pktcnt_idx]);
1674  seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1675  seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1676  }
1677 
1678  #undef R
1679  #undef T
1680  #undef S
1681  #undef S3
1682 
1683  return 0;
1684 }
1685 
1686 /*
1687  * Return the number of "entries" in our "file". We group the multi-Queue
1688  * sections with QPL Queue Sets per "entry". The sections of the output are:
1689  *
1690  * Ethernet RX/TX Queue Sets
1691  * Firmware Event Queue
1692  * Forwarded Interrupt Queue (if in MSI mode)
1693  */
1694 static int sge_queue_entries(const struct adapter *adapter)
1695 {
1696  return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1697  ((adapter->flags & USING_MSI) != 0);
1698 }
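/*
 * Example: an adapter with 10 Ethernet Queue Sets running in MSI mode
 * yields DIV_ROUND_UP(10, 4) + 1 + 1 = 5 entries: three four-wide Ethernet
 * groups, the firmware event queue, and the forwarded interrupt queue.
 */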
1699 
1700 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1701 {
1702  int entries = sge_queue_entries(seq->private);
1703 
1704  return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1705 }
1706 
1707 static void sge_queue_stop(struct seq_file *seq, void *v)
1708 {
1709 }
1710 
1711 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1712 {
1713  int entries = sge_queue_entries(seq->private);
1714 
1715  ++*pos;
1716  return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1717 }
1718 
1719 static const struct seq_operations sge_qinfo_seq_ops = {
1720  .start = sge_queue_start,
1721  .next = sge_queue_next,
1722  .stop = sge_queue_stop,
1723  .show = sge_qinfo_show
1724 };
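/*
 * Note the iterator encoding used by start()/next(): seq_file treats a
 * NULL return as end-of-stream, so position 0 cannot be returned directly.
 * The code hands back (void *)((uintptr_t)*pos + 1) and sge_qinfo_show()
 * decodes it with r = (uintptr_t)v - 1.  The sge_qstats and interfaces
 * iterators below follow the same off-by-one convention.
 */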
1725 
1726 static int sge_qinfo_open(struct inode *inode, struct file *file)
1727 {
1728  int res = seq_open(file, &sge_qinfo_seq_ops);
1729 
1730  if (!res) {
1731  struct seq_file *seq = file->private_data;
1732  seq->private = inode->i_private;
1733  }
1734  return res;
1735 }
1736 
1737 static const struct file_operations sge_qinfo_debugfs_fops = {
1738  .owner = THIS_MODULE,
1739  .open = sge_qinfo_open,
1740  .read = seq_read,
1741  .llseek = seq_lseek,
1742  .release = seq_release,
1743 };
1744 
1745 /*
1746  * Show SGE Queue Set statistics. We display QPL Queue Sets per line.
1747  */
1748 #define QPL 4
1749 
1750 static int sge_qstats_show(struct seq_file *seq, void *v)
1751 {
1752  struct adapter *adapter = seq->private;
1753  int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1754  int qs, r = (uintptr_t)v - 1;
1755 
1756  if (r)
1757  seq_putc(seq, '\n');
1758 
1759  #define S3(fmt, s, v) \
1760  do { \
1761  seq_printf(seq, "%-16s", s); \
1762  for (qs = 0; qs < n; ++qs) \
1763  seq_printf(seq, " %8" fmt, v); \
1764  seq_putc(seq, '\n'); \
1765  } while (0)
1766  #define S(s, v) S3("s", s, v)
1767 
1768  #define T3(fmt, s, v) S3(fmt, s, txq[qs].v)
1769  #define T(s, v) T3("lu", s, v)
1770 
1771  #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v)
1772  #define R(s, v) R3("lu", s, v)
1773 
1774  if (r < eth_entries) {
1775  const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1776  const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1777  int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1778 
1779  S("QType:", "Ethernet");
1780  S("Interface:",
1781  (rxq[qs].rspq.netdev
1782  ? rxq[qs].rspq.netdev->name
1783  : "N/A"));
1784  R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1785  R("RxPackets:", stats.pkts);
1786  R("RxCSO:", stats.rx_cso);
1787  R("VLANxtract:", stats.vlan_ex);
1788  R("LROmerged:", stats.lro_merged);
1789  R("LROpackets:", stats.lro_pkts);
1790  R("RxDrops:", stats.rx_drops);
1791  T("TSO:", tso);
1792  T("TxCSO:", tx_cso);
1793  T("VLANins:", vlan_ins);
1794  T("TxQFull:", q.stops);
1795  T("TxQRestarts:", q.restarts);
1796  T("TxMapErr:", mapping_err);
1797  R("FLAllocErr:", fl.alloc_failed);
1798  R("FLLrgAlcErr:", fl.large_alloc_failed);
1799  R("FLStarving:", fl.starving);
1800  return 0;
1801  }
1802 
1803  r -= eth_entries;
1804  if (r == 0) {
1805  const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1806 
1807  seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1808  seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1809  evtq->unhandled_irqs);
1810  seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1811  seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1812  } else if (r == 1) {
1813  const struct sge_rspq *intrq = &adapter->sge.intrq;
1814 
1815  seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1816  seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1817  intrq->unhandled_irqs);
1818  seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1819  seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1820  }
1821 
1822  #undef R
1823  #undef T
1824  #undef S
1825  #undef R3
1826  #undef T3
1827  #undef S3
1828 
1829  return 0;
1830 }
1831 
1832 /*
1833  * Return the number of "entries" in our "file". We group the multi-Queue
1834  * sections with QPL Queue Sets per "entry". The sections of the output are:
1835  *
1836  * Ethernet RX/TX Queue Sets
1837  * Firmware Event Queue
1838  * Forwarded Interrupt Queue (if in MSI mode)
1839  */
1840 static int sge_qstats_entries(const struct adapter *adapter)
1841 {
1842  return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1843  ((adapter->flags & USING_MSI) != 0);
1844 }
1845 
1846 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1847 {
1848  int entries = sge_qstats_entries(seq->private);
1849 
1850  return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1851 }
1852 
1853 static void sge_qstats_stop(struct seq_file *seq, void *v)
1854 {
1855 }
1856 
1857 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1858 {
1859  int entries = sge_qstats_entries(seq->private);
1860 
1861  (*pos)++;
1862  return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1863 }
1864 
1865 static const struct seq_operations sge_qstats_seq_ops = {
1866  .start = sge_qstats_start,
1867  .next = sge_qstats_next,
1868  .stop = sge_qstats_stop,
1869  .show = sge_qstats_show
1870 };
1871 
1872 static int sge_qstats_open(struct inode *inode, struct file *file)
1873 {
1874  int res = seq_open(file, &sge_qstats_seq_ops);
1875 
1876  if (res == 0) {
1877  struct seq_file *seq = file->private_data;
1878  seq->private = inode->i_private;
1879  }
1880  return res;
1881 }
1882 
1883 static const struct file_operations sge_qstats_proc_fops = {
1884  .owner = THIS_MODULE,
1885  .open = sge_qstats_open,
1886  .read = seq_read,
1887  .llseek = seq_lseek,
1888  .release = seq_release,
1889 };
1890 
1891 /*
1892  * Show PCI-E SR-IOV Virtual Function Resource Limits.
1893  */
1894 static int resources_show(struct seq_file *seq, void *v)
1895 {
1896  struct adapter *adapter = seq->private;
1897  struct vf_resources *vfres = &adapter->params.vfres;
1898 
1899  #define S(desc, fmt, var) \
1900  seq_printf(seq, "%-60s " fmt "\n", \
1901  desc " (" #var "):", vfres->var)
1902 
1903  S("Virtual Interfaces", "%d", nvi);
1904  S("Egress Queues", "%d", neq);
1905  S("Ethernet Control", "%d", nethctrl);
1906  S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1907  S("Ingress Queues", "%d", niq);
1908  S("Traffic Class", "%d", tc);
1909  S("Port Access Rights Mask", "%#x", pmask);
1910  S("MAC Address Filters", "%d", nexactf);
1911  S("Firmware Command Read Capabilities", "%#x", r_caps);
1912  S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1913 
1914  #undef S
1915 
1916  return 0;
1917 }
1918 
1919 static int resources_open(struct inode *inode, struct file *file)
1920 {
1921  return single_open(file, resources_show, inode->i_private);
1922 }
1923 
1924 static const struct file_operations resources_proc_fops = {
1925  .owner = THIS_MODULE,
1926  .open = resources_open,
1927  .read = seq_read,
1928  .llseek = seq_lseek,
1929  .release = single_release,
1930 };
1931 
1932 /*
1933  * Show Virtual Interfaces.
1934  */
1935 static int interfaces_show(struct seq_file *seq, void *v)
1936 {
1937  if (v == SEQ_START_TOKEN) {
1938  seq_puts(seq, "Interface Port VIID\n");
1939  } else {
1940  struct adapter *adapter = seq->private;
1941  int pidx = (uintptr_t)v - 2;
1942  struct net_device *dev = adapter->port[pidx];
1943  struct port_info *pi = netdev_priv(dev);
1944 
1945  seq_printf(seq, "%9s %4d %#5x\n",
1946  dev->name, pi->port_id, pi->viid);
1947  }
1948  return 0;
1949 }
1950 
1951 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1952 {
1953  return pos <= adapter->params.nports
1954  ? (void *)(uintptr_t)(pos + 1)
1955  : NULL;
1956 }
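/*
 * Note on the encoding above (seq_file convention, not extra driver
 * logic): ->start() must return a non-NULL cookie for every valid
 * position, so position "pos" is returned as the pointer value pos + 1,
 * with position 0 reserved for SEQ_START_TOKEN (the header row).
 * interfaces_show() undoes both offsets with "(uintptr_t)v - 2": cookie 2
 * (pos 1) is port index 0, cookie 3 (pos 2) is port index 1, and so on.
 */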
1957 
1958 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1959 {
1960  return *pos
1961  ? interfaces_get_idx(seq->private, *pos)
1962  : SEQ_START_TOKEN;
1963 }
1964 
1965 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1966 {
1967  (*pos)++;
1968  return interfaces_get_idx(seq->private, *pos);
1969 }
1970 
1971 static void interfaces_stop(struct seq_file *seq, void *v)
1972 {
1973 }
1974 
1975 static const struct seq_operations interfaces_seq_ops = {
1976  .start = interfaces_start,
1977  .next = interfaces_next,
1978  .stop = interfaces_stop,
1979  .show = interfaces_show
1980 };
1981 
1982 static int interfaces_open(struct inode *inode, struct file *file)
1983 {
1984  int res = seq_open(file, &interfaces_seq_ops);
1985 
1986  if (res == 0) {
1987  struct seq_file *seq = file->private_data;
1988  seq->private = inode->i_private;
1989  }
1990  return res;
1991 }
1992 
1993 static const struct file_operations interfaces_proc_fops = {
1994  .owner = THIS_MODULE,
1995  .open = interfaces_open,
1996  .read = seq_read,
1997  .llseek = seq_lseek,
1998  .release = seq_release,
1999 };
2000 
2001 /*
2002  * /sys/kernel/debug/cxgb4vf/ files list.
2003  */
2004 struct cxgb4vf_debugfs_entry {
2005  const char *name; /* name of debugfs node */
2006  umode_t mode; /* file system mode */
2007  const struct file_operations *fops;
2008 };
2009 
2010 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2011  { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops },
2012  { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2013  { "resources", S_IRUGO, &resources_proc_fops },
2014  { "interfaces", S_IRUGO, &interfaces_proc_fops },
2015 };
2016 
2017 /*
2018  * Module and device initialization and cleanup code.
2019  * ==================================================
2020  */
2021 
2022 /*
2023  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the
2024  * directory (debugfs_root) has already been set up.
2025  */
2026 static int __devinit setup_debugfs(struct adapter *adapter)
2027 {
2028  int i;
2029 
2030  BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2031 
2032  /*
2033  * Debugfs support is best effort.
2034  */
2035  for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2036  (void)debugfs_create_file(debugfs_files[i].name,
2037  debugfs_files[i].mode,
2038  adapter->debugfs_root,
2039  (void *)adapter,
2040  debugfs_files[i].fops);
2041 
2042  return 0;
2043 }
2044 
2045 /*
2046  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above. We leave
2047  * it to our caller to tear down the directory (debugfs_root).
2048  */
2049 static void cleanup_debugfs(struct adapter *adapter)
2050 {
2051  BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2052 
2053  /*
2054  * Unlike our sister routine cleanup_proc(), we don't need to remove
2055  * individual entries because a call will be made to
2056  * debugfs_remove_recursive(). We just need to clean up any ancillary
2057  * persistent state.
2058  */
2059  /* nothing to do */
2060 }
2061 
2062 /*
2063  * Perform early "adapter" initialization. This is where we discover what
2064  * adapter parameters we're going to be using and initialize basic adapter
2065  * hardware support.
2066  */
2067 static int __devinit adap_init0(struct adapter *adapter)
2068 {
2069  struct vf_resources *vfres = &adapter->params.vfres;
2070  struct sge_params *sge_params = &adapter->params.sge;
2071  struct sge *s = &adapter->sge;
2072  unsigned int ethqsets;
2073  int err;
2074 
2075  /*
2076  * Wait for the device to become ready before proceeding ...
2077  */
2078  err = t4vf_wait_dev_ready(adapter);
2079  if (err) {
2080  dev_err(adapter->pdev_dev, "device didn't become ready:"
2081  " err=%d\n", err);
2082  return err;
2083  }
2084 
2085  /*
2086  * Some environments do not properly handle PCI-E FLRs -- e.g. in Linux
2087  * 2.6.31 and later we can't call pci_reset_function() in order to
2088  * issue an FLR because of a self-deadlock on the device semaphore.
2089  * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2090  * cases where they're needed -- for instance, some versions of KVM
2091  * fail to reset "Assigned Devices" when the VM reboots. Therefore we
2092  * use the firmware based reset in order to reset any per function
2093  * state.
2094  */
2095  err = t4vf_fw_reset(adapter);
2096  if (err < 0) {
2097  dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2098  return err;
2099  }
2100 
2101  /*
2102  * Grab basic operational parameters. These will predominantly have
2103  * been set up by the Physical Function Driver or will be hard coded
2104  * into the adapter. We just have to live with them ... Note that
2105  * we _must_ get our VPD parameters before our SGE parameters because
2106  * we need to know the adapter's core clock from the VPD in order to
2107  * properly decode the SGE Timer Values.
2108  */
2109  err = t4vf_get_dev_params(adapter);
2110  if (err) {
2111  dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2112  " device parameters: err=%d\n", err);
2113  return err;
2114  }
2115  err = t4vf_get_vpd_params(adapter);
2116  if (err) {
2117  dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2118  " VPD parameters: err=%d\n", err);
2119  return err;
2120  }
2121  err = t4vf_get_sge_params(adapter);
2122  if (err) {
2123  dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2124  " SGE parameters: err=%d\n", err);
2125  return err;
2126  }
2127  err = t4vf_get_rss_glb_config(adapter);
2128  if (err) {
2129  dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2130  " RSS parameters: err=%d\n", err);
2131  return err;
2132  }
2133  if (adapter->params.rss.mode !=
2134  FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2135  dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2136  " mode %d\n", adapter->params.rss.mode);
2137  return -EINVAL;
2138  }
2139  err = t4vf_sge_init(adapter);
2140  if (err) {
2141  dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2142  " err=%d\n", err);
2143  return err;
2144  }
2145 
2146  /*
2147  * Retrieve our RX interrupt holdoff timer values and counter
2148  * threshold values from the SGE parameters.
2149  */
2150  s->timer_val[0] = core_ticks_to_us(adapter,
2151  TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2152  s->timer_val[1] = core_ticks_to_us(adapter,
2153  TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2154  s->timer_val[2] = core_ticks_to_us(adapter,
2155  TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2156  s->timer_val[3] = core_ticks_to_us(adapter,
2157  TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2158  s->timer_val[4] = core_ticks_to_us(adapter,
2159  TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2160  s->timer_val[5] = core_ticks_to_us(adapter,
2161  TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2162 
2163  s->counter_val[0] =
2164  THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2165  s->counter_val[1] =
2166  THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2167  s->counter_val[2] =
2168  THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2169  s->counter_val[3] =
2170  THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2171 
2172  /*
2173  * Grab our Virtual Interface resource allocation, extract the
2174  * features that we're interested in and do a bit of sanity testing on
2175  * what we discover.
2176  */
2177  err = t4vf_get_vfres(adapter);
2178  if (err) {
2179  dev_err(adapter->pdev_dev, "unable to get virtual interface"
2180  " resources: err=%d\n", err);
2181  return err;
2182  }
2183 
2184  /*
2185  * The number of "ports" which we support is equal to the number of
2186  * Virtual Interfaces with which we've been provisioned.
2187  */
2188  adapter->params.nports = vfres->nvi;
2189  if (adapter->params.nports > MAX_NPORTS) {
2190  dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2191  " virtual interfaces\n", MAX_NPORTS,
2192  adapter->params.nports);
2193  adapter->params.nports = MAX_NPORTS;
2194  }
2195 
2196  /*
2197  * We need to reserve a number of the ingress queues with Free List
2198  * and Interrupt capabilities for special interrupt purposes (like
2199  * asynchronous firmware messages, or forwarded interrupts if we're
2200  * using MSI). The rest of the FL/Intr-capable ingress queues will be
2201  * matched up one-for-one with Ethernet/Control egress queues in order
2202  * to form "Queue Sets" which will be aportioned between the "ports".
2203  * For each Queue Set, we'll need the ability to allocate two Egress
2204  * Contexts -- one for the Ingress Queue Free List and one for the TX
2205  * Ethernet Queue.
2206  */
2207  ethqsets = vfres->niqflint - INGQ_EXTRAS;
2208  if (vfres->nethctrl != ethqsets) {
2209  dev_warn(adapter->pdev_dev, "unequal number of [available]"
2210  " ingress/egress queues (%d/%d); using minimum for"
2211  " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2212  ethqsets = min(vfres->nethctrl, ethqsets);
2213  }
2214  if (vfres->neq < ethqsets*2) {
2215  dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2216  " to support Queue Sets (%d); reducing allowed Queue"
2217  " Sets\n", vfres->neq, ethqsets);
2218  ethqsets = vfres->neq/2;
2219  }
2220  if (ethqsets > MAX_ETH_QSETS) {
2221  dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2222  " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2223  ethqsets = MAX_ETH_QSETS;
2224  }
2225  if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2226  dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2227  " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2228  }
2229  adapter->sge.max_ethqsets = ethqsets;
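 /*
  * Worked example with made-up provisioning: niqflint = 18 with
  * INGQ_EXTRAS == 2 leaves ethqsets = 16; if nethctrl = 16 and neq = 32,
  * the ingress/egress counts match and the 32 Egress Contexts cover
  * exactly 16 Queue Sets (one Free List plus one TX queue each). Had neq
  * been only 24, we'd have been cut back to 24/2 = 12 Queue Sets.
  */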
2230 
2231  /*
2232  * Check for various parameter sanity issues. Most checks simply
2233  * result in us using fewer resources than our provisioning, but we
2234  * do need at least one "port" with which to work ...
2235  */
2236  if (adapter->sge.max_ethqsets < adapter->params.nports) {
2237  dev_warn(adapter->pdev_dev, "only using %d of %d available"
2238  " virtual interfaces (too few Queue Sets)\n",
2239  adapter->sge.max_ethqsets, adapter->params.nports);
2240  adapter->params.nports = adapter->sge.max_ethqsets;
2241  }
2242  if (adapter->params.nports == 0) {
2243  dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2244  "usable!\n");
2245  return -EINVAL;
2246  }
2247  return 0;
2248 }
2249 
2250 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2251  u8 pkt_cnt_idx, unsigned int size,
2252  unsigned int iqe_size)
2253 {
2254  rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2255  (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2256  rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2257  ? pkt_cnt_idx
2258  : 0);
2259  rspq->iqe_len = iqe_size;
2260  rspq->size = size;
2261 }
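/*
 * Hypothetical calls showing how the two holdoff knobs combine
 * (SGE_NCOUNTERS is the number of valid counter thresholds):
 *
 *     init_rspq(&q, 1, 0, 1024, 64);             timer 1, counter 0 armed
 *     init_rspq(&q, 5, SGE_NCOUNTERS, 512, 64);  timer 5 only; passing an
 *                                                out-of-range pkt_cnt_idx
 *                                                disables the counter
 */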
2262 
2263 /*
2264  * Perform default configuration of DMA queues depending on the number and
2265  * type of ports we found and the number of available CPUs. Most settings can
2266  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2267  * being brought up for the first time.
2268  */
2269 static void __devinit cfg_queues(struct adapter *adapter)
2270 {
2271  struct sge *s = &adapter->sge;
2272  int q10g, n10g, qidx, pidx, qs;
2273  size_t iqe_size;
2274 
2275  /*
2276  * We should not be called till we know how many Queue Sets we can
2277  * support. In particular, this means that we need to know what kind
2278  * of interrupts we'll be using ...
2279  */
2280  BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2281 
2282  /*
2283  * Count the number of 10GbE Virtual Interfaces that we have.
2284  */
2285  n10g = 0;
2286  for_each_port(adapter, pidx)
2287  n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2288 
2289  /*
2290  * We default to 1 queue per non-10G port and up to # of cores queues
2291  * per 10G port.
2292  */
2293  if (n10g == 0)
2294  q10g = 0;
2295  else {
2296  int n1g = (adapter->params.nports - n10g);
2297  q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2298  if (q10g > num_online_cpus())
2299  q10g = num_online_cpus();
2300  }
2301 
2302  /*
2303  * Allocate the "Queue Sets" to the various Virtual Interfaces.
2304  * The layout will be established in setup_sge_queues() when the
2305  * adapter is brought up for the first time.
2306  */
2307  qidx = 0;
2308  for_each_port(adapter, pidx) {
2309  struct port_info *pi = adap2pinfo(adapter, pidx);
2310 
2311  pi->first_qset = qidx;
2312  pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2313  qidx += pi->nqsets;
2314  }
2315  s->ethqsets = qidx;
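 /*
  * Example allocation (illustrative): 2 x 10G ports, 2 x 1G ports,
  * max_ethqsets = 16 and 8 online CPUs gives
  *
  *     q10g = min((16 - 2) / 2, 8) = 7
  *
  * so each 10G port gets 7 Queue Sets, each 1G port gets 1, and
  * s->ethqsets ends up as 16.
  */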
2316 
2317  /*
2318  * The Ingress Queue Entry Size for our various Response Queues needs
2319  * to be big enough to accommodate the largest message we can receive
2320  * from the chip/firmware; which is 64 bytes ...
2321  */
2322  iqe_size = 64;
2323 
2324  /*
2325  * Set up default Queue Set parameters ... Start off with the
2326  * shortest interrupt holdoff timer.
2327  */
2328  for (qs = 0; qs < s->max_ethqsets; qs++) {
2329  struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2330  struct sge_eth_txq *txq = &s->ethtxq[qs];
2331 
2332  init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2333  rxq->fl.size = 72;
2334  txq->q.size = 1024;
2335  }
2336 
2337  /*
2338  * The firmware event queue is used for link state changes and
2339  * notifications of TX DMA completions.
2340  */
2341  init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2342 
2343  /*
2344  * The forwarded interrupt queue is used when we're in MSI interrupt
2345  * mode. In this mode all interrupts associated with RX queues will
2346  * be forwarded to a single queue which we'll associate with our MSI
2347  * interrupt vector. The messages dropped in the forwarded interrupt
2348  * queue will indicate which ingress queue needs servicing ... This
2349  * queue needs to be large enough to accommodate all of the ingress
2350  * queues which are forwarding their interrupt (+1 to prevent the PIDX
2351  * from equalling the CIDX if every ingress queue has an outstanding
2352  * interrupt). The queue doesn't need to be any larger because no
2353  * ingress queue will ever have more than one outstanding interrupt at
2354  * any time ...
2355  */
2356  init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2357  iqe_size);
2358 }
2359 
2360 /*
2361  * Reduce the number of Ethernet queues across all ports to at most n.
2362  * n provides at least one queue per port.
2363  */
2364 static void __devinit reduce_ethqs(struct adapter *adapter, int n)
2365 {
2366  int i;
2367  struct port_info *pi;
2368 
2369  /*
2370  * While we have too many active Ethernet Queue Sets, iterate across the
2371  * "ports" and reduce their individual Queue Set allocations.
2372  */
2373  BUG_ON(n < adapter->params.nports);
2374  while (n < adapter->sge.ethqsets)
2375  for_each_port(adapter, i) {
2376  pi = adap2pinfo(adapter, i);
2377  if (pi->nqsets > 1) {
2378  pi->nqsets--;
2379  adapter->sge.ethqsets--;
2380  if (adapter->sge.ethqsets <= n)
2381  break;
2382  }
2383  }
2384 
2385  /*
2386  * Reassign the starting Queue Sets for each of the "ports" ...
2387  */
2388  n = 0;
2389  for_each_port(adapter, i) {
2390  pi = adap2pinfo(adapter, i);
2391  pi->first_qset = n;
2392  n += pi->nqsets;
2393  }
2394 }
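/*
 * Round-robin example (illustrative): three ports provisioned with
 * nqsets = {6, 6, 6} and n = 12 are walked down to {4, 4, 4}, after
 * which first_qset is reassigned to {0, 4, 8}. Ports already at one
 * Queue Set are skipped, so every port keeps at least one queue.
 */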
2395 
2396 /*
2397  * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally
2398  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2399  * need. Minimally we need one for every Virtual Interface plus those needed
2400  * for our "extras". Note that this process may lower the maximum number of
2401  * allowed Queue Sets ...
2402  */
2403 static int __devinit enable_msix(struct adapter *adapter)
2404 {
2405  int i, err, want, need;
2406  struct msix_entry entries[MSIX_ENTRIES];
2407  struct sge *s = &adapter->sge;
2408 
2409  for (i = 0; i < MSIX_ENTRIES; ++i)
2410  entries[i].entry = i;
2411 
2412  /*
2413  * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2414  * plus those needed for our "extras" (for example, the firmware
2415  * message queue). We _need_ at least one "Queue Set" per Virtual
2416  * Interface plus those needed for our "extras". So now we get to see
2417  * if the song is right ...
2418  */
2419  want = s->max_ethqsets + MSIX_EXTRAS;
2420  need = adapter->params.nports + MSIX_EXTRAS;
2421  while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2422  want = err;
2423 
2424  if (err == 0) {
2425  int nqsets = want - MSIX_EXTRAS;
2426  if (nqsets < s->max_ethqsets) {
2427  dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2428  " for %d Queue Sets\n", nqsets);
2429  s->max_ethqsets = nqsets;
2430  if (nqsets < s->ethqsets)
2431  reduce_ethqs(adapter, nqsets);
2432  }
2433  for (i = 0; i < want; ++i)
2434  adapter->msix_info[i].vec = entries[i].vector;
2435  } else if (err > 0) {
2436  pci_disable_msix(adapter->pdev);
2437  dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2438  " not using MSI-X\n", err);
2439  }
2440  return err;
2441 }
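/*
 * Negotiation sketch (hypothetical counts): suppose want = 17 and
 * need = 5. On this kernel pci_enable_msix() returns 0 on success, a
 * positive count of vectors actually available, or a negative errno.
 * If only 10 vectors were available the call returns 10, the loop
 * retries with want = 10, and we then trim to 10 - MSIX_EXTRAS Queue
 * Sets via reduce_ethqs(). A positive return below "need" lands in the
 * err > 0 branch: we give up on MSI-X and fall back to MSI.
 */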
2442 
2443 static const struct net_device_ops cxgb4vf_netdev_ops = {
2444  .ndo_open = cxgb4vf_open,
2445  .ndo_stop = cxgb4vf_stop,
2446  .ndo_start_xmit = t4vf_eth_xmit,
2447  .ndo_get_stats = cxgb4vf_get_stats,
2448  .ndo_set_rx_mode = cxgb4vf_set_rxmode,
2449  .ndo_set_mac_address = cxgb4vf_set_mac_addr,
2450  .ndo_validate_addr = eth_validate_addr,
2451  .ndo_do_ioctl = cxgb4vf_do_ioctl,
2452  .ndo_change_mtu = cxgb4vf_change_mtu,
2453  .ndo_fix_features = cxgb4vf_fix_features,
2454  .ndo_set_features = cxgb4vf_set_features,
2455 #ifdef CONFIG_NET_POLL_CONTROLLER
2456  .ndo_poll_controller = cxgb4vf_poll_controller,
2457 #endif
2458 };
2459 
2460 /*
2461  * "Probe" a device: initialize a device and construct all kernel and driver
2462  * state needed to manage the device. This routine is called "init_one" in
2463  * the PF Driver ...
2464  */
2465 static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2466  const struct pci_device_id *ent)
2467 {
2468  static int version_printed;
2469 
2470  int pci_using_dac;
2471  int err, pidx;
2472  unsigned int pmask;
2473  struct adapter *adapter;
2474  struct port_info *pi;
2475  struct net_device *netdev;
2476 
2477  /*
2478  * Print our driver banner the first time we're called to initialize a
2479  * device.
2480  */
2481  if (version_printed == 0) {
2482  printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2483  version_printed = 1;
2484  }
2485 
2486  /*
2487  * Initialize generic PCI device state.
2488  */
2489  err = pci_enable_device(pdev);
2490  if (err) {
2491  dev_err(&pdev->dev, "cannot enable PCI device\n");
2492  return err;
2493  }
2494 
2495  /*
2496  * Reserve PCI resources for the device. If we can't get them some
2497  * other driver may have already claimed the device ...
2498  */
2499  err = pci_request_regions(pdev, KBUILD_MODNAME);
2500  if (err) {
2501  dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2502  goto err_disable_device;
2503  }
2504 
2505  /*
2506  * Set up our DMA mask: try for 64-bit address masking first and
2507  * fall back to 32-bit if we can't get 64 bits ...
2508  */
2509  err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2510  if (err == 0) {
2511  err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2512  if (err) {
2513  dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2514  " coherent allocations\n");
2515  goto err_release_regions;
2516  }
2517  pci_using_dac = 1;
2518  } else {
2519  err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2520  if (err != 0) {
2521  dev_err(&pdev->dev, "no usable DMA configuration\n");
2522  goto err_release_regions;
2523  }
2524  pci_using_dac = 0;
2525  }
2526 
2527  /*
2528  * Enable bus mastering for the device ...
2529  */
2530  pci_set_master(pdev);
2531 
2532  /*
2533  * Allocate our adapter data structure and attach it to the device.
2534  */
2535  adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2536  if (!adapter) {
2537  err = -ENOMEM;
2538  goto err_release_regions;
2539  }
2540  pci_set_drvdata(pdev, adapter);
2541  adapter->pdev = pdev;
2542  adapter->pdev_dev = &pdev->dev;
2543 
2544  /*
2545  * Initialize SMP data synchronization resources.
2546  */
2547  spin_lock_init(&adapter->stats_lock);
2548 
2549  /*
2550  * Map our I/O registers in BAR0.
2551  */
2552  adapter->regs = pci_ioremap_bar(pdev, 0);
2553  if (!adapter->regs) {
2554  dev_err(&pdev->dev, "cannot map device registers\n");
2555  err = -ENOMEM;
2556  goto err_free_adapter;
2557  }
2558 
2559  /*
2560  * Initialize adapter level features.
2561  */
2562  adapter->name = pci_name(pdev);
2563  adapter->msg_enable = dflt_msg_enable;
2564  err = adap_init0(adapter);
2565  if (err)
2566  goto err_unmap_bar;
2567 
2568  /*
2569  * Allocate our "adapter ports" and stitch everything together.
2570  */
2571  pmask = adapter->params.vfres.pmask;
2572  for_each_port(adapter, pidx) {
2573  int port_id, viid;
2574 
2575  /*
2576  * We simplistically allocate our virtual interfaces
2577  * sequentially across the port numbers to which we have
2578  * access rights. This should be configurable in some manner
2579  * ...
2580  */
2581  if (pmask == 0)
2582  break;
2583  port_id = ffs(pmask) - 1;
2584  pmask &= ~(1 << port_id);
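 /*
  * e.g. (illustrative) pmask == 0x5 yields port_id 0 on the first
  * pass and port_id 2 on the second: ffs() finds the lowest set
  * bit, which is then cleared before the next iteration.
  */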
2585  viid = t4vf_alloc_vi(adapter, port_id);
2586  if (viid < 0) {
2587  dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2588  " err=%d\n", port_id, viid);
2589  err = viid;
2590  goto err_free_dev;
2591  }
2592 
2593  /*
2594  * Allocate our network device and stitch things together.
2595  */
2596  netdev = alloc_etherdev_mq(sizeof(struct port_info),
2597  MAX_PORT_QSETS);
2598  if (netdev == NULL) {
2599  t4vf_free_vi(adapter, viid);
2600  err = -ENOMEM;
2601  goto err_free_dev;
2602  }
2603  adapter->port[pidx] = netdev;
2604  SET_NETDEV_DEV(netdev, &pdev->dev);
2605  pi = netdev_priv(netdev);
2606  pi->adapter = adapter;
2607  pi->pidx = pidx;
2608  pi->port_id = port_id;
2609  pi->viid = viid;
2610 
2611  /*
2612  * Initialize the starting state of our "port" and register
2613  * it.
2614  */
2615  pi->xact_addr_filt = -1;
2616  netif_carrier_off(netdev);
2617  netdev->irq = pdev->irq;
2618 
2619  netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2620  NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2621  NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM;
2622  netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2623  NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2624  NETIF_F_HIGHDMA;
2625  netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX;
2626  if (pci_using_dac)
2627  netdev->features |= NETIF_F_HIGHDMA;
2628 
2629  netdev->priv_flags |= IFF_UNICAST_FLT;
2630 
2631  netdev->netdev_ops = &cxgb4vf_netdev_ops;
2632  SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2633 
2634  /*
2635  * Initialize the hardware/software state for the port.
2636  */
2637  err = t4vf_port_init(adapter, pidx);
2638  if (err) {
2639  dev_err(&pdev->dev, "cannot initialize port %d\n",
2640  pidx);
2641  goto err_free_dev;
2642  }
2643  }
2644 
2645  /*
2646  * The "card" is now ready to go. If any errors occur during device
2647  * registration we do not fail the whole "card" but rather proceed
2648  * only with the ports we manage to register successfully. However we
2649  * must register at least one net device.
2650  */
2651  for_each_port(adapter, pidx) {
2652  netdev = adapter->port[pidx];
2653  if (netdev == NULL)
2654  continue;
2655 
2656  err = register_netdev(netdev);
2657  if (err) {
2658  dev_warn(&pdev->dev, "cannot register net device %s,"
2659  " skipping\n", netdev->name);
2660  continue;
2661  }
2662 
2663  set_bit(pidx, &adapter->registered_device_map);
2664  }
2665  if (adapter->registered_device_map == 0) {
2666  dev_err(&pdev->dev, "could not register any net devices\n");
2667  goto err_free_dev;
2668  }
2669 
2670  /*
2671  * Set up our debugfs entries.
2672  */
2673  if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2674  adapter->debugfs_root =
2675  debugfs_create_dir(pci_name(pdev),
2676  cxgb4vf_debugfs_root);
2677  if (IS_ERR_OR_NULL(adapter->debugfs_root))
2678  dev_warn(&pdev->dev, "could not create debugfs"
2679  " directory");
2680  else
2681  setup_debugfs(adapter);
2682  }
2683 
2684  /*
2685  * See what interrupts we'll be using. If we've been configured to
2686  * use MSI-X interrupts, try to enable them but fall back to using
2687  * MSI interrupts if we can't enable MSI-X interrupts. If we can't
2688  * get MSI interrupts we bail with the error.
2689  */
2690  if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2691  adapter->flags |= USING_MSIX;
2692  else {
2693  err = pci_enable_msi(pdev);
2694  if (err) {
2695  dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2696  " err=%d\n",
2697  msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2698  goto err_free_debugfs;
2699  }
2700  adapter->flags |= USING_MSI;
2701  }
2702 
2703  /*
2704  * Now that we know how many "ports" we have and what their types are,
2705  * and how many Queue Sets we can support, we can configure our queue
2706  * resources.
2707  */
2708  cfg_queues(adapter);
2709 
2710  /*
2711  * Print a short notice on the existence and configuration of the new
2712  * VF network device ...
2713  */
2714  for_each_port(adapter, pidx) {
2715  dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2716  adapter->port[pidx]->name,
2717  (adapter->flags & USING_MSIX) ? "MSI-X" :
2718  (adapter->flags & USING_MSI) ? "MSI" : "");
2719  }
2720 
2721  /*
2722  * Return success!
2723  */
2724  return 0;
2725 
2726  /*
2727  * Error recovery and exit code. Unwind state that's been created
2728  * so far and return the error.
2729  */
2730 
2731 err_free_debugfs:
2732  if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2733  cleanup_debugfs(adapter);
2734  debugfs_remove_recursive(adapter->debugfs_root);
2735  }
2736 
2737 err_free_dev:
2738  for_each_port(adapter, pidx) {
2739  netdev = adapter->port[pidx];
2740  if (netdev == NULL)
2741  continue;
2742  pi = netdev_priv(netdev);
2743  t4vf_free_vi(adapter, pi->viid);
2744  if (test_bit(pidx, &adapter->registered_device_map))
2745  unregister_netdev(netdev);
2746  free_netdev(netdev);
2747  }
2748 
2749 err_unmap_bar:
2750  iounmap(adapter->regs);
2751 
2752 err_free_adapter:
2753  kfree(adapter);
2754  pci_set_drvdata(pdev, NULL);
2755 
2756 err_release_regions:
2757  pci_release_regions(pdev);
2758  pci_set_drvdata(pdev, NULL);
2759  pci_clear_master(pdev);
2760 
2761 err_disable_device:
2762  pci_disable_device(pdev);
2763 
2764  return err;
2765 }
2766 
2767 /*
2768  * "Remove" a device: tear down all kernel and driver state created in the
2769  * "probe" routine and quiesce the device (disable interrupts, etc.). (Note
2770  * that this is called "remove_one" in the PF Driver.)
2771  */
2772 static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2773 {
2774  struct adapter *adapter = pci_get_drvdata(pdev);
2775 
2776  /*
2777  * Tear down driver state associated with device.
2778  */
2779  if (adapter) {
2780  int pidx;
2781 
2782  /*
2783  * Stop all of our activity. Unregister network port,
2784  * disable interrupts, etc.
2785  */
2786  for_each_port(adapter, pidx)
2787  if (test_bit(pidx, &adapter->registered_device_map))
2788  unregister_netdev(adapter->port[pidx]);
2789  t4vf_sge_stop(adapter);
2790  if (adapter->flags & USING_MSIX) {
2791  pci_disable_msix(adapter->pdev);
2792  adapter->flags &= ~USING_MSIX;
2793  } else if (adapter->flags & USING_MSI) {
2794  pci_disable_msi(adapter->pdev);
2795  adapter->flags &= ~USING_MSI;
2796  }
2797 
2798  /*
2799  * Tear down our debugfs entries.
2800  */
2801  if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2802  cleanup_debugfs(adapter);
2803  debugfs_remove_recursive(adapter->debugfs_root);
2804  }
2805 
2806  /*
2807  * Free all of the various resources which we've acquired ...
2808  */
2809  t4vf_free_sge_resources(adapter);
2810  for_each_port(adapter, pidx) {
2811  struct net_device *netdev = adapter->port[pidx];
2812  struct port_info *pi;
2813 
2814  if (netdev == NULL)
2815  continue;
2816 
2817  pi = netdev_priv(netdev);
2818  t4vf_free_vi(adapter, pi->viid);
2819  free_netdev(netdev);
2820  }
2821  iounmap(adapter->regs);
2822  kfree(adapter);
2823  pci_set_drvdata(pdev, NULL);
2824  }
2825 
2826  /*
2827  * Disable the device and release its PCI resources.
2828  */
2829  pci_disable_device(pdev);
2830  pci_clear_master(pdev);
2831  pci_release_regions(pdev);
2832 }
2833 
2834 /*
2835  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2836  * delivery.
2837  */
2838 static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2839 {
2840  struct adapter *adapter;
2841  int pidx;
2842 
2843  adapter = pci_get_drvdata(pdev);
2844  if (!adapter)
2845  return;
2846 
2847  /*
2848  * Disable all Virtual Interfaces. This will shut down the
2849  * delivery of all ingress packets into the chip for these
2850  * Virtual Interfaces.
2851  */
2852  for_each_port(adapter, pidx) {
2853  struct net_device *netdev;
2854  struct port_info *pi;
2855 
2856  if (!test_bit(pidx, &adapter->registered_device_map))
2857  continue;
2858 
2859  netdev = adapter->port[pidx];
2860  if (!netdev)
2861  continue;
2862 
2863  pi = netdev_priv(netdev);
2864  t4vf_enable_vi(adapter, pi->viid, false, false);
2865  }
2866 
2867  /*
2868  * Free up all Queues which will prevent further DMA and
2869  * Interrupts allowing various internal pathways to drain.
2870  */
2871  t4vf_free_sge_resources(adapter);
2872 }
2873 
2874 /*
2875  * PCI Device registration data structures.
2876  */
2877 #define CH_DEVICE(devid, idx) \
2878  { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2879 
2880 static struct pci_device_id cxgb4vf_pci_tbl[] = {
2881  CH_DEVICE(0xb000, 0), /* PE10K FPGA */
2882  CH_DEVICE(0x4800, 0), /* T440-dbg */
2883  CH_DEVICE(0x4801, 0), /* T420-cr */
2884  CH_DEVICE(0x4802, 0), /* T422-cr */
2885  CH_DEVICE(0x4803, 0), /* T440-cr */
2886  CH_DEVICE(0x4804, 0), /* T420-bch */
2887  CH_DEVICE(0x4805, 0), /* T440-bch */
2888  CH_DEVICE(0x4806, 0), /* T460-ch */
2889  CH_DEVICE(0x4807, 0), /* T420-so */
2890  CH_DEVICE(0x4808, 0), /* T420-cx */
2891  CH_DEVICE(0x4809, 0), /* T420-bt */
2892  CH_DEVICE(0x480a, 0), /* T404-bt */
2893  CH_DEVICE(0x480d, 0), /* T480-cr */
2894  CH_DEVICE(0x480e, 0), /* T440-lp-cr */
2895  { 0, }
2896 };
2897 
2898 MODULE_DESCRIPTION(DRV_DESC);
2899 MODULE_AUTHOR("Chelsio Communications");
2900 MODULE_LICENSE("Dual BSD/GPL");
2901 MODULE_VERSION(DRV_VERSION);
2902 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2903 
2904 static struct pci_driver cxgb4vf_driver = {
2905  .name = KBUILD_MODNAME,
2906  .id_table = cxgb4vf_pci_tbl,
2907  .probe = cxgb4vf_pci_probe,
2908  .remove = __devexit_p(cxgb4vf_pci_remove),
2909  .shutdown = __devexit_p(cxgb4vf_pci_shutdown),
2910 };
2911 
2912 /*
2913  * Initialize global driver state.
2914  */
2915 static int __init cxgb4vf_module_init(void)
2916 {
2917  int ret;
2918 
2919  /*
2920  * Vet our module parameters.
2921  */
2922  if (msi != MSI_MSIX && msi != MSI_MSI) {
2923  printk(KERN_WARNING KBUILD_MODNAME
2924  ": bad module parameter msi=%d; must be %d"
2925  " (MSI-X or MSI) or %d (MSI)\n",
2926  msi, MSI_MSIX, MSI_MSI);
2927  return -EINVAL;
2928  }
2929 
2930  /* Debugfs support is optional, just warn if this fails */
2931  cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2932  if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2933  printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2934  " debugfs entry, continuing\n");
2935 
2936  ret = pci_register_driver(&cxgb4vf_driver);
2937  if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2938  debugfs_remove(cxgb4vf_debugfs_root);
2939  return ret;
2940 }
2941 
2942 /*
2943  * Tear down global driver state.
2944  */
2945 static void __exit cxgb4vf_module_exit(void)
2946 {
2947  pci_unregister_driver(&cxgb4vf_driver);
2948  debugfs_remove(cxgb4vf_debugfs_root);
2949 }
2950 
2951 module_init(cxgb4vf_module_init);
2952 module_exit(cxgb4vf_module_exit);