1 /******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
62 /*$NetBSD: ixgbe.c,v 1.31 2015/08/03 05:43:01 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68 #include "vlan.h"
69
70 /*********************************************************************
71 * Set this to one to display debug statistics
72 *********************************************************************/
73 int ixgbe_display_debug_stats = 0;
74
75 /*********************************************************************
76 * Driver version
77 *********************************************************************/
78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
79
80 /*********************************************************************
81 * PCI Device ID Table
82 *
83 * Used by probe to select devices to load on
84 * Last field stores an index into ixgbe_strings
85 * Last entry must be all 0s
86 *
87 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
88 *********************************************************************/
89
90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
91 {
92 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
93 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
94 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
95 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
96 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
97 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
98 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
99 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
100 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
101 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
102 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
103 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
104 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
105 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
106 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
107 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
108 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
109 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
110 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
111 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
112 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
113 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
114 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
115 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
116 /* required last entry */
117 {0, 0, 0, 0, 0}
118 };
119
120 /*********************************************************************
121 * Table of branding strings
122 *********************************************************************/
123
124 static const char *ixgbe_strings[] = {
125 "Intel(R) PRO/10GbE PCI-Express Network Driver"
126 };
127
128 /*********************************************************************
129 * Function prototypes
130 *********************************************************************/
131 static int ixgbe_probe(device_t, cfdata_t, void *);
132 static void ixgbe_attach(device_t, device_t, void *);
133 static int ixgbe_detach(device_t, int);
134 #if 0
135 static int ixgbe_shutdown(device_t);
136 #endif
137 #if IXGBE_LEGACY_TX
138 static void ixgbe_start(struct ifnet *);
139 static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
140 #else
141 static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
142 static int ixgbe_mq_start_locked(struct ifnet *,
143 struct tx_ring *, struct mbuf *);
144 static void ixgbe_qflush(struct ifnet *);
145 static void ixgbe_deferred_mq_start(void *);
146 #endif
147 static int ixgbe_ioctl(struct ifnet *, u_long, void *);
148 static void ixgbe_ifstop(struct ifnet *, int);
149 static int ixgbe_init(struct ifnet *);
150 static void ixgbe_init_locked(struct adapter *);
151 static void ixgbe_stop(void *);
152 static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
153 static int ixgbe_media_change(struct ifnet *);
154 static void ixgbe_identify_hardware(struct adapter *);
155 static int ixgbe_allocate_pci_resources(struct adapter *,
156 const struct pci_attach_args *);
157 static int ixgbe_allocate_msix(struct adapter *,
158 const struct pci_attach_args *);
159 static int ixgbe_allocate_legacy(struct adapter *,
160 const struct pci_attach_args *);
161 static int ixgbe_allocate_queues(struct adapter *);
162 static int ixgbe_setup_msix(struct adapter *);
163 static void ixgbe_free_pci_resources(struct adapter *);
164 static void ixgbe_local_timer(void *);
165 static int ixgbe_setup_interface(device_t, struct adapter *);
166 static void ixgbe_config_link(struct adapter *);
167
168 static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
169 static int ixgbe_setup_transmit_structures(struct adapter *);
170 static void ixgbe_setup_transmit_ring(struct tx_ring *);
171 static void ixgbe_initialize_transmit_units(struct adapter *);
172 static void ixgbe_free_transmit_structures(struct adapter *);
173 static void ixgbe_free_transmit_buffers(struct tx_ring *);
174
175 static int ixgbe_allocate_receive_buffers(struct rx_ring *);
176 static int ixgbe_setup_receive_structures(struct adapter *);
177 static int ixgbe_setup_receive_ring(struct rx_ring *);
178 static void ixgbe_initialize_receive_units(struct adapter *);
179 static void ixgbe_free_receive_structures(struct adapter *);
180 static void ixgbe_free_receive_buffers(struct rx_ring *);
181 static void ixgbe_setup_hw_rsc(struct rx_ring *);
182
183 static void ixgbe_enable_intr(struct adapter *);
184 static void ixgbe_disable_intr(struct adapter *);
185 static void ixgbe_update_stats_counters(struct adapter *);
186 static bool ixgbe_txeof(struct tx_ring *);
187 static bool ixgbe_rxeof(struct ix_queue *);
188 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
189 struct ixgbe_hw_stats *);
190 static void ixgbe_set_promisc(struct adapter *);
191 static void ixgbe_set_multi(struct adapter *);
192 static void ixgbe_update_link_status(struct adapter *);
193 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
194 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
195 static int ixgbe_set_flowcntl(SYSCTLFN_PROTO);
196 static int ixgbe_set_advertise(SYSCTLFN_PROTO);
197 static int ixgbe_set_thermal_test(SYSCTLFN_PROTO);
198 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
199 struct ixgbe_dma_alloc *, int);
200 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
201 static int ixgbe_tx_ctx_setup(struct tx_ring *,
202 struct mbuf *, u32 *, u32 *);
203 static int ixgbe_tso_setup(struct tx_ring *,
204 struct mbuf *, u32 *, u32 *);
205 static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
206 static void ixgbe_configure_ivars(struct adapter *);
207 static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
208
209 static void ixgbe_setup_vlan_hw_support(struct adapter *);
210 #if 0
211 static void ixgbe_register_vlan(void *, struct ifnet *, u16);
212 static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
213 #endif
214
215 static void ixgbe_add_hw_stats(struct adapter *adapter);
216
217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
219 struct mbuf *, u32);
220
221 static void ixgbe_enable_rx_drop(struct adapter *);
222 static void ixgbe_disable_rx_drop(struct adapter *);
223
224 /* Support for pluggable optic modules */
225 static bool ixgbe_sfp_probe(struct adapter *);
226 static void ixgbe_setup_optics(struct adapter *);
227
228 /* Legacy (single vector) interrupt handler */
229 static int ixgbe_legacy_irq(void *);
230
231 #if defined(NETBSD_MSI_OR_MSIX)
232 /* The MSI/X Interrupt handlers */
233 static void ixgbe_msix_que(void *);
234 static void ixgbe_msix_link(void *);
235 #endif
236
237 /* Software interrupts for deferred work */
238 static void ixgbe_handle_que(void *);
239 static void ixgbe_handle_link(void *);
240 static void ixgbe_handle_msf(void *);
241 static void ixgbe_handle_mod(void *);
242
243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
245
246 #ifdef IXGBE_FDIR
247 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
248 static void ixgbe_reinit_fdir(void *, int);
249 #endif
250
251 /*********************************************************************
252 * FreeBSD Device Interface Entry Points
253 *********************************************************************/
254
255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
256 ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
257 DVF_DETACH_SHUTDOWN);
258
259 #if 0
260 devclass_t ixgbe_devclass;
261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
262
263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
265 #endif
266
267 /*
268 ** TUNEABLE PARAMETERS:
269 */
270
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector
276 */
277 static int ixgbe_enable_aim = TRUE;
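/*
** NetBSD has no FreeBSD-style loader tunables, so TUNABLE_INT() is
** stubbed out to an empty macro below; the TUNABLE_INT() calls that
** follow are kept, apparently, only to stay close to the upstream
** FreeBSD sources.
*/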
278 #define TUNABLE_INT(__x, __y)
279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
280
281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
283
284 /* How many packets rxeof tries to clean at a time */
285 static int ixgbe_rx_process_limit = 256;
286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
287
288 /* How many packets txeof tries to clean at a time */
289 static int ixgbe_tx_process_limit = 256;
290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
291
292 /*
293 ** Smart speed setting, default to on.
294 ** This only works as a compile-time option
295 ** right now, since it is consumed during attach;
296 ** set this to 'ixgbe_smart_speed_off' to
297 ** disable.
298 */
299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
300
301 /*
302 * MSIX should be the default for best performance,
303 * but this allows it to be forced off for testing.
304 */
305 static int ixgbe_enable_msix = 1;
306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
307
308 #if defined(NETBSD_MSI_OR_MSIX)
309 /*
310 * Number of Queues; if set to 0
311 * it autoconfigures based on the
312 * number of CPUs, with a max of 8. This
313 * can be overridden manually here.
314 */
315 static int ixgbe_num_queues = 0;
316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
317 #endif
318
319 /*
320 ** Number of TX descriptors per ring;
321 ** set higher than RX as this seems to be
322 ** the better performing choice.
323 */
324 static int ixgbe_txd = PERFORM_TXD;
325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
326
327 /* Number of RX descriptors per ring */
328 static int ixgbe_rxd = PERFORM_RXD;
329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
330
331 /*
332 ** HW RSC control:
333 ** this feature only works with
334 ** IPv4, and only on 82599 and later.
335 ** It will also cause IP forwarding to
336 ** fail, and unlike LRO that cannot be
337 ** controlled by the stack. For all these
338 ** reasons I've deemed it best to leave
339 ** this off and not bother with a tunable
340 ** interface; to enable it, this must be
341 ** compiled in as TRUE.
342 */
343 static bool ixgbe_rsc_enable = FALSE;
344
345 /* Keep a running tab of the total ports for a sanity check */
346 static int ixgbe_total_ports;
347
348 #ifdef IXGBE_FDIR
349 /*
350 ** For Flow Director: this is the
351 ** number of TX packets we sample
352 ** for the filter pool; at the default
353 ** rate of 20, every 20th packet is probed.
354 **
355 ** This feature can be disabled by
356 ** setting this to 0.
357 */
358 static int atr_sample_rate = 20;
359 /*
360 ** Flow Director actually 'steals'
361 ** part of the packet buffer as its
362 ** filter pool; this variable controls
363 ** how much it uses:
364 ** 0 = 64K, 1 = 128K, 2 = 256K
365 */
366 static int fdir_pballoc = 1;
367 #endif
368
369 #ifdef DEV_NETMAP
370 /*
371 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
372 * be a reference on how to implement netmap support in a driver.
373 * Additional comments are in ixgbe_netmap.h .
374 *
375 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
376 * that extend the standard driver.
377 */
378 #include <dev/netmap/ixgbe_netmap.h>
379 #endif /* DEV_NETMAP */
380
381 /*********************************************************************
382 * Device identification routine
383 *
384 * ixgbe_probe determines if the driver should be loaded for the
385 * adapter, based on the PCI vendor/device ID of the adapter.
386 *
387 * return 1 on success, 0 on failure
388 *********************************************************************/
389
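/*
 * Under NetBSD autoconf this is the cfattach match function; any
 * non-zero return claims the device for this driver.
 */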
390 static int
391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
392 {
393 const struct pci_attach_args *pa = aux;
394
395 return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
396 }
397
398 static ixgbe_vendor_info_t *
399 ixgbe_lookup(const struct pci_attach_args *pa)
400 {
401 pcireg_t subid;
402 ixgbe_vendor_info_t *ent;
403
404 INIT_DEBUGOUT("ixgbe_probe: begin");
405
406 if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
407 return NULL;
408
409 subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
410
411 for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
412 if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
413 PCI_PRODUCT(pa->pa_id) == ent->device_id &&
414
415 (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
416 ent->subvendor_id == 0) &&
417
418 (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
419 ent->subdevice_id == 0)) {
420 ++ixgbe_total_ports;
421 return ent;
422 }
423 }
424 return NULL;
425 }
426
427
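/*
 * Attach the per-instance sysctl tree.  ixgbe_sysctl_instance() creates
 * (or looks up) the device's root node, and the knobs created below
 * (num_rx_desc, num_queues, fc, enable_aim, advertise_speed, ts) hang
 * off it.  Assuming the node ends up under hw.<device> (e.g. a
 * hypothetical hw.ixg0), usage would look like:
 *
 *	sysctl hw.ixg0.num_queues
 *	sysctl -w hw.ixg0.fc=3		# request full flow control
 */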
428 static void
429 ixgbe_sysctl_attach(struct adapter *adapter)
430 {
431 struct sysctllog **log;
432 const struct sysctlnode *rnode, *cnode;
433 device_t dev;
434
435 dev = adapter->dev;
436 log = &adapter->sysctllog;
437
438 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
439 aprint_error_dev(dev, "could not create sysctl root\n");
440 return;
441 }
442
443 if (sysctl_createv(log, 0, &rnode, &cnode,
444 CTLFLAG_READONLY, CTLTYPE_INT,
445 "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
446 NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
447 aprint_error_dev(dev, "could not create sysctl\n");
448
449 if (sysctl_createv(log, 0, &rnode, &cnode,
450 CTLFLAG_READONLY, CTLTYPE_INT,
451 "num_queues", SYSCTL_DESCR("Number of queues"),
452 NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
453 aprint_error_dev(dev, "could not create sysctl\n");
454
455 if (sysctl_createv(log, 0, &rnode, &cnode,
456 CTLFLAG_READWRITE, CTLTYPE_INT,
457 "fc", SYSCTL_DESCR("Flow Control"),
458 ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
459 aprint_error_dev(dev, "could not create sysctl\n");
460
461 /* XXX This is an *instance* sysctl controlling a *global* variable.
462 * XXX It's that way in the FreeBSD driver that this derives from.
463 */
464 if (sysctl_createv(log, 0, &rnode, &cnode,
465 CTLFLAG_READWRITE, CTLTYPE_INT,
466 "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
467 NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
468 aprint_error_dev(dev, "could not create sysctl\n");
469
470 if (sysctl_createv(log, 0, &rnode, &cnode,
471 CTLFLAG_READWRITE, CTLTYPE_INT,
472 "advertise_speed", SYSCTL_DESCR("Link Speed"),
473 ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
474 aprint_error_dev(dev, "could not create sysctl\n");
475
476 if (sysctl_createv(log, 0, &rnode, &cnode,
477 CTLFLAG_READWRITE, CTLTYPE_INT,
478 "ts", SYSCTL_DESCR("Thermal Test"),
479 ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
480 aprint_error_dev(dev, "could not create sysctl\n");
481 }
482
483 /*********************************************************************
484 * Device initialization routine
485 *
486 * The attach entry point is called when the driver is being loaded.
487 * This routine identifies the type of hardware, allocates all resources
488 * and initializes the hardware.
489 *
490 * return 0 on success, positive on failure
491 *********************************************************************/
492
493 static void
494 ixgbe_attach(device_t parent, device_t dev, void *aux)
495 {
496 struct adapter *adapter;
497 struct ixgbe_hw *hw;
498 int error = 0;
499 u16 csum;
500 u32 ctrl_ext;
501 ixgbe_vendor_info_t *ent;
502 const struct pci_attach_args *pa = aux;
503
504 INIT_DEBUGOUT("ixgbe_attach: begin");
505
506 /* Allocate, clear, and link in our adapter structure */
507 adapter = device_private(dev);
508 adapter->dev = adapter->osdep.dev = dev;
509 hw = &adapter->hw;
510 adapter->osdep.pc = pa->pa_pc;
511 adapter->osdep.tag = pa->pa_tag;
512 adapter->osdep.dmat = pa->pa_dmat;
513
514 ent = ixgbe_lookup(pa);
515
516 KASSERT(ent != NULL);
517
518 aprint_normal(": %s, Version - %s\n",
519 ixgbe_strings[ent->index], ixgbe_driver_version);
520
521 /* Core Lock Init*/
522 IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
523
524 /* SYSCTL APIs */
525
526 ixgbe_sysctl_attach(adapter);
527
528 /* Set up the timer callout */
529 callout_init(&adapter->timer, 0);
530
531 /* Determine hardware revision */
532 ixgbe_identify_hardware(adapter);
533
534 /* Do base PCI setup - map BAR0 */
535 if (ixgbe_allocate_pci_resources(adapter, pa)) {
536 aprint_error_dev(dev, "Allocation of PCI resources failed\n");
537 error = ENXIO;
538 goto err_out;
539 }
540
541 /* Do descriptor calc and sanity checks */
542 if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
543 ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
544 aprint_error_dev(dev, "TXD config issue, using default!\n");
545 adapter->num_tx_desc = DEFAULT_TXD;
546 } else
547 adapter->num_tx_desc = ixgbe_txd;
548
549 /*
550 ** With many RX rings it is easy to exceed the
551 ** system mbuf allocation. Tuning nmbclusters
552 ** can alleviate this.
553 */
554 if (nmbclusters > 0 ) {
555 int s;
556 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
557 if (s > nmbclusters) {
558 aprint_error_dev(dev, "RX Descriptors exceed "
559 "system mbuf max, using default instead!\n");
560 ixgbe_rxd = DEFAULT_RXD;
561 }
562 }
563
564 if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
565 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
566 aprint_error_dev(dev, "RXD config issue, using default!\n");
567 adapter->num_rx_desc = DEFAULT_RXD;
568 } else
569 adapter->num_rx_desc = ixgbe_rxd;
570
571 /* Allocate our TX/RX Queues */
572 if (ixgbe_allocate_queues(adapter)) {
573 error = ENOMEM;
574 goto err_out;
575 }
576
577 /* Allocate multicast array memory. */
578 adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
579 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
580 if (adapter->mta == NULL) {
581 aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
582 error = ENOMEM;
583 goto err_late;
584 }
585
586 /* Initialize the shared code */
587 error = ixgbe_init_shared_code(hw);
588 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
589 /*
590 ** No optics in this port, set up
591 ** so the timer routine will probe
592 ** for later insertion.
593 */
594 adapter->sfp_probe = TRUE;
595 error = 0;
596 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
597 aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
598 error = EIO;
599 goto err_late;
600 } else if (error) {
601 aprint_error_dev(dev,"Unable to initialize the shared code\n");
602 error = EIO;
603 goto err_late;
604 }
605
606 /* Make sure we have a good EEPROM before we read from it */
607 if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
608 aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
609 error = EIO;
610 goto err_late;
611 }
612
613 error = ixgbe_init_hw(hw);
614 switch (error) {
615 case IXGBE_ERR_EEPROM_VERSION:
616 aprint_error_dev(dev, "This device is a pre-production adapter/"
617 "LOM. Please be aware there may be issues associated "
618 "with your hardware.\n If you are experiencing problems "
619 "please contact your Intel or hardware representative "
620 "who provided you with this hardware.\n");
621 break;
622 case IXGBE_ERR_SFP_NOT_SUPPORTED:
623 aprint_error_dev(dev,"Unsupported SFP+ Module\n");
624 error = EIO;
625 aprint_error_dev(dev,"Hardware Initialization Failure\n");
626 goto err_late;
627 case IXGBE_ERR_SFP_NOT_PRESENT:
628 device_printf(dev,"No SFP+ Module found\n");
629 /* falls thru */
630 default:
631 break;
632 }
633
634 /* Detect and set physical type */
635 ixgbe_setup_optics(adapter);
636
637 if ((adapter->msix > 1) && (ixgbe_enable_msix))
638 error = ixgbe_allocate_msix(adapter, pa);
639 else
640 error = ixgbe_allocate_legacy(adapter, pa);
641 if (error)
642 goto err_late;
643
644 /* Setup OS specific network interface */
645 if (ixgbe_setup_interface(dev, adapter) != 0)
646 goto err_late;
647
648 /* Initialize statistics */
649 ixgbe_update_stats_counters(adapter);
650
651 /* Print PCIE bus type/speed/width info */
652 ixgbe_get_bus_info(hw);
653 aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
654 ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
655 (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
656 (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
657 (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
658 (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
659 ("Unknown"));
660
661 if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
662 (hw->bus.speed == ixgbe_bus_speed_2500)) {
663 aprint_error_dev(dev, "PCI-Express bandwidth available"
664 " for this card\n is not sufficient for"
665 " optimal performance.\n");
666 aprint_error_dev(dev, "For optimal performance a x8 "
667 "PCIE, or x4 PCIE 2 slot is required.\n");
668 }
669
670 /* Set an initial default flow control value */
671 adapter->fc = ixgbe_fc_full;
672
673 /* let hardware know driver is loaded */
674 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
675 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
676 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
677
678 ixgbe_add_hw_stats(adapter);
679
680 #ifdef DEV_NETMAP
681 ixgbe_netmap_attach(adapter);
682 #endif /* DEV_NETMAP */
683 INIT_DEBUGOUT("ixgbe_attach: end");
684 return;
685 err_late:
686 ixgbe_free_transmit_structures(adapter);
687 ixgbe_free_receive_structures(adapter);
688 err_out:
689 if (adapter->ifp != NULL)
690 if_free(adapter->ifp);
691 ixgbe_free_pci_resources(adapter);
692 if (adapter->mta != NULL)
693 free(adapter->mta, M_DEVBUF);
694 return;
695
696 }
697
698 /*********************************************************************
699 * Device removal routine
700 *
701 * The detach entry point is called when the driver is being removed.
702 * This routine stops the adapter and deallocates all the resources
703 * that were allocated for driver operation.
704 *
705 * return 0 on success, positive on failure
706 *********************************************************************/
707
708 static int
709 ixgbe_detach(device_t dev, int flags)
710 {
711 struct adapter *adapter = device_private(dev);
712 struct rx_ring *rxr = adapter->rx_rings;
713 struct ixgbe_hw_stats *stats = &adapter->stats;
714 struct ix_queue *que = adapter->queues;
715 struct tx_ring *txr = adapter->tx_rings;
716 u32 ctrl_ext;
717
718 INIT_DEBUGOUT("ixgbe_detach: begin");
719
720 #if NVLAN > 0
721 /* Make sure VLANs are not using driver */
722 if (!VLAN_ATTACHED(&adapter->osdep.ec))
723 ; /* nothing to do: no VLANs */
724 else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
725 vlan_ifdetach(adapter->ifp);
726 else {
727 aprint_error_dev(dev, "VLANs in use\n");
728 return EBUSY;
729 }
730 #endif
731
732 IXGBE_CORE_LOCK(adapter);
733 ixgbe_stop(adapter);
734 IXGBE_CORE_UNLOCK(adapter);
735
736 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
737 #ifndef IXGBE_LEGACY_TX
738 softint_disestablish(txr->txq_si);
739 #endif
740 softint_disestablish(que->que_si);
741 }
742
743 /* Drain the Link queue */
744 softint_disestablish(adapter->link_si);
745 softint_disestablish(adapter->mod_si);
746 softint_disestablish(adapter->msf_si);
747 #ifdef IXGBE_FDIR
748 softint_disestablish(adapter->fdir_si);
749 #endif
750
751 /* let hardware know driver is unloading */
752 ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
753 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
754 IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
755
756 ether_ifdetach(adapter->ifp);
757 callout_halt(&adapter->timer, NULL);
758 #ifdef DEV_NETMAP
759 netmap_detach(adapter->ifp);
760 #endif /* DEV_NETMAP */
761 ixgbe_free_pci_resources(adapter);
762 #if 0 /* XXX the NetBSD port is probably missing something here */
763 bus_generic_detach(dev);
764 #endif
765 if_detach(adapter->ifp);
766
767 sysctl_teardown(&adapter->sysctllog);
768 evcnt_detach(&adapter->handleq);
769 evcnt_detach(&adapter->req);
770 evcnt_detach(&adapter->morerx);
771 evcnt_detach(&adapter->moretx);
772 evcnt_detach(&adapter->txloops);
773 evcnt_detach(&adapter->efbig_tx_dma_setup);
774 evcnt_detach(&adapter->m_defrag_failed);
775 evcnt_detach(&adapter->efbig2_tx_dma_setup);
776 evcnt_detach(&adapter->einval_tx_dma_setup);
777 evcnt_detach(&adapter->other_tx_dma_setup);
778 evcnt_detach(&adapter->eagain_tx_dma_setup);
779 evcnt_detach(&adapter->enomem_tx_dma_setup);
780 evcnt_detach(&adapter->watchdog_events);
781 evcnt_detach(&adapter->tso_err);
782 evcnt_detach(&adapter->link_irq);
783
784 txr = adapter->tx_rings;
785 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
786 evcnt_detach(&txr->no_desc_avail);
787 evcnt_detach(&txr->total_packets);
788 evcnt_detach(&txr->tso_tx);
789
790 if (i < __arraycount(adapter->stats.mpc)) {
791 evcnt_detach(&adapter->stats.mpc[i]);
792 }
793 if (i < __arraycount(adapter->stats.pxontxc)) {
794 evcnt_detach(&adapter->stats.pxontxc[i]);
795 evcnt_detach(&adapter->stats.pxonrxc[i]);
796 evcnt_detach(&adapter->stats.pxofftxc[i]);
797 evcnt_detach(&adapter->stats.pxoffrxc[i]);
798 evcnt_detach(&adapter->stats.pxon2offc[i]);
799 }
800 if (i < __arraycount(adapter->stats.qprc)) {
801 evcnt_detach(&adapter->stats.qprc[i]);
802 evcnt_detach(&adapter->stats.qptc[i]);
803 evcnt_detach(&adapter->stats.qbrc[i]);
804 evcnt_detach(&adapter->stats.qbtc[i]);
805 evcnt_detach(&adapter->stats.qprdc[i]);
806 }
807
808 evcnt_detach(&rxr->rx_packets);
809 evcnt_detach(&rxr->rx_bytes);
810 evcnt_detach(&rxr->rx_copies);
811 evcnt_detach(&rxr->no_jmbuf);
812 evcnt_detach(&rxr->rx_discarded);
813 evcnt_detach(&rxr->rx_irq);
814 }
815 evcnt_detach(&stats->ipcs);
816 evcnt_detach(&stats->l4cs);
817 evcnt_detach(&stats->ipcs_bad);
818 evcnt_detach(&stats->l4cs_bad);
819 evcnt_detach(&stats->intzero);
820 evcnt_detach(&stats->legint);
821 evcnt_detach(&stats->crcerrs);
822 evcnt_detach(&stats->illerrc);
823 evcnt_detach(&stats->errbc);
824 evcnt_detach(&stats->mspdc);
825 evcnt_detach(&stats->mlfc);
826 evcnt_detach(&stats->mrfc);
827 evcnt_detach(&stats->rlec);
828 evcnt_detach(&stats->lxontxc);
829 evcnt_detach(&stats->lxonrxc);
830 evcnt_detach(&stats->lxofftxc);
831 evcnt_detach(&stats->lxoffrxc);
832
833 /* Packet Reception Stats */
834 evcnt_detach(&stats->tor);
835 evcnt_detach(&stats->gorc);
836 evcnt_detach(&stats->tpr);
837 evcnt_detach(&stats->gprc);
838 evcnt_detach(&stats->mprc);
839 evcnt_detach(&stats->bprc);
840 evcnt_detach(&stats->prc64);
841 evcnt_detach(&stats->prc127);
842 evcnt_detach(&stats->prc255);
843 evcnt_detach(&stats->prc511);
844 evcnt_detach(&stats->prc1023);
845 evcnt_detach(&stats->prc1522);
846 evcnt_detach(&stats->ruc);
847 evcnt_detach(&stats->rfc);
848 evcnt_detach(&stats->roc);
849 evcnt_detach(&stats->rjc);
850 evcnt_detach(&stats->mngprc);
851 evcnt_detach(&stats->xec);
852
853 /* Packet Transmission Stats */
854 evcnt_detach(&stats->gotc);
855 evcnt_detach(&stats->tpt);
856 evcnt_detach(&stats->gptc);
857 evcnt_detach(&stats->bptc);
858 evcnt_detach(&stats->mptc);
859 evcnt_detach(&stats->mngptc);
860 evcnt_detach(&stats->ptc64);
861 evcnt_detach(&stats->ptc127);
862 evcnt_detach(&stats->ptc255);
863 evcnt_detach(&stats->ptc511);
864 evcnt_detach(&stats->ptc1023);
865 evcnt_detach(&stats->ptc1522);
866
867 ixgbe_free_transmit_structures(adapter);
868 ixgbe_free_receive_structures(adapter);
869 free(adapter->mta, M_DEVBUF);
870
871 IXGBE_CORE_LOCK_DESTROY(adapter);
872 return (0);
873 }
874
875 /*********************************************************************
876 *
877 * Shutdown entry point
878 *
879 **********************************************************************/
880
881 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
882 static int
883 ixgbe_shutdown(device_t dev)
884 {
885 struct adapter *adapter = device_private(dev);
886 IXGBE_CORE_LOCK(adapter);
887 ixgbe_stop(adapter);
888 IXGBE_CORE_UNLOCK(adapter);
889 return (0);
890 }
891 #endif
892
893
894 #ifdef IXGBE_LEGACY_TX
895 /*********************************************************************
896 * Transmit entry point
897 *
898 * ixgbe_start is called by the stack to initiate a transmit.
899 * The driver will remain in this routine as long as there are
900 * packets to transmit and transmit resources are available.
901 * In case resources are not available, the stack is notified and
902 * the packet is requeued.
903 **********************************************************************/
904
905 static void
906 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
907 {
908 int rc;
909 struct mbuf *m_head;
910 struct adapter *adapter = txr->adapter;
911
912 IXGBE_TX_LOCK_ASSERT(txr);
913
914 if ((ifp->if_flags & IFF_RUNNING) == 0)
915 return;
916 if (!adapter->link_active)
917 return;
918
919 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
920 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
921 break;
922
923 IFQ_POLL(&ifp->if_snd, m_head);
924 if (m_head == NULL)
925 break;
926
927 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
928 break;
929 }
930 IFQ_DEQUEUE(&ifp->if_snd, m_head);
931 if (rc == EFBIG) {
932 struct mbuf *mtmp;
933
934 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
935 m_head = mtmp;
936 rc = ixgbe_xmit(txr, m_head);
937 if (rc != 0)
938 adapter->efbig2_tx_dma_setup.ev_count++;
939 } else
940 adapter->m_defrag_failed.ev_count++;
941 }
942 if (rc != 0) {
943 m_freem(m_head);
944 continue;
945 }
946
947 /* Send a copy of the frame to the BPF listener */
948 bpf_mtap(ifp, m_head);
949
950 /* Set watchdog on */
951 getmicrotime(&txr->watchdog_time);
952 txr->queue_status = IXGBE_QUEUE_WORKING;
953
954 }
955 return;
956 }
957
958 /*
959 * Legacy TX start - called by the stack; this
960 * always uses the first tx ring and should
961 * not be used with multiqueue tx enabled.
962 */
963 static void
964 ixgbe_start(struct ifnet *ifp)
965 {
966 struct adapter *adapter = ifp->if_softc;
967 struct tx_ring *txr = adapter->tx_rings;
968
969 if (ifp->if_flags & IFF_RUNNING) {
970 IXGBE_TX_LOCK(txr);
971 ixgbe_start_locked(txr, ifp);
972 IXGBE_TX_UNLOCK(txr);
973 }
974 return;
975 }
976
977 #else /* ! IXGBE_LEGACY_TX */
978
979 /*
980 ** Multiqueue Transmit driver
981 **
982 */
983 static int
984 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
985 {
986 struct adapter *adapter = ifp->if_softc;
987 struct ix_queue *que;
988 struct tx_ring *txr;
989 int i = 0, err = 0;
990
991 /* Which queue to use */
992 if ((m->m_flags & M_FLOWID) != 0)
993 i = m->m_pkthdr.flowid % adapter->num_queues;
994 else
995 i = cpu_index(curcpu()) % adapter->num_queues;
996
997 txr = &adapter->tx_rings[i];
998 que = &adapter->queues[i];
999
1000 if (IXGBE_TX_TRYLOCK(txr)) {
1001 err = ixgbe_mq_start_locked(ifp, txr, m);
1002 IXGBE_TX_UNLOCK(txr);
1003 } else {
1004 err = drbr_enqueue(ifp, txr->br, m);
1005 softint_schedule(txr->txq_si);
1006 }
1007
1008 return (err);
1009 }
1010
1011 static int
1012 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
1013 {
1014 struct adapter *adapter = txr->adapter;
1015 struct mbuf *next;
1016 int enqueued, err = 0;
1017
1018 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
1019 adapter->link_active == 0) {
1020 if (m != NULL)
1021 err = drbr_enqueue(ifp, txr->br, m);
1022 return (err);
1023 }
1024
1025 enqueued = 0;
1026 if (m != NULL) {
1027 err = drbr_enqueue(ifp, txr->br, m);
1028 if (err) {
1029 return (err);
1030 }
1031 }
1032
1033 /* Process the queue */
1034 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1035 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1036 if (next == NULL) {
1037 drbr_advance(ifp, txr->br);
1038 } else {
1039 drbr_putback(ifp, txr->br, next);
1040 }
1041 break;
1042 }
1043 drbr_advance(ifp, txr->br);
1044 enqueued++;
1045 /* Send a copy of the frame to the BPF listener */
1046 bpf_mtap(ifp, next);
1047 if ((ifp->if_flags & IFF_RUNNING) == 0)
1048 break;
1049 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
1050 ixgbe_txeof(txr);
1051 }
1052
1053 if (enqueued > 0) {
1054 /* Set watchdog on */
1055 txr->queue_status = IXGBE_QUEUE_WORKING;
1056 getmicrotime(&txr->watchdog_time);
1057 }
1058
1059 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
1060 ixgbe_txeof(txr);
1061
1062 return (err);
1063 }
1064
1065 /*
1066 * Called from a softint (FreeBSD uses a taskqueue) to drain queued transmit packets.
1067 */
1068 static void
1069 ixgbe_deferred_mq_start(void *arg)
1070 {
1071 struct tx_ring *txr = arg;
1072 struct adapter *adapter = txr->adapter;
1073 struct ifnet *ifp = adapter->ifp;
1074
1075 IXGBE_TX_LOCK(txr);
1076 if (!drbr_empty(ifp, txr->br))
1077 ixgbe_mq_start_locked(ifp, txr, NULL);
1078 IXGBE_TX_UNLOCK(txr);
1079 }
1080
1081 /*
1082 ** Flush all ring buffers
1083 */
1084 static void
1085 ixgbe_qflush(struct ifnet *ifp)
1086 {
1087 struct adapter *adapter = ifp->if_softc;
1088 struct tx_ring *txr = adapter->tx_rings;
1089 struct mbuf *m;
1090
1091 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1092 IXGBE_TX_LOCK(txr);
1093 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1094 m_freem(m);
1095 IXGBE_TX_UNLOCK(txr);
1096 }
1097 if_qflush(ifp);
1098 }
1099 #endif /* IXGBE_LEGACY_TX */
1100
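/*
 * if_flags change callback (registered with the ethercom during
 * interface setup): apply promiscuous/allmulti changes on the fly,
 * or return ENETRESET to ask the caller to re-init the interface
 * for anything more involved.
 */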
1101 static int
1102 ixgbe_ifflags_cb(struct ethercom *ec)
1103 {
1104 struct ifnet *ifp = &ec->ec_if;
1105 struct adapter *adapter = ifp->if_softc;
1106 int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
1107
1108 IXGBE_CORE_LOCK(adapter);
1109
1110 if (change != 0)
1111 adapter->if_flags = ifp->if_flags;
1112
1113 if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
1114 rc = ENETRESET;
1115 else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
1116 ixgbe_set_promisc(adapter);
1117
1118 /* Set up VLAN support and filter */
1119 ixgbe_setup_vlan_hw_support(adapter);
1120
1121 IXGBE_CORE_UNLOCK(adapter);
1122
1123 return rc;
1124 }
1125
1126 /*********************************************************************
1127 * Ioctl entry point
1128 *
1129 * ixgbe_ioctl is called when the user wants to configure the
1130 * interface.
1131 *
1132 * return 0 on success, positive on failure
1133 **********************************************************************/
1134
1135 static int
1136 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
1137 {
1138 struct adapter *adapter = ifp->if_softc;
1139 struct ixgbe_hw *hw = &adapter->hw;
1140 struct ifcapreq *ifcr = data;
1141 struct ifreq *ifr = data;
1142 int error = 0;
1143 int l4csum_en;
1144 const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
1145 IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
1146
1147 switch (command) {
1148 case SIOCSIFFLAGS:
1149 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1150 break;
1151 case SIOCADDMULTI:
1152 case SIOCDELMULTI:
1153 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1154 break;
1155 case SIOCSIFMEDIA:
1156 case SIOCGIFMEDIA:
1157 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1158 break;
1159 case SIOCSIFCAP:
1160 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1161 break;
1162 case SIOCSIFMTU:
1163 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
1164 break;
1165 default:
1166 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1167 break;
1168 }
1169
1170 switch (command) {
1171 case SIOCSIFMEDIA:
1172 case SIOCGIFMEDIA:
1173 return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1174 case SIOCGI2C:
1175 {
1176 struct ixgbe_i2c_req i2c;
1177 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1178 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1179 if (error)
1180 break;
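		/*
		 * Only the SFP+ module's two I2C addresses are accepted:
		 * 0xA0 (serial ID EEPROM) and 0xA2 (diagnostics), as
		 * defined by SFF-8472.
		 */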
1181 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)){
1182 error = EINVAL;
1183 break;
1184 }
1185 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1186 i2c.dev_addr, i2c.data);
1187 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1188 break;
1189 }
1190 case SIOCSIFCAP:
1191 /* Layer-4 Rx checksum offload has to be turned on and
1192 * off as a unit.
1193 */
1194 l4csum_en = ifcr->ifcr_capenable & l4csum;
1195 if (l4csum_en != l4csum && l4csum_en != 0)
1196 return EINVAL;
1197 /*FALLTHROUGH*/
1198 case SIOCADDMULTI:
1199 case SIOCDELMULTI:
1200 case SIOCSIFFLAGS:
1201 case SIOCSIFMTU:
1202 default:
1203 if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
1204 return error;
1205 if ((ifp->if_flags & IFF_RUNNING) == 0)
1206 ;
1207 else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
1208 IXGBE_CORE_LOCK(adapter);
1209 ixgbe_init_locked(adapter);
1210 IXGBE_CORE_UNLOCK(adapter);
1211 } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
1212 /*
1213 * Multicast list has changed; set the hardware filter
1214 * accordingly.
1215 */
1216 IXGBE_CORE_LOCK(adapter);
1217 ixgbe_disable_intr(adapter);
1218 ixgbe_set_multi(adapter);
1219 ixgbe_enable_intr(adapter);
1220 IXGBE_CORE_UNLOCK(adapter);
1221 }
1222 return 0;
1223 }
1224
1225 return error;
1226 }
1227
1228 /*********************************************************************
1229 * Init entry point
1230 *
1231 * This routine is used in two ways. It is used by the stack as
1232 * the init entry point in the network interface structure. It is also used
1233 * by the driver as a hw/sw initialization routine to get to a
1234 * consistent state.
1235 *
1236 * return 0 on success, positive on failure
1237 **********************************************************************/
1238 #define IXGBE_MHADD_MFS_SHIFT 16
1239
1240 static void
1241 ixgbe_init_locked(struct adapter *adapter)
1242 {
1243 struct ifnet *ifp = adapter->ifp;
1244 device_t dev = adapter->dev;
1245 struct ixgbe_hw *hw = &adapter->hw;
1246 u32 k, txdctl, mhadd, gpie;
1247 u32 rxdctl, rxctrl;
1248
1249 /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
1250
1251 KASSERT(mutex_owned(&adapter->core_mtx));
1252 INIT_DEBUGOUT("ixgbe_init: begin");
1253 hw->adapter_stopped = FALSE;
1254 ixgbe_stop_adapter(hw);
1255 callout_stop(&adapter->timer);
1256
1257 /* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
1258 adapter->max_frame_size =
1259 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1260
1261 /* reprogram the RAR[0] in case user changed it. */
1262 ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1263
1264 /* Get the latest mac address, User can use a LAA */
1265 memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
1266 IXGBE_ETH_LENGTH_OF_ADDRESS);
1267 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1268 hw->addr_ctrl.rar_used_count = 1;
1269
1270 /* Prepare transmit descriptors and buffers */
1271 if (ixgbe_setup_transmit_structures(adapter)) {
1272 device_printf(dev,"Could not setup transmit structures\n");
1273 ixgbe_stop(adapter);
1274 return;
1275 }
1276
1277 ixgbe_init_hw(hw);
1278 ixgbe_initialize_transmit_units(adapter);
1279
1280 /* Setup Multicast table */
1281 ixgbe_set_multi(adapter);
1282
1283 /*
1284 ** Determine the correct mbuf pool
1285 ** for doing jumbo frames
1286 */
1287 if (adapter->max_frame_size <= 2048)
1288 adapter->rx_mbuf_sz = MCLBYTES;
1289 else if (adapter->max_frame_size <= 4096)
1290 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1291 else if (adapter->max_frame_size <= 9216)
1292 adapter->rx_mbuf_sz = MJUM9BYTES;
1293 else
1294 adapter->rx_mbuf_sz = MJUM16BYTES;
1295
1296 /* Prepare receive descriptors and buffers */
1297 if (ixgbe_setup_receive_structures(adapter)) {
1298 device_printf(dev,"Could not setup receive structures\n");
1299 ixgbe_stop(adapter);
1300 return;
1301 }
1302
1303 /* Configure RX settings */
1304 ixgbe_initialize_receive_units(adapter);
1305
1306 gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1307
1308 /* Enable Fan Failure Interrupt */
1309 gpie |= IXGBE_SDP1_GPIEN;
1310
1311 /* Add for Thermal detection */
1312 if (hw->mac.type == ixgbe_mac_82599EB)
1313 gpie |= IXGBE_SDP2_GPIEN;
1314
1315 /* Thermal Failure Detection */
1316 if (hw->mac.type == ixgbe_mac_X540)
1317 gpie |= IXGBE_SDP0_GPIEN;
1318
1319 if (adapter->msix > 1) {
1320 /* Enable Enhanced MSIX mode */
1321 gpie |= IXGBE_GPIE_MSIX_MODE;
1322 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1323 IXGBE_GPIE_OCD;
1324 }
1325 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1326
1327 /* Set MTU size */
1328 if (ifp->if_mtu > ETHERMTU) {
1329 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1330 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1331 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1332 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1333 }
1334
1335 /* Now enable all the queues */
1336
1337 for (int i = 0; i < adapter->num_queues; i++) {
1338 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1339 txdctl |= IXGBE_TXDCTL_ENABLE;
1340 /* Set WTHRESH to 8, burst writeback */
1341 txdctl |= (8 << 16);
1342 /*
1343 * When the internal queue falls below PTHRESH (32),
1344 * start prefetching as long as there are at least
1345 * HTHRESH (1) buffers ready. The values are taken
1346 * from the Intel linux driver 3.8.21.
1347 * Prefetching enables tx line rate even with 1 queue.
1348 */
1349 txdctl |= (32 << 0) | (1 << 8);
1350 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1351 }
1352
1353 for (int i = 0; i < adapter->num_queues; i++) {
1354 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1355 if (hw->mac.type == ixgbe_mac_82598EB) {
1356 /*
1357 			** PTHRESH = 32 (0x20, matching the value written below)
1358 ** HTHRESH = 4
1359 ** WTHRESH = 8
1360 */
1361 rxdctl &= ~0x3FFFFF;
1362 rxdctl |= 0x080420;
1363 }
1364 rxdctl |= IXGBE_RXDCTL_ENABLE;
1365 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1366 /* XXX I don't trust this loop, and I don't trust the
1367 * XXX memory barrier. What is this meant to do? --dyoung
1368 */
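		/*
		 * The intent appears to be the usual enable handshake: the
		 * datasheet wants software to wait until the hardware has
		 * latched RXDCTL.ENABLE before the ring tail (RDT) is
		 * written below, so this is a bounded ~10ms poll for that.
		 */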
1369 for (k = 0; k < 10; k++) {
1370 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1371 IXGBE_RXDCTL_ENABLE)
1372 break;
1373 else
1374 msec_delay(1);
1375 }
1376 wmb();
1377 #ifdef DEV_NETMAP
1378 /*
1379 * In netmap mode, we must preserve the buffers made
1380 * available to userspace before the if_init()
1381 * (this is true by default on the TX side, because
1382 * init makes all buffers available to userspace).
1383 *
1384 * netmap_reset() and the device specific routines
1385 * (e.g. ixgbe_setup_receive_rings()) map these
1386 * buffers at the end of the NIC ring, so here we
1387 * must set the RDT (tail) register to make sure
1388 * they are not overwritten.
1389 *
1390 * In this driver the NIC ring starts at RDH = 0,
1391 * RDT points to the last slot available for reception (?),
1392 * so RDT = num_rx_desc - 1 means the whole ring is available.
1393 */
1394 if (ifp->if_capenable & IFCAP_NETMAP) {
1395 struct netmap_adapter *na = NA(adapter->ifp);
1396 struct netmap_kring *kring = &na->rx_rings[i];
1397 int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1398
1399 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1400 } else
1401 #endif /* DEV_NETMAP */
1402 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1403 }
1404
1405 /* Set up VLAN support and filter */
1406 ixgbe_setup_vlan_hw_support(adapter);
1407
1408 /* Enable Receive engine */
1409 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1410 if (hw->mac.type == ixgbe_mac_82598EB)
1411 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1412 rxctrl |= IXGBE_RXCTRL_RXEN;
1413 ixgbe_enable_rx_dma(hw, rxctrl);
1414
1415 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1416
1417 /* Set up MSI/X routing */
1418 if (ixgbe_enable_msix) {
1419 ixgbe_configure_ivars(adapter);
1420 /* Set up auto-mask */
1421 if (hw->mac.type == ixgbe_mac_82598EB)
1422 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1423 else {
1424 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1425 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1426 }
1427 } else { /* Simple settings for Legacy/MSI */
1428 ixgbe_set_ivar(adapter, 0, 0, 0);
1429 ixgbe_set_ivar(adapter, 0, 0, 1);
1430 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1431 }
1432
1433 #ifdef IXGBE_FDIR
1434 /* Init Flow director */
1435 if (hw->mac.type != ixgbe_mac_82598EB) {
1436 u32 hdrm = 32 << fdir_pballoc;
1437
1438 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1439 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1440 }
1441 #endif
1442
1443 /*
1444 ** Check on any SFP devices that
1445 ** need to be kick-started
1446 */
1447 if (hw->phy.type == ixgbe_phy_none) {
1448 int err = hw->phy.ops.identify(hw);
1449 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1450 device_printf(dev,
1451 "Unsupported SFP+ module type was detected.\n");
1452 return;
1453 }
1454 }
1455
1456 /* Set moderation on the Link interrupt */
1457 IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1458
1459 /* Config/Enable Link */
1460 ixgbe_config_link(adapter);
1461
1462 /* Hardware Packet Buffer & Flow Control setup */
1463 {
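	/*
	 * The water marks are kept in kilobytes: IXGBE_DV()/IXGBE_LOW_DV()
	 * compute the headroom needed for one max-sized frame,
	 * IXGBE_BT2KB() converts that to KB, and RXPBSIZE >> 10 yields the
	 * packet buffer size in KB, so high water ends up as "packet
	 * buffer minus headroom".
	 */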
1464 u32 rxpb, frame, size, tmp;
1465
1466 frame = adapter->max_frame_size;
1467
1468 /* Calculate High Water */
1469 if (hw->mac.type == ixgbe_mac_X540)
1470 tmp = IXGBE_DV_X540(frame, frame);
1471 else
1472 tmp = IXGBE_DV(frame, frame);
1473 size = IXGBE_BT2KB(tmp);
1474 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1475 hw->fc.high_water[0] = rxpb - size;
1476
1477 /* Now calculate Low Water */
1478 if (hw->mac.type == ixgbe_mac_X540)
1479 tmp = IXGBE_LOW_DV_X540(frame);
1480 else
1481 tmp = IXGBE_LOW_DV(frame);
1482 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1483
1484 hw->fc.requested_mode = adapter->fc;
1485 hw->fc.pause_time = IXGBE_FC_PAUSE;
1486 hw->fc.send_xon = TRUE;
1487 }
1488 /* Initialize the FC settings */
1489 ixgbe_start_hw(hw);
1490
1491 /* And now turn on interrupts */
1492 ixgbe_enable_intr(adapter);
1493
1494 /* Now inform the stack we're ready */
1495 ifp->if_flags |= IFF_RUNNING;
1496
1497 return;
1498 }
1499
1500 static int
1501 ixgbe_init(struct ifnet *ifp)
1502 {
1503 struct adapter *adapter = ifp->if_softc;
1504
1505 IXGBE_CORE_LOCK(adapter);
1506 ixgbe_init_locked(adapter);
1507 IXGBE_CORE_UNLOCK(adapter);
1508 return 0; /* XXX ixgbe_init_locked cannot fail? really? */
1509 }
1510
1511
1512 /*
1513 **
1514 ** MSIX Interrupt Handlers and Tasklets
1515 **
1516 */
1517
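/*
 * Per-queue interrupt enable: the 82598 has a single 32-bit EIMS
 * register, while 82599 and later split the 64-bit queue mask across
 * the EIMS_EX(0)/EIMS_EX(1) pair, so the vector's bit must be steered
 * into the right half.
 */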
1518 static inline void
1519 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1520 {
1521 struct ixgbe_hw *hw = &adapter->hw;
1522 u64 queue = (u64)(1ULL << vector);
1523 u32 mask;
1524
1525 if (hw->mac.type == ixgbe_mac_82598EB) {
1526 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1527 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1528 } else {
1529 mask = (queue & 0xFFFFFFFF);
1530 if (mask)
1531 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1532 mask = (queue >> 32);
1533 if (mask)
1534 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1535 }
1536 }
1537
1538 __unused static inline void
1539 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1540 {
1541 struct ixgbe_hw *hw = &adapter->hw;
1542 u64 queue = (u64)(1ULL << vector);
1543 u32 mask;
1544
1545 if (hw->mac.type == ixgbe_mac_82598EB) {
1546 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1547 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1548 } else {
1549 mask = (queue & 0xFFFFFFFF);
1550 if (mask)
1551 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1552 mask = (queue >> 32);
1553 if (mask)
1554 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1555 }
1556 }
1557
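/*
 * Re-arm (software-trigger) the given queue interrupts by setting
 * their bits in EICS, split across EICS_EX on 82599 and later.
 */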
1558 static inline void
1559 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1560 {
1561 u32 mask;
1562
1563 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1564 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1565 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1566 } else {
1567 mask = (queues & 0xFFFFFFFF);
1568 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1569 mask = (queues >> 32);
1570 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1571 }
1572 }
1573
1574
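/*
 * Deferred (softint) queue service: run the RX/TX cleanup that the
 * interrupt handler scheduled, and only re-enable the queue interrupt
 * once no more RX work is pending.
 */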
1575 static void
1576 ixgbe_handle_que(void *context)
1577 {
1578 struct ix_queue *que = context;
1579 struct adapter *adapter = que->adapter;
1580 struct tx_ring *txr = que->txr;
1581 struct ifnet *ifp = adapter->ifp;
1582 bool more;
1583
1584 adapter->handleq.ev_count++;
1585
1586 if (ifp->if_flags & IFF_RUNNING) {
1587 more = ixgbe_rxeof(que);
1588 IXGBE_TX_LOCK(txr);
1589 ixgbe_txeof(txr);
1590 #ifndef IXGBE_LEGACY_TX
1591 if (!drbr_empty(ifp, txr->br))
1592 ixgbe_mq_start_locked(ifp, txr, NULL);
1593 #else
1594 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1595 ixgbe_start_locked(txr, ifp);
1596 #endif
1597 IXGBE_TX_UNLOCK(txr);
1598 if (more) {
1599 adapter->req.ev_count++;
1600 softint_schedule(que->que_si);
1601 return;
1602 }
1603 }
1604
1605 /* Reenable this interrupt */
1606 ixgbe_enable_queue(adapter, que->msix);
1607 return;
1608 }
1609
1610
1611 /*********************************************************************
1612 *
1613 * Legacy Interrupt Service routine
1614 *
1615 **********************************************************************/
1616
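/*
 * Returns 1 if the interrupt was ours and 0 to let other handlers on a
 * shared line have a look, per the usual NetBSD interrupt handler
 * convention.
 */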
1617 static int
1618 ixgbe_legacy_irq(void *arg)
1619 {
1620 struct ix_queue *que = arg;
1621 struct adapter *adapter = que->adapter;
1622 struct ifnet *ifp = adapter->ifp;
1623 struct ixgbe_hw *hw = &adapter->hw;
1624 struct tx_ring *txr = adapter->tx_rings;
1625 bool more_tx = false, more_rx = false;
1626 u32 reg_eicr, loop = MAX_LOOP;
1627
1628 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1629
1630 adapter->stats.legint.ev_count++;
1631 ++que->irqs;
1632 if (reg_eicr == 0) {
1633 adapter->stats.intzero.ev_count++;
1634 if ((ifp->if_flags & IFF_UP) != 0)
1635 ixgbe_enable_intr(adapter);
1636 return 0;
1637 }
1638
1639 if ((ifp->if_flags & IFF_RUNNING) != 0) {
1640 more_rx = ixgbe_rxeof(que);
1641
1642 IXGBE_TX_LOCK(txr);
1643 do {
1644 adapter->txloops.ev_count++;
1645 more_tx = ixgbe_txeof(txr);
1646 } while (loop-- && more_tx);
1647 IXGBE_TX_UNLOCK(txr);
1648 }
1649
1650 if (more_rx || more_tx) {
1651 if (more_rx)
1652 adapter->morerx.ev_count++;
1653 if (more_tx)
1654 adapter->moretx.ev_count++;
1655 softint_schedule(que->que_si);
1656 }
1657
1658 /* Check for fan failure */
1659 if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1660 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1661 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1662 "REPLACE IMMEDIATELY!!\n");
1663 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1664 }
1665
1666 /* Link status change */
1667 if (reg_eicr & IXGBE_EICR_LSC)
1668 softint_schedule(adapter->link_si);
1669
1670 ixgbe_enable_intr(adapter);
1671 return 1;
1672 }
1673
1674
1675 #if defined(NETBSD_MSI_OR_MSIX)
1676 /*********************************************************************
1677 *
1678 * MSIX Queue Interrupt Service routine
1679 *
1680 **********************************************************************/
1681 void
1682 ixgbe_msix_que(void *arg)
1683 {
1684 struct ix_queue *que = arg;
1685 struct adapter *adapter = que->adapter;
1686 struct tx_ring *txr = que->txr;
1687 struct rx_ring *rxr = que->rxr;
1688 bool more_tx, more_rx;
1689 u32 newitr = 0;
1690
1691 ixgbe_disable_queue(adapter, que->msix);
1692 ++que->irqs;
1693
1694 more_rx = ixgbe_rxeof(que);
1695
1696 IXGBE_TX_LOCK(txr);
1697 more_tx = ixgbe_txeof(txr);
1698 /*
1699 ** Make certain that if the stack
1700 ** has anything queued the task gets
1701 ** scheduled to handle it.
1702 */
1703 #ifdef IXGBE_LEGACY_TX
1704 if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
1705 #else
1706 if (!drbr_empty(adapter->ifp, txr->br))
1707 #endif
1708 more_tx = 1;
1709 IXGBE_TX_UNLOCK(txr);
1710
1711 /* Do AIM now? */
1712
1713 if (ixgbe_enable_aim == FALSE)
1714 goto no_calc;
1715 /*
1716 ** Do Adaptive Interrupt Moderation:
1717 ** - Write out last calculated setting
1718 ** - Calculate based on average size over
1719 ** the last interval.
1720 */
1721 if (que->eitr_setting)
1722 IXGBE_WRITE_REG(&adapter->hw,
1723 IXGBE_EITR(que->msix), que->eitr_setting);
1724
1725 que->eitr_setting = 0;
1726
1727 /* Idle, do nothing */
1728 if ((txr->bytes == 0) && (rxr->bytes == 0))
1729 goto no_calc;
1730
1731 if ((txr->bytes) && (txr->packets))
1732 newitr = txr->bytes/txr->packets;
1733 if ((rxr->bytes) && (rxr->packets))
1734 newitr = max(newitr,
1735 (rxr->bytes / rxr->packets));
1736 newitr += 24; /* account for hardware frame, crc */
1737
1738 /* set an upper boundary */
1739 newitr = min(newitr, 3000);
1740
1741 /* Be nice to the mid range */
1742 if ((newitr > 300) && (newitr < 1200))
1743 newitr = (newitr / 3);
1744 else
1745 newitr = (newitr / 2);
1746
1747 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1748 newitr |= newitr << 16;
1749 else
1750 newitr |= IXGBE_EITR_CNT_WDIS;
1751
1752 /* save for next interrupt */
1753 que->eitr_setting = newitr;
1754
1755 /* Reset state */
1756 txr->bytes = 0;
1757 txr->packets = 0;
1758 rxr->bytes = 0;
1759 rxr->packets = 0;
1760
1761 no_calc:
1762 if (more_tx || more_rx)
1763 softint_schedule(que->que_si);
1764 else /* Reenable this interrupt */
1765 ixgbe_enable_queue(adapter, que->msix);
1766 return;
1767 }
1768
1769
1770 static void
1771 ixgbe_msix_link(void *arg)
1772 {
1773 struct adapter *adapter = arg;
1774 struct ixgbe_hw *hw = &adapter->hw;
1775 u32 reg_eicr;
1776
1777 ++adapter->link_irq.ev_count;
1778
1779 /* First get the cause */
1780 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1781 /* Clear interrupt with write */
1782 IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1783
1784 /* Link status change */
1785 if (reg_eicr & IXGBE_EICR_LSC)
1786 softint_schedule(adapter->link_si);
1787
1788 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1789 #ifdef IXGBE_FDIR
1790 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1791 /* This is probably overkill :) */
1792 if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1793 return;
1794 /* Disable the interrupt */
1795 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1796 softint_schedule(adapter->fdir_si);
1797 } else
1798 #endif
1799 if (reg_eicr & IXGBE_EICR_ECC) {
1800 device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1801 "Please Reboot!!\n");
1802 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1803 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1806 /* Clear the interrupt */
1807 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1808 softint_schedule(adapter->msf_si);
1809 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1810 /* Clear the interrupt */
1811 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1812 softint_schedule(adapter->mod_si);
1813 }
1814 }
1815
1816 /* Check for fan failure */
1817 if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1818 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1819 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1820 "REPLACE IMMEDIATELY!!\n");
1821 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1822 }
1823
1824 /* Check for over temp condition */
1825 if ((hw->mac.type == ixgbe_mac_X540) &&
1826 (reg_eicr & IXGBE_EICR_TS)) {
1827 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1828 "PHY IS SHUT DOWN!!\n");
1829 device_printf(adapter->dev, "System shutdown required\n");
1830 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1831 }
1832
1833 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1834 return;
1835 }
1836 #endif
1837
1838 /*********************************************************************
1839 *
1840 * Media Ioctl callback
1841 *
1842 * This routine is called whenever the user queries the status of
1843 * the interface using ifconfig.
1844 *
1845 **********************************************************************/
1846 static void
1847 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1848 {
1849 struct adapter *adapter = ifp->if_softc;
1850
1851 INIT_DEBUGOUT("ixgbe_media_status: begin");
1852 IXGBE_CORE_LOCK(adapter);
1853 ixgbe_update_link_status(adapter);
1854
1855 ifmr->ifm_status = IFM_AVALID;
1856 ifmr->ifm_active = IFM_ETHER;
1857
1858 if (!adapter->link_active) {
1859 IXGBE_CORE_UNLOCK(adapter);
1860 return;
1861 }
1862
1863 ifmr->ifm_status |= IFM_ACTIVE;
1864
1865 switch (adapter->link_speed) {
1866 case IXGBE_LINK_SPEED_100_FULL:
1867 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1868 break;
1869 case IXGBE_LINK_SPEED_1GB_FULL:
1870 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1871 break;
1872 case IXGBE_LINK_SPEED_10GB_FULL:
1873 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1874 break;
1875 }
1876
1877 IXGBE_CORE_UNLOCK(adapter);
1878
1879 return;
1880 }
1881
1882 /*********************************************************************
1883 *
1884 * Media Ioctl callback
1885 *
1886 * This routine is called when the user changes speed/duplex using
1887  *  the media/mediaopt options with ifconfig.
1888 *
1889 **********************************************************************/
1890 static int
1891 ixgbe_media_change(struct ifnet * ifp)
1892 {
1893 struct adapter *adapter = ifp->if_softc;
1894 struct ifmedia *ifm = &adapter->media;
1895
1896 INIT_DEBUGOUT("ixgbe_media_change: begin");
1897
1898 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1899 return (EINVAL);
1900
1901 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1902 case IFM_AUTO:
1903 adapter->hw.phy.autoneg_advertised =
1904 IXGBE_LINK_SPEED_100_FULL |
1905 IXGBE_LINK_SPEED_1GB_FULL |
1906 IXGBE_LINK_SPEED_10GB_FULL;
1907 break;
1908 default:
1909 device_printf(adapter->dev, "Only auto media type\n");
1910 return (EINVAL);
1911 }
1912
1913 return (0);
1914 }
1915
1916 /*********************************************************************
1917 *
1918 * This routine maps the mbufs to tx descriptors, allowing the
1919 * TX engine to transmit the packets.
1920 * - return 0 on success, positive on failure
1921 *
1922 **********************************************************************/
1923
1924 static int
1925 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
1926 {
1927 struct m_tag *mtag;
1928 struct adapter *adapter = txr->adapter;
1929 struct ethercom *ec = &adapter->osdep.ec;
1930 u32 olinfo_status = 0, cmd_type_len;
1931 int i, j, error;
1932 int first;
1933 bus_dmamap_t map;
1934 struct ixgbe_tx_buf *txbuf;
1935 union ixgbe_adv_tx_desc *txd = NULL;
1936
1937 /* Basic descriptor defines */
1938 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1939 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1940
1941 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
1942 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1943
1944 /*
1945 * Important to capture the first descriptor
1946 * used because it will contain the index of
1947 * the one we tell the hardware to report back
1948 */
1949 first = txr->next_avail_desc;
1950 txbuf = &txr->tx_buffers[first];
1951 map = txbuf->map;
1952
1953 /*
1954 * Map the packet for DMA.
1955 */
1956 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
1957 m_head, BUS_DMA_NOWAIT);
1958
1959 if (__predict_false(error)) {
1960
1961 switch (error) {
1962 case EAGAIN:
1963 adapter->eagain_tx_dma_setup.ev_count++;
1964 return EAGAIN;
1965 case ENOMEM:
1966 adapter->enomem_tx_dma_setup.ev_count++;
1967 return EAGAIN;
1968 case EFBIG:
1969 /*
1970 * XXX Try it again?
1971 * do m_defrag() and retry bus_dmamap_load_mbuf().
1972 */
1973 adapter->efbig_tx_dma_setup.ev_count++;
1974 return error;
1975 case EINVAL:
1976 adapter->einval_tx_dma_setup.ev_count++;
1977 return error;
1978 default:
1979 adapter->other_tx_dma_setup.ev_count++;
1980 return error;
1981 }
1982 }
1983
1984 /* Make certain there are enough descriptors */
1985 if (map->dm_nsegs > txr->tx_avail - 2) {
1986 txr->no_desc_avail.ev_count++;
1987 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
1988 return EAGAIN;
1989 }
1990
1991 /*
1992 ** Set up the appropriate offload context
1993 ** this will consume the first descriptor
1994 */
1995 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1996 if (__predict_false(error)) {
1997 return (error);
1998 }
1999
2000 #ifdef IXGBE_FDIR
2001 /* Do the flow director magic */
2002 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
2003 ++txr->atr_count;
2004 if (txr->atr_count >= atr_sample_rate) {
2005 ixgbe_atr(txr, m_head);
2006 txr->atr_count = 0;
2007 }
2008 }
2009 #endif
2010
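	/*
	 * Walk the DMA segment list, writing one advanced data
	 * descriptor per segment and wrapping the ring index as needed;
	 * the EOP/RS bits are OR'ed into the last descriptor after the
	 * loop.
	 */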
2011 i = txr->next_avail_desc;
2012 for (j = 0; j < map->dm_nsegs; j++) {
2013 bus_size_t seglen;
2014 bus_addr_t segaddr;
2015
2016 txbuf = &txr->tx_buffers[i];
2017 txd = &txr->tx_base[i];
2018 seglen = map->dm_segs[j].ds_len;
2019 segaddr = htole64(map->dm_segs[j].ds_addr);
2020
2021 txd->read.buffer_addr = segaddr;
2022 txd->read.cmd_type_len = htole32(txr->txd_cmd |
2023 cmd_type_len |seglen);
2024 txd->read.olinfo_status = htole32(olinfo_status);
2025
2026 if (++i == txr->num_desc)
2027 i = 0;
2028 }
2029
2030 txd->read.cmd_type_len |=
2031 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
2032 txr->tx_avail -= map->dm_nsegs;
2033 txr->next_avail_desc = i;
2034
2035 txbuf->m_head = m_head;
2036 /*
2037 ** Here we swap the map so the last descriptor,
2038 ** which gets the completion interrupt has the
2039 ** real map, and the first descriptor gets the
2040 ** unused map from this descriptor.
2041 */
2042 txr->tx_buffers[first].map = txbuf->map;
2043 txbuf->map = map;
2044 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
2045 BUS_DMASYNC_PREWRITE);
2046
2047 /* Set the EOP descriptor that will be marked done */
2048 txbuf = &txr->tx_buffers[first];
2049 txbuf->eop = txd;
2050
2051 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2052 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2053 /*
2054 * Advance the Transmit Descriptor Tail (Tdt), this tells the
2055 * hardware that this frame is available to transmit.
2056 */
2057 ++txr->total_packets.ev_count;
2058 IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
2059
2060 return 0;
2061 }
2062
2063 static void
2064 ixgbe_set_promisc(struct adapter *adapter)
2065 {
2066 struct ether_multi *enm;
2067 struct ether_multistep step;
2068 u_int32_t reg_rctl;
2069 struct ethercom *ec = &adapter->osdep.ec;
2070 struct ifnet *ifp = adapter->ifp;
2071 int mcnt = 0;
2072
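	/*
	 * Start from the current FCTRL value with unicast promiscuous
	 * cleared, and keep multicast promiscuous only when the
	 * multicast list would overflow the hardware filter; the
	 * IFF_PROMISC / IFF_ALLMULTI cases below then add the
	 * corresponding bits back.
	 */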
2073 reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2074 reg_rctl &= (~IXGBE_FCTRL_UPE);
2075 if (ifp->if_flags & IFF_ALLMULTI)
2076 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2077 else {
2078 ETHER_FIRST_MULTI(step, ec, enm);
2079 while (enm != NULL) {
2080 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2081 break;
2082 mcnt++;
2083 ETHER_NEXT_MULTI(step, enm);
2084 }
2085 }
2086 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2087 reg_rctl &= (~IXGBE_FCTRL_MPE);
2088 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2089
2090 if (ifp->if_flags & IFF_PROMISC) {
2091 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2092 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2093 } else if (ifp->if_flags & IFF_ALLMULTI) {
2094 reg_rctl |= IXGBE_FCTRL_MPE;
2095 reg_rctl &= ~IXGBE_FCTRL_UPE;
2096 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2097 }
2098 return;
2099 }
2100
2101
2102 /*********************************************************************
2103 * Multicast Update
2104 *
2105  *  This routine is called whenever the multicast address list is updated.
2106 *
2107 **********************************************************************/
2108 #define IXGBE_RAR_ENTRIES 16
2109
2110 static void
2111 ixgbe_set_multi(struct adapter *adapter)
2112 {
2113 struct ether_multi *enm;
2114 struct ether_multistep step;
2115 u32 fctrl;
2116 u8 *mta;
2117 u8 *update_ptr;
2118 int mcnt = 0;
2119 struct ethercom *ec = &adapter->osdep.ec;
2120 struct ifnet *ifp = adapter->ifp;
2121
2122 IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
2123
2124 mta = adapter->mta;
2125 bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
2126 MAX_NUM_MULTICAST_ADDRESSES);
2127
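	/*
	 * Build the flat MTA array for the shared code.  Fall back to
	 * ALLMULTI when the list overflows the table or contains an
	 * address range (addrlo != addrhi), since ranges cannot be
	 * represented by exact-match filters.
	 */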
2128 ifp->if_flags &= ~IFF_ALLMULTI;
2129 ETHER_FIRST_MULTI(step, ec, enm);
2130 while (enm != NULL) {
2131 if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
2132 (memcmp(enm->enm_addrlo, enm->enm_addrhi,
2133 ETHER_ADDR_LEN) != 0)) {
2134 ifp->if_flags |= IFF_ALLMULTI;
2135 break;
2136 }
2137 bcopy(enm->enm_addrlo,
2138 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2139 IXGBE_ETH_LENGTH_OF_ADDRESS);
2140 mcnt++;
2141 ETHER_NEXT_MULTI(step, enm);
2142 }
2143
2144 fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2145 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2146 if (ifp->if_flags & IFF_PROMISC)
2147 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2148 else if (ifp->if_flags & IFF_ALLMULTI) {
2149 fctrl |= IXGBE_FCTRL_MPE;
2150 }
2151
2152 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2153
2154 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2155 update_ptr = mta;
2156 ixgbe_update_mc_addr_list(&adapter->hw,
2157 update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2158 }
2159
2160 return;
2161 }
2162
2163 /*
2164 * This is an iterator function now needed by the multicast
2165 * shared code. It simply feeds the shared code routine the
2166 * addresses in the array of ixgbe_set_multi() one by one.
2167 */
2168 static u8 *
2169 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2170 {
2171 u8 *addr = *update_ptr;
2172 u8 *newptr;
2173 *vmdq = 0;
2174
2175 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2176 *update_ptr = newptr;
2177 return addr;
2178 }
2179
2180
2181 /*********************************************************************
2182 * Timer routine
2183 *
2184  *  This routine checks for link status, updates statistics,
2185 * and runs the watchdog check.
2186 *
2187 **********************************************************************/
2188
2189 static void
2190 ixgbe_local_timer1(void *arg)
2191 {
2192 struct adapter *adapter = arg;
2193 device_t dev = adapter->dev;
2194 struct ix_queue *que = adapter->queues;
2195 struct tx_ring *txr = adapter->tx_rings;
2196 int hung = 0, paused = 0;
2197
2198 KASSERT(mutex_owned(&adapter->core_mtx));
2199
2200 /* Check for pluggable optics */
2201 if (adapter->sfp_probe)
2202 if (!ixgbe_sfp_probe(adapter))
2203 goto out; /* Nothing to do */
2204
2205 ixgbe_update_link_status(adapter);
2206 ixgbe_update_stats_counters(adapter);
2207
2208 /*
2209 * If the interface has been paused
2210 * then don't do the watchdog check
2211 */
2212 if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2213 paused = 1;
2214
2215 /*
2216 ** Check the TX queues status
2217 ** - watchdog only if all queues show hung
2218 */
2219 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2220 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2221 (paused == 0))
2222 ++hung;
2223 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2224 softint_schedule(que->que_si);
2225 }
2226 	/* Only truly watchdog if all queues show hung */
2227 if (hung == adapter->num_queues)
2228 goto watchdog;
2229
2230 out:
2231 ixgbe_rearm_queues(adapter, adapter->que_mask);
2232 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2233 return;
2234
2235 watchdog:
2236 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2237 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2238 IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2239 IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2240 	device_printf(dev, "TX(%d) desc avail = %d, "
2241 	    "Next TX to Clean = %d\n",
2242 txr->me, txr->tx_avail, txr->next_to_clean);
2243 adapter->ifp->if_flags &= ~IFF_RUNNING;
2244 adapter->watchdog_events.ev_count++;
2245 ixgbe_init_locked(adapter);
2246 }
2247
2248 static void
2249 ixgbe_local_timer(void *arg)
2250 {
2251 struct adapter *adapter = arg;
2252
2253 IXGBE_CORE_LOCK(adapter);
2254 ixgbe_local_timer1(adapter);
2255 IXGBE_CORE_UNLOCK(adapter);
2256 }
2257
2258 /*
2259 ** Note: this routine updates the OS on the link state;
2260 ** the real check of the hardware only happens with
2261 ** a link interrupt.
2262 */
2263 static void
2264 ixgbe_update_link_status(struct adapter *adapter)
2265 {
2266 struct ifnet *ifp = adapter->ifp;
2267 device_t dev = adapter->dev;
2268
2269
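	/*
	 * link_up/link_speed are updated elsewhere (link interrupt /
	 * ixgbe_check_link); here we only propagate that cached state
	 * to the ifnet layer.  The magic value 128 below is
	 * IXGBE_LINK_SPEED_10GB_FULL (0x80).
	 */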
2270 if (adapter->link_up){
2271 if (adapter->link_active == FALSE) {
2272 if (bootverbose)
2273 device_printf(dev,"Link is up %d Gbps %s \n",
2274 ((adapter->link_speed == 128)? 10:1),
2275 "Full Duplex");
2276 adapter->link_active = TRUE;
2277 /* Update any Flow Control changes */
2278 ixgbe_fc_enable(&adapter->hw);
2279 if_link_state_change(ifp, LINK_STATE_UP);
2280 }
2281 } else { /* Link down */
2282 if (adapter->link_active == TRUE) {
2283 if (bootverbose)
2284 device_printf(dev,"Link is Down\n");
2285 if_link_state_change(ifp, LINK_STATE_DOWN);
2286 adapter->link_active = FALSE;
2287 }
2288 }
2289
2290 return;
2291 }
2292
2293
2294 static void
2295 ixgbe_ifstop(struct ifnet *ifp, int disable)
2296 {
2297 struct adapter *adapter = ifp->if_softc;
2298
2299 IXGBE_CORE_LOCK(adapter);
2300 ixgbe_stop(adapter);
2301 IXGBE_CORE_UNLOCK(adapter);
2302 }
2303
2304 /*********************************************************************
2305 *
2306  *  This routine disables all traffic on the adapter by issuing a
2307  *  global reset on the MAC and stopping the transmit/receive units.
2308 *
2309 **********************************************************************/
2310
2311 static void
2312 ixgbe_stop(void *arg)
2313 {
2314 struct ifnet *ifp;
2315 struct adapter *adapter = arg;
2316 struct ixgbe_hw *hw = &adapter->hw;
2317 ifp = adapter->ifp;
2318
2319 KASSERT(mutex_owned(&adapter->core_mtx));
2320
2321 INIT_DEBUGOUT("ixgbe_stop: begin\n");
2322 ixgbe_disable_intr(adapter);
2323 callout_stop(&adapter->timer);
2324
2325 /* Let the stack know...*/
2326 ifp->if_flags &= ~IFF_RUNNING;
2327
2328 ixgbe_reset_hw(hw);
2329 hw->adapter_stopped = FALSE;
2330 ixgbe_stop_adapter(hw);
2331 /* Turn off the laser */
2332 if (hw->phy.multispeed_fiber)
2333 ixgbe_disable_tx_laser(hw);
2334
2335 /* reprogram the RAR[0] in case user changed it. */
2336 ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2337
2338 return;
2339 }
2340
2341
2342 /*********************************************************************
2343 *
2344 * Determine hardware revision.
2345 *
2346 **********************************************************************/
2347 static void
2348 ixgbe_identify_hardware(struct adapter *adapter)
2349 {
2350 pcitag_t tag;
2351 pci_chipset_tag_t pc;
2352 pcireg_t subid, id;
2353 struct ixgbe_hw *hw = &adapter->hw;
2354
2355 pc = adapter->osdep.pc;
2356 tag = adapter->osdep.tag;
2357
2358 id = pci_conf_read(pc, tag, PCI_ID_REG);
2359 subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
2360
2361 /* Save off the information about this board */
2362 hw->vendor_id = PCI_VENDOR(id);
2363 hw->device_id = PCI_PRODUCT(id);
2364 hw->revision_id =
2365 PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
2366 hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
2367 hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
2368
2369 /* We need this here to set the num_segs below */
2370 ixgbe_set_mac_type(hw);
2371
2372 /* Pick up the 82599 and VF settings */
2373 if (hw->mac.type != ixgbe_mac_82598EB) {
2374 hw->phy.smart_speed = ixgbe_smart_speed;
2375 adapter->num_segs = IXGBE_82599_SCATTER;
2376 } else
2377 adapter->num_segs = IXGBE_82598_SCATTER;
2378
2379 return;
2380 }
2381
2382 /*********************************************************************
2383 *
2384 * Determine optic type
2385 *
2386 **********************************************************************/
2387 static void
2388 ixgbe_setup_optics(struct adapter *adapter)
2389 {
2390 struct ixgbe_hw *hw = &adapter->hw;
2391 int layer;
2392
2393 layer = ixgbe_get_supported_physical_layer(hw);
2394
2395 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2396 adapter->optics = IFM_10G_T;
2397 return;
2398 }
2399
2400 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2401 adapter->optics = IFM_1000_T;
2402 return;
2403 }
2404
2405 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2406 adapter->optics = IFM_1000_SX;
2407 return;
2408 }
2409
2410 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2411 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2412 adapter->optics = IFM_10G_LR;
2413 return;
2414 }
2415
2416 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2417 adapter->optics = IFM_10G_SR;
2418 return;
2419 }
2420
2421 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2422 adapter->optics = IFM_10G_TWINAX;
2423 return;
2424 }
2425
2426 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2427 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2428 adapter->optics = IFM_10G_CX4;
2429 return;
2430 }
2431
2432 /* If we get here just set the default */
2433 adapter->optics = IFM_ETHER | IFM_AUTO;
2434 return;
2435 }
2436
2437 /*********************************************************************
2438 *
2439 * Setup the Legacy or MSI Interrupt handler
2440 *
2441 **********************************************************************/
2442 static int
2443 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
2444 {
2445 device_t dev = adapter->dev;
2446 struct ix_queue *que = adapter->queues;
2447 #ifndef IXGBE_LEGACY_TX
2448 struct tx_ring *txr = adapter->tx_rings;
2449 #endif
2450 char intrbuf[PCI_INTRSTR_LEN];
2451 #if 0
2452 int rid = 0;
2453
2454 /* MSI RID at 1 */
2455 if (adapter->msix == 1)
2456 rid = 1;
2457 #endif
2458
2459 /* We allocate a single interrupt resource */
2460 if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
2461 aprint_error_dev(dev, "unable to map interrupt\n");
2462 return ENXIO;
2463 } else {
2464 aprint_normal_dev(dev, "interrupting at %s\n",
2465 pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
2466 intrbuf, sizeof(intrbuf)));
2467 }
2468
2469 /*
2470 * Try allocating a fast interrupt and the associated deferred
2471 * processing contexts.
2472 */
2473 #ifndef IXGBE_LEGACY_TX
2474 txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
2475 txr);
2476 #endif
2477 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
2478
2479 /* Tasklets for Link, SFP and Multispeed Fiber */
2480 adapter->link_si =
2481 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2482 adapter->mod_si =
2483 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2484 adapter->msf_si =
2485 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2486
2487 #ifdef IXGBE_FDIR
2488 adapter->fdir_si =
2489 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2490 #endif
2491 if (que->que_si == NULL ||
2492 adapter->link_si == NULL ||
2493 adapter->mod_si == NULL ||
2494 #ifdef IXGBE_FDIR
2495 adapter->fdir_si == NULL ||
2496 #endif
2497 adapter->msf_si == NULL) {
2498 aprint_error_dev(dev,
2499 "could not establish software interrupts\n");
2500 return ENXIO;
2501 }
2502
2503 adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
2504 adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
2505 if (adapter->osdep.intr == NULL) {
2506 aprint_error_dev(dev, "failed to register interrupt handler\n");
2507 softint_disestablish(que->que_si);
2508 softint_disestablish(adapter->link_si);
2509 softint_disestablish(adapter->mod_si);
2510 softint_disestablish(adapter->msf_si);
2511 #ifdef IXGBE_FDIR
2512 softint_disestablish(adapter->fdir_si);
2513 #endif
2514 return ENXIO;
2515 }
2516 /* For simplicity in the handlers */
2517 adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2518
2519 return (0);
2520 }
2521
2522
2523 /*********************************************************************
2524 *
2525 * Setup MSIX Interrupt resources and handlers
2526 *
2527 **********************************************************************/
2528 static int
2529 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
2530 {
2531 #if !defined(NETBSD_MSI_OR_MSIX)
2532 return 0;
2533 #else
2534 device_t dev = adapter->dev;
2535 struct ix_queue *que = adapter->queues;
2536 struct tx_ring *txr = adapter->tx_rings;
2537 int error, rid, vector = 0;
2538
2539 for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2540 rid = vector + 1;
2541 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2542 RF_SHAREABLE | RF_ACTIVE);
2543 if (que->res == NULL) {
2544 aprint_error_dev(dev,"Unable to allocate"
2545 " bus resource: que interrupt [%d]\n", vector);
2546 return (ENXIO);
2547 }
2548 /* Set the handler function */
2549 error = bus_setup_intr(dev, que->res,
2550 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2551 ixgbe_msix_que, que, &que->tag);
2552 if (error) {
2553 que->res = NULL;
2554 aprint_error_dev(dev,
2555 "Failed to register QUE handler\n");
2556 return error;
2557 }
2558 #if __FreeBSD_version >= 800504
2559 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2560 #endif
2561 que->msix = vector;
2562 adapter->que_mask |= (u64)(1 << que->msix);
2563 /*
2564 ** Bind the msix vector, and thus the
2565 ** ring to the corresponding cpu.
2566 */
2567 if (adapter->num_queues > 1)
2568 bus_bind_intr(dev, que->res, i);
2569
2570 #ifndef IXGBE_LEGACY_TX
2571 txr->txq_si = softint_establish(SOFTINT_NET,
2572 ixgbe_deferred_mq_start, txr);
2573 #endif
2574 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
2575 que);
2576 if (que->que_si == NULL) {
2577 aprint_error_dev(dev,
2578 "could not establish software interrupt\n");
2579 }
2580 }
2581
2582 /* and Link */
2583 rid = vector + 1;
2584 adapter->res = bus_alloc_resource_any(dev,
2585 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2586 if (!adapter->res) {
2587 aprint_error_dev(dev,"Unable to allocate bus resource: "
2588 "Link interrupt [%d]\n", rid);
2589 return (ENXIO);
2590 }
2591 /* Set the link handler function */
2592 error = bus_setup_intr(dev, adapter->res,
2593 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2594 ixgbe_msix_link, adapter, &adapter->tag);
2595 if (error) {
2596 adapter->res = NULL;
2597 aprint_error_dev(dev, "Failed to register LINK handler\n");
2598 return (error);
2599 }
2600 #if __FreeBSD_version >= 800504
2601 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2602 #endif
2603 adapter->linkvec = vector;
2604 /* Tasklets for Link, SFP and Multispeed Fiber */
2605 adapter->link_si =
2606 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2607 adapter->mod_si =
2608 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2609 adapter->msf_si =
2610 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2611 #ifdef IXGBE_FDIR
2612 adapter->fdir_si =
2613 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2614 #endif
2615
2616 return (0);
2617 #endif
2618 }
2619
2620 /*
2621 * Setup Either MSI/X or MSI
2622 */
2623 static int
2624 ixgbe_setup_msix(struct adapter *adapter)
2625 {
2626 #if !defined(NETBSD_MSI_OR_MSIX)
2627 return 0;
2628 #else
2629 device_t dev = adapter->dev;
2630 int rid, want, queues, msgs;
2631
2632 /* Override by tuneable */
2633 if (ixgbe_enable_msix == 0)
2634 goto msi;
2635
2636 /* First try MSI/X */
2637 rid = PCI_BAR(MSIX_82598_BAR);
2638 adapter->msix_mem = bus_alloc_resource_any(dev,
2639 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2640 if (!adapter->msix_mem) {
2641 rid += 4; /* 82599 maps in higher BAR */
2642 adapter->msix_mem = bus_alloc_resource_any(dev,
2643 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2644 }
2645 if (!adapter->msix_mem) {
2646 /* May not be enabled */
2647 device_printf(adapter->dev,
2648 "Unable to map MSIX table \n");
2649 goto msi;
2650 }
2651
2652 msgs = pci_msix_count(dev);
2653 if (msgs == 0) { /* system has msix disabled */
2654 bus_release_resource(dev, SYS_RES_MEMORY,
2655 rid, adapter->msix_mem);
2656 adapter->msix_mem = NULL;
2657 goto msi;
2658 }
2659
2660 /* Figure out a reasonable auto config value */
2661 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2662
2663 if (ixgbe_num_queues != 0)
2664 queues = ixgbe_num_queues;
2665 /* Set max queues to 8 when autoconfiguring */
2666 else if ((ixgbe_num_queues == 0) && (queues > 8))
2667 queues = 8;
2668
2669 /*
2670 ** Want one vector (RX/TX pair) per queue
2671 ** plus an additional for Link.
2672 */
2673 want = queues + 1;
2674 if (msgs >= want)
2675 msgs = want;
2676 else {
2677 device_printf(adapter->dev,
2678 "MSIX Configuration Problem, "
2679 "%d vectors but %d queues wanted!\n",
2680 msgs, want);
2681 return (0); /* Will go to Legacy setup */
2682 }
2683 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2684 device_printf(adapter->dev,
2685 "Using MSIX interrupts with %d vectors\n", msgs);
2686 adapter->num_queues = queues;
2687 return (msgs);
2688 }
2689 msi:
2690 msgs = pci_msi_count(dev);
2691 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2692 device_printf(adapter->dev,"Using an MSI interrupt\n");
2693 else
2694 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2695 return (msgs);
2696 #endif
2697 }
2698
2699
2700 static int
2701 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
2702 {
2703 pcireg_t memtype;
2704 device_t dev = adapter->dev;
2705 bus_addr_t addr;
2706 int flags;
2707
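	/*
	 * BAR0 holds the device register block.  The mapping is forced
	 * non-prefetchable below, since register accesses should not be
	 * cached or combined.
	 */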
2708 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
2709 switch (memtype) {
2710 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
2711 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
2712 adapter->osdep.mem_bus_space_tag = pa->pa_memt;
2713 if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
2714 memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
2715 goto map_err;
2716 if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
2717 aprint_normal_dev(dev, "clearing prefetchable bit\n");
2718 flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
2719 }
2720 if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
2721 adapter->osdep.mem_size, flags,
2722 &adapter->osdep.mem_bus_space_handle) != 0) {
2723 map_err:
2724 adapter->osdep.mem_size = 0;
2725 aprint_error_dev(dev, "unable to map BAR0\n");
2726 return ENXIO;
2727 }
2728 break;
2729 default:
2730 aprint_error_dev(dev, "unexpected type on BAR0\n");
2731 return ENXIO;
2732 }
2733
2734 /* Legacy defaults */
2735 adapter->num_queues = 1;
2736 adapter->hw.back = &adapter->osdep;
2737
2738 /*
2739 ** Now setup MSI or MSI/X, should
2740 ** return us the number of supported
2741 ** vectors. (Will be 1 for MSI)
2742 */
2743 adapter->msix = ixgbe_setup_msix(adapter);
2744 return (0);
2745 }
2746
2747 static void
2748 ixgbe_free_pci_resources(struct adapter * adapter)
2749 {
2750 #if defined(NETBSD_MSI_OR_MSIX)
2751 struct ix_queue *que = adapter->queues;
2752 device_t dev = adapter->dev;
2753 #endif
2754 int rid;
2755
2756 #if defined(NETBSD_MSI_OR_MSIX)
2757 int memrid;
2758 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2759 memrid = PCI_BAR(MSIX_82598_BAR);
2760 else
2761 memrid = PCI_BAR(MSIX_82599_BAR);
2762
2763 /*
2764 ** There is a slight possibility of a failure mode
2765 ** in attach that will result in entering this function
2766 ** before interrupt resources have been initialized, and
2767 ** in that case we do not want to execute the loops below
2768 ** We can detect this reliably by the state of the adapter
2769 ** res pointer.
2770 */
2771 if (adapter->res == NULL)
2772 goto mem;
2773
2774 /*
2775 ** Release all msix queue resources:
2776 */
2777 for (int i = 0; i < adapter->num_queues; i++, que++) {
2778 rid = que->msix + 1;
2779 if (que->tag != NULL) {
2780 bus_teardown_intr(dev, que->res, que->tag);
2781 que->tag = NULL;
2782 }
2783 if (que->res != NULL)
2784 bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2785 }
2786 #endif
2787
2788 /* Clean the Legacy or Link interrupt last */
2789 if (adapter->linkvec) /* we are doing MSIX */
2790 rid = adapter->linkvec + 1;
2791 else
2792 		rid = (adapter->msix != 0) ? 1 : 0;
2793
2794 pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
2795 adapter->osdep.intr = NULL;
2796
2797 #if defined(NETBSD_MSI_OR_MSIX)
2798 mem:
2799 if (adapter->msix)
2800 pci_release_msi(dev);
2801
2802 if (adapter->msix_mem != NULL)
2803 bus_release_resource(dev, SYS_RES_MEMORY,
2804 memrid, adapter->msix_mem);
2805 #endif
2806
2807 if (adapter->osdep.mem_size != 0) {
2808 bus_space_unmap(adapter->osdep.mem_bus_space_tag,
2809 adapter->osdep.mem_bus_space_handle,
2810 adapter->osdep.mem_size);
2811 }
2812
2813 return;
2814 }
2815
2816 /*********************************************************************
2817 *
2818 * Setup networking device structure and register an interface.
2819 *
2820 **********************************************************************/
2821 static int
2822 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2823 {
2824 struct ethercom *ec = &adapter->osdep.ec;
2825 struct ixgbe_hw *hw = &adapter->hw;
2826 struct ifnet *ifp;
2827
2828 INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2829
2830 ifp = adapter->ifp = &ec->ec_if;
2831 strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
2832 ifp->if_baudrate = IF_Gbps(10);
2833 ifp->if_init = ixgbe_init;
2834 ifp->if_stop = ixgbe_ifstop;
2835 ifp->if_softc = adapter;
2836 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2837 ifp->if_ioctl = ixgbe_ioctl;
2838 #ifndef IXGBE_LEGACY_TX
2839 ifp->if_transmit = ixgbe_mq_start;
2840 ifp->if_qflush = ixgbe_qflush;
2841 #else
2842 ifp->if_start = ixgbe_start;
2843 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2844 #endif
2845
2846 if_attach(ifp);
2847 ether_ifattach(ifp, adapter->hw.mac.addr);
2848 ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
2849
2850 adapter->max_frame_size =
2851 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2852
2853 /*
2854 * Tell the upper layer(s) we support long frames.
2855 */
2856 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2857
2858 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
2859 ifp->if_capenable = 0;
2860
2861 ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
2862 ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
2863 ifp->if_capabilities |= IFCAP_LRO;
2864 ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
2865 | ETHERCAP_VLAN_MTU;
2866 ec->ec_capenable = ec->ec_capabilities;
2867
2868 /*
2869 	** Don't turn this on by default; if vlans are
2870 	** created on another pseudo device (e.g. lagg)
2871 	** then vlan events are not passed through, breaking
2872 	** operation, but with HW FILTER off it works.  If
2873 ** using vlans directly on the ixgbe driver you can
2874 ** enable this and get full hardware tag filtering.
2875 */
2876 ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
2877
2878 /*
2879 * Specify the media types supported by this adapter and register
2880 * callbacks to update media and link information
2881 */
2882 ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2883 ixgbe_media_status);
2884 ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2885 ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2886 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2887 ifmedia_add(&adapter->media,
2888 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2889 ifmedia_add(&adapter->media,
2890 IFM_ETHER | IFM_1000_T, 0, NULL);
2891 }
2892 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2893 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2894
2895 return (0);
2896 }
2897
2898 static void
2899 ixgbe_config_link(struct adapter *adapter)
2900 {
2901 struct ixgbe_hw *hw = &adapter->hw;
2902 u32 autoneg, err = 0;
2903 bool sfp, negotiate;
2904
2905 sfp = ixgbe_is_sfp(hw);
2906
2907 if (sfp) {
2908 void *ip;
2909
2910 if (hw->phy.multispeed_fiber) {
2911 hw->mac.ops.setup_sfp(hw);
2912 ixgbe_enable_tx_laser(hw);
2913 ip = adapter->msf_si;
2914 } else {
2915 ip = adapter->mod_si;
2916 }
2917
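		/*
		 * Softints are per-CPU; preemption is disabled here,
		 * likely to keep the schedule request on the CPU that
		 * will run the soft interrupt.
		 */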
2918 kpreempt_disable();
2919 softint_schedule(ip);
2920 kpreempt_enable();
2921 } else {
2922 if (hw->mac.ops.check_link)
2923 err = ixgbe_check_link(hw, &adapter->link_speed,
2924 &adapter->link_up, FALSE);
2925 if (err)
2926 goto out;
2927 autoneg = hw->phy.autoneg_advertised;
2928 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2929 err = hw->mac.ops.get_link_capabilities(hw,
2930 &autoneg, &negotiate);
2931 else
2932 negotiate = 0;
2933 if (err)
2934 goto out;
2935 if (hw->mac.ops.setup_link)
2936 err = hw->mac.ops.setup_link(hw,
2937 autoneg, adapter->link_up);
2938 }
2939 out:
2940 return;
2941 }
2942
2943 /********************************************************************
2944 * Manage DMA'able memory.
2945 *******************************************************************/
2946
2947 static int
2948 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2949 struct ixgbe_dma_alloc *dma, const int mapflags)
2950 {
2951 device_t dev = adapter->dev;
2952 int r, rsegs;
2953
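	/*
	 * Allocation proceeds in the usual bus_dma steps: create a tag,
	 * allocate the physical segment, map it into kernel VA, create
	 * the DMA map and load it.  Each failure label below unwinds
	 * exactly the steps that succeeded.
	 */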
2954 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2955 DBA_ALIGN, 0, /* alignment, bounds */
2956 size, /* maxsize */
2957 1, /* nsegments */
2958 size, /* maxsegsize */
2959 BUS_DMA_ALLOCNOW, /* flags */
2960 &dma->dma_tag);
2961 if (r != 0) {
2962 aprint_error_dev(dev,
2963 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2964 goto fail_0;
2965 }
2966
2967 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2968 size,
2969 dma->dma_tag->dt_alignment,
2970 dma->dma_tag->dt_boundary,
2971 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2972 if (r != 0) {
2973 aprint_error_dev(dev,
2974 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2975 goto fail_1;
2976 }
2977
2978 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2979 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2980 if (r != 0) {
2981 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2982 __func__, r);
2983 goto fail_2;
2984 }
2985
2986 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2987 if (r != 0) {
2988 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2989 __func__, r);
2990 goto fail_3;
2991 }
2992
2993 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2994 size,
2995 NULL,
2996 mapflags | BUS_DMA_NOWAIT);
2997 if (r != 0) {
2998 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2999 __func__, r);
3000 goto fail_4;
3001 }
3002 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
3003 dma->dma_size = size;
3004 return 0;
3005 fail_4:
3006 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
3007 fail_3:
3008 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
3009 fail_2:
3010 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
3011 fail_1:
3012 ixgbe_dma_tag_destroy(dma->dma_tag);
3013 fail_0:
3014 return r;
3015 }
3016
3017 static void
3018 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
3019 {
3020 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
3021 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3022 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
3023 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
3024 ixgbe_dma_tag_destroy(dma->dma_tag);
3025 }
3026
3027
3028 /*********************************************************************
3029 *
3030 * Allocate memory for the transmit and receive rings, and then
3031 * the descriptors associated with each, called only once at attach.
3032 *
3033 **********************************************************************/
3034 static int
3035 ixgbe_allocate_queues(struct adapter *adapter)
3036 {
3037 device_t dev = adapter->dev;
3038 struct ix_queue *que;
3039 struct tx_ring *txr;
3040 struct rx_ring *rxr;
3041 int rsize, tsize, error = IXGBE_SUCCESS;
3042 int txconf = 0, rxconf = 0;
3043
3044 /* First allocate the top level queue structs */
3045 if (!(adapter->queues =
3046 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
3047 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3048 aprint_error_dev(dev, "Unable to allocate queue memory\n");
3049 error = ENOMEM;
3050 goto fail;
3051 }
3052
3053 /* First allocate the TX ring struct memory */
3054 if (!(adapter->tx_rings =
3055 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3056 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3057 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
3058 error = ENOMEM;
3059 goto tx_fail;
3060 }
3061
3062 /* Next allocate the RX */
3063 if (!(adapter->rx_rings =
3064 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3065 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3066 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
3067 error = ENOMEM;
3068 goto rx_fail;
3069 }
3070
3071 /* For the ring itself */
3072 tsize = roundup2(adapter->num_tx_desc *
3073 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
3074
3075 /*
3076 * Now set up the TX queues, txconf is needed to handle the
3077 * possibility that things fail midcourse and we need to
3078 * undo memory gracefully
3079 */
3080 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3081 /* Set up some basics */
3082 txr = &adapter->tx_rings[i];
3083 txr->adapter = adapter;
3084 txr->me = i;
3085 txr->num_desc = adapter->num_tx_desc;
3086
3087 /* Initialize the TX side lock */
3088 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3089 device_xname(dev), txr->me);
3090 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
3091
3092 if (ixgbe_dma_malloc(adapter, tsize,
3093 &txr->txdma, BUS_DMA_NOWAIT)) {
3094 aprint_error_dev(dev,
3095 "Unable to allocate TX Descriptor memory\n");
3096 error = ENOMEM;
3097 goto err_tx_desc;
3098 }
3099 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
3100 bzero((void *)txr->tx_base, tsize);
3101
3102 /* Now allocate transmit buffers for the ring */
3103 if (ixgbe_allocate_transmit_buffers(txr)) {
3104 aprint_error_dev(dev,
3105 "Critical Failure setting up transmit buffers\n");
3106 error = ENOMEM;
3107 goto err_tx_desc;
3108 }
3109 #ifndef IXGBE_LEGACY_TX
3110 /* Allocate a buf ring */
3111 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
3112 M_WAITOK, &txr->tx_mtx);
3113 if (txr->br == NULL) {
3114 aprint_error_dev(dev,
3115 "Critical Failure setting up buf ring\n");
3116 error = ENOMEM;
3117 goto err_tx_desc;
3118 }
3119 #endif
3120 }
3121
3122 /*
3123 * Next the RX queues...
3124 */
3125 rsize = roundup2(adapter->num_rx_desc *
3126 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3127 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3128 rxr = &adapter->rx_rings[i];
3129 /* Set up some basics */
3130 rxr->adapter = adapter;
3131 rxr->me = i;
3132 rxr->num_desc = adapter->num_rx_desc;
3133
3134 /* Initialize the RX side lock */
3135 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3136 device_xname(dev), rxr->me);
3137 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
3138
3139 if (ixgbe_dma_malloc(adapter, rsize,
3140 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3141 aprint_error_dev(dev,
3142 			    "Unable to allocate RX Descriptor memory\n");
3143 error = ENOMEM;
3144 goto err_rx_desc;
3145 }
3146 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3147 bzero((void *)rxr->rx_base, rsize);
3148
3149 		/* Allocate receive buffers for the ring */
3150 if (ixgbe_allocate_receive_buffers(rxr)) {
3151 aprint_error_dev(dev,
3152 "Critical Failure setting up receive buffers\n");
3153 error = ENOMEM;
3154 goto err_rx_desc;
3155 }
3156 }
3157
3158 /*
3159 ** Finally set up the queue holding structs
3160 */
3161 for (int i = 0; i < adapter->num_queues; i++) {
3162 que = &adapter->queues[i];
3163 que->adapter = adapter;
3164 que->txr = &adapter->tx_rings[i];
3165 que->rxr = &adapter->rx_rings[i];
3166 }
3167
3168 return (0);
3169
3170 err_rx_desc:
3171 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3172 ixgbe_dma_free(adapter, &rxr->rxdma);
3173 err_tx_desc:
3174 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3175 ixgbe_dma_free(adapter, &txr->txdma);
3176 free(adapter->rx_rings, M_DEVBUF);
3177 rx_fail:
3178 free(adapter->tx_rings, M_DEVBUF);
3179 tx_fail:
3180 free(adapter->queues, M_DEVBUF);
3181 fail:
3182 return (error);
3183 }
3184
3185 /*********************************************************************
3186 *
3187 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3188 * the information needed to transmit a packet on the wire. This is
3189 * called only once at attach, setup is done every reset.
3190 *
3191 **********************************************************************/
3192 static int
3193 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3194 {
3195 struct adapter *adapter = txr->adapter;
3196 device_t dev = adapter->dev;
3197 struct ixgbe_tx_buf *txbuf;
3198 int error, i;
3199
3200 /*
3201 * Setup DMA descriptor areas.
3202 */
3203 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3204 1, 0, /* alignment, bounds */
3205 IXGBE_TSO_SIZE, /* maxsize */
3206 adapter->num_segs, /* nsegments */
3207 PAGE_SIZE, /* maxsegsize */
3208 0, /* flags */
3209 &txr->txtag))) {
3210 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
3211 goto fail;
3212 }
3213
3214 if (!(txr->tx_buffers =
3215 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3216 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3217 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
3218 error = ENOMEM;
3219 goto fail;
3220 }
3221
3222 /* Create the descriptor buffer dma maps */
3223 txbuf = txr->tx_buffers;
3224 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3225 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
3226 if (error != 0) {
3227 aprint_error_dev(dev,
3228 "Unable to create TX DMA map (%d)\n", error);
3229 goto fail;
3230 }
3231 }
3232
3233 return 0;
3234 fail:
3235 	/* We free all; it handles the case where we are in the middle */
3236 ixgbe_free_transmit_structures(adapter);
3237 return (error);
3238 }
3239
3240 /*********************************************************************
3241 *
3242 * Initialize a transmit ring.
3243 *
3244 **********************************************************************/
3245 static void
3246 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3247 {
3248 struct adapter *adapter = txr->adapter;
3249 struct ixgbe_tx_buf *txbuf;
3250 int i;
3251 #ifdef DEV_NETMAP
3252 struct netmap_adapter *na = NA(adapter->ifp);
3253 struct netmap_slot *slot;
3254 #endif /* DEV_NETMAP */
3255
3256 /* Clear the old ring contents */
3257 IXGBE_TX_LOCK(txr);
3258 #ifdef DEV_NETMAP
3259 /*
3260 * (under lock): if in netmap mode, do some consistency
3261 * checks and set slot to entry 0 of the netmap ring.
3262 */
3263 slot = netmap_reset(na, NR_TX, txr->me, 0);
3264 #endif /* DEV_NETMAP */
3265 bzero((void *)txr->tx_base,
3266 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3267 /* Reset indices */
3268 txr->next_avail_desc = 0;
3269 txr->next_to_clean = 0;
3270
3271 /* Free any existing tx buffers. */
3272 txbuf = txr->tx_buffers;
3273 for (i = 0; i < txr->num_desc; i++, txbuf++) {
3274 if (txbuf->m_head != NULL) {
3275 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
3276 0, txbuf->m_head->m_pkthdr.len,
3277 BUS_DMASYNC_POSTWRITE);
3278 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
3279 m_freem(txbuf->m_head);
3280 txbuf->m_head = NULL;
3281 }
3282 #ifdef DEV_NETMAP
3283 /*
3284 * In netmap mode, set the map for the packet buffer.
3285 * NOTE: Some drivers (not this one) also need to set
3286 * the physical buffer address in the NIC ring.
3287 * Slots in the netmap ring (indexed by "si") are
3288 * kring->nkr_hwofs positions "ahead" wrt the
3289 * corresponding slot in the NIC ring. In some drivers
3290 * (not here) nkr_hwofs can be negative. Function
3291 * netmap_idx_n2k() handles wraparounds properly.
3292 */
3293 if (slot) {
3294 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3295 netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3296 }
3297 #endif /* DEV_NETMAP */
3298 /* Clear the EOP descriptor pointer */
3299 txbuf->eop = NULL;
3300 }
3301
3302 #ifdef IXGBE_FDIR
3303 /* Set the rate at which we sample packets */
3304 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3305 txr->atr_sample = atr_sample_rate;
3306 #endif
3307
3308 /* Set number of descriptors available */
3309 txr->tx_avail = adapter->num_tx_desc;
3310
3311 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3312 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3313 IXGBE_TX_UNLOCK(txr);
3314 }
3315
3316 /*********************************************************************
3317 *
3318 * Initialize all transmit rings.
3319 *
3320 **********************************************************************/
3321 static int
3322 ixgbe_setup_transmit_structures(struct adapter *adapter)
3323 {
3324 struct tx_ring *txr = adapter->tx_rings;
3325
3326 for (int i = 0; i < adapter->num_queues; i++, txr++)
3327 ixgbe_setup_transmit_ring(txr);
3328
3329 return (0);
3330 }
3331
3332 /*********************************************************************
3333 *
3334 * Enable transmit unit.
3335 *
3336 **********************************************************************/
3337 static void
3338 ixgbe_initialize_transmit_units(struct adapter *adapter)
3339 {
3340 struct tx_ring *txr = adapter->tx_rings;
3341 struct ixgbe_hw *hw = &adapter->hw;
3342
3343 /* Setup the Base and Length of the Tx Descriptor Ring */
3344
3345 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3346 u64 tdba = txr->txdma.dma_paddr;
3347 u32 txctrl;
3348
3349 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3350 (tdba & 0x00000000ffffffffULL));
3351 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3352 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3353 adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3354
3355 /* Setup the HW Tx Head and Tail descriptor pointers */
3356 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3357 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3358
3359 /* Setup Transmit Descriptor Cmd Settings */
3360 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3361 txr->queue_status = IXGBE_QUEUE_IDLE;
3362
3363 /* Set the processing limit */
3364 txr->process_limit = ixgbe_tx_process_limit;
3365
3366 /* Disable Head Writeback */
3367 switch (hw->mac.type) {
3368 case ixgbe_mac_82598EB:
3369 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3370 break;
3371 case ixgbe_mac_82599EB:
3372 case ixgbe_mac_X540:
3373 default:
3374 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3375 break;
3376 }
3377 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3378 switch (hw->mac.type) {
3379 case ixgbe_mac_82598EB:
3380 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3381 break;
3382 case ixgbe_mac_82599EB:
3383 case ixgbe_mac_X540:
3384 default:
3385 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3386 break;
3387 }
3388
3389 }
3390
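	/*
	 * On 82599/X540 class devices the TX DMA engine must be enabled
	 * explicitly via DMATXCTL, and the transmit arbiter is parked
	 * while MTQC is programmed, as required before changing the
	 * queue-to-packet-buffer mapping.
	 */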
3391 if (hw->mac.type != ixgbe_mac_82598EB) {
3392 u32 dmatxctl, rttdcs;
3393 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3394 dmatxctl |= IXGBE_DMATXCTL_TE;
3395 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3396 /* Disable arbiter to set MTQC */
3397 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3398 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3399 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3400 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3401 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3402 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3403 }
3404
3405 return;
3406 }
3407
3408 /*********************************************************************
3409 *
3410 * Free all transmit rings.
3411 *
3412 **********************************************************************/
3413 static void
3414 ixgbe_free_transmit_structures(struct adapter *adapter)
3415 {
3416 struct tx_ring *txr = adapter->tx_rings;
3417
3418 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3419 ixgbe_free_transmit_buffers(txr);
3420 ixgbe_dma_free(adapter, &txr->txdma);
3421 IXGBE_TX_LOCK_DESTROY(txr);
3422 }
3423 free(adapter->tx_rings, M_DEVBUF);
3424 }
3425
3426 /*********************************************************************
3427 *
3428 * Free transmit ring related data structures.
3429 *
3430 **********************************************************************/
3431 static void
3432 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3433 {
3434 struct adapter *adapter = txr->adapter;
3435 struct ixgbe_tx_buf *tx_buffer;
3436 int i;
3437
3438 INIT_DEBUGOUT("free_transmit_ring: begin");
3439
3440 if (txr->tx_buffers == NULL)
3441 return;
3442
3443 tx_buffer = txr->tx_buffers;
3444 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3445 if (tx_buffer->m_head != NULL) {
3446 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
3447 0, tx_buffer->m_head->m_pkthdr.len,
3448 BUS_DMASYNC_POSTWRITE);
3449 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3450 m_freem(tx_buffer->m_head);
3451 tx_buffer->m_head = NULL;
3452 if (tx_buffer->map != NULL) {
3453 ixgbe_dmamap_destroy(txr->txtag,
3454 tx_buffer->map);
3455 tx_buffer->map = NULL;
3456 }
3457 } else if (tx_buffer->map != NULL) {
3458 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3459 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
3460 tx_buffer->map = NULL;
3461 }
3462 }
3463 #ifndef IXGBE_LEGACY_TX
3464 if (txr->br != NULL)
3465 buf_ring_free(txr->br, M_DEVBUF);
3466 #endif
3467 if (txr->tx_buffers != NULL) {
3468 free(txr->tx_buffers, M_DEVBUF);
3469 txr->tx_buffers = NULL;
3470 }
3471 if (txr->txtag != NULL) {
3472 ixgbe_dma_tag_destroy(txr->txtag);
3473 txr->txtag = NULL;
3474 }
3475 return;
3476 }
3477
3478 /*********************************************************************
3479 *
3480 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3481 *
3482 **********************************************************************/
3483
3484 static int
3485 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3486 u32 *cmd_type_len, u32 *olinfo_status)
3487 {
3488 struct m_tag *mtag;
3489 struct adapter *adapter = txr->adapter;
3490 struct ethercom *ec = &adapter->osdep.ec;
3491 struct ixgbe_adv_tx_context_desc *TXD;
3492 struct ether_vlan_header *eh;
3493 struct ip ip;
3494 struct ip6_hdr ip6;
3495 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3496 int ehdrlen, ip_hlen = 0;
3497 u16 etype;
3498 u8 ipproto __diagused = 0;
3499 int offload = TRUE;
3500 int ctxd = txr->next_avail_desc;
3501 u16 vtag = 0;
3502
3503 /* First check if TSO is to be used */
3504 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
3505 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3506
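	/*
	 * A single context descriptor is written ahead of the data
	 * descriptors; it carries the VLAN tag, MAC/IP header lengths
	 * and the L4 checksum command that apply to this packet.  Even
	 * with no checksum offload requested, one is still needed when
	 * a VLAN tag must be inserted (see below).
	 */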
3507 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
3508 offload = FALSE;
3509
3510 /* Indicate the whole packet as payload when not doing TSO */
3511 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3512
3513 /* Now ready a context descriptor */
3514 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3515
3516 /*
3517 ** In advanced descriptors the vlan tag must
3518 ** be placed into the context descriptor. Hence
3519 ** we need to make one even if not doing offloads.
3520 */
3521 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3522 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3523 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3524 } else if (offload == FALSE) /* ... no offload to do */
3525 return 0;
3526
3527 /*
3528 * Determine where frame payload starts.
3529 * Jump over vlan headers if already present,
3530 * helpful for QinQ too.
3531 */
3532 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
3533 eh = mtod(mp, struct ether_vlan_header *);
3534 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3535 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3536 etype = ntohs(eh->evl_proto);
3537 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3538 } else {
3539 etype = ntohs(eh->evl_encap_proto);
3540 ehdrlen = ETHER_HDR_LEN;
3541 }
3542
3543 /* Set the ether header length */
3544 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3545
3546 switch (etype) {
3547 case ETHERTYPE_IP:
3548 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
3549 ip_hlen = ip.ip_hl << 2;
3550 ipproto = ip.ip_p;
3551 #if 0
3552 ip.ip_sum = 0;
3553 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
3554 #else
3555 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3556 ip.ip_sum == 0);
3557 #endif
3558 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3559 break;
3560 case ETHERTYPE_IPV6:
3561 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
3562 ip_hlen = sizeof(ip6);
3563 /* XXX-BZ this will go badly in case of ext hdrs. */
3564 ipproto = ip6.ip6_nxt;
3565 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3566 break;
3567 default:
3568 break;
3569 }
3570
3571 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
3572 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3573
3574 vlan_macip_lens |= ip_hlen;
3575 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3576
3577 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
3578 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3579 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3580 KASSERT(ipproto == IPPROTO_TCP);
3581 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
3582 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3583 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3584 KASSERT(ipproto == IPPROTO_UDP);
3585 }
3586
3587 /* Now copy bits into descriptor */
3588 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3589 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3590 TXD->seqnum_seed = htole32(0);
3591 TXD->mss_l4len_idx = htole32(0);
3592
3593 /* We've consumed the first desc, adjust counters */
3594 if (++ctxd == txr->num_desc)
3595 ctxd = 0;
3596 txr->next_avail_desc = ctxd;
3597 --txr->tx_avail;
3598
3599 return 0;
3600 }
3601
3602 /**********************************************************************
3603 *
3604 * Setup work for hardware segmentation offload (TSO) on
3605 * adapters using advanced tx descriptors
3606 *
3607 **********************************************************************/
3608 static int
3609 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3610 u32 *cmd_type_len, u32 *olinfo_status)
3611 {
3612 struct m_tag *mtag;
3613 struct adapter *adapter = txr->adapter;
3614 struct ethercom *ec = &adapter->osdep.ec;
3615 struct ixgbe_adv_tx_context_desc *TXD;
3616 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3617 u32 mss_l4len_idx = 0, paylen;
3618 u16 vtag = 0, eh_type;
3619 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3620 struct ether_vlan_header *eh;
3621 #ifdef INET6
3622 struct ip6_hdr *ip6;
3623 #endif
3624 #ifdef INET
3625 struct ip *ip;
3626 #endif
3627 struct tcphdr *th;
3628
3629
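	/*
	 * For TSO the hardware replicates and fixes up the IP/TCP
	 * headers for every segment; the driver only pre-seeds the TCP
	 * checksum with the pseudo-header sum (and clears the IPv4
	 * header checksum) so the hardware can complete the
	 * per-segment values.
	 */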
3630 /*
3631 * Determine where frame payload starts.
3632 * Jump over vlan headers if already present
3633 */
3634 eh = mtod(mp, struct ether_vlan_header *);
3635 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3636 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3637 eh_type = eh->evl_proto;
3638 } else {
3639 ehdrlen = ETHER_HDR_LEN;
3640 eh_type = eh->evl_encap_proto;
3641 }
3642
3643 switch (ntohs(eh_type)) {
3644 #ifdef INET6
3645 case ETHERTYPE_IPV6:
3646 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3647 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3648 if (ip6->ip6_nxt != IPPROTO_TCP)
3649 return (ENXIO);
3650 ip_hlen = sizeof(struct ip6_hdr);
3651 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3652 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
3653 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3654 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3655 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3656 break;
3657 #endif
3658 #ifdef INET
3659 case ETHERTYPE_IP:
3660 ip = (struct ip *)(mp->m_data + ehdrlen);
3661 if (ip->ip_p != IPPROTO_TCP)
3662 return (ENXIO);
3663 ip->ip_sum = 0;
3664 ip_hlen = ip->ip_hl << 2;
3665 th = (struct tcphdr *)((char *)ip + ip_hlen);
3666 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3667 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3668 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3669 /* Tell transmit desc to also do IPv4 checksum. */
3670 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3671 break;
3672 #endif
3673 default:
3674 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3675 __func__, ntohs(eh_type));
3676 break;
3677 }
3678
3679 ctxd = txr->next_avail_desc;
3680 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3681
3682 tcp_hlen = th->th_off << 2;
3683
3684 /* This is used in the transmit desc in encap */
3685 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
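	/*
	 * For TSO the hardware regenerates the MAC/IP/TCP headers for every
	 * segment, so the PAYLEN handed to the data descriptor covers the
	 * TCP payload only, with all headers excluded.
	 */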
3686
3687 /* VLAN MACLEN IPLEN */
3688 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3689 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3690 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3691 }
3692
3693 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3694 vlan_macip_lens |= ip_hlen;
3695 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3696
3697 /* ADV DTYPE TUCMD */
3698 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3699 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3700 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3701
3702 /* MSS L4LEN IDX */
3703 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
3704 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3705 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3706
3707 TXD->seqnum_seed = htole32(0);
3708
3709 if (++ctxd == txr->num_desc)
3710 ctxd = 0;
3711
3712 txr->tx_avail--;
3713 txr->next_avail_desc = ctxd;
3714 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3715 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3716 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3717 ++txr->tso_tx.ev_count;
3718 return (0);
3719 }
3720
3721 #ifdef IXGBE_FDIR
3722 /*
3723 ** This routine parses packet headers so that Flow
3724 ** Director can make a hashed filter table entry
3725 ** allowing traffic flows to be identified and kept
3726 ** on the same cpu. This could be a performance
3727 ** hit, but we only do it for one out of every
3728 ** IXGBE_FDIR_RATE packets.
3729 */
3730 static void
3731 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3732 {
3733 struct adapter *adapter = txr->adapter;
3734 struct ix_queue *que;
3735 struct ip *ip;
3736 struct tcphdr *th;
3737 struct udphdr *uh;
3738 struct ether_vlan_header *eh;
3739 union ixgbe_atr_hash_dword input = {.dword = 0};
3740 union ixgbe_atr_hash_dword common = {.dword = 0};
3741 int ehdrlen, ip_hlen;
3742 u16 etype;
3743
3744 eh = mtod(mp, struct ether_vlan_header *);
3745 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3746 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3747 etype = eh->evl_proto;
3748 } else {
3749 ehdrlen = ETHER_HDR_LEN;
3750 etype = eh->evl_encap_proto;
3751 }
3752
3753 /* Only handling IPv4 */
3754 if (etype != htons(ETHERTYPE_IP))
3755 return;
3756
3757 ip = (struct ip *)(mp->m_data + ehdrlen);
3758 ip_hlen = ip->ip_hl << 2;
3759
3760 /* check if we're UDP or TCP */
3761 switch (ip->ip_p) {
3762 case IPPROTO_TCP:
3763 th = (struct tcphdr *)((char *)ip + ip_hlen);
3764 /* src and dst are inverted */
3765 common.port.dst ^= th->th_sport;
3766 common.port.src ^= th->th_dport;
3767 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3768 break;
3769 case IPPROTO_UDP:
3770 uh = (struct udphdr *)((char *)ip + ip_hlen);
3771 /* src and dst are inverted */
3772 common.port.dst ^= uh->uh_sport;
3773 common.port.src ^= uh->uh_dport;
3774 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3775 break;
3776 default:
3777 return;
3778 }
3779
3780 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3781 if (mp->m_pkthdr.ether_vtag)
3782 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3783 else
3784 common.flex_bytes ^= etype;
3785 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3786
3787 que = &adapter->queues[txr->me];
3788 /*
3789 ** This assumes the Rx queue and Tx
3790 ** queue are bound to the same CPU
3791 */
3792 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3793 input, common, que->msix);
3794 }
3795 #endif /* IXGBE_FDIR */
3796
3797 /**********************************************************************
3798 *
3799 * Examine each tx_buffer in the used queue. If the hardware is done
3800 * processing the packet then free associated resources. The
3801 * tx_buffer is put back on the free queue.
3802 *
3803 **********************************************************************/
3804 static bool
3805 ixgbe_txeof(struct tx_ring *txr)
3806 {
3807 struct adapter *adapter = txr->adapter;
3808 struct ifnet *ifp = adapter->ifp;
3809 u32 work, processed = 0;
3810 u16 limit = txr->process_limit;
3811 struct ixgbe_tx_buf *buf;
3812 union ixgbe_adv_tx_desc *txd;
3813 struct timeval now, elapsed;
3814
3815 KASSERT(mutex_owned(&txr->tx_mtx));
3816
3817 #ifdef DEV_NETMAP
3818 if (ifp->if_capenable & IFCAP_NETMAP) {
3819 struct netmap_adapter *na = NA(ifp);
3820 struct netmap_kring *kring = &na->tx_rings[txr->me];
3821 txd = txr->tx_base;
3822 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3823 BUS_DMASYNC_POSTREAD);
3824 /*
3825 * In netmap mode, all the work is done in the context
3826 * of the client thread. Interrupt handlers only wake up
3827 * clients, which may be sleeping on individual rings
3828 * or on a global resource for all rings.
3829 * To implement tx interrupt mitigation, we wake up the client
3830 * thread roughly every half ring, even if the NIC interrupts
3831 * more frequently. This is implemented as follows:
3832 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3833 * the slot that should wake up the thread (nkr_num_slots
3834 * means the user thread should not be woken up);
3835 * - the driver ignores tx interrupts unless netmap_mitigate=0
3836 * or the slot has the DD bit set.
3837 *
3838 * When the driver has separate locks, we need to
3839 * release and re-acquire txlock to avoid deadlocks.
3840 * XXX see if we can find a better way.
3841 */
3842 if (!netmap_mitigate ||
3843 (kring->nr_kflags < kring->nkr_num_slots &&
3844 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3845 netmap_tx_irq(ifp, txr->me |
3846 (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3847 }
3848 return FALSE;
3849 }
3850 #endif /* DEV_NETMAP */
3851
3852 if (txr->tx_avail == txr->num_desc) {
3853 txr->queue_status = IXGBE_QUEUE_IDLE;
3854 return false;
3855 }
3856
3857 /* Get work starting point */
3858 work = txr->next_to_clean;
3859 buf = &txr->tx_buffers[work];
3860 txd = &txr->tx_base[work];
3861 work -= txr->num_desc; /* The distance to ring end */
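	/*
	 * The index is kept num_desc short of zero (it is unsigned, so this
	 * is modular arithmetic): it counts up towards zero, the "!work"
	 * test below is the wrap condition, and adding num_desc back at the
	 * end recovers the real ring index.
	 */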
3862 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3863 BUS_DMASYNC_POSTREAD);
3864 do {
3865 union ixgbe_adv_tx_desc *eop= buf->eop;
3866 if (eop == NULL) /* No work */
3867 break;
3868
3869 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3870 break; /* I/O not complete */
3871
3872 if (buf->m_head) {
3873 txr->bytes +=
3874 buf->m_head->m_pkthdr.len;
3875 bus_dmamap_sync(txr->txtag->dt_dmat,
3876 buf->map,
3877 0, buf->m_head->m_pkthdr.len,
3878 BUS_DMASYNC_POSTWRITE);
3879 ixgbe_dmamap_unload(txr->txtag,
3880 buf->map);
3881 m_freem(buf->m_head);
3882 buf->m_head = NULL;
3883 /*
3884 * NetBSD: Don't override buf->map with NULL here.
3885 * It'll panic when a ring runs one lap around.
3886 */
3887 }
3888 buf->eop = NULL;
3889 ++txr->tx_avail;
3890
3891 /* We clean the range if multi segment */
3892 while (txd != eop) {
3893 ++txd;
3894 ++buf;
3895 ++work;
3896 /* wrap the ring? */
3897 if (__predict_false(!work)) {
3898 work -= txr->num_desc;
3899 buf = txr->tx_buffers;
3900 txd = txr->tx_base;
3901 }
3902 if (buf->m_head) {
3903 txr->bytes +=
3904 buf->m_head->m_pkthdr.len;
3905 bus_dmamap_sync(txr->txtag->dt_dmat,
3906 buf->map,
3907 0, buf->m_head->m_pkthdr.len,
3908 BUS_DMASYNC_POSTWRITE);
3909 ixgbe_dmamap_unload(txr->txtag,
3910 buf->map);
3911 m_freem(buf->m_head);
3912 buf->m_head = NULL;
3913 /*
3914 * NetBSD: Don't override buf->map with NULL
3915 * here. It'll panic when a ring runs one lap
3916 * around.
3917 */
3918 }
3919 ++txr->tx_avail;
3920 buf->eop = NULL;
3921
3922 }
3923 ++txr->packets;
3924 ++processed;
3925 ++ifp->if_opackets;
3926 getmicrotime(&txr->watchdog_time);
3927
3928 /* Try the next packet */
3929 ++txd;
3930 ++buf;
3931 ++work;
3932 /* reset with a wrap */
3933 if (__predict_false(!work)) {
3934 work -= txr->num_desc;
3935 buf = txr->tx_buffers;
3936 txd = txr->tx_base;
3937 }
3938 prefetch(txd);
3939 } while (__predict_true(--limit));
3940
3941 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3942 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3943
3944 work += txr->num_desc;
3945 txr->next_to_clean = work;
3946
3947 /*
3948 	** Watchdog calculation: we know there's
3949 	** work outstanding, or the first return
3950 	** would have been taken, so nothing processed
3951 	** for too long indicates a hang.
3952 */
3953 getmicrotime(&now);
3954 timersub(&now, &txr->watchdog_time, &elapsed);
3955 if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
3956 txr->queue_status = IXGBE_QUEUE_HUNG;
3957
3958 if (txr->tx_avail == txr->num_desc) {
3959 txr->queue_status = IXGBE_QUEUE_IDLE;
3960 return false;
3961 }
3962
3963 return true;
3964 }
3965
3966 /*********************************************************************
3967 *
3968 * Refresh mbuf buffers for RX descriptor rings
3969  *   - now keeps its own state so discards due to resource
3970  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3971  *     it just returns, keeping its placeholder, so it can simply
3972  *     be called again to retry.
3973 *
3974 **********************************************************************/
3975 static void
3976 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3977 {
3978 struct adapter *adapter = rxr->adapter;
3979 struct ixgbe_rx_buf *rxbuf;
3980 struct mbuf *mp;
3981 int i, j, error;
3982 bool refreshed = false;
3983
3984 i = j = rxr->next_to_refresh;
3985 /* Control the loop with one beyond */
3986 if (++j == rxr->num_desc)
3987 j = 0;
3988
3989 while (j != limit) {
3990 rxbuf = &rxr->rx_buffers[i];
3991 if (rxbuf->buf == NULL) {
3992 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
3993 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
3994 if (mp == NULL) {
3995 rxr->no_jmbuf.ev_count++;
3996 goto update;
3997 }
3998 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3999 m_adj(mp, ETHER_ALIGN);
4000 } else
4001 mp = rxbuf->buf;
4002
4003 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4004 /* If we're dealing with an mbuf that was copied rather
4005 * than replaced, there's no need to go through busdma.
4006 */
4007 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
4008 /* Get the memory mapping */
4009 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4010 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4011 if (error != 0) {
4012 printf("Refresh mbufs: payload dmamap load"
4013 " failure - %d\n", error);
4014 m_free(mp);
4015 rxbuf->buf = NULL;
4016 goto update;
4017 }
4018 rxbuf->buf = mp;
4019 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4020 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
4021 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
4022 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4023 } else {
4024 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
4025 rxbuf->flags &= ~IXGBE_RX_COPY;
4026 }
4027
4028 refreshed = true;
4029 /* Next is precalculated */
4030 i = j;
4031 rxr->next_to_refresh = i;
4032 if (++j == rxr->num_desc)
4033 j = 0;
4034 }
4035 update:
4036 if (refreshed) /* Update hardware tail index */
4037 IXGBE_WRITE_REG(&adapter->hw,
4038 IXGBE_RDT(rxr->me), rxr->next_to_refresh);
4039 return;
4040 }
4041
4042 /*********************************************************************
4043 *
4044 * Allocate memory for rx_buffer structures. Since we use one
4045  *  rx_buffer per received packet, the maximum number of rx_buffers
4046 * that we'll need is equal to the number of receive descriptors
4047 * that we've allocated.
4048 *
4049 **********************************************************************/
4050 static int
4051 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
4052 {
4053 struct adapter *adapter = rxr->adapter;
4054 device_t dev = adapter->dev;
4055 struct ixgbe_rx_buf *rxbuf;
4056 int i, bsize, error;
4057
4058 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
4059 if (!(rxr->rx_buffers =
4060 (struct ixgbe_rx_buf *) malloc(bsize,
4061 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4062 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
4063 error = ENOMEM;
4064 goto fail;
4065 }
4066
4067 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
4068 1, 0, /* alignment, bounds */
4069 MJUM16BYTES, /* maxsize */
4070 1, /* nsegments */
4071 MJUM16BYTES, /* maxsegsize */
4072 0, /* flags */
4073 &rxr->ptag))) {
4074 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
4075 goto fail;
4076 }
4077
4078 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
4079 rxbuf = &rxr->rx_buffers[i];
4080 error = ixgbe_dmamap_create(rxr->ptag,
4081 BUS_DMA_NOWAIT, &rxbuf->pmap);
4082 if (error) {
4083 aprint_error_dev(dev, "Unable to create RX dma map\n");
4084 goto fail;
4085 }
4086 }
4087
4088 return (0);
4089
4090 fail:
4091 /* Frees all, but can handle partial completion */
4092 ixgbe_free_receive_structures(adapter);
4093 return (error);
4094 }
4095
4096 /*
4097 ** Used to detect a descriptor that has
4098 ** been merged by Hardware RSC.
4099 */
4100 static inline u32
4101 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
4102 {
4103 return (le32toh(rx->wb.lower.lo_dword.data) &
4104 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
4105 }
4106
4107 /*********************************************************************
4108 *
4109 * Initialize Hardware RSC (LRO) feature on 82599
4110  *  for an RX ring; this is toggled by the LRO capability
4111  *  even though it is transparent to the stack.
4112  *
4113  *  NOTE: since this HW feature only works with IPv4, and
4114  *  our testing has shown soft LRO to be just as effective,
4115  *  I have decided to disable this by default.
4116 *
4117 **********************************************************************/
4118 static void
4119 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
4120 {
4121 struct adapter *adapter = rxr->adapter;
4122 struct ixgbe_hw *hw = &adapter->hw;
4123 u32 rscctrl, rdrxctl;
4124
4125 /* If turning LRO/RSC off we need to disable it */
4126 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
4127 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4128 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back; otherwise RSC stays enabled */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4129 		return;
4130 }
4131
4132 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4133 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4134 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
4135 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4136 #endif /* DEV_NETMAP */
4137 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4138 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4139 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4140
4141 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4142 rscctrl |= IXGBE_RSCCTL_RSCEN;
4143 /*
4144 ** Limit the total number of descriptors that
4145 ** can be combined, so it does not exceed 64K
4146 */
4147 if (rxr->mbuf_sz == MCLBYTES)
4148 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4149 else if (rxr->mbuf_sz == MJUMPAGESIZE)
4150 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4151 else if (rxr->mbuf_sz == MJUM9BYTES)
4152 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4153 else /* Using 16K cluster */
4154 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4155
4156 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4157
4158 /* Enable TCP header recognition */
4159 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4160 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4161 IXGBE_PSRTYPE_TCPHDR));
4162
4163 /* Disable RSC for ACK packets */
4164 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4165 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4166
4167 rxr->hw_rsc = TRUE;
4168 }
4169
4170
4171 static void
4172 ixgbe_free_receive_ring(struct rx_ring *rxr)
4173 {
4174 struct ixgbe_rx_buf *rxbuf;
4175 int i;
4176
4177 for (i = 0; i < rxr->num_desc; i++) {
4178 rxbuf = &rxr->rx_buffers[i];
4179 if (rxbuf->buf != NULL) {
4180 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4181 0, rxbuf->buf->m_pkthdr.len,
4182 BUS_DMASYNC_POSTREAD);
4183 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4184 rxbuf->buf->m_flags |= M_PKTHDR;
4185 m_freem(rxbuf->buf);
4186 rxbuf->buf = NULL;
4187 }
4188 }
4189 }
4190
4191
4192 /*********************************************************************
4193 *
4194 * Initialize a receive ring and its buffers.
4195 *
4196 **********************************************************************/
4197 static int
4198 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4199 {
4200 struct adapter *adapter;
4201 struct ixgbe_rx_buf *rxbuf;
4202 #ifdef LRO
4203 struct ifnet *ifp;
4204 struct lro_ctrl *lro = &rxr->lro;
4205 #endif /* LRO */
4206 int rsize, error = 0;
4207 #ifdef DEV_NETMAP
4208 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4209 struct netmap_slot *slot;
4210 #endif /* DEV_NETMAP */
4211
4212 adapter = rxr->adapter;
4213 #ifdef LRO
4214 ifp = adapter->ifp;
4215 #endif /* LRO */
4216
4217 /* Clear the ring contents */
4218 IXGBE_RX_LOCK(rxr);
4219 #ifdef DEV_NETMAP
4220 /* same as in ixgbe_setup_transmit_ring() */
4221 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4222 #endif /* DEV_NETMAP */
4223 rsize = roundup2(adapter->num_rx_desc *
4224 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4225 bzero((void *)rxr->rx_base, rsize);
4226 /* Cache the size */
4227 rxr->mbuf_sz = adapter->rx_mbuf_sz;
4228
4229 /* Free current RX buffer structs and their mbufs */
4230 ixgbe_free_receive_ring(rxr);
4231
4232 IXGBE_RX_UNLOCK(rxr);
4233
4234 /* Now reinitialize our supply of jumbo mbufs. The number
4235 * or size of jumbo mbufs may have changed.
4236 */
4237 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
4238 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
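	/*
	 * Twice the ring size is requested, presumably to leave headroom for
	 * clusters that are still loaned out to the stack while the ring is
	 * being repopulated.
	 */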
4239
4240 IXGBE_RX_LOCK(rxr);
4241
4242 /* Now replenish the mbufs */
4243 for (int j = 0; j != rxr->num_desc; ++j) {
4244 struct mbuf *mp;
4245
4246 rxbuf = &rxr->rx_buffers[j];
4247 #ifdef DEV_NETMAP
4248 /*
4249 * In netmap mode, fill the map and set the buffer
4250 * address in the NIC ring, considering the offset
4251 * between the netmap and NIC rings (see comment in
4252 * ixgbe_setup_transmit_ring() ). No need to allocate
4253 * an mbuf, so end the block with a continue;
4254 */
4255 if (slot) {
4256 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4257 uint64_t paddr;
4258 void *addr;
4259
4260 addr = PNMB(slot + sj, &paddr);
4261 netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4262 /* Update descriptor */
4263 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4264 continue;
4265 }
4266 #endif /* DEV_NETMAP */
4267 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4268 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
4269 if (rxbuf->buf == NULL) {
4270 error = ENOBUFS;
4271 goto fail;
4272 }
4273 mp = rxbuf->buf;
4274 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4275 /* Get the memory mapping */
4276 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4277 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4278 if (error != 0)
4279 goto fail;
4280 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4281 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
4282 /* Update descriptor */
4283 rxr->rx_base[j].read.pkt_addr =
4284 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4285 }
4286
4287
4288 /* Setup our descriptor indices */
4289 rxr->next_to_check = 0;
4290 rxr->next_to_refresh = 0;
4291 rxr->lro_enabled = FALSE;
4292 rxr->rx_copies.ev_count = 0;
4293 rxr->rx_bytes.ev_count = 0;
4294 rxr->discard = FALSE;
4295 rxr->vtag_strip = FALSE;
4296
4297 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4298 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4299
4300 /*
4301 ** Now set up the LRO interface:
4302 */
4303 if (ixgbe_rsc_enable)
4304 ixgbe_setup_hw_rsc(rxr);
4305 #ifdef LRO
4306 else if (ifp->if_capenable & IFCAP_LRO) {
4307 device_t dev = adapter->dev;
4308 int err = tcp_lro_init(lro);
4309 if (err) {
4310 device_printf(dev, "LRO Initialization failed!\n");
4311 goto fail;
4312 }
4313 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4314 rxr->lro_enabled = TRUE;
4315 lro->ifp = adapter->ifp;
4316 }
4317 #endif /* LRO */
4318
4319 IXGBE_RX_UNLOCK(rxr);
4320 return (0);
4321
4322 fail:
4323 ixgbe_free_receive_ring(rxr);
4324 IXGBE_RX_UNLOCK(rxr);
4325 return (error);
4326 }
4327
4328 /*********************************************************************
4329 *
4330 * Initialize all receive rings.
4331 *
4332 **********************************************************************/
4333 static int
4334 ixgbe_setup_receive_structures(struct adapter *adapter)
4335 {
4336 struct rx_ring *rxr = adapter->rx_rings;
4337 int j;
4338
4339 for (j = 0; j < adapter->num_queues; j++, rxr++)
4340 if (ixgbe_setup_receive_ring(rxr))
4341 goto fail;
4342
4343 return (0);
4344 fail:
4345 /*
4346 	 * Free RX buffers allocated so far; we will only handle
4347 	 * the rings that completed, the failing case will have
4348 	 * cleaned up for itself. 'j' failed, so it's the terminus.
4349 */
4350 for (int i = 0; i < j; ++i) {
4351 rxr = &adapter->rx_rings[i];
4352 ixgbe_free_receive_ring(rxr);
4353 }
4354
4355 return (ENOBUFS);
4356 }
4357
4358 /*********************************************************************
4359 *
4360 * Setup receive registers and features.
4361 *
4362 **********************************************************************/
4363 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4364
4365 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
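/*
 * SRRCTL expresses the packet buffer size in 1KB units
 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT bytes), so BSIZEPKT_ROUNDUP rounds
 * the mbuf size up to the next 1KB boundary before it is shifted into
 * that field below.
 */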
4366
4367 static void
4368 ixgbe_initialize_receive_units(struct adapter *adapter)
4369 {
4370 int i;
4371 struct rx_ring *rxr = adapter->rx_rings;
4372 struct ixgbe_hw *hw = &adapter->hw;
4373 struct ifnet *ifp = adapter->ifp;
4374 u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
4375 u32 reta, mrqc = 0, hlreg, r[10];
4376
4377
4378 /*
4379 * Make sure receives are disabled while
4380 * setting up the descriptor ring
4381 */
4382 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4383 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4384 rxctrl & ~IXGBE_RXCTRL_RXEN);
4385
4386 /* Enable broadcasts */
4387 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4388 fctrl |= IXGBE_FCTRL_BAM;
4389 fctrl |= IXGBE_FCTRL_DPF;
4390 fctrl |= IXGBE_FCTRL_PMCF;
4391 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4392
4393 /* Set for Jumbo Frames? */
4394 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4395 if (ifp->if_mtu > ETHERMTU)
4396 hlreg |= IXGBE_HLREG0_JUMBOEN;
4397 else
4398 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4399 #ifdef DEV_NETMAP
4400 /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4401 if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4402 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4403 else
4404 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4405 #endif /* DEV_NETMAP */
4406 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4407
4408 bufsz = (adapter->rx_mbuf_sz +
4409 BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4410
4411 for (i = 0; i < adapter->num_queues; i++, rxr++) {
4412 u64 rdba = rxr->rxdma.dma_paddr;
4413
4414 /* Setup the Base and Length of the Rx Descriptor Ring */
4415 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4416 (rdba & 0x00000000ffffffffULL));
4417 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4418 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4419 adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4420
4421 /* Set up the SRRCTL register */
4422 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4423 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4424 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4425 srrctl |= bufsz;
4426 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4427 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4428
4429 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4430 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4431 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4432
4433 /* Set the processing limit */
4434 rxr->process_limit = ixgbe_rx_process_limit;
4435 }
4436
4437 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4438 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4439 IXGBE_PSRTYPE_UDPHDR |
4440 IXGBE_PSRTYPE_IPV4HDR |
4441 IXGBE_PSRTYPE_IPV6HDR;
4442 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4443 }
4444
4445 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4446
4447 /* Setup RSS */
4448 if (adapter->num_queues > 1) {
4449 int j;
4450 reta = 0;
4451
4452 /* set up random bits */
4453 cprng_fast(&r, sizeof(r));
4454
4455 /* Set up the redirection table */
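		/*
		 * The table has 128 one-byte entries, four of which are
		 * packed into each 32-bit RETA register, hence the write on
		 * every fourth iteration.  Queue indices are assigned
		 * round-robin, e.g. with 4 queues the entries cycle
		 * 0,1,2,3,0,1,...
		 */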
4456 for (i = 0, j = 0; i < 128; i++, j++) {
4457 if (j == adapter->num_queues) j = 0;
4458 reta = (reta << 8) | (j * 0x11);
4459 if ((i & 3) == 3)
4460 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4461 }
4462
4463 /* Now fill our hash function seeds */
4464 for (i = 0; i < 10; i++)
4465 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
4466
4467 /* Perform hash on these packet types */
4468 mrqc = IXGBE_MRQC_RSSEN
4469 | IXGBE_MRQC_RSS_FIELD_IPV4
4470 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4471 | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4472 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4473 | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4474 | IXGBE_MRQC_RSS_FIELD_IPV6
4475 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4476 | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4477 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4478 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4479
4480 /* RSS and RX IPP Checksum are mutually exclusive */
4481 rxcsum |= IXGBE_RXCSUM_PCSD;
4482 }
4483
4484 if (ifp->if_capenable & IFCAP_RXCSUM)
4485 rxcsum |= IXGBE_RXCSUM_PCSD;
4486
4487 if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4488 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4489
4490 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4491
4492 return;
4493 }
4494
4495 /*********************************************************************
4496 *
4497 * Free all receive rings.
4498 *
4499 **********************************************************************/
4500 static void
4501 ixgbe_free_receive_structures(struct adapter *adapter)
4502 {
4503 struct rx_ring *rxr = adapter->rx_rings;
4504
4505 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4506 #ifdef LRO
4507 struct lro_ctrl *lro = &rxr->lro;
4508 #endif /* LRO */
4509 ixgbe_free_receive_buffers(rxr);
4510 #ifdef LRO
4511 /* Free LRO memory */
4512 tcp_lro_free(lro);
4513 #endif /* LRO */
4514 /* Free the ring memory as well */
4515 ixgbe_dma_free(adapter, &rxr->rxdma);
4516 IXGBE_RX_LOCK_DESTROY(rxr);
4517 }
4518
4519 free(adapter->rx_rings, M_DEVBUF);
4520 }
4521
4522
4523 /*********************************************************************
4524 *
4525 * Free receive ring data structures
4526 *
4527 **********************************************************************/
4528 static void
4529 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4530 {
4531 struct adapter *adapter = rxr->adapter;
4532 struct ixgbe_rx_buf *rxbuf;
4533
4534 INIT_DEBUGOUT("free_receive_structures: begin");
4535
4536 /* Cleanup any existing buffers */
4537 if (rxr->rx_buffers != NULL) {
4538 for (int i = 0; i < adapter->num_rx_desc; i++) {
4539 rxbuf = &rxr->rx_buffers[i];
4540 if (rxbuf->buf != NULL) {
4541 bus_dmamap_sync(rxr->ptag->dt_dmat,
4542 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
4543 BUS_DMASYNC_POSTREAD);
4544 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4545 rxbuf->buf->m_flags |= M_PKTHDR;
4546 m_freem(rxbuf->buf);
4547 }
4548 rxbuf->buf = NULL;
4549 if (rxbuf->pmap != NULL) {
4550 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4551 rxbuf->pmap = NULL;
4552 }
4553 }
4554 if (rxr->rx_buffers != NULL) {
4555 free(rxr->rx_buffers, M_DEVBUF);
4556 rxr->rx_buffers = NULL;
4557 }
4558 }
4559
4560 if (rxr->ptag != NULL) {
4561 ixgbe_dma_tag_destroy(rxr->ptag);
4562 rxr->ptag = NULL;
4563 }
4564
4565 return;
4566 }
4567
4568 static __inline void
4569 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4570 {
4571 int s;
4572
4573 #ifdef LRO
4574 struct adapter *adapter = ifp->if_softc;
4575 struct ethercom *ec = &adapter->osdep.ec;
4576
4577 /*
4578 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
4579 	 * been verified by hardware, and which carry no VLAN tag in the
4580 	 * ethernet header. For IPv6 we do not yet support extension headers.
4581 */
4582 if (rxr->lro_enabled &&
4583 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
4584 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4585 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4586 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4587 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4588 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4589 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4590 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4591 /*
4592 * Send to the stack if:
4593 ** - LRO not enabled, or
4594 ** - no LRO resources, or
4595 ** - lro enqueue fails
4596 */
4597 if (rxr->lro.lro_cnt != 0)
4598 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4599 return;
4600 }
4601 #endif /* LRO */
4602
4603 IXGBE_RX_UNLOCK(rxr);
4604
4605 s = splnet();
4606 /* Pass this up to any BPF listeners. */
4607 bpf_mtap(ifp, m);
4608 (*ifp->if_input)(ifp, m);
4609 splx(s);
4610
4611 IXGBE_RX_LOCK(rxr);
4612 }
4613
4614 static __inline void
4615 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4616 {
4617 struct ixgbe_rx_buf *rbuf;
4618
4619 rbuf = &rxr->rx_buffers[i];
4620
4621 if (rbuf->fmp != NULL) {/* Partial chain ? */
4622 rbuf->fmp->m_flags |= M_PKTHDR;
4623 m_freem(rbuf->fmp);
4624 rbuf->fmp = NULL;
4625 }
4626
4627 /*
4628 ** With advanced descriptors the writeback
4629 	** clobbers the buffer addrs, so it's easier
4630 ** to just free the existing mbufs and take
4631 ** the normal refresh path to get new buffers
4632 ** and mapping.
4633 */
4634 if (rbuf->buf) {
4635 m_free(rbuf->buf);
4636 rbuf->buf = NULL;
4637 }
4638
4639 return;
4640 }
4641
4642
4643 /*********************************************************************
4644 *
4645 * This routine executes in interrupt context. It replenishes
4646  *  the mbufs in the descriptor ring and sends data which has been
4647  *  dma'ed into host memory to the upper layer.
4648 *
4649 * We loop at most count times if count is > 0, or until done if
4650 * count < 0.
4651 *
4652 * Return TRUE for more work, FALSE for all clean.
4653 *********************************************************************/
4654 static bool
4655 ixgbe_rxeof(struct ix_queue *que)
4656 {
4657 struct adapter *adapter = que->adapter;
4658 struct rx_ring *rxr = que->rxr;
4659 struct ifnet *ifp = adapter->ifp;
4660 #ifdef LRO
4661 struct lro_ctrl *lro = &rxr->lro;
4662 struct lro_entry *queued;
4663 #endif /* LRO */
4664 int i, nextp, processed = 0;
4665 u32 staterr = 0;
4666 u16 count = rxr->process_limit;
4667 union ixgbe_adv_rx_desc *cur;
4668 struct ixgbe_rx_buf *rbuf, *nbuf;
4669
4670 IXGBE_RX_LOCK(rxr);
4671
4672 #ifdef DEV_NETMAP
4673 /* Same as the txeof routine: wakeup clients on intr. */
4674 if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4675 return (FALSE);
4676 #endif /* DEV_NETMAP */
4677 for (i = rxr->next_to_check; count != 0;) {
4678 struct mbuf *sendmp, *mp;
4679 u32 rsc, ptype;
4680 u16 len;
4681 u16 vtag = 0;
4682 bool eop;
4683
4684 /* Sync the ring. */
4685 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4686 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4687
4688 cur = &rxr->rx_base[i];
4689 staterr = le32toh(cur->wb.upper.status_error);
4690
4691 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4692 break;
4693 if ((ifp->if_flags & IFF_RUNNING) == 0)
4694 break;
4695
4696 count--;
4697 sendmp = NULL;
4698 nbuf = NULL;
4699 rsc = 0;
4700 cur->wb.upper.status_error = 0;
4701 rbuf = &rxr->rx_buffers[i];
4702 mp = rbuf->buf;
4703
4704 len = le16toh(cur->wb.upper.length);
4705 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4706 IXGBE_RXDADV_PKTTYPE_MASK;
4707 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4708
4709 /* Make sure bad packets are discarded */
4710 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4711 (rxr->discard)) {
4712 rxr->rx_discarded.ev_count++;
4713 if (eop)
4714 rxr->discard = FALSE;
4715 else
4716 rxr->discard = TRUE;
4717 ixgbe_rx_discard(rxr, i);
4718 goto next_desc;
4719 }
4720
4721 /*
4722 ** On 82599 which supports a hardware
4723 ** LRO (called HW RSC), packets need
4724 ** not be fragmented across sequential
4725 ** descriptors, rather the next descriptor
4726 ** is indicated in bits of the descriptor.
4727 		** This also means that we might process
4728 		** more than one packet at a time, something
4729 		** that has never been true before; this
4730 		** required eliminating global chain pointers
4731 ** in favor of what we are doing here. -jfv
4732 */
4733 if (!eop) {
4734 /*
4735 ** Figure out the next descriptor
4736 ** of this frame.
4737 */
4738 if (rxr->hw_rsc == TRUE) {
4739 rsc = ixgbe_rsc_count(cur);
4740 rxr->rsc_num += (rsc - 1);
4741 }
4742 if (rsc) { /* Get hardware index */
4743 nextp = ((staterr &
4744 IXGBE_RXDADV_NEXTP_MASK) >>
4745 IXGBE_RXDADV_NEXTP_SHIFT);
4746 } else { /* Just sequential */
4747 nextp = i + 1;
4748 if (nextp == adapter->num_rx_desc)
4749 nextp = 0;
4750 }
4751 nbuf = &rxr->rx_buffers[nextp];
4752 prefetch(nbuf);
4753 }
4754 /*
4755 ** Rather than using the fmp/lmp global pointers
4756 ** we now keep the head of a packet chain in the
4757 ** buffer struct and pass this along from one
4758 ** descriptor to the next, until we get EOP.
4759 */
4760 mp->m_len = len;
4761 /*
4762 ** See if there is a stored head
4763 ** that determines what we are
4764 */
4765 sendmp = rbuf->fmp;
4766
4767 if (sendmp != NULL) { /* secondary frag */
4768 rbuf->buf = rbuf->fmp = NULL;
4769 mp->m_flags &= ~M_PKTHDR;
4770 sendmp->m_pkthdr.len += mp->m_len;
4771 } else {
4772 /*
4773 * Optimize. This might be a small packet,
4774 * maybe just a TCP ACK. Do a fast copy that
4775 * is cache aligned into a new mbuf, and
4776 * leave the old mbuf+cluster for re-use.
4777 */
4778 if (eop && len <= IXGBE_RX_COPY_LEN) {
4779 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4780 if (sendmp != NULL) {
4781 sendmp->m_data +=
4782 IXGBE_RX_COPY_ALIGN;
4783 ixgbe_bcopy(mp->m_data,
4784 sendmp->m_data, len);
4785 sendmp->m_len = len;
4786 rxr->rx_copies.ev_count++;
4787 rbuf->flags |= IXGBE_RX_COPY;
4788 }
4789 }
4790 if (sendmp == NULL) {
4791 rbuf->buf = rbuf->fmp = NULL;
4792 sendmp = mp;
4793 }
4794
4795 /* first desc of a non-ps chain */
4796 sendmp->m_flags |= M_PKTHDR;
4797 sendmp->m_pkthdr.len = mp->m_len;
4798 }
4799 ++processed;
4800 /* Pass the head pointer on */
4801 if (eop == 0) {
4802 nbuf->fmp = sendmp;
4803 sendmp = NULL;
4804 mp->m_next = nbuf->buf;
4805 } else { /* Sending this frame */
4806 sendmp->m_pkthdr.rcvif = ifp;
4807 ifp->if_ipackets++;
4808 rxr->rx_packets.ev_count++;
4809 /* capture data for AIM */
4810 rxr->bytes += sendmp->m_pkthdr.len;
4811 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
4812 /* Process vlan info */
4813 if ((rxr->vtag_strip) &&
4814 (staterr & IXGBE_RXD_STAT_VP))
4815 vtag = le16toh(cur->wb.upper.vlan);
4816 if (vtag) {
4817 VLAN_INPUT_TAG(ifp, sendmp, vtag,
4818 printf("%s: could not apply VLAN "
4819 "tag", __func__));
4820 }
4821 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
4822 ixgbe_rx_checksum(staterr, sendmp, ptype,
4823 &adapter->stats);
4824 }
4825 #if __FreeBSD_version >= 800000
4826 sendmp->m_pkthdr.flowid = que->msix;
4827 sendmp->m_flags |= M_FLOWID;
4828 #endif
4829 }
4830 next_desc:
4831 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4832 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4833
4834 /* Advance our pointers to the next descriptor. */
4835 if (++i == rxr->num_desc)
4836 i = 0;
4837
4838 /* Now send to the stack or do LRO */
4839 if (sendmp != NULL) {
4840 rxr->next_to_check = i;
4841 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4842 i = rxr->next_to_check;
4843 }
4844
4845 /* Every 8 descriptors we go to refresh mbufs */
4846 if (processed == 8) {
4847 ixgbe_refresh_mbufs(rxr, i);
4848 processed = 0;
4849 }
4850 }
4851
4852 /* Refresh any remaining buf structs */
4853 if (ixgbe_rx_unrefreshed(rxr))
4854 ixgbe_refresh_mbufs(rxr, i);
4855
4856 rxr->next_to_check = i;
4857
4858 #ifdef LRO
4859 /*
4860 * Flush any outstanding LRO work
4861 */
4862 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4863 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4864 tcp_lro_flush(lro, queued);
4865 }
4866 #endif /* LRO */
4867
4868 IXGBE_RX_UNLOCK(rxr);
4869
4870 /*
4871 ** We still have cleaning to do?
4872 ** Schedule another interrupt if so.
4873 */
4874 if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4875 ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
4876 return true;
4877 }
4878
4879 return false;
4880 }
4881
4882
4883 /*********************************************************************
4884 *
4885 * Verify that the hardware indicated that the checksum is valid.
4886  *  Inform the stack about the status of the checksum so that the
4887  *  stack doesn't spend time verifying the checksum itself.
4888 *
4889 *********************************************************************/
4890 static void
4891 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
4892 struct ixgbe_hw_stats *stats)
4893 {
4894 u16 status = (u16) staterr;
4895 u8 errors = (u8) (staterr >> 24);
4896 #if 0
4897 bool sctp = FALSE;
4898
4899 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4900 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4901 sctp = TRUE;
4902 #endif
4903
4904 if (status & IXGBE_RXD_STAT_IPCS) {
4905 stats->ipcs.ev_count++;
4906 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4907 /* IP Checksum Good */
4908 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
4909
4910 } else {
4911 stats->ipcs_bad.ev_count++;
4912 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
4913 }
4914 }
4915 if (status & IXGBE_RXD_STAT_L4CS) {
4916 stats->l4cs.ev_count++;
4917 u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
4918 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4919 mp->m_pkthdr.csum_flags |= type;
4920 } else {
4921 stats->l4cs_bad.ev_count++;
4922 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
4923 }
4924 }
4925 return;
4926 }
4927
4928
4929 #if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */
4930 /*
4931 ** This routine is run via a vlan config EVENT;
4932 ** it enables us to use the HW Filter table since
4933 ** we can get the vlan id. This just creates the
4934 ** entry in the soft version of the VFTA; init will
4935 ** repopulate the real table.
4936 */
4937 static void
4938 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4939 {
4940 struct adapter *adapter = ifp->if_softc;
4941 u16 index, bit;
4942
4943 if (ifp->if_softc != arg) /* Not our event */
4944 return;
4945
4946 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4947 return;
4948
4949 IXGBE_CORE_LOCK(adapter);
4950 index = (vtag >> 5) & 0x7F;
4951 bit = vtag & 0x1F;
4952 adapter->shadow_vfta[index] |= (1 << bit);
4953 ixgbe_init_locked(adapter);
4954 IXGBE_CORE_UNLOCK(adapter);
4955 }
4956
4957 /*
4958 ** This routine is run via a vlan
4959 ** unconfig EVENT; it removes our entry
4960 ** in the soft VFTA.
4961 */
4962 static void
4963 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4964 {
4965 struct adapter *adapter = ifp->if_softc;
4966 u16 index, bit;
4967
4968 if (ifp->if_softc != arg)
4969 return;
4970
4971 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4972 return;
4973
4974 IXGBE_CORE_LOCK(adapter);
4975 index = (vtag >> 5) & 0x7F;
4976 bit = vtag & 0x1F;
4977 adapter->shadow_vfta[index] &= ~(1 << bit);
4978 /* Re-init to load the changes */
4979 ixgbe_init_locked(adapter);
4980 IXGBE_CORE_UNLOCK(adapter);
4981 }
4982 #endif
4983
4984 static void
4985 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4986 {
4987 struct ethercom *ec = &adapter->osdep.ec;
4988 struct ixgbe_hw *hw = &adapter->hw;
4989 struct rx_ring *rxr;
4990 u32 ctrl;
4991
4992 /*
4993 	** We get here through init_locked, meaning
4994 	** a soft reset; this has already cleared
4995 	** the VFTA and other state, so if no
4996 	** vlans have been registered do nothing.
4997 */
4998 if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
4999 return;
5000 }
5001
5002 /*
5003 	** A soft reset zeroes out the VFTA, so
5004 ** we need to repopulate it now.
5005 */
5006 for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
5007 if (adapter->shadow_vfta[i] != 0)
5008 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
5009 adapter->shadow_vfta[i]);
5010
5011 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
5012 /* Enable the Filter Table if enabled */
5013 if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
5014 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
5015 ctrl |= IXGBE_VLNCTRL_VFE;
5016 }
5017 if (hw->mac.type == ixgbe_mac_82598EB)
5018 ctrl |= IXGBE_VLNCTRL_VME;
5019 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
5020
5021 /* Setup the queues for vlans */
5022 for (int i = 0; i < adapter->num_queues; i++) {
5023 rxr = &adapter->rx_rings[i];
5024 /* On 82599 the VLAN enable is per/queue in RXDCTL */
5025 if (hw->mac.type != ixgbe_mac_82598EB) {
5026 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
5027 ctrl |= IXGBE_RXDCTL_VME;
5028 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
5029 }
5030 rxr->vtag_strip = TRUE;
5031 }
5032 }
5033
5034 static void
5035 ixgbe_enable_intr(struct adapter *adapter)
5036 {
5037 struct ixgbe_hw *hw = &adapter->hw;
5038 struct ix_queue *que = adapter->queues;
5039 u32 mask, fwsm;
5040
5041 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
5042 /* Enable Fan Failure detection */
5043 if (hw->device_id == IXGBE_DEV_ID_82598AT)
5044 mask |= IXGBE_EIMS_GPI_SDP1;
5045
5046 switch (adapter->hw.mac.type) {
5047 case ixgbe_mac_82599EB:
5048 mask |= IXGBE_EIMS_ECC;
5049 mask |= IXGBE_EIMS_GPI_SDP0;
5050 mask |= IXGBE_EIMS_GPI_SDP1;
5051 mask |= IXGBE_EIMS_GPI_SDP2;
5052 #ifdef IXGBE_FDIR
5053 mask |= IXGBE_EIMS_FLOW_DIR;
5054 #endif
5055 break;
5056 case ixgbe_mac_X540:
5057 mask |= IXGBE_EIMS_ECC;
5058 /* Detect if Thermal Sensor is enabled */
5059 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
5060 if (fwsm & IXGBE_FWSM_TS_ENABLED)
5061 mask |= IXGBE_EIMS_TS;
5062 #ifdef IXGBE_FDIR
5063 mask |= IXGBE_EIMS_FLOW_DIR;
5064 #endif
5065 /* falls through */
5066 default:
5067 break;
5068 }
5069
5070 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5071
5072 /* With RSS we use auto clear */
5073 if (adapter->msix_mem) {
5074 mask = IXGBE_EIMS_ENABLE_MASK;
5075 /* Don't autoclear Link */
5076 mask &= ~IXGBE_EIMS_OTHER;
5077 mask &= ~IXGBE_EIMS_LSC;
5078 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5079 }
5080
5081 /*
5082 	** Now enable all queues; this is done separately to
5083 	** allow for handling the extended (beyond 32) MSIX
5084 	** vectors that can be used by the 82599.
5085 */
5086 for (int i = 0; i < adapter->num_queues; i++, que++)
5087 ixgbe_enable_queue(adapter, que->msix);
5088
5089 IXGBE_WRITE_FLUSH(hw);
5090
5091 return;
5092 }
5093
5094 static void
5095 ixgbe_disable_intr(struct adapter *adapter)
5096 {
5097 if (adapter->msix_mem)
5098 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5099 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5100 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5101 } else {
5102 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5103 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5104 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5105 }
5106 IXGBE_WRITE_FLUSH(&adapter->hw);
5107 return;
5108 }
5109
5110 u16
5111 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5112 {
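	/*
	 * NetBSD's pci_conf_read()/pci_conf_write() operate on aligned
	 * 32-bit dwords, so a 16-bit config register is taken from either
	 * the low half (reg % 4 == 0) or the high half (reg % 4 == 2) of
	 * the containing dword; e.g. reg 0xA2 reads bits 31:16 of the
	 * dword at 0xA0.
	 */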
5113 switch (reg % 4) {
5114 case 0:
5115 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5116 __BITS(15, 0);
5117 case 2:
5118 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
5119 reg - 2), __BITS(31, 16));
5120 default:
5121 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5122 break;
5123 }
5124 }
5125
5126 void
5127 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5128 {
5129 pcireg_t old;
5130
5131 switch (reg % 4) {
5132 case 0:
5133 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5134 __BITS(31, 16);
5135 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
5136 break;
5137 case 2:
5138 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
5139 __BITS(15, 0);
5140 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
5141 __SHIFTIN(value, __BITS(31, 16)) | old);
5142 break;
5143 default:
5144 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5145 break;
5146 }
5147
5148 return;
5149 }
5150
5151 /*
5152 ** Setup the correct IVAR register for a particular MSIX interrupt
5153 ** (yes this is all very magic and confusing :)
5154 ** - entry is the register array entry
5155 ** - vector is the MSIX vector for this queue
5156 ** - type is RX/TX/MISC
5157 */
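/*
** Each 32-bit IVAR register holds four 8-bit entries.  On the 82599 and
** X540 one register covers two queues: the RX entry for a queue goes in
** byte 0 or 2 and the TX entry in byte 1 or 3, which is what the
** "(16 * (entry & 1)) + (8 * type)" index below computes.
*/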
5158 static void
5159 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5160 {
5161 struct ixgbe_hw *hw = &adapter->hw;
5162 u32 ivar, index;
5163
5164 vector |= IXGBE_IVAR_ALLOC_VAL;
5165
5166 switch (hw->mac.type) {
5167
5168 case ixgbe_mac_82598EB:
5169 if (type == -1)
5170 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5171 else
5172 entry += (type * 64);
5173 index = (entry >> 2) & 0x1F;
5174 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5175 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5176 ivar |= (vector << (8 * (entry & 0x3)));
5177 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5178 break;
5179
5180 case ixgbe_mac_82599EB:
5181 case ixgbe_mac_X540:
5182 if (type == -1) { /* MISC IVAR */
5183 index = (entry & 1) * 8;
5184 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5185 ivar &= ~(0xFF << index);
5186 ivar |= (vector << index);
5187 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5188 } else { /* RX/TX IVARS */
5189 index = (16 * (entry & 1)) + (8 * type);
5190 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5191 ivar &= ~(0xFF << index);
5192 ivar |= (vector << index);
5193 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5194 }
5195
5196 default:
5197 break;
5198 }
5199 }
5200
5201 static void
5202 ixgbe_configure_ivars(struct adapter *adapter)
5203 {
5204 struct ix_queue *que = adapter->queues;
5205 u32 newitr;
5206
5207 if (ixgbe_max_interrupt_rate > 0)
5208 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5209 else
5210 newitr = 0;
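	/*
	 * The EITR interval field lives in bits 11:3 and is counted in
	 * units of 2 usec, so the encoding is roughly usec * 4; the 0x0FF8
	 * mask keeps only that field.  For example, a limit of 31250
	 * interrupts/s gives 4000000 / 31250 = 128, i.e. a 32 usec interval.
	 */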
5211
5212 for (int i = 0; i < adapter->num_queues; i++, que++) {
5213 /* First the RX queue entry */
5214 ixgbe_set_ivar(adapter, i, que->msix, 0);
5215 /* ... and the TX */
5216 ixgbe_set_ivar(adapter, i, que->msix, 1);
5217 /* Set an Initial EITR value */
5218 IXGBE_WRITE_REG(&adapter->hw,
5219 IXGBE_EITR(que->msix), newitr);
5220 }
5221
5222 /* For the Link interrupt */
5223 ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5224 }
5225
5226 /*
5227 ** ixgbe_sfp_probe - called in the local timer to
5228 ** determine if a port had optics inserted.
5229 */
5230 static bool
ixgbe_sfp_probe(struct adapter *adapter)
5231 {
5232 struct ixgbe_hw *hw = &adapter->hw;
5233 device_t dev = adapter->dev;
5234 bool result = FALSE;
5235
5236 if ((hw->phy.type == ixgbe_phy_nl) &&
5237 (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5238 s32 ret = hw->phy.ops.identify_sfp(hw);
5239 if (ret)
5240 goto out;
5241 ret = hw->phy.ops.reset(hw);
5242 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5243 			device_printf(dev, "Unsupported SFP+ module detected!\n");
5244 device_printf(dev, "Reload driver with supported module.\n");
5245 adapter->sfp_probe = FALSE;
5246 goto out;
5247 } else
5248 device_printf(dev,"SFP+ module detected!\n");
5249 /* We now have supported optics */
5250 adapter->sfp_probe = FALSE;
5251 /* Set the optics type so system reports correctly */
5252 ixgbe_setup_optics(adapter);
5253 result = TRUE;
5254 }
5255 out:
5256 return (result);
5257 }
5258
5259 /*
5260 ** Tasklet handler for MSIX Link interrupts
5261 ** - do outside interrupt since it might sleep
5262 */
5263 static void
5264 ixgbe_handle_link(void *context)
5265 {
5266 struct adapter *adapter = context;
5267
5268 if (ixgbe_check_link(&adapter->hw,
5269 &adapter->link_speed, &adapter->link_up, 0) == 0)
5270 ixgbe_update_link_status(adapter);
5271 }
5272
5273 /*
5274 ** Tasklet for handling SFP module interrupts
5275 */
5276 static void
5277 ixgbe_handle_mod(void *context)
5278 {
5279 struct adapter *adapter = context;
5280 struct ixgbe_hw *hw = &adapter->hw;
5281 device_t dev = adapter->dev;
5282 u32 err;
5283
5284 err = hw->phy.ops.identify_sfp(hw);
5285 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5286 device_printf(dev,
5287 "Unsupported SFP+ module type was detected.\n");
5288 return;
5289 }
5290 err = hw->mac.ops.setup_sfp(hw);
5291 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5292 device_printf(dev,
5293 "Setup failure - unsupported SFP+ module type.\n");
5294 return;
5295 }
5296 softint_schedule(adapter->msf_si);
5297 return;
5298 }
5299
5300
5301 /*
5302 ** Tasklet for handling MSF (multispeed fiber) interrupts
5303 */
5304 static void
5305 ixgbe_handle_msf(void *context)
5306 {
5307 struct adapter *adapter = context;
5308 struct ixgbe_hw *hw = &adapter->hw;
5309 u32 autoneg;
5310 bool negotiate;
5311
5312 autoneg = hw->phy.autoneg_advertised;
5313 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5314 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5315 else
5316 negotiate = 0;
5317 if (hw->mac.ops.setup_link)
5318 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5319 return;
5320 }
5321
5322 #ifdef IXGBE_FDIR
5323 /*
5324 ** Tasklet for reinitializing the Flow Director filter table
5325 */
5326 static void
5327 ixgbe_reinit_fdir(void *context)
5328 {
5329 struct adapter *adapter = context;
5330 struct ifnet *ifp = adapter->ifp;
5331
5332 if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5333 return;
5334 ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5335 adapter->fdir_reinit = 0;
5336 /* re-enable flow director interrupts */
5337 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5338 /* Restart the interface */
5339 ifp->if_flags |= IFF_RUNNING;
5340 return;
5341 }
5342 #endif
5343
5344 /**********************************************************************
5345 *
5346 * Update the board statistics counters.
5347 *
5348 **********************************************************************/
5349 static void
5350 ixgbe_update_stats_counters(struct adapter *adapter)
5351 {
5352 struct ifnet *ifp = adapter->ifp;
5353 struct ixgbe_hw *hw = &adapter->hw;
5354 u32 missed_rx = 0, bprc, lxon, lxoff, total;
5355 u64 total_missed_rx = 0;
5356 uint64_t crcerrs, rlec;
5357
5358 crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5359 adapter->stats.crcerrs.ev_count += crcerrs;
5360 adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5361 adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5362 adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5363
5364 /*
5365 ** Note: these are for the 8 possible traffic classes,
5366 	** which the current implementation does not use,
5367 	** therefore only class 0 should read real data.
5368 */
5369 for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
5370 int j = i % adapter->num_queues;
5371 u32 mp;
5372 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5373 /* missed_rx tallies misses for the gprc workaround */
5374 missed_rx += mp;
5375 /* global total per queue */
5376 adapter->stats.mpc[j].ev_count += mp;
5377 /* Running comprehensive total for stats display */
5378 total_missed_rx += mp;
5379 if (hw->mac.type == ixgbe_mac_82598EB) {
5380 adapter->stats.rnbc[j] +=
5381 IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5382 adapter->stats.qbtc[j].ev_count +=
5383 IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5384 adapter->stats.qbrc[j].ev_count +=
5385 IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5386 adapter->stats.pxonrxc[j].ev_count +=
5387 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5388 } else {
5389 adapter->stats.pxonrxc[j].ev_count +=
5390 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5391 }
5392 adapter->stats.pxontxc[j].ev_count +=
5393 IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5394 adapter->stats.pxofftxc[j].ev_count +=
5395 IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5396 adapter->stats.pxoffrxc[j].ev_count +=
5397 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5398 adapter->stats.pxon2offc[j].ev_count +=
5399 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5400 }
5401 for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
5402 int j = i % adapter->num_queues;
5403 adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5404 adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5405 adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5406 }
5407 adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
5408 adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
5409 rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
5410 adapter->stats.rlec.ev_count += rlec;
5411
5412 /* Hardware workaround, gprc counts missed packets */
5413 adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
5414
5415 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5416 adapter->stats.lxontxc.ev_count += lxon;
5417 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5418 adapter->stats.lxofftxc.ev_count += lxoff;
5419 total = lxon + lxoff;
5420
5421 if (hw->mac.type != ixgbe_mac_82598EB) {
5422 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5423 ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5424 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5425 ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
5426 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
5427 ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5428 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5429 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5430 } else {
5431 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5432 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5433 /* 82598 only has a counter in the high register */
5434 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
5435 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
5436 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
5437 }
5438
5439 /*
5440 * Workaround: mprc hardware is incorrectly counting
5441 * broadcasts, so for now we subtract those.
5442 */
5443 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5444 adapter->stats.bprc.ev_count += bprc;
5445 adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
5446
5447 adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
5448 adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
5449 adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
5450 adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
5451 adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5452 adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5453
5454 adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
5455 adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
5456 adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
5457
5458 adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
5459 adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
5460 adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
5461 adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
5462 adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5463 adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5464 adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5465 adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
5466 adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
5467 adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
5468 adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
5469 adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
5470 adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5471 adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5472 adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
5473 adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
5474 adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5475 adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5476
5477 	/* Only read FCoE counters on 82599 and newer; the 82598 lacks them */
5478 if (hw->mac.type != ixgbe_mac_82598EB) {
5479 adapter->stats.fcoerpdc.ev_count +=
5480 IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5481 adapter->stats.fcoeprc.ev_count +=
5482 IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5483 adapter->stats.fcoeptc.ev_count +=
5484 IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5485 adapter->stats.fcoedwrc.ev_count +=
5486 IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5487 adapter->stats.fcoedwtc.ev_count +=
5488 IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5489 }
5490
5491 /* Fill out the OS statistics structure */
5492 /*
5493 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
5494 	 * adapter->stats counters.  This is required to make
5495 	 * ifconfig -z (SIOCZIFDATA) work properly.
5496 */
5497 ifp->if_collisions = 0;
5498
5499 /* Rx Errors */
5500 ifp->if_iqdrops += total_missed_rx;
5501 ifp->if_ierrors += crcerrs + rlec;
5502 }
5503
5504 /** ixgbe_sysctl_tdh_handler - Sysctl handler function
5505  * Retrieves the TDH (Transmit Descriptor Head) value from the hardware
5506 */
5507 static int
5508 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
5509 {
5510 struct sysctlnode node;
5511 uint32_t val;
5512 struct tx_ring *txr;
5513
5514 node = *rnode;
5515 txr = (struct tx_ring *)node.sysctl_data;
5516 if (txr == NULL)
5517 return 0;
5518 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5519 node.sysctl_data = &val;
5520 return sysctl_lookup(SYSCTLFN_CALL(&node));
5521 }
5522
5523 /** ixgbe_sysctl_tdt_handler - Sysctl handler function
5524  * Retrieves the TDT (Transmit Descriptor Tail) value from the hardware
5525 */
5526 static int
5527 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
5528 {
5529 struct sysctlnode node;
5530 uint32_t val;
5531 struct tx_ring *txr;
5532
5533 node = *rnode;
5534 txr = (struct tx_ring *)node.sysctl_data;
5535 if (txr == NULL)
5536 return 0;
5537 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5538 node.sysctl_data = &val;
5539 return sysctl_lookup(SYSCTLFN_CALL(&node));
5540 }
5541
5542 /** ixgbe_sysctl_rdh_handler - Sysctl handler function
5543  * Retrieves the RDH (Receive Descriptor Head) value from the hardware
5544 */
5545 static int
5546 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
5547 {
5548 struct sysctlnode node;
5549 uint32_t val;
5550 struct rx_ring *rxr;
5551
5552 node = *rnode;
5553 rxr = (struct rx_ring *)node.sysctl_data;
5554 if (rxr == NULL)
5555 return 0;
5556 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5557 node.sysctl_data = &val;
5558 return sysctl_lookup(SYSCTLFN_CALL(&node));
5559 }
5560
5561 /** ixgbe_sysctl_rdt_handler - Sysctl handler function
5562  * Retrieves the RDT (Receive Descriptor Tail) value from the hardware
5563 */
5564 static int
5565 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
5566 {
5567 struct sysctlnode node;
5568 uint32_t val;
5569 struct rx_ring *rxr;
5570
5571 node = *rnode;
5572 rxr = (struct rx_ring *)node.sysctl_data;
5573 if (rxr == NULL)
5574 return 0;
5575 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5576 node.sysctl_data = &val;
5577 return sysctl_lookup(SYSCTLFN_CALL(&node));
5578 }
5579
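/** ixgbe_sysctl_interrupt_rate_handler - Sysctl handler function
 * Reports and optionally reprograms the per-queue interrupt rate.
 * The EITR interval field (bits 11:3) appears to be treated here as
 * 2 usec units, hence rate = 500000 / interval when reading back.
 * When a new rate is written, e.g. 8000 interrupts/s, the register
 * value becomes (4000000 / 8000) & 0xff8 = 496, i.e. an interval
 * field of 62 (~124 usec between interrupts).
 */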
5580 static int
5581 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
5582 {
5583 int error;
5584 struct sysctlnode node;
5585 struct ix_queue *que;
5586 uint32_t reg, usec, rate;
5587
5588 node = *rnode;
5589 que = (struct ix_queue *)node.sysctl_data;
5590 if (que == NULL)
5591 return 0;
5592 reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5593 usec = ((reg & 0x0FF8) >> 3);
5594 if (usec > 0)
5595 rate = 500000 / usec;
5596 else
5597 rate = 0;
5598 node.sysctl_data = &rate;
5599 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5600 if (error)
5601 return error;
5602 reg &= ~0xfff; /* default, no limitation */
5603 ixgbe_max_interrupt_rate = 0;
5604 if (rate > 0 && rate < 500000) {
5605 if (rate < 1000)
5606 rate = 1000;
5607 ixgbe_max_interrupt_rate = rate;
5608 reg |= ((4000000/rate) & 0xff8 );
5609 }
5610 IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5611 return 0;
5612 }
5613
5614 const struct sysctlnode *
5615 ixgbe_sysctl_instance(struct adapter *adapter)
5616 {
5617 const char *dvname;
5618 struct sysctllog **log;
5619 int rc;
5620 const struct sysctlnode *rnode;
5621
5622 log = &adapter->sysctllog;
5623 dvname = device_xname(adapter->dev);
5624
5625 if ((rc = sysctl_createv(log, 0, NULL, &rnode,
5626 0, CTLTYPE_NODE, dvname,
5627 SYSCTL_DESCR("ixgbe information and settings"),
5628 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
5629 goto err;
5630
5631 return rnode;
5632 err:
5633 printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
5634 return NULL;
5635 }
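
/*
 * The node created above is attached directly under CTL_HW, so the
 * per-device settings show up as hw.<devicename> (e.g. hw.ixg0 for
 * the first adapter, assuming the usual "ixg" attachment name).
 */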
5636
5637 /*
5638  * Add evcnt(9) statistics counters and per-queue sysctl nodes to the system.
5639 */
5640 static void
5641 ixgbe_add_hw_stats(struct adapter *adapter)
5642 {
5643 device_t dev = adapter->dev;
5644 const struct sysctlnode *rnode, *cnode;
5645 struct sysctllog **log = &adapter->sysctllog;
5646 struct tx_ring *txr = adapter->tx_rings;
5647 struct rx_ring *rxr = adapter->rx_rings;
5648 struct ixgbe_hw_stats *stats = &adapter->stats;
5649
5650 /* Driver Statistics */
5651 #if 0
5652 /* These counters are not updated by the software */
5653 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5654 CTLFLAG_RD, &adapter->dropped_pkts,
5655 "Driver dropped packets");
5656 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
5657 CTLFLAG_RD, &adapter->mbuf_header_failed,
5658 "???");
5659 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
5660 CTLFLAG_RD, &adapter->mbuf_packet_failed,
5661 "???");
5662 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
5663 CTLFLAG_RD, &adapter->no_tx_map_avail,
5664 "???");
5665 #endif
5666 evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
5667 NULL, device_xname(dev), "Handled queue in softint");
5668 evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
5669 NULL, device_xname(dev), "Requeued in softint");
5670 evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
5671 NULL, device_xname(dev), "Interrupt handler more rx");
5672 evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
5673 NULL, device_xname(dev), "Interrupt handler more tx");
5674 evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
5675 NULL, device_xname(dev), "Interrupt handler tx loops");
5676 evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
5677 NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
5678 evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
5679 NULL, device_xname(dev), "m_defrag() failed");
5680 evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
5681 NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
5682 evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
5683 NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
5684 evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
5685 NULL, device_xname(dev), "Driver tx dma hard fail other");
5686 evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
5687 NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
5688 evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
5689 NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
5690 evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
5691 NULL, device_xname(dev), "Watchdog timeouts");
5692 evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
5693 NULL, device_xname(dev), "TSO errors");
5694 evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
5695 NULL, device_xname(dev), "Link MSIX IRQ Handled");
5696
5697 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5698 snprintf(adapter->queues[i].evnamebuf,
5699 sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
5700 device_xname(dev), i);
5701 snprintf(adapter->queues[i].namebuf,
5702 sizeof(adapter->queues[i].namebuf), "queue%d", i);
5703
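		/*
		 * namebuf ("queueN") becomes the sysctl node name below,
		 * while evnamebuf ("<dev> queueN") is used as the group
		 * name for the per-queue evcnt(9) counters attached
		 * further down.
		 */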
5704 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
5705 aprint_error_dev(dev, "could not create sysctl root\n");
5706 break;
5707 }
5708
5709 if (sysctl_createv(log, 0, &rnode, &rnode,
5710 0, CTLTYPE_NODE,
5711 adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
5712 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
5713 break;
5714
5715 if (sysctl_createv(log, 0, &rnode, &cnode,
5716 CTLFLAG_READWRITE, CTLTYPE_INT,
5717 "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
5718 ixgbe_sysctl_interrupt_rate_handler, 0,
5719 (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
5720 break;
5721
5722 if (sysctl_createv(log, 0, &rnode, &cnode,
5723 CTLFLAG_READONLY, CTLTYPE_QUAD,
5724 "irqs", SYSCTL_DESCR("irqs on this queue"),
5725 NULL, 0, &(adapter->queues[i].irqs),
5726 0, CTL_CREATE, CTL_EOL) != 0)
5727 break;
5728
5729 if (sysctl_createv(log, 0, &rnode, &cnode,
5730 CTLFLAG_READONLY, CTLTYPE_INT,
5731 "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
5732 ixgbe_sysctl_tdh_handler, 0, (void *)txr,
5733 0, CTL_CREATE, CTL_EOL) != 0)
5734 break;
5735
5736 if (sysctl_createv(log, 0, &rnode, &cnode,
5737 CTLFLAG_READONLY, CTLTYPE_INT,
5738 "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
5739 ixgbe_sysctl_tdt_handler, 0, (void *)txr,
5740 0, CTL_CREATE, CTL_EOL) != 0)
5741 break;
5742
5743 evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
5744 NULL, device_xname(dev), "TSO");
5745 evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
5746 NULL, adapter->queues[i].evnamebuf,
5747 "Queue No Descriptor Available");
5748 evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
5749 NULL, adapter->queues[i].evnamebuf,
5750 "Queue Packets Transmitted");
5751
5752 #ifdef LRO
5753 struct lro_ctrl *lro = &rxr->lro;
5754 #endif /* LRO */
5755
5756 if (sysctl_createv(log, 0, &rnode, &cnode,
5757 CTLFLAG_READONLY,
5758 CTLTYPE_INT,
5759 "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
5760 ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
5761 CTL_CREATE, CTL_EOL) != 0)
5762 break;
5763
5764 if (sysctl_createv(log, 0, &rnode, &cnode,
5765 CTLFLAG_READONLY,
5766 CTLTYPE_INT,
5767 "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
5768 ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
5769 CTL_CREATE, CTL_EOL) != 0)
5770 break;
5771
5772 if (i < __arraycount(adapter->stats.mpc)) {
5773 evcnt_attach_dynamic(&adapter->stats.mpc[i],
5774 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5775 "Missed Packet Count");
5776 }
5777 if (i < __arraycount(adapter->stats.pxontxc)) {
5778 evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
5779 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5780 "pxontxc");
5781 evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
5782 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5783 "pxonrxc");
5784 evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
5785 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5786 "pxofftxc");
5787 evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
5788 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5789 "pxoffrxc");
5790 evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
5791 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5792 "pxon2offc");
5793 }
5794 if (i < __arraycount(adapter->stats.qprc)) {
5795 evcnt_attach_dynamic(&adapter->stats.qprc[i],
5796 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5797 "qprc");
5798 evcnt_attach_dynamic(&adapter->stats.qptc[i],
5799 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5800 "qptc");
5801 evcnt_attach_dynamic(&adapter->stats.qbrc[i],
5802 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5803 "qbrc");
5804 evcnt_attach_dynamic(&adapter->stats.qbtc[i],
5805 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5806 "qbtc");
5807 evcnt_attach_dynamic(&adapter->stats.qprdc[i],
5808 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5809 "qprdc");
5810 }
5811
5812 evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
5813 NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
5814 evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
5815 NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
5816 evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
5817 NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
5818 evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
5819 NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
5820 evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
5821 NULL, adapter->queues[i].evnamebuf, "Rx discarded");
5822 evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
5823 NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
5824 #ifdef LRO
5825 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5826 CTLFLAG_RD, &lro->lro_queued, 0,
5827 "LRO Queued");
5828 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5829 CTLFLAG_RD, &lro->lro_flushed, 0,
5830 "LRO Flushed");
5831 #endif /* LRO */
5832 }
5833
5834 	/* MAC stats get their own sub node */
5835
5836
5837 snprintf(stats->namebuf,
5838 sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
5839
5840 evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
5841 stats->namebuf, "rx csum offload - IP");
5842 evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
5843 stats->namebuf, "rx csum offload - L4");
5844 evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
5845 stats->namebuf, "rx csum offload - IP bad");
5846 evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
5847 stats->namebuf, "rx csum offload - L4 bad");
5848 evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
5849 stats->namebuf, "Interrupt conditions zero");
5850 evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
5851 stats->namebuf, "Legacy interrupts");
5852 evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
5853 stats->namebuf, "CRC Errors");
5854 evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
5855 stats->namebuf, "Illegal Byte Errors");
5856 evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
5857 stats->namebuf, "Byte Errors");
5858 evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
5859 stats->namebuf, "MAC Short Packets Discarded");
5860 evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
5861 stats->namebuf, "MAC Local Faults");
5862 evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
5863 stats->namebuf, "MAC Remote Faults");
5864 evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
5865 stats->namebuf, "Receive Length Errors");
5866 evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
5867 stats->namebuf, "Link XON Transmitted");
5868 evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
5869 stats->namebuf, "Link XON Received");
5870 evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
5871 stats->namebuf, "Link XOFF Transmitted");
5872 evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
5873 stats->namebuf, "Link XOFF Received");
5874
5875 /* Packet Reception Stats */
5876 evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
5877 stats->namebuf, "Total Octets Received");
5878 evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
5879 stats->namebuf, "Good Octets Received");
5880 evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
5881 stats->namebuf, "Total Packets Received");
5882 evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
5883 stats->namebuf, "Good Packets Received");
5884 evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
5885 stats->namebuf, "Multicast Packets Received");
5886 evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
5887 stats->namebuf, "Broadcast Packets Received");
5888 evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
5889 	    stats->namebuf, "64 byte frames received");
5890 evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
5891 stats->namebuf, "65-127 byte frames received");
5892 evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
5893 stats->namebuf, "128-255 byte frames received");
5894 evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
5895 stats->namebuf, "256-511 byte frames received");
5896 evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
5897 stats->namebuf, "512-1023 byte frames received");
5898 evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
5899 	    stats->namebuf, "1024-1522 byte frames received");
5900 evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
5901 stats->namebuf, "Receive Undersized");
5902 evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
5903 	    stats->namebuf, "Fragmented Packets Received");
5904 evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
5905 stats->namebuf, "Oversized Packets Received");
5906 evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
5907 stats->namebuf, "Received Jabber");
5908 evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
5909 stats->namebuf, "Management Packets Received");
5910 evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
5911 stats->namebuf, "Checksum Errors");
5912
5913 /* Packet Transmission Stats */
5914 evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
5915 stats->namebuf, "Good Octets Transmitted");
5916 evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
5917 stats->namebuf, "Total Packets Transmitted");
5918 evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
5919 stats->namebuf, "Good Packets Transmitted");
5920 evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
5921 stats->namebuf, "Broadcast Packets Transmitted");
5922 evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
5923 stats->namebuf, "Multicast Packets Transmitted");
5924 evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
5925 stats->namebuf, "Management Packets Transmitted");
5926 evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
5927 	    stats->namebuf, "64 byte frames transmitted");
5928 evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
5929 stats->namebuf, "65-127 byte frames transmitted");
5930 evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
5931 stats->namebuf, "128-255 byte frames transmitted");
5932 evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
5933 stats->namebuf, "256-511 byte frames transmitted");
5934 evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
5935 stats->namebuf, "512-1023 byte frames transmitted");
5936 evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
5937 stats->namebuf, "1024-1522 byte frames transmitted");
5938 }
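
/*
 * The evcnt(9) counters attached above can be inspected from userland
 * with "vmstat -e"; the sysctl nodes (descriptor heads and tails,
 * interrupt rate) live under the per-device hw.<devicename> subtree
 * created by ixgbe_sysctl_instance().
 */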
5939
5940 /*
5941 ** Set flow control using sysctl:
5942 ** Flow control values:
5943 ** 0 - off
5944 ** 1 - rx pause
5945 ** 2 - tx pause
5946 ** 3 - full
5947 */
5948 static int
5949 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
5950 {
5951 struct sysctlnode node;
5952 int error, last;
5953 struct adapter *adapter;
5954
5955 node = *rnode;
5956 adapter = (struct adapter *)node.sysctl_data;
5957 node.sysctl_data = &adapter->fc;
5958 last = adapter->fc;
5959 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5960 if (error != 0 || newp == NULL)
5961 return error;
5962
5963 /* Don't bother if it's not changed */
5964 if (adapter->fc == last)
5965 return (0);
5966
5967 switch (adapter->fc) {
5968 case ixgbe_fc_rx_pause:
5969 case ixgbe_fc_tx_pause:
5970 case ixgbe_fc_full:
5971 adapter->hw.fc.requested_mode = adapter->fc;
5972 if (adapter->num_queues > 1)
5973 ixgbe_disable_rx_drop(adapter);
5974 break;
5975 case ixgbe_fc_none:
5976 adapter->hw.fc.requested_mode = ixgbe_fc_none;
5977 if (adapter->num_queues > 1)
5978 ixgbe_enable_rx_drop(adapter);
5979 break;
5980 default:
5981 adapter->fc = last;
5982 return (EINVAL);
5983 }
5984 /* Don't autoneg if forcing a value */
5985 adapter->hw.fc.disable_fc_autoneg = TRUE;
5986 ixgbe_fc_enable(&adapter->hw);
5987 return 0;
5988 }
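
/*
 * When flow control is turned off on a multiqueue adapter, the handler
 * above enables per-queue packet drop (ixgbe_enable_rx_drop below) so
 * that one full ring cannot stall the whole RX engine; selecting any
 * pause mode disables it again.  As a purely illustrative example,
 * assuming the node is registered under the per-device subtree as
 * "flow_control", full flow control might be enabled with something
 * like "sysctl -w hw.ixg0.flow_control=3".
 */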
5989
5990 /*
5991 ** Control the advertised link speed:
5992 **	1 - advertise only 1G
5993 **	2 - advertise only 100Mb
5994 **	3 - advertise normal (1G and 10G)
5995 */
5996 static int
5997 ixgbe_set_advertise(SYSCTLFN_ARGS)
5998 {
5999 struct sysctlnode node;
6000 int t, error = 0;
6001 struct adapter *adapter;
6002 device_t dev;
6003 struct ixgbe_hw *hw;
6004 ixgbe_link_speed speed, last;
6005
6006 node = *rnode;
6007 adapter = (struct adapter *)node.sysctl_data;
6008 dev = adapter->dev;
6009 hw = &adapter->hw;
6010 last = adapter->advertise;
6011 t = adapter->advertise;
6012 node.sysctl_data = &t;
6013 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6014 if (error != 0 || newp == NULL)
6015 return error;
6016
6017 	if (t == last) /* no change */
6018 return (0);
6019
6020 if (t == -1)
6021 return 0;
6022
6023 adapter->advertise = t;
6024
6025 if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
6026 (hw->phy.multispeed_fiber)))
6027 return (EINVAL);
6028
6029 if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
6030 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
6031 return (EINVAL);
6032 }
6033
6034 if (adapter->advertise == 1)
6035 speed = IXGBE_LINK_SPEED_1GB_FULL;
6036 else if (adapter->advertise == 2)
6037 speed = IXGBE_LINK_SPEED_100_FULL;
6038 else if (adapter->advertise == 3)
6039 speed = IXGBE_LINK_SPEED_1GB_FULL |
6040 IXGBE_LINK_SPEED_10GB_FULL;
6041 else {/* bogus value */
6042 adapter->advertise = last;
6043 return (EINVAL);
6044 }
6045
6046 hw->mac.autotry_restart = TRUE;
6047 hw->mac.ops.setup_link(hw, speed, TRUE);
6048
6049 return 0;
6050 }
6051
6052 /*
6053 ** Thermal Shutdown Trigger (X540 only)
6054 ** - cause a Thermal Overtemp IRQ
6055 */
6056 static int
6057 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
6058 {
6059 struct sysctlnode node;
6060 int error, fire = 0;
6061 struct adapter *adapter;
6062 struct ixgbe_hw *hw;
6063
6064 node = *rnode;
6065 adapter = (struct adapter *)node.sysctl_data;
6066 hw = &adapter->hw;
6067
6068 if (hw->mac.type != ixgbe_mac_X540)
6069 return (0);
6070
6071 node.sysctl_data = &fire;
6072 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6073 if ((error) || (newp == NULL))
6074 return (error);
6075
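	/*
	 * EICS is the interrupt cause *set* register: writing the
	 * thermal sensor (TS) bit latches a thermal event so the normal
	 * overtemp interrupt path can be exercised without actually
	 * overheating the part.
	 */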
6076 if (fire) {
6077 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
6078 reg |= IXGBE_EICR_TS;
6079 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
6080 }
6081
6082 return (0);
6083 }
6084
6085 /*
6086 ** Enable the hardware to drop packets when the buffer is
6087 ** full.  This is useful with multiqueue, so that no single
6088 ** full queue stalls the entire RX engine.  We only enable
6089 ** this when multiqueue is in use AND flow control is
6090 ** disabled.
6091 */
6092 static void
6093 ixgbe_enable_rx_drop(struct adapter *adapter)
6094 {
6095 struct ixgbe_hw *hw = &adapter->hw;
6096
6097 for (int i = 0; i < adapter->num_queues; i++) {
6098 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6099 srrctl |= IXGBE_SRRCTL_DROP_EN;
6100 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6101 }
6102 }
6103
6104 static void
6105 ixgbe_disable_rx_drop(struct adapter *adapter)
6106 {
6107 struct ixgbe_hw *hw = &adapter->hw;
6108
6109 for (int i = 0; i < adapter->num_queues; i++) {
6110 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6111 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6112 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6113 }
6114 }
6115