ixgbe.c revision 1.14.4.2 1 /******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
62 /*$NetBSD: ixgbe.c,v 1.14.4.2 2015/06/06 14:40:12 skrll Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68 #include "vlan.h"
69
70 /*********************************************************************
71 * Set this to one to display debug statistics
72 *********************************************************************/
73 int ixgbe_display_debug_stats = 0;
74
75 /*********************************************************************
76 * Driver version
77 *********************************************************************/
78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
79
80 /*********************************************************************
81 * PCI Device ID Table
82 *
83 * Used by probe to select devices to load on
84 * Last field stores an index into ixgbe_strings
85 * Last entry must be all 0s
86 *
87 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
88 *********************************************************************/
89
90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
91 {
92 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
93 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
94 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
95 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
96 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
97 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
98 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
99 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
100 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
101 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
102 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
103 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
104 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
105 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
106 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
107 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
108 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
109 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
110 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
111 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
112 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
113 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
114 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
115 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
116 /* required last entry */
117 {0, 0, 0, 0, 0}
118 };
119
120 /*********************************************************************
121 * Table of branding strings
122 *********************************************************************/
123
124 static const char *ixgbe_strings[] = {
125 "Intel(R) PRO/10GbE PCI-Express Network Driver"
126 };
127
128 /*********************************************************************
129 * Function prototypes
130 *********************************************************************/
131 static int ixgbe_probe(device_t, cfdata_t, void *);
132 static void ixgbe_attach(device_t, device_t, void *);
133 static int ixgbe_detach(device_t, int);
134 #if 0
135 static int ixgbe_shutdown(device_t);
136 #endif
137 #if IXGBE_LEGACY_TX
138 static void ixgbe_start(struct ifnet *);
139 static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
140 #else
141 static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
142 static int ixgbe_mq_start_locked(struct ifnet *,
143 struct tx_ring *, struct mbuf *);
144 static void ixgbe_qflush(struct ifnet *);
145 static void ixgbe_deferred_mq_start(void *);
146 #endif
147 static int ixgbe_ioctl(struct ifnet *, u_long, void *);
148 static void ixgbe_ifstop(struct ifnet *, int);
149 static int ixgbe_init(struct ifnet *);
150 static void ixgbe_init_locked(struct adapter *);
151 static void ixgbe_stop(void *);
152 static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
153 static int ixgbe_media_change(struct ifnet *);
154 static void ixgbe_identify_hardware(struct adapter *);
155 static int ixgbe_allocate_pci_resources(struct adapter *,
156 const struct pci_attach_args *);
157 static int ixgbe_allocate_msix(struct adapter *,
158 const struct pci_attach_args *);
159 static int ixgbe_allocate_legacy(struct adapter *,
160 const struct pci_attach_args *);
161 static int ixgbe_allocate_queues(struct adapter *);
162 static int ixgbe_setup_msix(struct adapter *);
163 static void ixgbe_free_pci_resources(struct adapter *);
164 static void ixgbe_local_timer(void *);
165 static int ixgbe_setup_interface(device_t, struct adapter *);
166 static void ixgbe_config_link(struct adapter *);
167
168 static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
169 static int ixgbe_setup_transmit_structures(struct adapter *);
170 static void ixgbe_setup_transmit_ring(struct tx_ring *);
171 static void ixgbe_initialize_transmit_units(struct adapter *);
172 static void ixgbe_free_transmit_structures(struct adapter *);
173 static void ixgbe_free_transmit_buffers(struct tx_ring *);
174
175 static int ixgbe_allocate_receive_buffers(struct rx_ring *);
176 static int ixgbe_setup_receive_structures(struct adapter *);
177 static int ixgbe_setup_receive_ring(struct rx_ring *);
178 static void ixgbe_initialize_receive_units(struct adapter *);
179 static void ixgbe_free_receive_structures(struct adapter *);
180 static void ixgbe_free_receive_buffers(struct rx_ring *);
181 static void ixgbe_setup_hw_rsc(struct rx_ring *);
182
183 static void ixgbe_enable_intr(struct adapter *);
184 static void ixgbe_disable_intr(struct adapter *);
185 static void ixgbe_update_stats_counters(struct adapter *);
186 static bool ixgbe_txeof(struct tx_ring *);
187 static bool ixgbe_rxeof(struct ix_queue *);
188 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
189 struct ixgbe_hw_stats *);
190 static void ixgbe_set_promisc(struct adapter *);
191 static void ixgbe_set_multi(struct adapter *);
192 static void ixgbe_update_link_status(struct adapter *);
193 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
194 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
195 static int ixgbe_set_flowcntl(SYSCTLFN_PROTO);
196 static int ixgbe_set_advertise(SYSCTLFN_PROTO);
197 static int ixgbe_set_thermal_test(SYSCTLFN_PROTO);
198 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
199 struct ixgbe_dma_alloc *, int);
200 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
201 static int ixgbe_tx_ctx_setup(struct tx_ring *,
202 struct mbuf *, u32 *, u32 *);
203 static int ixgbe_tso_setup(struct tx_ring *,
204 struct mbuf *, u32 *, u32 *);
205 static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
206 static void ixgbe_configure_ivars(struct adapter *);
207 static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
208
209 static void ixgbe_setup_vlan_hw_support(struct adapter *);
210 #if 0
211 static void ixgbe_register_vlan(void *, struct ifnet *, u16);
212 static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
213 #endif
214
215 static void ixgbe_add_hw_stats(struct adapter *adapter);
216
217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
219 struct mbuf *, u32);
220
221 static void ixgbe_enable_rx_drop(struct adapter *);
222 static void ixgbe_disable_rx_drop(struct adapter *);
223
224 /* Support for pluggable optic modules */
225 static bool ixgbe_sfp_probe(struct adapter *);
226 static void ixgbe_setup_optics(struct adapter *);
227
228 /* Legacy (single vector interrupt handler */
229 static int ixgbe_legacy_irq(void *);
230
231 #if defined(NETBSD_MSI_OR_MSIX)
232 /* The MSI/X Interrupt handlers */
233 static void ixgbe_msix_que(void *);
234 static void ixgbe_msix_link(void *);
235 #endif
236
237 /* Software interrupts for deferred work */
238 static void ixgbe_handle_que(void *);
239 static void ixgbe_handle_link(void *);
240 static void ixgbe_handle_msf(void *);
241 static void ixgbe_handle_mod(void *);
242
243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
245
246 #ifdef IXGBE_FDIR
247 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
248 static void ixgbe_reinit_fdir(void *, int);
249 #endif
250
251 /*********************************************************************
252 * FreeBSD Device Interface Entry Points
253 *********************************************************************/
254
255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
256 ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
257 DVF_DETACH_SHUTDOWN);
258
259 #if 0
260 devclass_t ixgbe_devclass;
261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
262
263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
265 #endif
266
267 /*
268 ** TUNEABLE PARAMETERS:
269 */
270
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector
276 */
277 static int ixgbe_enable_aim = TRUE;
278 #define TUNABLE_INT(__x, __y)
279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
280
281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
283
284 /* How many packets rxeof tries to clean at a time */
285 static int ixgbe_rx_process_limit = 256;
286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
287
288 /* How many packets txeof tries to clean at a time */
289 static int ixgbe_tx_process_limit = 256;
290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
291
292 /*
293 ** Smart speed setting, default to on
294 ** this only works as a compile option
295 ** right now as its during attach, set
296 ** this to 'ixgbe_smart_speed_off' to
297 ** disable.
298 */
299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
300
301 /*
302 * MSIX should be the default for best performance,
303 * but this allows it to be forced off for testing.
304 */
305 static int ixgbe_enable_msix = 1;
306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
307
308 #if defined(NETBSD_MSI_OR_MSIX)
309 /*
310 * Number of Queues, can be set to 0,
311 * it then autoconfigures based on the
312 * number of cpus with a max of 8. This
313 * can be overriden manually here.
314 */
315 static int ixgbe_num_queues = 0;
316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
317 #endif
318
319 /*
320 ** Number of TX descriptors per ring,
321 ** setting higher than RX as this seems
322 ** the better performing choice.
323 */
324 static int ixgbe_txd = PERFORM_TXD;
325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
326
327 /* Number of RX descriptors per ring */
328 static int ixgbe_rxd = PERFORM_RXD;
329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
330
331 /*
332 ** HW RSC control:
333 ** this feature only works with
334 ** IPv4, and only on 82599 and later.
335 ** Also this will cause IP forwarding to
336 ** fail and that can't be controlled by
337 ** the stack as LRO can. For all these
338 ** reasons I've deemed it best to leave
339 ** this off and not bother with a tuneable
340 ** interface, this would need to be compiled
341 ** to enable.
342 */
343 static bool ixgbe_rsc_enable = FALSE;
344
345 /* Keep running tab on them for sanity check */
346 static int ixgbe_total_ports;
347
348 #ifdef IXGBE_FDIR
349 /*
350 ** For Flow Director: this is the
351 ** number of TX packets we sample
352 ** for the filter pool, this means
353 ** every 20th packet will be probed.
354 **
355 ** This feature can be disabled by
356 ** setting this to 0.
357 */
358 static int atr_sample_rate = 20;
359 /*
360 ** Flow Director actually 'steals'
361 ** part of the packet buffer as its
362 ** filter pool, this variable controls
363 ** how much it uses:
364 ** 0 = 64K, 1 = 128K, 2 = 256K
365 */
366 static int fdir_pballoc = 1;
367 #endif
368
369 #ifdef DEV_NETMAP
370 /*
371 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
372 * be a reference on how to implement netmap support in a driver.
373 * Additional comments are in ixgbe_netmap.h .
374 *
375 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
376 * that extend the standard driver.
377 */
378 #include <dev/netmap/ixgbe_netmap.h>
379 #endif /* DEV_NETMAP */
380
381 /*********************************************************************
382 * Device identification routine
383 *
384 * ixgbe_probe determines if the driver should be loaded on
385 * adapter based on PCI vendor/device id of the adapter.
386 *
387 * return 1 on success, 0 on failure
388 *********************************************************************/
389
390 static int
391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
392 {
393 const struct pci_attach_args *pa = aux;
394
395 return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
396 }
397
398 static ixgbe_vendor_info_t *
399 ixgbe_lookup(const struct pci_attach_args *pa)
400 {
401 pcireg_t subid;
402 ixgbe_vendor_info_t *ent;
403
404 INIT_DEBUGOUT("ixgbe_probe: begin");
405
406 if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
407 return NULL;
408
409 subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
410
411 for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
412 if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
413 PCI_PRODUCT(pa->pa_id) == ent->device_id &&
414
415 (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
416 ent->subvendor_id == 0) &&
417
418 (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
419 ent->subdevice_id == 0)) {
420 ++ixgbe_total_ports;
421 return ent;
422 }
423 }
424 return NULL;
425 }
426
427
428 static void
429 ixgbe_sysctl_attach(struct adapter *adapter)
430 {
431 struct sysctllog **log;
432 const struct sysctlnode *rnode, *cnode;
433 device_t dev;
434
435 dev = adapter->dev;
436 log = &adapter->sysctllog;
437
438 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
439 aprint_error_dev(dev, "could not create sysctl root\n");
440 return;
441 }
442
443 if (sysctl_createv(log, 0, &rnode, &cnode,
444 CTLFLAG_READONLY, CTLTYPE_INT,
445 "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
446 NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
447 aprint_error_dev(dev, "could not create sysctl\n");
448
449 if (sysctl_createv(log, 0, &rnode, &cnode,
450 CTLFLAG_READONLY, CTLTYPE_INT,
451 "num_queues", SYSCTL_DESCR("Number of queues"),
452 NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
453 aprint_error_dev(dev, "could not create sysctl\n");
454
455 if (sysctl_createv(log, 0, &rnode, &cnode,
456 CTLFLAG_READWRITE, CTLTYPE_INT,
457 "fc", SYSCTL_DESCR("Flow Control"),
458 ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
459 aprint_error_dev(dev, "could not create sysctl\n");
460
461 /* XXX This is an *instance* sysctl controlling a *global* variable.
462 * XXX It's that way in the FreeBSD driver that this derives from.
463 */
464 if (sysctl_createv(log, 0, &rnode, &cnode,
465 CTLFLAG_READWRITE, CTLTYPE_INT,
466 "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
467 NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
468 aprint_error_dev(dev, "could not create sysctl\n");
469
470 if (sysctl_createv(log, 0, &rnode, &cnode,
471 CTLFLAG_READWRITE, CTLTYPE_INT,
472 "advertise_speed", SYSCTL_DESCR("Link Speed"),
473 ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
474 aprint_error_dev(dev, "could not create sysctl\n");
475
476 if (sysctl_createv(log, 0, &rnode, &cnode,
477 CTLFLAG_READWRITE, CTLTYPE_INT,
478 "ts", SYSCTL_DESCR("Thermal Test"),
479 ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
480 aprint_error_dev(dev, "could not create sysctl\n");
481 }
482
483 /*********************************************************************
484 * Device initialization routine
485 *
486 * The attach entry point is called when the driver is being loaded.
487 * This routine identifies the type of hardware, allocates all resources
488 * and initializes the hardware.
489 *
490 * return 0 on success, positive on failure
491 *********************************************************************/
492
493 static void
494 ixgbe_attach(device_t parent, device_t dev, void *aux)
495 {
496 struct adapter *adapter;
497 struct ixgbe_hw *hw;
498 int error = 0;
499 u16 csum;
500 u32 ctrl_ext;
501 ixgbe_vendor_info_t *ent;
502 const struct pci_attach_args *pa = aux;
503
504 INIT_DEBUGOUT("ixgbe_attach: begin");
505
506 /* Allocate, clear, and link in our adapter structure */
507 adapter = device_private(dev);
508 adapter->dev = adapter->osdep.dev = dev;
509 hw = &adapter->hw;
510 adapter->osdep.pc = pa->pa_pc;
511 adapter->osdep.tag = pa->pa_tag;
512 adapter->osdep.dmat = pa->pa_dmat;
513
514 ent = ixgbe_lookup(pa);
515
516 KASSERT(ent != NULL);
517
518 aprint_normal(": %s, Version - %s\n",
519 ixgbe_strings[ent->index], ixgbe_driver_version);
520
521 /* Core Lock Init*/
522 IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
523
524 /* SYSCTL APIs */
525
526 ixgbe_sysctl_attach(adapter);
527
528 /* Set up the timer callout */
529 callout_init(&adapter->timer, 0);
530
531 /* Determine hardware revision */
532 ixgbe_identify_hardware(adapter);
533
534 /* Do base PCI setup - map BAR0 */
535 if (ixgbe_allocate_pci_resources(adapter, pa)) {
536 aprint_error_dev(dev, "Allocation of PCI resources failed\n");
537 error = ENXIO;
538 goto err_out;
539 }
540
541 /* Do descriptor calc and sanity checks */
542 if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
543 ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
544 aprint_error_dev(dev, "TXD config issue, using default!\n");
545 adapter->num_tx_desc = DEFAULT_TXD;
546 } else
547 adapter->num_tx_desc = ixgbe_txd;
548
549 /*
550 ** With many RX rings it is easy to exceed the
551 ** system mbuf allocation. Tuning nmbclusters
552 ** can alleviate this.
553 */
554 if (nmbclusters > 0 ) {
555 int s;
556 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
557 if (s > nmbclusters) {
558 aprint_error_dev(dev, "RX Descriptors exceed "
559 "system mbuf max, using default instead!\n");
560 ixgbe_rxd = DEFAULT_RXD;
561 }
562 }
563
564 if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
565 ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
566 aprint_error_dev(dev, "RXD config issue, using default!\n");
567 adapter->num_rx_desc = DEFAULT_RXD;
568 } else
569 adapter->num_rx_desc = ixgbe_rxd;
570
571 /* Allocate our TX/RX Queues */
572 if (ixgbe_allocate_queues(adapter)) {
573 error = ENOMEM;
574 goto err_out;
575 }
576
577 /* Allocate multicast array memory. */
578 adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
579 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
580 if (adapter->mta == NULL) {
581 aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
582 error = ENOMEM;
583 goto err_late;
584 }
585
586 /* Initialize the shared code */
587 error = ixgbe_init_shared_code(hw);
588 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
589 /*
590 ** No optics in this port, set up
591 ** so the timer routine will probe
592 ** for later insertion.
593 */
594 adapter->sfp_probe = TRUE;
595 error = 0;
596 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
597 aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
598 error = EIO;
599 goto err_late;
600 } else if (error) {
601 aprint_error_dev(dev,"Unable to initialize the shared code\n");
602 error = EIO;
603 goto err_late;
604 }
605
606 /* Make sure we have a good EEPROM before we read from it */
607 if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
608 aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
609 error = EIO;
610 goto err_late;
611 }
612
613 error = ixgbe_init_hw(hw);
614 switch (error) {
615 case IXGBE_ERR_EEPROM_VERSION:
616 aprint_error_dev(dev, "This device is a pre-production adapter/"
617 "LOM. Please be aware there may be issues associated "
618 "with your hardware.\n If you are experiencing problems "
619 "please contact your Intel or hardware representative "
620 "who provided you with this hardware.\n");
621 break;
622 case IXGBE_ERR_SFP_NOT_SUPPORTED:
623 aprint_error_dev(dev,"Unsupported SFP+ Module\n");
624 error = EIO;
625 aprint_error_dev(dev,"Hardware Initialization Failure\n");
626 goto err_late;
627 case IXGBE_ERR_SFP_NOT_PRESENT:
628 device_printf(dev,"No SFP+ Module found\n");
629 /* falls thru */
630 default:
631 break;
632 }
633
634 /* Detect and set physical type */
635 ixgbe_setup_optics(adapter);
636
637 if ((adapter->msix > 1) && (ixgbe_enable_msix))
638 error = ixgbe_allocate_msix(adapter, pa);
639 else
640 error = ixgbe_allocate_legacy(adapter, pa);
641 if (error)
642 goto err_late;
643
644 /* Setup OS specific network interface */
645 if (ixgbe_setup_interface(dev, adapter) != 0)
646 goto err_late;
647
648 /* Initialize statistics */
649 ixgbe_update_stats_counters(adapter);
650
651 /* Print PCIE bus type/speed/width info */
652 ixgbe_get_bus_info(hw);
653 aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
654 ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
655 (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
656 (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
657 (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
658 (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
659 ("Unknown"));
660
661 if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
662 (hw->bus.speed == ixgbe_bus_speed_2500)) {
663 aprint_error_dev(dev, "PCI-Express bandwidth available"
664 " for this card\n is not sufficient for"
665 " optimal performance.\n");
666 aprint_error_dev(dev, "For optimal performance a x8 "
667 "PCIE, or x4 PCIE 2 slot is required.\n");
668 }
669
670 /* Set an initial default flow control value */
671 adapter->fc = ixgbe_fc_full;
672
673 /* let hardware know driver is loaded */
674 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
675 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
676 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
677
678 ixgbe_add_hw_stats(adapter);
679
680 #ifdef DEV_NETMAP
681 ixgbe_netmap_attach(adapter);
682 #endif /* DEV_NETMAP */
683 INIT_DEBUGOUT("ixgbe_attach: end");
684 return;
685 err_late:
686 ixgbe_free_transmit_structures(adapter);
687 ixgbe_free_receive_structures(adapter);
688 err_out:
689 if (adapter->ifp != NULL)
690 if_free(adapter->ifp);
691 ixgbe_free_pci_resources(adapter);
692 if (adapter->mta != NULL)
693 free(adapter->mta, M_DEVBUF);
694 return;
695
696 }
697
698 /*********************************************************************
699 * Device removal routine
700 *
701 * The detach entry point is called when the driver is being removed.
702 * This routine stops the adapter and deallocates all the resources
703 * that were allocated for driver operation.
704 *
705 * return 0 on success, positive on failure
706 *********************************************************************/
707
708 static int
709 ixgbe_detach(device_t dev, int flags)
710 {
711 struct adapter *adapter = device_private(dev);
712 struct rx_ring *rxr = adapter->rx_rings;
713 struct ixgbe_hw_stats *stats = &adapter->stats;
714 struct ix_queue *que = adapter->queues;
715 struct tx_ring *txr = adapter->tx_rings;
716 u32 ctrl_ext;
717
718 INIT_DEBUGOUT("ixgbe_detach: begin");
719
720 #if NVLAN > 0
721 /* Make sure VLANs are not using driver */
722 if (!VLAN_ATTACHED(&adapter->osdep.ec))
723 ; /* nothing to do: no VLANs */
724 else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
725 vlan_ifdetach(adapter->ifp);
726 else {
727 aprint_error_dev(dev, "VLANs in use\n");
728 return EBUSY;
729 }
730 #endif
731
732 IXGBE_CORE_LOCK(adapter);
733 ixgbe_stop(adapter);
734 IXGBE_CORE_UNLOCK(adapter);
735
736 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
737 #ifndef IXGBE_LEGACY_TX
738 softint_disestablish(txr->txq_si);
739 #endif
740 softint_disestablish(que->que_si);
741 }
742
743 /* Drain the Link queue */
744 softint_disestablish(adapter->link_si);
745 softint_disestablish(adapter->mod_si);
746 softint_disestablish(adapter->msf_si);
747 #ifdef IXGBE_FDIR
748 softint_disestablish(adapter->fdir_si);
749 #endif
750
751 /* let hardware know driver is unloading */
752 ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
753 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
754 IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
755
756 ether_ifdetach(adapter->ifp);
757 callout_halt(&adapter->timer, NULL);
758 #ifdef DEV_NETMAP
759 netmap_detach(adapter->ifp);
760 #endif /* DEV_NETMAP */
761 ixgbe_free_pci_resources(adapter);
762 #if 0 /* XXX the NetBSD port is probably missing something here */
763 bus_generic_detach(dev);
764 #endif
765 if_detach(adapter->ifp);
766
767 sysctl_teardown(&adapter->sysctllog);
768 evcnt_detach(&adapter->handleq);
769 evcnt_detach(&adapter->req);
770 evcnt_detach(&adapter->morerx);
771 evcnt_detach(&adapter->moretx);
772 evcnt_detach(&adapter->txloops);
773 evcnt_detach(&adapter->efbig_tx_dma_setup);
774 evcnt_detach(&adapter->m_defrag_failed);
775 evcnt_detach(&adapter->efbig2_tx_dma_setup);
776 evcnt_detach(&adapter->einval_tx_dma_setup);
777 evcnt_detach(&adapter->other_tx_dma_setup);
778 evcnt_detach(&adapter->eagain_tx_dma_setup);
779 evcnt_detach(&adapter->enomem_tx_dma_setup);
780 evcnt_detach(&adapter->watchdog_events);
781 evcnt_detach(&adapter->tso_err);
782 evcnt_detach(&adapter->link_irq);
783
784 txr = adapter->tx_rings;
785 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
786 evcnt_detach(&txr->no_desc_avail);
787 evcnt_detach(&txr->total_packets);
788 evcnt_detach(&txr->tso_tx);
789
790 if (i < __arraycount(adapter->stats.mpc)) {
791 evcnt_detach(&adapter->stats.mpc[i]);
792 }
793 if (i < __arraycount(adapter->stats.pxontxc)) {
794 evcnt_detach(&adapter->stats.pxontxc[i]);
795 evcnt_detach(&adapter->stats.pxonrxc[i]);
796 evcnt_detach(&adapter->stats.pxofftxc[i]);
797 evcnt_detach(&adapter->stats.pxoffrxc[i]);
798 evcnt_detach(&adapter->stats.pxon2offc[i]);
799 }
800 if (i < __arraycount(adapter->stats.qprc)) {
801 evcnt_detach(&adapter->stats.qprc[i]);
802 evcnt_detach(&adapter->stats.qptc[i]);
803 evcnt_detach(&adapter->stats.qbrc[i]);
804 evcnt_detach(&adapter->stats.qbtc[i]);
805 evcnt_detach(&adapter->stats.qprdc[i]);
806 }
807
808 evcnt_detach(&rxr->rx_packets);
809 evcnt_detach(&rxr->rx_bytes);
810 evcnt_detach(&rxr->no_jmbuf);
811 evcnt_detach(&rxr->rx_discarded);
812 evcnt_detach(&rxr->rx_irq);
813 }
814 evcnt_detach(&stats->ipcs);
815 evcnt_detach(&stats->l4cs);
816 evcnt_detach(&stats->ipcs_bad);
817 evcnt_detach(&stats->l4cs_bad);
818 evcnt_detach(&stats->intzero);
819 evcnt_detach(&stats->legint);
820 evcnt_detach(&stats->crcerrs);
821 evcnt_detach(&stats->illerrc);
822 evcnt_detach(&stats->errbc);
823 evcnt_detach(&stats->mspdc);
824 evcnt_detach(&stats->mlfc);
825 evcnt_detach(&stats->mrfc);
826 evcnt_detach(&stats->rlec);
827 evcnt_detach(&stats->lxontxc);
828 evcnt_detach(&stats->lxonrxc);
829 evcnt_detach(&stats->lxofftxc);
830 evcnt_detach(&stats->lxoffrxc);
831
832 /* Packet Reception Stats */
833 evcnt_detach(&stats->tor);
834 evcnt_detach(&stats->gorc);
835 evcnt_detach(&stats->tpr);
836 evcnt_detach(&stats->gprc);
837 evcnt_detach(&stats->mprc);
838 evcnt_detach(&stats->bprc);
839 evcnt_detach(&stats->prc64);
840 evcnt_detach(&stats->prc127);
841 evcnt_detach(&stats->prc255);
842 evcnt_detach(&stats->prc511);
843 evcnt_detach(&stats->prc1023);
844 evcnt_detach(&stats->prc1522);
845 evcnt_detach(&stats->ruc);
846 evcnt_detach(&stats->rfc);
847 evcnt_detach(&stats->roc);
848 evcnt_detach(&stats->rjc);
849 evcnt_detach(&stats->mngprc);
850 evcnt_detach(&stats->xec);
851
852 /* Packet Transmission Stats */
853 evcnt_detach(&stats->gotc);
854 evcnt_detach(&stats->tpt);
855 evcnt_detach(&stats->gptc);
856 evcnt_detach(&stats->bptc);
857 evcnt_detach(&stats->mptc);
858 evcnt_detach(&stats->mngptc);
859 evcnt_detach(&stats->ptc64);
860 evcnt_detach(&stats->ptc127);
861 evcnt_detach(&stats->ptc255);
862 evcnt_detach(&stats->ptc511);
863 evcnt_detach(&stats->ptc1023);
864 evcnt_detach(&stats->ptc1522);
865
866 ixgbe_free_transmit_structures(adapter);
867 ixgbe_free_receive_structures(adapter);
868 free(adapter->mta, M_DEVBUF);
869
870 IXGBE_CORE_LOCK_DESTROY(adapter);
871 return (0);
872 }
873
874 /*********************************************************************
875 *
876 * Shutdown entry point
877 *
878 **********************************************************************/
879
880 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
881 static int
882 ixgbe_shutdown(device_t dev)
883 {
884 struct adapter *adapter = device_private(dev);
885 IXGBE_CORE_LOCK(adapter);
886 ixgbe_stop(adapter);
887 IXGBE_CORE_UNLOCK(adapter);
888 return (0);
889 }
890 #endif
891
892
893 #ifdef IXGBE_LEGACY_TX
894 /*********************************************************************
895 * Transmit entry point
896 *
897 * ixgbe_start is called by the stack to initiate a transmit.
898 * The driver will remain in this routine as long as there are
899 * packets to transmit and transmit resources are available.
900 * In case resources are not available stack is notified and
901 * the packet is requeued.
902 **********************************************************************/
903
904 static void
905 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
906 {
907 int rc;
908 struct mbuf *m_head;
909 struct adapter *adapter = txr->adapter;
910
911 IXGBE_TX_LOCK_ASSERT(txr);
912
913 if ((ifp->if_flags & IFF_RUNNING) == 0)
914 return;
915 if (!adapter->link_active)
916 return;
917
918 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
919 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
920 break;
921
922 IFQ_POLL(&ifp->if_snd, m_head);
923 if (m_head == NULL)
924 break;
925
926 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
927 break;
928 }
929 IFQ_DEQUEUE(&ifp->if_snd, m_head);
930 if (rc == EFBIG) {
931 struct mbuf *mtmp;
932
933 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
934 m_head = mtmp;
935 rc = ixgbe_xmit(txr, m_head);
936 if (rc != 0)
937 adapter->efbig2_tx_dma_setup.ev_count++;
938 } else
939 adapter->m_defrag_failed.ev_count++;
940 }
941 if (rc != 0) {
942 m_freem(m_head);
943 continue;
944 }
945
946 /* Send a copy of the frame to the BPF listener */
947 bpf_mtap(ifp, m_head);
948
949 /* Set watchdog on */
950 getmicrotime(&txr->watchdog_time);
951 txr->queue_status = IXGBE_QUEUE_WORKING;
952
953 }
954 return;
955 }
956
957 /*
958 * Legacy TX start - called by the stack, this
959 * always uses the first tx ring, and should
960 * not be used with multiqueue tx enabled.
961 */
962 static void
963 ixgbe_start(struct ifnet *ifp)
964 {
965 struct adapter *adapter = ifp->if_softc;
966 struct tx_ring *txr = adapter->tx_rings;
967
968 if (ifp->if_flags & IFF_RUNNING) {
969 IXGBE_TX_LOCK(txr);
970 ixgbe_start_locked(txr, ifp);
971 IXGBE_TX_UNLOCK(txr);
972 }
973 return;
974 }
975
976 #else /* ! IXGBE_LEGACY_TX */
977
978 /*
979 ** Multiqueue Transmit driver
980 **
981 */
982 static int
983 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
984 {
985 struct adapter *adapter = ifp->if_softc;
986 struct ix_queue *que;
987 struct tx_ring *txr;
988 int i = 0, err = 0;
989
990 /* Which queue to use */
991 if ((m->m_flags & M_FLOWID) != 0)
992 i = m->m_pkthdr.flowid % adapter->num_queues;
993 else
994 i = cpu_index(curcpu()) % adapter->num_queues;
995
996 txr = &adapter->tx_rings[i];
997 que = &adapter->queues[i];
998
999 if (IXGBE_TX_TRYLOCK(txr)) {
1000 err = ixgbe_mq_start_locked(ifp, txr, m);
1001 IXGBE_TX_UNLOCK(txr);
1002 } else {
1003 err = drbr_enqueue(ifp, txr->br, m);
1004 softint_schedule(txr->txq_si);
1005 }
1006
1007 return (err);
1008 }
1009
1010 static int
1011 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
1012 {
1013 struct adapter *adapter = txr->adapter;
1014 struct mbuf *next;
1015 int enqueued, err = 0;
1016
1017 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
1018 adapter->link_active == 0) {
1019 if (m != NULL)
1020 err = drbr_enqueue(ifp, txr->br, m);
1021 return (err);
1022 }
1023
1024 enqueued = 0;
1025 if (m != NULL) {
1026 err = drbr_enqueue(ifp, txr->br, m);
1027 if (err) {
1028 return (err);
1029 }
1030 }
1031
1032 /* Process the queue */
1033 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1034 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1035 if (next == NULL) {
1036 drbr_advance(ifp, txr->br);
1037 } else {
1038 drbr_putback(ifp, txr->br, next);
1039 }
1040 break;
1041 }
1042 drbr_advance(ifp, txr->br);
1043 enqueued++;
1044 /* Send a copy of the frame to the BPF listener */
1045 bpf_mtap(ifp, next);
1046 if ((ifp->if_flags & IFF_RUNNING) == 0)
1047 break;
1048 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
1049 ixgbe_txeof(txr);
1050 }
1051
1052 if (enqueued > 0) {
1053 /* Set watchdog on */
1054 txr->queue_status = IXGBE_QUEUE_WORKING;
1055 getmicrotime(&txr->watchdog_time);
1056 }
1057
1058 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
1059 ixgbe_txeof(txr);
1060
1061 return (err);
1062 }
1063
1064 /*
1065 * Called from a taskqueue to drain queued transmit packets.
1066 */
1067 static void
1068 ixgbe_deferred_mq_start(void *arg)
1069 {
1070 struct tx_ring *txr = arg;
1071 struct adapter *adapter = txr->adapter;
1072 struct ifnet *ifp = adapter->ifp;
1073
1074 IXGBE_TX_LOCK(txr);
1075 if (!drbr_empty(ifp, txr->br))
1076 ixgbe_mq_start_locked(ifp, txr, NULL);
1077 IXGBE_TX_UNLOCK(txr);
1078 }
1079
1080 /*
1081 ** Flush all ring buffers
1082 */
1083 static void
1084 ixgbe_qflush(struct ifnet *ifp)
1085 {
1086 struct adapter *adapter = ifp->if_softc;
1087 struct tx_ring *txr = adapter->tx_rings;
1088 struct mbuf *m;
1089
1090 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1091 IXGBE_TX_LOCK(txr);
1092 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1093 m_freem(m);
1094 IXGBE_TX_UNLOCK(txr);
1095 }
1096 if_qflush(ifp);
1097 }
1098 #endif /* IXGBE_LEGACY_TX */
1099
1100 static int
1101 ixgbe_ifflags_cb(struct ethercom *ec)
1102 {
1103 struct ifnet *ifp = &ec->ec_if;
1104 struct adapter *adapter = ifp->if_softc;
1105 int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
1106
1107 IXGBE_CORE_LOCK(adapter);
1108
1109 if (change != 0)
1110 adapter->if_flags = ifp->if_flags;
1111
1112 if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
1113 rc = ENETRESET;
1114 else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
1115 ixgbe_set_promisc(adapter);
1116
1117 /* Set up VLAN support and filter */
1118 ixgbe_setup_vlan_hw_support(adapter);
1119
1120 IXGBE_CORE_UNLOCK(adapter);
1121
1122 return rc;
1123 }
1124
1125 /*********************************************************************
1126 * Ioctl entry point
1127 *
1128 * ixgbe_ioctl is called when the user wants to configure the
1129 * interface.
1130 *
1131 * return 0 on success, positive on failure
1132 **********************************************************************/
1133
1134 static int
1135 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
1136 {
1137 struct adapter *adapter = ifp->if_softc;
1138 struct ixgbe_hw *hw = &adapter->hw;
1139 struct ifcapreq *ifcr = data;
1140 struct ifreq *ifr = data;
1141 int error = 0;
1142 int l4csum_en;
1143 const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
1144 IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
1145
1146 switch (command) {
1147 case SIOCSIFFLAGS:
1148 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1149 break;
1150 case SIOCADDMULTI:
1151 case SIOCDELMULTI:
1152 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1153 break;
1154 case SIOCSIFMEDIA:
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1157 break;
1158 case SIOCSIFCAP:
1159 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1160 break;
1161 case SIOCSIFMTU:
1162 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
1163 break;
1164 default:
1165 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1166 break;
1167 }
1168
1169 switch (command) {
1170 case SIOCSIFMEDIA:
1171 case SIOCGIFMEDIA:
1172 return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1173 case SIOCGI2C:
1174 {
1175 struct ixgbe_i2c_req i2c;
1176 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1177 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1178 if (error)
1179 break;
1180 if ((i2c.dev_addr != 0xA0) || (i2c.dev_addr != 0xA2)){
1181 error = EINVAL;
1182 break;
1183 }
1184 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1185 i2c.dev_addr, i2c.data);
1186 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1187 break;
1188 }
1189 case SIOCSIFCAP:
1190 /* Layer-4 Rx checksum offload has to be turned on and
1191 * off as a unit.
1192 */
1193 l4csum_en = ifcr->ifcr_capenable & l4csum;
1194 if (l4csum_en != l4csum && l4csum_en != 0)
1195 return EINVAL;
1196 /*FALLTHROUGH*/
1197 case SIOCADDMULTI:
1198 case SIOCDELMULTI:
1199 case SIOCSIFFLAGS:
1200 case SIOCSIFMTU:
1201 default:
1202 if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
1203 return error;
1204 if ((ifp->if_flags & IFF_RUNNING) == 0)
1205 ;
1206 else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
1207 IXGBE_CORE_LOCK(adapter);
1208 ixgbe_init_locked(adapter);
1209 IXGBE_CORE_UNLOCK(adapter);
1210 } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
1211 /*
1212 * Multicast list has changed; set the hardware filter
1213 * accordingly.
1214 */
1215 IXGBE_CORE_LOCK(adapter);
1216 ixgbe_disable_intr(adapter);
1217 ixgbe_set_multi(adapter);
1218 ixgbe_enable_intr(adapter);
1219 IXGBE_CORE_UNLOCK(adapter);
1220 }
1221 return 0;
1222 }
1223
1224 return error;
1225 }
1226
1227 /*********************************************************************
1228 * Init entry point
1229 *
1230 * This routine is used in two ways. It is used by the stack as
1231 * init entry point in network interface structure. It is also used
1232 * by the driver as a hw/sw initialization routine to get to a
1233 * consistent state.
1234 *
1235 * return 0 on success, positive on failure
1236 **********************************************************************/
1237 #define IXGBE_MHADD_MFS_SHIFT 16
1238
1239 static void
1240 ixgbe_init_locked(struct adapter *adapter)
1241 {
1242 struct ifnet *ifp = adapter->ifp;
1243 device_t dev = adapter->dev;
1244 struct ixgbe_hw *hw = &adapter->hw;
1245 u32 k, txdctl, mhadd, gpie;
1246 u32 rxdctl, rxctrl;
1247
1248 /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
1249
1250 KASSERT(mutex_owned(&adapter->core_mtx));
1251 INIT_DEBUGOUT("ixgbe_init: begin");
1252 hw->adapter_stopped = FALSE;
1253 ixgbe_stop_adapter(hw);
1254 callout_stop(&adapter->timer);
1255
1256 /* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
1257 adapter->max_frame_size =
1258 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1259
1260 /* reprogram the RAR[0] in case user changed it. */
1261 ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1262
1263 /* Get the latest mac address, User can use a LAA */
1264 memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
1265 IXGBE_ETH_LENGTH_OF_ADDRESS);
1266 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1267 hw->addr_ctrl.rar_used_count = 1;
1268
1269 /* Prepare transmit descriptors and buffers */
1270 if (ixgbe_setup_transmit_structures(adapter)) {
1271 device_printf(dev,"Could not setup transmit structures\n");
1272 ixgbe_stop(adapter);
1273 return;
1274 }
1275
1276 ixgbe_init_hw(hw);
1277 ixgbe_initialize_transmit_units(adapter);
1278
1279 /* Setup Multicast table */
1280 ixgbe_set_multi(adapter);
1281
1282 /*
1283 ** Determine the correct mbuf pool
1284 ** for doing jumbo frames
1285 */
1286 if (adapter->max_frame_size <= 2048)
1287 adapter->rx_mbuf_sz = MCLBYTES;
1288 else if (adapter->max_frame_size <= 4096)
1289 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1290 else if (adapter->max_frame_size <= 9216)
1291 adapter->rx_mbuf_sz = MJUM9BYTES;
1292 else
1293 adapter->rx_mbuf_sz = MJUM16BYTES;
1294
1295 /* Prepare receive descriptors and buffers */
1296 if (ixgbe_setup_receive_structures(adapter)) {
1297 device_printf(dev,"Could not setup receive structures\n");
1298 ixgbe_stop(adapter);
1299 return;
1300 }
1301
1302 /* Configure RX settings */
1303 ixgbe_initialize_receive_units(adapter);
1304
1305 gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1306
1307 /* Enable Fan Failure Interrupt */
1308 gpie |= IXGBE_SDP1_GPIEN;
1309
1310 /* Add for Thermal detection */
1311 if (hw->mac.type == ixgbe_mac_82599EB)
1312 gpie |= IXGBE_SDP2_GPIEN;
1313
1314 /* Thermal Failure Detection */
1315 if (hw->mac.type == ixgbe_mac_X540)
1316 gpie |= IXGBE_SDP0_GPIEN;
1317
1318 if (adapter->msix > 1) {
1319 /* Enable Enhanced MSIX mode */
1320 gpie |= IXGBE_GPIE_MSIX_MODE;
1321 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1322 IXGBE_GPIE_OCD;
1323 }
1324 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1325
1326 /* Set MTU size */
1327 if (ifp->if_mtu > ETHERMTU) {
1328 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1329 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1330 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1331 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1332 }
1333
1334 /* Now enable all the queues */
1335
1336 for (int i = 0; i < adapter->num_queues; i++) {
1337 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1338 txdctl |= IXGBE_TXDCTL_ENABLE;
1339 /* Set WTHRESH to 8, burst writeback */
1340 txdctl |= (8 << 16);
1341 /*
1342 * When the internal queue falls below PTHRESH (32),
1343 * start prefetching as long as there are at least
1344 * HTHRESH (1) buffers ready. The values are taken
1345 * from the Intel linux driver 3.8.21.
1346 * Prefetching enables tx line rate even with 1 queue.
1347 */
1348 txdctl |= (32 << 0) | (1 << 8);
1349 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1350 }
1351
1352 for (int i = 0; i < adapter->num_queues; i++) {
1353 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1354 if (hw->mac.type == ixgbe_mac_82598EB) {
1355 /*
1356 ** PTHRESH = 21
1357 ** HTHRESH = 4
1358 ** WTHRESH = 8
1359 */
1360 rxdctl &= ~0x3FFFFF;
1361 rxdctl |= 0x080420;
1362 }
1363 rxdctl |= IXGBE_RXDCTL_ENABLE;
1364 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1365 /* XXX I don't trust this loop, and I don't trust the
1366 * XXX memory barrier. What is this meant to do? --dyoung
1367 */
1368 for (k = 0; k < 10; k++) {
1369 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1370 IXGBE_RXDCTL_ENABLE)
1371 break;
1372 else
1373 msec_delay(1);
1374 }
1375 wmb();
1376 #ifdef DEV_NETMAP
1377 /*
1378 * In netmap mode, we must preserve the buffers made
1379 * available to userspace before the if_init()
1380 * (this is true by default on the TX side, because
1381 * init makes all buffers available to userspace).
1382 *
1383 * netmap_reset() and the device specific routines
1384 * (e.g. ixgbe_setup_receive_rings()) map these
1385 * buffers at the end of the NIC ring, so here we
1386 * must set the RDT (tail) register to make sure
1387 * they are not overwritten.
1388 *
1389 * In this driver the NIC ring starts at RDH = 0,
1390 * RDT points to the last slot available for reception (?),
1391 * so RDT = num_rx_desc - 1 means the whole ring is available.
1392 */
1393 if (ifp->if_capenable & IFCAP_NETMAP) {
1394 struct netmap_adapter *na = NA(adapter->ifp);
1395 struct netmap_kring *kring = &na->rx_rings[i];
1396 int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1397
1398 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1399 } else
1400 #endif /* DEV_NETMAP */
1401 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1402 }
1403
1404 /* Set up VLAN support and filter */
1405 ixgbe_setup_vlan_hw_support(adapter);
1406
1407 /* Enable Receive engine */
1408 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1409 if (hw->mac.type == ixgbe_mac_82598EB)
1410 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1411 rxctrl |= IXGBE_RXCTRL_RXEN;
1412 ixgbe_enable_rx_dma(hw, rxctrl);
1413
1414 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1415
1416 /* Set up MSI/X routing */
1417 if (ixgbe_enable_msix) {
1418 ixgbe_configure_ivars(adapter);
1419 /* Set up auto-mask */
1420 if (hw->mac.type == ixgbe_mac_82598EB)
1421 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1422 else {
1423 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1424 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1425 }
1426 } else { /* Simple settings for Legacy/MSI */
1427 ixgbe_set_ivar(adapter, 0, 0, 0);
1428 ixgbe_set_ivar(adapter, 0, 0, 1);
1429 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1430 }
1431
1432 #ifdef IXGBE_FDIR
1433 /* Init Flow director */
1434 if (hw->mac.type != ixgbe_mac_82598EB) {
1435 u32 hdrm = 32 << fdir_pballoc;
1436
1437 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1438 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1439 }
1440 #endif
1441
1442 /*
1443 ** Check on any SFP devices that
1444 ** need to be kick-started
1445 */
1446 if (hw->phy.type == ixgbe_phy_none) {
1447 int err = hw->phy.ops.identify(hw);
1448 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1449 device_printf(dev,
1450 "Unsupported SFP+ module type was detected.\n");
1451 return;
1452 }
1453 }
1454
1455 /* Set moderation on the Link interrupt */
1456 IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1457
1458 /* Config/Enable Link */
1459 ixgbe_config_link(adapter);
1460
1461 /* Hardware Packet Buffer & Flow Control setup */
1462 {
1463 u32 rxpb, frame, size, tmp;
1464
1465 frame = adapter->max_frame_size;
1466
1467 /* Calculate High Water */
1468 if (hw->mac.type == ixgbe_mac_X540)
1469 tmp = IXGBE_DV_X540(frame, frame);
1470 else
1471 tmp = IXGBE_DV(frame, frame);
1472 size = IXGBE_BT2KB(tmp);
1473 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1474 hw->fc.high_water[0] = rxpb - size;
1475
1476 /* Now calculate Low Water */
1477 if (hw->mac.type == ixgbe_mac_X540)
1478 tmp = IXGBE_LOW_DV_X540(frame);
1479 else
1480 tmp = IXGBE_LOW_DV(frame);
1481 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1482
1483 hw->fc.requested_mode = adapter->fc;
1484 hw->fc.pause_time = IXGBE_FC_PAUSE;
1485 hw->fc.send_xon = TRUE;
1486 }
1487 /* Initialize the FC settings */
1488 ixgbe_start_hw(hw);
1489
1490 /* And now turn on interrupts */
1491 ixgbe_enable_intr(adapter);
1492
1493 /* Now inform the stack we're ready */
1494 ifp->if_flags |= IFF_RUNNING;
1495
1496 return;
1497 }
1498
1499 static int
1500 ixgbe_init(struct ifnet *ifp)
1501 {
1502 struct adapter *adapter = ifp->if_softc;
1503
1504 IXGBE_CORE_LOCK(adapter);
1505 ixgbe_init_locked(adapter);
1506 IXGBE_CORE_UNLOCK(adapter);
1507 return 0; /* XXX ixgbe_init_locked cannot fail? really? */
1508 }
1509
1510
1511 /*
1512 **
1513 ** MSIX Interrupt Handlers and Tasklets
1514 **
1515 */
1516
1517 static inline void
1518 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1519 {
1520 struct ixgbe_hw *hw = &adapter->hw;
1521 u64 queue = (u64)(1ULL << vector);
1522 u32 mask;
1523
1524 if (hw->mac.type == ixgbe_mac_82598EB) {
1525 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1526 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1527 } else {
1528 mask = (queue & 0xFFFFFFFF);
1529 if (mask)
1530 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1531 mask = (queue >> 32);
1532 if (mask)
1533 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1534 }
1535 }
1536
1537 __unused static inline void
1538 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1539 {
1540 struct ixgbe_hw *hw = &adapter->hw;
1541 u64 queue = (u64)(1ULL << vector);
1542 u32 mask;
1543
1544 if (hw->mac.type == ixgbe_mac_82598EB) {
1545 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1546 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1547 } else {
1548 mask = (queue & 0xFFFFFFFF);
1549 if (mask)
1550 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1551 mask = (queue >> 32);
1552 if (mask)
1553 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1554 }
1555 }
1556
1557 static inline void
1558 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1559 {
1560 u32 mask;
1561
1562 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1563 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1564 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1565 } else {
1566 mask = (queues & 0xFFFFFFFF);
1567 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1568 mask = (queues >> 32);
1569 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1570 }
1571 }
1572
1573
1574 static void
1575 ixgbe_handle_que(void *context)
1576 {
1577 struct ix_queue *que = context;
1578 struct adapter *adapter = que->adapter;
1579 struct tx_ring *txr = que->txr;
1580 struct ifnet *ifp = adapter->ifp;
1581 bool more;
1582
1583 adapter->handleq.ev_count++;
1584
1585 if (ifp->if_flags & IFF_RUNNING) {
1586 more = ixgbe_rxeof(que);
1587 IXGBE_TX_LOCK(txr);
1588 ixgbe_txeof(txr);
1589 #ifndef IXGBE_LEGACY_TX
1590 if (!drbr_empty(ifp, txr->br))
1591 ixgbe_mq_start_locked(ifp, txr, NULL);
1592 #else
1593 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1594 ixgbe_start_locked(txr, ifp);
1595 #endif
1596 IXGBE_TX_UNLOCK(txr);
1597 if (more) {
1598 adapter->req.ev_count++;
1599 softint_schedule(que->que_si);
1600 return;
1601 }
1602 }
1603
1604 /* Reenable this interrupt */
1605 ixgbe_enable_queue(adapter, que->msix);
1606 return;
1607 }
1608
1609
1610 /*********************************************************************
1611 *
1612 * Legacy Interrupt Service routine
1613 *
1614 **********************************************************************/
1615
1616 static int
1617 ixgbe_legacy_irq(void *arg)
1618 {
1619 struct ix_queue *que = arg;
1620 struct adapter *adapter = que->adapter;
1621 struct ifnet *ifp = adapter->ifp;
1622 struct ixgbe_hw *hw = &adapter->hw;
1623 struct tx_ring *txr = adapter->tx_rings;
1624 bool more_tx = false, more_rx = false;
1625 u32 reg_eicr, loop = MAX_LOOP;
1626
1627 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1628
1629 adapter->stats.legint.ev_count++;
1630 ++que->irqs;
1631 if (reg_eicr == 0) {
1632 adapter->stats.intzero.ev_count++;
1633 if ((ifp->if_flags & IFF_UP) != 0)
1634 ixgbe_enable_intr(adapter);
1635 return 0;
1636 }
1637
1638 if ((ifp->if_flags & IFF_RUNNING) != 0) {
1639 more_rx = ixgbe_rxeof(que);
1640
1641 IXGBE_TX_LOCK(txr);
1642 do {
1643 adapter->txloops.ev_count++;
1644 more_tx = ixgbe_txeof(txr);
1645 } while (loop-- && more_tx);
1646 IXGBE_TX_UNLOCK(txr);
1647 }
1648
1649 if (more_rx || more_tx) {
1650 if (more_rx)
1651 adapter->morerx.ev_count++;
1652 if (more_tx)
1653 adapter->moretx.ev_count++;
1654 softint_schedule(que->que_si);
1655 }
1656
1657 /* Check for fan failure */
1658 if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1659 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1660 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1661 "REPLACE IMMEDIATELY!!\n");
1662 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1663 }
1664
1665 /* Link status change */
1666 if (reg_eicr & IXGBE_EICR_LSC)
1667 softint_schedule(adapter->link_si);
1668
1669 ixgbe_enable_intr(adapter);
1670 return 1;
1671 }
1672
1673
1674 #if defined(NETBSD_MSI_OR_MSIX)
1675 /*********************************************************************
1676 *
1677 * MSIX Queue Interrupt Service routine
1678 *
1679 **********************************************************************/
1680 void
1681 ixgbe_msix_que(void *arg)
1682 {
1683 struct ix_queue *que = arg;
1684 struct adapter *adapter = que->adapter;
1685 struct tx_ring *txr = que->txr;
1686 struct rx_ring *rxr = que->rxr;
1687 bool more_tx, more_rx;
1688 u32 newitr = 0;
1689
1690 ixgbe_disable_queue(adapter, que->msix);
1691 ++que->irqs;
1692
1693 more_rx = ixgbe_rxeof(que);
1694
1695 IXGBE_TX_LOCK(txr);
1696 more_tx = ixgbe_txeof(txr);
1697 /*
1698 ** Make certain that if the stack
1699 ** has anything queued the task gets
1700 ** scheduled to handle it.
1701 */
1702 #ifdef IXGBE_LEGACY_TX
1703 if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
1704 #else
1705 if (!drbr_empty(adapter->ifp, txr->br))
1706 #endif
1707 more_tx = 1;
1708 IXGBE_TX_UNLOCK(txr);
1709
1710 /* Do AIM now? */
1711
1712 if (ixgbe_enable_aim == FALSE)
1713 goto no_calc;
1714 /*
1715 ** Do Adaptive Interrupt Moderation:
1716 ** - Write out last calculated setting
1717 ** - Calculate based on average size over
1718 ** the last interval.
1719 */
1720 if (que->eitr_setting)
1721 IXGBE_WRITE_REG(&adapter->hw,
1722 IXGBE_EITR(que->msix), que->eitr_setting);
1723
1724 que->eitr_setting = 0;
1725
1726 /* Idle, do nothing */
1727 if ((txr->bytes == 0) && (rxr->bytes == 0))
1728 goto no_calc;
1729
1730 if ((txr->bytes) && (txr->packets))
1731 newitr = txr->bytes/txr->packets;
1732 if ((rxr->bytes) && (rxr->packets))
1733 newitr = max(newitr,
1734 (rxr->bytes / rxr->packets));
1735 newitr += 24; /* account for hardware frame, crc */
1736
1737 /* set an upper boundary */
1738 newitr = min(newitr, 3000);
1739
1740 /* Be nice to the mid range */
1741 if ((newitr > 300) && (newitr < 1200))
1742 newitr = (newitr / 3);
1743 else
1744 newitr = (newitr / 2);
1745
1746 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1747 newitr |= newitr << 16;
1748 else
1749 newitr |= IXGBE_EITR_CNT_WDIS;
1750
1751 /* save for next interrupt */
1752 que->eitr_setting = newitr;
1753
1754 /* Reset state */
1755 txr->bytes = 0;
1756 txr->packets = 0;
1757 rxr->bytes = 0;
1758 rxr->packets = 0;
1759
1760 no_calc:
1761 if (more_tx || more_rx)
1762 softint_schedule(que->que_si);
1763 else /* Reenable this interrupt */
1764 ixgbe_enable_queue(adapter, que->msix);
1765 return;
1766 }
1767
1768
1769 static void
1770 ixgbe_msix_link(void *arg)
1771 {
1772 struct adapter *adapter = arg;
1773 struct ixgbe_hw *hw = &adapter->hw;
1774 u32 reg_eicr;
1775
1776 ++adapter->link_irq.ev_count;
1777
1778 /* First get the cause */
1779 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1780 /* Clear interrupt with write */
1781 IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1782
1783 /* Link status change */
1784 if (reg_eicr & IXGBE_EICR_LSC)
1785 softint_schedule(adapter->link_si);
1786
1787 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1788 #ifdef IXGBE_FDIR
1789 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1790 /* This is probably overkill :) */
1791 if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1792 return;
1793 /* Disable the interrupt */
1794 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1795 softint_schedule(adapter->fdir_si);
1796 } else
1797 #endif
1798 if (reg_eicr & IXGBE_EICR_ECC) {
1799 device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1800 "Please Reboot!!\n");
1801 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1802 } else
1803
1804 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1805 /* Clear the interrupt */
1806 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1807 softint_schedule(adapter->msf_si);
1808 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1809 /* Clear the interrupt */
1810 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1811 softint_schedule(adapter->mod_si);
1812 }
1813 }
1814
1815 /* Check for fan failure */
1816 if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1817 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1818 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1819 "REPLACE IMMEDIATELY!!\n");
1820 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1821 }
1822
1823 /* Check for over temp condition */
1824 if ((hw->mac.type == ixgbe_mac_X540) &&
1825 (reg_eicr & IXGBE_EICR_TS)) {
1826 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1827 "PHY IS SHUT DOWN!!\n");
1828 device_printf(adapter->dev, "System shutdown required\n");
1829 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1830 }
1831
1832 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1833 return;
1834 }
1835 #endif
1836
1837 /*********************************************************************
1838 *
1839 * Media Ioctl callback
1840 *
1841 * This routine is called whenever the user queries the status of
1842 * the interface using ifconfig.
1843 *
1844 **********************************************************************/
1845 static void
1846 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1847 {
1848 struct adapter *adapter = ifp->if_softc;
1849
1850 INIT_DEBUGOUT("ixgbe_media_status: begin");
1851 IXGBE_CORE_LOCK(adapter);
1852 ixgbe_update_link_status(adapter);
1853
1854 ifmr->ifm_status = IFM_AVALID;
1855 ifmr->ifm_active = IFM_ETHER;
1856
1857 if (!adapter->link_active) {
1858 IXGBE_CORE_UNLOCK(adapter);
1859 return;
1860 }
1861
1862 ifmr->ifm_status |= IFM_ACTIVE;
1863
1864 switch (adapter->link_speed) {
1865 case IXGBE_LINK_SPEED_100_FULL:
1866 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1867 break;
1868 case IXGBE_LINK_SPEED_1GB_FULL:
1869 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1870 break;
1871 case IXGBE_LINK_SPEED_10GB_FULL:
1872 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1873 break;
1874 }
1875
1876 IXGBE_CORE_UNLOCK(adapter);
1877
1878 return;
1879 }
1880
1881 /*********************************************************************
1882 *
1883 * Media Ioctl callback
1884 *
1885 * This routine is called when the user changes speed/duplex using
1886 * media/mediaopt option with ifconfig.
1887 *
1888 **********************************************************************/
1889 static int
1890 ixgbe_media_change(struct ifnet * ifp)
1891 {
1892 struct adapter *adapter = ifp->if_softc;
1893 struct ifmedia *ifm = &adapter->media;
1894
1895 INIT_DEBUGOUT("ixgbe_media_change: begin");
1896
1897 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1898 return (EINVAL);
1899
1900 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1901 case IFM_AUTO:
1902 adapter->hw.phy.autoneg_advertised =
1903 IXGBE_LINK_SPEED_100_FULL |
1904 IXGBE_LINK_SPEED_1GB_FULL |
1905 IXGBE_LINK_SPEED_10GB_FULL;
1906 break;
1907 default:
1908 device_printf(adapter->dev, "Only auto media type\n");
1909 return (EINVAL);
1910 }
1911
1912 return (0);
1913 }
1914
1915 /*********************************************************************
1916 *
1917 * This routine maps the mbufs to tx descriptors, allowing the
1918 * TX engine to transmit the packets.
1919 * - return 0 on success, positive on failure
1920 *
1921 **********************************************************************/
1922
1923 static int
1924 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
1925 {
1926 struct m_tag *mtag;
1927 struct adapter *adapter = txr->adapter;
1928 struct ethercom *ec = &adapter->osdep.ec;
1929 u32 olinfo_status = 0, cmd_type_len;
1930 int i, j, error;
1931 int first;
1932 bus_dmamap_t map;
1933 struct ixgbe_tx_buf *txbuf;
1934 union ixgbe_adv_tx_desc *txd = NULL;
1935
1936 /* Basic descriptor defines */
1937 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1938 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1939
1940 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
1941 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1942
1943 /*
1944 * Important to capture the first descriptor
1945 * used because it will contain the index of
1946 * the one we tell the hardware to report back
1947 */
1948 first = txr->next_avail_desc;
1949 txbuf = &txr->tx_buffers[first];
1950 map = txbuf->map;
1951
1952 /*
1953 * Map the packet for DMA.
1954 */
1955 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
1956 m_head, BUS_DMA_NOWAIT);
1957
1958 if (__predict_false(error)) {
1959
1960 switch (error) {
1961 case EAGAIN:
1962 adapter->eagain_tx_dma_setup.ev_count++;
1963 return EAGAIN;
1964 case ENOMEM:
1965 adapter->enomem_tx_dma_setup.ev_count++;
1966 return EAGAIN;
1967 case EFBIG:
1968 /*
1969 * XXX Try it again?
1970 * do m_defrag() and retry bus_dmamap_load_mbuf().
1971 */
1972 adapter->efbig_tx_dma_setup.ev_count++;
1973 return error;
1974 case EINVAL:
1975 adapter->einval_tx_dma_setup.ev_count++;
1976 return error;
1977 default:
1978 adapter->other_tx_dma_setup.ev_count++;
1979 return error;
1980 }
1981 }
1982
1983 /* Make certain there are enough descriptors */
1984 if (map->dm_nsegs > txr->tx_avail - 2) {
1985 txr->no_desc_avail.ev_count++;
1986 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
1987 return EAGAIN;
1988 }
1989
1990 /*
1991 ** Set up the appropriate offload context
1992 ** this will consume the first descriptor
1993 */
1994 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1995 if (__predict_false(error)) {
1996 return (error);
1997 }
1998
1999 #ifdef IXGBE_FDIR
2000 /* Do the flow director magic */
2001 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
2002 ++txr->atr_count;
2003 if (txr->atr_count >= atr_sample_rate) {
2004 ixgbe_atr(txr, m_head);
2005 txr->atr_count = 0;
2006 }
2007 }
2008 #endif
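	/*
	** (When IXGBE_FDIR is compiled in, the block above feeds
	** roughly one packet in every atr_sample_rate to ixgbe_atr()
	** so Flow Director can learn and pin the flow; see
	** ixgbe_atr() below.)
	*/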
2009
2010 i = txr->next_avail_desc;
2011 for (j = 0; j < map->dm_nsegs; j++) {
2012 bus_size_t seglen;
2013 bus_addr_t segaddr;
2014
2015 txbuf = &txr->tx_buffers[i];
2016 txd = &txr->tx_base[i];
2017 seglen = map->dm_segs[j].ds_len;
2018 segaddr = htole64(map->dm_segs[j].ds_addr);
2019
2020 txd->read.buffer_addr = segaddr;
2021 txd->read.cmd_type_len = htole32(txr->txd_cmd |
2022 cmd_type_len |seglen);
2023 txd->read.olinfo_status = htole32(olinfo_status);
2024
2025 if (++i == txr->num_desc)
2026 i = 0;
2027 }
2028
2029 txd->read.cmd_type_len |=
2030 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
2031 txr->tx_avail -= map->dm_nsegs;
2032 txr->next_avail_desc = i;
2033
2034 txbuf->m_head = m_head;
2035 /*
2036 ** Here we swap the map so the last descriptor,
2037 ** which gets the completion interrupt, has the
2038 ** real map, and the first descriptor gets the
2039 ** unused map from this descriptor.
2040 */
2041 txr->tx_buffers[first].map = txbuf->map;
2042 txbuf->map = map;
2043 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
2044 BUS_DMASYNC_PREWRITE);
2045
2046 /* Set the EOP descriptor that will be marked done */
2047 txbuf = &txr->tx_buffers[first];
2048 txbuf->eop = txd;
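	/*
	** Illustration of the bookkeeping above (assumed example):
	** with first = 10 and a 3-segment mbuf, descriptors 10..12
	** are filled and next_avail_desc becomes 13; tx_buffers[12]
	** keeps m_head and the loaded DMA map, while tx_buffers[10]
	** keeps the spare map and an eop pointer to descriptor 12
	** for the cleanup code to test for completion.
	*/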
2049
2050 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2051 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2052 /*
2053 * Advance the Transmit Descriptor Tail (TDT); this tells the
2054 * hardware that this frame is available to transmit.
2055 */
2056 ++txr->total_packets.ev_count;
2057 IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
2058
2059 return 0;
2060 }
2061
2062 static void
2063 ixgbe_set_promisc(struct adapter *adapter)
2064 {
2065 struct ether_multi *enm;
2066 struct ether_multistep step;
2067 u_int32_t reg_rctl;
2068 struct ethercom *ec = &adapter->osdep.ec;
2069 struct ifnet *ifp = adapter->ifp;
2070 int mcnt = 0;
2071
2072 reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2073 reg_rctl &= (~IXGBE_FCTRL_UPE);
2074 if (ifp->if_flags & IFF_ALLMULTI)
2075 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2076 else {
2077 ETHER_FIRST_MULTI(step, ec, enm);
2078 while (enm != NULL) {
2079 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2080 break;
2081 mcnt++;
2082 ETHER_NEXT_MULTI(step, enm);
2083 }
2084 }
2085 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2086 reg_rctl &= (~IXGBE_FCTRL_MPE);
2087 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2088
2089 if (ifp->if_flags & IFF_PROMISC) {
2090 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2091 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2092 } else if (ifp->if_flags & IFF_ALLMULTI) {
2093 reg_rctl |= IXGBE_FCTRL_MPE;
2094 reg_rctl &= ~IXGBE_FCTRL_UPE;
2095 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2096 }
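	/*
	** Net effect of the above: IFF_PROMISC sets both UPE and MPE,
	** IFF_ALLMULTI sets only MPE, and otherwise UPE is cleared
	** and MPE is cleared as long as the multicast list still fits
	** in the hardware filter.
	*/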
2097 return;
2098 }
2099
2100
2101 /*********************************************************************
2102 * Multicast Update
2103 *
2104 * This routine is called whenever the multicast address list is updated.
2105 *
2106 **********************************************************************/
2107 #define IXGBE_RAR_ENTRIES 16
2108
2109 static void
2110 ixgbe_set_multi(struct adapter *adapter)
2111 {
2112 struct ether_multi *enm;
2113 struct ether_multistep step;
2114 u32 fctrl;
2115 u8 *mta;
2116 u8 *update_ptr;
2117 int mcnt = 0;
2118 struct ethercom *ec = &adapter->osdep.ec;
2119 struct ifnet *ifp = adapter->ifp;
2120
2121 IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
2122
2123 mta = adapter->mta;
2124 bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
2125 MAX_NUM_MULTICAST_ADDRESSES);
2126
2127 ifp->if_flags &= ~IFF_ALLMULTI;
2128 ETHER_FIRST_MULTI(step, ec, enm);
2129 while (enm != NULL) {
2130 if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
2131 (memcmp(enm->enm_addrlo, enm->enm_addrhi,
2132 ETHER_ADDR_LEN) != 0)) {
2133 ifp->if_flags |= IFF_ALLMULTI;
2134 break;
2135 }
2136 bcopy(enm->enm_addrlo,
2137 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2138 IXGBE_ETH_LENGTH_OF_ADDRESS);
2139 mcnt++;
2140 ETHER_NEXT_MULTI(step, enm);
2141 }
2142
2143 fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2144 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2145 if (ifp->if_flags & IFF_PROMISC)
2146 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2147 else if (ifp->if_flags & IFF_ALLMULTI) {
2148 fctrl |= IXGBE_FCTRL_MPE;
2149 }
2150
2151 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2152
2153 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2154 update_ptr = mta;
2155 ixgbe_update_mc_addr_list(&adapter->hw,
2156 update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2157 }
2158
2159 return;
2160 }
2161
2162 /*
2163 * This is an iterator function needed by the multicast
2164 * shared code. It simply feeds the shared code routine the
2165 * addresses in the mta array built by ixgbe_set_multi(), one by one.
2166 */
2167 static u8 *
2168 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2169 {
2170 u8 *addr = *update_ptr;
2171 u8 *newptr;
2172 *vmdq = 0;
2173
2174 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2175 *update_ptr = newptr;
2176 return addr;
2177 }
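/*
** Rough sketch (hypothetical caller, for illustration only -- the
** real loop lives in the shared ixgbe_update_mc_addr_list() code):
**
**	u8 *p = mta;
**	for (u32 i = 0; i < mcnt; i++) {
**		u32 vmdq;
**		u8 *mac = ixgbe_mc_array_itr(hw, &p, &vmdq);
**		... program mac into the multicast table ...
**	}
*/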
2178
2179
2180 /*********************************************************************
2181 * Timer routine
2182 *
2183 * This routine checks for link status, updates statistics,
2184 * and runs the watchdog check.
2185 *
2186 **********************************************************************/
2187
2188 static void
2189 ixgbe_local_timer1(void *arg)
2190 {
2191 struct adapter *adapter = arg;
2192 device_t dev = adapter->dev;
2193 struct ix_queue *que = adapter->queues;
2194 struct tx_ring *txr = adapter->tx_rings;
2195 int hung = 0, paused = 0;
2196
2197 KASSERT(mutex_owned(&adapter->core_mtx));
2198
2199 /* Check for pluggable optics */
2200 if (adapter->sfp_probe)
2201 if (!ixgbe_sfp_probe(adapter))
2202 goto out; /* Nothing to do */
2203
2204 ixgbe_update_link_status(adapter);
2205 ixgbe_update_stats_counters(adapter);
2206
2207 /*
2208 * If the interface has been paused
2209 * then don't do the watchdog check
2210 */
2211 if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2212 paused = 1;
2213
2214 /*
2215 ** Check the TX queues' status
2216 ** - watchdog only if all queues show hung
2217 */
2218 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2219 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2220 (paused == 0))
2221 ++hung;
2222 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2223 softint_schedule(que->que_si);
2224 }
2225 /* Only truly watchdog if all queues show hung */
2226 if (hung == adapter->num_queues)
2227 goto watchdog;
2228
2229 out:
2230 ixgbe_rearm_queues(adapter, adapter->que_mask);
2231 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2232 return;
2233
2234 watchdog:
2235 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2236 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2237 IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2238 IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2239 device_printf(dev,"TX(%d) desc avail = %d,"
2240 "Next TX to Clean = %d\n",
2241 txr->me, txr->tx_avail, txr->next_to_clean);
2242 adapter->ifp->if_flags &= ~IFF_RUNNING;
2243 adapter->watchdog_events.ev_count++;
2244 ixgbe_init_locked(adapter);
2245 }
2246
2247 static void
2248 ixgbe_local_timer(void *arg)
2249 {
2250 struct adapter *adapter = arg;
2251
2252 IXGBE_CORE_LOCK(adapter);
2253 ixgbe_local_timer1(adapter);
2254 IXGBE_CORE_UNLOCK(adapter);
2255 }
2256
2257 /*
2258 ** Note: this routine updates the OS on the link state;
2259 ** the real check of the hardware only happens with
2260 ** a link interrupt.
2261 */
2262 static void
2263 ixgbe_update_link_status(struct adapter *adapter)
2264 {
2265 struct ifnet *ifp = adapter->ifp;
2266 device_t dev = adapter->dev;
2267
2268
2269 if (adapter->link_up){
2270 if (adapter->link_active == FALSE) {
2271 if (bootverbose)
2272 device_printf(dev,"Link is up %d Gbps %s \n",
2273 ((adapter->link_speed == 128)? 10:1),
2274 "Full Duplex");
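			/*
			** The magic 128 above is IXGBE_LINK_SPEED_10GB_FULL
			** (0x80); any other speed is reported as 1 Gbps here.
			*/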
2275 adapter->link_active = TRUE;
2276 /* Update any Flow Control changes */
2277 ixgbe_fc_enable(&adapter->hw);
2278 if_link_state_change(ifp, LINK_STATE_UP);
2279 }
2280 } else { /* Link down */
2281 if (adapter->link_active == TRUE) {
2282 if (bootverbose)
2283 device_printf(dev,"Link is Down\n");
2284 if_link_state_change(ifp, LINK_STATE_DOWN);
2285 adapter->link_active = FALSE;
2286 }
2287 }
2288
2289 return;
2290 }
2291
2292
2293 static void
2294 ixgbe_ifstop(struct ifnet *ifp, int disable)
2295 {
2296 struct adapter *adapter = ifp->if_softc;
2297
2298 IXGBE_CORE_LOCK(adapter);
2299 ixgbe_stop(adapter);
2300 IXGBE_CORE_UNLOCK(adapter);
2301 }
2302
2303 /*********************************************************************
2304 *
2305 * This routine disables all traffic on the adapter by issuing a
2306 * global reset on the MAC and deallocating TX/RX buffers.
2307 *
2308 **********************************************************************/
2309
2310 static void
2311 ixgbe_stop(void *arg)
2312 {
2313 struct ifnet *ifp;
2314 struct adapter *adapter = arg;
2315 struct ixgbe_hw *hw = &adapter->hw;
2316 ifp = adapter->ifp;
2317
2318 KASSERT(mutex_owned(&adapter->core_mtx));
2319
2320 INIT_DEBUGOUT("ixgbe_stop: begin\n");
2321 ixgbe_disable_intr(adapter);
2322 callout_stop(&adapter->timer);
2323
2324 /* Let the stack know...*/
2325 ifp->if_flags &= ~IFF_RUNNING;
2326
2327 ixgbe_reset_hw(hw);
2328 hw->adapter_stopped = FALSE;
2329 ixgbe_stop_adapter(hw);
2330 /* Turn off the laser */
2331 if (hw->phy.multispeed_fiber)
2332 ixgbe_disable_tx_laser(hw);
2333
2334 /* reprogram the RAR[0] in case user changed it. */
2335 ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2336
2337 return;
2338 }
2339
2340
2341 /*********************************************************************
2342 *
2343 * Determine hardware revision.
2344 *
2345 **********************************************************************/
2346 static void
2347 ixgbe_identify_hardware(struct adapter *adapter)
2348 {
2349 pcitag_t tag;
2350 pci_chipset_tag_t pc;
2351 pcireg_t subid, id;
2352 struct ixgbe_hw *hw = &adapter->hw;
2353
2354 pc = adapter->osdep.pc;
2355 tag = adapter->osdep.tag;
2356
2357 id = pci_conf_read(pc, tag, PCI_ID_REG);
2358 subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
2359
2360 /* Save off the information about this board */
2361 hw->vendor_id = PCI_VENDOR(id);
2362 hw->device_id = PCI_PRODUCT(id);
2363 hw->revision_id =
2364 PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
2365 hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
2366 hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
2367
2368 /* We need this here to set the num_segs below */
2369 ixgbe_set_mac_type(hw);
2370
2371 /* Pick up the 82599 and VF settings */
2372 if (hw->mac.type != ixgbe_mac_82598EB) {
2373 hw->phy.smart_speed = ixgbe_smart_speed;
2374 adapter->num_segs = IXGBE_82599_SCATTER;
2375 } else
2376 adapter->num_segs = IXGBE_82598_SCATTER;
2377
2378 return;
2379 }
2380
2381 /*********************************************************************
2382 *
2383 * Determine optic type
2384 *
2385 **********************************************************************/
2386 static void
2387 ixgbe_setup_optics(struct adapter *adapter)
2388 {
2389 struct ixgbe_hw *hw = &adapter->hw;
2390 int layer;
2391
2392 layer = ixgbe_get_supported_physical_layer(hw);
2393
2394 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2395 adapter->optics = IFM_10G_T;
2396 return;
2397 }
2398
2399 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2400 adapter->optics = IFM_1000_T;
2401 return;
2402 }
2403
2404 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2405 adapter->optics = IFM_1000_SX;
2406 return;
2407 }
2408
2409 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2410 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2411 adapter->optics = IFM_10G_LR;
2412 return;
2413 }
2414
2415 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2416 adapter->optics = IFM_10G_SR;
2417 return;
2418 }
2419
2420 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2421 adapter->optics = IFM_10G_TWINAX;
2422 return;
2423 }
2424
2425 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2426 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2427 adapter->optics = IFM_10G_CX4;
2428 return;
2429 }
2430
2431 /* If we get here just set the default */
2432 adapter->optics = IFM_ETHER | IFM_AUTO;
2433 return;
2434 }
2435
2436 /*********************************************************************
2437 *
2438 * Setup the Legacy or MSI Interrupt handler
2439 *
2440 **********************************************************************/
2441 static int
2442 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
2443 {
2444 device_t dev = adapter->dev;
2445 struct ix_queue *que = adapter->queues;
2446 #ifndef IXGBE_LEGACY_TX
2447 struct tx_ring *txr = adapter->tx_rings;
2448 #endif
2449 char intrbuf[PCI_INTRSTR_LEN];
2450 #if 0
2451 int rid = 0;
2452
2453 /* MSI RID at 1 */
2454 if (adapter->msix == 1)
2455 rid = 1;
2456 #endif
2457
2458 /* We allocate a single interrupt resource */
2459 if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
2460 aprint_error_dev(dev, "unable to map interrupt\n");
2461 return ENXIO;
2462 } else {
2463 aprint_normal_dev(dev, "interrupting at %s\n",
2464 pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
2465 intrbuf, sizeof(intrbuf)));
2466 }
2467
2468 /*
2469 * Try allocating a fast interrupt and the associated deferred
2470 * processing contexts.
2471 */
2472 #ifndef IXGBE_LEGACY_TX
2473 txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
2474 txr);
2475 #endif
2476 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
2477
2478 /* Tasklets for Link, SFP and Multispeed Fiber */
2479 adapter->link_si =
2480 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2481 adapter->mod_si =
2482 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2483 adapter->msf_si =
2484 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2485
2486 #ifdef IXGBE_FDIR
2487 adapter->fdir_si =
2488 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2489 #endif
2490 if (que->que_si == NULL ||
2491 adapter->link_si == NULL ||
2492 adapter->mod_si == NULL ||
2493 #ifdef IXGBE_FDIR
2494 adapter->fdir_si == NULL ||
2495 #endif
2496 adapter->msf_si == NULL) {
2497 aprint_error_dev(dev,
2498 "could not establish software interrupts\n");
2499 return ENXIO;
2500 }
2501
2502 adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
2503 adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
2504 if (adapter->osdep.intr == NULL) {
2505 aprint_error_dev(dev, "failed to register interrupt handler\n");
2506 softint_disestablish(que->que_si);
2507 softint_disestablish(adapter->link_si);
2508 softint_disestablish(adapter->mod_si);
2509 softint_disestablish(adapter->msf_si);
2510 #ifdef IXGBE_FDIR
2511 softint_disestablish(adapter->fdir_si);
2512 #endif
2513 return ENXIO;
2514 }
2515 /* For simplicity in the handlers */
2516 adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2517
2518 return (0);
2519 }
2520
2521
2522 /*********************************************************************
2523 *
2524 * Setup MSIX Interrupt resources and handlers
2525 *
2526 **********************************************************************/
2527 static int
2528 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
2529 {
2530 #if !defined(NETBSD_MSI_OR_MSIX)
2531 return 0;
2532 #else
2533 device_t dev = adapter->dev;
2534 struct ix_queue *que = adapter->queues;
2535 struct tx_ring *txr = adapter->tx_rings;
2536 int error, rid, vector = 0;
2537
2538 for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2539 rid = vector + 1;
2540 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2541 RF_SHAREABLE | RF_ACTIVE);
2542 if (que->res == NULL) {
2543 aprint_error_dev(dev,"Unable to allocate"
2544 " bus resource: que interrupt [%d]\n", vector);
2545 return (ENXIO);
2546 }
2547 /* Set the handler function */
2548 error = bus_setup_intr(dev, que->res,
2549 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2550 ixgbe_msix_que, que, &que->tag);
2551 if (error) {
2552 que->res = NULL;
2553 aprint_error_dev(dev,
2554 "Failed to register QUE handler\n");
2555 return error;
2556 }
2557 #if __FreeBSD_version >= 800504
2558 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2559 #endif
2560 que->msix = vector;
2561 adapter->que_mask |= (u64)(1 << que->msix);
2562 /*
2563 ** Bind the msix vector, and thus the
2564 ** ring to the corresponding cpu.
2565 */
2566 if (adapter->num_queues > 1)
2567 bus_bind_intr(dev, que->res, i);
2568
2569 #ifndef IXGBE_LEGACY_TX
2570 txr->txq_si = softint_establish(SOFTINT_NET,
2571 ixgbe_deferred_mq_start, txr);
2572 #endif
2573 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
2574 que);
2575 if (que->que_si == NULL) {
2576 aprint_error_dev(dev,
2577 "could not establish software interrupt\n");
2578 }
2579 }
2580
2581 /* and Link */
2582 rid = vector + 1;
2583 adapter->res = bus_alloc_resource_any(dev,
2584 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2585 if (!adapter->res) {
2586 aprint_error_dev(dev,"Unable to allocate bus resource: "
2587 "Link interrupt [%d]\n", rid);
2588 return (ENXIO);
2589 }
2590 /* Set the link handler function */
2591 error = bus_setup_intr(dev, adapter->res,
2592 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2593 ixgbe_msix_link, adapter, &adapter->tag);
2594 if (error) {
2595 adapter->res = NULL;
2596 aprint_error_dev(dev, "Failed to register LINK handler\n");
2597 return (error);
2598 }
2599 #if __FreeBSD_version >= 800504
2600 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2601 #endif
2602 adapter->linkvec = vector;
2603 /* Tasklets for Link, SFP and Multispeed Fiber */
2604 adapter->link_si =
2605 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2606 adapter->mod_si =
2607 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2608 adapter->msf_si =
2609 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2610 #ifdef IXGBE_FDIR
2611 adapter->fdir_si =
2612 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2613 #endif
2614
2615 return (0);
2616 #endif
2617 }
2618
2619 /*
2620 * Setup Either MSI/X or MSI
2621 */
2622 static int
2623 ixgbe_setup_msix(struct adapter *adapter)
2624 {
2625 #if !defined(NETBSD_MSI_OR_MSIX)
2626 return 0;
2627 #else
2628 device_t dev = adapter->dev;
2629 int rid, want, queues, msgs;
2630
2631 /* Override by tuneable */
2632 if (ixgbe_enable_msix == 0)
2633 goto msi;
2634
2635 /* First try MSI/X */
2636 rid = PCI_BAR(MSIX_82598_BAR);
2637 adapter->msix_mem = bus_alloc_resource_any(dev,
2638 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2639 if (!adapter->msix_mem) {
2640 rid += 4; /* 82599 maps in higher BAR */
2641 adapter->msix_mem = bus_alloc_resource_any(dev,
2642 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2643 }
2644 if (!adapter->msix_mem) {
2645 /* May not be enabled */
2646 device_printf(adapter->dev,
2647 "Unable to map MSIX table \n");
2648 goto msi;
2649 }
2650
2651 msgs = pci_msix_count(dev);
2652 if (msgs == 0) { /* system has msix disabled */
2653 bus_release_resource(dev, SYS_RES_MEMORY,
2654 rid, adapter->msix_mem);
2655 adapter->msix_mem = NULL;
2656 goto msi;
2657 }
2658
2659 /* Figure out a reasonable auto config value */
2660 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2661
2662 if (ixgbe_num_queues != 0)
2663 queues = ixgbe_num_queues;
2664 /* Set max queues to 8 when autoconfiguring */
2665 else if ((ixgbe_num_queues == 0) && (queues > 8))
2666 queues = 8;
2667
2668 /*
2669 ** Want one vector (RX/TX pair) per queue
2670 ** plus an additional for Link.
2671 */
2672 want = queues + 1;
2673 if (msgs >= want)
2674 msgs = want;
2675 else {
2676 device_printf(adapter->dev,
2677 "MSIX Configuration Problem, "
2678 "%d vectors but %d queues wanted!\n",
2679 msgs, want);
2680 return (0); /* Will go to Legacy setup */
2681 }
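	/*
	** Example of the vector arithmetic above (numbers are
	** illustrative only): with 12 CPUs and 10 MSI-X messages
	** advertised, queues = min(12, 10 - 1) = 9, autoconfig then
	** caps it at 8, and want = 8 + 1 = 9 (one vector per queue
	** pair plus the link vector), which fits within the 10
	** available messages.
	*/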
2682 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2683 device_printf(adapter->dev,
2684 "Using MSIX interrupts with %d vectors\n", msgs);
2685 adapter->num_queues = queues;
2686 return (msgs);
2687 }
2688 msi:
2689 msgs = pci_msi_count(dev);
2690 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2691 device_printf(adapter->dev,"Using an MSI interrupt\n");
2692 else
2693 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2694 return (msgs);
2695 #endif
2696 }
2697
2698
2699 static int
2700 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
2701 {
2702 pcireg_t memtype;
2703 device_t dev = adapter->dev;
2704 bus_addr_t addr;
2705 int flags;
2706
2707 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
2708 switch (memtype) {
2709 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
2710 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
2711 adapter->osdep.mem_bus_space_tag = pa->pa_memt;
2712 if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
2713 memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
2714 goto map_err;
2715 if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
2716 aprint_normal_dev(dev, "clearing prefetchable bit\n");
2717 flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
2718 }
2719 if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
2720 adapter->osdep.mem_size, flags,
2721 &adapter->osdep.mem_bus_space_handle) != 0) {
2722 map_err:
2723 adapter->osdep.mem_size = 0;
2724 aprint_error_dev(dev, "unable to map BAR0\n");
2725 return ENXIO;
2726 }
2727 break;
2728 default:
2729 aprint_error_dev(dev, "unexpected type on BAR0\n");
2730 return ENXIO;
2731 }
2732
2733 /* Legacy defaults */
2734 adapter->num_queues = 1;
2735 adapter->hw.back = &adapter->osdep;
2736
2737 /*
2738 ** Now setup MSI or MSI/X, should
2739 ** return us the number of supported
2740 ** vectors. (Will be 1 for MSI)
2741 */
2742 adapter->msix = ixgbe_setup_msix(adapter);
2743 return (0);
2744 }
2745
2746 static void
2747 ixgbe_free_pci_resources(struct adapter * adapter)
2748 {
2749 #if defined(NETBSD_MSI_OR_MSIX)
2750 struct ix_queue *que = adapter->queues;
2751 device_t dev = adapter->dev;
2752 #endif
2753 int rid;
2754
2755 #if defined(NETBSD_MSI_OR_MSIX)
2756 int memrid;
2757 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2758 memrid = PCI_BAR(MSIX_82598_BAR);
2759 else
2760 memrid = PCI_BAR(MSIX_82599_BAR);
2761
2762 /*
2763 ** There is a slight possibility of a failure mode
2764 ** in attach that will result in entering this function
2765 ** before interrupt resources have been initialized, and
2766 ** in that case we do not want to execute the loops below.
2767 ** We can detect this reliably by the state of the adapter
2768 ** res pointer.
2769 */
2770 if (adapter->res == NULL)
2771 goto mem;
2772
2773 /*
2774 ** Release all msix queue resources:
2775 */
2776 for (int i = 0; i < adapter->num_queues; i++, que++) {
2777 rid = que->msix + 1;
2778 if (que->tag != NULL) {
2779 bus_teardown_intr(dev, que->res, que->tag);
2780 que->tag = NULL;
2781 }
2782 if (que->res != NULL)
2783 bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2784 }
2785 #endif
2786
2787 /* Clean the Legacy or Link interrupt last */
2788 if (adapter->linkvec) /* we are doing MSIX */
2789 rid = adapter->linkvec + 1;
2790 else
2791 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2792
2793 pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
2794 adapter->osdep.intr = NULL;
2795
2796 #if defined(NETBSD_MSI_OR_MSIX)
2797 mem:
2798 if (adapter->msix)
2799 pci_release_msi(dev);
2800
2801 if (adapter->msix_mem != NULL)
2802 bus_release_resource(dev, SYS_RES_MEMORY,
2803 memrid, adapter->msix_mem);
2804 #endif
2805
2806 if (adapter->osdep.mem_size != 0) {
2807 bus_space_unmap(adapter->osdep.mem_bus_space_tag,
2808 adapter->osdep.mem_bus_space_handle,
2809 adapter->osdep.mem_size);
2810 }
2811
2812 return;
2813 }
2814
2815 /*********************************************************************
2816 *
2817 * Setup networking device structure and register an interface.
2818 *
2819 **********************************************************************/
2820 static int
2821 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2822 {
2823 struct ethercom *ec = &adapter->osdep.ec;
2824 struct ixgbe_hw *hw = &adapter->hw;
2825 struct ifnet *ifp;
2826
2827 INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2828
2829 ifp = adapter->ifp = &ec->ec_if;
2830 strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
2831 ifp->if_baudrate = IF_Gbps(10);
2832 ifp->if_init = ixgbe_init;
2833 ifp->if_stop = ixgbe_ifstop;
2834 ifp->if_softc = adapter;
2835 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2836 ifp->if_ioctl = ixgbe_ioctl;
2837 #ifndef IXGBE_LEGACY_TX
2838 ifp->if_transmit = ixgbe_mq_start;
2839 ifp->if_qflush = ixgbe_qflush;
2840 #else
2841 ifp->if_start = ixgbe_start;
2842 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2843 #endif
2844
2845 if_attach(ifp);
2846 ether_ifattach(ifp, adapter->hw.mac.addr);
2847 ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
2848
2849 adapter->max_frame_size =
2850 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2851
2852 /*
2853 * Tell the upper layer(s) we support long frames.
2854 */
2855 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2856
2857 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
2858 ifp->if_capenable = 0;
2859
2860 ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
2861 ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
2862 ifp->if_capabilities |= IFCAP_LRO;
2863 ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
2864 | ETHERCAP_VLAN_MTU;
2865 ec->ec_capenable = ec->ec_capabilities;
2866
2867 /*
2868 ** Don't turn this on by default: if vlans are
2869 ** created on another pseudo device (e.g. lagg),
2870 ** then vlan events are not passed thru, breaking
2871 ** operation, but with HW FILTER off it works. If
2872 ** using vlans directly on the ixgbe driver you can
2873 ** enable this and get full hardware tag filtering.
2874 */
2875 ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
2876
2877 /*
2878 * Specify the media types supported by this adapter and register
2879 * callbacks to update media and link information
2880 */
2881 ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2882 ixgbe_media_status);
2883 ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2884 ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2885 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2886 ifmedia_add(&adapter->media,
2887 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2888 ifmedia_add(&adapter->media,
2889 IFM_ETHER | IFM_1000_T, 0, NULL);
2890 }
2891 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2892 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2893
2894 return (0);
2895 }
2896
2897 static void
2898 ixgbe_config_link(struct adapter *adapter)
2899 {
2900 struct ixgbe_hw *hw = &adapter->hw;
2901 u32 autoneg, err = 0;
2902 bool sfp, negotiate;
2903
2904 sfp = ixgbe_is_sfp(hw);
2905
2906 if (sfp) {
2907 void *ip;
2908
2909 if (hw->phy.multispeed_fiber) {
2910 hw->mac.ops.setup_sfp(hw);
2911 ixgbe_enable_tx_laser(hw);
2912 ip = adapter->msf_si;
2913 } else {
2914 ip = adapter->mod_si;
2915 }
2916
2917 kpreempt_disable();
2918 softint_schedule(ip);
2919 kpreempt_enable();
2920 } else {
2921 if (hw->mac.ops.check_link)
2922 err = ixgbe_check_link(hw, &adapter->link_speed,
2923 &adapter->link_up, FALSE);
2924 if (err)
2925 goto out;
2926 autoneg = hw->phy.autoneg_advertised;
2927 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2928 err = hw->mac.ops.get_link_capabilities(hw,
2929 &autoneg, &negotiate);
2930 else
2931 negotiate = 0;
2932 if (err)
2933 goto out;
2934 if (hw->mac.ops.setup_link)
2935 err = hw->mac.ops.setup_link(hw,
2936 autoneg, adapter->link_up);
2937 }
2938 out:
2939 return;
2940 }
2941
2942 /********************************************************************
2943 * Manage DMA'able memory.
2944 *******************************************************************/
2945
2946 static int
2947 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2948 struct ixgbe_dma_alloc *dma, const int mapflags)
2949 {
2950 device_t dev = adapter->dev;
2951 int r, rsegs;
2952
2953 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2954 DBA_ALIGN, 0, /* alignment, bounds */
2955 size, /* maxsize */
2956 1, /* nsegments */
2957 size, /* maxsegsize */
2958 BUS_DMA_ALLOCNOW, /* flags */
2959 &dma->dma_tag);
2960 if (r != 0) {
2961 aprint_error_dev(dev,
2962 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2963 goto fail_0;
2964 }
2965
2966 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2967 size,
2968 dma->dma_tag->dt_alignment,
2969 dma->dma_tag->dt_boundary,
2970 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2971 if (r != 0) {
2972 aprint_error_dev(dev,
2973 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2974 goto fail_1;
2975 }
2976
2977 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2978 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2979 if (r != 0) {
2980 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2981 __func__, r);
2982 goto fail_2;
2983 }
2984
2985 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2986 if (r != 0) {
2987 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2988 __func__, r);
2989 goto fail_3;
2990 }
2991
2992 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2993 size,
2994 NULL,
2995 mapflags | BUS_DMA_NOWAIT);
2996 if (r != 0) {
2997 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2998 __func__, r);
2999 goto fail_4;
3000 }
3001 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
3002 dma->dma_size = size;
3003 return 0;
3004 fail_4:
3005 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
3006 fail_3:
3007 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
3008 fail_2:
3009 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
3010 fail_1:
3011 ixgbe_dma_tag_destroy(dma->dma_tag);
3012 fail_0:
3013 return r;
3014 }
3015
3016 static void
3017 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
3018 {
3019 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
3020 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3021 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
3022 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
3023 ixgbe_dma_tag_destroy(dma->dma_tag);
3024 }
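/*
** Typical pairing of the two helpers above (sketch only; the real
** callers are ixgbe_allocate_queues() and the ring setup code below):
**
**	struct ixgbe_dma_alloc d;
**	if (ixgbe_dma_malloc(adapter, tsize, &d, BUS_DMA_NOWAIT) == 0) {
**		... d.dma_vaddr is the kernel VA, d.dma_paddr the
**		    bus address programmed into the hardware ...
**		ixgbe_dma_free(adapter, &d);
**	}
*/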
3025
3026
3027 /*********************************************************************
3028 *
3029 * Allocate memory for the transmit and receive rings, and then
3030 * the descriptors associated with each, called only once at attach.
3031 *
3032 **********************************************************************/
3033 static int
3034 ixgbe_allocate_queues(struct adapter *adapter)
3035 {
3036 device_t dev = adapter->dev;
3037 struct ix_queue *que;
3038 struct tx_ring *txr;
3039 struct rx_ring *rxr;
3040 int rsize, tsize, error = IXGBE_SUCCESS;
3041 int txconf = 0, rxconf = 0;
3042
3043 /* First allocate the top level queue structs */
3044 if (!(adapter->queues =
3045 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
3046 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3047 aprint_error_dev(dev, "Unable to allocate queue memory\n");
3048 error = ENOMEM;
3049 goto fail;
3050 }
3051
3052 /* First allocate the TX ring struct memory */
3053 if (!(adapter->tx_rings =
3054 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3055 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3056 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
3057 error = ENOMEM;
3058 goto tx_fail;
3059 }
3060
3061 /* Next allocate the RX */
3062 if (!(adapter->rx_rings =
3063 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3064 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3065 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
3066 error = ENOMEM;
3067 goto rx_fail;
3068 }
3069
3070 /* For the ring itself */
3071 tsize = roundup2(adapter->num_tx_desc *
3072 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
3073
3074 /*
3075 * Now set up the TX queues; txconf is needed to handle the
3076 * possibility that things fail midcourse and we need to
3077 * undo memory gracefully.
3078 */
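	/*
	 * Assumed example: if the descriptor DMA allocation for
	 * queue 3 fails, txconf is still 3, so the err_tx_desc
	 * unwind at the bottom of this function frees the
	 * descriptor areas of queues 0..2 before bailing out.
	 */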
3079 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3080 /* Set up some basics */
3081 txr = &adapter->tx_rings[i];
3082 txr->adapter = adapter;
3083 txr->me = i;
3084 txr->num_desc = adapter->num_tx_desc;
3085
3086 /* Initialize the TX side lock */
3087 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3088 device_xname(dev), txr->me);
3089 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
3090
3091 if (ixgbe_dma_malloc(adapter, tsize,
3092 &txr->txdma, BUS_DMA_NOWAIT)) {
3093 aprint_error_dev(dev,
3094 "Unable to allocate TX Descriptor memory\n");
3095 error = ENOMEM;
3096 goto err_tx_desc;
3097 }
3098 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
3099 bzero((void *)txr->tx_base, tsize);
3100
3101 /* Now allocate transmit buffers for the ring */
3102 if (ixgbe_allocate_transmit_buffers(txr)) {
3103 aprint_error_dev(dev,
3104 "Critical Failure setting up transmit buffers\n");
3105 error = ENOMEM;
3106 goto err_tx_desc;
3107 }
3108 #ifndef IXGBE_LEGACY_TX
3109 /* Allocate a buf ring */
3110 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
3111 M_WAITOK, &txr->tx_mtx);
3112 if (txr->br == NULL) {
3113 aprint_error_dev(dev,
3114 "Critical Failure setting up buf ring\n");
3115 error = ENOMEM;
3116 goto err_tx_desc;
3117 }
3118 #endif
3119 }
3120
3121 /*
3122 * Next the RX queues...
3123 */
3124 rsize = roundup2(adapter->num_rx_desc *
3125 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3126 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3127 rxr = &adapter->rx_rings[i];
3128 /* Set up some basics */
3129 rxr->adapter = adapter;
3130 rxr->me = i;
3131 rxr->num_desc = adapter->num_rx_desc;
3132
3133 /* Initialize the RX side lock */
3134 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3135 device_xname(dev), rxr->me);
3136 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
3137
3138 if (ixgbe_dma_malloc(adapter, rsize,
3139 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3140 aprint_error_dev(dev,
3141 "Unable to allocate RxDescriptor memory\n");
3142 error = ENOMEM;
3143 goto err_rx_desc;
3144 }
3145 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3146 bzero((void *)rxr->rx_base, rsize);
3147
3148 /* Allocate receive buffers for the ring*/
3149 if (ixgbe_allocate_receive_buffers(rxr)) {
3150 aprint_error_dev(dev,
3151 "Critical Failure setting up receive buffers\n");
3152 error = ENOMEM;
3153 goto err_rx_desc;
3154 }
3155 }
3156
3157 /*
3158 ** Finally set up the queue holding structs
3159 */
3160 for (int i = 0; i < adapter->num_queues; i++) {
3161 que = &adapter->queues[i];
3162 que->adapter = adapter;
3163 que->txr = &adapter->tx_rings[i];
3164 que->rxr = &adapter->rx_rings[i];
3165 }
3166
3167 return (0);
3168
3169 err_rx_desc:
3170 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3171 ixgbe_dma_free(adapter, &rxr->rxdma);
3172 err_tx_desc:
3173 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3174 ixgbe_dma_free(adapter, &txr->txdma);
3175 free(adapter->rx_rings, M_DEVBUF);
3176 rx_fail:
3177 free(adapter->tx_rings, M_DEVBUF);
3178 tx_fail:
3179 free(adapter->queues, M_DEVBUF);
3180 fail:
3181 return (error);
3182 }
3183
3184 /*********************************************************************
3185 *
3186 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3187 * the information needed to transmit a packet on the wire. This is
3188 * called only once at attach; setup is done on every reset.
3189 *
3190 **********************************************************************/
3191 static int
3192 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3193 {
3194 struct adapter *adapter = txr->adapter;
3195 device_t dev = adapter->dev;
3196 struct ixgbe_tx_buf *txbuf;
3197 int error, i;
3198
3199 /*
3200 * Setup DMA descriptor areas.
3201 */
3202 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3203 1, 0, /* alignment, bounds */
3204 IXGBE_TSO_SIZE, /* maxsize */
3205 adapter->num_segs, /* nsegments */
3206 PAGE_SIZE, /* maxsegsize */
3207 0, /* flags */
3208 &txr->txtag))) {
3209 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
3210 goto fail;
3211 }
3212
3213 if (!(txr->tx_buffers =
3214 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3215 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3216 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
3217 error = ENOMEM;
3218 goto fail;
3219 }
3220
3221 /* Create the descriptor buffer dma maps */
3222 txbuf = txr->tx_buffers;
3223 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3224 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
3225 if (error != 0) {
3226 aprint_error_dev(dev,
3227 "Unable to create TX DMA map (%d)\n", error);
3228 goto fail;
3229 }
3230 }
3231
3232 return 0;
3233 fail:
3234 /* We free all, it handles case where we are in the middle */
3235 ixgbe_free_transmit_structures(adapter);
3236 return (error);
3237 }
3238
3239 /*********************************************************************
3240 *
3241 * Initialize a transmit ring.
3242 *
3243 **********************************************************************/
3244 static void
3245 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3246 {
3247 struct adapter *adapter = txr->adapter;
3248 struct ixgbe_tx_buf *txbuf;
3249 int i;
3250 #ifdef DEV_NETMAP
3251 struct netmap_adapter *na = NA(adapter->ifp);
3252 struct netmap_slot *slot;
3253 #endif /* DEV_NETMAP */
3254
3255 /* Clear the old ring contents */
3256 IXGBE_TX_LOCK(txr);
3257 #ifdef DEV_NETMAP
3258 /*
3259 * (under lock): if in netmap mode, do some consistency
3260 * checks and set slot to entry 0 of the netmap ring.
3261 */
3262 slot = netmap_reset(na, NR_TX, txr->me, 0);
3263 #endif /* DEV_NETMAP */
3264 bzero((void *)txr->tx_base,
3265 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3266 /* Reset indices */
3267 txr->next_avail_desc = 0;
3268 txr->next_to_clean = 0;
3269
3270 /* Free any existing tx buffers. */
3271 txbuf = txr->tx_buffers;
3272 for (i = 0; i < txr->num_desc; i++, txbuf++) {
3273 if (txbuf->m_head != NULL) {
3274 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
3275 0, txbuf->m_head->m_pkthdr.len,
3276 BUS_DMASYNC_POSTWRITE);
3277 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
3278 m_freem(txbuf->m_head);
3279 txbuf->m_head = NULL;
3280 }
3281 #ifdef DEV_NETMAP
3282 /*
3283 * In netmap mode, set the map for the packet buffer.
3284 * NOTE: Some drivers (not this one) also need to set
3285 * the physical buffer address in the NIC ring.
3286 * Slots in the netmap ring (indexed by "si") are
3287 * kring->nkr_hwofs positions "ahead" wrt the
3288 * corresponding slot in the NIC ring. In some drivers
3289 * (not here) nkr_hwofs can be negative. Function
3290 * netmap_idx_n2k() handles wraparounds properly.
3291 */
3292 if (slot) {
3293 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3294 netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3295 }
3296 #endif /* DEV_NETMAP */
3297 /* Clear the EOP descriptor pointer */
3298 txbuf->eop = NULL;
3299 }
3300
3301 #ifdef IXGBE_FDIR
3302 /* Set the rate at which we sample packets */
3303 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3304 txr->atr_sample = atr_sample_rate;
3305 #endif
3306
3307 /* Set number of descriptors available */
3308 txr->tx_avail = adapter->num_tx_desc;
3309
3310 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3311 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3312 IXGBE_TX_UNLOCK(txr);
3313 }
3314
3315 /*********************************************************************
3316 *
3317 * Initialize all transmit rings.
3318 *
3319 **********************************************************************/
3320 static int
3321 ixgbe_setup_transmit_structures(struct adapter *adapter)
3322 {
3323 struct tx_ring *txr = adapter->tx_rings;
3324
3325 for (int i = 0; i < adapter->num_queues; i++, txr++)
3326 ixgbe_setup_transmit_ring(txr);
3327
3328 return (0);
3329 }
3330
3331 /*********************************************************************
3332 *
3333 * Enable transmit unit.
3334 *
3335 **********************************************************************/
3336 static void
3337 ixgbe_initialize_transmit_units(struct adapter *adapter)
3338 {
3339 struct tx_ring *txr = adapter->tx_rings;
3340 struct ixgbe_hw *hw = &adapter->hw;
3341
3342 /* Setup the Base and Length of the Tx Descriptor Ring */
3343
3344 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3345 u64 tdba = txr->txdma.dma_paddr;
3346 u32 txctrl;
3347
3348 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3349 (tdba & 0x00000000ffffffffULL));
3350 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3351 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3352 adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3353
3354 /* Setup the HW Tx Head and Tail descriptor pointers */
3355 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3356 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3357
3358 /* Setup Transmit Descriptor Cmd Settings */
3359 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3360 txr->queue_status = IXGBE_QUEUE_IDLE;
3361
3362 /* Set the processing limit */
3363 txr->process_limit = ixgbe_tx_process_limit;
3364
3365 /* Disable Head Writeback */
3366 switch (hw->mac.type) {
3367 case ixgbe_mac_82598EB:
3368 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3369 break;
3370 case ixgbe_mac_82599EB:
3371 case ixgbe_mac_X540:
3372 default:
3373 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3374 break;
3375 }
3376 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3377 switch (hw->mac.type) {
3378 case ixgbe_mac_82598EB:
3379 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3380 break;
3381 case ixgbe_mac_82599EB:
3382 case ixgbe_mac_X540:
3383 default:
3384 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3385 break;
3386 }
3387
3388 }
3389
3390 if (hw->mac.type != ixgbe_mac_82598EB) {
3391 u32 dmatxctl, rttdcs;
3392 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3393 dmatxctl |= IXGBE_DMATXCTL_TE;
3394 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3395 /* Disable arbiter to set MTQC */
3396 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3397 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3398 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3399 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3400 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3401 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3402 }
3403
3404 return;
3405 }
3406
3407 /*********************************************************************
3408 *
3409 * Free all transmit rings.
3410 *
3411 **********************************************************************/
3412 static void
3413 ixgbe_free_transmit_structures(struct adapter *adapter)
3414 {
3415 struct tx_ring *txr = adapter->tx_rings;
3416
3417 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3418 ixgbe_free_transmit_buffers(txr);
3419 ixgbe_dma_free(adapter, &txr->txdma);
3420 IXGBE_TX_LOCK_DESTROY(txr);
3421 }
3422 free(adapter->tx_rings, M_DEVBUF);
3423 }
3424
3425 /*********************************************************************
3426 *
3427 * Free transmit ring related data structures.
3428 *
3429 **********************************************************************/
3430 static void
3431 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3432 {
3433 struct adapter *adapter = txr->adapter;
3434 struct ixgbe_tx_buf *tx_buffer;
3435 int i;
3436
3437 INIT_DEBUGOUT("free_transmit_ring: begin");
3438
3439 if (txr->tx_buffers == NULL)
3440 return;
3441
3442 tx_buffer = txr->tx_buffers;
3443 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3444 if (tx_buffer->m_head != NULL) {
3445 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
3446 0, tx_buffer->m_head->m_pkthdr.len,
3447 BUS_DMASYNC_POSTWRITE);
3448 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3449 m_freem(tx_buffer->m_head);
3450 tx_buffer->m_head = NULL;
3451 if (tx_buffer->map != NULL) {
3452 ixgbe_dmamap_destroy(txr->txtag,
3453 tx_buffer->map);
3454 tx_buffer->map = NULL;
3455 }
3456 } else if (tx_buffer->map != NULL) {
3457 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3458 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
3459 tx_buffer->map = NULL;
3460 }
3461 }
3462 #ifndef IXGBE_LEGACY_TX
3463 if (txr->br != NULL)
3464 buf_ring_free(txr->br, M_DEVBUF);
3465 #endif
3466 if (txr->tx_buffers != NULL) {
3467 free(txr->tx_buffers, M_DEVBUF);
3468 txr->tx_buffers = NULL;
3469 }
3470 if (txr->txtag != NULL) {
3471 ixgbe_dma_tag_destroy(txr->txtag);
3472 txr->txtag = NULL;
3473 }
3474 return;
3475 }
3476
3477 /*********************************************************************
3478 *
3479 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3480 *
3481 **********************************************************************/
3482
3483 static int
3484 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3485 u32 *cmd_type_len, u32 *olinfo_status)
3486 {
3487 struct m_tag *mtag;
3488 struct adapter *adapter = txr->adapter;
3489 struct ethercom *ec = &adapter->osdep.ec;
3490 struct ixgbe_adv_tx_context_desc *TXD;
3491 struct ether_vlan_header *eh;
3492 struct ip ip;
3493 struct ip6_hdr ip6;
3494 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3495 int ehdrlen, ip_hlen = 0;
3496 u16 etype;
3497 u8 ipproto __diagused = 0;
3498 int offload = TRUE;
3499 int ctxd = txr->next_avail_desc;
3500 u16 vtag = 0;
3501
3502 /* First check if TSO is to be used */
3503 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
3504 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3505
3506 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
3507 offload = FALSE;
3508
3509 /* Indicate the whole packet as payload when not doing TSO */
3510 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3511
3512 /* Now ready a context descriptor */
3513 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3514
3515 /*
3516 ** In advanced descriptors the vlan tag must
3517 ** be placed into the context descriptor. Hence
3518 ** we need to make one even if not doing offloads.
3519 */
3520 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3521 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3522 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3523 } else if (offload == FALSE) /* ... no offload to do */
3524 return 0;
3525
3526 /*
3527 * Determine where frame payload starts.
3528 * Jump over vlan headers if already present,
3529 * helpful for QinQ too.
3530 */
3531 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
3532 eh = mtod(mp, struct ether_vlan_header *);
3533 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3534 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3535 etype = ntohs(eh->evl_proto);
3536 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3537 } else {
3538 etype = ntohs(eh->evl_encap_proto);
3539 ehdrlen = ETHER_HDR_LEN;
3540 }
3541
3542 /* Set the ether header length */
3543 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3544
3545 switch (etype) {
3546 case ETHERTYPE_IP:
3547 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
3548 ip_hlen = ip.ip_hl << 2;
3549 ipproto = ip.ip_p;
3550 #if 0
3551 ip.ip_sum = 0;
3552 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
3553 #else
3554 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3555 ip.ip_sum == 0);
3556 #endif
3557 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3558 break;
3559 case ETHERTYPE_IPV6:
3560 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
3561 ip_hlen = sizeof(ip6);
3562 /* XXX-BZ this will go badly in case of ext hdrs. */
3563 ipproto = ip6.ip6_nxt;
3564 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3565 break;
3566 default:
3567 break;
3568 }
3569
3570 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
3571 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3572
3573 vlan_macip_lens |= ip_hlen;
3574 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
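	/*
	** At this point vlan_macip_lens holds, roughly,
	** (vtag << VLAN shift) | (ehdrlen << MACLEN shift) | ip_hlen;
	** e.g. an untagged IPv4/TCP frame gives ehdrlen = 14 and
	** ip_hlen = 20 with vtag = 0 (illustrative values).
	*/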
3575
3576 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
3577 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3578 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3579 KASSERT(ipproto == IPPROTO_TCP);
3580 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
3581 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3582 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3583 KASSERT(ipproto == IPPROTO_UDP);
3584 }
3585
3586 /* Now copy bits into descriptor */
3587 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3588 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3589 TXD->seqnum_seed = htole32(0);
3590 TXD->mss_l4len_idx = htole32(0);
3591
3592 /* We've consumed the first desc, adjust counters */
3593 if (++ctxd == txr->num_desc)
3594 ctxd = 0;
3595 txr->next_avail_desc = ctxd;
3596 --txr->tx_avail;
3597
3598 return 0;
3599 }
3600
3601 /**********************************************************************
3602 *
3603 * Setup work for hardware segmentation offload (TSO) on
3604 * adapters using advanced tx descriptors
3605 *
3606 **********************************************************************/
3607 static int
3608 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3609 u32 *cmd_type_len, u32 *olinfo_status)
3610 {
3611 struct m_tag *mtag;
3612 struct adapter *adapter = txr->adapter;
3613 struct ethercom *ec = &adapter->osdep.ec;
3614 struct ixgbe_adv_tx_context_desc *TXD;
3615 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3616 u32 mss_l4len_idx = 0, paylen;
3617 u16 vtag = 0, eh_type;
3618 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3619 struct ether_vlan_header *eh;
3620 #ifdef INET6
3621 struct ip6_hdr *ip6;
3622 #endif
3623 #ifdef INET
3624 struct ip *ip;
3625 #endif
3626 struct tcphdr *th;
3627
3628
3629 /*
3630 * Determine where frame payload starts.
3631 * Jump over vlan headers if already present
3632 */
3633 eh = mtod(mp, struct ether_vlan_header *);
3634 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3635 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3636 eh_type = eh->evl_proto;
3637 } else {
3638 ehdrlen = ETHER_HDR_LEN;
3639 eh_type = eh->evl_encap_proto;
3640 }
3641
3642 switch (ntohs(eh_type)) {
3643 #ifdef INET6
3644 case ETHERTYPE_IPV6:
3645 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3646 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3647 if (ip6->ip6_nxt != IPPROTO_TCP)
3648 return (ENXIO);
3649 ip_hlen = sizeof(struct ip6_hdr);
3650 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3651 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
3652 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3653 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
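		/*
		 * Seed th_sum with the pseudo-header checksum (length
		 * omitted) so the hardware can complete the TCP checksum
		 * of each segment it generates.
		 */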
3654 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3655 break;
3656 #endif
3657 #ifdef INET
3658 case ETHERTYPE_IP:
3659 ip = (struct ip *)(mp->m_data + ehdrlen);
3660 if (ip->ip_p != IPPROTO_TCP)
3661 return (ENXIO);
3662 ip->ip_sum = 0;
3663 ip_hlen = ip->ip_hl << 2;
3664 th = (struct tcphdr *)((char *)ip + ip_hlen);
3665 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3666 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3667 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3668 /* Tell transmit desc to also do IPv4 checksum. */
3669 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3670 break;
3671 #endif
3672 default:
3673 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3674 __func__, ntohs(eh_type));
3675 break;
3676 }
3677
3678 ctxd = txr->next_avail_desc;
3679 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3680
3681 tcp_hlen = th->th_off << 2;
3682
3683 /* This is used in the transmit desc in encap */
3684 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3685
3686 /* VLAN MACLEN IPLEN */
3687 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3688 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3689 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3690 }
3691
3692 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3693 vlan_macip_lens |= ip_hlen;
3694 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3695
3696 /* ADV DTYPE TUCMD */
3697 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3698 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3699 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3700
3701 /* MSS L4LEN IDX */
3702 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
3703 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3704 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
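	/*
	 * mss_l4len_idx carries the TCP maximum segment size (from
	 * m_pkthdr.segsz, set by the stack for TSO) and the TCP header
	 * length; the hardware uses these to slice the payload and
	 * rebuild the headers of every segment it emits.
	 */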
3705
3706 TXD->seqnum_seed = htole32(0);
3707
3708 if (++ctxd == txr->num_desc)
3709 ctxd = 0;
3710
3711 txr->tx_avail--;
3712 txr->next_avail_desc = ctxd;
3713 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3714 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3715 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3716 ++txr->tso_tx.ev_count;
3717 return (0);
3718 }
3719
3720 #ifdef IXGBE_FDIR
3721 /*
3722 ** This routine parses packet headers so that Flow
3723 ** Director can make a hashed filter table entry
3724 ** allowing traffic flows to be identified and kept
3725 ** on the same CPU. Doing this for every packet would be a
3726 ** performance hit, so we only do it for roughly one in
3727 ** every IXGBE_FDIR_RATE packets.
3728 */
3729 static void
3730 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3731 {
3732 struct adapter *adapter = txr->adapter;
3733 struct ix_queue *que;
3734 struct ip *ip;
3735 struct tcphdr *th;
3736 struct udphdr *uh;
3737 struct ether_vlan_header *eh;
3738 union ixgbe_atr_hash_dword input = {.dword = 0};
3739 union ixgbe_atr_hash_dword common = {.dword = 0};
3740 int ehdrlen, ip_hlen;
3741 u16 etype;
3742
3743 eh = mtod(mp, struct ether_vlan_header *);
3744 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3745 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3746 etype = eh->evl_proto;
3747 } else {
3748 ehdrlen = ETHER_HDR_LEN;
3749 etype = eh->evl_encap_proto;
3750 }
3751
3752 /* Only handling IPv4 */
3753 if (etype != htons(ETHERTYPE_IP))
3754 return;
3755
3756 ip = (struct ip *)(mp->m_data + ehdrlen);
3757 ip_hlen = ip->ip_hl << 2;
3758
3759 /* check if we're UDP or TCP */
3760 switch (ip->ip_p) {
3761 case IPPROTO_TCP:
3762 th = (struct tcphdr *)((char *)ip + ip_hlen);
3763 /* src and dst are inverted */
3764 common.port.dst ^= th->th_sport;
3765 common.port.src ^= th->th_dport;
3766 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3767 break;
3768 case IPPROTO_UDP:
3769 uh = (struct udphdr *)((char *)ip + ip_hlen);
3770 /* src and dst are inverted */
3771 common.port.dst ^= uh->uh_sport;
3772 common.port.src ^= uh->uh_dport;
3773 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3774 break;
3775 default:
3776 return;
3777 }
3778
3779 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3780 if (mp->m_pkthdr.ether_vtag)
3781 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3782 else
3783 common.flex_bytes ^= etype;
3784 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3785
3786 que = &adapter->queues[txr->me];
3787 /*
3788 ** This assumes the Rx queue and Tx
3789 ** queue are bound to the same CPU
3790 */
3791 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3792 input, common, que->msix);
3793 }
3794 #endif /* IXGBE_FDIR */
3795
3796 /**********************************************************************
3797 *
3798 * Examine each tx_buffer in the used queue. If the hardware is done
3799 * processing the packet then free associated resources. The
3800 * tx_buffer is put back on the free queue.
3801 *
3802 **********************************************************************/
3803 static bool
3804 ixgbe_txeof(struct tx_ring *txr)
3805 {
3806 struct adapter *adapter = txr->adapter;
3807 struct ifnet *ifp = adapter->ifp;
3808 u32 work, processed = 0;
3809 u16 limit = txr->process_limit;
3810 struct ixgbe_tx_buf *buf;
3811 union ixgbe_adv_tx_desc *txd;
3812 struct timeval now, elapsed;
3813
3814 KASSERT(mutex_owned(&txr->tx_mtx));
3815
3816 #ifdef DEV_NETMAP
3817 if (ifp->if_capenable & IFCAP_NETMAP) {
3818 struct netmap_adapter *na = NA(ifp);
3819 struct netmap_kring *kring = &na->tx_rings[txr->me];
3820 txd = txr->tx_base;
3821 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3822 BUS_DMASYNC_POSTREAD);
3823 /*
3824 * In netmap mode, all the work is done in the context
3825 * of the client thread. Interrupt handlers only wake up
3826 * clients, which may be sleeping on individual rings
3827 * or on a global resource for all rings.
3828 * To implement tx interrupt mitigation, we wake up the client
3829 * thread roughly every half ring, even if the NIC interrupts
3830 * more frequently. This is implemented as follows:
3831 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3832 * the slot that should wake up the thread (nkr_num_slots
3833 * means the user thread should not be woken up);
3834 * - the driver ignores tx interrupts unless netmap_mitigate=0
3835 * or the slot has the DD bit set.
3836 *
3837 * When the driver has separate locks, we need to
3838 * release and re-acquire txlock to avoid deadlocks.
3839 * XXX see if we can find a better way.
3840 */
3841 if (!netmap_mitigate ||
3842 (kring->nr_kflags < kring->nkr_num_slots &&
3843 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3844 netmap_tx_irq(ifp, txr->me |
3845 (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3846 }
3847 return FALSE;
3848 }
3849 #endif /* DEV_NETMAP */
3850
3851 if (txr->tx_avail == txr->num_desc) {
3852 txr->queue_status = IXGBE_QUEUE_IDLE;
3853 return false;
3854 }
3855
3856 /* Get work starting point */
3857 work = txr->next_to_clean;
3858 buf = &txr->tx_buffers[work];
3859 txd = &txr->tx_base[work];
3860 work -= txr->num_desc; /* The distance to ring end */
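	/*
	 * Note: 'work' is kept as a negative offset from the end of
	 * the ring, so after the increments below a value of zero
	 * means we have wrapped and must restart at the ring base.
	 */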
3861 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3862 BUS_DMASYNC_POSTREAD);
3863 do {
3864 union ixgbe_adv_tx_desc *eop= buf->eop;
3865 if (eop == NULL) /* No work */
3866 break;
3867
3868 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3869 break; /* I/O not complete */
3870
3871 if (buf->m_head) {
3872 txr->bytes +=
3873 buf->m_head->m_pkthdr.len;
3874 bus_dmamap_sync(txr->txtag->dt_dmat,
3875 buf->map,
3876 0, buf->m_head->m_pkthdr.len,
3877 BUS_DMASYNC_POSTWRITE);
3878 ixgbe_dmamap_unload(txr->txtag,
3879 buf->map);
3880 m_freem(buf->m_head);
3881 buf->m_head = NULL;
3882 /*
3883 * NetBSD: Don't override buf->map with NULL here.
3884 * It'll panic when a ring runs one lap around.
3885 */
3886 }
3887 buf->eop = NULL;
3888 ++txr->tx_avail;
3889
3890 		/* Clean the remaining descriptors if this was a multi-segment packet */
3891 while (txd != eop) {
3892 ++txd;
3893 ++buf;
3894 ++work;
3895 /* wrap the ring? */
3896 if (__predict_false(!work)) {
3897 work -= txr->num_desc;
3898 buf = txr->tx_buffers;
3899 txd = txr->tx_base;
3900 }
3901 if (buf->m_head) {
3902 txr->bytes +=
3903 buf->m_head->m_pkthdr.len;
3904 bus_dmamap_sync(txr->txtag->dt_dmat,
3905 buf->map,
3906 0, buf->m_head->m_pkthdr.len,
3907 BUS_DMASYNC_POSTWRITE);
3908 ixgbe_dmamap_unload(txr->txtag,
3909 buf->map);
3910 m_freem(buf->m_head);
3911 buf->m_head = NULL;
3912 /*
3913 * NetBSD: Don't override buf->map with NULL
3914 * here. It'll panic when a ring runs one lap
3915 * around.
3916 */
3917 }
3918 ++txr->tx_avail;
3919 buf->eop = NULL;
3920
3921 }
3922 ++txr->packets;
3923 ++processed;
3924 ++ifp->if_opackets;
3925 getmicrotime(&txr->watchdog_time);
3926
3927 /* Try the next packet */
3928 ++txd;
3929 ++buf;
3930 ++work;
3931 /* reset with a wrap */
3932 if (__predict_false(!work)) {
3933 work -= txr->num_desc;
3934 buf = txr->tx_buffers;
3935 txd = txr->tx_base;
3936 }
3937 prefetch(txd);
3938 } while (__predict_true(--limit));
3939
3940 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3941 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3942
3943 work += txr->num_desc;
3944 txr->next_to_clean = work;
3945
3946 	/*
3947 	** Watchdog calculation: we know there is
3948 	** work outstanding or the early return above
3949 	** would have been taken, so nothing processed
3950 	** for too long indicates a hang.
3951 	*/
3952 getmicrotime(&now);
3953 timersub(&now, &txr->watchdog_time, &elapsed);
3954 if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
3955 txr->queue_status = IXGBE_QUEUE_HUNG;
3956
3957 if (txr->tx_avail == txr->num_desc) {
3958 txr->queue_status = IXGBE_QUEUE_IDLE;
3959 return false;
3960 }
3961
3962 return true;
3963 }
3964
3965 /*********************************************************************
3966 *
3967 * Refresh mbuf buffers for RX descriptor rings
3968  *   - now keeps its own state, so discards due to resource
3969  *  exhaustion are unnecessary; if an mbuf cannot be obtained
3970  *  it just returns, keeping its placeholder, and can simply
3971  *  be called again later to retry.
3972 *
3973 **********************************************************************/
3974 static void
3975 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3976 {
3977 struct adapter *adapter = rxr->adapter;
3978 struct ixgbe_rx_buf *rxbuf;
3979 struct mbuf *mp;
3980 int i, j, error;
3981 bool refreshed = false;
3982
3983 i = j = rxr->next_to_refresh;
3984 /* Control the loop with one beyond */
3985 if (++j == rxr->num_desc)
3986 j = 0;
3987
3988 while (j != limit) {
3989 rxbuf = &rxr->rx_buffers[i];
3990 if (rxbuf->buf == NULL) {
3991 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
3992 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
3993 if (mp == NULL) {
3994 rxr->no_jmbuf.ev_count++;
3995 goto update;
3996 }
3997 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3998 m_adj(mp, ETHER_ALIGN);
3999 } else
4000 mp = rxbuf->buf;
4001
4002 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4003 /* If we're dealing with an mbuf that was copied rather
4004 * than replaced, there's no need to go through busdma.
4005 */
4006 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
4007 /* Get the memory mapping */
4008 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4009 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4010 if (error != 0) {
4011 printf("Refresh mbufs: payload dmamap load"
4012 " failure - %d\n", error);
4013 m_free(mp);
4014 rxbuf->buf = NULL;
4015 goto update;
4016 }
4017 rxbuf->buf = mp;
4018 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4019 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
4020 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
4021 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4022 } else {
4023 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
4024 rxbuf->flags &= ~IXGBE_RX_COPY;
4025 }
4026
4027 refreshed = true;
4028 /* Next is precalculated */
4029 i = j;
4030 rxr->next_to_refresh = i;
4031 if (++j == rxr->num_desc)
4032 j = 0;
4033 }
4034 update:
4035 if (refreshed) /* Update hardware tail index */
4036 IXGBE_WRITE_REG(&adapter->hw,
4037 IXGBE_RDT(rxr->me), rxr->next_to_refresh);
4038 return;
4039 }
4040
4041 /*********************************************************************
4042 *
4043 * Allocate memory for rx_buffer structures. Since we use one
4044  * rx_buffer per received packet, the maximum number of rx_buffers
4045 * that we'll need is equal to the number of receive descriptors
4046 * that we've allocated.
4047 *
4048 **********************************************************************/
4049 static int
4050 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
4051 {
4052 struct adapter *adapter = rxr->adapter;
4053 device_t dev = adapter->dev;
4054 struct ixgbe_rx_buf *rxbuf;
4055 int i, bsize, error;
4056
4057 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
4058 if (!(rxr->rx_buffers =
4059 (struct ixgbe_rx_buf *) malloc(bsize,
4060 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4061 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
4062 error = ENOMEM;
4063 goto fail;
4064 }
4065
4066 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
4067 1, 0, /* alignment, bounds */
4068 MJUM16BYTES, /* maxsize */
4069 1, /* nsegments */
4070 MJUM16BYTES, /* maxsegsize */
4071 0, /* flags */
4072 &rxr->ptag))) {
4073 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
4074 goto fail;
4075 }
4076
4077 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
4078 rxbuf = &rxr->rx_buffers[i];
4079 error = ixgbe_dmamap_create(rxr->ptag,
4080 BUS_DMA_NOWAIT, &rxbuf->pmap);
4081 if (error) {
4082 aprint_error_dev(dev, "Unable to create RX dma map\n");
4083 goto fail;
4084 }
4085 }
4086
4087 return (0);
4088
4089 fail:
4090 /* Frees all, but can handle partial completion */
4091 ixgbe_free_receive_structures(adapter);
4092 return (error);
4093 }
4094
4095 /*
4096 ** Used to detect a descriptor that has
4097 ** been merged by Hardware RSC.
4098 */
4099 static inline u32
4100 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
4101 {
4102 return (le32toh(rx->wb.lower.lo_dword.data) &
4103 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
4104 }
4105
4106 /*********************************************************************
4107 *
4108 * Initialize Hardware RSC (LRO) feature on 82599
4109  *  for an RX ring; this is toggled by the LRO capability
4110  *  even though it is transparent to the stack.
4111  *
4112  *  NOTE: since this HW feature only works with IPv4, and
4113  *  our testing has shown soft LRO to be as effective,
4114  *  I have decided to disable this by default.
4115 *
4116 **********************************************************************/
4117 static void
4118 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
4119 {
4120 struct adapter *adapter = rxr->adapter;
4121 struct ixgbe_hw *hw = &adapter->hw;
4122 u32 rscctrl, rdrxctl;
4123
4124 /* If turning LRO/RSC off we need to disable it */
4125 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
4126 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4127 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
 		/* Write the cleared enable bit back, otherwise RSC stays on */
 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4128 		return;
4129 }
4130
4131 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4132 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4133 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
4134 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4135 #endif /* DEV_NETMAP */
4136 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4137 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4138 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4139
4140 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4141 rscctrl |= IXGBE_RSCCTL_RSCEN;
4142 /*
4143 ** Limit the total number of descriptors that
4144 ** can be combined, so it does not exceed 64K
4145 */
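	/*
	 * With the usual cluster sizes (MCLBYTES = 2K, MJUMPAGESIZE =
	 * PAGE_SIZE, MJUM9BYTES = 9K) these MAXDESC choices keep the
	 * largest merged frame comfortably under the 64K limit.
	 */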
4146 if (rxr->mbuf_sz == MCLBYTES)
4147 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4148 else if (rxr->mbuf_sz == MJUMPAGESIZE)
4149 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4150 else if (rxr->mbuf_sz == MJUM9BYTES)
4151 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4152 else /* Using 16K cluster */
4153 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4154
4155 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4156
4157 /* Enable TCP header recognition */
4158 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4159 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4160 IXGBE_PSRTYPE_TCPHDR));
4161
4162 /* Disable RSC for ACK packets */
4163 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4164 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4165
4166 rxr->hw_rsc = TRUE;
4167 }
4168
4169
4170 static void
4171 ixgbe_free_receive_ring(struct rx_ring *rxr)
4172 {
4173 struct ixgbe_rx_buf *rxbuf;
4174 int i;
4175
4176 for (i = 0; i < rxr->num_desc; i++) {
4177 rxbuf = &rxr->rx_buffers[i];
4178 if (rxbuf->buf != NULL) {
4179 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4180 0, rxbuf->buf->m_pkthdr.len,
4181 BUS_DMASYNC_POSTREAD);
4182 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4183 rxbuf->buf->m_flags |= M_PKTHDR;
4184 m_freem(rxbuf->buf);
4185 rxbuf->buf = NULL;
4186 }
4187 }
4188 }
4189
4190
4191 /*********************************************************************
4192 *
4193 * Initialize a receive ring and its buffers.
4194 *
4195 **********************************************************************/
4196 static int
4197 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4198 {
4199 struct adapter *adapter;
4200 struct ixgbe_rx_buf *rxbuf;
4201 #ifdef LRO
4202 struct ifnet *ifp;
4203 struct lro_ctrl *lro = &rxr->lro;
4204 #endif /* LRO */
4205 int rsize, error = 0;
4206 #ifdef DEV_NETMAP
4207 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4208 struct netmap_slot *slot;
4209 #endif /* DEV_NETMAP */
4210
4211 adapter = rxr->adapter;
4212 #ifdef LRO
4213 ifp = adapter->ifp;
4214 #endif /* LRO */
4215
4216 /* Clear the ring contents */
4217 IXGBE_RX_LOCK(rxr);
4218 #ifdef DEV_NETMAP
4219 /* same as in ixgbe_setup_transmit_ring() */
4220 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4221 #endif /* DEV_NETMAP */
4222 rsize = roundup2(adapter->num_rx_desc *
4223 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4224 bzero((void *)rxr->rx_base, rsize);
4225 /* Cache the size */
4226 rxr->mbuf_sz = adapter->rx_mbuf_sz;
4227
4228 /* Free current RX buffer structs and their mbufs */
4229 ixgbe_free_receive_ring(rxr);
4230
4231 IXGBE_RX_UNLOCK(rxr);
4232
4233 /* Now reinitialize our supply of jumbo mbufs. The number
4234 * or size of jumbo mbufs may have changed.
4235 */
4236 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
4237 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
4238
4239 IXGBE_RX_LOCK(rxr);
4240
4241 /* Now replenish the mbufs */
4242 for (int j = 0; j != rxr->num_desc; ++j) {
4243 struct mbuf *mp;
4244
4245 rxbuf = &rxr->rx_buffers[j];
4246 #ifdef DEV_NETMAP
4247 /*
4248 * In netmap mode, fill the map and set the buffer
4249 * address in the NIC ring, considering the offset
4250 * between the netmap and NIC rings (see comment in
4251 * ixgbe_setup_transmit_ring() ). No need to allocate
4252 * an mbuf, so end the block with a continue;
4253 */
4254 if (slot) {
4255 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4256 uint64_t paddr;
4257 void *addr;
4258
4259 addr = PNMB(slot + sj, &paddr);
4260 netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4261 /* Update descriptor */
4262 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4263 continue;
4264 }
4265 #endif /* DEV_NETMAP */
4266 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4267 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
4268 if (rxbuf->buf == NULL) {
4269 error = ENOBUFS;
4270 goto fail;
4271 }
4272 mp = rxbuf->buf;
4273 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4274 /* Get the memory mapping */
4275 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4276 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4277 if (error != 0)
4278 goto fail;
4279 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4280 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
4281 /* Update descriptor */
4282 rxr->rx_base[j].read.pkt_addr =
4283 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4284 }
4285
4286
4287 /* Setup our descriptor indices */
4288 rxr->next_to_check = 0;
4289 rxr->next_to_refresh = 0;
4290 rxr->lro_enabled = FALSE;
4291 rxr->rx_copies.ev_count = 0;
4292 rxr->rx_bytes.ev_count = 0;
4293 rxr->discard = FALSE;
4294 rxr->vtag_strip = FALSE;
4295
4296 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4297 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4298
4299 /*
4300 ** Now set up the LRO interface:
4301 */
4302 if (ixgbe_rsc_enable)
4303 ixgbe_setup_hw_rsc(rxr);
4304 #ifdef LRO
4305 else if (ifp->if_capenable & IFCAP_LRO) {
4306 device_t dev = adapter->dev;
4307 int err = tcp_lro_init(lro);
4308 if (err) {
4309 device_printf(dev, "LRO Initialization failed!\n");
4310 goto fail;
4311 }
4312 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4313 rxr->lro_enabled = TRUE;
4314 lro->ifp = adapter->ifp;
4315 }
4316 #endif /* LRO */
4317
4318 IXGBE_RX_UNLOCK(rxr);
4319 return (0);
4320
4321 fail:
4322 ixgbe_free_receive_ring(rxr);
4323 IXGBE_RX_UNLOCK(rxr);
4324 return (error);
4325 }
4326
4327 /*********************************************************************
4328 *
4329 * Initialize all receive rings.
4330 *
4331 **********************************************************************/
4332 static int
4333 ixgbe_setup_receive_structures(struct adapter *adapter)
4334 {
4335 struct rx_ring *rxr = adapter->rx_rings;
4336 int j;
4337
4338 for (j = 0; j < adapter->num_queues; j++, rxr++)
4339 if (ixgbe_setup_receive_ring(rxr))
4340 goto fail;
4341
4342 return (0);
4343 fail:
4344 /*
4345 	 * Free RX buffers allocated so far; we only need to handle
4346 	 * the rings that completed, since the failing case will have
4347 	 * cleaned up after itself. Ring 'j' failed, so it's the terminus.
4348 */
4349 for (int i = 0; i < j; ++i) {
4350 rxr = &adapter->rx_rings[i];
4351 ixgbe_free_receive_ring(rxr);
4352 }
4353
4354 return (ENOBUFS);
4355 }
4356
4357 /*********************************************************************
4358 *
4359 * Setup receive registers and features.
4360 *
4361 **********************************************************************/
4362 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4363
4364 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4365
4366 static void
4367 ixgbe_initialize_receive_units(struct adapter *adapter)
4368 {
4369 int i;
4370 struct rx_ring *rxr = adapter->rx_rings;
4371 struct ixgbe_hw *hw = &adapter->hw;
4372 struct ifnet *ifp = adapter->ifp;
4373 u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
4374 u32 reta, mrqc = 0, hlreg, r[10];
4375
4376
4377 /*
4378 * Make sure receives are disabled while
4379 * setting up the descriptor ring
4380 */
4381 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4382 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4383 rxctrl & ~IXGBE_RXCTRL_RXEN);
4384
4385 /* Enable broadcasts */
4386 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4387 fctrl |= IXGBE_FCTRL_BAM;
4388 fctrl |= IXGBE_FCTRL_DPF;
4389 fctrl |= IXGBE_FCTRL_PMCF;
4390 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4391
4392 /* Set for Jumbo Frames? */
4393 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4394 if (ifp->if_mtu > ETHERMTU)
4395 hlreg |= IXGBE_HLREG0_JUMBOEN;
4396 else
4397 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4398 #ifdef DEV_NETMAP
4399 /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4400 if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4401 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4402 else
4403 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4404 #endif /* DEV_NETMAP */
4405 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4406
4407 bufsz = (adapter->rx_mbuf_sz +
4408 BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
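	/*
	 * bufsz is the receive buffer size expressed in the SRRCTL
	 * BSIZEPKT units (1 KB granularity, per the BSIZEPKT shift),
	 * rounded up so a full receive buffer always fits.
	 */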
4409
4410 for (i = 0; i < adapter->num_queues; i++, rxr++) {
4411 u64 rdba = rxr->rxdma.dma_paddr;
4412
4413 /* Setup the Base and Length of the Rx Descriptor Ring */
4414 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4415 (rdba & 0x00000000ffffffffULL));
4416 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4417 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4418 adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4419
4420 /* Set up the SRRCTL register */
4421 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4422 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4423 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4424 srrctl |= bufsz;
4425 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4426 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4427
4428 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4429 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4430 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4431
4432 /* Set the processing limit */
4433 rxr->process_limit = ixgbe_rx_process_limit;
4434 }
4435
4436 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4437 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4438 IXGBE_PSRTYPE_UDPHDR |
4439 IXGBE_PSRTYPE_IPV4HDR |
4440 IXGBE_PSRTYPE_IPV6HDR;
4441 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4442 }
4443
4444 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4445
4446 /* Setup RSS */
4447 if (adapter->num_queues > 1) {
4448 int j;
4449 reta = 0;
4450
4451 /* set up random bits */
4452 cprng_fast(&r, sizeof(r));
4453
4454 /* Set up the redirection table */
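		/*
		 * The table holds 128 byte-wide entries, packed four per
		 * 32-bit RETA register, so a register is written on every
		 * fourth iteration. Multiplying the queue index by 0x11
		 * presumably mirrors it into both nibbles of each entry.
		 */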
4455 for (i = 0, j = 0; i < 128; i++, j++) {
4456 if (j == adapter->num_queues) j = 0;
4457 reta = (reta << 8) | (j * 0x11);
4458 if ((i & 3) == 3)
4459 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4460 }
4461
4462 /* Now fill our hash function seeds */
4463 for (i = 0; i < 10; i++)
4464 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
4465
4466 /* Perform hash on these packet types */
4467 mrqc = IXGBE_MRQC_RSSEN
4468 | IXGBE_MRQC_RSS_FIELD_IPV4
4469 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4470 | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4471 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4472 | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4473 | IXGBE_MRQC_RSS_FIELD_IPV6
4474 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4475 | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4476 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4477 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4478
4479 /* RSS and RX IPP Checksum are mutually exclusive */
4480 rxcsum |= IXGBE_RXCSUM_PCSD;
4481 }
4482
4483 if (ifp->if_capenable & IFCAP_RXCSUM)
4484 rxcsum |= IXGBE_RXCSUM_PCSD;
4485
4486 if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4487 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4488
4489 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4490
4491 return;
4492 }
4493
4494 /*********************************************************************
4495 *
4496 * Free all receive rings.
4497 *
4498 **********************************************************************/
4499 static void
4500 ixgbe_free_receive_structures(struct adapter *adapter)
4501 {
4502 struct rx_ring *rxr = adapter->rx_rings;
4503
4504 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4505 #ifdef LRO
4506 struct lro_ctrl *lro = &rxr->lro;
4507 #endif /* LRO */
4508 ixgbe_free_receive_buffers(rxr);
4509 #ifdef LRO
4510 /* Free LRO memory */
4511 tcp_lro_free(lro);
4512 #endif /* LRO */
4513 /* Free the ring memory as well */
4514 ixgbe_dma_free(adapter, &rxr->rxdma);
4515 IXGBE_RX_LOCK_DESTROY(rxr);
4516 }
4517
4518 free(adapter->rx_rings, M_DEVBUF);
4519 }
4520
4521
4522 /*********************************************************************
4523 *
4524 * Free receive ring data structures
4525 *
4526 **********************************************************************/
4527 static void
4528 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4529 {
4530 struct adapter *adapter = rxr->adapter;
4531 struct ixgbe_rx_buf *rxbuf;
4532
4533 INIT_DEBUGOUT("free_receive_structures: begin");
4534
4535 /* Cleanup any existing buffers */
4536 if (rxr->rx_buffers != NULL) {
4537 for (int i = 0; i < adapter->num_rx_desc; i++) {
4538 rxbuf = &rxr->rx_buffers[i];
4539 if (rxbuf->buf != NULL) {
4540 bus_dmamap_sync(rxr->ptag->dt_dmat,
4541 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
4542 BUS_DMASYNC_POSTREAD);
4543 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4544 rxbuf->buf->m_flags |= M_PKTHDR;
4545 m_freem(rxbuf->buf);
4546 }
4547 rxbuf->buf = NULL;
4548 if (rxbuf->pmap != NULL) {
4549 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4550 rxbuf->pmap = NULL;
4551 }
4552 }
4553 if (rxr->rx_buffers != NULL) {
4554 free(rxr->rx_buffers, M_DEVBUF);
4555 rxr->rx_buffers = NULL;
4556 }
4557 }
4558
4559 if (rxr->ptag != NULL) {
4560 ixgbe_dma_tag_destroy(rxr->ptag);
4561 rxr->ptag = NULL;
4562 }
4563
4564 return;
4565 }
4566
4567 static __inline void
4568 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4569 {
4570 int s;
4571
4572 #ifdef LRO
4573 struct adapter *adapter = ifp->if_softc;
4574 struct ethercom *ec = &adapter->osdep.ec;
4575
4576 /*
4577 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
4578 	 * has been verified by hardware, and which carry no VLAN tag in the
4579 	 * ethernet header. For IPv6 we do not yet support extension headers.
4580 */
4581 if (rxr->lro_enabled &&
4582 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
4583 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4584 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4585 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4586 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4587 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4588 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4589 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4590 /*
4591 * Send to the stack if:
4592 ** - LRO not enabled, or
4593 ** - no LRO resources, or
4594 ** - lro enqueue fails
4595 */
4596 if (rxr->lro.lro_cnt != 0)
4597 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4598 return;
4599 }
4600 #endif /* LRO */
4601
4602 IXGBE_RX_UNLOCK(rxr);
4603
4604 s = splnet();
4605 /* Pass this up to any BPF listeners. */
4606 bpf_mtap(ifp, m);
4607 (*ifp->if_input)(ifp, m);
4608 splx(s);
4609
4610 IXGBE_RX_LOCK(rxr);
4611 }
4612
4613 static __inline void
4614 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4615 {
4616 struct ixgbe_rx_buf *rbuf;
4617
4618 rbuf = &rxr->rx_buffers[i];
4619
4620 if (rbuf->fmp != NULL) {/* Partial chain ? */
4621 rbuf->fmp->m_flags |= M_PKTHDR;
4622 m_freem(rbuf->fmp);
4623 rbuf->fmp = NULL;
4624 }
4625
4626 /*
4627 ** With advanced descriptors the writeback
4628 	** clobbers the buffer addrs, so it's easier
4629 ** to just free the existing mbufs and take
4630 ** the normal refresh path to get new buffers
4631 ** and mapping.
4632 */
4633 if (rbuf->buf) {
4634 m_free(rbuf->buf);
4635 rbuf->buf = NULL;
4636 }
4637
4638 return;
4639 }
4640
4641
4642 /*********************************************************************
4643 *
4644  *  This routine executes in interrupt context. It replenishes
4645  *  the mbufs in the descriptor ring and sends data which has been
4646  *  DMA'ed into host memory to the upper layer.
4647  *
4648  *  We loop at most 'count' (the ring's process limit) times, or
4649  *  until no more completed descriptors remain.
4650 *
4651 * Return TRUE for more work, FALSE for all clean.
4652 *********************************************************************/
4653 static bool
4654 ixgbe_rxeof(struct ix_queue *que)
4655 {
4656 struct adapter *adapter = que->adapter;
4657 struct rx_ring *rxr = que->rxr;
4658 struct ifnet *ifp = adapter->ifp;
4659 #ifdef LRO
4660 struct lro_ctrl *lro = &rxr->lro;
4661 struct lro_entry *queued;
4662 #endif /* LRO */
4663 int i, nextp, processed = 0;
4664 u32 staterr = 0;
4665 u16 count = rxr->process_limit;
4666 union ixgbe_adv_rx_desc *cur;
4667 struct ixgbe_rx_buf *rbuf, *nbuf;
4668
4669 IXGBE_RX_LOCK(rxr);
4670
4671 #ifdef DEV_NETMAP
4672 /* Same as the txeof routine: wakeup clients on intr. */
4673 if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4674 return (FALSE);
4675 #endif /* DEV_NETMAP */
4676 for (i = rxr->next_to_check; count != 0;) {
4677 struct mbuf *sendmp, *mp;
4678 u32 rsc, ptype;
4679 u16 len;
4680 u16 vtag = 0;
4681 bool eop;
4682
4683 /* Sync the ring. */
4684 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4685 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4686
4687 cur = &rxr->rx_base[i];
4688 staterr = le32toh(cur->wb.upper.status_error);
4689
4690 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4691 break;
4692 if ((ifp->if_flags & IFF_RUNNING) == 0)
4693 break;
4694
4695 count--;
4696 sendmp = NULL;
4697 nbuf = NULL;
4698 rsc = 0;
4699 cur->wb.upper.status_error = 0;
4700 rbuf = &rxr->rx_buffers[i];
4701 mp = rbuf->buf;
4702
4703 len = le16toh(cur->wb.upper.length);
4704 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4705 IXGBE_RXDADV_PKTTYPE_MASK;
4706 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4707
4708 /* Make sure bad packets are discarded */
4709 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4710 (rxr->discard)) {
4711 rxr->rx_discarded.ev_count++;
4712 if (eop)
4713 rxr->discard = FALSE;
4714 else
4715 rxr->discard = TRUE;
4716 ixgbe_rx_discard(rxr, i);
4717 goto next_desc;
4718 }
4719
4720 /*
4721 ** On 82599 which supports a hardware
4722 ** LRO (called HW RSC), packets need
4723 ** not be fragmented across sequential
4724 ** descriptors, rather the next descriptor
4725 ** is indicated in bits of the descriptor.
4726 	** This also means that we might process
4727 ** more than one packet at a time, something
4728 ** that has never been true before, it
4729 ** required eliminating global chain pointers
4730 ** in favor of what we are doing here. -jfv
4731 */
4732 if (!eop) {
4733 /*
4734 ** Figure out the next descriptor
4735 ** of this frame.
4736 */
4737 if (rxr->hw_rsc == TRUE) {
4738 rsc = ixgbe_rsc_count(cur);
4739 rxr->rsc_num += (rsc - 1);
4740 }
4741 if (rsc) { /* Get hardware index */
4742 nextp = ((staterr &
4743 IXGBE_RXDADV_NEXTP_MASK) >>
4744 IXGBE_RXDADV_NEXTP_SHIFT);
4745 } else { /* Just sequential */
4746 nextp = i + 1;
4747 if (nextp == adapter->num_rx_desc)
4748 nextp = 0;
4749 }
4750 nbuf = &rxr->rx_buffers[nextp];
4751 prefetch(nbuf);
4752 }
4753 /*
4754 ** Rather than using the fmp/lmp global pointers
4755 ** we now keep the head of a packet chain in the
4756 ** buffer struct and pass this along from one
4757 ** descriptor to the next, until we get EOP.
4758 */
4759 mp->m_len = len;
4760 /*
4761 ** See if there is a stored head
4762 ** that determines what we are
4763 */
4764 sendmp = rbuf->fmp;
4765
4766 if (sendmp != NULL) { /* secondary frag */
4767 rbuf->buf = rbuf->fmp = NULL;
4768 mp->m_flags &= ~M_PKTHDR;
4769 sendmp->m_pkthdr.len += mp->m_len;
4770 } else {
4771 /*
4772 * Optimize. This might be a small packet,
4773 * maybe just a TCP ACK. Do a fast copy that
4774 * is cache aligned into a new mbuf, and
4775 * leave the old mbuf+cluster for re-use.
4776 */
4777 if (eop && len <= IXGBE_RX_COPY_LEN) {
4778 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4779 if (sendmp != NULL) {
4780 sendmp->m_data +=
4781 IXGBE_RX_COPY_ALIGN;
4782 ixgbe_bcopy(mp->m_data,
4783 sendmp->m_data, len);
4784 sendmp->m_len = len;
4785 rxr->rx_copies.ev_count++;
4786 rbuf->flags |= IXGBE_RX_COPY;
4787 }
4788 }
4789 if (sendmp == NULL) {
4790 rbuf->buf = rbuf->fmp = NULL;
4791 sendmp = mp;
4792 }
4793
4794 /* first desc of a non-ps chain */
4795 sendmp->m_flags |= M_PKTHDR;
4796 sendmp->m_pkthdr.len = mp->m_len;
4797 }
4798 ++processed;
4799 /* Pass the head pointer on */
4800 if (eop == 0) {
4801 nbuf->fmp = sendmp;
4802 sendmp = NULL;
4803 mp->m_next = nbuf->buf;
4804 } else { /* Sending this frame */
4805 sendmp->m_pkthdr.rcvif = ifp;
4806 ifp->if_ipackets++;
4807 rxr->rx_packets.ev_count++;
4808 /* capture data for AIM */
4809 rxr->bytes += sendmp->m_pkthdr.len;
4810 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
4811 /* Process vlan info */
4812 if ((rxr->vtag_strip) &&
4813 (staterr & IXGBE_RXD_STAT_VP))
4814 vtag = le16toh(cur->wb.upper.vlan);
4815 if (vtag) {
4816 VLAN_INPUT_TAG(ifp, sendmp, vtag,
4817 printf("%s: could not apply VLAN "
4818 "tag", __func__));
4819 }
4820 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
4821 ixgbe_rx_checksum(staterr, sendmp, ptype,
4822 &adapter->stats);
4823 }
4824 #if __FreeBSD_version >= 800000
4825 sendmp->m_pkthdr.flowid = que->msix;
4826 sendmp->m_flags |= M_FLOWID;
4827 #endif
4828 }
4829 next_desc:
4830 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4831 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4832
4833 /* Advance our pointers to the next descriptor. */
4834 if (++i == rxr->num_desc)
4835 i = 0;
4836
4837 /* Now send to the stack or do LRO */
4838 if (sendmp != NULL) {
4839 rxr->next_to_check = i;
4840 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4841 i = rxr->next_to_check;
4842 }
4843
4844 /* Every 8 descriptors we go to refresh mbufs */
4845 if (processed == 8) {
4846 ixgbe_refresh_mbufs(rxr, i);
4847 processed = 0;
4848 }
4849 }
4850
4851 /* Refresh any remaining buf structs */
4852 if (ixgbe_rx_unrefreshed(rxr))
4853 ixgbe_refresh_mbufs(rxr, i);
4854
4855 rxr->next_to_check = i;
4856
4857 #ifdef LRO
4858 /*
4859 * Flush any outstanding LRO work
4860 */
4861 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4862 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4863 tcp_lro_flush(lro, queued);
4864 }
4865 #endif /* LRO */
4866
4867 IXGBE_RX_UNLOCK(rxr);
4868
4869 /*
4870 ** We still have cleaning to do?
4871 ** Schedule another interrupt if so.
4872 */
4873 if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4874 ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
4875 return true;
4876 }
4877
4878 return false;
4879 }
4880
4881
4882 /*********************************************************************
4883 *
4884 * Verify that the hardware indicated that the checksum is valid.
4885  *  Inform the stack about the checksum status so that the stack
4886  *  doesn't spend time verifying it.
4887 *
4888 *********************************************************************/
4889 static void
4890 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
4891 struct ixgbe_hw_stats *stats)
4892 {
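	/*
	 * The advanced RX descriptor packs the status bits in the low
	 * word of staterr and the error bits in its top byte, hence
	 * the two casts below.
	 */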
4893 u16 status = (u16) staterr;
4894 u8 errors = (u8) (staterr >> 24);
4895 #if 0
4896 bool sctp = FALSE;
4897
4898 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4899 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4900 sctp = TRUE;
4901 #endif
4902
4903 if (status & IXGBE_RXD_STAT_IPCS) {
4904 stats->ipcs.ev_count++;
4905 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4906 /* IP Checksum Good */
4907 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
4908
4909 } else {
4910 stats->ipcs_bad.ev_count++;
4911 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
4912 }
4913 }
4914 if (status & IXGBE_RXD_STAT_L4CS) {
4915 stats->l4cs.ev_count++;
4916 u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
4917 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4918 mp->m_pkthdr.csum_flags |= type;
4919 } else {
4920 stats->l4cs_bad.ev_count++;
4921 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
4922 }
4923 }
4924 return;
4925 }
4926
4927
4928 #if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */
4929 /*
4930 ** This routine is run via a vlan config EVENT;
4931 ** it enables us to use the HW filter table since
4932 ** we can get the vlan id. This just creates the
4933 ** entry in the soft version of the VFTA, init will
4934 ** repopulate the real table.
4935 */
4936 static void
4937 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4938 {
4939 struct adapter *adapter = ifp->if_softc;
4940 u16 index, bit;
4941
4942 if (ifp->if_softc != arg) /* Not our event */
4943 return;
4944
4945 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4946 return;
4947
4948 IXGBE_CORE_LOCK(adapter);
4949 index = (vtag >> 5) & 0x7F;
4950 bit = vtag & 0x1F;
4951 adapter->shadow_vfta[index] |= (1 << bit);
4952 ixgbe_init_locked(adapter);
4953 IXGBE_CORE_UNLOCK(adapter);
4954 }
4955
4956 /*
4957 ** This routine is run via a vlan
4958 ** unconfig EVENT; it removes our entry
4959 ** from the soft vfta.
4960 */
4961 static void
4962 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4963 {
4964 struct adapter *adapter = ifp->if_softc;
4965 u16 index, bit;
4966
4967 if (ifp->if_softc != arg)
4968 return;
4969
4970 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4971 return;
4972
4973 IXGBE_CORE_LOCK(adapter);
4974 index = (vtag >> 5) & 0x7F;
4975 bit = vtag & 0x1F;
4976 adapter->shadow_vfta[index] &= ~(1 << bit);
4977 /* Re-init to load the changes */
4978 ixgbe_init_locked(adapter);
4979 IXGBE_CORE_UNLOCK(adapter);
4980 }
4981 #endif
4982
4983 static void
4984 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4985 {
4986 struct ethercom *ec = &adapter->osdep.ec;
4987 struct ixgbe_hw *hw = &adapter->hw;
4988 struct rx_ring *rxr;
4989 u32 ctrl;
4990
4991 /*
4992 	** We get here through init_locked, meaning
4993 	** a soft reset; this has already cleared
4994 	** the VFTA and other state, so if no
4995 	** vlans have been registered, do nothing.
4996 */
4997 if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
4998 return;
4999 }
5000
5001 /*
5002 	** A soft reset zeroes out the VFTA, so
5003 	** we need to repopulate it now.
5004 */
5005 for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
5006 if (adapter->shadow_vfta[i] != 0)
5007 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
5008 adapter->shadow_vfta[i]);
5009
5010 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
5011 /* Enable the Filter Table if enabled */
5012 if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
5013 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
5014 ctrl |= IXGBE_VLNCTRL_VFE;
5015 }
5016 if (hw->mac.type == ixgbe_mac_82598EB)
5017 ctrl |= IXGBE_VLNCTRL_VME;
5018 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
5019
5020 /* Setup the queues for vlans */
5021 for (int i = 0; i < adapter->num_queues; i++) {
5022 rxr = &adapter->rx_rings[i];
5023 /* On 82599 the VLAN enable is per/queue in RXDCTL */
5024 if (hw->mac.type != ixgbe_mac_82598EB) {
5025 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
5026 ctrl |= IXGBE_RXDCTL_VME;
5027 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
5028 }
5029 rxr->vtag_strip = TRUE;
5030 }
5031 }
5032
5033 static void
5034 ixgbe_enable_intr(struct adapter *adapter)
5035 {
5036 struct ixgbe_hw *hw = &adapter->hw;
5037 struct ix_queue *que = adapter->queues;
5038 u32 mask, fwsm;
5039
5040 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
5041 /* Enable Fan Failure detection */
5042 if (hw->device_id == IXGBE_DEV_ID_82598AT)
5043 mask |= IXGBE_EIMS_GPI_SDP1;
5044
5045 switch (adapter->hw.mac.type) {
5046 case ixgbe_mac_82599EB:
5047 mask |= IXGBE_EIMS_ECC;
5048 mask |= IXGBE_EIMS_GPI_SDP0;
5049 mask |= IXGBE_EIMS_GPI_SDP1;
5050 mask |= IXGBE_EIMS_GPI_SDP2;
5051 #ifdef IXGBE_FDIR
5052 mask |= IXGBE_EIMS_FLOW_DIR;
5053 #endif
5054 break;
5055 case ixgbe_mac_X540:
5056 mask |= IXGBE_EIMS_ECC;
5057 /* Detect if Thermal Sensor is enabled */
5058 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
5059 if (fwsm & IXGBE_FWSM_TS_ENABLED)
5060 mask |= IXGBE_EIMS_TS;
5061 #ifdef IXGBE_FDIR
5062 mask |= IXGBE_EIMS_FLOW_DIR;
5063 #endif
5064 /* falls through */
5065 default:
5066 break;
5067 }
5068
5069 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5070
5071 /* With RSS we use auto clear */
5072 if (adapter->msix_mem) {
5073 mask = IXGBE_EIMS_ENABLE_MASK;
5074 /* Don't autoclear Link */
5075 mask &= ~IXGBE_EIMS_OTHER;
5076 mask &= ~IXGBE_EIMS_LSC;
5077 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5078 }
5079
5080 /*
5081 	** Now enable all queues; this is done separately to
5082 	** allow for handling the extended (beyond 32) MSIX
5083 	** vectors that can be used by the 82599.
5084 */
5085 for (int i = 0; i < adapter->num_queues; i++, que++)
5086 ixgbe_enable_queue(adapter, que->msix);
5087
5088 IXGBE_WRITE_FLUSH(hw);
5089
5090 return;
5091 }
5092
5093 static void
5094 ixgbe_disable_intr(struct adapter *adapter)
5095 {
5096 if (adapter->msix_mem)
5097 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5098 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5099 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5100 } else {
5101 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5102 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5103 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5104 }
5105 IXGBE_WRITE_FLUSH(&adapter->hw);
5106 return;
5107 }
5108
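/*
** Read a 16-bit value from PCI configuration space. pci_conf_read(9)
** operates on 32-bit aligned dwords, so an offset with reg % 4 == 2
** fetches the containing dword and extracts its upper half; only
** naturally aligned 16-bit offsets are supported.
*/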
5109 u16
5110 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5111 {
5112 switch (reg % 4) {
5113 case 0:
5114 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5115 __BITS(15, 0);
5116 case 2:
5117 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
5118 reg - 2), __BITS(31, 16));
5119 default:
5120 panic("%s: invalid register (%" PRIx32, __func__, reg);
5121 break;
5122 }
5123 }
5124
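/*
** Write a 16-bit value to PCI configuration space using a
** read-modify-write of the containing 32-bit dword, preserving
** the other half of the register.
*/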
5125 void
5126 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5127 {
5128 pcireg_t old;
5129
5130 switch (reg % 4) {
5131 case 0:
5132 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5133 __BITS(31, 16);
5134 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
5135 break;
5136 case 2:
5137 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
5138 __BITS(15, 0);
5139 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
5140 __SHIFTIN(value, __BITS(31, 16)) | old);
5141 break;
5142 default:
5143 panic("%s: invalid register (%" PRIx32, __func__, reg);
5144 break;
5145 }
5146
5147 return;
5148 }
5149
5150 /*
5151 ** Setup the correct IVAR register for a particular MSIX interrupt
5152 ** (yes this is all very magic and confusing :)
5153 ** - entry is the register array entry
5154 ** - vector is the MSIX vector for this queue
5155 ** - type is RX/TX/MISC
5156 */
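/*
** Layout as used below: on 82598 each 32-bit IVAR register holds four
** byte-wide entries selected by (entry & 0x3), with TX entries offset
** by 64 from RX. On 82599/X540 each register covers two queues; the
** low 16 bits belong to the even entry and the high 16 bits to the
** odd one, with the RX vector in the lower byte and the TX vector in
** the upper byte of each half.
*/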
5157 static void
5158 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5159 {
5160 struct ixgbe_hw *hw = &adapter->hw;
5161 u32 ivar, index;
5162
5163 vector |= IXGBE_IVAR_ALLOC_VAL;
5164
5165 switch (hw->mac.type) {
5166
5167 case ixgbe_mac_82598EB:
5168 if (type == -1)
5169 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5170 else
5171 entry += (type * 64);
5172 index = (entry >> 2) & 0x1F;
5173 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5174 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5175 ivar |= (vector << (8 * (entry & 0x3)));
5176 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5177 break;
5178
5179 case ixgbe_mac_82599EB:
5180 case ixgbe_mac_X540:
5181 if (type == -1) { /* MISC IVAR */
5182 index = (entry & 1) * 8;
5183 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5184 ivar &= ~(0xFF << index);
5185 ivar |= (vector << index);
5186 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5187 } else { /* RX/TX IVARS */
5188 index = (16 * (entry & 1)) + (8 * type);
5189 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5190 ivar &= ~(0xFF << index);
5191 ivar |= (vector << index);
5192 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5193 }
5194
5195 default:
5196 break;
5197 }
5198 }
5199
5200 static void
5201 ixgbe_configure_ivars(struct adapter *adapter)
5202 {
5203 struct ix_queue *que = adapter->queues;
5204 u32 newitr;
5205
5206 if (ixgbe_max_interrupt_rate > 0)
5207 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5208 else
5209 newitr = 0;
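	/*
	 * newitr is written to each queue's EITR register below. The
	 * 4000000 / rate division converts the requested maximum
	 * interrupt rate into the register's interval units, and the
	 * & 0x0FF8 mask drops the low bits and caps the result,
	 * presumably to fit the EITR interval field of these MACs.
	 */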
5210
5211 for (int i = 0; i < adapter->num_queues; i++, que++) {
5212 /* First the RX queue entry */
5213 ixgbe_set_ivar(adapter, i, que->msix, 0);
5214 /* ... and the TX */
5215 ixgbe_set_ivar(adapter, i, que->msix, 1);
5216 /* Set an Initial EITR value */
5217 IXGBE_WRITE_REG(&adapter->hw,
5218 IXGBE_EITR(que->msix), newitr);
5219 }
5220
5221 /* For the Link interrupt */
5222 ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5223 }
5224
5225 /*
5226 ** ixgbe_sfp_probe - called in the local timer to
5227 ** determine if a port has had optics inserted.
5228 */
5229 static bool ixgbe_sfp_probe(struct adapter *adapter)
5230 {
5231 struct ixgbe_hw *hw = &adapter->hw;
5232 device_t dev = adapter->dev;
5233 bool result = FALSE;
5234
5235 if ((hw->phy.type == ixgbe_phy_nl) &&
5236 (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5237 s32 ret = hw->phy.ops.identify_sfp(hw);
5238 if (ret)
5239 goto out;
5240 ret = hw->phy.ops.reset(hw);
5241 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5242 			device_printf(dev, "Unsupported SFP+ module detected!\n");
5243 device_printf(dev, "Reload driver with supported module.\n");
5244 adapter->sfp_probe = FALSE;
5245 goto out;
5246 } else
5247 device_printf(dev,"SFP+ module detected!\n");
5248 /* We now have supported optics */
5249 adapter->sfp_probe = FALSE;
5250 /* Set the optics type so system reports correctly */
5251 ixgbe_setup_optics(adapter);
5252 result = TRUE;
5253 }
5254 out:
5255 return (result);
5256 }
5257
5258 /*
5259 ** Tasklet handler for MSIX Link interrupts
5260 ** - done outside the interrupt handler since it might sleep
5261 */
5262 static void
5263 ixgbe_handle_link(void *context)
5264 {
5265 struct adapter *adapter = context;
5266
5267 if (ixgbe_check_link(&adapter->hw,
5268 &adapter->link_speed, &adapter->link_up, 0) == 0)
5269 ixgbe_update_link_status(adapter);
5270 }
5271
5272 /*
5273 ** Tasklet for handling SFP module interrupts
5274 */
5275 static void
5276 ixgbe_handle_mod(void *context)
5277 {
5278 struct adapter *adapter = context;
5279 struct ixgbe_hw *hw = &adapter->hw;
5280 device_t dev = adapter->dev;
5281 u32 err;
5282
5283 err = hw->phy.ops.identify_sfp(hw);
5284 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5285 device_printf(dev,
5286 "Unsupported SFP+ module type was detected.\n");
5287 return;
5288 }
5289 err = hw->mac.ops.setup_sfp(hw);
5290 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5291 device_printf(dev,
5292 "Setup failure - unsupported SFP+ module type.\n");
5293 return;
5294 }
5295 softint_schedule(adapter->msf_si);
5296 return;
5297 }
5298
5299
5300 /*
5301 ** Tasklet for handling MSF (multispeed fiber) interrupts
5302 */
5303 static void
5304 ixgbe_handle_msf(void *context)
5305 {
5306 struct adapter *adapter = context;
5307 struct ixgbe_hw *hw = &adapter->hw;
5308 u32 autoneg;
5309 bool negotiate;
5310
5311 autoneg = hw->phy.autoneg_advertised;
5312 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5313 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5314 else
5315 negotiate = 0;
5316 if (hw->mac.ops.setup_link)
5317 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5318 return;
5319 }
5320
5321 #ifdef IXGBE_FDIR
5322 /*
5323 ** Tasklet for reinitializing the Flow Director filter table
5324 */
5325 static void
5326 ixgbe_reinit_fdir(void *context)
5327 {
5328 struct adapter *adapter = context;
5329 struct ifnet *ifp = adapter->ifp;
5330
5331 if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5332 return;
5333 ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5334 adapter->fdir_reinit = 0;
5335 /* re-enable flow director interrupts */
5336 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5337 /* Restart the interface */
5338 ifp->if_flags |= IFF_RUNNING;
5339 return;
5340 }
5341 #endif
5342
5343 /**********************************************************************
5344 *
5345 * Update the board statistics counters.
5346 *
5347 **********************************************************************/
5348 static void
5349 ixgbe_update_stats_counters(struct adapter *adapter)
5350 {
5351 struct ifnet *ifp = adapter->ifp;
5352 struct ixgbe_hw *hw = &adapter->hw;
5353 u32 missed_rx = 0, bprc, lxon, lxoff, total;
5354 u64 total_missed_rx = 0;
5355 uint64_t crcerrs, rlec;
5356
5357 crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5358 adapter->stats.crcerrs.ev_count += crcerrs;
5359 adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5360 adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5361 adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5362
5363 /*
5364 	** Note: these are for the 8 possible traffic classes,
5365 	** which are unused in the current implementation,
5366 	** therefore only class 0 should read real data.
5367 */
5368 for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
5369 int j = i % adapter->num_queues;
5370 u32 mp;
5371 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5372 /* missed_rx tallies misses for the gprc workaround */
5373 missed_rx += mp;
5374 /* global total per queue */
5375 adapter->stats.mpc[j].ev_count += mp;
5376 /* Running comprehensive total for stats display */
5377 total_missed_rx += mp;
5378 if (hw->mac.type == ixgbe_mac_82598EB) {
5379 adapter->stats.rnbc[j] +=
5380 IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5381 adapter->stats.qbtc[j].ev_count +=
5382 IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5383 adapter->stats.qbrc[j].ev_count +=
5384 IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5385 adapter->stats.pxonrxc[j].ev_count +=
5386 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5387 } else {
5388 adapter->stats.pxonrxc[j].ev_count +=
5389 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5390 }
5391 adapter->stats.pxontxc[j].ev_count +=
5392 IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5393 adapter->stats.pxofftxc[j].ev_count +=
5394 IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5395 adapter->stats.pxoffrxc[j].ev_count +=
5396 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5397 adapter->stats.pxon2offc[j].ev_count +=
5398 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5399 }
5400 for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
5401 int j = i % adapter->num_queues;
5402 adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5403 adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5404 adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5405 }
5406 adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
5407 adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
5408 rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
5409 adapter->stats.rlec.ev_count += rlec;
5410
5411 /* Hardware workaround, gprc counts missed packets */
5412 adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
5413
5414 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5415 adapter->stats.lxontxc.ev_count += lxon;
5416 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5417 adapter->stats.lxofftxc.ev_count += lxoff;
5418 total = lxon + lxoff;
5419
5420 if (hw->mac.type != ixgbe_mac_82598EB) {
5421 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5422 ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5423 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5424 ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
5425 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
5426 ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5427 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5428 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5429 } else {
5430 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5431 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5432 /* 82598 only has a counter in the high register */
5433 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
5434 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
5435 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
5436 }
5437
5438 /*
5439 * Workaround: mprc hardware is incorrectly counting
5440 * broadcasts, so for now we subtract those.
5441 */
5442 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5443 adapter->stats.bprc.ev_count += bprc;
5444 adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
5445
5446 adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
5447 adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
5448 adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
5449 adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
5450 adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5451 adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5452
5453 adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
5454 adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
5455 adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
5456
5457 adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
5458 adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
5459 adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
5460 adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
5461 adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5462 adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5463 adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5464 adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
5465 adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
5466 adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
5467 adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
5468 adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
5469 adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5470 adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5471 adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
5472 adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
5473 adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5474 adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5475
5476 /* Only read FCOE counters on parts newer than the 82598 */
5477 if (hw->mac.type != ixgbe_mac_82598EB) {
5478 adapter->stats.fcoerpdc.ev_count +=
5479 IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5480 adapter->stats.fcoeprc.ev_count +=
5481 IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5482 adapter->stats.fcoeptc.ev_count +=
5483 IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5484 adapter->stats.fcoedwrc.ev_count +=
5485 IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5486 adapter->stats.fcoedwtc.ev_count +=
5487 IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5488 }
5489
5490 /* Fill out the OS statistics structure */
5491 /*
5492 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
5493 * adapter->stats counters. It's required to make ifconfig -z
5494 * (SIOCZIFDATA) work.
5495 */
5496 ifp->if_collisions = 0;
5497
5498 /* Rx Errors */
5499 ifp->if_iqdrops += total_missed_rx;
5500 ifp->if_ierrors += crcerrs + rlec;
5501 }
5502
5503 /** ixgbe_sysctl_tdh_handler - Handler function
5504 * Retrieves the TDH value from the hardware
5505 */
5506 static int
5507 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
5508 {
5509 struct sysctlnode node;
5510 uint32_t val;
5511 struct tx_ring *txr;
5512
5513 node = *rnode;
5514 txr = (struct tx_ring *)node.sysctl_data;
5515 if (txr == NULL)
5516 return 0;
5517 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5518 node.sysctl_data = &val;
5519 return sysctl_lookup(SYSCTLFN_CALL(&node));
5520 }
5521
5522 /** ixgbe_sysctl_tdt_handler - Handler function
5523 * Retrieves the TDT value from the hardware
5524 */
5525 static int
5526 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
5527 {
5528 struct sysctlnode node;
5529 uint32_t val;
5530 struct tx_ring *txr;
5531
5532 node = *rnode;
5533 txr = (struct tx_ring *)node.sysctl_data;
5534 if (txr == NULL)
5535 return 0;
5536 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5537 node.sysctl_data = &val;
5538 return sysctl_lookup(SYSCTLFN_CALL(&node));
5539 }
5540
5541 /** ixgbe_sysctl_rdh_handler - Handler function
5542 * Retrieves the RDH value from the hardware
5543 */
5544 static int
5545 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
5546 {
5547 struct sysctlnode node;
5548 uint32_t val;
5549 struct rx_ring *rxr;
5550
5551 node = *rnode;
5552 rxr = (struct rx_ring *)node.sysctl_data;
5553 if (rxr == NULL)
5554 return 0;
5555 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5556 node.sysctl_data = &val;
5557 return sysctl_lookup(SYSCTLFN_CALL(&node));
5558 }
5559
5560 /** ixgbe_sysctl_rdt_handler - Handler function
5561 * Retrieves the RDT value from the hardware
5562 */
5563 static int
5564 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
5565 {
5566 struct sysctlnode node;
5567 uint32_t val;
5568 struct rx_ring *rxr;
5569
5570 node = *rnode;
5571 rxr = (struct rx_ring *)node.sysctl_data;
5572 if (rxr == NULL)
5573 return 0;
5574 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5575 node.sysctl_data = &val;
5576 return sysctl_lookup(SYSCTLFN_CALL(&node));
5577 }
5578
5579 static int
5580 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
5581 {
5582 int error;
5583 struct sysctlnode node;
5584 struct ix_queue *que;
5585 uint32_t reg, usec, rate;
5586
5587 node = *rnode;
5588 que = (struct ix_queue *)node.sysctl_data;
5589 if (que == NULL)
5590 return 0;
5591 reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5592 usec = ((reg & 0x0FF8) >> 3);
5593 if (usec > 0)
5594 rate = 500000 / usec;
5595 else
5596 rate = 0;
5597 node.sysctl_data = &rate;
5598 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5599 if (error)
5600 return error;
5601 reg &= ~0xfff; /* default, no limitation */
5602 ixgbe_max_interrupt_rate = 0;
5603 if (rate > 0 && rate < 500000) {
5604 if (rate < 1000)
5605 rate = 1000;
5606 ixgbe_max_interrupt_rate = rate;
5607 reg |= ((4000000/rate) & 0xff8 );
5608 }
5609 IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5610 return 0;
5611 }
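
/*
 * A worked example of the EITR arithmetic above (a sketch, assuming the
 * ITR interval occupies EITR bits 11:3 in units of 2 microseconds, as the
 * read path implies): writing a rate of 31250 interrupts/sec stores
 * (4000000 / 31250) & 0xff8 = 128 (0x80), i.e. an interval field of
 * 0x80 >> 3 = 16 units = 32us between interrupts, and reading it back
 * yields 500000 / 16 = 31250.  A requested rate below 1000 is clamped to
 * 1000; a rate of 0 (or >= 500000) clears the field, removing the limit.
 */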
5612
5613 const struct sysctlnode *
5614 ixgbe_sysctl_instance(struct adapter *adapter)
5615 {
5616 const char *dvname;
5617 struct sysctllog **log;
5618 int rc;
5619 const struct sysctlnode *rnode;
5620
5621 log = &adapter->sysctllog;
5622 dvname = device_xname(adapter->dev);
5623
5624 if ((rc = sysctl_createv(log, 0, NULL, &rnode,
5625 0, CTLTYPE_NODE, dvname,
5626 SYSCTL_DESCR("ixgbe information and settings"),
5627 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
5628 goto err;
5629
5630 return rnode;
5631 err:
5632 printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
5633 return NULL;
5634 }
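
/*
 * Illustrative only: the node created above hangs off CTL_HW and is named
 * after the device, so for a device named, say, ixg0 the per-queue nodes
 * attached in ixgbe_add_hw_stats() below end up reachable as, e.g.,
 *
 * sysctl hw.ixg0.queue0.interrupt_rate
 * sysctl hw.ixg0.queue0.txd_head
 *
 * (the device name and queue index here are examples, not fixed names).
 */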
5635
5636 /*
5637 * Add sysctl variables, one per statistic, to the system.
5638 */
5639 static void
5640 ixgbe_add_hw_stats(struct adapter *adapter)
5641 {
5642 device_t dev = adapter->dev;
5643 const struct sysctlnode *rnode, *cnode;
5644 struct sysctllog **log = &adapter->sysctllog;
5645 struct tx_ring *txr = adapter->tx_rings;
5646 struct rx_ring *rxr = adapter->rx_rings;
5647 struct ixgbe_hw_stats *stats = &adapter->stats;
5648
5649 /* Driver Statistics */
5650 #if 0
5651 /* These counters are not updated by the software */
5652 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5653 CTLFLAG_RD, &adapter->dropped_pkts,
5654 "Driver dropped packets");
5655 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
5656 CTLFLAG_RD, &adapter->mbuf_header_failed,
5657 "???");
5658 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
5659 CTLFLAG_RD, &adapter->mbuf_packet_failed,
5660 "???");
5661 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
5662 CTLFLAG_RD, &adapter->no_tx_map_avail,
5663 "???");
5664 #endif
5665 evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
5666 NULL, device_xname(dev), "Handled queue in softint");
5667 evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
5668 NULL, device_xname(dev), "Requeued in softint");
5669 evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
5670 NULL, device_xname(dev), "Interrupt handler more rx");
5671 evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
5672 NULL, device_xname(dev), "Interrupt handler more tx");
5673 evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
5674 NULL, device_xname(dev), "Interrupt handler tx loops");
5675 evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
5676 NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
5677 evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
5678 NULL, device_xname(dev), "m_defrag() failed");
5679 evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
5680 NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
5681 evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
5682 NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
5683 evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
5684 NULL, device_xname(dev), "Driver tx dma hard fail other");
5685 evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
5686 NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
5687 evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
5688 NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
5689 evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
5690 NULL, device_xname(dev), "Watchdog timeouts");
5691 evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
5692 NULL, device_xname(dev), "TSO errors");
5693 evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
5694 NULL, device_xname(dev), "Link MSIX IRQ Handled");
5695
5696 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5697 snprintf(adapter->queues[i].evnamebuf,
5698 sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
5699 device_xname(dev), i);
5700 snprintf(adapter->queues[i].namebuf,
5701 sizeof(adapter->queues[i].namebuf), "queue%d", i);
5702
5703 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
5704 aprint_error_dev(dev, "could not create sysctl root\n");
5705 break;
5706 }
5707
5708 if (sysctl_createv(log, 0, &rnode, &rnode,
5709 0, CTLTYPE_NODE,
5710 adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
5711 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
5712 break;
5713
5714 if (sysctl_createv(log, 0, &rnode, &cnode,
5715 CTLFLAG_READWRITE, CTLTYPE_INT,
5716 "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
5717 ixgbe_sysctl_interrupt_rate_handler, 0,
5718 (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
5719 break;
5720
5721 if (sysctl_createv(log, 0, &rnode, &cnode,
5722 CTLFLAG_READONLY, CTLTYPE_QUAD,
5723 "irqs", SYSCTL_DESCR("irqs on this queue"),
5724 NULL, 0, &(adapter->queues[i].irqs),
5725 0, CTL_CREATE, CTL_EOL) != 0)
5726 break;
5727
5728 if (sysctl_createv(log, 0, &rnode, &cnode,
5729 CTLFLAG_READONLY, CTLTYPE_INT,
5730 "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
5731 ixgbe_sysctl_tdh_handler, 0, (void *)txr,
5732 0, CTL_CREATE, CTL_EOL) != 0)
5733 break;
5734
5735 if (sysctl_createv(log, 0, &rnode, &cnode,
5736 CTLFLAG_READONLY, CTLTYPE_INT,
5737 "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
5738 ixgbe_sysctl_tdt_handler, 0, (void *)txr,
5739 0, CTL_CREATE, CTL_EOL) != 0)
5740 break;
5741
5742 evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
5743 NULL, device_xname(dev), "TSO");
5744 evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
5745 NULL, adapter->queues[i].evnamebuf,
5746 "Queue No Descriptor Available");
5747 evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
5748 NULL, adapter->queues[i].evnamebuf,
5749 "Queue Packets Transmitted");
5750
5751 #ifdef LRO
5752 struct lro_ctrl *lro = &rxr->lro;
5753 #endif /* LRO */
5754
5755 if (sysctl_createv(log, 0, &rnode, &cnode,
5756 CTLFLAG_READONLY,
5757 CTLTYPE_INT,
5758 "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
5759 ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
5760 CTL_CREATE, CTL_EOL) != 0)
5761 break;
5762
5763 if (sysctl_createv(log, 0, &rnode, &cnode,
5764 CTLFLAG_READONLY,
5765 CTLTYPE_INT,
5766 "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
5767 ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
5768 CTL_CREATE, CTL_EOL) != 0)
5769 break;
5770
5771 if (i < __arraycount(adapter->stats.mpc)) {
5772 evcnt_attach_dynamic(&adapter->stats.mpc[i],
5773 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5774 "Missed Packet Count");
5775 }
5776 if (i < __arraycount(adapter->stats.pxontxc)) {
5777 evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
5778 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5779 "pxontxc");
5780 evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
5781 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5782 "pxonrxc");
5783 evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
5784 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5785 "pxofftxc");
5786 evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
5787 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5788 "pxoffrxc");
5789 evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
5790 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5791 "pxon2offc");
5792 }
5793 if (i < __arraycount(adapter->stats.qprc)) {
5794 evcnt_attach_dynamic(&adapter->stats.qprc[i],
5795 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5796 "qprc");
5797 evcnt_attach_dynamic(&adapter->stats.qptc[i],
5798 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5799 "qptc");
5800 evcnt_attach_dynamic(&adapter->stats.qbrc[i],
5801 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5802 "qbrc");
5803 evcnt_attach_dynamic(&adapter->stats.qbtc[i],
5804 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5805 "qbtc");
5806 evcnt_attach_dynamic(&adapter->stats.qprdc[i],
5807 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5808 "qprdc");
5809 }
5810
5811 evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
5812 NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
5813 evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
5814 NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
5815 evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
5816 NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
5817 evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
5818 NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
5819 evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
5820 NULL, adapter->queues[i].evnamebuf, "Rx discarded");
5821 evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
5822 NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
5823 #ifdef LRO
5824 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5825 CTLFLAG_RD, &lro->lro_queued, 0,
5826 "LRO Queued");
5827 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5828 CTLFLAG_RD, &lro->lro_flushed, 0,
5829 "LRO Flushed");
5830 #endif /* LRO */
5831 }
5832
5833 /* MAC stats get their own sub node */
5834
5835
5836 snprintf(stats->namebuf,
5837 sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
5838
5839 evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
5840 stats->namebuf, "rx csum offload - IP");
5841 evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
5842 stats->namebuf, "rx csum offload - L4");
5843 evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
5844 stats->namebuf, "rx csum offload - IP bad");
5845 evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
5846 stats->namebuf, "rx csum offload - L4 bad");
5847 evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
5848 stats->namebuf, "Interrupt conditions zero");
5849 evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
5850 stats->namebuf, "Legacy interrupts");
5851 evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
5852 stats->namebuf, "CRC Errors");
5853 evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
5854 stats->namebuf, "Illegal Byte Errors");
5855 evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
5856 stats->namebuf, "Byte Errors");
5857 evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
5858 stats->namebuf, "MAC Short Packets Discarded");
5859 evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
5860 stats->namebuf, "MAC Local Faults");
5861 evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
5862 stats->namebuf, "MAC Remote Faults");
5863 evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
5864 stats->namebuf, "Receive Length Errors");
5865 evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
5866 stats->namebuf, "Link XON Transmitted");
5867 evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
5868 stats->namebuf, "Link XON Received");
5869 evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
5870 stats->namebuf, "Link XOFF Transmitted");
5871 evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
5872 stats->namebuf, "Link XOFF Received");
5873
5874 /* Packet Reception Stats */
5875 evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
5876 stats->namebuf, "Total Octets Received");
5877 evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
5878 stats->namebuf, "Good Octets Received");
5879 evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
5880 stats->namebuf, "Total Packets Received");
5881 evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
5882 stats->namebuf, "Good Packets Received");
5883 evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
5884 stats->namebuf, "Multicast Packets Received");
5885 evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
5886 stats->namebuf, "Broadcast Packets Received");
5887 evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
5888 stats->namebuf, "64 byte frames received");
5889 evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
5890 stats->namebuf, "65-127 byte frames received");
5891 evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
5892 stats->namebuf, "128-255 byte frames received");
5893 evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
5894 stats->namebuf, "256-511 byte frames received");
5895 evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
5896 stats->namebuf, "512-1023 byte frames received");
5897 evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
5898 stats->namebuf, "1024-1522 byte frames received");
5899 evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
5900 stats->namebuf, "Receive Undersized");
5901 evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
5902 stats->namebuf, "Fragmented Packets Received");
5903 evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
5904 stats->namebuf, "Oversized Packets Received");
5905 evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
5906 stats->namebuf, "Received Jabber");
5907 evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
5908 stats->namebuf, "Management Packets Received");
5909 evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
5910 stats->namebuf, "Checksum Errors");
5911
5912 /* Packet Transmission Stats */
5913 evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
5914 stats->namebuf, "Good Octets Transmitted");
5915 evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
5916 stats->namebuf, "Total Packets Transmitted");
5917 evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
5918 stats->namebuf, "Good Packets Transmitted");
5919 evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
5920 stats->namebuf, "Broadcast Packets Transmitted");
5921 evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
5922 stats->namebuf, "Multicast Packets Transmitted");
5923 evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
5924 stats->namebuf, "Management Packets Transmitted");
5925 evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
5926 stats->namebuf, "64 byte frames transmitted");
5927 evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
5928 stats->namebuf, "65-127 byte frames transmitted");
5929 evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
5930 stats->namebuf, "128-255 byte frames transmitted");
5931 evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
5932 stats->namebuf, "256-511 byte frames transmitted");
5933 evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
5934 stats->namebuf, "512-1023 byte frames transmitted");
5935 evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
5936 stats->namebuf, "1024-1522 byte frames transmitted");
5937 }
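
/*
 * Usage sketch: the evcnt_attach_dynamic() counters registered above are
 * ordinary NetBSD event counters, so they can be inspected at run time
 * with, e.g., "vmstat -e | grep ixg0" (device name is an example), while
 * the per-queue descriptor head/tail and interrupt_rate nodes are read,
 * and for interrupt_rate also written, through sysctl(8) as sketched near
 * ixgbe_sysctl_instance() above.
 */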
5938
5939 /*
5940 ** Set flow control using sysctl:
5941 ** Flow control values:
5942 ** 0 - off
5943 ** 1 - rx pause
5944 ** 2 - tx pause
5945 ** 3 - full
5946 */
5947 static int
5948 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
5949 {
5950 struct sysctlnode node;
5951 int error, last;
5952 struct adapter *adapter;
5953
5954 node = *rnode;
5955 adapter = (struct adapter *)node.sysctl_data;
5956 node.sysctl_data = &adapter->fc;
5957 last = adapter->fc;
5958 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5959 if (error != 0 || newp == NULL)
5960 return error;
5961
5962 /* Don't bother if it's not changed */
5963 if (adapter->fc == last)
5964 return (0);
5965
5966 switch (adapter->fc) {
5967 case ixgbe_fc_rx_pause:
5968 case ixgbe_fc_tx_pause:
5969 case ixgbe_fc_full:
5970 adapter->hw.fc.requested_mode = adapter->fc;
5971 if (adapter->num_queues > 1)
5972 ixgbe_disable_rx_drop(adapter);
5973 break;
5974 case ixgbe_fc_none:
5975 adapter->hw.fc.requested_mode = ixgbe_fc_none;
5976 if (adapter->num_queues > 1)
5977 ixgbe_enable_rx_drop(adapter);
5978 break;
5979 default:
5980 adapter->fc = last;
5981 return (EINVAL);
5982 }
5983 /* Don't autoneg if forcing a value */
5984 adapter->hw.fc.disable_fc_autoneg = TRUE;
5985 ixgbe_fc_enable(&adapter->hw);
5986 return 0;
5987 }
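
/*
 * Example (assuming enum ixgbe_fc_mode follows the 0..3 numbering in the
 * comment above, ixgbe_fc_none = 0 through ixgbe_fc_full = 3): writing 3
 * through this sysctl selects ixgbe_fc_full, programs hw.fc.requested_mode,
 * disables flow-control autonegotiation and, with multiple queues, turns
 * per-queue RX drop off; writing 0 selects ixgbe_fc_none and re-enables
 * per-queue RX drop.
 */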
5988
5989 /*
5990 ** Control link advertise speed:
5991 ** 1 - advertise only 1G
5992 ** 2 - advertise 100Mb
5993 ** 3 - advertise normal (1G and 10G)
5994 */
5995 static int
5996 ixgbe_set_advertise(SYSCTLFN_ARGS)
5997 {
5998 struct sysctlnode node;
5999 int t, error = 0;
6000 struct adapter *adapter;
6001 device_t dev;
6002 struct ixgbe_hw *hw;
6003 ixgbe_link_speed speed, last;
6004
6005 node = *rnode;
6006 adapter = (struct adapter *)node.sysctl_data;
6007 dev = adapter->dev;
6008 hw = &adapter->hw;
6009 last = adapter->advertise;
6010 t = adapter->advertise;
6011 node.sysctl_data = &t;
6012 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6013 if (error != 0 || newp == NULL)
6014 return error;
6015
6016 if (t == -1)
6017 return 0;
6018
6019 if (t == last) /* no change */
6020 return (0);
6021
6022 adapter->advertise = t;
6023
6024 if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
6025 (hw->phy.multispeed_fiber)))
6026 return (EINVAL);
6027
6028 if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
6029 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
6030 return (EINVAL);
6031 }
6032
6033 if (adapter->advertise == 1)
6034 speed = IXGBE_LINK_SPEED_1GB_FULL;
6035 else if (adapter->advertise == 2)
6036 speed = IXGBE_LINK_SPEED_100_FULL;
6037 else if (adapter->advertise == 3)
6038 speed = IXGBE_LINK_SPEED_1GB_FULL |
6039 IXGBE_LINK_SPEED_10GB_FULL;
6040 else { /* bogus value */
6041 adapter->advertise = last;
6042 return (EINVAL);
6043 }
6044
6045 hw->mac.autotry_restart = TRUE;
6046 hw->mac.ops.setup_link(hw, speed, TRUE);
6047
6048 return 0;
6049 }
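
/*
 * For reference (a sketch based on the checks above): the advertise values
 * map to
 * 1 -> IXGBE_LINK_SPEED_1GB_FULL
 * 2 -> IXGBE_LINK_SPEED_100_FULL (accepted on X540 only)
 * 3 -> IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL
 * A value of -1 is ignored; any other value is rejected with EINVAL and
 * the previous setting is restored.
 */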
6050
6051 /*
6052 ** Thermal Shutdown Trigger
6053 ** - cause a Thermal Overtemp IRQ
6054 */
6055 static int
6056 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
6057 {
6058 struct sysctlnode node;
6059 int error, fire = 0;
6060 struct adapter *adapter;
6061 struct ixgbe_hw *hw;
6062
6063 node = *rnode;
6064 adapter = (struct adapter *)node.sysctl_data;
6065 hw = &adapter->hw;
6066
6067 if (hw->mac.type != ixgbe_mac_X540)
6068 return (0);
6069
6070 node.sysctl_data = &fire;
6071 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6072 if ((error) || (newp == NULL))
6073 return (error);
6074
6075 if (fire) {
6076 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
6077 reg |= IXGBE_EICR_TS;
6078 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
6079 }
6080
6081 return (0);
6082 }
6083
6084 /*
6085 ** Enable the hardware to drop packets when a receive buffer is
6086 ** full. This is useful with multiple queues, so that no single
6087 ** queue being full stalls the entire RX engine. We only
6088 ** enable this when multiqueue is in use AND when flow control
6089 ** is disabled.
6090 */
6091 static void
6092 ixgbe_enable_rx_drop(struct adapter *adapter)
6093 {
6094 struct ixgbe_hw *hw = &adapter->hw;
6095
6096 for (int i = 0; i < adapter->num_queues; i++) {
6097 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6098 srrctl |= IXGBE_SRRCTL_DROP_EN;
6099 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6100 }
6101 }
6102
6103 static void
6104 ixgbe_disable_rx_drop(struct adapter *adapter)
6105 {
6106 struct ixgbe_hw *hw = &adapter->hw;
6107
6108 for (int i = 0; i < adapter->num_queues; i++) {
6109 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6110 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6111 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6112 }
6113 }
6114