1 /******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
62 /*$NetBSD: ixgbe.c,v 1.29 2015/05/06 09:21:22 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68 #include "vlan.h"
69
70 /*********************************************************************
71 * Set this to one to display debug statistics
72 *********************************************************************/
73 int ixgbe_display_debug_stats = 0;
74
75 /*********************************************************************
76 * Driver version
77 *********************************************************************/
78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
79
80 /*********************************************************************
81 * PCI Device ID Table
82 *
83 * Used by probe to select devices to load on
84 * Last field stores an index into ixgbe_strings
85 * Last entry must be all 0s
86 *
87 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
88 *********************************************************************/
89
90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
91 {
92 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
93 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
94 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
95 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
96 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
97 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
98 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
99 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
100 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
101 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
102 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
103 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
104 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
105 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
106 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
107 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
108 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
109 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
110 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
111 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
112 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
113 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
114 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
115 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
116 /* required last entry */
117 {0, 0, 0, 0, 0}
118 };
119
120 /*********************************************************************
121 * Table of branding strings
122 *********************************************************************/
123
124 static const char *ixgbe_strings[] = {
125 "Intel(R) PRO/10GbE PCI-Express Network Driver"
126 };
127
128 /*********************************************************************
129 * Function prototypes
130 *********************************************************************/
131 static int ixgbe_probe(device_t, cfdata_t, void *);
132 static void ixgbe_attach(device_t, device_t, void *);
133 static int ixgbe_detach(device_t, int);
134 #if 0
135 static int ixgbe_shutdown(device_t);
136 #endif
137 #if IXGBE_LEGACY_TX
138 static void ixgbe_start(struct ifnet *);
139 static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
140 #else
141 static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
142 static int ixgbe_mq_start_locked(struct ifnet *,
143 struct tx_ring *, struct mbuf *);
144 static void ixgbe_qflush(struct ifnet *);
145 static void ixgbe_deferred_mq_start(void *);
146 #endif
147 static int ixgbe_ioctl(struct ifnet *, u_long, void *);
148 static void ixgbe_ifstop(struct ifnet *, int);
149 static int ixgbe_init(struct ifnet *);
150 static void ixgbe_init_locked(struct adapter *);
151 static void ixgbe_stop(void *);
152 static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
153 static int ixgbe_media_change(struct ifnet *);
154 static void ixgbe_identify_hardware(struct adapter *);
155 static int ixgbe_allocate_pci_resources(struct adapter *,
156 const struct pci_attach_args *);
157 static int ixgbe_allocate_msix(struct adapter *,
158 const struct pci_attach_args *);
159 static int ixgbe_allocate_legacy(struct adapter *,
160 const struct pci_attach_args *);
161 static int ixgbe_allocate_queues(struct adapter *);
162 static int ixgbe_setup_msix(struct adapter *);
163 static void ixgbe_free_pci_resources(struct adapter *);
164 static void ixgbe_local_timer(void *);
165 static int ixgbe_setup_interface(device_t, struct adapter *);
166 static void ixgbe_config_link(struct adapter *);
167
168 static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
169 static int ixgbe_setup_transmit_structures(struct adapter *);
170 static void ixgbe_setup_transmit_ring(struct tx_ring *);
171 static void ixgbe_initialize_transmit_units(struct adapter *);
172 static void ixgbe_free_transmit_structures(struct adapter *);
173 static void ixgbe_free_transmit_buffers(struct tx_ring *);
174
175 static int ixgbe_allocate_receive_buffers(struct rx_ring *);
176 static int ixgbe_setup_receive_structures(struct adapter *);
177 static int ixgbe_setup_receive_ring(struct rx_ring *);
178 static void ixgbe_initialize_receive_units(struct adapter *);
179 static void ixgbe_free_receive_structures(struct adapter *);
180 static void ixgbe_free_receive_buffers(struct rx_ring *);
181 static void ixgbe_setup_hw_rsc(struct rx_ring *);
182
183 static void ixgbe_enable_intr(struct adapter *);
184 static void ixgbe_disable_intr(struct adapter *);
185 static void ixgbe_update_stats_counters(struct adapter *);
186 static bool ixgbe_txeof(struct tx_ring *);
187 static bool ixgbe_rxeof(struct ix_queue *);
188 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
189 struct ixgbe_hw_stats *);
190 static void ixgbe_set_promisc(struct adapter *);
191 static void ixgbe_set_multi(struct adapter *);
192 static void ixgbe_update_link_status(struct adapter *);
193 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
194 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
195 static int ixgbe_set_flowcntl(SYSCTLFN_PROTO);
196 static int ixgbe_set_advertise(SYSCTLFN_PROTO);
197 static int ixgbe_set_thermal_test(SYSCTLFN_PROTO);
198 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
199 struct ixgbe_dma_alloc *, int);
200 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
201 static int ixgbe_tx_ctx_setup(struct tx_ring *,
202 struct mbuf *, u32 *, u32 *);
203 static int ixgbe_tso_setup(struct tx_ring *,
204 struct mbuf *, u32 *, u32 *);
205 static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
206 static void ixgbe_configure_ivars(struct adapter *);
207 static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
208
209 static void ixgbe_setup_vlan_hw_support(struct adapter *);
210 #if 0
211 static void ixgbe_register_vlan(void *, struct ifnet *, u16);
212 static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
213 #endif
214
215 static void ixgbe_add_hw_stats(struct adapter *adapter);
216
217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
219 struct mbuf *, u32);
220
221 static void ixgbe_enable_rx_drop(struct adapter *);
222 static void ixgbe_disable_rx_drop(struct adapter *);
223
224 /* Support for pluggable optic modules */
225 static bool ixgbe_sfp_probe(struct adapter *);
226 static void ixgbe_setup_optics(struct adapter *);
227
228 /* Legacy (single vector) interrupt handler */
229 static int ixgbe_legacy_irq(void *);
230
231 #if defined(NETBSD_MSI_OR_MSIX)
232 /* The MSI/X Interrupt handlers */
233 static void ixgbe_msix_que(void *);
234 static void ixgbe_msix_link(void *);
235 #endif
236
237 /* Software interrupts for deferred work */
238 static void ixgbe_handle_que(void *);
239 static void ixgbe_handle_link(void *);
240 static void ixgbe_handle_msf(void *);
241 static void ixgbe_handle_mod(void *);
242
243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
245
246 #ifdef IXGBE_FDIR
247 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
248 static void ixgbe_reinit_fdir(void *, int);
249 #endif
250
251 /*********************************************************************
252 * FreeBSD Device Interface Entry Points
253 *********************************************************************/
254
255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
256 ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
257 DVF_DETACH_SHUTDOWN);
258
259 #if 0
260 devclass_t ixgbe_devclass;
261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
262
263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
265 #endif
266
267 /*
268 ** TUNEABLE PARAMETERS:
269 */
270
271 /*
272 ** AIM: Adaptive Interrupt Moderation
273 ** which means that the interrupt rate
274 ** is varied over time based on the
275 ** traffic for that interrupt vector
276 */
277 static int ixgbe_enable_aim = TRUE;
278 #define TUNABLE_INT(__x, __y)
279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
280
281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
283
284 /* How many packets rxeof tries to clean at a time */
285 static int ixgbe_rx_process_limit = 256;
286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
287
288 /* How many packets txeof tries to clean at a time */
289 static int ixgbe_tx_process_limit = 256;
290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
291
292 /*
293 ** Smart speed setting, default to on.
294 ** This only works as a compile-time option
295 ** right now, as it is applied during attach;
296 ** set this to 'ixgbe_smart_speed_off' to
297 ** disable.
298 */
299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
300
301 /*
302 * MSIX should be the default for best performance,
303 * but this allows it to be forced off for testing.
304 */
305 static int ixgbe_enable_msix = 1;
306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
307
308 #if defined(NETBSD_MSI_OR_MSIX)
309 /*
310 * Number of queues: can be set to 0,
311 * in which case it autoconfigures based on the
312 * number of cpus, with a max of 8. This
313 * can be overridden manually here.
314 */
315 static int ixgbe_num_queues = 0;
316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
317 #endif
318
319 /*
320 ** Number of TX descriptors per ring,
321 ** setting higher than RX as this seems
322 ** the better performing choice.
323 */
324 static int ixgbe_txd = PERFORM_TXD;
325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
326
327 /* Number of RX descriptors per ring */
328 static int ixgbe_rxd = PERFORM_RXD;
329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
330
331 /*
332 ** HW RSC control:
333 ** this feature only works with
334 ** IPv4, and only on 82599 and later.
335 ** Also, this will cause IP forwarding to
336 ** fail, and that can't be controlled by
337 ** the stack as LRO can. For all these
338 ** reasons I've deemed it best to leave
339 ** this off and not bother with a tuneable
340 ** interface; enabling it would require
341 ** a recompile.
342 */
343 static bool ixgbe_rsc_enable = FALSE;
344
345 /* Keep running tab on them for sanity check */
346 static int ixgbe_total_ports;
347
348 #ifdef IXGBE_FDIR
349 /*
350 ** For Flow Director: this is the
351 ** number of TX packets we sample
352 ** for the filter pool, this means
353 ** every 20th packet will be probed.
354 **
355 ** This feature can be disabled by
356 ** setting this to 0.
357 */
358 static int atr_sample_rate = 20;
359 /*
360 ** Flow Director actually 'steals'
361 ** part of the packet buffer as its
362 ** filter pool, this variable controls
363 ** how much it uses:
364 ** 0 = 64K, 1 = 128K, 2 = 256K
365 */
366 static int fdir_pballoc = 1;
367 #endif
368
369 #ifdef DEV_NETMAP
370 /*
371 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
372 * be a reference on how to implement netmap support in a driver.
373 * Additional comments are in ixgbe_netmap.h .
374 *
375 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
376 * that extend the standard driver.
377 */
378 #include <dev/netmap/ixgbe_netmap.h>
379 #endif /* DEV_NETMAP */
380
381 /*********************************************************************
382 * Device identification routine
383 *
384 * ixgbe_probe determines if the driver should be loaded on the
385 * adapter based on the PCI vendor/device ID of the adapter.
386 *
387 * return 1 on success, 0 on failure
388 *********************************************************************/
389
390 static int
391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
392 {
393 const struct pci_attach_args *pa = aux;
394
395 return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
396 }
397
398 static ixgbe_vendor_info_t *
399 ixgbe_lookup(const struct pci_attach_args *pa)
400 {
401 pcireg_t subid;
402 ixgbe_vendor_info_t *ent;
403
404 INIT_DEBUGOUT("ixgbe_probe: begin");
405
406 if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
407 return NULL;
408
409 subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
410
411 for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
412 if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
413 PCI_PRODUCT(pa->pa_id) == ent->device_id &&
414
415 (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
416 ent->subvendor_id == 0) &&
417
418 (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
419 ent->subdevice_id == 0)) {
420 ++ixgbe_total_ports;
421 return ent;
422 }
423 }
424 return NULL;
425 }
426
427
428 static void
429 ixgbe_sysctl_attach(struct adapter *adapter)
430 {
431 struct sysctllog **log;
432 const struct sysctlnode *rnode, *cnode;
433 device_t dev;
434
435 dev = adapter->dev;
436 log = &adapter->sysctllog;
437
438 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
439 aprint_error_dev(dev, "could not create sysctl root\n");
440 return;
441 }
442
443 if (sysctl_createv(log, 0, &rnode, &cnode,
444 CTLFLAG_READONLY, CTLTYPE_INT,
445 "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
446 NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
447 aprint_error_dev(dev, "could not create sysctl\n");
448
449 if (sysctl_createv(log, 0, &rnode, &cnode,
450 CTLFLAG_READONLY, CTLTYPE_INT,
451 "num_queues", SYSCTL_DESCR("Number of queues"),
452 NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
453 aprint_error_dev(dev, "could not create sysctl\n");
454
455 if (sysctl_createv(log, 0, &rnode, &cnode,
456 CTLFLAG_READWRITE, CTLTYPE_INT,
457 "fc", SYSCTL_DESCR("Flow Control"),
458 ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
459 aprint_error_dev(dev, "could not create sysctl\n");
460
461 /* XXX This is an *instance* sysctl controlling a *global* variable.
462 * XXX It's that way in the FreeBSD driver that this derives from.
463 */
464 if (sysctl_createv(log, 0, &rnode, &cnode,
465 CTLFLAG_READWRITE, CTLTYPE_INT,
466 "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
467 NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
468 aprint_error_dev(dev, "could not create sysctl\n");
469
470 if (sysctl_createv(log, 0, &rnode, &cnode,
471 CTLFLAG_READWRITE, CTLTYPE_INT,
472 "advertise_speed", SYSCTL_DESCR("Link Speed"),
473 ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
474 aprint_error_dev(dev, "could not create sysctl\n");
475
476 if (sysctl_createv(log, 0, &rnode, &cnode,
477 CTLFLAG_READWRITE, CTLTYPE_INT,
478 "ts", SYSCTL_DESCR("Thermal Test"),
479 ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
480 aprint_error_dev(dev, "could not create sysctl\n");
481 }
482
483 /*********************************************************************
484 * Device initialization routine
485 *
486 * The attach entry point is called when the driver is being loaded.
487 * This routine identifies the type of hardware, allocates all resources
488 * and initializes the hardware.
489 *
490 * return 0 on success, positive on failure
491 *********************************************************************/
492
493 static void
494 ixgbe_attach(device_t parent, device_t dev, void *aux)
495 {
496 struct adapter *adapter;
497 struct ixgbe_hw *hw;
498 int error = 0;
499 u16 csum;
500 u32 ctrl_ext;
501 ixgbe_vendor_info_t *ent;
502 const struct pci_attach_args *pa = aux;
503
504 INIT_DEBUGOUT("ixgbe_attach: begin");
505
506 /* Allocate, clear, and link in our adapter structure */
507 adapter = device_private(dev);
508 adapter->dev = adapter->osdep.dev = dev;
509 hw = &adapter->hw;
510 adapter->osdep.pc = pa->pa_pc;
511 adapter->osdep.tag = pa->pa_tag;
512 adapter->osdep.dmat = pa->pa_dmat;
513
514 ent = ixgbe_lookup(pa);
515
516 KASSERT(ent != NULL);
517
518 aprint_normal(": %s, Version - %s\n",
519 ixgbe_strings[ent->index], ixgbe_driver_version);
520
521 /* Core Lock Init*/
522 IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
523
524 /* SYSCTL APIs */
525
526 ixgbe_sysctl_attach(adapter);
527
528 /* Set up the timer callout */
529 callout_init(&adapter->timer, 0);
530
531 /* Determine hardware revision */
532 ixgbe_identify_hardware(adapter);
533
534 /* Do base PCI setup - map BAR0 */
535 if (ixgbe_allocate_pci_resources(adapter, pa)) {
536 aprint_error_dev(dev, "Allocation of PCI resources failed\n");
537 error = ENXIO;
538 goto err_out;
539 }
540
541 /* Do descriptor calc and sanity checks */
542 if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
543 ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
544 aprint_error_dev(dev, "TXD config issue, using default!\n");
545 adapter->num_tx_desc = DEFAULT_TXD;
546 } else
547 adapter->num_tx_desc = ixgbe_txd;
548
549 /*
550 ** With many RX rings it is easy to exceed the
551 ** system mbuf allocation. Tuning nmbclusters
552 ** can alleviate this.
553 */
554 if (nmbclusters > 0 ) {
555 int s;
556 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
557 if (s > nmbclusters) {
558 aprint_error_dev(dev, "RX Descriptors exceed "
559 "system mbuf max, using default instead!\n");
560 ixgbe_rxd = DEFAULT_RXD;
561 }
562 }
563
564 if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
565 ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
566 aprint_error_dev(dev, "RXD config issue, using default!\n");
567 adapter->num_rx_desc = DEFAULT_RXD;
568 } else
569 adapter->num_rx_desc = ixgbe_rxd;
570
571 /* Allocate our TX/RX Queues */
572 if (ixgbe_allocate_queues(adapter)) {
573 error = ENOMEM;
574 goto err_out;
575 }
576
577 /* Allocate multicast array memory. */
578 adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
579 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
580 if (adapter->mta == NULL) {
581 aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
582 error = ENOMEM;
583 goto err_late;
584 }
585
586 /* Initialize the shared code */
587 error = ixgbe_init_shared_code(hw);
588 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
589 /*
590 ** No optics in this port, set up
591 ** so the timer routine will probe
592 ** for later insertion.
593 */
594 adapter->sfp_probe = TRUE;
595 error = 0;
596 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
597 aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
598 error = EIO;
599 goto err_late;
600 } else if (error) {
601 aprint_error_dev(dev,"Unable to initialize the shared code\n");
602 error = EIO;
603 goto err_late;
604 }
605
606 /* Make sure we have a good EEPROM before we read from it */
607 if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
608 aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
609 error = EIO;
610 goto err_late;
611 }
612
613 error = ixgbe_init_hw(hw);
614 switch (error) {
615 case IXGBE_ERR_EEPROM_VERSION:
616 aprint_error_dev(dev, "This device is a pre-production adapter/"
617 "LOM. Please be aware there may be issues associated "
618 "with your hardware.\n If you are experiencing problems "
619 "please contact your Intel or hardware representative "
620 "who provided you with this hardware.\n");
621 break;
622 case IXGBE_ERR_SFP_NOT_SUPPORTED:
623 aprint_error_dev(dev,"Unsupported SFP+ Module\n");
624 error = EIO;
625 aprint_error_dev(dev,"Hardware Initialization Failure\n");
626 goto err_late;
627 case IXGBE_ERR_SFP_NOT_PRESENT:
628 device_printf(dev,"No SFP+ Module found\n");
629 /* falls thru */
630 default:
631 break;
632 }
633
634 /* Detect and set physical type */
635 ixgbe_setup_optics(adapter);
636
637 if ((adapter->msix > 1) && (ixgbe_enable_msix))
638 error = ixgbe_allocate_msix(adapter, pa);
639 else
640 error = ixgbe_allocate_legacy(adapter, pa);
641 if (error)
642 goto err_late;
643
644 /* Setup OS specific network interface */
645 if (ixgbe_setup_interface(dev, adapter) != 0)
646 goto err_late;
647
648 /* Initialize statistics */
649 ixgbe_update_stats_counters(adapter);
650
651 /* Print PCIE bus type/speed/width info */
652 ixgbe_get_bus_info(hw);
653 aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
654 ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
655 (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
656 (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
657 (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
658 (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
659 ("Unknown"));
660
661 if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
662 (hw->bus.speed == ixgbe_bus_speed_2500)) {
663 aprint_error_dev(dev, "PCI-Express bandwidth available"
664 " for this card\n is not sufficient for"
665 " optimal performance.\n");
666 aprint_error_dev(dev, "For optimal performance a x8 "
667 "PCIE, or x4 PCIE 2 slot is required.\n");
668 }
669
670 /* Set an initial default flow control value */
671 adapter->fc = ixgbe_fc_full;
672
673 /* let hardware know driver is loaded */
674 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
675 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
676 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
677
678 ixgbe_add_hw_stats(adapter);
679
680 #ifdef DEV_NETMAP
681 ixgbe_netmap_attach(adapter);
682 #endif /* DEV_NETMAP */
683 INIT_DEBUGOUT("ixgbe_attach: end");
684 return;
685 err_late:
686 ixgbe_free_transmit_structures(adapter);
687 ixgbe_free_receive_structures(adapter);
688 err_out:
689 if (adapter->ifp != NULL)
690 if_free(adapter->ifp);
691 ixgbe_free_pci_resources(adapter);
692 if (adapter->mta != NULL)
693 free(adapter->mta, M_DEVBUF);
694 return;
695
696 }
697
698 /*********************************************************************
699 * Device removal routine
700 *
701 * The detach entry point is called when the driver is being removed.
702 * This routine stops the adapter and deallocates all the resources
703 * that were allocated for driver operation.
704 *
705 * return 0 on success, positive on failure
706 *********************************************************************/
707
708 static int
709 ixgbe_detach(device_t dev, int flags)
710 {
711 struct adapter *adapter = device_private(dev);
712 struct rx_ring *rxr = adapter->rx_rings;
713 struct ixgbe_hw_stats *stats = &adapter->stats;
714 struct ix_queue *que = adapter->queues;
715 struct tx_ring *txr = adapter->tx_rings;
716 u32 ctrl_ext;
717
718 INIT_DEBUGOUT("ixgbe_detach: begin");
719
720 #if NVLAN > 0
721 /* Make sure VLANs are not using driver */
722 if (!VLAN_ATTACHED(&adapter->osdep.ec))
723 ; /* nothing to do: no VLANs */
724 else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
725 vlan_ifdetach(adapter->ifp);
726 else {
727 aprint_error_dev(dev, "VLANs in use\n");
728 return EBUSY;
729 }
730 #endif
731
732 IXGBE_CORE_LOCK(adapter);
733 ixgbe_stop(adapter);
734 IXGBE_CORE_UNLOCK(adapter);
735
736 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
737 #ifndef IXGBE_LEGACY_TX
738 softint_disestablish(txr->txq_si);
739 #endif
740 softint_disestablish(que->que_si);
741 }
742
743 /* Drain the Link queue */
744 softint_disestablish(adapter->link_si);
745 softint_disestablish(adapter->mod_si);
746 softint_disestablish(adapter->msf_si);
747 #ifdef IXGBE_FDIR
748 softint_disestablish(adapter->fdir_si);
749 #endif
750
751 /* let hardware know driver is unloading */
752 ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
753 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
754 IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
755
756 ether_ifdetach(adapter->ifp);
757 callout_halt(&adapter->timer, NULL);
758 #ifdef DEV_NETMAP
759 netmap_detach(adapter->ifp);
760 #endif /* DEV_NETMAP */
761 ixgbe_free_pci_resources(adapter);
762 #if 0 /* XXX the NetBSD port is probably missing something here */
763 bus_generic_detach(dev);
764 #endif
765 if_detach(adapter->ifp);
766
767 sysctl_teardown(&adapter->sysctllog);
768 evcnt_detach(&adapter->handleq);
769 evcnt_detach(&adapter->req);
770 evcnt_detach(&adapter->morerx);
771 evcnt_detach(&adapter->moretx);
772 evcnt_detach(&adapter->txloops);
773 evcnt_detach(&adapter->efbig_tx_dma_setup);
774 evcnt_detach(&adapter->m_defrag_failed);
775 evcnt_detach(&adapter->efbig2_tx_dma_setup);
776 evcnt_detach(&adapter->einval_tx_dma_setup);
777 evcnt_detach(&adapter->other_tx_dma_setup);
778 evcnt_detach(&adapter->eagain_tx_dma_setup);
779 evcnt_detach(&adapter->enomem_tx_dma_setup);
780 evcnt_detach(&adapter->watchdog_events);
781 evcnt_detach(&adapter->tso_err);
782 evcnt_detach(&adapter->link_irq);
783
784 txr = adapter->tx_rings;
785 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
786 evcnt_detach(&txr->no_desc_avail);
787 evcnt_detach(&txr->total_packets);
788 evcnt_detach(&txr->tso_tx);
789
790 if (i < __arraycount(adapter->stats.mpc)) {
791 evcnt_detach(&adapter->stats.mpc[i]);
792 }
793 if (i < __arraycount(adapter->stats.pxontxc)) {
794 evcnt_detach(&adapter->stats.pxontxc[i]);
795 evcnt_detach(&adapter->stats.pxonrxc[i]);
796 evcnt_detach(&adapter->stats.pxofftxc[i]);
797 evcnt_detach(&adapter->stats.pxoffrxc[i]);
798 evcnt_detach(&adapter->stats.pxon2offc[i]);
799 }
800 if (i < __arraycount(adapter->stats.qprc)) {
801 evcnt_detach(&adapter->stats.qprc[i]);
802 evcnt_detach(&adapter->stats.qptc[i]);
803 evcnt_detach(&adapter->stats.qbrc[i]);
804 evcnt_detach(&adapter->stats.qbtc[i]);
805 evcnt_detach(&adapter->stats.qprdc[i]);
806 }
807
808 evcnt_detach(&rxr->rx_packets);
809 evcnt_detach(&rxr->rx_bytes);
810 evcnt_detach(&rxr->no_jmbuf);
811 evcnt_detach(&rxr->rx_discarded);
812 evcnt_detach(&rxr->rx_irq);
813 }
814 evcnt_detach(&stats->ipcs);
815 evcnt_detach(&stats->l4cs);
816 evcnt_detach(&stats->ipcs_bad);
817 evcnt_detach(&stats->l4cs_bad);
818 evcnt_detach(&stats->intzero);
819 evcnt_detach(&stats->legint);
820 evcnt_detach(&stats->crcerrs);
821 evcnt_detach(&stats->illerrc);
822 evcnt_detach(&stats->errbc);
823 evcnt_detach(&stats->mspdc);
824 evcnt_detach(&stats->mlfc);
825 evcnt_detach(&stats->mrfc);
826 evcnt_detach(&stats->rlec);
827 evcnt_detach(&stats->lxontxc);
828 evcnt_detach(&stats->lxonrxc);
829 evcnt_detach(&stats->lxofftxc);
830 evcnt_detach(&stats->lxoffrxc);
831
832 /* Packet Reception Stats */
833 evcnt_detach(&stats->tor);
834 evcnt_detach(&stats->gorc);
835 evcnt_detach(&stats->tpr);
836 evcnt_detach(&stats->gprc);
837 evcnt_detach(&stats->mprc);
838 evcnt_detach(&stats->bprc);
839 evcnt_detach(&stats->prc64);
840 evcnt_detach(&stats->prc127);
841 evcnt_detach(&stats->prc255);
842 evcnt_detach(&stats->prc511);
843 evcnt_detach(&stats->prc1023);
844 evcnt_detach(&stats->prc1522);
845 evcnt_detach(&stats->ruc);
846 evcnt_detach(&stats->rfc);
847 evcnt_detach(&stats->roc);
848 evcnt_detach(&stats->rjc);
849 evcnt_detach(&stats->mngprc);
850 evcnt_detach(&stats->xec);
851
852 /* Packet Transmission Stats */
853 evcnt_detach(&stats->gotc);
854 evcnt_detach(&stats->tpt);
855 evcnt_detach(&stats->gptc);
856 evcnt_detach(&stats->bptc);
857 evcnt_detach(&stats->mptc);
858 evcnt_detach(&stats->mngptc);
859 evcnt_detach(&stats->ptc64);
860 evcnt_detach(&stats->ptc127);
861 evcnt_detach(&stats->ptc255);
862 evcnt_detach(&stats->ptc511);
863 evcnt_detach(&stats->ptc1023);
864 evcnt_detach(&stats->ptc1522);
865
866 ixgbe_free_transmit_structures(adapter);
867 ixgbe_free_receive_structures(adapter);
868 free(adapter->mta, M_DEVBUF);
869
870 IXGBE_CORE_LOCK_DESTROY(adapter);
871 return (0);
872 }
873
874 /*********************************************************************
875 *
876 * Shutdown entry point
877 *
878 **********************************************************************/
879
880 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
881 static int
882 ixgbe_shutdown(device_t dev)
883 {
884 struct adapter *adapter = device_private(dev);
885 IXGBE_CORE_LOCK(adapter);
886 ixgbe_stop(adapter);
887 IXGBE_CORE_UNLOCK(adapter);
888 return (0);
889 }
890 #endif
891
892
893 #ifdef IXGBE_LEGACY_TX
894 /*********************************************************************
895 * Transmit entry point
896 *
897 * ixgbe_start is called by the stack to initiate a transmit.
898 * The driver will remain in this routine as long as there are
899 * packets to transmit and transmit resources are available.
900 * In case resources are not available stack is notified and
901 * the packet is requeued.
902 **********************************************************************/
903
904 static void
905 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
906 {
907 int rc;
908 struct mbuf *m_head;
909 struct adapter *adapter = txr->adapter;
910
911 IXGBE_TX_LOCK_ASSERT(txr);
912
913 if ((ifp->if_flags & IFF_RUNNING) == 0)
914 return;
915 if (!adapter->link_active)
916 return;
917
918 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
919 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
920 break;
921
922 IFQ_POLL(&ifp->if_snd, m_head);
923 if (m_head == NULL)
924 break;
925
926 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
927 break;
928 }
929 IFQ_DEQUEUE(&ifp->if_snd, m_head);
930 if (rc == EFBIG) {
931 struct mbuf *mtmp;
932
933 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
934 m_head = mtmp;
935 rc = ixgbe_xmit(txr, m_head);
936 if (rc != 0)
937 adapter->efbig2_tx_dma_setup.ev_count++;
938 } else
939 adapter->m_defrag_failed.ev_count++;
940 }
941 if (rc != 0) {
942 m_freem(m_head);
943 continue;
944 }
945
946 /* Send a copy of the frame to the BPF listener */
947 bpf_mtap(ifp, m_head);
948
949 /* Set watchdog on */
950 getmicrotime(&txr->watchdog_time);
951 txr->queue_status = IXGBE_QUEUE_WORKING;
952
953 }
954 return;
955 }
956
957 /*
958 * Legacy TX start - called by the stack, this
959 * always uses the first tx ring, and should
960 * not be used with multiqueue tx enabled.
961 */
962 static void
963 ixgbe_start(struct ifnet *ifp)
964 {
965 struct adapter *adapter = ifp->if_softc;
966 struct tx_ring *txr = adapter->tx_rings;
967
968 if (ifp->if_flags & IFF_RUNNING) {
969 IXGBE_TX_LOCK(txr);
970 ixgbe_start_locked(txr, ifp);
971 IXGBE_TX_UNLOCK(txr);
972 }
973 return;
974 }
975
976 #else /* ! IXGBE_LEGACY_TX */
977
978 /*
979 ** Multiqueue Transmit driver
980 **
981 */
982 static int
983 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
984 {
985 struct adapter *adapter = ifp->if_softc;
986 struct ix_queue *que;
987 struct tx_ring *txr;
988 int i = 0, err = 0;
989
990 /* Which queue to use */
991 if ((m->m_flags & M_FLOWID) != 0)
992 i = m->m_pkthdr.flowid % adapter->num_queues;
993 else
994 i = cpu_index(curcpu()) % adapter->num_queues;
995
996 txr = &adapter->tx_rings[i];
997 que = &adapter->queues[i];
998
999 if (IXGBE_TX_TRYLOCK(txr)) {
1000 err = ixgbe_mq_start_locked(ifp, txr, m);
1001 IXGBE_TX_UNLOCK(txr);
1002 } else {
1003 err = drbr_enqueue(ifp, txr->br, m);
1004 softint_schedule(txr->txq_si);
1005 }
1006
1007 return (err);
1008 }
1009
1010 static int
1011 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
1012 {
1013 struct adapter *adapter = txr->adapter;
1014 struct mbuf *next;
1015 int enqueued, err = 0;
1016
1017 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
1018 adapter->link_active == 0) {
1019 if (m != NULL)
1020 err = drbr_enqueue(ifp, txr->br, m);
1021 return (err);
1022 }
1023
1024 enqueued = 0;
1025 if (m != NULL) {
1026 err = drbr_enqueue(ifp, txr->br, m);
1027 if (err) {
1028 return (err);
1029 }
1030 }
1031
1032 /* Process the queue */
1033 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1034 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1035 if (next == NULL) {
1036 drbr_advance(ifp, txr->br);
1037 } else {
1038 drbr_putback(ifp, txr->br, next);
1039 }
1040 break;
1041 }
1042 drbr_advance(ifp, txr->br);
1043 enqueued++;
1044 /* Send a copy of the frame to the BPF listener */
1045 bpf_mtap(ifp, next);
1046 if ((ifp->if_flags & IFF_RUNNING) == 0)
1047 break;
1048 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
1049 ixgbe_txeof(txr);
1050 }
1051
1052 if (enqueued > 0) {
1053 /* Set watchdog on */
1054 txr->queue_status = IXGBE_QUEUE_WORKING;
1055 getmicrotime(&txr->watchdog_time);
1056 }
1057
1058 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
1059 ixgbe_txeof(txr);
1060
1061 return (err);
1062 }
1063
1064 /*
1065 * Called from a softint (a taskqueue in the FreeBSD driver) to drain queued transmit packets.
1066 */
1067 static void
1068 ixgbe_deferred_mq_start(void *arg)
1069 {
1070 struct tx_ring *txr = arg;
1071 struct adapter *adapter = txr->adapter;
1072 struct ifnet *ifp = adapter->ifp;
1073
1074 IXGBE_TX_LOCK(txr);
1075 if (!drbr_empty(ifp, txr->br))
1076 ixgbe_mq_start_locked(ifp, txr, NULL);
1077 IXGBE_TX_UNLOCK(txr);
1078 }
1079
1080 /*
1081 ** Flush all ring buffers
1082 */
1083 static void
1084 ixgbe_qflush(struct ifnet *ifp)
1085 {
1086 struct adapter *adapter = ifp->if_softc;
1087 struct tx_ring *txr = adapter->tx_rings;
1088 struct mbuf *m;
1089
1090 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1091 IXGBE_TX_LOCK(txr);
1092 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1093 m_freem(m);
1094 IXGBE_TX_UNLOCK(txr);
1095 }
1096 if_qflush(ifp);
1097 }
1098 #endif /* IXGBE_LEGACY_TX */
1099
1100 static int
1101 ixgbe_ifflags_cb(struct ethercom *ec)
1102 {
1103 struct ifnet *ifp = &ec->ec_if;
1104 struct adapter *adapter = ifp->if_softc;
1105 int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
1106
1107 IXGBE_CORE_LOCK(adapter);
1108
1109 if (change != 0)
1110 adapter->if_flags = ifp->if_flags;
1111
1112 if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
1113 rc = ENETRESET;
1114 else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
1115 ixgbe_set_promisc(adapter);
1116
1117 /* Set up VLAN support and filter */
1118 ixgbe_setup_vlan_hw_support(adapter);
1119
1120 IXGBE_CORE_UNLOCK(adapter);
1121
1122 return rc;
1123 }
1124
1125 /*********************************************************************
1126 * Ioctl entry point
1127 *
1128 * ixgbe_ioctl is called when the user wants to configure the
1129 * interface.
1130 *
1131 * return 0 on success, positive on failure
1132 **********************************************************************/
1133
1134 static int
1135 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
1136 {
1137 struct adapter *adapter = ifp->if_softc;
1138 struct ixgbe_hw *hw = &adapter->hw;
1139 struct ifcapreq *ifcr = data;
1140 struct ifreq *ifr = data;
1141 int error = 0;
1142 int l4csum_en;
1143 const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
1144 IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
1145
1146 switch (command) {
1147 case SIOCSIFFLAGS:
1148 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1149 break;
1150 case SIOCADDMULTI:
1151 case SIOCDELMULTI:
1152 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1153 break;
1154 case SIOCSIFMEDIA:
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1157 break;
1158 case SIOCSIFCAP:
1159 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1160 break;
1161 case SIOCSIFMTU:
1162 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
1163 break;
1164 default:
1165 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1166 break;
1167 }
1168
1169 switch (command) {
1170 case SIOCSIFMEDIA:
1171 case SIOCGIFMEDIA:
1172 return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1173 case SIOCGI2C:
1174 {
1175 struct ixgbe_i2c_req i2c;
1176 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1177 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1178 if (error)
1179 break;
1180 if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)){
1181 error = EINVAL;
1182 break;
1183 }
1184 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1185 i2c.dev_addr, i2c.data);
1186 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1187 break;
1188 }
1189 case SIOCSIFCAP:
1190 /* Layer-4 Rx checksum offload has to be turned on and
1191 * off as a unit.
1192 */
1193 l4csum_en = ifcr->ifcr_capenable & l4csum;
1194 if (l4csum_en != l4csum && l4csum_en != 0)
1195 return EINVAL;
1196 /*FALLTHROUGH*/
1197 case SIOCADDMULTI:
1198 case SIOCDELMULTI:
1199 case SIOCSIFFLAGS:
1200 case SIOCSIFMTU:
1201 default:
1202 if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
1203 return error;
1204 if ((ifp->if_flags & IFF_RUNNING) == 0)
1205 ;
1206 else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
1207 IXGBE_CORE_LOCK(adapter);
1208 ixgbe_init_locked(adapter);
1209 IXGBE_CORE_UNLOCK(adapter);
1210 } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
1211 /*
1212 * Multicast list has changed; set the hardware filter
1213 * accordingly.
1214 */
1215 IXGBE_CORE_LOCK(adapter);
1216 ixgbe_disable_intr(adapter);
1217 ixgbe_set_multi(adapter);
1218 ixgbe_enable_intr(adapter);
1219 IXGBE_CORE_UNLOCK(adapter);
1220 }
1221 return 0;
1222 }
1223
1224 return error;
1225 }
1226
1227 /*********************************************************************
1228 * Init entry point
1229 *
1230 * This routine is used in two ways. It is used by the stack as
1231 * the init entry point in the network interface structure. It is also
1232 * used by the driver as a hw/sw initialization routine to get to a
1233 * consistent state.
1234 *
1235 * return 0 on success, positive on failure
1236 **********************************************************************/
1237 #define IXGBE_MHADD_MFS_SHIFT 16
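/*
 * The maximum frame size (MFS) field occupies the upper 16 bits of the
 * MHADD register; ixgbe_init_locked() below masks IXGBE_MHADD_MFS_MASK
 * and shifts the frame size into place when the MTU exceeds ETHERMTU.
 */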
1238
1239 static void
1240 ixgbe_init_locked(struct adapter *adapter)
1241 {
1242 struct ifnet *ifp = adapter->ifp;
1243 device_t dev = adapter->dev;
1244 struct ixgbe_hw *hw = &adapter->hw;
1245 u32 k, txdctl, mhadd, gpie;
1246 u32 rxdctl, rxctrl;
1247
1248 /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
1249
1250 KASSERT(mutex_owned(&adapter->core_mtx));
1251 INIT_DEBUGOUT("ixgbe_init: begin");
1252 hw->adapter_stopped = FALSE;
1253 ixgbe_stop_adapter(hw);
1254 callout_stop(&adapter->timer);
1255
1256 /* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
1257 adapter->max_frame_size =
1258 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1259
1260 /* reprogram the RAR[0] in case user changed it. */
1261 ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1262
1263 /* Get the latest mac address, User can use a LAA */
1264 memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
1265 IXGBE_ETH_LENGTH_OF_ADDRESS);
1266 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1267 hw->addr_ctrl.rar_used_count = 1;
1268
1269 /* Prepare transmit descriptors and buffers */
1270 if (ixgbe_setup_transmit_structures(adapter)) {
1271 device_printf(dev,"Could not setup transmit structures\n");
1272 ixgbe_stop(adapter);
1273 return;
1274 }
1275
1276 ixgbe_init_hw(hw);
1277 ixgbe_initialize_transmit_units(adapter);
1278
1279 /* Setup Multicast table */
1280 ixgbe_set_multi(adapter);
1281
1282 /*
1283 ** Determine the correct mbuf pool
1284 ** for doing jumbo frames
1285 */
1286 if (adapter->max_frame_size <= 2048)
1287 adapter->rx_mbuf_sz = MCLBYTES;
1288 else if (adapter->max_frame_size <= 4096)
1289 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1290 else if (adapter->max_frame_size <= 9216)
1291 adapter->rx_mbuf_sz = MJUM9BYTES;
1292 else
1293 adapter->rx_mbuf_sz = MJUM16BYTES;
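	/*
	 * Standard mbuf cluster sizes: MCLBYTES is 2KB, MJUMPAGESIZE is one
	 * page (typically 4KB), MJUM9BYTES is 9KB and MJUM16BYTES is 16KB,
	 * so the smallest pool that holds a full frame is chosen.
	 */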
1294
1295 /* Prepare receive descriptors and buffers */
1296 if (ixgbe_setup_receive_structures(adapter)) {
1297 device_printf(dev,"Could not setup receive structures\n");
1298 ixgbe_stop(adapter);
1299 return;
1300 }
1301
1302 /* Configure RX settings */
1303 ixgbe_initialize_receive_units(adapter);
1304
1305 gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1306
1307 /* Enable Fan Failure Interrupt */
1308 gpie |= IXGBE_SDP1_GPIEN;
1309
1310 /* Add for Thermal detection */
1311 if (hw->mac.type == ixgbe_mac_82599EB)
1312 gpie |= IXGBE_SDP2_GPIEN;
1313
1314 /* Thermal Failure Detection */
1315 if (hw->mac.type == ixgbe_mac_X540)
1316 gpie |= IXGBE_SDP0_GPIEN;
1317
1318 if (adapter->msix > 1) {
1319 /* Enable Enhanced MSIX mode */
1320 gpie |= IXGBE_GPIE_MSIX_MODE;
1321 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1322 IXGBE_GPIE_OCD;
1323 }
1324 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1325
1326 /* Set MTU size */
1327 if (ifp->if_mtu > ETHERMTU) {
1328 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1329 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1330 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1331 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1332 }
1333
1334 /* Now enable all the queues */
1335
1336 for (int i = 0; i < adapter->num_queues; i++) {
1337 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1338 txdctl |= IXGBE_TXDCTL_ENABLE;
1339 /* Set WTHRESH to 8, burst writeback */
1340 txdctl |= (8 << 16);
1341 /*
1342 * When the internal queue falls below PTHRESH (32),
1343 * start prefetching as long as there are at least
1344 * HTHRESH (1) buffers ready. The values are taken
1345 * from the Intel linux driver 3.8.21.
1346 * Prefetching enables tx line rate even with 1 queue.
1347 */
1348 txdctl |= (32 << 0) | (1 << 8);
1349 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1350 }
1351
1352 for (int i = 0; i < adapter->num_queues; i++) {
1353 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1354 if (hw->mac.type == ixgbe_mac_82598EB) {
1355 /*
1356 ** PTHRESH = 21
1357 ** HTHRESH = 4
1358 ** WTHRESH = 8
1359 */
1360 rxdctl &= ~0x3FFFFF;
1361 rxdctl |= 0x080420;
1362 }
1363 rxdctl |= IXGBE_RXDCTL_ENABLE;
1364 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1365 /* XXX I don't trust this loop, and I don't trust the
1366 * XXX memory barrier. What is this meant to do? --dyoung
1367 */
1368 for (k = 0; k < 10; k++) {
1369 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1370 IXGBE_RXDCTL_ENABLE)
1371 break;
1372 else
1373 msec_delay(1);
1374 }
1375 wmb();
1376 #ifdef DEV_NETMAP
1377 /*
1378 * In netmap mode, we must preserve the buffers made
1379 * available to userspace before the if_init()
1380 * (this is true by default on the TX side, because
1381 * init makes all buffers available to userspace).
1382 *
1383 * netmap_reset() and the device specific routines
1384 * (e.g. ixgbe_setup_receive_rings()) map these
1385 * buffers at the end of the NIC ring, so here we
1386 * must set the RDT (tail) register to make sure
1387 * they are not overwritten.
1388 *
1389 * In this driver the NIC ring starts at RDH = 0,
1390 * RDT points to the last slot available for reception (?),
1391 * so RDT = num_rx_desc - 1 means the whole ring is available.
1392 */
1393 if (ifp->if_capenable & IFCAP_NETMAP) {
1394 struct netmap_adapter *na = NA(adapter->ifp);
1395 struct netmap_kring *kring = &na->rx_rings[i];
1396 int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1397
1398 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1399 } else
1400 #endif /* DEV_NETMAP */
1401 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1402 }
1403
1404 /* Set up VLAN support and filter */
1405 ixgbe_setup_vlan_hw_support(adapter);
1406
1407 /* Enable Receive engine */
1408 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1409 if (hw->mac.type == ixgbe_mac_82598EB)
1410 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1411 rxctrl |= IXGBE_RXCTRL_RXEN;
1412 ixgbe_enable_rx_dma(hw, rxctrl);
1413
1414 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1415
1416 /* Set up MSI/X routing */
1417 if (ixgbe_enable_msix) {
1418 ixgbe_configure_ivars(adapter);
1419 /* Set up auto-mask */
1420 if (hw->mac.type == ixgbe_mac_82598EB)
1421 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1422 else {
1423 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1424 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1425 }
1426 } else { /* Simple settings for Legacy/MSI */
1427 ixgbe_set_ivar(adapter, 0, 0, 0);
1428 ixgbe_set_ivar(adapter, 0, 0, 1);
1429 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1430 }
1431
1432 #ifdef IXGBE_FDIR
1433 /* Init Flow director */
1434 if (hw->mac.type != ixgbe_mac_82598EB) {
1435 u32 hdrm = 32 << fdir_pballoc;
1436
1437 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1438 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1439 }
1440 #endif
1441
1442 /*
1443 ** Check on any SFP devices that
1444 ** need to be kick-started
1445 */
1446 if (hw->phy.type == ixgbe_phy_none) {
1447 int err = hw->phy.ops.identify(hw);
1448 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1449 device_printf(dev,
1450 "Unsupported SFP+ module type was detected.\n");
1451 return;
1452 }
1453 }
1454
1455 /* Set moderation on the Link interrupt */
1456 IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1457
1458 /* Config/Enable Link */
1459 ixgbe_config_link(adapter);
1460
1461 /* Hardware Packet Buffer & Flow Control setup */
1462 {
1463 u32 rxpb, frame, size, tmp;
1464
1465 frame = adapter->max_frame_size;
1466
1467 /* Calculate High Water */
1468 if (hw->mac.type == ixgbe_mac_X540)
1469 tmp = IXGBE_DV_X540(frame, frame);
1470 else
1471 tmp = IXGBE_DV(frame, frame);
1472 size = IXGBE_BT2KB(tmp);
1473 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1474 hw->fc.high_water[0] = rxpb - size;
1475
1476 /* Now calculate Low Water */
1477 if (hw->mac.type == ixgbe_mac_X540)
1478 tmp = IXGBE_LOW_DV_X540(frame);
1479 else
1480 tmp = IXGBE_LOW_DV(frame);
1481 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1482
1483 hw->fc.requested_mode = adapter->fc;
1484 hw->fc.pause_time = IXGBE_FC_PAUSE;
1485 hw->fc.send_xon = TRUE;
1486 }
1487 /* Initialize the FC settings */
1488 ixgbe_start_hw(hw);
1489
1490 /* And now turn on interrupts */
1491 ixgbe_enable_intr(adapter);
1492
1493 /* Now inform the stack we're ready */
1494 ifp->if_flags |= IFF_RUNNING;
1495
1496 return;
1497 }
1498
1499 static int
1500 ixgbe_init(struct ifnet *ifp)
1501 {
1502 struct adapter *adapter = ifp->if_softc;
1503
1504 IXGBE_CORE_LOCK(adapter);
1505 ixgbe_init_locked(adapter);
1506 IXGBE_CORE_UNLOCK(adapter);
1507 return 0; /* XXX ixgbe_init_locked cannot fail? really? */
1508 }
1509
1510
1511 /*
1512 **
1513 ** MSIX Interrupt Handlers and Tasklets
1514 **
1515 */
1516
1517 static inline void
1518 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1519 {
1520 struct ixgbe_hw *hw = &adapter->hw;
1521 u64 queue = (u64)(1ULL << vector);
1522 u32 mask;
1523
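	/*
	 * The 82598 has a single EIMS register; later MACs spread the 64
	 * possible vectors across EIMS_EX(0) (vectors 0-31) and EIMS_EX(1)
	 * (vectors 32-63), hence the split of the 64-bit queue mask below.
	 */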
1524 if (hw->mac.type == ixgbe_mac_82598EB) {
1525 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1526 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1527 } else {
1528 mask = (queue & 0xFFFFFFFF);
1529 if (mask)
1530 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1531 mask = (queue >> 32);
1532 if (mask)
1533 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1534 }
1535 }
1536
1537 __unused static inline void
1538 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1539 {
1540 struct ixgbe_hw *hw = &adapter->hw;
1541 u64 queue = (u64)(1ULL << vector);
1542 u32 mask;
1543
1544 if (hw->mac.type == ixgbe_mac_82598EB) {
1545 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1546 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1547 } else {
1548 mask = (queue & 0xFFFFFFFF);
1549 if (mask)
1550 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1551 mask = (queue >> 32);
1552 if (mask)
1553 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1554 }
1555 }
1556
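/*
 * Writing a queue's bit to EICS (interrupt cause set) re-raises that
 * queue's interrupt, so any work missed while it was masked gets serviced.
 */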
1557 static inline void
1558 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1559 {
1560 u32 mask;
1561
1562 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1563 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1564 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1565 } else {
1566 mask = (queues & 0xFFFFFFFF);
1567 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1568 mask = (queues >> 32);
1569 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1570 }
1571 }
1572
1573
1574 static void
1575 ixgbe_handle_que(void *context)
1576 {
1577 struct ix_queue *que = context;
1578 struct adapter *adapter = que->adapter;
1579 struct tx_ring *txr = que->txr;
1580 struct ifnet *ifp = adapter->ifp;
1581 bool more;
1582
1583 adapter->handleq.ev_count++;
1584
1585 if (ifp->if_flags & IFF_RUNNING) {
1586 more = ixgbe_rxeof(que);
1587 IXGBE_TX_LOCK(txr);
1588 ixgbe_txeof(txr);
1589 #ifndef IXGBE_LEGACY_TX
1590 if (!drbr_empty(ifp, txr->br))
1591 ixgbe_mq_start_locked(ifp, txr, NULL);
1592 #else
1593 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1594 ixgbe_start_locked(txr, ifp);
1595 #endif
1596 IXGBE_TX_UNLOCK(txr);
1597 if (more) {
1598 adapter->req.ev_count++;
1599 softint_schedule(que->que_si);
1600 return;
1601 }
1602 }
1603
1604 /* Reenable this interrupt */
1605 ixgbe_enable_queue(adapter, que->msix);
1606 return;
1607 }
1608
1609
1610 /*********************************************************************
1611 *
1612 * Legacy Interrupt Service routine
1613 *
1614 **********************************************************************/
1615
1616 static int
1617 ixgbe_legacy_irq(void *arg)
1618 {
1619 struct ix_queue *que = arg;
1620 struct adapter *adapter = que->adapter;
1621 struct ifnet *ifp = adapter->ifp;
1622 struct ixgbe_hw *hw = &adapter->hw;
1623 struct tx_ring *txr = adapter->tx_rings;
1624 bool more_tx = false, more_rx = false;
1625 u32 reg_eicr, loop = MAX_LOOP;
1626
1627 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1628
1629 adapter->stats.legint.ev_count++;
1630 ++que->irqs;
1631 if (reg_eicr == 0) {
1632 adapter->stats.intzero.ev_count++;
1633 if ((ifp->if_flags & IFF_UP) != 0)
1634 ixgbe_enable_intr(adapter);
1635 return 0;
1636 }
1637
1638 if ((ifp->if_flags & IFF_RUNNING) != 0) {
1639 more_rx = ixgbe_rxeof(que);
1640
1641 IXGBE_TX_LOCK(txr);
1642 do {
1643 adapter->txloops.ev_count++;
1644 more_tx = ixgbe_txeof(txr);
1645 } while (loop-- && more_tx);
1646 IXGBE_TX_UNLOCK(txr);
1647 }
1648
1649 if (more_rx || more_tx) {
1650 if (more_rx)
1651 adapter->morerx.ev_count++;
1652 if (more_tx)
1653 adapter->moretx.ev_count++;
1654 softint_schedule(que->que_si);
1655 }
1656
1657 /* Check for fan failure */
1658 if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1659 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1660 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1661 "REPLACE IMMEDIATELY!!\n");
1662 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1663 }
1664
1665 /* Link status change */
1666 if (reg_eicr & IXGBE_EICR_LSC)
1667 softint_schedule(adapter->link_si);
1668
1669 ixgbe_enable_intr(adapter);
1670 return 1;
1671 }
1672
1673
1674 #if defined(NETBSD_MSI_OR_MSIX)
1675 /*********************************************************************
1676 *
1677 * MSIX Queue Interrupt Service routine
1678 *
1679 **********************************************************************/
1680 void
1681 ixgbe_msix_que(void *arg)
1682 {
1683 struct ix_queue *que = arg;
1684 struct adapter *adapter = que->adapter;
1685 struct tx_ring *txr = que->txr;
1686 struct rx_ring *rxr = que->rxr;
1687 bool more_tx, more_rx;
1688 u32 newitr = 0;
1689
1690 ixgbe_disable_queue(adapter, que->msix);
1691 ++que->irqs;
1692
1693 more_rx = ixgbe_rxeof(que);
1694
1695 IXGBE_TX_LOCK(txr);
1696 more_tx = ixgbe_txeof(txr);
1697 /*
1698 ** Make certain that if the stack
1699 ** has anything queued the task gets
1700 ** scheduled to handle it.
1701 */
1702 #ifdef IXGBE_LEGACY_TX
1703 if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
1704 #else
1705 if (!drbr_empty(adapter->ifp, txr->br))
1706 #endif
1707 more_tx = 1;
1708 IXGBE_TX_UNLOCK(txr);
1709
1710 /* Do AIM now? */
1711
1712 if (ixgbe_enable_aim == FALSE)
1713 goto no_calc;
1714 /*
1715 ** Do Adaptive Interrupt Moderation:
1716 ** - Write out last calculated setting
1717 ** - Calculate based on average size over
1718 ** the last interval.
1719 */
1720 if (que->eitr_setting)
1721 IXGBE_WRITE_REG(&adapter->hw,
1722 IXGBE_EITR(que->msix), que->eitr_setting);
1723
1724 que->eitr_setting = 0;
1725
1726 /* Idle, do nothing */
1727 if ((txr->bytes == 0) && (rxr->bytes == 0))
1728 goto no_calc;
1729
1730 if ((txr->bytes) && (txr->packets))
1731 newitr = txr->bytes/txr->packets;
1732 if ((rxr->bytes) && (rxr->packets))
1733 newitr = max(newitr,
1734 (rxr->bytes / rxr->packets));
1735 newitr += 24; /* account for hardware frame, crc */
1736
1737 /* set an upper boundary */
1738 newitr = min(newitr, 3000);
1739
1740 /* Be nice to the mid range */
1741 if ((newitr > 300) && (newitr < 1200))
1742 newitr = (newitr / 3);
1743 else
1744 newitr = (newitr / 2);
1745
1746 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1747 newitr |= newitr << 16;
1748 else
1749 newitr |= IXGBE_EITR_CNT_WDIS;
1750
1751 /* save for next interrupt */
1752 que->eitr_setting = newitr;
1753
1754 /* Reset state */
1755 txr->bytes = 0;
1756 txr->packets = 0;
1757 rxr->bytes = 0;
1758 rxr->packets = 0;
1759
1760 no_calc:
1761 if (more_tx || more_rx)
1762 softint_schedule(que->que_si);
1763 else /* Reenable this interrupt */
1764 ixgbe_enable_queue(adapter, que->msix);
1765 return;
1766 }
1767
1768
1769 static void
1770 ixgbe_msix_link(void *arg)
1771 {
1772 struct adapter *adapter = arg;
1773 struct ixgbe_hw *hw = &adapter->hw;
1774 u32 reg_eicr;
1775
1776 ++adapter->link_irq.ev_count;
1777
1778 /* First get the cause */
1779 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1780 /* Clear interrupt with write */
1781 IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1782
1783 /* Link status change */
1784 if (reg_eicr & IXGBE_EICR_LSC)
1785 softint_schedule(adapter->link_si);
1786
1787 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1788 #ifdef IXGBE_FDIR
1789 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1790 /* This is probably overkill :) */
1791 if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1792 return;
1793 /* Disable the interrupt */
1794 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1795 softint_schedule(adapter->fdir_si);
1796 } else
1797 #endif
1798 if (reg_eicr & IXGBE_EICR_ECC) {
1799 device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1800 "Please Reboot!!\n");
1801 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1802 } else
1803
1804 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1805 /* Clear the interrupt */
1806 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1807 softint_schedule(adapter->msf_si);
1808 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1809 /* Clear the interrupt */
1810 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1811 softint_schedule(adapter->mod_si);
1812 }
1813 }
1814
1815 /* Check for fan failure */
1816 if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1817 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1818 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1819 "REPLACE IMMEDIATELY!!\n");
1820 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1821 }
1822
1823 /* Check for over temp condition */
1824 if ((hw->mac.type == ixgbe_mac_X540) &&
1825 (reg_eicr & IXGBE_EICR_TS)) {
1826 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1827 "PHY IS SHUT DOWN!!\n");
1828 device_printf(adapter->dev, "System shutdown required\n");
1829 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1830 }
1831
1832 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1833 return;
1834 }
1835 #endif
1836
1837 /*********************************************************************
1838 *
1839 * Media Ioctl callback
1840 *
1841 * This routine is called whenever the user queries the status of
1842 * the interface using ifconfig.
1843 *
1844 **********************************************************************/
1845 static void
1846 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1847 {
1848 struct adapter *adapter = ifp->if_softc;
1849
1850 INIT_DEBUGOUT("ixgbe_media_status: begin");
1851 IXGBE_CORE_LOCK(adapter);
1852 ixgbe_update_link_status(adapter);
1853
1854 ifmr->ifm_status = IFM_AVALID;
1855 ifmr->ifm_active = IFM_ETHER;
1856
1857 if (!adapter->link_active) {
1858 IXGBE_CORE_UNLOCK(adapter);
1859 return;
1860 }
1861
1862 ifmr->ifm_status |= IFM_ACTIVE;
1863
1864 switch (adapter->link_speed) {
1865 case IXGBE_LINK_SPEED_100_FULL:
1866 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1867 break;
1868 case IXGBE_LINK_SPEED_1GB_FULL:
1869 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1870 break;
1871 case IXGBE_LINK_SPEED_10GB_FULL:
1872 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1873 break;
1874 }
1875
1876 IXGBE_CORE_UNLOCK(adapter);
1877
1878 return;
1879 }
1880
1881 /*********************************************************************
1882 *
1883 * Media Ioctl callback
1884 *
1885 * This routine is called when the user changes speed/duplex using
 * media/mediaopt options with ifconfig.
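 *
 * For example, "ifconfig ixg0 media autoselect" (the interface
 * name may differ) lands here with IFM_AUTO; any fixed media
 * selection is rejected with EINVAL.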
1887 *
1888 **********************************************************************/
1889 static int
1890 ixgbe_media_change(struct ifnet * ifp)
1891 {
1892 struct adapter *adapter = ifp->if_softc;
1893 struct ifmedia *ifm = &adapter->media;
1894
1895 INIT_DEBUGOUT("ixgbe_media_change: begin");
1896
1897 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1898 return (EINVAL);
1899
1900 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1901 case IFM_AUTO:
1902 adapter->hw.phy.autoneg_advertised =
1903 IXGBE_LINK_SPEED_100_FULL |
1904 IXGBE_LINK_SPEED_1GB_FULL |
1905 IXGBE_LINK_SPEED_10GB_FULL;
1906 break;
1907 default:
1908 device_printf(adapter->dev, "Only auto media type\n");
1909 return (EINVAL);
1910 }
1911
1912 return (0);
1913 }
1914
1915 /*********************************************************************
1916 *
1917 * This routine maps the mbufs to tx descriptors, allowing the
1918 * TX engine to transmit the packets.
1919 * - return 0 on success, positive on failure
1920 *
1921 **********************************************************************/
1922
1923 static int
1924 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
1925 {
1926 struct m_tag *mtag;
1927 struct adapter *adapter = txr->adapter;
1928 struct ethercom *ec = &adapter->osdep.ec;
1929 u32 olinfo_status = 0, cmd_type_len;
1930 int i, j, error;
1931 int first;
1932 bus_dmamap_t map;
1933 struct ixgbe_tx_buf *txbuf;
1934 union ixgbe_adv_tx_desc *txd = NULL;
1935
1936 /* Basic descriptor defines */
1937 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1938 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1939
1940 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
1941 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1942
1943 /*
1944 * Important to capture the first descriptor
1945 * used because it will contain the index of
1946 * the one we tell the hardware to report back
1947 */
1948 first = txr->next_avail_desc;
1949 txbuf = &txr->tx_buffers[first];
1950 map = txbuf->map;
1951
1952 /*
1953 * Map the packet for DMA.
1954 */
1955 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
1956 m_head, BUS_DMA_NOWAIT);
1957
1958 if (__predict_false(error)) {
1959
1960 switch (error) {
1961 case EAGAIN:
1962 adapter->eagain_tx_dma_setup.ev_count++;
1963 return EAGAIN;
1964 case ENOMEM:
1965 adapter->enomem_tx_dma_setup.ev_count++;
1966 return EAGAIN;
1967 case EFBIG:
1968 /*
1969 * XXX Try it again?
1970 * do m_defrag() and retry bus_dmamap_load_mbuf().
1971 */
1972 adapter->efbig_tx_dma_setup.ev_count++;
1973 return error;
1974 case EINVAL:
1975 adapter->einval_tx_dma_setup.ev_count++;
1976 return error;
1977 default:
1978 adapter->other_tx_dma_setup.ev_count++;
1979 return error;
1980 case 0:
1981 break;
1982 }
1983 }
1984
1985 /* Make certain there are enough descriptors */
1986 if (map->dm_nsegs > txr->tx_avail - 2) {
1987 txr->no_desc_avail.ev_count++;
1988 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
1989 return EAGAIN;
1990 }
1991
1992 /*
	** Set up the appropriate offload context;
	** this will consume the first descriptor.
1995 */
1996 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1997 if (__predict_false(error)) {
1998 return (error);
1999 }
2000
2001 #ifdef IXGBE_FDIR
2002 /* Do the flow director magic */
2003 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
2004 ++txr->atr_count;
2005 if (txr->atr_count >= atr_sample_rate) {
2006 ixgbe_atr(txr, m_head);
2007 txr->atr_count = 0;
2008 }
2009 }
2010 #endif
2011
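	/*
	** Fill the ring: one advanced data descriptor per DMA
	** segment, wrapping the ring index when the end of the
	** ring is reached.
	*/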
2012 i = txr->next_avail_desc;
2013 for (j = 0; j < map->dm_nsegs; j++) {
2014 bus_size_t seglen;
2015 bus_addr_t segaddr;
2016
2017 txbuf = &txr->tx_buffers[i];
2018 txd = &txr->tx_base[i];
2019 seglen = map->dm_segs[j].ds_len;
2020 segaddr = htole64(map->dm_segs[j].ds_addr);
2021
2022 txd->read.buffer_addr = segaddr;
2023 txd->read.cmd_type_len = htole32(txr->txd_cmd |
2024 cmd_type_len |seglen);
2025 txd->read.olinfo_status = htole32(olinfo_status);
2026
2027 if (++i == txr->num_desc)
2028 i = 0;
2029 }
2030
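	/*
	** Mark the final segment: EOP ends the frame and RS asks
	** the hardware to write back completion status, which
	** ixgbe_txeof() later uses to reclaim the buffers.
	*/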
2031 txd->read.cmd_type_len |=
2032 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
2033 txr->tx_avail -= map->dm_nsegs;
2034 txr->next_avail_desc = i;
2035
2036 txbuf->m_head = m_head;
2037 /*
2038 ** Here we swap the map so the last descriptor,
	** which gets the completion interrupt, has the
2040 ** real map, and the first descriptor gets the
2041 ** unused map from this descriptor.
2042 */
2043 txr->tx_buffers[first].map = txbuf->map;
2044 txbuf->map = map;
2045 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
2046 BUS_DMASYNC_PREWRITE);
2047
2048 /* Set the EOP descriptor that will be marked done */
2049 txbuf = &txr->tx_buffers[first];
2050 txbuf->eop = txd;
2051
2052 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2053 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2054 /*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
2056 * hardware that this frame is available to transmit.
2057 */
2058 ++txr->total_packets.ev_count;
2059 IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
2060
2061 return 0;
2062 }
2063
2064 static void
2065 ixgbe_set_promisc(struct adapter *adapter)
2066 {
2067 struct ether_multi *enm;
2068 struct ether_multistep step;
2069 u_int32_t reg_rctl;
2070 struct ethercom *ec = &adapter->osdep.ec;
2071 struct ifnet *ifp = adapter->ifp;
2072 int mcnt = 0;
2073
2074 reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2075 reg_rctl &= (~IXGBE_FCTRL_UPE);
2076 if (ifp->if_flags & IFF_ALLMULTI)
2077 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2078 else {
2079 ETHER_FIRST_MULTI(step, ec, enm);
2080 while (enm != NULL) {
2081 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2082 break;
2083 mcnt++;
2084 ETHER_NEXT_MULTI(step, enm);
2085 }
2086 }
2087 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2088 reg_rctl &= (~IXGBE_FCTRL_MPE);
2089 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2090
2091 if (ifp->if_flags & IFF_PROMISC) {
2092 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2093 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2094 } else if (ifp->if_flags & IFF_ALLMULTI) {
2095 reg_rctl |= IXGBE_FCTRL_MPE;
2096 reg_rctl &= ~IXGBE_FCTRL_UPE;
2097 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2098 }
2099 return;
2100 }
2101
2102
2103 /*********************************************************************
2104 * Multicast Update
2105 *
 * This routine is called whenever the multicast address list is updated.
2107 *
2108 **********************************************************************/
2109 #define IXGBE_RAR_ENTRIES 16
2110
2111 static void
2112 ixgbe_set_multi(struct adapter *adapter)
2113 {
2114 struct ether_multi *enm;
2115 struct ether_multistep step;
2116 u32 fctrl;
2117 u8 *mta;
2118 u8 *update_ptr;
2119 int mcnt = 0;
2120 struct ethercom *ec = &adapter->osdep.ec;
2121 struct ifnet *ifp = adapter->ifp;
2122
2123 IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
2124
2125 mta = adapter->mta;
2126 bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
2127 MAX_NUM_MULTICAST_ADDRESSES);
2128
2129 ifp->if_flags &= ~IFF_ALLMULTI;
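	/*
	** Walk the multicast list; a range entry (addrlo != addrhi)
	** or more addresses than the hardware table holds forces a
	** fallback to receiving all multicast traffic.
	*/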
2130 ETHER_FIRST_MULTI(step, ec, enm);
2131 while (enm != NULL) {
2132 if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
2133 (memcmp(enm->enm_addrlo, enm->enm_addrhi,
2134 ETHER_ADDR_LEN) != 0)) {
2135 ifp->if_flags |= IFF_ALLMULTI;
2136 break;
2137 }
2138 bcopy(enm->enm_addrlo,
2139 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2140 IXGBE_ETH_LENGTH_OF_ADDRESS);
2141 mcnt++;
2142 ETHER_NEXT_MULTI(step, enm);
2143 }
2144
2145 fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2146 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2147 if (ifp->if_flags & IFF_PROMISC)
2148 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2149 else if (ifp->if_flags & IFF_ALLMULTI) {
2150 fctrl |= IXGBE_FCTRL_MPE;
2151 }
2152
2153 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2154
2155 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2156 update_ptr = mta;
2157 ixgbe_update_mc_addr_list(&adapter->hw,
2158 update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2159 }
2160
2161 return;
2162 }
2163
2164 /*
2165 * This is an iterator function now needed by the multicast
2166 * shared code. It simply feeds the shared code routine the
2167 * addresses in the array of ixgbe_set_multi() one by one.
2168 */
2169 static u8 *
2170 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2171 {
2172 u8 *addr = *update_ptr;
2173 u8 *newptr;
2174 *vmdq = 0;
2175
2176 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2177 *update_ptr = newptr;
2178 return addr;
2179 }
2180
2181
2182 /*********************************************************************
2183 * Timer routine
2184 *
 * This routine checks for link status, updates statistics,
2186 * and runs the watchdog check.
2187 *
2188 **********************************************************************/
2189
2190 static void
2191 ixgbe_local_timer1(void *arg)
2192 {
2193 struct adapter *adapter = arg;
2194 device_t dev = adapter->dev;
2195 struct ix_queue *que = adapter->queues;
2196 struct tx_ring *txr = adapter->tx_rings;
2197 int hung = 0, paused = 0;
2198
2199 KASSERT(mutex_owned(&adapter->core_mtx));
2200
2201 /* Check for pluggable optics */
2202 if (adapter->sfp_probe)
2203 if (!ixgbe_sfp_probe(adapter))
2204 goto out; /* Nothing to do */
2205
2206 ixgbe_update_link_status(adapter);
2207 ixgbe_update_stats_counters(adapter);
2208
2209 /*
2210 * If the interface has been paused
2211 * then don't do the watchdog check
2212 */
2213 if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2214 paused = 1;
2215
2216 /*
	** Check the TX queues' status
2218 ** - watchdog only if all queues show hung
2219 */
2220 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2221 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2222 (paused == 0))
2223 ++hung;
2224 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2225 softint_schedule(que->que_si);
2226 }
	/* Only truly watchdog if all queues show hung */
2228 if (hung == adapter->num_queues)
2229 goto watchdog;
2230
2231 out:
2232 ixgbe_rearm_queues(adapter, adapter->que_mask);
2233 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2234 return;
2235
2236 watchdog:
2237 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2238 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2239 IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2240 IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2241 device_printf(dev,"TX(%d) desc avail = %d,"
2242 "Next TX to Clean = %d\n",
2243 txr->me, txr->tx_avail, txr->next_to_clean);
2244 adapter->ifp->if_flags &= ~IFF_RUNNING;
2245 adapter->watchdog_events.ev_count++;
2246 ixgbe_init_locked(adapter);
2247 }
2248
2249 static void
2250 ixgbe_local_timer(void *arg)
2251 {
2252 struct adapter *adapter = arg;
2253
2254 IXGBE_CORE_LOCK(adapter);
2255 ixgbe_local_timer1(adapter);
2256 IXGBE_CORE_UNLOCK(adapter);
2257 }
2258
2259 /*
** Note: this routine updates the OS on the link state;
2261 ** the real check of the hardware only happens with
2262 ** a link interrupt.
2263 */
2264 static void
2265 ixgbe_update_link_status(struct adapter *adapter)
2266 {
2267 struct ifnet *ifp = adapter->ifp;
2268 device_t dev = adapter->dev;
2269
2270
2271 if (adapter->link_up){
2272 if (adapter->link_active == FALSE) {
2273 if (bootverbose)
2274 device_printf(dev,"Link is up %d Gbps %s \n",
2275 ((adapter->link_speed == 128)? 10:1),
2276 "Full Duplex");
2277 adapter->link_active = TRUE;
2278 /* Update any Flow Control changes */
2279 ixgbe_fc_enable(&adapter->hw);
2280 if_link_state_change(ifp, LINK_STATE_UP);
2281 }
2282 } else { /* Link down */
2283 if (adapter->link_active == TRUE) {
2284 if (bootverbose)
2285 device_printf(dev,"Link is Down\n");
2286 if_link_state_change(ifp, LINK_STATE_DOWN);
2287 adapter->link_active = FALSE;
2288 }
2289 }
2290
2291 return;
2292 }
2293
2294
2295 static void
2296 ixgbe_ifstop(struct ifnet *ifp, int disable)
2297 {
2298 struct adapter *adapter = ifp->if_softc;
2299
2300 IXGBE_CORE_LOCK(adapter);
2301 ixgbe_stop(adapter);
2302 IXGBE_CORE_UNLOCK(adapter);
2303 }
2304
2305 /*********************************************************************
2306 *
2307 * This routine disables all traffic on the adapter by issuing a
 * global reset on the MAC and deallocating TX/RX buffers.
2309 *
2310 **********************************************************************/
2311
2312 static void
2313 ixgbe_stop(void *arg)
2314 {
2315 struct ifnet *ifp;
2316 struct adapter *adapter = arg;
2317 struct ixgbe_hw *hw = &adapter->hw;
2318 ifp = adapter->ifp;
2319
2320 KASSERT(mutex_owned(&adapter->core_mtx));
2321
2322 INIT_DEBUGOUT("ixgbe_stop: begin\n");
2323 ixgbe_disable_intr(adapter);
2324 callout_stop(&adapter->timer);
2325
2326 /* Let the stack know...*/
2327 ifp->if_flags &= ~IFF_RUNNING;
2328
2329 ixgbe_reset_hw(hw);
2330 hw->adapter_stopped = FALSE;
2331 ixgbe_stop_adapter(hw);
2332 /* Turn off the laser */
2333 if (hw->phy.multispeed_fiber)
2334 ixgbe_disable_tx_laser(hw);
2335
2336 /* reprogram the RAR[0] in case user changed it. */
2337 ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2338
2339 return;
2340 }
2341
2342
2343 /*********************************************************************
2344 *
2345 * Determine hardware revision.
2346 *
2347 **********************************************************************/
2348 static void
2349 ixgbe_identify_hardware(struct adapter *adapter)
2350 {
2351 pcitag_t tag;
2352 pci_chipset_tag_t pc;
2353 pcireg_t subid, id;
2354 struct ixgbe_hw *hw = &adapter->hw;
2355
2356 pc = adapter->osdep.pc;
2357 tag = adapter->osdep.tag;
2358
2359 id = pci_conf_read(pc, tag, PCI_ID_REG);
2360 subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
2361
2362 /* Save off the information about this board */
2363 hw->vendor_id = PCI_VENDOR(id);
2364 hw->device_id = PCI_PRODUCT(id);
2365 hw->revision_id =
2366 PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
2367 hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
2368 hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
2369
2370 /* We need this here to set the num_segs below */
2371 ixgbe_set_mac_type(hw);
2372
2373 /* Pick up the 82599 and VF settings */
2374 if (hw->mac.type != ixgbe_mac_82598EB) {
2375 hw->phy.smart_speed = ixgbe_smart_speed;
2376 adapter->num_segs = IXGBE_82599_SCATTER;
2377 } else
2378 adapter->num_segs = IXGBE_82598_SCATTER;
2379
2380 return;
2381 }
2382
2383 /*********************************************************************
2384 *
2385 * Determine optic type
2386 *
2387 **********************************************************************/
2388 static void
2389 ixgbe_setup_optics(struct adapter *adapter)
2390 {
2391 struct ixgbe_hw *hw = &adapter->hw;
2392 int layer;
2393
2394 layer = ixgbe_get_supported_physical_layer(hw);
2395
2396 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2397 adapter->optics = IFM_10G_T;
2398 return;
2399 }
2400
2401 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2402 adapter->optics = IFM_1000_T;
2403 return;
2404 }
2405
2406 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2407 adapter->optics = IFM_1000_SX;
2408 return;
2409 }
2410
2411 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2412 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2413 adapter->optics = IFM_10G_LR;
2414 return;
2415 }
2416
2417 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2418 adapter->optics = IFM_10G_SR;
2419 return;
2420 }
2421
2422 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2423 adapter->optics = IFM_10G_TWINAX;
2424 return;
2425 }
2426
2427 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2428 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2429 adapter->optics = IFM_10G_CX4;
2430 return;
2431 }
2432
2433 /* If we get here just set the default */
2434 adapter->optics = IFM_ETHER | IFM_AUTO;
2435 return;
2436 }
2437
2438 /*********************************************************************
2439 *
2440 * Setup the Legacy or MSI Interrupt handler
2441 *
2442 **********************************************************************/
2443 static int
2444 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
2445 {
2446 device_t dev = adapter->dev;
2447 struct ix_queue *que = adapter->queues;
2448 #ifndef IXGBE_LEGACY_TX
2449 struct tx_ring *txr = adapter->tx_rings;
2450 #endif
2451 char intrbuf[PCI_INTRSTR_LEN];
2452 #if 0
2453 int rid = 0;
2454
2455 /* MSI RID at 1 */
2456 if (adapter->msix == 1)
2457 rid = 1;
2458 #endif
2459
2460 /* We allocate a single interrupt resource */
2461 if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
2462 aprint_error_dev(dev, "unable to map interrupt\n");
2463 return ENXIO;
2464 } else {
2465 aprint_normal_dev(dev, "interrupting at %s\n",
2466 pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
2467 intrbuf, sizeof(intrbuf)));
2468 }
2469
2470 /*
2471 * Try allocating a fast interrupt and the associated deferred
2472 * processing contexts.
2473 */
2474 #ifndef IXGBE_LEGACY_TX
2475 txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
2476 txr);
2477 #endif
2478 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
2479
2480 /* Tasklets for Link, SFP and Multispeed Fiber */
2481 adapter->link_si =
2482 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2483 adapter->mod_si =
2484 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2485 adapter->msf_si =
2486 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2487
2488 #ifdef IXGBE_FDIR
2489 adapter->fdir_si =
2490 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2491 #endif
2492 if (que->que_si == NULL ||
2493 adapter->link_si == NULL ||
2494 adapter->mod_si == NULL ||
2495 #ifdef IXGBE_FDIR
2496 adapter->fdir_si == NULL ||
2497 #endif
2498 adapter->msf_si == NULL) {
2499 aprint_error_dev(dev,
2500 "could not establish software interrupts\n");
2501 return ENXIO;
2502 }
2503
2504 adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
2505 adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
2506 if (adapter->osdep.intr == NULL) {
2507 aprint_error_dev(dev, "failed to register interrupt handler\n");
2508 softint_disestablish(que->que_si);
2509 softint_disestablish(adapter->link_si);
2510 softint_disestablish(adapter->mod_si);
2511 softint_disestablish(adapter->msf_si);
2512 #ifdef IXGBE_FDIR
2513 softint_disestablish(adapter->fdir_si);
2514 #endif
2515 return ENXIO;
2516 }
2517 /* For simplicity in the handlers */
2518 adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2519
2520 return (0);
2521 }
2522
2523
2524 /*********************************************************************
2525 *
2526 * Setup MSIX Interrupt resources and handlers
2527 *
2528 **********************************************************************/
2529 static int
2530 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
2531 {
2532 #if !defined(NETBSD_MSI_OR_MSIX)
2533 return 0;
2534 #else
2535 device_t dev = adapter->dev;
2536 struct ix_queue *que = adapter->queues;
2537 struct tx_ring *txr = adapter->tx_rings;
2538 int error, rid, vector = 0;
2539
2540 for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2541 rid = vector + 1;
2542 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2543 RF_SHAREABLE | RF_ACTIVE);
2544 if (que->res == NULL) {
2545 aprint_error_dev(dev,"Unable to allocate"
2546 " bus resource: que interrupt [%d]\n", vector);
2547 return (ENXIO);
2548 }
2549 /* Set the handler function */
2550 error = bus_setup_intr(dev, que->res,
2551 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2552 ixgbe_msix_que, que, &que->tag);
2553 if (error) {
2554 que->res = NULL;
2555 aprint_error_dev(dev,
2556 "Failed to register QUE handler\n");
2557 return error;
2558 }
2559 #if __FreeBSD_version >= 800504
2560 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2561 #endif
2562 que->msix = vector;
2563 adapter->que_mask |= (u64)(1 << que->msix);
2564 /*
2565 ** Bind the msix vector, and thus the
2566 ** ring to the corresponding cpu.
2567 */
2568 if (adapter->num_queues > 1)
2569 bus_bind_intr(dev, que->res, i);
2570
2571 #ifndef IXGBE_LEGACY_TX
2572 txr->txq_si = softint_establish(SOFTINT_NET,
2573 ixgbe_deferred_mq_start, txr);
2574 #endif
2575 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
2576 que);
2577 if (que->que_si == NULL) {
2578 aprint_error_dev(dev,
2579 "could not establish software interrupt\n");
2580 }
2581 }
2582
2583 /* and Link */
2584 rid = vector + 1;
2585 adapter->res = bus_alloc_resource_any(dev,
2586 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2587 if (!adapter->res) {
2588 aprint_error_dev(dev,"Unable to allocate bus resource: "
2589 "Link interrupt [%d]\n", rid);
2590 return (ENXIO);
2591 }
2592 /* Set the link handler function */
2593 error = bus_setup_intr(dev, adapter->res,
2594 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2595 ixgbe_msix_link, adapter, &adapter->tag);
2596 if (error) {
2597 adapter->res = NULL;
2598 aprint_error_dev(dev, "Failed to register LINK handler\n");
2599 return (error);
2600 }
2601 #if __FreeBSD_version >= 800504
2602 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2603 #endif
2604 adapter->linkvec = vector;
2605 /* Tasklets for Link, SFP and Multispeed Fiber */
2606 adapter->link_si =
2607 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2608 adapter->mod_si =
2609 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2610 adapter->msf_si =
2611 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2612 #ifdef IXGBE_FDIR
2613 adapter->fdir_si =
2614 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2615 #endif
2616
2617 return (0);
2618 #endif
2619 }
2620
2621 /*
2622 * Setup Either MSI/X or MSI
2623 */
2624 static int
2625 ixgbe_setup_msix(struct adapter *adapter)
2626 {
2627 #if !defined(NETBSD_MSI_OR_MSIX)
2628 return 0;
2629 #else
2630 device_t dev = adapter->dev;
2631 int rid, want, queues, msgs;
2632
2633 /* Override by tuneable */
2634 if (ixgbe_enable_msix == 0)
2635 goto msi;
2636
2637 /* First try MSI/X */
2638 rid = PCI_BAR(MSIX_82598_BAR);
2639 adapter->msix_mem = bus_alloc_resource_any(dev,
2640 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2641 if (!adapter->msix_mem) {
2642 rid += 4; /* 82599 maps in higher BAR */
2643 adapter->msix_mem = bus_alloc_resource_any(dev,
2644 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2645 }
2646 if (!adapter->msix_mem) {
2647 /* May not be enabled */
2648 device_printf(adapter->dev,
2649 "Unable to map MSIX table \n");
2650 goto msi;
2651 }
2652
2653 msgs = pci_msix_count(dev);
2654 if (msgs == 0) { /* system has msix disabled */
2655 bus_release_resource(dev, SYS_RES_MEMORY,
2656 rid, adapter->msix_mem);
2657 adapter->msix_mem = NULL;
2658 goto msi;
2659 }
2660
2661 /* Figure out a reasonable auto config value */
2662 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2663
2664 if (ixgbe_num_queues != 0)
2665 queues = ixgbe_num_queues;
2666 /* Set max queues to 8 when autoconfiguring */
2667 else if ((ixgbe_num_queues == 0) && (queues > 8))
2668 queues = 8;
2669
2670 /*
2671 ** Want one vector (RX/TX pair) per queue
2672 ** plus an additional for Link.
2673 */
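	/*
	** For example (hypothetical numbers): 8 CPUs with 16
	** messages advertised gives queues = 8 and want = 9.
	** Only when ixgbe_num_queues is forced beyond what the
	** device can supply do we report the shortfall and fall
	** back to the legacy setup.
	*/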
2674 want = queues + 1;
2675 if (msgs >= want)
2676 msgs = want;
2677 else {
2678 device_printf(adapter->dev,
2679 "MSIX Configuration Problem, "
2680 "%d vectors but %d queues wanted!\n",
2681 msgs, want);
2682 return (0); /* Will go to Legacy setup */
2683 }
2684 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2685 device_printf(adapter->dev,
2686 "Using MSIX interrupts with %d vectors\n", msgs);
2687 adapter->num_queues = queues;
2688 return (msgs);
2689 }
2690 msi:
2691 msgs = pci_msi_count(dev);
2692 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2693 device_printf(adapter->dev,"Using an MSI interrupt\n");
2694 else
2695 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2696 return (msgs);
2697 #endif
2698 }
2699
2700
2701 static int
2702 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
2703 {
2704 pcireg_t memtype;
2705 device_t dev = adapter->dev;
2706 bus_addr_t addr;
2707 int flags;
2708
2709 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
2710 switch (memtype) {
2711 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
2712 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
2713 adapter->osdep.mem_bus_space_tag = pa->pa_memt;
2714 if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
2715 memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
2716 goto map_err;
2717 if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
2718 aprint_normal_dev(dev, "clearing prefetchable bit\n");
2719 flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
2720 }
2721 if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
2722 adapter->osdep.mem_size, flags,
2723 &adapter->osdep.mem_bus_space_handle) != 0) {
2724 map_err:
2725 adapter->osdep.mem_size = 0;
2726 aprint_error_dev(dev, "unable to map BAR0\n");
2727 return ENXIO;
2728 }
2729 break;
2730 default:
2731 aprint_error_dev(dev, "unexpected type on BAR0\n");
2732 return ENXIO;
2733 }
2734
2735 /* Legacy defaults */
2736 adapter->num_queues = 1;
2737 adapter->hw.back = &adapter->osdep;
2738
2739 /*
2740 ** Now setup MSI or MSI/X, should
2741 ** return us the number of supported
2742 ** vectors. (Will be 1 for MSI)
2743 */
2744 adapter->msix = ixgbe_setup_msix(adapter);
2745 return (0);
2746 }
2747
2748 static void
2749 ixgbe_free_pci_resources(struct adapter * adapter)
2750 {
2751 #if defined(NETBSD_MSI_OR_MSIX)
2752 struct ix_queue *que = adapter->queues;
2753 device_t dev = adapter->dev;
2754 #endif
2755 int rid;
2756
2757 #if defined(NETBSD_MSI_OR_MSIX)
2758 int memrid;
2759 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2760 memrid = PCI_BAR(MSIX_82598_BAR);
2761 else
2762 memrid = PCI_BAR(MSIX_82599_BAR);
2763
2764 /*
2765 ** There is a slight possibility of a failure mode
2766 ** in attach that will result in entering this function
2767 ** before interrupt resources have been initialized, and
	** in that case we do not want to execute the loops below.
2769 ** We can detect this reliably by the state of the adapter
2770 ** res pointer.
2771 */
2772 if (adapter->res == NULL)
2773 goto mem;
2774
2775 /*
2776 ** Release all msix queue resources:
2777 */
2778 for (int i = 0; i < adapter->num_queues; i++, que++) {
2779 rid = que->msix + 1;
2780 if (que->tag != NULL) {
2781 bus_teardown_intr(dev, que->res, que->tag);
2782 que->tag = NULL;
2783 }
2784 if (que->res != NULL)
2785 bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2786 }
2787 #endif
2788
2789 /* Clean the Legacy or Link interrupt last */
2790 if (adapter->linkvec) /* we are doing MSIX */
2791 rid = adapter->linkvec + 1;
2792 else
		rid = (adapter->msix != 0) ? 1 : 0;
2794
2795 pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
2796 adapter->osdep.intr = NULL;
2797
2798 #if defined(NETBSD_MSI_OR_MSIX)
2799 mem:
2800 if (adapter->msix)
2801 pci_release_msi(dev);
2802
2803 if (adapter->msix_mem != NULL)
2804 bus_release_resource(dev, SYS_RES_MEMORY,
2805 memrid, adapter->msix_mem);
2806 #endif
2807
2808 if (adapter->osdep.mem_size != 0) {
2809 bus_space_unmap(adapter->osdep.mem_bus_space_tag,
2810 adapter->osdep.mem_bus_space_handle,
2811 adapter->osdep.mem_size);
2812 }
2813
2814 return;
2815 }
2816
2817 /*********************************************************************
2818 *
2819 * Setup networking device structure and register an interface.
2820 *
2821 **********************************************************************/
2822 static int
2823 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2824 {
2825 struct ethercom *ec = &adapter->osdep.ec;
2826 struct ixgbe_hw *hw = &adapter->hw;
2827 struct ifnet *ifp;
2828
2829 INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2830
2831 ifp = adapter->ifp = &ec->ec_if;
2832 strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
2833 ifp->if_baudrate = IF_Gbps(10);
2834 ifp->if_init = ixgbe_init;
2835 ifp->if_stop = ixgbe_ifstop;
2836 ifp->if_softc = adapter;
2837 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2838 ifp->if_ioctl = ixgbe_ioctl;
2839 #ifndef IXGBE_LEGACY_TX
2840 ifp->if_transmit = ixgbe_mq_start;
2841 ifp->if_qflush = ixgbe_qflush;
2842 #else
2843 ifp->if_start = ixgbe_start;
2844 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2845 #endif
2846
2847 if_attach(ifp);
2848 ether_ifattach(ifp, adapter->hw.mac.addr);
2849 ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
2850
2851 adapter->max_frame_size =
2852 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2853
2854 /*
2855 * Tell the upper layer(s) we support long frames.
2856 */
2857 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2858
2859 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
2860 ifp->if_capenable = 0;
2861
2862 ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
2863 ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
2864 ifp->if_capabilities |= IFCAP_LRO;
2865 ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
2866 | ETHERCAP_VLAN_MTU;
2867 ec->ec_capenable = ec->ec_capabilities;
2868
2869 /*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (e.g. lagg),
	** vlan events are not passed through, breaking
	** operation, though it works with HW FILTER off.
	** If you use vlans directly on the ixgbe driver
	** you can enable this and get full hardware tag
	** filtering.
2876 */
2877 ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
2878
2879 /*
2880 * Specify the media types supported by this adapter and register
2881 * callbacks to update media and link information
2882 */
2883 ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2884 ixgbe_media_status);
2885 ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2886 ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2887 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2888 ifmedia_add(&adapter->media,
2889 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2890 ifmedia_add(&adapter->media,
2891 IFM_ETHER | IFM_1000_T, 0, NULL);
2892 }
2893 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2894 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2895
2896 return (0);
2897 }
2898
2899 static void
2900 ixgbe_config_link(struct adapter *adapter)
2901 {
2902 struct ixgbe_hw *hw = &adapter->hw;
2903 u32 autoneg, err = 0;
2904 bool sfp, negotiate;
2905
2906 sfp = ixgbe_is_sfp(hw);
2907
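	/*
	** Pluggable (SFP+) ports defer their setup to the module
	** and multispeed-fiber softints; fixed ports check the
	** link and program the advertised speeds directly.
	*/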
2908 if (sfp) {
2909 void *ip;
2910
2911 if (hw->phy.multispeed_fiber) {
2912 hw->mac.ops.setup_sfp(hw);
2913 ixgbe_enable_tx_laser(hw);
2914 ip = adapter->msf_si;
2915 } else {
2916 ip = adapter->mod_si;
2917 }
2918
2919 kpreempt_disable();
2920 softint_schedule(ip);
2921 kpreempt_enable();
2922 } else {
2923 if (hw->mac.ops.check_link)
2924 err = ixgbe_check_link(hw, &adapter->link_speed,
2925 &adapter->link_up, FALSE);
2926 if (err)
2927 goto out;
2928 autoneg = hw->phy.autoneg_advertised;
2929 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2930 err = hw->mac.ops.get_link_capabilities(hw,
2931 &autoneg, &negotiate);
2932 else
2933 negotiate = 0;
2934 if (err)
2935 goto out;
2936 if (hw->mac.ops.setup_link)
2937 err = hw->mac.ops.setup_link(hw,
2938 autoneg, adapter->link_up);
2939 }
2940 out:
2941 return;
2942 }
2943
2944 /********************************************************************
2945 * Manage DMA'able memory.
2946 *******************************************************************/
2947
2948 static int
2949 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2950 struct ixgbe_dma_alloc *dma, const int mapflags)
2951 {
2952 device_t dev = adapter->dev;
2953 int r, rsegs;
2954
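	/*
	** Standard bus_dma sequence: create a tag, allocate and
	** map the memory, then create and load a map; each fail_*
	** label below unwinds exactly the steps that succeeded.
	*/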
2955 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2956 DBA_ALIGN, 0, /* alignment, bounds */
2957 size, /* maxsize */
2958 1, /* nsegments */
2959 size, /* maxsegsize */
2960 BUS_DMA_ALLOCNOW, /* flags */
2961 &dma->dma_tag);
2962 if (r != 0) {
2963 aprint_error_dev(dev,
2964 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2965 goto fail_0;
2966 }
2967
2968 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2969 size,
2970 dma->dma_tag->dt_alignment,
2971 dma->dma_tag->dt_boundary,
2972 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2973 if (r != 0) {
2974 aprint_error_dev(dev,
2975 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2976 goto fail_1;
2977 }
2978
2979 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2980 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2981 if (r != 0) {
2982 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2983 __func__, r);
2984 goto fail_2;
2985 }
2986
2987 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2988 if (r != 0) {
		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2990 __func__, r);
2991 goto fail_3;
2992 }
2993
2994 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2995 size,
2996 NULL,
2997 mapflags | BUS_DMA_NOWAIT);
2998 if (r != 0) {
2999 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
3000 __func__, r);
3001 goto fail_4;
3002 }
3003 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
3004 dma->dma_size = size;
3005 return 0;
3006 fail_4:
3007 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
3008 fail_3:
3009 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
3010 fail_2:
3011 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
3012 fail_1:
3013 ixgbe_dma_tag_destroy(dma->dma_tag);
3014 fail_0:
3015 return r;
3016 }
3017
3018 static void
3019 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
3020 {
3021 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
3022 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3023 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
3024 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
3025 ixgbe_dma_tag_destroy(dma->dma_tag);
3026 }
3027
3028
3029 /*********************************************************************
3030 *
3031 * Allocate memory for the transmit and receive rings, and then
3032 * the descriptors associated with each, called only once at attach.
3033 *
3034 **********************************************************************/
3035 static int
3036 ixgbe_allocate_queues(struct adapter *adapter)
3037 {
3038 device_t dev = adapter->dev;
3039 struct ix_queue *que;
3040 struct tx_ring *txr;
3041 struct rx_ring *rxr;
3042 int rsize, tsize, error = IXGBE_SUCCESS;
3043 int txconf = 0, rxconf = 0;
3044
3045 /* First allocate the top level queue structs */
3046 if (!(adapter->queues =
3047 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
3048 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3049 aprint_error_dev(dev, "Unable to allocate queue memory\n");
3050 error = ENOMEM;
3051 goto fail;
3052 }
3053
3054 /* First allocate the TX ring struct memory */
3055 if (!(adapter->tx_rings =
3056 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3057 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3058 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
3059 error = ENOMEM;
3060 goto tx_fail;
3061 }
3062
3063 /* Next allocate the RX */
3064 if (!(adapter->rx_rings =
3065 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3066 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3067 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
3068 error = ENOMEM;
3069 goto rx_fail;
3070 }
3071
3072 /* For the ring itself */
3073 tsize = roundup2(adapter->num_tx_desc *
3074 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
3075
3076 /*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully.
3080 */
3081 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3082 /* Set up some basics */
3083 txr = &adapter->tx_rings[i];
3084 txr->adapter = adapter;
3085 txr->me = i;
3086 txr->num_desc = adapter->num_tx_desc;
3087
3088 /* Initialize the TX side lock */
3089 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3090 device_xname(dev), txr->me);
3091 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
3092
3093 if (ixgbe_dma_malloc(adapter, tsize,
3094 &txr->txdma, BUS_DMA_NOWAIT)) {
3095 aprint_error_dev(dev,
3096 "Unable to allocate TX Descriptor memory\n");
3097 error = ENOMEM;
3098 goto err_tx_desc;
3099 }
3100 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
3101 bzero((void *)txr->tx_base, tsize);
3102
3103 /* Now allocate transmit buffers for the ring */
3104 if (ixgbe_allocate_transmit_buffers(txr)) {
3105 aprint_error_dev(dev,
3106 "Critical Failure setting up transmit buffers\n");
3107 error = ENOMEM;
3108 goto err_tx_desc;
3109 }
3110 #ifndef IXGBE_LEGACY_TX
3111 /* Allocate a buf ring */
3112 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
3113 M_WAITOK, &txr->tx_mtx);
3114 if (txr->br == NULL) {
3115 aprint_error_dev(dev,
3116 "Critical Failure setting up buf ring\n");
3117 error = ENOMEM;
3118 goto err_tx_desc;
3119 }
3120 #endif
3121 }
3122
3123 /*
3124 * Next the RX queues...
3125 */
3126 rsize = roundup2(adapter->num_rx_desc *
3127 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3128 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3129 rxr = &adapter->rx_rings[i];
3130 /* Set up some basics */
3131 rxr->adapter = adapter;
3132 rxr->me = i;
3133 rxr->num_desc = adapter->num_rx_desc;
3134
3135 /* Initialize the RX side lock */
3136 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3137 device_xname(dev), rxr->me);
3138 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
3139
3140 if (ixgbe_dma_malloc(adapter, rsize,
3141 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3142 aprint_error_dev(dev,
3143 "Unable to allocate RxDescriptor memory\n");
3144 error = ENOMEM;
3145 goto err_rx_desc;
3146 }
3147 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3148 bzero((void *)rxr->rx_base, rsize);
3149
3150 /* Allocate receive buffers for the ring*/
3151 if (ixgbe_allocate_receive_buffers(rxr)) {
3152 aprint_error_dev(dev,
3153 "Critical Failure setting up receive buffers\n");
3154 error = ENOMEM;
3155 goto err_rx_desc;
3156 }
3157 }
3158
3159 /*
3160 ** Finally set up the queue holding structs
3161 */
3162 for (int i = 0; i < adapter->num_queues; i++) {
3163 que = &adapter->queues[i];
3164 que->adapter = adapter;
3165 que->txr = &adapter->tx_rings[i];
3166 que->rxr = &adapter->rx_rings[i];
3167 }
3168
3169 return (0);
3170
3171 err_rx_desc:
3172 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3173 ixgbe_dma_free(adapter, &rxr->rxdma);
3174 err_tx_desc:
3175 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3176 ixgbe_dma_free(adapter, &txr->txdma);
3177 free(adapter->rx_rings, M_DEVBUF);
3178 rx_fail:
3179 free(adapter->tx_rings, M_DEVBUF);
3180 tx_fail:
3181 free(adapter->queues, M_DEVBUF);
3182 fail:
3183 return (error);
3184 }
3185
3186 /*********************************************************************
3187 *
3188 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3189 * the information needed to transmit a packet on the wire. This is
 * called only once at attach; setup is done on every reset.
3191 *
3192 **********************************************************************/
3193 static int
3194 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3195 {
3196 struct adapter *adapter = txr->adapter;
3197 device_t dev = adapter->dev;
3198 struct ixgbe_tx_buf *txbuf;
3199 int error, i;
3200
3201 /*
3202 * Setup DMA descriptor areas.
3203 */
3204 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3205 1, 0, /* alignment, bounds */
3206 IXGBE_TSO_SIZE, /* maxsize */
3207 adapter->num_segs, /* nsegments */
3208 PAGE_SIZE, /* maxsegsize */
3209 0, /* flags */
3210 &txr->txtag))) {
3211 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
3212 goto fail;
3213 }
3214
3215 if (!(txr->tx_buffers =
3216 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3217 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3218 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
3219 error = ENOMEM;
3220 goto fail;
3221 }
3222
3223 /* Create the descriptor buffer dma maps */
3224 txbuf = txr->tx_buffers;
3225 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3226 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
3227 if (error != 0) {
3228 aprint_error_dev(dev,
3229 "Unable to create TX DMA map (%d)\n", error);
3230 goto fail;
3231 }
3232 }
3233
3234 return 0;
3235 fail:
	/* We free all; this handles the case where we are in the middle */
3237 ixgbe_free_transmit_structures(adapter);
3238 return (error);
3239 }
3240
3241 /*********************************************************************
3242 *
3243 * Initialize a transmit ring.
3244 *
3245 **********************************************************************/
3246 static void
3247 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3248 {
3249 struct adapter *adapter = txr->adapter;
3250 struct ixgbe_tx_buf *txbuf;
3251 int i;
3252 #ifdef DEV_NETMAP
3253 struct netmap_adapter *na = NA(adapter->ifp);
3254 struct netmap_slot *slot;
3255 #endif /* DEV_NETMAP */
3256
3257 /* Clear the old ring contents */
3258 IXGBE_TX_LOCK(txr);
3259 #ifdef DEV_NETMAP
3260 /*
3261 * (under lock): if in netmap mode, do some consistency
3262 * checks and set slot to entry 0 of the netmap ring.
3263 */
3264 slot = netmap_reset(na, NR_TX, txr->me, 0);
3265 #endif /* DEV_NETMAP */
3266 bzero((void *)txr->tx_base,
3267 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3268 /* Reset indices */
3269 txr->next_avail_desc = 0;
3270 txr->next_to_clean = 0;
3271
3272 /* Free any existing tx buffers. */
3273 txbuf = txr->tx_buffers;
3274 for (i = 0; i < txr->num_desc; i++, txbuf++) {
3275 if (txbuf->m_head != NULL) {
3276 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
3277 0, txbuf->m_head->m_pkthdr.len,
3278 BUS_DMASYNC_POSTWRITE);
3279 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
3280 m_freem(txbuf->m_head);
3281 txbuf->m_head = NULL;
3282 }
3283 #ifdef DEV_NETMAP
3284 /*
3285 * In netmap mode, set the map for the packet buffer.
3286 * NOTE: Some drivers (not this one) also need to set
3287 * the physical buffer address in the NIC ring.
3288 * Slots in the netmap ring (indexed by "si") are
3289 * kring->nkr_hwofs positions "ahead" wrt the
3290 * corresponding slot in the NIC ring. In some drivers
3291 * (not here) nkr_hwofs can be negative. Function
3292 * netmap_idx_n2k() handles wraparounds properly.
3293 */
3294 if (slot) {
3295 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3296 netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3297 }
3298 #endif /* DEV_NETMAP */
3299 /* Clear the EOP descriptor pointer */
3300 txbuf->eop = NULL;
3301 }
3302
3303 #ifdef IXGBE_FDIR
3304 /* Set the rate at which we sample packets */
3305 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3306 txr->atr_sample = atr_sample_rate;
3307 #endif
3308
3309 /* Set number of descriptors available */
3310 txr->tx_avail = adapter->num_tx_desc;
3311
3312 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3313 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3314 IXGBE_TX_UNLOCK(txr);
3315 }
3316
3317 /*********************************************************************
3318 *
3319 * Initialize all transmit rings.
3320 *
3321 **********************************************************************/
3322 static int
3323 ixgbe_setup_transmit_structures(struct adapter *adapter)
3324 {
3325 struct tx_ring *txr = adapter->tx_rings;
3326
3327 for (int i = 0; i < adapter->num_queues; i++, txr++)
3328 ixgbe_setup_transmit_ring(txr);
3329
3330 return (0);
3331 }
3332
3333 /*********************************************************************
3334 *
3335 * Enable transmit unit.
3336 *
3337 **********************************************************************/
3338 static void
3339 ixgbe_initialize_transmit_units(struct adapter *adapter)
3340 {
3341 struct tx_ring *txr = adapter->tx_rings;
3342 struct ixgbe_hw *hw = &adapter->hw;
3343
3344 /* Setup the Base and Length of the Tx Descriptor Ring */
3345
3346 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3347 u64 tdba = txr->txdma.dma_paddr;
3348 u32 txctrl;
3349
3350 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3351 (tdba & 0x00000000ffffffffULL));
3352 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3353 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3354 adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3355
3356 /* Setup the HW Tx Head and Tail descriptor pointers */
3357 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3358 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3359
3360 /* Setup Transmit Descriptor Cmd Settings */
3361 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3362 txr->queue_status = IXGBE_QUEUE_IDLE;
3363
3364 /* Set the processing limit */
3365 txr->process_limit = ixgbe_tx_process_limit;
3366
3367 /* Disable Head Writeback */
3368 switch (hw->mac.type) {
3369 case ixgbe_mac_82598EB:
3370 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3371 break;
3372 case ixgbe_mac_82599EB:
3373 case ixgbe_mac_X540:
3374 default:
3375 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3376 break;
3377 }
3378 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3379 switch (hw->mac.type) {
3380 case ixgbe_mac_82598EB:
3381 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3382 break;
3383 case ixgbe_mac_82599EB:
3384 case ixgbe_mac_X540:
3385 default:
3386 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3387 break;
3388 }
3389
3390 }
3391
3392 if (hw->mac.type != ixgbe_mac_82598EB) {
3393 u32 dmatxctl, rttdcs;
3394 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3395 dmatxctl |= IXGBE_DMATXCTL_TE;
3396 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3397 /* Disable arbiter to set MTQC */
3398 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3399 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3400 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3401 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3402 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3403 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3404 }
3405
3406 return;
3407 }
3408
3409 /*********************************************************************
3410 *
3411 * Free all transmit rings.
3412 *
3413 **********************************************************************/
3414 static void
3415 ixgbe_free_transmit_structures(struct adapter *adapter)
3416 {
3417 struct tx_ring *txr = adapter->tx_rings;
3418
3419 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3420 ixgbe_free_transmit_buffers(txr);
3421 ixgbe_dma_free(adapter, &txr->txdma);
3422 IXGBE_TX_LOCK_DESTROY(txr);
3423 }
3424 free(adapter->tx_rings, M_DEVBUF);
3425 }
3426
3427 /*********************************************************************
3428 *
3429 * Free transmit ring related data structures.
3430 *
3431 **********************************************************************/
3432 static void
3433 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3434 {
3435 struct adapter *adapter = txr->adapter;
3436 struct ixgbe_tx_buf *tx_buffer;
3437 int i;
3438
3439 INIT_DEBUGOUT("free_transmit_ring: begin");
3440
3441 if (txr->tx_buffers == NULL)
3442 return;
3443
3444 tx_buffer = txr->tx_buffers;
3445 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3446 if (tx_buffer->m_head != NULL) {
3447 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
3448 0, tx_buffer->m_head->m_pkthdr.len,
3449 BUS_DMASYNC_POSTWRITE);
3450 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3451 m_freem(tx_buffer->m_head);
3452 tx_buffer->m_head = NULL;
3453 if (tx_buffer->map != NULL) {
3454 ixgbe_dmamap_destroy(txr->txtag,
3455 tx_buffer->map);
3456 tx_buffer->map = NULL;
3457 }
3458 } else if (tx_buffer->map != NULL) {
3459 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3460 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
3461 tx_buffer->map = NULL;
3462 }
3463 }
3464 #ifndef IXGBE_LEGACY_TX
3465 if (txr->br != NULL)
3466 buf_ring_free(txr->br, M_DEVBUF);
3467 #endif
3468 if (txr->tx_buffers != NULL) {
3469 free(txr->tx_buffers, M_DEVBUF);
3470 txr->tx_buffers = NULL;
3471 }
3472 if (txr->txtag != NULL) {
3473 ixgbe_dma_tag_destroy(txr->txtag);
3474 txr->txtag = NULL;
3475 }
3476 return;
3477 }
3478
3479 /*********************************************************************
3480 *
3481 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3482 *
3483 **********************************************************************/
3484
3485 static int
3486 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3487 u32 *cmd_type_len, u32 *olinfo_status)
3488 {
3489 struct m_tag *mtag;
3490 struct adapter *adapter = txr->adapter;
3491 struct ethercom *ec = &adapter->osdep.ec;
3492 struct ixgbe_adv_tx_context_desc *TXD;
3493 struct ether_vlan_header *eh;
3494 struct ip ip;
3495 struct ip6_hdr ip6;
3496 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3497 int ehdrlen, ip_hlen = 0;
3498 u16 etype;
3499 u8 ipproto __diagused = 0;
3500 int offload = TRUE;
3501 int ctxd = txr->next_avail_desc;
3502 u16 vtag = 0;
3503
3504 /* First check if TSO is to be used */
3505 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
3506 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3507
3508 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
3509 offload = FALSE;
3510
3511 /* Indicate the whole packet as payload when not doing TSO */
3512 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3513
3514 /* Now ready a context descriptor */
3515 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3516
3517 /*
3518 ** In advanced descriptors the vlan tag must
3519 ** be placed into the context descriptor. Hence
3520 ** we need to make one even if not doing offloads.
3521 */
3522 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3523 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3524 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3525 } else if (offload == FALSE) /* ... no offload to do */
3526 return 0;
3527
3528 /*
3529 * Determine where frame payload starts.
3530 * Jump over vlan headers if already present,
3531 * helpful for QinQ too.
3532 */
3533 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
3534 eh = mtod(mp, struct ether_vlan_header *);
3535 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3536 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3537 etype = ntohs(eh->evl_proto);
3538 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3539 } else {
3540 etype = ntohs(eh->evl_encap_proto);
3541 ehdrlen = ETHER_HDR_LEN;
3542 }
3543
3544 /* Set the ether header length */
3545 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3546
3547 switch (etype) {
3548 case ETHERTYPE_IP:
3549 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
3550 ip_hlen = ip.ip_hl << 2;
3551 ipproto = ip.ip_p;
3552 #if 0
3553 ip.ip_sum = 0;
3554 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
3555 #else
3556 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3557 ip.ip_sum == 0);
3558 #endif
3559 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3560 break;
3561 case ETHERTYPE_IPV6:
3562 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
3563 ip_hlen = sizeof(ip6);
3564 /* XXX-BZ this will go badly in case of ext hdrs. */
3565 ipproto = ip6.ip6_nxt;
3566 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3567 break;
3568 default:
3569 break;
3570 }
3571
3572 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
3573 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3574
3575 vlan_macip_lens |= ip_hlen;
3576 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3577
3578 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
3579 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3580 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3581 KASSERT(ipproto == IPPROTO_TCP);
3582 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
3583 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3584 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3585 KASSERT(ipproto == IPPROTO_UDP);
3586 }
3587
3588 /* Now copy bits into descriptor */
3589 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3590 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3591 TXD->seqnum_seed = htole32(0);
3592 TXD->mss_l4len_idx = htole32(0);
3593
3594 /* We've consumed the first desc, adjust counters */
3595 if (++ctxd == txr->num_desc)
3596 ctxd = 0;
3597 txr->next_avail_desc = ctxd;
3598 --txr->tx_avail;
3599
3600 return 0;
3601 }
3602
3603 /**********************************************************************
3604 *
3605 * Setup work for hardware segmentation offload (TSO) on
3606 * adapters using advanced tx descriptors
3607 *
3608 **********************************************************************/
3609 static int
3610 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3611 u32 *cmd_type_len, u32 *olinfo_status)
3612 {
3613 struct m_tag *mtag;
3614 struct adapter *adapter = txr->adapter;
3615 struct ethercom *ec = &adapter->osdep.ec;
3616 struct ixgbe_adv_tx_context_desc *TXD;
3617 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3618 u32 mss_l4len_idx = 0, paylen;
3619 u16 vtag = 0, eh_type;
3620 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3621 struct ether_vlan_header *eh;
3622 #ifdef INET6
3623 struct ip6_hdr *ip6;
3624 #endif
3625 #ifdef INET
3626 struct ip *ip;
3627 #endif
3628 struct tcphdr *th;
3629
3630
3631 /*
3632 * Determine where frame payload starts.
3633 * Jump over vlan headers if already present
3634 */
3635 eh = mtod(mp, struct ether_vlan_header *);
3636 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3637 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3638 eh_type = eh->evl_proto;
3639 } else {
3640 ehdrlen = ETHER_HDR_LEN;
3641 eh_type = eh->evl_encap_proto;
3642 }
3643
3644 switch (ntohs(eh_type)) {
3645 #ifdef INET6
3646 case ETHERTYPE_IPV6:
3647 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3648 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3649 if (ip6->ip6_nxt != IPPROTO_TCP)
3650 return (ENXIO);
3651 		ip_hlen = sizeof(struct ip6_hdr);
3653 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
3654 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3655 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3656 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3657 break;
3658 #endif
3659 #ifdef INET
3660 case ETHERTYPE_IP:
3661 ip = (struct ip *)(mp->m_data + ehdrlen);
3662 if (ip->ip_p != IPPROTO_TCP)
3663 return (ENXIO);
3664 ip->ip_sum = 0;
3665 ip_hlen = ip->ip_hl << 2;
3666 th = (struct tcphdr *)((char *)ip + ip_hlen);
3667 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3668 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3669 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3670 /* Tell transmit desc to also do IPv4 checksum. */
3671 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3672 break;
3673 #endif
3674 default:
3675 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3676 __func__, ntohs(eh_type));
3677 break;
3678 }
3679
3680 ctxd = txr->next_avail_desc;
3681 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3682
3683 tcp_hlen = th->th_off << 2;
3684
3685 /* This is used in the transmit desc in encap */
3686 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3687
3688 /* VLAN MACLEN IPLEN */
3689 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3690 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3691 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3692 }
3693
3694 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3695 vlan_macip_lens |= ip_hlen;
3696 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3697
3698 /* ADV DTYPE TUCMD */
3699 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3700 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3701 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3702
3703 /* MSS L4LEN IDX */
3704 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
3705 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3706 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
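	/*
	 * Rough worked example of the value written above (shift values
	 * assumed from ixgbe_type.h, not re-derived here): for a standard
	 * 1500-byte MTU TCP flow, segsz is typically 1448 or 1460 and
	 * tcp_hlen 20, so
	 *   mss_l4len_idx = (segsz << IXGBE_ADVTXD_MSS_SHIFT) |
	 *                   (20 << IXGBE_ADVTXD_L4LEN_SHIFT)
	 * which tells the hardware how to carve the payload and how much
	 * header to rebuild in front of every segment.
	 */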
3707
3708 TXD->seqnum_seed = htole32(0);
3709
3710 if (++ctxd == txr->num_desc)
3711 ctxd = 0;
3712
3713 txr->tx_avail--;
3714 txr->next_avail_desc = ctxd;
3715 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3716 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3717 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3718 ++txr->tso_tx.ev_count;
3719 return (0);
3720 }
3721
3722 #ifdef IXGBE_FDIR
3723 /*
3724 ** This routine parses packet headers so that Flow
3725 ** Director can make a hashed filter table entry
3726 ** allowing traffic flows to be identified and kept
3727 ** on the same cpu. This would be a performance
3728 ** hit, but we only do it for one out of every
3729 ** IXGBE_FDIR_RATE packets.
3730 */
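/*
** A sketch of what the routine below feeds the filter (an informal
** reading of the code, not of the 82599 datasheet): the 4-tuple is
** folded into two dwords, with the source and destination ports
** deliberately stored swapped. The filter is programmed from a packet
** we are transmitting, so the swap presumably makes it match the
** receive direction of the same flow, where src and dst are reversed.
*/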
3731 static void
3732 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3733 {
3734 struct adapter *adapter = txr->adapter;
3735 struct ix_queue *que;
3736 struct ip *ip;
3737 struct tcphdr *th;
3738 struct udphdr *uh;
3739 struct ether_vlan_header *eh;
3740 union ixgbe_atr_hash_dword input = {.dword = 0};
3741 union ixgbe_atr_hash_dword common = {.dword = 0};
3742 int ehdrlen, ip_hlen;
3743 u16 etype;
3744
3745 eh = mtod(mp, struct ether_vlan_header *);
3746 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3747 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3748 etype = eh->evl_proto;
3749 } else {
3750 ehdrlen = ETHER_HDR_LEN;
3751 etype = eh->evl_encap_proto;
3752 }
3753
3754 /* Only handling IPv4 */
3755 if (etype != htons(ETHERTYPE_IP))
3756 return;
3757
3758 ip = (struct ip *)(mp->m_data + ehdrlen);
3759 ip_hlen = ip->ip_hl << 2;
3760
3761 /* check if we're UDP or TCP */
3762 switch (ip->ip_p) {
3763 case IPPROTO_TCP:
3764 th = (struct tcphdr *)((char *)ip + ip_hlen);
3765 /* src and dst are inverted */
3766 common.port.dst ^= th->th_sport;
3767 common.port.src ^= th->th_dport;
3768 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3769 break;
3770 case IPPROTO_UDP:
3771 uh = (struct udphdr *)((char *)ip + ip_hlen);
3772 /* src and dst are inverted */
3773 common.port.dst ^= uh->uh_sport;
3774 common.port.src ^= uh->uh_dport;
3775 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3776 break;
3777 default:
3778 return;
3779 }
3780
3781 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3782 if (mp->m_pkthdr.ether_vtag)
3783 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3784 else
3785 common.flex_bytes ^= etype;
3786 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3787
3788 que = &adapter->queues[txr->me];
3789 /*
3790 ** This assumes the Rx queue and Tx
3791 ** queue are bound to the same CPU
3792 */
3793 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3794 input, common, que->msix);
3795 }
3796 #endif /* IXGBE_FDIR */
3797
3798 /**********************************************************************
3799 *
3800 * Examine each tx_buffer in the used queue. If the hardware is done
3801 * processing the packet then free associated resources. The
3802 * tx_buffer is put back on the free queue.
3803 *
3804 **********************************************************************/
3805 static bool
3806 ixgbe_txeof(struct tx_ring *txr)
3807 {
3808 struct adapter *adapter = txr->adapter;
3809 struct ifnet *ifp = adapter->ifp;
3810 u32 work, processed = 0;
3811 u16 limit = txr->process_limit;
3812 struct ixgbe_tx_buf *buf;
3813 union ixgbe_adv_tx_desc *txd;
3814 struct timeval now, elapsed;
3815
3816 KASSERT(mutex_owned(&txr->tx_mtx));
3817
3818 #ifdef DEV_NETMAP
3819 if (ifp->if_capenable & IFCAP_NETMAP) {
3820 struct netmap_adapter *na = NA(ifp);
3821 struct netmap_kring *kring = &na->tx_rings[txr->me];
3822 txd = txr->tx_base;
3823 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3824 BUS_DMASYNC_POSTREAD);
3825 /*
3826 * In netmap mode, all the work is done in the context
3827 * of the client thread. Interrupt handlers only wake up
3828 * clients, which may be sleeping on individual rings
3829 * or on a global resource for all rings.
3830 * To implement tx interrupt mitigation, we wake up the client
3831 * thread roughly every half ring, even if the NIC interrupts
3832 * more frequently. This is implemented as follows:
3833 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3834 * the slot that should wake up the thread (nkr_num_slots
3835 * means the user thread should not be woken up);
3836 * - the driver ignores tx interrupts unless netmap_mitigate=0
3837 * or the slot has the DD bit set.
3838 *
3839 * When the driver has separate locks, we need to
3840 * release and re-acquire txlock to avoid deadlocks.
3841 * XXX see if we can find a better way.
3842 */
3843 if (!netmap_mitigate ||
3844 (kring->nr_kflags < kring->nkr_num_slots &&
3845 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3846 netmap_tx_irq(ifp, txr->me |
3847 (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3848 }
3849 return FALSE;
3850 }
3851 #endif /* DEV_NETMAP */
3852
3853 if (txr->tx_avail == txr->num_desc) {
3854 txr->queue_status = IXGBE_QUEUE_IDLE;
3855 return false;
3856 }
3857
3858 /* Get work starting point */
3859 work = txr->next_to_clean;
3860 buf = &txr->tx_buffers[work];
3861 txd = &txr->tx_base[work];
3862 work -= txr->num_desc; /* The distance to ring end */
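	/*
	 * Bookkeeping note: 'work' is kept as a negative offset from the
	 * end of the ring, so the wrap test further down is simply
	 * "!work". E.g. with num_desc = 1024 and next_to_clean = 1000,
	 * 'work' starts at -24; after 24 increments it hits zero and the
	 * indices wrap back to the start of the ring.
	 */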
3863 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3864 BUS_DMASYNC_POSTREAD);
3865 do {
3866 union ixgbe_adv_tx_desc *eop= buf->eop;
3867 if (eop == NULL) /* No work */
3868 break;
3869
3870 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3871 break; /* I/O not complete */
3872
3873 if (buf->m_head) {
3874 txr->bytes +=
3875 buf->m_head->m_pkthdr.len;
3876 bus_dmamap_sync(txr->txtag->dt_dmat,
3877 buf->map,
3878 0, buf->m_head->m_pkthdr.len,
3879 BUS_DMASYNC_POSTWRITE);
3880 ixgbe_dmamap_unload(txr->txtag,
3881 buf->map);
3882 m_freem(buf->m_head);
3883 buf->m_head = NULL;
3884 /*
3885 * NetBSD: Don't override buf->map with NULL here.
3886 * It'll panic when a ring runs one lap around.
3887 */
3888 }
3889 buf->eop = NULL;
3890 ++txr->tx_avail;
3891
3892 /* We clean the range if multi segment */
3893 while (txd != eop) {
3894 ++txd;
3895 ++buf;
3896 ++work;
3897 /* wrap the ring? */
3898 if (__predict_false(!work)) {
3899 work -= txr->num_desc;
3900 buf = txr->tx_buffers;
3901 txd = txr->tx_base;
3902 }
3903 if (buf->m_head) {
3904 txr->bytes +=
3905 buf->m_head->m_pkthdr.len;
3906 bus_dmamap_sync(txr->txtag->dt_dmat,
3907 buf->map,
3908 0, buf->m_head->m_pkthdr.len,
3909 BUS_DMASYNC_POSTWRITE);
3910 ixgbe_dmamap_unload(txr->txtag,
3911 buf->map);
3912 m_freem(buf->m_head);
3913 buf->m_head = NULL;
3914 /*
3915 * NetBSD: Don't override buf->map with NULL
3916 * here. It'll panic when a ring runs one lap
3917 * around.
3918 */
3919 }
3920 ++txr->tx_avail;
3921 buf->eop = NULL;
3922
3923 }
3924 ++txr->packets;
3925 ++processed;
3926 ++ifp->if_opackets;
3927 getmicrotime(&txr->watchdog_time);
3928
3929 /* Try the next packet */
3930 ++txd;
3931 ++buf;
3932 ++work;
3933 /* reset with a wrap */
3934 if (__predict_false(!work)) {
3935 work -= txr->num_desc;
3936 buf = txr->tx_buffers;
3937 txd = txr->tx_base;
3938 }
3939 prefetch(txd);
3940 } while (__predict_true(--limit));
3941
3942 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3943 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3944
3945 work += txr->num_desc;
3946 txr->next_to_clean = work;
3947
3948 /*
3949 	** Watchdog calculation: we know there's
3950 	** work outstanding or the first return
3951 	** would have been taken, so nothing processed
3952 	** for too long indicates a hang.
3953 */
3954 getmicrotime(&now);
3955 timersub(&now, &txr->watchdog_time, &elapsed);
3956 if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
3957 txr->queue_status = IXGBE_QUEUE_HUNG;
3958
3959 if (txr->tx_avail == txr->num_desc) {
3960 txr->queue_status = IXGBE_QUEUE_IDLE;
3961 return false;
3962 }
3963
3964 return true;
3965 }
3966
3967 /*********************************************************************
3968 *
3969 * Refresh mbuf buffers for RX descriptor rings
3970 * - now keeps its own state so discards due to resource
3971  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3972  *     it just returns, keeping its placeholder, so it can simply
3973  *     be recalled to try again.
3974 *
3975 **********************************************************************/
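/*
 * Indexing sketch for the loop below: 'i' is the slot being refreshed
 * and 'j' always runs one ahead, so the loop stops when the look-ahead
 * index reaches 'limit'. E.g. with num_desc = 8, next_to_refresh = 6
 * and limit = 2, slots 6, 7 and 0 are refreshed and next_to_refresh is
 * left at 1, the next slot to fill.
 */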
3976 static void
3977 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3978 {
3979 struct adapter *adapter = rxr->adapter;
3980 struct ixgbe_rx_buf *rxbuf;
3981 struct mbuf *mp;
3982 int i, j, error;
3983 bool refreshed = false;
3984
3985 i = j = rxr->next_to_refresh;
3986 /* Control the loop with one beyond */
3987 if (++j == rxr->num_desc)
3988 j = 0;
3989
3990 while (j != limit) {
3991 rxbuf = &rxr->rx_buffers[i];
3992 if (rxbuf->buf == NULL) {
3993 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
3994 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
3995 if (mp == NULL) {
3996 rxr->no_jmbuf.ev_count++;
3997 goto update;
3998 }
3999 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
4000 m_adj(mp, ETHER_ALIGN);
4001 } else
4002 mp = rxbuf->buf;
4003
4004 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4005 /* If we're dealing with an mbuf that was copied rather
4006 * than replaced, there's no need to go through busdma.
4007 */
4008 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
4009 /* Get the memory mapping */
4010 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4011 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4012 if (error != 0) {
4013 printf("Refresh mbufs: payload dmamap load"
4014 " failure - %d\n", error);
4015 m_free(mp);
4016 rxbuf->buf = NULL;
4017 goto update;
4018 }
4019 rxbuf->buf = mp;
4020 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4021 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
4022 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
4023 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4024 } else {
4025 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
4026 rxbuf->flags &= ~IXGBE_RX_COPY;
4027 }
4028
4029 refreshed = true;
4030 /* Next is precalculated */
4031 i = j;
4032 rxr->next_to_refresh = i;
4033 if (++j == rxr->num_desc)
4034 j = 0;
4035 }
4036 update:
4037 if (refreshed) /* Update hardware tail index */
4038 IXGBE_WRITE_REG(&adapter->hw,
4039 IXGBE_RDT(rxr->me), rxr->next_to_refresh);
4040 return;
4041 }
4042
4043 /*********************************************************************
4044 *
4045 * Allocate memory for rx_buffer structures. Since we use one
4046 * rx_buffer per received packet, the maximum number of rx_buffer's
4047  *  rx_buffer per received packet, the maximum number of rx_buffers
4048 * that we've allocated.
4049 *
4050 **********************************************************************/
4051 static int
4052 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
4053 {
4054 struct adapter *adapter = rxr->adapter;
4055 device_t dev = adapter->dev;
4056 struct ixgbe_rx_buf *rxbuf;
4057 int i, bsize, error;
4058
4059 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
4060 if (!(rxr->rx_buffers =
4061 (struct ixgbe_rx_buf *) malloc(bsize,
4062 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4063 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
4064 error = ENOMEM;
4065 goto fail;
4066 }
4067
4068 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
4069 1, 0, /* alignment, bounds */
4070 MJUM16BYTES, /* maxsize */
4071 1, /* nsegments */
4072 MJUM16BYTES, /* maxsegsize */
4073 0, /* flags */
4074 &rxr->ptag))) {
4075 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
4076 goto fail;
4077 }
4078
4079 	for (i = 0; i < rxr->num_desc; i++) {
4080 rxbuf = &rxr->rx_buffers[i];
4081 error = ixgbe_dmamap_create(rxr->ptag,
4082 BUS_DMA_NOWAIT, &rxbuf->pmap);
4083 if (error) {
4084 aprint_error_dev(dev, "Unable to create RX dma map\n");
4085 goto fail;
4086 }
4087 }
4088
4089 return (0);
4090
4091 fail:
4092 /* Frees all, but can handle partial completion */
4093 ixgbe_free_receive_structures(adapter);
4094 return (error);
4095 }
4096
4097 /*
4098 ** Used to detect a descriptor that has
4099 ** been merged by Hardware RSC.
4100 */
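/*
** Roughly speaking (the exact semantics live in the datasheet), a
** non-zero RSCCNT of, say, 3 means the hardware merged additional
** descriptors into this completion; the cleanup path in ixgbe_rxeof()
** credits rsc_num with (3 - 1) for it.
*/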
4101 static inline u32
4102 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
4103 {
4104 return (le32toh(rx->wb.lower.lo_dword.data) &
4105 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
4106 }
4107
4108 /*********************************************************************
4109 *
4110  *  Initialize the Hardware RSC (LRO) feature on the 82599
4111  *  for an RX ring; this is toggled by the LRO capability
4112  *  even though it is transparent to the stack.
4113  *
4114  *  NOTE: since this HW feature only works with IPv4, and
4115  *  our testing has shown soft LRO to be as effective,
4116  *  I have decided to disable it by default.
4117 *
4118 **********************************************************************/
4119 static void
4120 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
4121 {
4122 struct adapter *adapter = rxr->adapter;
4123 struct ixgbe_hw *hw = &adapter->hw;
4124 u32 rscctrl, rdrxctl;
4125
4126 /* If turning LRO/RSC off we need to disable it */
4127 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
4128 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4129 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back, otherwise RSC stays on */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4130 		return;
4131 }
4132
4133 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4134 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4135 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
4136 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4137 #endif /* DEV_NETMAP */
4138 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4139 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4140 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4141
4142 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4143 rscctrl |= IXGBE_RSCCTL_RSCEN;
4144 /*
4145 ** Limit the total number of descriptors that
4146 ** can be combined, so it does not exceed 64K
4147 */
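	/*
	 * Sanity check of the limits below, assuming the usual cluster
	 * sizes: 16 * 2KB = 32KB, 8 * 4KB = 32KB, 4 * 9KB = 36KB and
	 * 1 * 16KB = 16KB, all safely under the 64KB RSC ceiling.
	 */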
4148 if (rxr->mbuf_sz == MCLBYTES)
4149 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4150 else if (rxr->mbuf_sz == MJUMPAGESIZE)
4151 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4152 else if (rxr->mbuf_sz == MJUM9BYTES)
4153 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4154 else /* Using 16K cluster */
4155 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4156
4157 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4158
4159 /* Enable TCP header recognition */
4160 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4161 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4162 IXGBE_PSRTYPE_TCPHDR));
4163
4164 /* Disable RSC for ACK packets */
4165 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4166 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4167
4168 rxr->hw_rsc = TRUE;
4169 }
4170
4171
4172 static void
4173 ixgbe_free_receive_ring(struct rx_ring *rxr)
4174 {
4175 struct ixgbe_rx_buf *rxbuf;
4176 int i;
4177
4178 for (i = 0; i < rxr->num_desc; i++) {
4179 rxbuf = &rxr->rx_buffers[i];
4180 if (rxbuf->buf != NULL) {
4181 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4182 0, rxbuf->buf->m_pkthdr.len,
4183 BUS_DMASYNC_POSTREAD);
4184 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4185 rxbuf->buf->m_flags |= M_PKTHDR;
4186 m_freem(rxbuf->buf);
4187 rxbuf->buf = NULL;
4188 }
4189 }
4190 }
4191
4192
4193 /*********************************************************************
4194 *
4195 * Initialize a receive ring and its buffers.
4196 *
4197 **********************************************************************/
4198 static int
4199 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4200 {
4201 struct adapter *adapter;
4202 struct ixgbe_rx_buf *rxbuf;
4203 #ifdef LRO
4204 struct ifnet *ifp;
4205 struct lro_ctrl *lro = &rxr->lro;
4206 #endif /* LRO */
4207 int rsize, error = 0;
4208 #ifdef DEV_NETMAP
4209 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4210 struct netmap_slot *slot;
4211 #endif /* DEV_NETMAP */
4212
4213 adapter = rxr->adapter;
4214 #ifdef LRO
4215 ifp = adapter->ifp;
4216 #endif /* LRO */
4217
4218 /* Clear the ring contents */
4219 IXGBE_RX_LOCK(rxr);
4220 #ifdef DEV_NETMAP
4221 /* same as in ixgbe_setup_transmit_ring() */
4222 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4223 #endif /* DEV_NETMAP */
4224 rsize = roundup2(adapter->num_rx_desc *
4225 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4226 bzero((void *)rxr->rx_base, rsize);
4227 /* Cache the size */
4228 rxr->mbuf_sz = adapter->rx_mbuf_sz;
4229
4230 /* Free current RX buffer structs and their mbufs */
4231 ixgbe_free_receive_ring(rxr);
4232
4233 IXGBE_RX_UNLOCK(rxr);
4234
4235 /* Now reinitialize our supply of jumbo mbufs. The number
4236 * or size of jumbo mbufs may have changed.
4237 */
4238 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
4239 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
4240
4241 IXGBE_RX_LOCK(rxr);
4242
4243 /* Now replenish the mbufs */
4244 for (int j = 0; j != rxr->num_desc; ++j) {
4245 struct mbuf *mp;
4246
4247 rxbuf = &rxr->rx_buffers[j];
4248 #ifdef DEV_NETMAP
4249 /*
4250 * In netmap mode, fill the map and set the buffer
4251 * address in the NIC ring, considering the offset
4252 * between the netmap and NIC rings (see comment in
4253 * ixgbe_setup_transmit_ring() ). No need to allocate
4254 * an mbuf, so end the block with a continue;
4255 */
4256 if (slot) {
4257 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4258 uint64_t paddr;
4259 void *addr;
4260
4261 addr = PNMB(slot + sj, &paddr);
4262 netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4263 /* Update descriptor */
4264 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4265 continue;
4266 }
4267 #endif /* DEV_NETMAP */
4268 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4269 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
4270 if (rxbuf->buf == NULL) {
4271 error = ENOBUFS;
4272 goto fail;
4273 }
4274 mp = rxbuf->buf;
4275 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4276 /* Get the memory mapping */
4277 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4278 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4279 if (error != 0)
4280 goto fail;
4281 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4282 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
4283 /* Update descriptor */
4284 rxr->rx_base[j].read.pkt_addr =
4285 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4286 }
4287
4288
4289 /* Setup our descriptor indices */
4290 rxr->next_to_check = 0;
4291 rxr->next_to_refresh = 0;
4292 rxr->lro_enabled = FALSE;
4293 rxr->rx_copies.ev_count = 0;
4294 rxr->rx_bytes.ev_count = 0;
4295 rxr->discard = FALSE;
4296 rxr->vtag_strip = FALSE;
4297
4298 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4299 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4300
4301 /*
4302 ** Now set up the LRO interface:
4303 */
4304 if (ixgbe_rsc_enable)
4305 ixgbe_setup_hw_rsc(rxr);
4306 #ifdef LRO
4307 else if (ifp->if_capenable & IFCAP_LRO) {
4308 device_t dev = adapter->dev;
4309 int err = tcp_lro_init(lro);
4310 if (err) {
4311 device_printf(dev, "LRO Initialization failed!\n");
4312 goto fail;
4313 }
4314 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4315 rxr->lro_enabled = TRUE;
4316 lro->ifp = adapter->ifp;
4317 }
4318 #endif /* LRO */
4319
4320 IXGBE_RX_UNLOCK(rxr);
4321 return (0);
4322
4323 fail:
4324 ixgbe_free_receive_ring(rxr);
4325 IXGBE_RX_UNLOCK(rxr);
4326 return (error);
4327 }
4328
4329 /*********************************************************************
4330 *
4331 * Initialize all receive rings.
4332 *
4333 **********************************************************************/
4334 static int
4335 ixgbe_setup_receive_structures(struct adapter *adapter)
4336 {
4337 struct rx_ring *rxr = adapter->rx_rings;
4338 int j;
4339
4340 for (j = 0; j < adapter->num_queues; j++, rxr++)
4341 if (ixgbe_setup_receive_ring(rxr))
4342 goto fail;
4343
4344 return (0);
4345 fail:
4346 /*
4347 	 * Free RX buffers allocated so far; we will only handle
4348 	 * the rings that completed, since the failing case will have
4349 	 * cleaned up after itself. 'j' failed, so it's the terminus.
4350 */
4351 for (int i = 0; i < j; ++i) {
4352 rxr = &adapter->rx_rings[i];
4353 ixgbe_free_receive_ring(rxr);
4354 }
4355
4356 return (ENOBUFS);
4357 }
4358
4359 /*********************************************************************
4360 *
4361 * Setup receive registers and features.
4362 *
4363 **********************************************************************/
4364 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4365
4366 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
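/*
 * Example of the rounding done with BSIZEPKT_ROUNDUP, assuming
 * IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 (1KB units): a 2048-byte cluster
 * gives bufsz = (2048 + 1023) >> 10 = 2, i.e. the SRRCTL packet buffer
 * size field is programmed in kilobytes.
 */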
4367
4368 static void
4369 ixgbe_initialize_receive_units(struct adapter *adapter)
4370 {
4371 int i;
4372 struct rx_ring *rxr = adapter->rx_rings;
4373 struct ixgbe_hw *hw = &adapter->hw;
4374 struct ifnet *ifp = adapter->ifp;
4375 u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
4376 u32 reta, mrqc = 0, hlreg, r[10];
4377
4378
4379 /*
4380 * Make sure receives are disabled while
4381 * setting up the descriptor ring
4382 */
4383 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4384 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4385 rxctrl & ~IXGBE_RXCTRL_RXEN);
4386
4387 /* Enable broadcasts */
4388 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4389 fctrl |= IXGBE_FCTRL_BAM;
4390 fctrl |= IXGBE_FCTRL_DPF;
4391 fctrl |= IXGBE_FCTRL_PMCF;
4392 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4393
4394 /* Set for Jumbo Frames? */
4395 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4396 if (ifp->if_mtu > ETHERMTU)
4397 hlreg |= IXGBE_HLREG0_JUMBOEN;
4398 else
4399 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4400 #ifdef DEV_NETMAP
4401 /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4402 if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4403 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4404 else
4405 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4406 #endif /* DEV_NETMAP */
4407 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4408
4409 bufsz = (adapter->rx_mbuf_sz +
4410 BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4411
4412 for (i = 0; i < adapter->num_queues; i++, rxr++) {
4413 u64 rdba = rxr->rxdma.dma_paddr;
4414
4415 /* Setup the Base and Length of the Rx Descriptor Ring */
4416 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4417 (rdba & 0x00000000ffffffffULL));
4418 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4419 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4420 adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4421
4422 /* Set up the SRRCTL register */
4423 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4424 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4425 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4426 srrctl |= bufsz;
4427 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4428 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4429
4430 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4431 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4432 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4433
4434 /* Set the processing limit */
4435 rxr->process_limit = ixgbe_rx_process_limit;
4436 }
4437
4438 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4439 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4440 IXGBE_PSRTYPE_UDPHDR |
4441 IXGBE_PSRTYPE_IPV4HDR |
4442 IXGBE_PSRTYPE_IPV6HDR;
4443 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4444 }
4445
4446 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4447
4448 /* Setup RSS */
4449 if (adapter->num_queues > 1) {
4450 int j;
4451 reta = 0;
4452
4453 /* set up random bits */
4454 cprng_fast(&r, sizeof(r));
4455
4456 /* Set up the redirection table */
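		/*
		 * Packing sketch for the loop below: each RETA register
		 * holds four one-byte entries and is written every fourth
		 * iteration. The (j * 0x11) scaling is inherited from the
		 * FreeBSD driver; e.g. with num_queues = 4 the first
		 * register is written as 0x00112233 (queue indices 0..3).
		 */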
4457 for (i = 0, j = 0; i < 128; i++, j++) {
4458 if (j == adapter->num_queues) j = 0;
4459 reta = (reta << 8) | (j * 0x11);
4460 if ((i & 3) == 3)
4461 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4462 }
4463
4464 /* Now fill our hash function seeds */
4465 for (i = 0; i < 10; i++)
4466 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
4467
4468 /* Perform hash on these packet types */
4469 mrqc = IXGBE_MRQC_RSSEN
4470 | IXGBE_MRQC_RSS_FIELD_IPV4
4471 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4472 | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4473 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4474 | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4475 | IXGBE_MRQC_RSS_FIELD_IPV6
4476 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4477 | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4478 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4479 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4480
4481 /* RSS and RX IPP Checksum are mutually exclusive */
4482 rxcsum |= IXGBE_RXCSUM_PCSD;
4483 }
4484
4485 if (ifp->if_capenable & IFCAP_RXCSUM)
4486 rxcsum |= IXGBE_RXCSUM_PCSD;
4487
4488 if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4489 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4490
4491 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4492
4493 return;
4494 }
4495
4496 /*********************************************************************
4497 *
4498 * Free all receive rings.
4499 *
4500 **********************************************************************/
4501 static void
4502 ixgbe_free_receive_structures(struct adapter *adapter)
4503 {
4504 struct rx_ring *rxr = adapter->rx_rings;
4505
4506 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4507 #ifdef LRO
4508 struct lro_ctrl *lro = &rxr->lro;
4509 #endif /* LRO */
4510 ixgbe_free_receive_buffers(rxr);
4511 #ifdef LRO
4512 /* Free LRO memory */
4513 tcp_lro_free(lro);
4514 #endif /* LRO */
4515 /* Free the ring memory as well */
4516 ixgbe_dma_free(adapter, &rxr->rxdma);
4517 IXGBE_RX_LOCK_DESTROY(rxr);
4518 }
4519
4520 free(adapter->rx_rings, M_DEVBUF);
4521 }
4522
4523
4524 /*********************************************************************
4525 *
4526 * Free receive ring data structures
4527 *
4528 **********************************************************************/
4529 static void
4530 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4531 {
4532 struct adapter *adapter = rxr->adapter;
4533 struct ixgbe_rx_buf *rxbuf;
4534
4535 INIT_DEBUGOUT("free_receive_structures: begin");
4536
4537 /* Cleanup any existing buffers */
4538 if (rxr->rx_buffers != NULL) {
4539 for (int i = 0; i < adapter->num_rx_desc; i++) {
4540 rxbuf = &rxr->rx_buffers[i];
4541 if (rxbuf->buf != NULL) {
4542 bus_dmamap_sync(rxr->ptag->dt_dmat,
4543 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
4544 BUS_DMASYNC_POSTREAD);
4545 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4546 rxbuf->buf->m_flags |= M_PKTHDR;
4547 m_freem(rxbuf->buf);
4548 }
4549 rxbuf->buf = NULL;
4550 if (rxbuf->pmap != NULL) {
4551 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4552 rxbuf->pmap = NULL;
4553 }
4554 }
4555 if (rxr->rx_buffers != NULL) {
4556 free(rxr->rx_buffers, M_DEVBUF);
4557 rxr->rx_buffers = NULL;
4558 }
4559 }
4560
4561 if (rxr->ptag != NULL) {
4562 ixgbe_dma_tag_destroy(rxr->ptag);
4563 rxr->ptag = NULL;
4564 }
4565
4566 return;
4567 }
4568
4569 static __inline void
4570 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4571 {
4572 int s;
4573
4574 #ifdef LRO
4575 struct adapter *adapter = ifp->if_softc;
4576 struct ethercom *ec = &adapter->osdep.ec;
4577
4578 /*
4579 	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
4580 	 * of the packet should be computed by hardware. Also it should not
4581 	 * have a VLAN tag in the ethernet header. In case of IPv6 we do not
	 * yet support ext. hdrs.
4582 */
4583 if (rxr->lro_enabled &&
4584 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
4585 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4586 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4587 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4588 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4589 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4590 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4591 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4592 /*
4593 * Send to the stack if:
4594 ** - LRO not enabled, or
4595 ** - no LRO resources, or
4596 ** - lro enqueue fails
4597 */
4598 if (rxr->lro.lro_cnt != 0)
4599 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4600 return;
4601 }
4602 #endif /* LRO */
4603
4604 IXGBE_RX_UNLOCK(rxr);
4605
4606 s = splnet();
4607 /* Pass this up to any BPF listeners. */
4608 bpf_mtap(ifp, m);
4609 (*ifp->if_input)(ifp, m);
4610 splx(s);
4611
4612 IXGBE_RX_LOCK(rxr);
4613 }
4614
4615 static __inline void
4616 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4617 {
4618 struct ixgbe_rx_buf *rbuf;
4619
4620 rbuf = &rxr->rx_buffers[i];
4621
4622 if (rbuf->fmp != NULL) {/* Partial chain ? */
4623 rbuf->fmp->m_flags |= M_PKTHDR;
4624 m_freem(rbuf->fmp);
4625 rbuf->fmp = NULL;
4626 }
4627
4628 /*
4629 ** With advanced descriptors the writeback
4630 	** clobbers the buffer addresses, so it's easier
4631 ** to just free the existing mbufs and take
4632 ** the normal refresh path to get new buffers
4633 ** and mapping.
4634 */
4635 if (rbuf->buf) {
4636 m_free(rbuf->buf);
4637 rbuf->buf = NULL;
4638 }
4639
4640 return;
4641 }
4642
4643
4644 /*********************************************************************
4645 *
4646 * This routine executes in interrupt context. It replenishes
4647 * the mbufs in the descriptor and sends data which has been
4648 * dma'ed into host memory to upper layer.
4649 *
4650 * We loop at most count times if count is > 0, or until done if
4651 * count < 0.
4652 *
4653 * Return TRUE for more work, FALSE for all clean.
4654 *********************************************************************/
4655 static bool
4656 ixgbe_rxeof(struct ix_queue *que)
4657 {
4658 struct adapter *adapter = que->adapter;
4659 struct rx_ring *rxr = que->rxr;
4660 struct ifnet *ifp = adapter->ifp;
4661 #ifdef LRO
4662 struct lro_ctrl *lro = &rxr->lro;
4663 struct lro_entry *queued;
4664 #endif /* LRO */
4665 int i, nextp, processed = 0;
4666 u32 staterr = 0;
4667 u16 count = rxr->process_limit;
4668 union ixgbe_adv_rx_desc *cur;
4669 struct ixgbe_rx_buf *rbuf, *nbuf;
4670
4671 IXGBE_RX_LOCK(rxr);
4672
4673 #ifdef DEV_NETMAP
4674 /* Same as the txeof routine: wakeup clients on intr. */
4675 if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4676 return (FALSE);
4677 #endif /* DEV_NETMAP */
4678 for (i = rxr->next_to_check; count != 0;) {
4679 struct mbuf *sendmp, *mp;
4680 u32 rsc, ptype;
4681 u16 len;
4682 u16 vtag = 0;
4683 bool eop;
4684
4685 /* Sync the ring. */
4686 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4687 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4688
4689 cur = &rxr->rx_base[i];
4690 staterr = le32toh(cur->wb.upper.status_error);
4691
4692 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4693 break;
4694 if ((ifp->if_flags & IFF_RUNNING) == 0)
4695 break;
4696
4697 count--;
4698 sendmp = NULL;
4699 nbuf = NULL;
4700 rsc = 0;
4701 cur->wb.upper.status_error = 0;
4702 rbuf = &rxr->rx_buffers[i];
4703 mp = rbuf->buf;
4704
4705 len = le16toh(cur->wb.upper.length);
4706 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4707 IXGBE_RXDADV_PKTTYPE_MASK;
4708 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4709
4710 /* Make sure bad packets are discarded */
4711 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4712 (rxr->discard)) {
4713 rxr->rx_discarded.ev_count++;
4714 if (eop)
4715 rxr->discard = FALSE;
4716 else
4717 rxr->discard = TRUE;
4718 ixgbe_rx_discard(rxr, i);
4719 goto next_desc;
4720 }
4721
4722 /*
4723 	** On the 82599, which supports a hardware
4724 	** LRO (called HW RSC), packets need
4725 	** not be fragmented across sequential
4726 	** descriptors; rather, the next descriptor
4727 	** is indicated in bits of the descriptor.
4728 	** This also means that we might process
4729 	** more than one packet at a time, something
4730 	** that has never been true before; it
4731 	** required eliminating global chain pointers
4732 	** in favor of what we are doing here. -jfv
4733 */
4734 if (!eop) {
4735 /*
4736 ** Figure out the next descriptor
4737 ** of this frame.
4738 */
4739 if (rxr->hw_rsc == TRUE) {
4740 rsc = ixgbe_rsc_count(cur);
4741 rxr->rsc_num += (rsc - 1);
4742 }
4743 if (rsc) { /* Get hardware index */
4744 nextp = ((staterr &
4745 IXGBE_RXDADV_NEXTP_MASK) >>
4746 IXGBE_RXDADV_NEXTP_SHIFT);
4747 } else { /* Just sequential */
4748 nextp = i + 1;
4749 if (nextp == adapter->num_rx_desc)
4750 nextp = 0;
4751 }
4752 nbuf = &rxr->rx_buffers[nextp];
4753 prefetch(nbuf);
4754 }
4755 /*
4756 ** Rather than using the fmp/lmp global pointers
4757 ** we now keep the head of a packet chain in the
4758 ** buffer struct and pass this along from one
4759 ** descriptor to the next, until we get EOP.
4760 */
4761 mp->m_len = len;
4762 /*
4763 ** See if there is a stored head
4764 ** that determines what we are
4765 */
4766 sendmp = rbuf->fmp;
4767
4768 if (sendmp != NULL) { /* secondary frag */
4769 rbuf->buf = rbuf->fmp = NULL;
4770 mp->m_flags &= ~M_PKTHDR;
4771 sendmp->m_pkthdr.len += mp->m_len;
4772 } else {
4773 /*
4774 * Optimize. This might be a small packet,
4775 * maybe just a TCP ACK. Do a fast copy that
4776 * is cache aligned into a new mbuf, and
4777 * leave the old mbuf+cluster for re-use.
4778 */
4779 if (eop && len <= IXGBE_RX_COPY_LEN) {
4780 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4781 if (sendmp != NULL) {
4782 sendmp->m_data +=
4783 IXGBE_RX_COPY_ALIGN;
4784 ixgbe_bcopy(mp->m_data,
4785 sendmp->m_data, len);
4786 sendmp->m_len = len;
4787 rxr->rx_copies.ev_count++;
4788 rbuf->flags |= IXGBE_RX_COPY;
4789 }
4790 }
4791 if (sendmp == NULL) {
4792 rbuf->buf = rbuf->fmp = NULL;
4793 sendmp = mp;
4794 }
4795
4796 /* first desc of a non-ps chain */
4797 sendmp->m_flags |= M_PKTHDR;
4798 sendmp->m_pkthdr.len = mp->m_len;
4799 }
4800 ++processed;
4801 /* Pass the head pointer on */
4802 if (eop == 0) {
4803 nbuf->fmp = sendmp;
4804 sendmp = NULL;
4805 mp->m_next = nbuf->buf;
4806 } else { /* Sending this frame */
4807 sendmp->m_pkthdr.rcvif = ifp;
4808 ifp->if_ipackets++;
4809 rxr->rx_packets.ev_count++;
4810 /* capture data for AIM */
4811 rxr->bytes += sendmp->m_pkthdr.len;
4812 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
4813 /* Process vlan info */
4814 if ((rxr->vtag_strip) &&
4815 (staterr & IXGBE_RXD_STAT_VP))
4816 vtag = le16toh(cur->wb.upper.vlan);
4817 if (vtag) {
4818 VLAN_INPUT_TAG(ifp, sendmp, vtag,
4819 printf("%s: could not apply VLAN "
4820 "tag", __func__));
4821 }
4822 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
4823 ixgbe_rx_checksum(staterr, sendmp, ptype,
4824 &adapter->stats);
4825 }
4826 #if __FreeBSD_version >= 800000
4827 sendmp->m_pkthdr.flowid = que->msix;
4828 sendmp->m_flags |= M_FLOWID;
4829 #endif
4830 }
4831 next_desc:
4832 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4833 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4834
4835 /* Advance our pointers to the next descriptor. */
4836 if (++i == rxr->num_desc)
4837 i = 0;
4838
4839 /* Now send to the stack or do LRO */
4840 if (sendmp != NULL) {
4841 rxr->next_to_check = i;
4842 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4843 i = rxr->next_to_check;
4844 }
4845
4846 /* Every 8 descriptors we go to refresh mbufs */
4847 if (processed == 8) {
4848 ixgbe_refresh_mbufs(rxr, i);
4849 processed = 0;
4850 }
4851 }
4852
4853 /* Refresh any remaining buf structs */
4854 if (ixgbe_rx_unrefreshed(rxr))
4855 ixgbe_refresh_mbufs(rxr, i);
4856
4857 rxr->next_to_check = i;
4858
4859 #ifdef LRO
4860 /*
4861 * Flush any outstanding LRO work
4862 */
4863 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4864 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4865 tcp_lro_flush(lro, queued);
4866 }
4867 #endif /* LRO */
4868
4869 IXGBE_RX_UNLOCK(rxr);
4870
4871 /*
4872 ** We still have cleaning to do?
4873 ** Schedule another interrupt if so.
4874 */
4875 if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4876 ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
4877 return true;
4878 }
4879
4880 return false;
4881 }
4882
4883
4884 /*********************************************************************
4885 *
4886 * Verify that the hardware indicated that the checksum is valid.
4887  *  Inform the stack about the status of the checksum so that the
4888  *  stack doesn't spend time verifying the checksum.
4889 *
4890 *********************************************************************/
4891 static void
4892 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
4893 struct ixgbe_hw_stats *stats)
4894 {
4895 u16 status = (u16) staterr;
4896 u8 errors = (u8) (staterr >> 24);
4897 #if 0
4898 bool sctp = FALSE;
4899
4900 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4901 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4902 sctp = TRUE;
4903 #endif
4904
4905 if (status & IXGBE_RXD_STAT_IPCS) {
4906 stats->ipcs.ev_count++;
4907 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4908 /* IP Checksum Good */
4909 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
4910
4911 } else {
4912 stats->ipcs_bad.ev_count++;
4913 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
4914 }
4915 }
4916 if (status & IXGBE_RXD_STAT_L4CS) {
4917 stats->l4cs.ev_count++;
4918 u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
4919 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4920 mp->m_pkthdr.csum_flags |= type;
4921 } else {
4922 stats->l4cs_bad.ev_count++;
4923 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
4924 }
4925 }
4926 return;
4927 }
4928
4929
4930 #if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */
4931 /*
4932 ** This routine is run via a vlan config EVENT;
4933 ** it enables us to use the HW Filter table since
4934 ** we can get the vlan id. This just creates the
4935 ** entry in the soft version of the VFTA; init will
4936 ** repopulate the real table.
4937 */
4938 static void
4939 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4940 {
4941 struct adapter *adapter = ifp->if_softc;
4942 u16 index, bit;
4943
4944 if (ifp->if_softc != arg) /* Not our event */
4945 return;
4946
4947 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4948 return;
4949
4950 IXGBE_CORE_LOCK(adapter);
4951 index = (vtag >> 5) & 0x7F;
4952 bit = vtag & 0x1F;
4953 adapter->shadow_vfta[index] |= (1 << bit);
4954 ixgbe_init_locked(adapter);
4955 IXGBE_CORE_UNLOCK(adapter);
4956 }
4957
4958 /*
4959 ** This routine is run via a vlan
4960 ** unconfig EVENT; it removes our entry
4961 ** from the soft vfta.
4962 */
4963 static void
4964 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4965 {
4966 struct adapter *adapter = ifp->if_softc;
4967 u16 index, bit;
4968
4969 if (ifp->if_softc != arg)
4970 return;
4971
4972 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4973 return;
4974
4975 IXGBE_CORE_LOCK(adapter);
4976 index = (vtag >> 5) & 0x7F;
4977 bit = vtag & 0x1F;
4978 adapter->shadow_vfta[index] &= ~(1 << bit);
4979 /* Re-init to load the changes */
4980 ixgbe_init_locked(adapter);
4981 IXGBE_CORE_UNLOCK(adapter);
4982 }
4983 #endif
4984
4985 static void
4986 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4987 {
4988 struct ethercom *ec = &adapter->osdep.ec;
4989 struct ixgbe_hw *hw = &adapter->hw;
4990 struct rx_ring *rxr;
4991 u32 ctrl;
4992
4993 /*
4994 	** We get here thru init_locked, meaning
4995 	** a soft reset; this has already cleared
4996 	** the VFTA and other state, so if no
4997 	** vlans have been registered, do nothing.
4998 */
4999 if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
5000 return;
5001 }
5002
5003 /*
5004 	** A soft reset zeroes out the VFTA, so
5005 ** we need to repopulate it now.
5006 */
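	/*
	 * Layout reminder: the VFTA is 128 32-bit registers covering the
	 * 4096 possible VLAN IDs, one bit each. E.g. VLAN 100 lives in
	 * register 100 >> 5 = 3, bit 100 & 0x1F = 4.
	 */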
5007 for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
5008 if (adapter->shadow_vfta[i] != 0)
5009 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
5010 adapter->shadow_vfta[i]);
5011
5012 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
5013 /* Enable the Filter Table if enabled */
5014 if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
5015 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
5016 ctrl |= IXGBE_VLNCTRL_VFE;
5017 }
5018 if (hw->mac.type == ixgbe_mac_82598EB)
5019 ctrl |= IXGBE_VLNCTRL_VME;
5020 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
5021
5022 /* Setup the queues for vlans */
5023 for (int i = 0; i < adapter->num_queues; i++) {
5024 rxr = &adapter->rx_rings[i];
5025 /* On 82599 the VLAN enable is per/queue in RXDCTL */
5026 if (hw->mac.type != ixgbe_mac_82598EB) {
5027 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
5028 ctrl |= IXGBE_RXDCTL_VME;
5029 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
5030 }
5031 rxr->vtag_strip = TRUE;
5032 }
5033 }
5034
5035 static void
5036 ixgbe_enable_intr(struct adapter *adapter)
5037 {
5038 struct ixgbe_hw *hw = &adapter->hw;
5039 struct ix_queue *que = adapter->queues;
5040 u32 mask, fwsm;
5041
5042 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
5043 /* Enable Fan Failure detection */
5044 if (hw->device_id == IXGBE_DEV_ID_82598AT)
5045 mask |= IXGBE_EIMS_GPI_SDP1;
5046
5047 switch (adapter->hw.mac.type) {
5048 case ixgbe_mac_82599EB:
5049 mask |= IXGBE_EIMS_ECC;
5050 mask |= IXGBE_EIMS_GPI_SDP0;
5051 mask |= IXGBE_EIMS_GPI_SDP1;
5052 mask |= IXGBE_EIMS_GPI_SDP2;
5053 #ifdef IXGBE_FDIR
5054 mask |= IXGBE_EIMS_FLOW_DIR;
5055 #endif
5056 break;
5057 case ixgbe_mac_X540:
5058 mask |= IXGBE_EIMS_ECC;
5059 /* Detect if Thermal Sensor is enabled */
5060 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
5061 if (fwsm & IXGBE_FWSM_TS_ENABLED)
5062 mask |= IXGBE_EIMS_TS;
5063 #ifdef IXGBE_FDIR
5064 mask |= IXGBE_EIMS_FLOW_DIR;
5065 #endif
5066 /* falls through */
5067 default:
5068 break;
5069 }
5070
5071 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5072
5073 /* With RSS we use auto clear */
5074 if (adapter->msix_mem) {
5075 mask = IXGBE_EIMS_ENABLE_MASK;
5076 /* Don't autoclear Link */
5077 mask &= ~IXGBE_EIMS_OTHER;
5078 mask &= ~IXGBE_EIMS_LSC;
5079 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5080 }
5081
5082 /*
5083 	** Now enable all queues; this is done separately to
5084 	** allow for handling the extended (beyond 32) MSIX
5085 	** vectors that can be used by the 82599
5086 */
5087 for (int i = 0; i < adapter->num_queues; i++, que++)
5088 ixgbe_enable_queue(adapter, que->msix);
5089
5090 IXGBE_WRITE_FLUSH(hw);
5091
5092 return;
5093 }
5094
5095 static void
5096 ixgbe_disable_intr(struct adapter *adapter)
5097 {
5098 if (adapter->msix_mem)
5099 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5100 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5101 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5102 } else {
5103 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5104 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5105 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5106 }
5107 IXGBE_WRITE_FLUSH(&adapter->hw);
5108 return;
5109 }
5110
5111 u16
5112 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5113 {
5114 switch (reg % 4) {
5115 case 0:
5116 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5117 __BITS(15, 0);
5118 case 2:
5119 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
5120 reg - 2), __BITS(31, 16));
5121 default:
5122 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5123 break;
5124 }
5125 }
5126
5127 void
5128 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5129 {
5130 pcireg_t old;
5131
5132 switch (reg % 4) {
5133 case 0:
5134 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5135 __BITS(31, 16);
5136 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
5137 break;
5138 case 2:
5139 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
5140 __BITS(15, 0);
5141 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
5142 __SHIFTIN(value, __BITS(31, 16)) | old);
5143 break;
5144 default:
5145 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5146 break;
5147 }
5148
5149 return;
5150 }
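/*
 * Both accessors above work around the fact that pci_conf_read() and
 * pci_conf_write() operate on aligned 32-bit words: e.g. a 16-bit read
 * at offset 0xA2 really reads the dword at 0xA0 and shifts out bits
 * 31:16. (A description of the code above, not of any additional API.)
 */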
5151
5152 /*
5153 ** Setup the correct IVAR register for a particular MSIX interrupt
5154 ** (yes this is all very magic and confusing :)
5155 ** - entry is the register array entry
5156 ** - vector is the MSIX vector for this queue
5157 ** - type is RX/TX/MISC
5158 */
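/*
** Layout sketch for the 82599/X540 branch below (as implemented here;
** worth cross-checking against the datasheet): each 32-bit IVAR holds
** four 8-bit entries, so for an RX/TX pair the byte offset is
**   index = (16 * (entry & 1)) + (8 * type)
** e.g. queue 5, TX (type 1) lands in IVAR(5 >> 1) = IVAR(2), bits 31:24.
*/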
5159 static void
5160 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5161 {
5162 struct ixgbe_hw *hw = &adapter->hw;
5163 u32 ivar, index;
5164
5165 vector |= IXGBE_IVAR_ALLOC_VAL;
5166
5167 switch (hw->mac.type) {
5168
5169 case ixgbe_mac_82598EB:
5170 if (type == -1)
5171 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5172 else
5173 entry += (type * 64);
5174 index = (entry >> 2) & 0x1F;
5175 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5176 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5177 ivar |= (vector << (8 * (entry & 0x3)));
5178 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5179 break;
5180
5181 case ixgbe_mac_82599EB:
5182 case ixgbe_mac_X540:
5183 if (type == -1) { /* MISC IVAR */
5184 index = (entry & 1) * 8;
5185 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5186 ivar &= ~(0xFF << index);
5187 ivar |= (vector << index);
5188 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5189 } else { /* RX/TX IVARS */
5190 index = (16 * (entry & 1)) + (8 * type);
5191 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5192 ivar &= ~(0xFF << index);
5193 ivar |= (vector << index);
5194 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5195 		}
		break;
5196 
5197 default:
5198 break;
5199 }
5200 }
5201
5202 static void
5203 ixgbe_configure_ivars(struct adapter *adapter)
5204 {
5205 struct ix_queue *que = adapter->queues;
5206 u32 newitr;
5207
5208 if (ixgbe_max_interrupt_rate > 0)
5209 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5210 else
5211 newitr = 0;
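	/*
	 * Quick arithmetic check (EITR units are left to the datasheet):
	 * with ixgbe_max_interrupt_rate = 8000 the value above is
	 * (4000000 / 8000) & 0x0FF8 = 500 & 0x0FF8 = 496; the mask clears
	 * the low bits that are not part of the interval field.
	 */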
5212
5213 for (int i = 0; i < adapter->num_queues; i++, que++) {
5214 /* First the RX queue entry */
5215 ixgbe_set_ivar(adapter, i, que->msix, 0);
5216 /* ... and the TX */
5217 ixgbe_set_ivar(adapter, i, que->msix, 1);
5218 /* Set an Initial EITR value */
5219 IXGBE_WRITE_REG(&adapter->hw,
5220 IXGBE_EITR(que->msix), newitr);
5221 }
5222
5223 /* For the Link interrupt */
5224 ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5225 }
5226
5227 /*
5228 ** ixgbe_sfp_probe - called in the local timer to
5229 ** determine if a port had optics inserted.
5230 */
5231 static bool ixgbe_sfp_probe(struct adapter *adapter)
5232 {
5233 struct ixgbe_hw *hw = &adapter->hw;
5234 device_t dev = adapter->dev;
5235 bool result = FALSE;
5236
5237 if ((hw->phy.type == ixgbe_phy_nl) &&
5238 (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5239 s32 ret = hw->phy.ops.identify_sfp(hw);
5240 if (ret)
5241 goto out;
5242 ret = hw->phy.ops.reset(hw);
5243 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5244 			device_printf(dev, "Unsupported SFP+ module detected!\n");
5245 device_printf(dev, "Reload driver with supported module.\n");
5246 adapter->sfp_probe = FALSE;
5247 goto out;
5248 } else
5249 device_printf(dev,"SFP+ module detected!\n");
5250 /* We now have supported optics */
5251 adapter->sfp_probe = FALSE;
5252 /* Set the optics type so system reports correctly */
5253 ixgbe_setup_optics(adapter);
5254 result = TRUE;
5255 }
5256 out:
5257 return (result);
5258 }
5259
5260 /*
5261 ** Tasklet handler for MSIX Link interrupts
5262 ** - done outside interrupt context since it might sleep
5263 */
5264 static void
5265 ixgbe_handle_link(void *context)
5266 {
5267 struct adapter *adapter = context;
5268
5269 if (ixgbe_check_link(&adapter->hw,
5270 &adapter->link_speed, &adapter->link_up, 0) == 0)
5271 ixgbe_update_link_status(adapter);
5272 }
5273
5274 /*
5275 ** Tasklet for handling SFP module interrupts
5276 */
5277 static void
5278 ixgbe_handle_mod(void *context)
5279 {
5280 struct adapter *adapter = context;
5281 struct ixgbe_hw *hw = &adapter->hw;
5282 device_t dev = adapter->dev;
5283 u32 err;
5284
5285 err = hw->phy.ops.identify_sfp(hw);
5286 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5287 device_printf(dev,
5288 "Unsupported SFP+ module type was detected.\n");
5289 return;
5290 }
5291 err = hw->mac.ops.setup_sfp(hw);
5292 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5293 device_printf(dev,
5294 "Setup failure - unsupported SFP+ module type.\n");
5295 return;
5296 }
5297 softint_schedule(adapter->msf_si);
5298 return;
5299 }
5300
5301
5302 /*
5303 ** Tasklet for handling MSF (multispeed fiber) interrupts
5304 */
5305 static void
5306 ixgbe_handle_msf(void *context)
5307 {
5308 struct adapter *adapter = context;
5309 struct ixgbe_hw *hw = &adapter->hw;
5310 u32 autoneg;
5311 bool negotiate;
5312
5313 autoneg = hw->phy.autoneg_advertised;
5314 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5315 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5316 else
5317 negotiate = 0;
5318 if (hw->mac.ops.setup_link)
5319 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5320 return;
5321 }
5322
5323 #ifdef IXGBE_FDIR
5324 /*
5325 ** Tasklet for reinitializing the Flow Director filter table
5326 */
5327 static void
5328 ixgbe_reinit_fdir(void *context)
5329 {
5330 struct adapter *adapter = context;
5331 struct ifnet *ifp = adapter->ifp;
5332
5333 if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5334 return;
5335 ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5336 adapter->fdir_reinit = 0;
5337 /* re-enable flow director interrupts */
5338 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5339 /* Restart the interface */
5340 ifp->if_flags |= IFF_RUNNING;
5341 return;
5342 }
5343 #endif
5344
5345 /**********************************************************************
5346 *
5347 * Update the board statistics counters.
5348 *
5349 **********************************************************************/
5350 static void
5351 ixgbe_update_stats_counters(struct adapter *adapter)
5352 {
5353 struct ifnet *ifp = adapter->ifp;
5354 struct ixgbe_hw *hw = &adapter->hw;
5355 u32 missed_rx = 0, bprc, lxon, lxoff, total;
5356 u64 total_missed_rx = 0;
5357 uint64_t crcerrs, rlec;
5358
5359 crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5360 adapter->stats.crcerrs.ev_count += crcerrs;
5361 adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5362 adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5363 adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5364
5365 /*
5366 	** Note: these are for the 8 possible traffic classes,
5367 	** which in the current implementation are unused,
5368 	** therefore only class 0 should read real data.
5369 */
5370 for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
5371 int j = i % adapter->num_queues;
5372 u32 mp;
5373 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5374 /* missed_rx tallies misses for the gprc workaround */
5375 missed_rx += mp;
5376 /* global total per queue */
5377 adapter->stats.mpc[j].ev_count += mp;
5378 /* Running comprehensive total for stats display */
5379 total_missed_rx += mp;
5380 if (hw->mac.type == ixgbe_mac_82598EB) {
5381 adapter->stats.rnbc[j] +=
5382 IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5383 adapter->stats.qbtc[j].ev_count +=
5384 IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5385 adapter->stats.qbrc[j].ev_count +=
5386 IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5387 adapter->stats.pxonrxc[j].ev_count +=
5388 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5389 } else {
5390 adapter->stats.pxonrxc[j].ev_count +=
5391 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5392 }
5393 adapter->stats.pxontxc[j].ev_count +=
5394 IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5395 adapter->stats.pxofftxc[j].ev_count +=
5396 IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5397 adapter->stats.pxoffrxc[j].ev_count +=
5398 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5399 adapter->stats.pxon2offc[j].ev_count +=
5400 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5401 }
5402 for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
5403 int j = i % adapter->num_queues;
5404 adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5405 adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5406 adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5407 }
5408 adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
5409 adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
5410 rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
5411 adapter->stats.rlec.ev_count += rlec;
5412
5413 /* Hardware workaround, gprc counts missed packets */
5414 adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
5415
5416 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5417 adapter->stats.lxontxc.ev_count += lxon;
5418 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5419 adapter->stats.lxofftxc.ev_count += lxoff;
5420 total = lxon + lxoff;
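	/*
	 * 'total' (the XON/XOFF pause frames we transmitted) is subtracted
	 * from the TX counters below, presumably because the hardware
	 * counts those MAC-generated frames too; ETHER_MIN_LEN is used as
	 * an approximation of their size for the byte counter.
	 */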
5421
5422 if (hw->mac.type != ixgbe_mac_82598EB) {
5423 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5424 ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5425 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5426 ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
5427 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
5428 ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5429 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5430 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5431 } else {
5432 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5433 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5434 /* 82598 only has a counter in the high register */
5435 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
5436 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
5437 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
5438 }
5439
5440 /*
5441 * Workaround: mprc hardware is incorrectly counting
5442 * broadcasts, so for now we subtract those.
5443 */
5444 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5445 adapter->stats.bprc.ev_count += bprc;
5446 adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
5447
5448 adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
5449 adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
5450 adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
5451 adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
5452 adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5453 adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5454
5455 adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
5456 adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
5457 adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
5458
5459 adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
5460 adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
5461 adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
5462 adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
5463 adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5464 adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5465 adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5466 adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
5467 adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
5468 adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
5469 adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
5470 adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
5471 adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5472 adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5473 adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
5474 adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
5475 adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5476 adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5477
5478 /* Only read FCOE on 82599 */
5479 if (hw->mac.type != ixgbe_mac_82598EB) {
5480 adapter->stats.fcoerpdc.ev_count +=
5481 IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5482 adapter->stats.fcoeprc.ev_count +=
5483 IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5484 adapter->stats.fcoeptc.ev_count +=
5485 IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5486 adapter->stats.fcoedwrc.ev_count +=
5487 IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5488 adapter->stats.fcoedwtc.ev_count +=
5489 IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5490 }
5491
5492 /* Fill out the OS statistics structure */
5493 /*
5494 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
5495 * adapter->stats counters. It's required to make ifconfig -z
5496 	 * (SIOCZIFDATA) work.
5497 */
5498 ifp->if_collisions = 0;
5499
5500 /* Rx Errors */
5501 ifp->if_iqdrops += total_missed_rx;
5502 ifp->if_ierrors += crcerrs + rlec;
5503 }
5504
5505 /** ixgbe_sysctl_tdh_handler - Handler function
5506 * Retrieves the TDH value from the hardware
5507 */
5508 static int
5509 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
5510 {
5511 struct sysctlnode node;
5512 uint32_t val;
5513 struct tx_ring *txr;
5514
5515 node = *rnode;
5516 txr = (struct tx_ring *)node.sysctl_data;
5517 if (txr == NULL)
5518 return 0;
5519 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5520 node.sysctl_data = &val;
5521 return sysctl_lookup(SYSCTLFN_CALL(&node));
5522 }
5523
5524 /** ixgbe_sysctl_tdt_handler - Handler function
5525 * Retrieves the TDT value from the hardware
5526 */
5527 static int
5528 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
5529 {
5530 struct sysctlnode node;
5531 uint32_t val;
5532 struct tx_ring *txr;
5533
5534 node = *rnode;
5535 txr = (struct tx_ring *)node.sysctl_data;
5536 if (txr == NULL)
5537 return 0;
5538 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5539 node.sysctl_data = &val;
5540 return sysctl_lookup(SYSCTLFN_CALL(&node));
5541 }
5542
5543 /** ixgbe_sysctl_rdh_handler - Handler function
5544 * Retrieves the RDH value from the hardware
5545 */
5546 static int
5547 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
5548 {
5549 struct sysctlnode node;
5550 uint32_t val;
5551 struct rx_ring *rxr;
5552
5553 node = *rnode;
5554 rxr = (struct rx_ring *)node.sysctl_data;
5555 if (rxr == NULL)
5556 return 0;
5557 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5558 node.sysctl_data = &val;
5559 return sysctl_lookup(SYSCTLFN_CALL(&node));
5560 }
5561
5562 /** ixgbe_sysctl_rdt_handler - Handler function
5563 * Retrieves the RDT value from the hardware
5564 */
5565 static int
5566 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
5567 {
5568 struct sysctlnode node;
5569 uint32_t val;
5570 struct rx_ring *rxr;
5571
5572 node = *rnode;
5573 rxr = (struct rx_ring *)node.sysctl_data;
5574 if (rxr == NULL)
5575 return 0;
5576 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5577 node.sysctl_data = &val;
5578 return sysctl_lookup(SYSCTLFN_CALL(&node));
5579 }
5580
5581 static int
5582 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
5583 {
5584 int error;
5585 struct sysctlnode node;
5586 struct ix_queue *que;
5587 uint32_t reg, usec, rate;
5588
5589 node = *rnode;
5590 que = (struct ix_queue *)node.sysctl_data;
5591 if (que == NULL)
5592 return 0;
5593 reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
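	/*
	 * Bits [11:3] of EITR hold the throttle interval; report it
	 * through sysctl as the approximate interrupt rate it allows
	 * (0 means the queue is unthrottled).
	 */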
5594 usec = ((reg & 0x0FF8) >> 3);
5595 if (usec > 0)
5596 rate = 500000 / usec;
5597 else
5598 rate = 0;
5599 node.sysctl_data = &rate;
5600 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5601 	if (error != 0 || newp == NULL)
5602 		return error;
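	/*
	 * A new rate was supplied: clear the old interval, clamp the
	 * rate to at least 1000, and program the matching interval
	 * back into EITR[11:3] (4000000/rate is the interval field
	 * shifted left by 3).  A rate of 0, or one at or above 500000,
	 * leaves the queue unthrottled.
	 */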
5603 reg &= ~0xfff; /* default, no limitation */
5604 ixgbe_max_interrupt_rate = 0;
5605 if (rate > 0 && rate < 500000) {
5606 if (rate < 1000)
5607 rate = 1000;
5608 ixgbe_max_interrupt_rate = rate;
5609 reg |= ((4000000/rate) & 0xff8 );
5610 }
5611 IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5612 return 0;
5613 }
5614
5615 const struct sysctlnode *
5616 ixgbe_sysctl_instance(struct adapter *adapter)
5617 {
5618 const char *dvname;
5619 struct sysctllog **log;
5620 int rc;
5621 const struct sysctlnode *rnode;
5622
5623 log = &adapter->sysctllog;
5624 dvname = device_xname(adapter->dev);
5625
5626 if ((rc = sysctl_createv(log, 0, NULL, &rnode,
5627 0, CTLTYPE_NODE, dvname,
5628 SYSCTL_DESCR("ixgbe information and settings"),
5629 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
5630 goto err;
5631
5632 return rnode;
5633 err:
5634 printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
5635 return NULL;
5636 }
5637
5638 /*
5639 * Add sysctl variables, one per statistic, to the system.
5640 */
5641 static void
5642 ixgbe_add_hw_stats(struct adapter *adapter)
5643 {
5644 device_t dev = adapter->dev;
5645 const struct sysctlnode *rnode, *cnode;
5646 struct sysctllog **log = &adapter->sysctllog;
5647 struct tx_ring *txr = adapter->tx_rings;
5648 struct rx_ring *rxr = adapter->rx_rings;
5649 struct ixgbe_hw_stats *stats = &adapter->stats;
5650
5651 /* Driver Statistics */
5652 #if 0
5653 /* These counters are not updated by the software */
5654 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5655 CTLFLAG_RD, &adapter->dropped_pkts,
5656 "Driver dropped packets");
5657 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
5658 CTLFLAG_RD, &adapter->mbuf_header_failed,
5659 "???");
5660 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
5661 CTLFLAG_RD, &adapter->mbuf_packet_failed,
5662 "???");
5663 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
5664 CTLFLAG_RD, &adapter->no_tx_map_avail,
5665 "???");
5666 #endif
5667 evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
5668 NULL, device_xname(dev), "Handled queue in softint");
5669 evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
5670 NULL, device_xname(dev), "Requeued in softint");
5671 evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
5672 NULL, device_xname(dev), "Interrupt handler more rx");
5673 evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
5674 NULL, device_xname(dev), "Interrupt handler more tx");
5675 evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
5676 NULL, device_xname(dev), "Interrupt handler tx loops");
5677 evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
5678 NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
5679 evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
5680 NULL, device_xname(dev), "m_defrag() failed");
5681 evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
5682 NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
5683 evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
5684 NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
5685 evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
5686 NULL, device_xname(dev), "Driver tx dma hard fail other");
5687 evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
5688 NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
5689 evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
5690 NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
5691 evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
5692 NULL, device_xname(dev), "Watchdog timeouts");
5693 evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
5694 NULL, device_xname(dev), "TSO errors");
5695 evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
5696 NULL, device_xname(dev), "Link MSIX IRQ Handled");
5697
5698 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5699 snprintf(adapter->queues[i].evnamebuf,
5700 sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
5701 device_xname(dev), i);
5702 snprintf(adapter->queues[i].namebuf,
5703 sizeof(adapter->queues[i].namebuf), "queue%d", i);
5704
5705 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
5706 aprint_error_dev(dev, "could not create sysctl root\n");
5707 break;
5708 }
5709
5710 if (sysctl_createv(log, 0, &rnode, &rnode,
5711 0, CTLTYPE_NODE,
5712 adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
5713 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
5714 break;
5715
5716 if (sysctl_createv(log, 0, &rnode, &cnode,
5717 CTLFLAG_READWRITE, CTLTYPE_INT,
5718 "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
5719 ixgbe_sysctl_interrupt_rate_handler, 0,
5720 (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
5721 break;
5722
5723 if (sysctl_createv(log, 0, &rnode, &cnode,
5724 CTLFLAG_READONLY, CTLTYPE_QUAD,
5725 "irqs", SYSCTL_DESCR("irqs on this queue"),
5726 NULL, 0, &(adapter->queues[i].irqs),
5727 0, CTL_CREATE, CTL_EOL) != 0)
5728 break;
5729
5730 if (sysctl_createv(log, 0, &rnode, &cnode,
5731 CTLFLAG_READONLY, CTLTYPE_INT,
5732 "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
5733 ixgbe_sysctl_tdh_handler, 0, (void *)txr,
5734 0, CTL_CREATE, CTL_EOL) != 0)
5735 break;
5736
5737 if (sysctl_createv(log, 0, &rnode, &cnode,
5738 CTLFLAG_READONLY, CTLTYPE_INT,
5739 "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
5740 ixgbe_sysctl_tdt_handler, 0, (void *)txr,
5741 0, CTL_CREATE, CTL_EOL) != 0)
5742 break;
5743
5744 evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
5745 NULL, device_xname(dev), "TSO");
5746 evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
5747 NULL, adapter->queues[i].evnamebuf,
5748 "Queue No Descriptor Available");
5749 evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
5750 NULL, adapter->queues[i].evnamebuf,
5751 "Queue Packets Transmitted");
5752
5753 #ifdef LRO
5754 struct lro_ctrl *lro = &rxr->lro;
5755 #endif /* LRO */
5756
5757 if (sysctl_createv(log, 0, &rnode, &cnode,
5758 CTLFLAG_READONLY,
5759 CTLTYPE_INT,
5760 "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
5761 ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
5762 CTL_CREATE, CTL_EOL) != 0)
5763 break;
5764
5765 if (sysctl_createv(log, 0, &rnode, &cnode,
5766 CTLFLAG_READONLY,
5767 CTLTYPE_INT,
5768 "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
5769 ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
5770 CTL_CREATE, CTL_EOL) != 0)
5771 break;
5772
5773 if (i < __arraycount(adapter->stats.mpc)) {
5774 evcnt_attach_dynamic(&adapter->stats.mpc[i],
5775 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5776 "Missed Packet Count");
5777 }
5778 if (i < __arraycount(adapter->stats.pxontxc)) {
5779 evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
5780 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5781 "pxontxc");
5782 evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
5783 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5784 "pxonrxc");
5785 evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
5786 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5787 "pxofftxc");
5788 evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
5789 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5790 "pxoffrxc");
5791 evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
5792 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5793 "pxon2offc");
5794 }
5795 if (i < __arraycount(adapter->stats.qprc)) {
5796 evcnt_attach_dynamic(&adapter->stats.qprc[i],
5797 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5798 "qprc");
5799 evcnt_attach_dynamic(&adapter->stats.qptc[i],
5800 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5801 "qptc");
5802 evcnt_attach_dynamic(&adapter->stats.qbrc[i],
5803 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5804 "qbrc");
5805 evcnt_attach_dynamic(&adapter->stats.qbtc[i],
5806 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5807 "qbtc");
5808 evcnt_attach_dynamic(&adapter->stats.qprdc[i],
5809 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5810 "qprdc");
5811 }
5812
5813 evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
5814 NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
5815 evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
5816 NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
5817 evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
5818 NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
5819 evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
5820 NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
5821 evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
5822 NULL, adapter->queues[i].evnamebuf, "Rx discarded");
5823 evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
5824 NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
5825 #ifdef LRO
5826 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5827 CTLFLAG_RD, &lro->lro_queued, 0,
5828 "LRO Queued");
5829 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5830 CTLFLAG_RD, &lro->lro_flushed, 0,
5831 "LRO Flushed");
5832 #endif /* LRO */
5833 }
5834
5835 	/* MAC stats get their own sub node */
5836
5837
5838 snprintf(stats->namebuf,
5839 sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
5840
5841 evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
5842 stats->namebuf, "rx csum offload - IP");
5843 evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
5844 stats->namebuf, "rx csum offload - L4");
5845 evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
5846 stats->namebuf, "rx csum offload - IP bad");
5847 evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
5848 stats->namebuf, "rx csum offload - L4 bad");
5849 evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
5850 stats->namebuf, "Interrupt conditions zero");
5851 evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
5852 stats->namebuf, "Legacy interrupts");
5853 evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
5854 stats->namebuf, "CRC Errors");
5855 evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
5856 stats->namebuf, "Illegal Byte Errors");
5857 evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
5858 stats->namebuf, "Byte Errors");
5859 evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
5860 stats->namebuf, "MAC Short Packets Discarded");
5861 evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
5862 stats->namebuf, "MAC Local Faults");
5863 evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
5864 stats->namebuf, "MAC Remote Faults");
5865 evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
5866 stats->namebuf, "Receive Length Errors");
5867 evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
5868 stats->namebuf, "Link XON Transmitted");
5869 evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
5870 stats->namebuf, "Link XON Received");
5871 evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
5872 stats->namebuf, "Link XOFF Transmitted");
5873 evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
5874 stats->namebuf, "Link XOFF Received");
5875
5876 /* Packet Reception Stats */
5877 evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
5878 stats->namebuf, "Total Octets Received");
5879 evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
5880 stats->namebuf, "Good Octets Received");
5881 evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
5882 stats->namebuf, "Total Packets Received");
5883 evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
5884 stats->namebuf, "Good Packets Received");
5885 evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
5886 stats->namebuf, "Multicast Packets Received");
5887 evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
5888 stats->namebuf, "Broadcast Packets Received");
5889 evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
5890 stats->namebuf, "64 byte frames received ");
5891 evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
5892 stats->namebuf, "65-127 byte frames received");
5893 evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
5894 stats->namebuf, "128-255 byte frames received");
5895 evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
5896 stats->namebuf, "256-511 byte frames received");
5897 evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
5898 stats->namebuf, "512-1023 byte frames received");
5899 evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
5900 	    stats->namebuf, "1024-1522 byte frames received");
5901 evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
5902 stats->namebuf, "Receive Undersized");
5903 evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
5904 stats->namebuf, "Fragmented Packets Received ");
5905 evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
5906 stats->namebuf, "Oversized Packets Received");
5907 evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
5908 stats->namebuf, "Received Jabber");
5909 evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
5910 stats->namebuf, "Management Packets Received");
5911 evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
5912 stats->namebuf, "Checksum Errors");
5913
5914 /* Packet Transmission Stats */
5915 evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
5916 stats->namebuf, "Good Octets Transmitted");
5917 evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
5918 stats->namebuf, "Total Packets Transmitted");
5919 evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
5920 stats->namebuf, "Good Packets Transmitted");
5921 evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
5922 stats->namebuf, "Broadcast Packets Transmitted");
5923 evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
5924 stats->namebuf, "Multicast Packets Transmitted");
5925 evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
5926 stats->namebuf, "Management Packets Transmitted");
5927 evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
5928 stats->namebuf, "64 byte frames transmitted ");
5929 evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
5930 stats->namebuf, "65-127 byte frames transmitted");
5931 evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
5932 stats->namebuf, "128-255 byte frames transmitted");
5933 evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
5934 stats->namebuf, "256-511 byte frames transmitted");
5935 evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
5936 stats->namebuf, "512-1023 byte frames transmitted");
5937 evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
5938 stats->namebuf, "1024-1522 byte frames transmitted");
5939 }
5940
5941 /*
5942 ** Set flow control using sysctl:
5943 ** Flow control values:
5944 ** 0 - off
5945 ** 1 - rx pause
5946 ** 2 - tx pause
5947 ** 3 - full
5948 */
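/*
** Example (assuming this handler is attached as a leaf named "fc"
** under the per-device node created by ixgbe_sysctl_instance(); the
** attachment itself is outside this excerpt):
**	sysctl -w hw.ixg0.fc=3
** would request full flow control on the first device.
*/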
5949 static int
5950 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
5951 {
5952 struct sysctlnode node;
5953 int error, last;
5954 struct adapter *adapter;
5955
5956 node = *rnode;
5957 adapter = (struct adapter *)node.sysctl_data;
5958 node.sysctl_data = &adapter->fc;
5959 last = adapter->fc;
5960 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5961 if (error != 0 || newp == NULL)
5962 return error;
5963
5964 /* Don't bother if it's not changed */
5965 if (adapter->fc == last)
5966 return (0);
5967
5968 switch (adapter->fc) {
5969 case ixgbe_fc_rx_pause:
5970 case ixgbe_fc_tx_pause:
5971 case ixgbe_fc_full:
5972 adapter->hw.fc.requested_mode = adapter->fc;
5973 if (adapter->num_queues > 1)
5974 ixgbe_disable_rx_drop(adapter);
5975 break;
5976 case ixgbe_fc_none:
5977 adapter->hw.fc.requested_mode = ixgbe_fc_none;
5978 if (adapter->num_queues > 1)
5979 ixgbe_enable_rx_drop(adapter);
5980 break;
5981 default:
5982 adapter->fc = last;
5983 return (EINVAL);
5984 }
5985 /* Don't autoneg if forcing a value */
5986 adapter->hw.fc.disable_fc_autoneg = TRUE;
5987 ixgbe_fc_enable(&adapter->hw);
5988 return 0;
5989 }
5990
5991 /*
5992 ** Control link advertise speed:
5993 ** 1 - advertise only 1G
5994 ** 2 - advertise 100Mb
5995 ** 3 - advertise normal
5996 */
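/*
** Example (the leaf name "advertise_speed" and its attachment are
** assumed here, following the FreeBSD driver; the actual hookup is
** outside this excerpt):
**	sysctl -w hw.ixg0.advertise_speed=1
** would restrict advertisement to 1G full duplex.
*/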
5997 static int
5998 ixgbe_set_advertise(SYSCTLFN_ARGS)
5999 {
6000 struct sysctlnode node;
6001 int t, error = 0;
6002 struct adapter *adapter;
6003 device_t dev;
6004 struct ixgbe_hw *hw;
6005 ixgbe_link_speed speed, last;
6006
6007 node = *rnode;
6008 adapter = (struct adapter *)node.sysctl_data;
6009 dev = adapter->dev;
6010 hw = &adapter->hw;
6011 last = adapter->advertise;
6012 t = adapter->advertise;
6013 node.sysctl_data = &t;
6014 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6015 if (error != 0 || newp == NULL)
6016 return error;
6017
6018 	if (t == last) /* no change */
6019 return (0);
6020
6021 if (t == -1)
6022 return 0;
6023
6024 adapter->advertise = t;
6025
6026 if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
6027 (hw->phy.multispeed_fiber)))
6028 return (EINVAL);
6029
6030 if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
6031 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
6032 return (EINVAL);
6033 }
6034
6035 if (adapter->advertise == 1)
6036 speed = IXGBE_LINK_SPEED_1GB_FULL;
6037 else if (adapter->advertise == 2)
6038 speed = IXGBE_LINK_SPEED_100_FULL;
6039 else if (adapter->advertise == 3)
6040 speed = IXGBE_LINK_SPEED_1GB_FULL |
6041 IXGBE_LINK_SPEED_10GB_FULL;
6042 else {/* bogus value */
6043 adapter->advertise = last;
6044 return (EINVAL);
6045 }
6046
6047 hw->mac.autotry_restart = TRUE;
6048 hw->mac.ops.setup_link(hw, speed, TRUE);
6049
6050 return 0;
6051 }
6052
6053 /*
6054 ** Thermal Shutdown Trigger
6055 ** - cause a Thermal Overtemp IRQ
6056 */
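/*
** Writing any nonzero value through the sysctl leaf attached to this
** handler sets the TS bit in EICS, which raises the overtemp
** interrupt; it is a no-op on anything but X540.
*/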
6057 static int
6058 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
6059 {
6060 struct sysctlnode node;
6061 int error, fire = 0;
6062 struct adapter *adapter;
6063 struct ixgbe_hw *hw;
6064
6065 node = *rnode;
6066 adapter = (struct adapter *)node.sysctl_data;
6067 hw = &adapter->hw;
6068
6069 if (hw->mac.type != ixgbe_mac_X540)
6070 return (0);
6071
6072 node.sysctl_data = &fire;
6073 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6074 if ((error) || (newp == NULL))
6075 return (error);
6076
6077 if (fire) {
6078 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
6079 reg |= IXGBE_EICR_TS;
6080 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
6081 }
6082
6083 return (0);
6084 }
6085
6086 /*
6087 ** Enable the hardware to drop packets when the buffer is
6088 ** full.  This is useful with multiple queues, so that no
6089 ** single full queue stalls the entire RX engine.  We only
6090 ** enable this when multiqueue is in use and flow control
6091 ** is disabled.
6092 */
6093 static void
6094 ixgbe_enable_rx_drop(struct adapter *adapter)
6095 {
6096 struct ixgbe_hw *hw = &adapter->hw;
6097
6098 for (int i = 0; i < adapter->num_queues; i++) {
6099 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6100 srrctl |= IXGBE_SRRCTL_DROP_EN;
6101 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6102 }
6103 }
6104
6105 static void
6106 ixgbe_disable_rx_drop(struct adapter *adapter)
6107 {
6108 struct ixgbe_hw *hw = &adapter->hw;
6109
6110 for (int i = 0; i < adapter->num_queues; i++) {
6111 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6112 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6113 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6114 }
6115 }
6116