ixgbe.c revision 1.37 1 /******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
62 /*$NetBSD: ixgbe.c,v 1.37 2016/02/09 08:32:11 ozaki-r Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68 #include "vlan.h"
69
70 #include <sys/cprng.h>
71
72 /*********************************************************************
73 * Set this to one to display debug statistics
74 *********************************************************************/
75 int ixgbe_display_debug_stats = 0;
76
77 /*********************************************************************
78 * Driver version
79 *********************************************************************/
80 char ixgbe_driver_version[] = "2.5.15";
81
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *
 *  ixgbe_lookup() treats a SubVendor ID or SubDevice ID of 0 as a
 *  wildcard that matches any subsystem value, and stops scanning at
 *  the first entry whose Vendor ID is 0.  Every entry below uses
 *  string index 0, the only entry in ixgbe_strings.
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};
121
122 /*********************************************************************
123 * Table of branding strings
124 *********************************************************************/
125
126 static const char *ixgbe_strings[] = {
127 "Intel(R) PRO/10GbE PCI-Express Network Driver"
128 };
129
130 /*********************************************************************
131 * Function prototypes
132 *********************************************************************/
133 static int ixgbe_probe(device_t, cfdata_t, void *);
134 static void ixgbe_attach(device_t, device_t, void *);
135 static int ixgbe_detach(device_t, int);
136 #if 0
137 static int ixgbe_shutdown(device_t);
138 #endif
139 #ifdef IXGBE_LEGACY_TX
140 static void ixgbe_start(struct ifnet *);
141 static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
142 #else /* ! IXGBE_LEGACY_TX */
143 static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
144 static int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
145 static void ixgbe_qflush(struct ifnet *);
146 static void ixgbe_deferred_mq_start(void *, int);
147 #endif /* IXGBE_LEGACY_TX */
148 static int ixgbe_ioctl(struct ifnet *, u_long, void *);
149 static void ixgbe_ifstop(struct ifnet *, int);
150 static int ixgbe_init(struct ifnet *);
151 static void ixgbe_init_locked(struct adapter *);
152 static void ixgbe_stop(void *);
153 static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
154 static int ixgbe_media_change(struct ifnet *);
155 static void ixgbe_identify_hardware(struct adapter *);
156 static int ixgbe_allocate_pci_resources(struct adapter *,
157 const struct pci_attach_args *);
158 static void ixgbe_get_slot_info(struct ixgbe_hw *);
159 static int ixgbe_allocate_msix(struct adapter *,
160 const struct pci_attach_args *);
161 static int ixgbe_allocate_legacy(struct adapter *,
162 const struct pci_attach_args *);
163 static int ixgbe_allocate_queues(struct adapter *);
164 static int ixgbe_setup_msix(struct adapter *);
165 static void ixgbe_free_pci_resources(struct adapter *);
166 static void ixgbe_local_timer(void *);
167 static int ixgbe_setup_interface(device_t, struct adapter *);
168 static void ixgbe_config_link(struct adapter *);
169
170 static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
171 static int ixgbe_setup_transmit_structures(struct adapter *);
172 static void ixgbe_setup_transmit_ring(struct tx_ring *);
173 static void ixgbe_initialize_transmit_units(struct adapter *);
174 static void ixgbe_free_transmit_structures(struct adapter *);
175 static void ixgbe_free_transmit_buffers(struct tx_ring *);
176
177 static int ixgbe_allocate_receive_buffers(struct rx_ring *);
178 static int ixgbe_setup_receive_structures(struct adapter *);
179 static int ixgbe_setup_receive_ring(struct rx_ring *);
180 static void ixgbe_initialize_receive_units(struct adapter *);
181 static void ixgbe_free_receive_structures(struct adapter *);
182 static void ixgbe_free_receive_buffers(struct rx_ring *);
183 static void ixgbe_setup_hw_rsc(struct rx_ring *);
184
185 static void ixgbe_enable_intr(struct adapter *);
186 static void ixgbe_disable_intr(struct adapter *);
187 static void ixgbe_update_stats_counters(struct adapter *);
188 static void ixgbe_txeof(struct tx_ring *);
189 static bool ixgbe_rxeof(struct ix_queue *);
190 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
191 struct ixgbe_hw_stats *);
192 static void ixgbe_set_promisc(struct adapter *);
193 static void ixgbe_set_multi(struct adapter *);
194 static void ixgbe_update_link_status(struct adapter *);
195 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
196 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
197 static int ixgbe_set_flowcntl(SYSCTLFN_PROTO);
198 static int ixgbe_set_advertise(SYSCTLFN_PROTO);
199 static int ixgbe_set_thermal_test(SYSCTLFN_PROTO);
200 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
201 struct ixgbe_dma_alloc *, int);
202 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
203 static int ixgbe_tx_ctx_setup(struct tx_ring *,
204 struct mbuf *, u32 *, u32 *);
205 static int ixgbe_tso_setup(struct tx_ring *,
206 struct mbuf *, u32 *, u32 *);
207 static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
208 static void ixgbe_configure_ivars(struct adapter *);
209 static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
210
211 static void ixgbe_setup_vlan_hw_support(struct adapter *);
212 #if 0
213 static void ixgbe_register_vlan(void *, struct ifnet *, u16);
214 static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
215 #endif
216
217 static void ixgbe_add_hw_stats(struct adapter *adapter);
218
219 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
220 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
221 struct mbuf *, u32);
222
223 static void ixgbe_enable_rx_drop(struct adapter *);
224 static void ixgbe_disable_rx_drop(struct adapter *);
225
226 /* Support for pluggable optic modules */
227 static bool ixgbe_sfp_probe(struct adapter *);
228 static void ixgbe_setup_optics(struct adapter *);
229
/* Legacy (single vector) interrupt handler */
231 static int ixgbe_legacy_irq(void *);
232
233 #if defined(NETBSD_MSI_OR_MSIX)
234 /* The MSI/X Interrupt handlers */
235 static int ixgbe_msix_que(void *);
236 static int ixgbe_msix_link(void *);
237 #endif
238
239 /* Software interrupts for deferred work */
240 static void ixgbe_handle_que(void *);
241 static void ixgbe_handle_link(void *);
242 static void ixgbe_handle_msf(void *);
243 static void ixgbe_handle_mod(void *);
244
245 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
246 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
247
248 #ifdef IXGBE_FDIR
249 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
250 static void ixgbe_reinit_fdir(void *, int);
251 #endif
252
253 /* Missing shared code prototype */
254 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
255
256 /*********************************************************************
257 * FreeBSD Device Interface Entry Points
258 *********************************************************************/
259
260 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
261 ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
262 DVF_DETACH_SHUTDOWN);
263
264 #if 0
265 devclass_t ixgbe_devclass;
266 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
267
268 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
269 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
270 #endif
271
272 /*
273 ** TUNEABLE PARAMETERS:
274 */
275
276 /*
277 ** AIM: Adaptive Interrupt Moderation
278 ** which means that the interrupt rate
279 ** is varied over time based on the
280 ** traffic for that interrupt vector
281 */
282 static int ixgbe_enable_aim = TRUE;
283 #define SYSCTL_INT(__x, __y)
284 SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
285
286 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
287 SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
288
289 /* How many packets rxeof tries to clean at a time */
290 static int ixgbe_rx_process_limit = 256;
291 SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
292
293 /* How many packets txeof tries to clean at a time */
294 static int ixgbe_tx_process_limit = 256;
295 SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
296
/*
** Smart speed setting, default to on.
** This only works as a compile option
** right now, as it is used during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
304 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
305
306 /*
307 * MSIX should be the default for best performance,
308 * but this allows it to be forced off for testing.
309 */
310 static int ixgbe_enable_msix = 1;
311 SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
312
313 #if defined(NETBSD_MSI_OR_MSIX)
/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8. This
 * can be overridden manually here.
 */
320 static int ixgbe_num_queues = 1;
321 SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
322 #endif
323
324 /*
325 ** Number of TX descriptors per ring,
326 ** setting higher than RX as this seems
327 ** the better performing choice.
328 */
329 static int ixgbe_txd = PERFORM_TXD;
330 SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);
331
332 /* Number of RX descriptors per ring */
333 static int ixgbe_rxd = PERFORM_RXD;
334 SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);
335
336 /*
337 ** Defining this on will allow the use
338 ** of unsupported SFP+ modules, note that
339 ** doing so you are on your own :)
340 */
341 static int allow_unsupported_sfp = false;
342 SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
343
344 /*
345 ** HW RSC control:
346 ** this feature only works with
347 ** IPv4, and only on 82599 and later.
348 ** Also this will cause IP forwarding to
349 ** fail and that can't be controlled by
350 ** the stack as LRO can. For all these
351 ** reasons I've deemed it best to leave
352 ** this off and not bother with a tuneable
353 ** interface, this would need to be compiled
354 ** to enable.
355 */
356 static bool ixgbe_rsc_enable = FALSE;
357
358 /* Keep running tab on them for sanity check */
359 static int ixgbe_total_ports;
360
361 #ifdef IXGBE_FDIR
362 /*
363 ** For Flow Director: this is the
364 ** number of TX packets we sample
365 ** for the filter pool, this means
366 ** every 20th packet will be probed.
367 **
368 ** This feature can be disabled by
369 ** setting this to 0.
370 */
371 static int atr_sample_rate = 20;
372 /*
373 ** Flow Director actually 'steals'
374 ** part of the packet buffer as its
375 ** filter pool, this variable controls
376 ** how much it uses:
377 ** 0 = 64K, 1 = 128K, 2 = 256K
378 */
379 static int fdir_pballoc = 1;
380 #endif
381
382 #ifdef DEV_NETMAP
383 /*
384 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
385 * be a reference on how to implement netmap support in a driver.
386 * Additional comments are in ixgbe_netmap.h .
387 *
388 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
389 * that extend the standard driver.
390 */
391 #include <dev/netmap/ixgbe_netmap.h>
392 #endif /* DEV_NETMAP */
393
394 /*********************************************************************
395 * Device identification routine
396 *
397 * ixgbe_probe determines if the driver should be loaded on
398 * adapter based on PCI vendor/device id of the adapter.
399 *
400 * return 1 on success, 0 on failure
401 *********************************************************************/
402
403 static int
404 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
405 {
406 const struct pci_attach_args *pa = aux;
407
408 return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
409 }
410
411 static ixgbe_vendor_info_t *
412 ixgbe_lookup(const struct pci_attach_args *pa)
413 {
414 pcireg_t subid;
415 ixgbe_vendor_info_t *ent;
416
417 INIT_DEBUGOUT("ixgbe_probe: begin");
418
419 if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
420 return NULL;
421
422 subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
423
424 for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
425 if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
426 PCI_PRODUCT(pa->pa_id) == ent->device_id &&
427
428 (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
429 ent->subvendor_id == 0) &&
430
431 (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
432 ent->subdevice_id == 0)) {
433 ++ixgbe_total_ports;
434 return ent;
435 }
436 }
437 return NULL;
438 }
439
440
441 static void
442 ixgbe_sysctl_attach(struct adapter *adapter)
443 {
444 struct sysctllog **log;
445 const struct sysctlnode *rnode, *cnode;
446 device_t dev;
447
448 dev = adapter->dev;
449 log = &adapter->sysctllog;
450
451 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
452 aprint_error_dev(dev, "could not create sysctl root\n");
453 return;
454 }
455
456 if (sysctl_createv(log, 0, &rnode, &cnode,
457 CTLFLAG_READONLY, CTLTYPE_INT,
458 "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
459 NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
460 aprint_error_dev(dev, "could not create sysctl\n");
461
462 if (sysctl_createv(log, 0, &rnode, &cnode,
463 CTLFLAG_READONLY, CTLTYPE_INT,
464 "num_queues", SYSCTL_DESCR("Number of queues"),
465 NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
466 aprint_error_dev(dev, "could not create sysctl\n");
467
468 if (sysctl_createv(log, 0, &rnode, &cnode,
469 CTLFLAG_READWRITE, CTLTYPE_INT,
470 "fc", SYSCTL_DESCR("Flow Control"),
471 ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
472 aprint_error_dev(dev, "could not create sysctl\n");
473
474 /* XXX This is an *instance* sysctl controlling a *global* variable.
475 * XXX It's that way in the FreeBSD driver that this derives from.
476 */
477 if (sysctl_createv(log, 0, &rnode, &cnode,
478 CTLFLAG_READWRITE, CTLTYPE_INT,
479 "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
480 NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
481 aprint_error_dev(dev, "could not create sysctl\n");
482
483 if (sysctl_createv(log, 0, &rnode, &cnode,
484 CTLFLAG_READWRITE, CTLTYPE_INT,
485 "advertise_speed", SYSCTL_DESCR("Link Speed"),
486 ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
487 aprint_error_dev(dev, "could not create sysctl\n");
488
489 if (sysctl_createv(log, 0, &rnode, &cnode,
490 CTLFLAG_READWRITE, CTLTYPE_INT,
491 "ts", SYSCTL_DESCR("Thermal Test"),
492 ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
493 aprint_error_dev(dev, "could not create sysctl\n");
494 }
495
496 /*********************************************************************
497 * Device initialization routine
498 *
499 * The attach entry point is called when the driver is being loaded.
500 * This routine identifies the type of hardware, allocates all resources
501 * and initializes the hardware.
502 *
503 * return 0 on success, positive on failure
504 *********************************************************************/
505
506 static void
507 ixgbe_attach(device_t parent, device_t dev, void *aux)
508 {
509 struct adapter *adapter;
510 struct ixgbe_hw *hw;
511 int error = -1;
512 u16 csum;
513 u32 ctrl_ext;
514 ixgbe_vendor_info_t *ent;
515 const struct pci_attach_args *pa = aux;
516
517 INIT_DEBUGOUT("ixgbe_attach: begin");
518
519 /* Allocate, clear, and link in our adapter structure */
520 adapter = device_private(dev);
521 adapter->dev = adapter->osdep.dev = dev;
522 hw = &adapter->hw;
523 adapter->osdep.pc = pa->pa_pc;
524 adapter->osdep.tag = pa->pa_tag;
525 adapter->osdep.dmat = pa->pa_dmat;
526 adapter->osdep.attached = false;
527
528 ent = ixgbe_lookup(pa);
529
530 KASSERT(ent != NULL);
531
532 aprint_normal(": %s, Version - %s\n",
533 ixgbe_strings[ent->index], ixgbe_driver_version);
534
535 /* Core Lock Init*/
536 IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
537
538 /* SYSCTL APIs */
539
540 ixgbe_sysctl_attach(adapter);
541
542 /* Set up the timer callout */
543 callout_init(&adapter->timer, 0);
544
545 /* Determine hardware revision */
546 ixgbe_identify_hardware(adapter);
547
548 /* Do base PCI setup - map BAR0 */
549 if (ixgbe_allocate_pci_resources(adapter, pa)) {
550 aprint_error_dev(dev, "Allocation of PCI resources failed\n");
551 error = ENXIO;
552 goto err_out;
553 }
554
555 /* Do descriptor calc and sanity checks */
556 if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
557 ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
558 aprint_error_dev(dev, "TXD config issue, using default!\n");
559 adapter->num_tx_desc = DEFAULT_TXD;
560 } else
561 adapter->num_tx_desc = ixgbe_txd;
562
563 /*
564 ** With many RX rings it is easy to exceed the
565 ** system mbuf allocation. Tuning nmbclusters
566 ** can alleviate this.
567 */
568 if (nmbclusters > 0 ) {
569 int s;
570 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
571 if (s > nmbclusters) {
572 aprint_error_dev(dev, "RX Descriptors exceed "
573 "system mbuf max, using default instead!\n");
574 ixgbe_rxd = DEFAULT_RXD;
575 }
576 }
577
578 if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
579 ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
580 aprint_error_dev(dev, "RXD config issue, using default!\n");
581 adapter->num_rx_desc = DEFAULT_RXD;
582 } else
583 adapter->num_rx_desc = ixgbe_rxd;
584
585 /* Allocate our TX/RX Queues */
586 if (ixgbe_allocate_queues(adapter)) {
587 error = ENOMEM;
588 goto err_out;
589 }
590
591 /* Allocate multicast array memory. */
592 adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
593 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
594 if (adapter->mta == NULL) {
595 aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
596 error = ENOMEM;
597 goto err_late;
598 }
599
600 /* Initialize the shared code */
601 hw->allow_unsupported_sfp = allow_unsupported_sfp;
602 error = ixgbe_init_shared_code(hw);
603 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
604 /*
605 ** No optics in this port, set up
606 ** so the timer routine will probe
607 ** for later insertion.
608 */
609 adapter->sfp_probe = TRUE;
610 error = 0;
611 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
612 aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
613 error = EIO;
614 goto err_late;
615 } else if (error) {
616 aprint_error_dev(dev,"Unable to initialize the shared code\n");
617 error = EIO;
618 goto err_late;
619 }
620
621 /* Make sure we have a good EEPROM before we read from it */
622 if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
623 aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
624 error = EIO;
625 goto err_late;
626 }
627
628 error = ixgbe_init_hw(hw);
629 switch (error) {
630 case IXGBE_ERR_EEPROM_VERSION:
631 aprint_error_dev(dev, "This device is a pre-production adapter/"
632 "LOM. Please be aware there may be issues associated "
633 "with your hardware.\n If you are experiencing problems "
634 "please contact your Intel or hardware representative "
635 "who provided you with this hardware.\n");
636 break;
637 case IXGBE_ERR_SFP_NOT_SUPPORTED:
638 aprint_error_dev(dev,"Unsupported SFP+ Module\n");
639 error = EIO;
640 aprint_error_dev(dev,"Hardware Initialization Failure\n");
641 goto err_late;
642 case IXGBE_ERR_SFP_NOT_PRESENT:
643 device_printf(dev,"No SFP+ Module found\n");
644 /* falls thru */
645 default:
646 break;
647 }
648
649 /* Detect and set physical type */
650 ixgbe_setup_optics(adapter);
651
652 error = -1;
653 if ((adapter->msix > 1) && (ixgbe_enable_msix))
654 error = ixgbe_allocate_msix(adapter, pa);
655 if (error != 0)
656 error = ixgbe_allocate_legacy(adapter, pa);
657 if (error)
658 goto err_late;
659
660 /* Setup OS specific network interface */
661 if (ixgbe_setup_interface(dev, adapter) != 0)
662 goto err_late;
663
664 /* Initialize statistics */
665 ixgbe_update_stats_counters(adapter);
666
667 /*
668 ** Check PCIE slot type/speed/width
669 */
670 ixgbe_get_slot_info(hw);
671
672 /* Set an initial default flow control value */
673 adapter->fc = ixgbe_fc_full;
674
675 /* let hardware know driver is loaded */
676 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
677 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
678 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
679
680 ixgbe_add_hw_stats(adapter);
681
682 #ifdef DEV_NETMAP
683 ixgbe_netmap_attach(adapter);
684 #endif /* DEV_NETMAP */
685 INIT_DEBUGOUT("ixgbe_attach: end");
686 adapter->osdep.attached = true;
687 return;
688 err_late:
689 ixgbe_free_transmit_structures(adapter);
690 ixgbe_free_receive_structures(adapter);
691 err_out:
692 if (adapter->ifp != NULL)
693 if_free(adapter->ifp);
694 ixgbe_free_pci_resources(adapter);
695 if (adapter->mta != NULL)
696 free(adapter->mta, M_DEVBUF);
697 return;
698
699 }
700
701 /*********************************************************************
702 * Device removal routine
703 *
704 * The detach entry point is called when the driver is being removed.
705 * This routine stops the adapter and deallocates all the resources
706 * that were allocated for driver operation.
707 *
708 * return 0 on success, positive on failure
709 *********************************************************************/
710
711 static int
712 ixgbe_detach(device_t dev, int flags)
713 {
714 struct adapter *adapter = device_private(dev);
715 struct rx_ring *rxr = adapter->rx_rings;
716 struct ixgbe_hw_stats *stats = &adapter->stats;
717 struct ix_queue *que = adapter->queues;
718 struct tx_ring *txr = adapter->tx_rings;
719 u32 ctrl_ext;
720
721 INIT_DEBUGOUT("ixgbe_detach: begin");
722 if (adapter->osdep.attached == false)
723 return 0;
724
725 #if NVLAN > 0
726 /* Make sure VLANs are not using driver */
727 if (!VLAN_ATTACHED(&adapter->osdep.ec))
728 ; /* nothing to do: no VLANs */
729 else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
730 vlan_ifdetach(adapter->ifp);
731 else {
732 aprint_error_dev(dev, "VLANs in use\n");
733 return EBUSY;
734 }
735 #endif
736
737 IXGBE_CORE_LOCK(adapter);
738 ixgbe_stop(adapter);
739 IXGBE_CORE_UNLOCK(adapter);
740
741 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
742 #ifndef IXGBE_LEGACY_TX
743 softint_disestablish(txr->txq_si);
744 #endif
745 softint_disestablish(que->que_si);
746 }
747
748 /* Drain the Link queue */
749 softint_disestablish(adapter->link_si);
750 softint_disestablish(adapter->mod_si);
751 softint_disestablish(adapter->msf_si);
752 #ifdef IXGBE_FDIR
753 softint_disestablish(adapter->fdir_si);
754 #endif
755
756 /* let hardware know driver is unloading */
757 ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
758 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
759 IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
760
761 ether_ifdetach(adapter->ifp);
762 callout_halt(&adapter->timer, NULL);
763 #ifdef DEV_NETMAP
764 netmap_detach(adapter->ifp);
765 #endif /* DEV_NETMAP */
766 ixgbe_free_pci_resources(adapter);
767 #if 0 /* XXX the NetBSD port is probably missing something here */
768 bus_generic_detach(dev);
769 #endif
770 if_detach(adapter->ifp);
771
772 sysctl_teardown(&adapter->sysctllog);
773 evcnt_detach(&adapter->handleq);
774 evcnt_detach(&adapter->req);
775 evcnt_detach(&adapter->morerx);
776 evcnt_detach(&adapter->moretx);
777 evcnt_detach(&adapter->txloops);
778 evcnt_detach(&adapter->efbig_tx_dma_setup);
779 evcnt_detach(&adapter->m_defrag_failed);
780 evcnt_detach(&adapter->efbig2_tx_dma_setup);
781 evcnt_detach(&adapter->einval_tx_dma_setup);
782 evcnt_detach(&adapter->other_tx_dma_setup);
783 evcnt_detach(&adapter->eagain_tx_dma_setup);
784 evcnt_detach(&adapter->enomem_tx_dma_setup);
785 evcnt_detach(&adapter->watchdog_events);
786 evcnt_detach(&adapter->tso_err);
787 evcnt_detach(&adapter->link_irq);
788
789 txr = adapter->tx_rings;
790 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
791 evcnt_detach(&txr->no_desc_avail);
792 evcnt_detach(&txr->total_packets);
793 evcnt_detach(&txr->tso_tx);
794
795 if (i < __arraycount(adapter->stats.mpc)) {
796 evcnt_detach(&adapter->stats.mpc[i]);
797 }
798 if (i < __arraycount(adapter->stats.pxontxc)) {
799 evcnt_detach(&adapter->stats.pxontxc[i]);
800 evcnt_detach(&adapter->stats.pxonrxc[i]);
801 evcnt_detach(&adapter->stats.pxofftxc[i]);
802 evcnt_detach(&adapter->stats.pxoffrxc[i]);
803 evcnt_detach(&adapter->stats.pxon2offc[i]);
804 }
805 if (i < __arraycount(adapter->stats.qprc)) {
806 evcnt_detach(&adapter->stats.qprc[i]);
807 evcnt_detach(&adapter->stats.qptc[i]);
808 evcnt_detach(&adapter->stats.qbrc[i]);
809 evcnt_detach(&adapter->stats.qbtc[i]);
810 evcnt_detach(&adapter->stats.qprdc[i]);
811 }
812
813 evcnt_detach(&rxr->rx_packets);
814 evcnt_detach(&rxr->rx_bytes);
815 evcnt_detach(&rxr->rx_copies);
816 evcnt_detach(&rxr->no_jmbuf);
817 evcnt_detach(&rxr->rx_discarded);
818 evcnt_detach(&rxr->rx_irq);
819 }
820 evcnt_detach(&stats->ipcs);
821 evcnt_detach(&stats->l4cs);
822 evcnt_detach(&stats->ipcs_bad);
823 evcnt_detach(&stats->l4cs_bad);
824 evcnt_detach(&stats->intzero);
825 evcnt_detach(&stats->legint);
826 evcnt_detach(&stats->crcerrs);
827 evcnt_detach(&stats->illerrc);
828 evcnt_detach(&stats->errbc);
829 evcnt_detach(&stats->mspdc);
830 evcnt_detach(&stats->mlfc);
831 evcnt_detach(&stats->mrfc);
832 evcnt_detach(&stats->rlec);
833 evcnt_detach(&stats->lxontxc);
834 evcnt_detach(&stats->lxonrxc);
835 evcnt_detach(&stats->lxofftxc);
836 evcnt_detach(&stats->lxoffrxc);
837
838 /* Packet Reception Stats */
839 evcnt_detach(&stats->tor);
840 evcnt_detach(&stats->gorc);
841 evcnt_detach(&stats->tpr);
842 evcnt_detach(&stats->gprc);
843 evcnt_detach(&stats->mprc);
844 evcnt_detach(&stats->bprc);
845 evcnt_detach(&stats->prc64);
846 evcnt_detach(&stats->prc127);
847 evcnt_detach(&stats->prc255);
848 evcnt_detach(&stats->prc511);
849 evcnt_detach(&stats->prc1023);
850 evcnt_detach(&stats->prc1522);
851 evcnt_detach(&stats->ruc);
852 evcnt_detach(&stats->rfc);
853 evcnt_detach(&stats->roc);
854 evcnt_detach(&stats->rjc);
855 evcnt_detach(&stats->mngprc);
856 evcnt_detach(&stats->xec);
857
858 /* Packet Transmission Stats */
859 evcnt_detach(&stats->gotc);
860 evcnt_detach(&stats->tpt);
861 evcnt_detach(&stats->gptc);
862 evcnt_detach(&stats->bptc);
863 evcnt_detach(&stats->mptc);
864 evcnt_detach(&stats->mngptc);
865 evcnt_detach(&stats->ptc64);
866 evcnt_detach(&stats->ptc127);
867 evcnt_detach(&stats->ptc255);
868 evcnt_detach(&stats->ptc511);
869 evcnt_detach(&stats->ptc1023);
870 evcnt_detach(&stats->ptc1522);
871
872 ixgbe_free_transmit_structures(adapter);
873 ixgbe_free_receive_structures(adapter);
874 free(adapter->mta, M_DEVBUF);
875
876 IXGBE_CORE_LOCK_DESTROY(adapter);
877 return (0);
878 }
879
/*********************************************************************
 *
 *  Shutdown entry point
 *
 *  Currently compiled out.  Stops the adapter while holding the core
 *  lock and always reports success.
 *
 **********************************************************************/

#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *sc = device_private(dev);

	IXGBE_CORE_LOCK(sc);
	ixgbe_stop(sc);
	IXGBE_CORE_UNLOCK(sc);

	return 0;
}
#endif
897
898
899 #ifdef IXGBE_LEGACY_TX
900 /*********************************************************************
901 * Transmit entry point
902 *
903 * ixgbe_start is called by the stack to initiate a transmit.
904 * The driver will remain in this routine as long as there are
905 * packets to transmit and transmit resources are available.
906 * In case resources are not available stack is notified and
907 * the packet is requeued.
908 **********************************************************************/
909
910 static void
911 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
912 {
913 int rc;
914 struct mbuf *m_head;
915 struct adapter *adapter = txr->adapter;
916
917 IXGBE_TX_LOCK_ASSERT(txr);
918
919 if ((ifp->if_flags & IFF_RUNNING) == 0)
920 return;
921 if (!adapter->link_active)
922 return;
923
924 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
925 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
926 break;
927
928 IFQ_POLL(&ifp->if_snd, m_head);
929 if (m_head == NULL)
930 break;
931
932 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
933 break;
934 }
935 IFQ_DEQUEUE(&ifp->if_snd, m_head);
936 if (rc == EFBIG) {
937 struct mbuf *mtmp;
938
939 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
940 m_head = mtmp;
941 rc = ixgbe_xmit(txr, m_head);
942 if (rc != 0)
943 adapter->efbig2_tx_dma_setup.ev_count++;
944 } else
945 adapter->m_defrag_failed.ev_count++;
946 }
947 if (rc != 0) {
948 m_freem(m_head);
949 continue;
950 }
951
952 /* Send a copy of the frame to the BPF listener */
953 bpf_mtap(ifp, m_head);
954
955 /* Set watchdog on */
956 getmicrotime(&txr->watchdog_time);
957 txr->queue_status = IXGBE_QUEUE_WORKING;
958
959 }
960 return;
961 }
962
963 /*
964 * Legacy TX start - called by the stack, this
965 * always uses the first tx ring, and should
966 * not be used with multiqueue tx enabled.
967 */
968 static void
969 ixgbe_start(struct ifnet *ifp)
970 {
971 struct adapter *adapter = ifp->if_softc;
972 struct tx_ring *txr = adapter->tx_rings;
973
974 if (ifp->if_flags & IFF_RUNNING) {
975 IXGBE_TX_LOCK(txr);
976 ixgbe_start_locked(txr, ifp);
977 IXGBE_TX_UNLOCK(txr);
978 }
979 return;
980 }
981
982 #else /* ! IXGBE_LEGACY_TX */
983
984 /*
985 ** Multiqueue Transmit driver
986 **
987 */
988 static int
989 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
990 {
991 struct adapter *adapter = ifp->if_softc;
992 struct ix_queue *que;
993 struct tx_ring *txr;
994 int i, err = 0;
995 #ifdef RSS
996 uint32_t bucket_id;
997 #endif
998
999 /* Which queue to use */
1000 /*
1001 * When doing RSS, map it to the same outbound queue
1002 * as the incoming flow would be mapped to.
1003 *
1004 * If everything is setup correctly, it should be the
1005 * same bucket that the current CPU we're on is.
1006 */
1007 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
1008 #ifdef RSS
1009 if (rss_hash2bucket(m->m_pkthdr.flowid,
1010 M_HASHTYPE_GET(m), &bucket_id) == 0) {
1011 /* XXX TODO: spit out something if bucket_id > num_queues? */
1012 i = bucket_id % adapter->num_queues;
1013 } else {
1014 #endif
1015 i = m->m_pkthdr.flowid % adapter->num_queues;
1016 #ifdef RSS
1017 }
1018 #endif
1019 } else {
1020 i = curcpu % adapter->num_queues;
1021 }
1022
1023 txr = &adapter->tx_rings[i];
1024 que = &adapter->queues[i];
1025
1026 err = drbr_enqueue(ifp, txr->br, m);
1027 if (err)
1028 return (err);
1029 if (IXGBE_TX_TRYLOCK(txr)) {
1030 ixgbe_mq_start_locked(ifp, txr);
1031 IXGBE_TX_UNLOCK(txr);
1032 } else
1033 softint_schedule(txr->txq_si);
1034
1035 return (0);
1036 }
1037
1038 static int
1039 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1040 {
1041 struct adapter *adapter = txr->adapter;
1042 struct mbuf *next;
1043 int enqueued = 0, err = 0;
1044
1045 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
1046 adapter->link_active == 0)
1047 return (ENETDOWN);
1048
1049 /* Process the queue */
1050 #if __FreeBSD_version < 901504
1051 next = drbr_dequeue(ifp, txr->br);
1052 while (next != NULL) {
1053 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1054 if (next != NULL)
1055 err = drbr_enqueue(ifp, txr->br, next);
1056 #else
1057 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1058 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1059 if (next == NULL) {
1060 drbr_advance(ifp, txr->br);
1061 } else {
1062 drbr_putback(ifp, txr->br, next);
1063 }
1064 #endif
1065 break;
1066 }
1067 #if __FreeBSD_version >= 901504
1068 drbr_advance(ifp, txr->br);
1069 #endif
1070 enqueued++;
1071 /* Send a copy of the frame to the BPF listener */
1072 bpf_mtap(ifp, next);
1073 if ((ifp->if_flags & IFF_RUNNING) == 0)
1074 break;
1075 #if __FreeBSD_version < 901504
1076 next = drbr_dequeue(ifp, txr->br);
1077 #endif
1078 }
1079
1080 if (enqueued > 0) {
1081 /* Set watchdog on */
1082 txr->queue_status = IXGBE_QUEUE_WORKING;
1083 getmicrotime(&txr->watchdog_time);
1084 }
1085
1086 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
1087 ixgbe_txeof(txr);
1088
1089 return (err);
1090 }
1091
1092 /*
1093 * Called from a taskqueue to drain queued transmit packets.
1094 */
1095 static void
1096 ixgbe_deferred_mq_start(void *arg, int pending)
1097 {
1098 struct tx_ring *txr = arg;
1099 struct adapter *adapter = txr->adapter;
1100 struct ifnet *ifp = adapter->ifp;
1101
1102 IXGBE_TX_LOCK(txr);
1103 if (!drbr_empty(ifp, txr->br))
1104 ixgbe_mq_start_locked(ifp, txr);
1105 IXGBE_TX_UNLOCK(txr);
1106 }
1107
1108 /*
1109 ** Flush all ring buffers
1110 */
1111 static void
1112 ixgbe_qflush(struct ifnet *ifp)
1113 {
1114 struct adapter *adapter = ifp->if_softc;
1115 struct tx_ring *txr = adapter->tx_rings;
1116 struct mbuf *m;
1117
1118 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1119 IXGBE_TX_LOCK(txr);
1120 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1121 m_freem(m);
1122 IXGBE_TX_UNLOCK(txr);
1123 }
1124 if_qflush(ifp);
1125 }
1126 #endif /* IXGBE_LEGACY_TX */
1127
1128 static int
1129 ixgbe_ifflags_cb(struct ethercom *ec)
1130 {
1131 struct ifnet *ifp = &ec->ec_if;
1132 struct adapter *adapter = ifp->if_softc;
1133 int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
1134
1135 IXGBE_CORE_LOCK(adapter);
1136
1137 if (change != 0)
1138 adapter->if_flags = ifp->if_flags;
1139
1140 if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
1141 rc = ENETRESET;
1142 else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
1143 ixgbe_set_promisc(adapter);
1144
1145 /* Set up VLAN support and filter */
1146 ixgbe_setup_vlan_hw_support(adapter);
1147
1148 IXGBE_CORE_UNLOCK(adapter);
1149
1150 return rc;
1151 }
1152
1153 /*********************************************************************
1154 * Ioctl entry point
1155 *
1156 * ixgbe_ioctl is called when the user wants to configure the
1157 * interface.
1158 *
1159 * return 0 on success, positive on failure
1160 **********************************************************************/
1161
1162 static int
1163 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
1164 {
1165 struct adapter *adapter = ifp->if_softc;
1166 struct ixgbe_hw *hw = &adapter->hw;
1167 struct ifcapreq *ifcr = data;
1168 struct ifreq *ifr = data;
1169 int error = 0;
1170 int l4csum_en;
1171 const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
1172 IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
1173
1174 switch (command) {
1175 case SIOCSIFFLAGS:
1176 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1177 break;
1178 case SIOCADDMULTI:
1179 case SIOCDELMULTI:
1180 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1181 break;
1182 case SIOCSIFMEDIA:
1183 case SIOCGIFMEDIA:
1184 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1185 break;
1186 case SIOCSIFCAP:
1187 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1188 break;
1189 case SIOCSIFMTU:
1190 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
1191 break;
1192 default:
1193 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1194 break;
1195 }
1196
1197 switch (command) {
1198 case SIOCSIFMEDIA:
1199 case SIOCGIFMEDIA:
1200 return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1201 case SIOCGI2C:
1202 {
1203 struct ixgbe_i2c_req i2c;
1204 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1205 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1206 if (error != 0)
1207 break;
1208 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1209 error = EINVAL;
1210 break;
1211 }
1212 if (i2c.len > sizeof(i2c.data)) {
1213 error = EINVAL;
1214 break;
1215 }
1216
1217 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1218 i2c.dev_addr, i2c.data);
1219 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1220 break;
1221 }
1222 case SIOCSIFCAP:
1223 /* Layer-4 Rx checksum offload has to be turned on and
1224 * off as a unit.
1225 */
1226 l4csum_en = ifcr->ifcr_capenable & l4csum;
1227 if (l4csum_en != l4csum && l4csum_en != 0)
1228 return EINVAL;
1229 /*FALLTHROUGH*/
1230 case SIOCADDMULTI:
1231 case SIOCDELMULTI:
1232 case SIOCSIFFLAGS:
1233 case SIOCSIFMTU:
1234 default:
1235 if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
1236 return error;
1237 if ((ifp->if_flags & IFF_RUNNING) == 0)
1238 ;
1239 else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
1240 IXGBE_CORE_LOCK(adapter);
1241 ixgbe_init_locked(adapter);
1242 IXGBE_CORE_UNLOCK(adapter);
1243 } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
1244 /*
1245 * Multicast list has changed; set the hardware filter
1246 * accordingly.
1247 */
1248 IXGBE_CORE_LOCK(adapter);
1249 ixgbe_disable_intr(adapter);
1250 ixgbe_set_multi(adapter);
1251 ixgbe_enable_intr(adapter);
1252 IXGBE_CORE_UNLOCK(adapter);
1253 }
1254 return 0;
1255 }
1256
1257 return error;
1258 }
1259
1260 /*********************************************************************
1261 * Init entry point
1262 *
1263 * This routine is used in two ways. It is used by the stack as
1264 * init entry point in network interface structure. It is also used
1265 * by the driver as a hw/sw initialization routine to get to a
1266 * consistent state.
1267 *
1268 * ixgbe_init_locked() returns no value; ixgbe_init() always returns 0
1269 **********************************************************************/
1270 #define IXGBE_MHADD_MFS_SHIFT 16
1271
1272 static void
1273 ixgbe_init_locked(struct adapter *adapter)
1274 {
1275 struct ifnet *ifp = adapter->ifp;
1276 device_t dev = adapter->dev;
1277 struct ixgbe_hw *hw = &adapter->hw;
1278 u32 k, txdctl, mhadd, gpie;
1279 u32 rxdctl, rxctrl;
1280
1281 /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
1282
1283 KASSERT(mutex_owned(&adapter->core_mtx));
1284 INIT_DEBUGOUT("ixgbe_init_locked: begin");
1285 hw->adapter_stopped = FALSE;
1286 ixgbe_stop_adapter(hw);
1287 callout_stop(&adapter->timer);
1288
1289 /* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
1290 adapter->max_frame_size =
1291 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1292
1293 /* reprogram the RAR[0] in case user changed it. */
1294 ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1295
1296 /* Get the latest mac address, User can use a LAA */
1297 memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
1298 IXGBE_ETH_LENGTH_OF_ADDRESS);
1299 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1300 hw->addr_ctrl.rar_used_count = 1;
1301
1302 /* Prepare transmit descriptors and buffers */
1303 if (ixgbe_setup_transmit_structures(adapter)) {
1304 device_printf(dev,"Could not setup transmit structures\n");
1305 ixgbe_stop(adapter);
1306 return;
1307 }
1308
1309 ixgbe_init_hw(hw);
1310 ixgbe_initialize_transmit_units(adapter);
1311
1312 /* Setup Multicast table */
1313 ixgbe_set_multi(adapter);
1314
1315 /*
1316 ** Determine the correct mbuf pool
1317 ** for doing jumbo frames
1318 */
1319 if (adapter->max_frame_size <= 2048)
1320 adapter->rx_mbuf_sz = MCLBYTES;
1321 else if (adapter->max_frame_size <= 4096)
1322 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1323 else if (adapter->max_frame_size <= 9216)
1324 adapter->rx_mbuf_sz = MJUM9BYTES;
1325 else
1326 adapter->rx_mbuf_sz = MJUM16BYTES;
1327
1328 /* Prepare receive descriptors and buffers */
1329 if (ixgbe_setup_receive_structures(adapter)) {
1330 device_printf(dev,"Could not setup receive structures\n");
1331 ixgbe_stop(adapter);
1332 return;
1333 }
1334
1335 /* Configure RX settings */
1336 ixgbe_initialize_receive_units(adapter);
1337
1338 gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1339
1340 /* Enable Fan Failure Interrupt */
1341 gpie |= IXGBE_SDP1_GPIEN;
1342
1343 /* Add for Module detection */
1344 if (hw->mac.type == ixgbe_mac_82599EB)
1345 gpie |= IXGBE_SDP2_GPIEN;
1346
1347 /* Thermal Failure Detection */
1348 if (hw->mac.type == ixgbe_mac_X540)
1349 gpie |= IXGBE_SDP0_GPIEN;
1350
1351 if (adapter->msix > 1) {
1352 /* Enable Enhanced MSIX mode */
1353 gpie |= IXGBE_GPIE_MSIX_MODE;
1354 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1355 IXGBE_GPIE_OCD;
1356 }
1357 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1358
1359 /* Set MTU size */
1360 if (ifp->if_mtu > ETHERMTU) {
1361 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1362 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1363 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1364 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1365 }
1366
1367 /* Now enable all the queues */
1368
1369 for (int i = 0; i < adapter->num_queues; i++) {
1370 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1371 txdctl |= IXGBE_TXDCTL_ENABLE;
1372 /* Set WTHRESH to 8, burst writeback */
1373 txdctl |= (8 << 16);
1374 /*
1375 * When the internal queue falls below PTHRESH (32),
1376 * start prefetching as long as there are at least
1377 * HTHRESH (1) buffers ready. The values are taken
1378 * from the Intel linux driver 3.8.21.
1379 * Prefetching enables tx line rate even with 1 queue.
1380 */
1381 txdctl |= (32 << 0) | (1 << 8);
1382 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1383 }
1384
1385 for (int i = 0; i < adapter->num_queues; i++) {
1386 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1387 if (hw->mac.type == ixgbe_mac_82598EB) {
1388 /*
1389 ** PTHRESH = 21
1390 ** HTHRESH = 4
1391 ** WTHRESH = 8
1392 */
1393 rxdctl &= ~0x3FFFFF;
1394 rxdctl |= 0x080420;
1395 }
1396 rxdctl |= IXGBE_RXDCTL_ENABLE;
1397 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1398 /* XXX I don't trust this loop, and I don't trust the
1399 * XXX memory barrier. What is this meant to do? --dyoung
1400 */
1401 for (k = 0; k < 10; k++) {
1402 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1403 IXGBE_RXDCTL_ENABLE)
1404 break;
1405 else
1406 msec_delay(1);
1407 }
1408 wmb();
1409 #ifdef DEV_NETMAP
1410 /*
1411 * In netmap mode, we must preserve the buffers made
1412 * available to userspace before the if_init()
1413 * (this is true by default on the TX side, because
1414 * init makes all buffers available to userspace).
1415 *
1416 * netmap_reset() and the device specific routines
1417 * (e.g. ixgbe_setup_receive_rings()) map these
1418 * buffers at the end of the NIC ring, so here we
1419 * must set the RDT (tail) register to make sure
1420 * they are not overwritten.
1421 *
1422 * In this driver the NIC ring starts at RDH = 0,
1423 * RDT points to the last slot available for reception (?),
1424 * so RDT = num_rx_desc - 1 means the whole ring is available.
1425 */
1426 if (ifp->if_capenable & IFCAP_NETMAP) {
1427 struct netmap_adapter *na = NA(adapter->ifp);
1428 struct netmap_kring *kring = &na->rx_rings[i];
1429 int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
1430
1431 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1432 } else
1433 #endif /* DEV_NETMAP */
1434 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1435 }
1436
1437 /* Enable Receive engine */
1438 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1439 if (hw->mac.type == ixgbe_mac_82598EB)
1440 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1441 rxctrl |= IXGBE_RXCTRL_RXEN;
1442 ixgbe_enable_rx_dma(hw, rxctrl);
1443
1444 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1445
1446 /* Set up MSI/X routing */
1447 if (ixgbe_enable_msix) {
1448 ixgbe_configure_ivars(adapter);
1449 /* Set up auto-mask */
1450 if (hw->mac.type == ixgbe_mac_82598EB)
1451 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1452 else {
1453 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1454 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1455 }
1456 } else { /* Simple settings for Legacy/MSI */
1457 ixgbe_set_ivar(adapter, 0, 0, 0);
1458 ixgbe_set_ivar(adapter, 0, 0, 1);
1459 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1460 }
1461
1462 #ifdef IXGBE_FDIR
1463 /* Init Flow director */
1464 if (hw->mac.type != ixgbe_mac_82598EB) {
1465 u32 hdrm = 32 << fdir_pballoc;
1466
1467 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1468 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1469 }
1470 #endif
1471
1472 /*
1473 ** Check on any SFP devices that
1474 ** need to be kick-started
1475 */
1476 if (hw->phy.type == ixgbe_phy_none) {
1477 int err = hw->phy.ops.identify(hw);
1478 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1479 device_printf(dev,
1480 "Unsupported SFP+ module type was detected.\n");
1481 return;
1482 }
1483 }
1484
1485 /* Set moderation on the Link interrupt */
1486 IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1487
1488 /* Config/Enable Link */
1489 ixgbe_config_link(adapter);
1490
1491 /* Hardware Packet Buffer & Flow Control setup */
1492 {
1493 u32 rxpb, frame, size, tmp;
1494
1495 frame = adapter->max_frame_size;
1496
1497 /* Calculate High Water */
1498 if (hw->mac.type == ixgbe_mac_X540)
1499 tmp = IXGBE_DV_X540(frame, frame);
1500 else
1501 tmp = IXGBE_DV(frame, frame);
1502 size = IXGBE_BT2KB(tmp);
1503 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1504 hw->fc.high_water[0] = rxpb - size;
1505
1506 /* Now calculate Low Water */
1507 if (hw->mac.type == ixgbe_mac_X540)
1508 tmp = IXGBE_LOW_DV_X540(frame);
1509 else
1510 tmp = IXGBE_LOW_DV(frame);
1511 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1512
1513 hw->fc.requested_mode = adapter->fc;
1514 hw->fc.pause_time = IXGBE_FC_PAUSE;
1515 hw->fc.send_xon = TRUE;
1516 }
1517 /* Initialize the FC settings */
1518 ixgbe_start_hw(hw);
1519
1520 /* Set up VLAN support and filter */
1521 ixgbe_setup_vlan_hw_support(adapter);
1522
1523 /* And now turn on interrupts */
1524 ixgbe_enable_intr(adapter);
1525
1526 /* Now inform the stack we're ready */
1527 ifp->if_flags |= IFF_RUNNING;
1528
1529 return;
1530 }
1531
1532 static int
1533 ixgbe_init(struct ifnet *ifp)
1534 {
1535 struct adapter *adapter = ifp->if_softc;
1536
1537 IXGBE_CORE_LOCK(adapter);
1538 ixgbe_init_locked(adapter);
1539 IXGBE_CORE_UNLOCK(adapter);
1540 return 0; /* XXX ixgbe_init_locked cannot fail? really? */
1541 }
1542
1543
1544 /*
1545 **
1546 ** MSIX Interrupt Handlers and Tasklets
1547 **
1548 */
1549
1550 static inline void
1551 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1552 {
1553 struct ixgbe_hw *hw = &adapter->hw;
1554 u64 queue = (u64)(1ULL << vector);
1555 u32 mask;
1556
1557 if (hw->mac.type == ixgbe_mac_82598EB) {
1558 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1559 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1560 } else {
1561 mask = (queue & 0xFFFFFFFF);
1562 if (mask)
1563 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1564 mask = (queue >> 32);
1565 if (mask)
1566 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1567 }
1568 }
1569
1570 __unused static inline void
1571 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1572 {
1573 struct ixgbe_hw *hw = &adapter->hw;
1574 u64 queue = (u64)(1ULL << vector);
1575 u32 mask;
1576
1577 if (hw->mac.type == ixgbe_mac_82598EB) {
1578 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1579 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1580 } else {
1581 mask = (queue & 0xFFFFFFFF);
1582 if (mask)
1583 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1584 mask = (queue >> 32);
1585 if (mask)
1586 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1587 }
1588 }
1589
1590 static void
1591 ixgbe_handle_que(void *context)
1592 {
1593 struct ix_queue *que = context;
1594 struct adapter *adapter = que->adapter;
1595 struct tx_ring *txr = que->txr;
1596 struct ifnet *ifp = adapter->ifp;
1597
1598 adapter->handleq.ev_count++;
1599
1600 if (ifp->if_flags & IFF_RUNNING) {
1601 ixgbe_rxeof(que);
1602 IXGBE_TX_LOCK(txr);
1603 ixgbe_txeof(txr);
1604 #ifndef IXGBE_LEGACY_TX
1605 if (!drbr_empty(ifp, txr->br))
1606 ixgbe_mq_start_locked(ifp, txr);
1607 #else
1608 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1609 ixgbe_start_locked(txr, ifp);
1610 #endif
1611 IXGBE_TX_UNLOCK(txr);
1612 }
1613
1614 /* Reenable this interrupt */
1615 if (que->res != NULL)
1616 ixgbe_enable_queue(adapter, que->msix);
1617 else
1618 ixgbe_enable_intr(adapter);
1619 return;
1620 }
1621
1622
1623 /*********************************************************************
1624 *
1625 * Legacy Interrupt Service routine
1626 *
1627 **********************************************************************/
1628
1629 static int
1630 ixgbe_legacy_irq(void *arg)
1631 {
1632 struct ix_queue *que = arg;
1633 struct adapter *adapter = que->adapter;
1634 struct ixgbe_hw *hw = &adapter->hw;
1635 struct ifnet *ifp = adapter->ifp;
1636 struct tx_ring *txr = adapter->tx_rings;
1637 bool more = false;
1638 u32 reg_eicr;
1639
1640
1641 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1642
1643 adapter->stats.legint.ev_count++;
1644 ++que->irqs;
1645 if (reg_eicr == 0) {
1646 adapter->stats.intzero.ev_count++;
1647 if ((ifp->if_flags & IFF_UP) != 0)
1648 ixgbe_enable_intr(adapter);
1649 return 0;
1650 }
1651
1652 if ((ifp->if_flags & IFF_RUNNING) != 0) {
1653 #ifdef __NetBSD__
1654 /* Don't run ixgbe_rxeof in interrupt context */
1655 more = true;
1656 #else
1657 more = ixgbe_rxeof(que);
1658 #endif
1659
1660 IXGBE_TX_LOCK(txr);
1661 ixgbe_txeof(txr);
1662 #ifdef IXGBE_LEGACY_TX
1663 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1664 ixgbe_start_locked(txr, ifp);
1665 #else
1666 if (!drbr_empty(ifp, txr->br))
1667 ixgbe_mq_start_locked(ifp, txr);
1668 #endif
1669 IXGBE_TX_UNLOCK(txr);
1670 }
1671
1672 /* Check for fan failure */
1673 if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1674 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1675 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1676 "REPLACE IMMEDIATELY!!\n");
1677 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1678 }
1679
1680 /* Link status change */
1681 if (reg_eicr & IXGBE_EICR_LSC)
1682 softint_schedule(adapter->link_si);
1683
1684 if (more)
1685 #ifndef IXGBE_LEGACY_TX
1686 softint_schedule(txr->txq_si);
1687 #else
1688 softint_schedule(que->que_si);
1689 #endif
1690 else
1691 ixgbe_enable_intr(adapter);
1692 return 1;
1693 }
1694
1695
1696 #if defined(NETBSD_MSI_OR_MSIX)
1697 /*********************************************************************
1698 *
1699 * MSIX Queue Interrupt Service routine
1700 *
1701 **********************************************************************/
1702 static int
1703 ixgbe_msix_que(void *arg)
1704 {
1705 struct ix_queue *que = arg;
1706 struct adapter *adapter = que->adapter;
1707 struct ifnet *ifp = adapter->ifp;
1708 struct tx_ring *txr = que->txr;
1709 struct rx_ring *rxr = que->rxr;
1710 bool more;
1711 u32 newitr = 0;
1712
1713 /* Protect against spurious interrupts */
1714 if ((ifp->if_flags & IFF_RUNNING) == 0)
1715 return 0;
1716
1717 ixgbe_disable_queue(adapter, que->msix);
1718 ++que->irqs;
1719
1720 #ifdef __NetBSD__
1721 /* Don't run ixgbe_rxeof in interrupt context */
1722 more = true;
1723 #else
1724 more = ixgbe_rxeof(que);
1725 #endif
1726
1727 IXGBE_TX_LOCK(txr);
1728 ixgbe_txeof(txr);
1729 #ifdef IXGBE_LEGACY_TX
1730 if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
1731 ixgbe_start_locked(txr, ifp);
1732 #else
1733 if (!drbr_empty(ifp, txr->br))
1734 ixgbe_mq_start_locked(ifp, txr);
1735 #endif
1736 IXGBE_TX_UNLOCK(txr);
1737
1738 /* Do AIM now? */
1739
1740 if (ixgbe_enable_aim == FALSE)
1741 goto no_calc;
1742 /*
1743 ** Do Adaptive Interrupt Moderation:
1744 ** - Write out last calculated setting
1745 ** - Calculate based on average size over
1746 ** the last interval.
1747 */
1748 if (que->eitr_setting)
1749 IXGBE_WRITE_REG(&adapter->hw,
1750 IXGBE_EITR(que->msix), que->eitr_setting);
1751
1752 que->eitr_setting = 0;
1753
1754 /* Idle, do nothing */
1755 if ((txr->bytes == 0) && (rxr->bytes == 0))
1756 goto no_calc;
1757
1758 if ((txr->bytes) && (txr->packets))
1759 newitr = txr->bytes/txr->packets;
1760 if ((rxr->bytes) && (rxr->packets))
1761 newitr = max(newitr,
1762 (rxr->bytes / rxr->packets));
1763 newitr += 24; /* account for hardware frame, crc */
1764
1765 /* set an upper boundary */
1766 newitr = min(newitr, 3000);
1767
1768 /* Be nice to the mid range */
1769 if ((newitr > 300) && (newitr < 1200))
1770 newitr = (newitr / 3);
1771 else
1772 newitr = (newitr / 2);
1773
1774 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1775 newitr |= newitr << 16;
1776 else
1777 newitr |= IXGBE_EITR_CNT_WDIS;
1778
1779 /* save for next interrupt */
1780 que->eitr_setting = newitr;
1781
1782 /* Reset state */
1783 txr->bytes = 0;
1784 txr->packets = 0;
1785 rxr->bytes = 0;
1786 rxr->packets = 0;
1787
1788 no_calc:
1789 if (more)
1790 softint_schedule(que->que_si);
1791 else
1792 ixgbe_enable_queue(adapter, que->msix);
1793 return 1;
1794 }
1795
1796
1797 static int
1798 ixgbe_msix_link(void *arg)
1799 {
1800 struct adapter *adapter = arg;
1801 struct ixgbe_hw *hw = &adapter->hw;
1802 u32 reg_eicr;
1803
1804 ++adapter->link_irq.ev_count;
1805
1806 /* First get the cause */
1807 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1808 /* Be sure the queue bits are not cleared */
1809 reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1810 /* Clear interrupt with write */
1811 IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1812
1813 /* Link status change */
1814 if (reg_eicr & IXGBE_EICR_LSC)
1815 softint_schedule(adapter->link_si);
1816
1817 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1818 #ifdef IXGBE_FDIR
1819 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1820 /* This is probably overkill :) */
1821 if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1822 return 1;
1823 /* Disable the interrupt */
1824 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1825 softint_schedule(adapter->fdir_si);
1826 } else
1827 #endif
1828 if (reg_eicr & IXGBE_EICR_ECC) {
1829 device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1830 "Please Reboot!!\n");
1831 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1832 } else
1833
1834 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1835 /* Clear the interrupt */
1836 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1837 softint_schedule(adapter->msf_si);
1838 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1839 /* Clear the interrupt */
1840 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1841 softint_schedule(adapter->mod_si);
1842 }
1843 }
1844
1845 /* Check for fan failure */
1846 if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1847 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1848 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1849 "REPLACE IMMEDIATELY!!\n");
1850 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1851 }
1852
1853 /* Check for over temp condition */
1854 if ((hw->mac.type == ixgbe_mac_X540) &&
1855 (reg_eicr & IXGBE_EICR_TS)) {
1856 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1857 "PHY IS SHUT DOWN!!\n");
1858 device_printf(adapter->dev, "System shutdown required\n");
1859 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1860 }
1861
1862 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1863 return 1;
1864 }
1865 #endif
1866
1867 /*********************************************************************
1868 *
1869 * Media Ioctl callback
1870 *
1871 * This routine is called whenever the user queries the status of
1872 * the interface using ifconfig.
1873 *
1874 **********************************************************************/
1875 static void
1876 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1877 {
1878 struct adapter *adapter = ifp->if_softc;
1879 struct ixgbe_hw *hw = &adapter->hw;
1880
1881 INIT_DEBUGOUT("ixgbe_media_status: begin");
1882 IXGBE_CORE_LOCK(adapter);
1883 ixgbe_update_link_status(adapter);
1884
1885 ifmr->ifm_status = IFM_AVALID;
1886 ifmr->ifm_active = IFM_ETHER;
1887
1888 if (!adapter->link_active) {
1889 IXGBE_CORE_UNLOCK(adapter);
1890 return;
1891 }
1892
1893 ifmr->ifm_status |= IFM_ACTIVE;
1894
1895 /*
1896 * Not all NIC are 1000baseSX as an example X540T.
1897 * We must set properly the media based on NIC model.
1898 */
1899 switch (hw->device_id) {
1900 case IXGBE_DEV_ID_X540T:
1901 if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
1902 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1903 else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
1904 ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1905 else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
1906 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1907 break;
1908 default:
1909 if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
1910 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1911 else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
1912 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1913 else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
1914 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1915 break;
1916 }
1917
1918 IXGBE_CORE_UNLOCK(adapter);
1919
1920 return;
1921 }
1922
1923 /*********************************************************************
1924 *
1925 * Media Ioctl callback
1926 *
1927 * This routine is called when the user changes speed/duplex using
1928 * the media/mediaopt options of ifconfig.
1929 *
1930 **********************************************************************/
1931 static int
1932 ixgbe_media_change(struct ifnet * ifp)
1933 {
1934 struct adapter *adapter = ifp->if_softc;
1935 struct ifmedia *ifm = &adapter->media;
1936
1937 INIT_DEBUGOUT("ixgbe_media_change: begin");
1938
1939 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1940 return (EINVAL);
1941
1942 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1943 case IFM_10G_T:
1944 case IFM_AUTO:
1945 adapter->hw.phy.autoneg_advertised =
1946 IXGBE_LINK_SPEED_100_FULL |
1947 IXGBE_LINK_SPEED_1GB_FULL |
1948 IXGBE_LINK_SPEED_10GB_FULL;
1949 break;
1950 default:
1951 device_printf(adapter->dev, "Only auto media type\n");
1952 return (EINVAL);
1953 }
1954
1955 return (0);
1956 }
1957
1958 /*********************************************************************
1959 *
1960 * This routine maps the mbufs to tx descriptors, allowing the
1961 * TX engine to transmit the packets.
1962 * - return 0 on success, positive on failure
1963 *
1964 **********************************************************************/
1965
1966 static int
1967 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
1968 {
1969 struct m_tag *mtag;
1970 struct adapter *adapter = txr->adapter;
1971 struct ethercom *ec = &adapter->osdep.ec;
1972 u32 olinfo_status = 0, cmd_type_len;
1973 int i, j, error;
1974 int first;
1975 bus_dmamap_t map;
1976 struct ixgbe_tx_buf *txbuf;
1977 union ixgbe_adv_tx_desc *txd = NULL;
1978
1979 /* Basic descriptor defines */
1980 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1981 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1982
1983 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
1984 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1985
1986 /*
1987 * Important to capture the first descriptor
1988 * used because it will contain the index of
1989 * the one we tell the hardware to report back
1990 */
1991 first = txr->next_avail_desc;
1992 txbuf = &txr->tx_buffers[first];
1993 map = txbuf->map;
1994
1995 /*
1996 * Map the packet for DMA.
1997 */
1998 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
1999 m_head, BUS_DMA_NOWAIT);
2000
2001 if (__predict_false(error)) {
2002
2003 switch (error) {
2004 case EAGAIN:
2005 adapter->eagain_tx_dma_setup.ev_count++;
2006 return EAGAIN;
2007 case ENOMEM:
2008 adapter->enomem_tx_dma_setup.ev_count++;
2009 return EAGAIN;
2010 case EFBIG:
2011 /*
2012 * XXX Try it again?
2013 * do m_defrag() and retry bus_dmamap_load_mbuf().
2014 */
2015 adapter->efbig_tx_dma_setup.ev_count++;
2016 return error;
2017 case EINVAL:
2018 adapter->einval_tx_dma_setup.ev_count++;
2019 return error;
2020 default:
2021 adapter->other_tx_dma_setup.ev_count++;
2022 return error;
2023 }
2024 }
2025
2026 /* Make certain there are enough descriptors */
2027 if (map->dm_nsegs > txr->tx_avail - 2) {
2028 txr->no_desc_avail.ev_count++;
2029 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
2030 return EAGAIN;
2031 }
2032
2033 /*
2034 ** Set up the appropriate offload context
2035 ** this will consume the first descriptor
2036 */
2037 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
2038 if (__predict_false(error)) {
2039 return (error);
2040 }
2041
2042 #ifdef IXGBE_FDIR
2043 /* Do the flow director magic */
2044 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
2045 ++txr->atr_count;
2046 if (txr->atr_count >= atr_sample_rate) {
2047 ixgbe_atr(txr, m_head);
2048 txr->atr_count = 0;
2049 }
2050 }
2051 #endif
2052
2053 i = txr->next_avail_desc;
2054 for (j = 0; j < map->dm_nsegs; j++) {
2055 bus_size_t seglen;
2056 bus_addr_t segaddr;
2057
2058 txbuf = &txr->tx_buffers[i];
2059 txd = &txr->tx_base[i];
2060 seglen = map->dm_segs[j].ds_len;
2061 segaddr = htole64(map->dm_segs[j].ds_addr);
2062
2063 txd->read.buffer_addr = segaddr;
2064 txd->read.cmd_type_len = htole32(txr->txd_cmd |
2065 cmd_type_len |seglen);
2066 txd->read.olinfo_status = htole32(olinfo_status);
2067
2068 if (++i == txr->num_desc)
2069 i = 0;
2070 }
2071
2072 txd->read.cmd_type_len |=
2073 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
2074 txr->tx_avail -= map->dm_nsegs;
2075 txr->next_avail_desc = i;
2076
2077 txbuf->m_head = m_head;
2078 /*
2079 ** Here we swap the map so the last descriptor,
2080 ** which gets the completion interrupt has the
2081 ** real map, and the first descriptor gets the
2082 ** unused map from this descriptor.
2083 */
2084 txr->tx_buffers[first].map = txbuf->map;
2085 txbuf->map = map;
2086 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
2087 BUS_DMASYNC_PREWRITE);
2088
2089 /* Set the EOP descriptor that will be marked done */
2090 txbuf = &txr->tx_buffers[first];
2091 txbuf->eop = txd;
2092
2093 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2094 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2095 /*
2096 * Advance the Transmit Descriptor Tail (Tdt), this tells the
2097 * hardware that this frame is available to transmit.
2098 */
2099 ++txr->total_packets.ev_count;
2100 IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
2101
2102 return 0;
2103 }
2104
2105 static void
2106 ixgbe_set_promisc(struct adapter *adapter)
2107 {
2108 struct ether_multi *enm;
2109 struct ether_multistep step;
2110 u_int32_t reg_rctl;
2111 struct ethercom *ec = &adapter->osdep.ec;
2112 struct ifnet *ifp = adapter->ifp;
2113 int mcnt = 0;
2114
2115 reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2116 reg_rctl &= (~IXGBE_FCTRL_UPE);
2117 if (ifp->if_flags & IFF_ALLMULTI)
2118 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2119 else {
2120 ETHER_FIRST_MULTI(step, ec, enm);
2121 while (enm != NULL) {
2122 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2123 break;
2124 mcnt++;
2125 ETHER_NEXT_MULTI(step, enm);
2126 }
2127 }
2128 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2129 reg_rctl &= (~IXGBE_FCTRL_MPE);
2130 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2131
2132 if (ifp->if_flags & IFF_PROMISC) {
2133 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2134 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2135 } else if (ifp->if_flags & IFF_ALLMULTI) {
2136 reg_rctl |= IXGBE_FCTRL_MPE;
2137 reg_rctl &= ~IXGBE_FCTRL_UPE;
2138 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2139 }
2140 return;
2141 }
2142
2143
2144 /*********************************************************************
2145 * Multicast Update
2146 *
2147 * This routine is called whenever multicast address list is updated.
2148 *
2149 **********************************************************************/
2150 #define IXGBE_RAR_ENTRIES 16
2151
2152 static void
2153 ixgbe_set_multi(struct adapter *adapter)
2154 {
2155 struct ether_multi *enm;
2156 struct ether_multistep step;
2157 u32 fctrl;
2158 u8 *mta;
2159 u8 *update_ptr;
2160 int mcnt = 0;
2161 struct ethercom *ec = &adapter->osdep.ec;
2162 struct ifnet *ifp = adapter->ifp;
2163
2164 IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
2165
2166 mta = adapter->mta;
2167 bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
2168 MAX_NUM_MULTICAST_ADDRESSES);
2169
2170 ifp->if_flags &= ~IFF_ALLMULTI;
2171 ETHER_FIRST_MULTI(step, ec, enm);
2172 while (enm != NULL) {
2173 if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
2174 (memcmp(enm->enm_addrlo, enm->enm_addrhi,
2175 ETHER_ADDR_LEN) != 0)) {
2176 ifp->if_flags |= IFF_ALLMULTI;
2177 break;
2178 }
2179 bcopy(enm->enm_addrlo,
2180 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2181 IXGBE_ETH_LENGTH_OF_ADDRESS);
2182 mcnt++;
2183 ETHER_NEXT_MULTI(step, enm);
2184 }
2185
2186 fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2187 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2188 if (ifp->if_flags & IFF_PROMISC)
2189 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2190 else if (ifp->if_flags & IFF_ALLMULTI) {
2191 fctrl |= IXGBE_FCTRL_MPE;
2192 }
2193
2194 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2195
2196 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2197 update_ptr = mta;
2198 ixgbe_update_mc_addr_list(&adapter->hw,
2199 update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2200 }
2201
2202 return;
2203 }
2204
2205 /*
2206 * This is an iterator function now needed by the multicast
2207 * shared code. It simply feeds the shared code routine the
2208 * addresses in the array of ixgbe_set_multi() one by one.
2209 */
2210 static u8 *
2211 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2212 {
2213 u8 *addr = *update_ptr;
2214 u8 *newptr;
2215 *vmdq = 0;
2216
2217 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2218 *update_ptr = newptr;
2219 return addr;
2220 }
2221
2222
2223 /*********************************************************************
2224 * Timer routine
2225 *
2226 * This routine checks for link status,updates statistics,
2227 * and runs the watchdog check.
2228 *
2229 **********************************************************************/
2230
2231 static void
2232 ixgbe_local_timer1(void *arg)
2233 {
2234 struct adapter *adapter = arg;
2235 device_t dev = adapter->dev;
2236 struct ix_queue *que = adapter->queues;
2237 struct tx_ring *txr = adapter->tx_rings;
2238 int hung = 0, paused = 0;
2239
2240 KASSERT(mutex_owned(&adapter->core_mtx));
2241
2242 /* Check for pluggable optics */
2243 if (adapter->sfp_probe)
2244 if (!ixgbe_sfp_probe(adapter))
2245 goto out; /* Nothing to do */
2246
2247 ixgbe_update_link_status(adapter);
2248 ixgbe_update_stats_counters(adapter);
2249
2250 /*
2251 * If the interface has been paused
2252 * then don't do the watchdog check
2253 */
2254 if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2255 paused = 1;
2256
2257 /*
2258 ** Check the TX queues status
2259 ** - watchdog only if all queues show hung
2260 */
2261 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2262 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2263 (paused == 0))
2264 ++hung;
2265 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2266 #ifndef IXGBE_LEGACY_TX
2267 softint_schedule(txr->txq_si);
2268 #else
2269 softint_schedule(que->que_si);
2270 #endif
2271 }
2272 /* Only truely watchdog if all queues show hung */
2273 if (hung == adapter->num_queues)
2274 goto watchdog;
2275
2276 out:
2277 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2278 return;
2279
2280 watchdog:
2281 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2282 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2283 IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2284 IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2285 device_printf(dev,"TX(%d) desc avail = %d,"
2286 "Next TX to Clean = %d\n",
2287 txr->me, txr->tx_avail, txr->next_to_clean);
2288 adapter->ifp->if_flags &= ~IFF_RUNNING;
2289 adapter->watchdog_events.ev_count++;
2290 ixgbe_init_locked(adapter);
2291 }
2292
/*
 * Callout entry point: takes the core lock, runs the timer body in
 * ixgbe_local_timer1() and releases the lock again.
 */
static void
ixgbe_local_timer(void *arg)
{
	struct adapter *sc = arg;

	IXGBE_CORE_LOCK(sc);
	ixgbe_local_timer1(sc);
	IXGBE_CORE_UNLOCK(sc);
}
2302
2303 /*
2304 ** Note: this routine updates the OS on the link state
2305 ** the real check of the hardware only happens with
2306 ** a link interrupt.
2307 */
2308 static void
2309 ixgbe_update_link_status(struct adapter *adapter)
2310 {
2311 struct ifnet *ifp = adapter->ifp;
2312 device_t dev = adapter->dev;
2313
2314
2315 if (adapter->link_up){
2316 if (adapter->link_active == FALSE) {
2317 if (bootverbose)
2318 device_printf(dev,"Link is up %d Gbps %s \n",
2319 ((adapter->link_speed == 128)? 10:1),
2320 "Full Duplex");
2321 adapter->link_active = TRUE;
2322 /* Update any Flow Control changes */
2323 ixgbe_fc_enable(&adapter->hw);
2324 if_link_state_change(ifp, LINK_STATE_UP);
2325 }
2326 } else { /* Link down */
2327 if (adapter->link_active == TRUE) {
2328 if (bootverbose)
2329 device_printf(dev,"Link is Down\n");
2330 if_link_state_change(ifp, LINK_STATE_DOWN);
2331 adapter->link_active = FALSE;
2332 }
2333 }
2334
2335 return;
2336 }
2337
2338
2339 static void
2340 ixgbe_ifstop(struct ifnet *ifp, int disable)
2341 {
2342 struct adapter *adapter = ifp->if_softc;
2343
2344 IXGBE_CORE_LOCK(adapter);
2345 ixgbe_stop(adapter);
2346 IXGBE_CORE_UNLOCK(adapter);
2347 }
2348
2349 /*********************************************************************
2350 *
2351 * This routine disables all traffic on the adapter by issuing a
2352 * global reset on the MAC and deallocates TX/RX buffers.
2353 *
2354 **********************************************************************/
2355
2356 static void
2357 ixgbe_stop(void *arg)
2358 {
2359 struct ifnet *ifp;
2360 struct adapter *adapter = arg;
2361 struct ixgbe_hw *hw = &adapter->hw;
2362 ifp = adapter->ifp;
2363
2364 KASSERT(mutex_owned(&adapter->core_mtx));
2365
2366 INIT_DEBUGOUT("ixgbe_stop: begin\n");
2367 ixgbe_disable_intr(adapter);
2368 callout_stop(&adapter->timer);
2369
2370 /* Let the stack know...*/
2371 ifp->if_flags &= ~IFF_RUNNING;
2372
2373 ixgbe_reset_hw(hw);
2374 hw->adapter_stopped = FALSE;
2375 ixgbe_stop_adapter(hw);
2376 if (hw->mac.type == ixgbe_mac_82599EB)
2377 ixgbe_stop_mac_link_on_d3_82599(hw);
2378 /* Turn off the laser - noop with no optics */
2379 ixgbe_disable_tx_laser(hw);
2380
2381 /* Update the stack */
2382 adapter->link_up = FALSE;
2383 ixgbe_update_link_status(adapter);
2384
2385 /* reprogram the RAR[0] in case user changed it. */
2386 ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2387
2388 return;
2389 }
2390
2391
2392 /*********************************************************************
2393 *
2394 * Determine hardware revision.
2395 *
2396 **********************************************************************/
2397 static void
2398 ixgbe_identify_hardware(struct adapter *adapter)
2399 {
2400 pcitag_t tag;
2401 pci_chipset_tag_t pc;
2402 pcireg_t subid, id;
2403 struct ixgbe_hw *hw = &adapter->hw;
2404
2405 pc = adapter->osdep.pc;
2406 tag = adapter->osdep.tag;
2407
2408 id = pci_conf_read(pc, tag, PCI_ID_REG);
2409 subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
2410
2411 /* Save off the information about this board */
2412 hw->vendor_id = PCI_VENDOR(id);
2413 hw->device_id = PCI_PRODUCT(id);
2414 hw->revision_id =
2415 PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
2416 hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
2417 hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
2418
2419 /* We need this here to set the num_segs below */
2420 ixgbe_set_mac_type(hw);
2421
2422 /* Pick up the 82599 and VF settings */
2423 if (hw->mac.type != ixgbe_mac_82598EB) {
2424 hw->phy.smart_speed = ixgbe_smart_speed;
2425 adapter->num_segs = IXGBE_82599_SCATTER;
2426 } else
2427 adapter->num_segs = IXGBE_82598_SCATTER;
2428
2429 return;
2430 }
2431
2432 /*********************************************************************
2433 *
2434 * Determine optic type
2435 *
2436 **********************************************************************/
2437 static void
2438 ixgbe_setup_optics(struct adapter *adapter)
2439 {
2440 struct ixgbe_hw *hw = &adapter->hw;
2441 int layer;
2442
2443 layer = ixgbe_get_supported_physical_layer(hw);
2444
2445 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2446 adapter->optics = IFM_10G_T;
2447 return;
2448 }
2449
2450 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2451 adapter->optics = IFM_1000_T;
2452 return;
2453 }
2454
2455 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2456 adapter->optics = IFM_1000_SX;
2457 return;
2458 }
2459
2460 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2461 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2462 adapter->optics = IFM_10G_LR;
2463 return;
2464 }
2465
2466 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2467 adapter->optics = IFM_10G_SR;
2468 return;
2469 }
2470
2471 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2472 adapter->optics = IFM_10G_TWINAX;
2473 return;
2474 }
2475
2476 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2477 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2478 adapter->optics = IFM_10G_CX4;
2479 return;
2480 }
2481
2482 /* If we get here just set the default */
2483 adapter->optics = IFM_ETHER | IFM_AUTO;
2484 return;
2485 }
2486
2487 /*********************************************************************
2488 *
2489 * Setup the Legacy or MSI Interrupt handler
2490 *
2491 **********************************************************************/
2492 static int
2493 ixgbe_allocate_legacy(struct adapter *adapter,
2494 const struct pci_attach_args *pa)
2495 {
2496 device_t dev = adapter->dev;
2497 struct ix_queue *que = adapter->queues;
2498 #ifndef IXGBE_LEGACY_TX
2499 struct tx_ring *txr = adapter->tx_rings;
2500 #endif
2501 #ifndef NETBSD_MSI_OR_MSIX
2502 pci_intr_handle_t ih;
2503 #else
2504 int counts[PCI_INTR_TYPE_SIZE];
2505 pci_intr_type_t intr_type, max_type;
2506 #endif
2507 char intrbuf[PCI_INTRSTR_LEN];
2508 const char *intrstr = NULL;
2509
2510 #ifndef NETBSD_MSI_OR_MSIX
2511 /* We allocate a single interrupt resource */
2512 if (pci_intr_map(pa, &ih) != 0) {
2513 aprint_error_dev(dev, "unable to map interrupt\n");
2514 return ENXIO;
2515 } else {
2516 intrstr = pci_intr_string(adapter->osdep.pc, ih, intrbuf,
2517 sizeof(intrbuf));
2518 }
2519 adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, ih,
2520 IPL_NET, ixgbe_legacy_irq, que);
2521 #else
2522 /* Allocation settings */
2523 max_type = PCI_INTR_TYPE_MSI;
2524 counts[PCI_INTR_TYPE_MSIX] = 0;
2525 counts[PCI_INTR_TYPE_MSI] = 1;
2526 counts[PCI_INTR_TYPE_INTX] = 1;
2527
2528 alloc_retry:
2529 if (pci_intr_alloc(pa, &adapter->osdep.intrs, counts, max_type) != 0) {
2530 aprint_error_dev(dev, "couldn't alloc interrupt\n");
2531 return ENXIO;
2532 }
2533 adapter->osdep.nintrs = 1;
2534 intrstr = pci_intr_string(adapter->osdep.pc, adapter->osdep.intrs[0],
2535 intrbuf, sizeof(intrbuf));
2536 adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc,
2537 adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que);
2538 if (adapter->osdep.ihs[0] == NULL) {
2539 intr_type = pci_intr_type(adapter->osdep.intrs[0]);
2540 aprint_error_dev(dev,"unable to establish %s\n",
2541 (intr_type == PCI_INTR_TYPE_MSI) ? "MSI" : "INTx");
2542 pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
2543 switch (intr_type) {
2544 case PCI_INTR_TYPE_MSI:
2545 /* The next try is for INTx: Disable MSI */
2546 max_type = PCI_INTR_TYPE_INTX;
2547 counts[PCI_INTR_TYPE_INTX] = 1;
2548 goto alloc_retry;
2549 case PCI_INTR_TYPE_INTX:
2550 default:
2551 /* See below */
2552 break;
2553 }
2554 }
2555 #endif
2556 if (adapter->osdep.ihs[0] == NULL) {
2557 aprint_error_dev(dev,
2558 "couldn't establish interrupt%s%s\n",
2559 intrstr ? " at " : "", intrstr ? intrstr : "");
2560 #ifdef NETBSD_MSI_OR_MSIX
2561 pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
2562 #endif
2563 return ENXIO;
2564 }
2565 aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
2566 /*
2567 * Try allocating a fast interrupt and the associated deferred
2568 * processing contexts.
2569 */
2570 #ifndef IXGBE_LEGACY_TX
2571 txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
2572 txr);
2573 #endif
2574 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
2575
2576 /* Tasklets for Link, SFP and Multispeed Fiber */
2577 adapter->link_si =
2578 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2579 adapter->mod_si =
2580 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2581 adapter->msf_si =
2582 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2583
2584 #ifdef IXGBE_FDIR
2585 adapter->fdir_si =
2586 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2587 #endif
2588 if (que->que_si == NULL ||
2589 adapter->link_si == NULL ||
2590 adapter->mod_si == NULL ||
2591 #ifdef IXGBE_FDIR
2592 adapter->fdir_si == NULL ||
2593 #endif
2594 adapter->msf_si == NULL) {
2595 aprint_error_dev(dev,
2596 "could not establish software interrupts\n");
2597 return ENXIO;
2598 }
2599
2600 /* For simplicity in the handlers */
2601 adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2602
2603 return (0);
2604 }
2605
2606
/*********************************************************************
 *
 *  Setup MSIX Interrupt resources and handlers
 *
 *  Allocates num_queues + 1 MSI-X vectors: one per RX/TX queue pair
 *  (handler ixgbe_msix_que) plus one for link events (handler
 *  ixgbe_msix_link).  Each vector is given a round-robin CPU
 *  affinity, and the per-queue and link/SFP/multispeed-fiber
 *  software interrupts are established.
 *
 *  Returns 0 on success (always 0 without NETBSD_MSI_OR_MSIX) and
 *  ENXIO when vector allocation or handler registration fails.
 *
 **********************************************************************/
static int
ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
{
#if !defined(NETBSD_MSI_OR_MSIX)
	return 0;
#else
	device_t	dev = adapter->dev;
	struct		ix_queue *que = adapter->queues;
	struct		tx_ring *txr = adapter->tx_rings;
	pci_chipset_tag_t pc;
	char		intrbuf[PCI_INTRSTR_LEN];
	const char	*intrstr = NULL;
	int		error, vector = 0;
	int		cpu_id = 0;
	kcpuset_t	*affinity;

	pc = adapter->osdep.pc;
#ifdef	RSS
	cpuset_t cpu_mask;
	/*
	 * If we're doing RSS, the number of queues needs to
	 * match the number of RSS buckets that are configured.
	 *
	 * + If there's more queues than RSS buckets, we'll end
	 *   up with queues that get no traffic.
	 *
	 * + If there's more RSS buckets than queues, we'll end
	 *   up having multiple RSS buckets map to the same queue,
	 *   so there'll be some contention.
	 */
	if (adapter->num_queues != rss_getnumbuckets()) {
		device_printf(dev,
		    "%s: number of queues (%d) != number of RSS buckets (%d)"
		    "; performance will be impacted.\n",
		    __func__,
		    adapter->num_queues,
		    rss_getnumbuckets());
	}
#endif

	/* One vector per queue plus one for link */
	adapter->osdep.nintrs = adapter->num_queues + 1;
	if (pci_msix_alloc_exact(pa, &adapter->osdep.intrs,
	    adapter->osdep.nintrs) != 0) {
		aprint_error_dev(dev,
		    "failed to allocate MSI-X interrupt\n");
		return (ENXIO);
	}

	kcpuset_create(&affinity, false);
	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
		intrstr = pci_intr_string(pc, adapter->osdep.intrs[i], intrbuf,
		    sizeof(intrbuf));
#ifdef IXG_MPSAFE
		/* Takes a pointer to the handle, as for the link vector */
		pci_intr_setattr(pc, &adapter->osdep.intrs[i], PCI_INTR_MPSAFE,
		    true);
#endif
		/* Set the handler function */
		que->res = adapter->osdep.ihs[i] = pci_intr_establish(pc,
		    adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que);
		if (que->res == NULL) {
			pci_intr_release(pc, adapter->osdep.intrs,
			    adapter->osdep.nintrs);
			aprint_error_dev(dev,
			    "Failed to register QUE handler\n");
			kcpuset_destroy(affinity);
			return ENXIO;
		}
		que->msix = vector;
		/* Widen before shifting so the shift is done in 64 bits */
		adapter->que_mask |= ((u64)1 << que->msix);
#ifdef	RSS
		/*
		 * The queue ID is used as the RSS layer bucket ID.
		 * We look up the queue ID -> RSS CPU ID and select
		 * that.
		 */
		cpu_id = rss_getcpu(i % rss_getnumbuckets());
#else
		/*
		 * Bind the msix vector, and thus the
		 * rings to the corresponding cpu.
		 *
		 * This just happens to match the default RSS round-robin
		 * bucket -> queue -> CPU allocation.
		 */
		if (adapter->num_queues > 1)
			cpu_id = i;
#endif
		/* Round-robin affinity */
		kcpuset_zero(affinity);
		kcpuset_set(affinity, cpu_id % ncpu);
		error = interrupt_distribute(adapter->osdep.ihs[i], affinity,
		    NULL);
		aprint_normal_dev(dev, "for TX/RX, interrupting at %s",
		    intrstr);
		if (error == 0) {
#ifdef	RSS
			aprint_normal(", bound RSS bucket %d to CPU %d\n",
			    i, cpu_id);
#else
			aprint_normal(", bound queue %d to cpu %d\n",
			    i, cpu_id);
#endif
		} else
			aprint_normal("\n");

#ifndef IXGBE_LEGACY_TX
		txr->txq_si = softint_establish(SOFTINT_NET,
		    ixgbe_deferred_mq_start, txr);
#endif
		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
		    que);
		if (que->que_si == NULL) {
			aprint_error_dev(dev,
			    "could not establish software interrupt\n");
		}
	}

	/* and Link: 'vector' now indexes the slot after the last queue */
	cpu_id++;
	intrstr = pci_intr_string(pc, adapter->osdep.intrs[vector], intrbuf,
	    sizeof(intrbuf));
#ifdef IXG_MPSAFE
	pci_intr_setattr(pc, &adapter->osdep.intrs[vector], PCI_INTR_MPSAFE,
	    true);
#endif
	/* Set the link handler function */
	adapter->osdep.ihs[vector] = pci_intr_establish(pc,
	    adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter);
	if (adapter->osdep.ihs[vector] == NULL) {
		adapter->res = NULL;
		aprint_error_dev(dev, "Failed to register LINK handler\n");
		kcpuset_destroy(affinity);
		return (ENXIO);
	}
	/* Round-robin affinity */
	kcpuset_zero(affinity);
	kcpuset_set(affinity, cpu_id % ncpu);
	error = interrupt_distribute(adapter->osdep.ihs[vector], affinity,NULL);

	aprint_normal_dev(dev,
	    "for link, interrupting at %s", intrstr);
	if (error == 0)
		aprint_normal(", affinity to cpu %d\n", cpu_id);
	else
		aprint_normal("\n");

	adapter->linkvec = vector;
	/* Tasklets for Link, SFP and Multispeed Fiber */
	adapter->link_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
	adapter->mod_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
	adapter->msf_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
	adapter->fdir_si =
	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
#endif

	kcpuset_destroy(affinity);
	return (0);
#endif
}
2775
/*
 * Setup Either MSI/X or MSI
 *
 * Decides how many interrupt vectors the adapter will use and
 * returns that number: queues + 1 when MSI-X is usable (also
 * storing the queue count in adapter->num_queues), otherwise 1 for
 * MSI.  Without NETBSD_MSI_OR_MSIX it simply returns 0.
 */
static int
ixgbe_setup_msix(struct adapter *adapter)
{
#if !defined(NETBSD_MSI_OR_MSIX)
	return 0;
#else
	device_t dev = adapter->dev;
	int nvec, nqueues, needed;

	/* Override by tuneable */
	if (ixgbe_enable_msix == 0)
		goto msi;

	/* First try MSI/X */
	nvec = pci_msix_count(adapter->osdep.pc, adapter->osdep.tag);
	if (nvec < IXG_MSIX_NINTR)
		goto msi;

	adapter->msix_mem = (void *)1; /* XXX */

	/*
	 * Figure out a reasonable auto config value: one queue per
	 * cpu, limited to the vectors left after reserving one.
	 */
	if (ncpu > (nvec - 1))
		nqueues = nvec - 1;
	else
		nqueues = ncpu;

	/* Override based on tuneable */
	if (ixgbe_num_queues != 0)
		nqueues = ixgbe_num_queues;

#ifdef	RSS
	/* If we're doing RSS, clamp at the number of RSS buckets */
	if (nqueues > rss_getnumbuckets())
		nqueues = rss_getnumbuckets();
#endif

	/* reflect correct sysctl value */
	ixgbe_num_queues = nqueues;

	/*
	** Want one vector (RX/TX pair) per queue
	** plus an additional for Link.
	*/
	needed = nqueues + 1;
	if (nvec < needed) {
		aprint_error_dev(dev,
		    "MSIX Configuration Problem, "
		    "%d vectors but %d queues wanted!\n",
		    nvec, needed);
		goto msi;
	}
	nvec = needed;

	device_printf(dev,
	    "Using MSIX interrupts with %d vectors\n", nvec);
	adapter->num_queues = nqueues;
	return (nvec);

	/*
	** MSI-X is disabled, unavailable or short of vectors:
	** fall back to a single MSI vector.
	*/
msi:
	nvec = pci_msi_count(adapter->osdep.pc, adapter->osdep.tag);
	adapter->msix_mem = NULL; /* XXX */
	/* The count queried above is not used; one vector is reported */
	nvec = 1;
	aprint_normal_dev(dev,"Using an MSI interrupt\n");
	return (nvec);
#endif
}
2846
2847
2848 static int
2849 ixgbe_allocate_pci_resources(struct adapter *adapter,
2850 const struct pci_attach_args *pa)
2851 {
2852 pcireg_t memtype;
2853 device_t dev = adapter->dev;
2854 bus_addr_t addr;
2855 int flags;
2856
2857 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
2858 switch (memtype) {
2859 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
2860 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
2861 adapter->osdep.mem_bus_space_tag = pa->pa_memt;
2862 if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
2863 memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
2864 goto map_err;
2865 if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
2866 aprint_normal_dev(dev, "clearing prefetchable bit\n");
2867 flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
2868 }
2869 if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
2870 adapter->osdep.mem_size, flags,
2871 &adapter->osdep.mem_bus_space_handle) != 0) {
2872 map_err:
2873 adapter->osdep.mem_size = 0;
2874 aprint_error_dev(dev, "unable to map BAR0\n");
2875 return ENXIO;
2876 }
2877 break;
2878 default:
2879 aprint_error_dev(dev, "unexpected type on BAR0\n");
2880 return ENXIO;
2881 }
2882
2883 /* Legacy defaults */
2884 adapter->num_queues = 1;
2885 adapter->hw.back = &adapter->osdep;
2886
2887 /*
2888 ** Now setup MSI or MSI/X, should
2889 ** return us the number of supported
2890 ** vectors. (Will be 1 for MSI)
2891 */
2892 adapter->msix = ixgbe_setup_msix(adapter);
2893 return (0);
2894 }
2895
2896 static void
2897 ixgbe_free_pci_resources(struct adapter * adapter)
2898 {
2899 #if defined(NETBSD_MSI_OR_MSIX)
2900 struct ix_queue *que = adapter->queues;
2901 #endif
2902 int rid;
2903
2904 #if defined(NETBSD_MSI_OR_MSIX)
2905 /*
2906 ** Release all msix queue resources:
2907 */
2908 for (int i = 0; i < adapter->num_queues; i++, que++) {
2909 if (que->res != NULL)
2910 pci_intr_disestablish(adapter->osdep.pc,
2911 adapter->osdep.ihs[i]);
2912 }
2913 #endif
2914
2915 /* Clean the Legacy or Link interrupt last */
2916 if (adapter->linkvec) /* we are doing MSIX */
2917 rid = adapter->linkvec;
2918 else
2919 rid = 0;
2920
2921 if (adapter->osdep.ihs[rid] != NULL) {
2922 pci_intr_disestablish(adapter->osdep.pc,
2923 adapter->osdep.ihs[rid]);
2924 adapter->osdep.ihs[rid] = NULL;
2925 }
2926
2927 #if defined(NETBSD_MSI_OR_MSIX)
2928 pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs,
2929 adapter->osdep.nintrs);
2930 #endif
2931
2932 if (adapter->osdep.mem_size != 0) {
2933 bus_space_unmap(adapter->osdep.mem_bus_space_tag,
2934 adapter->osdep.mem_bus_space_handle,
2935 adapter->osdep.mem_size);
2936 }
2937
2938 return;
2939 }
2940
2941 /*********************************************************************
2942 *
2943 * Setup networking device structure and register an interface.
2944 *
2945 **********************************************************************/
2946 static int
2947 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2948 {
2949 struct ethercom *ec = &adapter->osdep.ec;
2950 struct ixgbe_hw *hw = &adapter->hw;
2951 struct ifnet *ifp;
2952
2953 INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2954
2955 ifp = adapter->ifp = &ec->ec_if;
2956 strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
2957 ifp->if_baudrate = IF_Gbps(10);
2958 ifp->if_init = ixgbe_init;
2959 ifp->if_stop = ixgbe_ifstop;
2960 ifp->if_softc = adapter;
2961 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2962 ifp->if_ioctl = ixgbe_ioctl;
2963 #ifndef IXGBE_LEGACY_TX
2964 ifp->if_transmit = ixgbe_mq_start;
2965 ifp->if_qflush = ixgbe_qflush;
2966 #else
2967 ifp->if_start = ixgbe_start;
2968 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2969 #if 0
2970 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2971 #endif
2972 IFQ_SET_READY(&ifp->if_snd);
2973 #endif
2974
2975 if_initialize(ifp);
2976 ether_ifattach(ifp, adapter->hw.mac.addr);
2977 if_register(ifp);
2978 ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
2979
2980 adapter->max_frame_size =
2981 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2982
2983 /*
2984 * Tell the upper layer(s) we support long frames.
2985 */
2986 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2987
2988 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
2989 ifp->if_capenable = 0;
2990
2991 ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
2992 ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
2993 ifp->if_capabilities |= IFCAP_LRO;
2994 ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
2995 | ETHERCAP_VLAN_MTU;
2996 ec->ec_capenable = ec->ec_capabilities;
2997
2998 /*
2999 ** Don't turn this on by default, if vlans are
3000 ** created on another pseudo device (eg. lagg)
3001 ** then vlan events are not passed thru, breaking
3002 ** operation, but with HW FILTER off it works. If
3003 ** using vlans directly on the ixgbe driver you can
3004 ** enable this and get full hardware tag filtering.
3005 */
3006 ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
3007
3008 /*
3009 * Specify the media types supported by this adapter and register
3010 * callbacks to update media and link information
3011 */
3012 ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
3013 ixgbe_media_status);
3014 ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
3015 ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
3016 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
3017 ifmedia_add(&adapter->media,
3018 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3019 ifmedia_add(&adapter->media,
3020 IFM_ETHER | IFM_1000_T, 0, NULL);
3021 }
3022 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3023 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3024
3025 return (0);
3026 }
3027
3028 static void
3029 ixgbe_config_link(struct adapter *adapter)
3030 {
3031 struct ixgbe_hw *hw = &adapter->hw;
3032 u32 autoneg, err = 0;
3033 bool sfp, negotiate;
3034
3035 sfp = ixgbe_is_sfp(hw);
3036
3037 if (sfp) {
3038 void *ip;
3039
3040 if (hw->phy.multispeed_fiber) {
3041 hw->mac.ops.setup_sfp(hw);
3042 ixgbe_enable_tx_laser(hw);
3043 ip = adapter->msf_si;
3044 } else {
3045 ip = adapter->mod_si;
3046 }
3047
3048 kpreempt_disable();
3049 softint_schedule(ip);
3050 kpreempt_enable();
3051 } else {
3052 if (hw->mac.ops.check_link)
3053 err = ixgbe_check_link(hw, &adapter->link_speed,
3054 &adapter->link_up, FALSE);
3055 if (err)
3056 goto out;
3057 autoneg = hw->phy.autoneg_advertised;
3058 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
3059 err = hw->mac.ops.get_link_capabilities(hw,
3060 &autoneg, &negotiate);
3061 else
3062 negotiate = 0;
3063 if (err)
3064 goto out;
3065 if (hw->mac.ops.setup_link)
3066 err = hw->mac.ops.setup_link(hw,
3067 autoneg, adapter->link_up);
3068 }
3069 out:
3070 return;
3071 }
3072
3073 /********************************************************************
3074 * Manage DMA'able memory.
3075 *******************************************************************/
3076
3077 static int
3078 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
3079 struct ixgbe_dma_alloc *dma, const int mapflags)
3080 {
3081 device_t dev = adapter->dev;
3082 int r, rsegs;
3083
3084 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3085 DBA_ALIGN, 0, /* alignment, bounds */
3086 size, /* maxsize */
3087 1, /* nsegments */
3088 size, /* maxsegsize */
3089 BUS_DMA_ALLOCNOW, /* flags */
3090 &dma->dma_tag);
3091 if (r != 0) {
3092 aprint_error_dev(dev,
3093 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
3094 goto fail_0;
3095 }
3096
3097 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
3098 size,
3099 dma->dma_tag->dt_alignment,
3100 dma->dma_tag->dt_boundary,
3101 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
3102 if (r != 0) {
3103 aprint_error_dev(dev,
3104 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
3105 goto fail_1;
3106 }
3107
3108 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
3109 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
3110 if (r != 0) {
3111 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
3112 __func__, r);
3113 goto fail_2;
3114 }
3115
3116 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
3117 if (r != 0) {
3118 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
3119 __func__, r);
3120 goto fail_3;
3121 }
3122
3123 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
3124 size,
3125 NULL,
3126 mapflags | BUS_DMA_NOWAIT);
3127 if (r != 0) {
3128 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
3129 __func__, r);
3130 goto fail_4;
3131 }
3132 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
3133 dma->dma_size = size;
3134 return 0;
3135 fail_4:
3136 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
3137 fail_3:
3138 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
3139 fail_2:
3140 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
3141 fail_1:
3142 ixgbe_dma_tag_destroy(dma->dma_tag);
3143 fail_0:
3144 return r;
3145 }
3146
3147 static void
3148 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
3149 {
3150 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
3151 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3152 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
3153 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
3154 ixgbe_dma_tag_destroy(dma->dma_tag);
3155 }
3156
3157
3158 /*********************************************************************
3159 *
3160 * Allocate memory for the transmit and receive rings, and then
3161 * the descriptors associated with each, called only once at attach.
3162 *
3163 **********************************************************************/
3164 static int
3165 ixgbe_allocate_queues(struct adapter *adapter)
3166 {
3167 device_t dev = adapter->dev;
3168 struct ix_queue *que;
3169 struct tx_ring *txr;
3170 struct rx_ring *rxr;
3171 int rsize, tsize, error = IXGBE_SUCCESS;
3172 int txconf = 0, rxconf = 0;
3173
3174 /* First allocate the top level queue structs */
3175 if (!(adapter->queues =
3176 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
3177 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3178 aprint_error_dev(dev, "Unable to allocate queue memory\n");
3179 error = ENOMEM;
3180 goto fail;
3181 }
3182
3183 /* First allocate the TX ring struct memory */
3184 if (!(adapter->tx_rings =
3185 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3186 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3187 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
3188 error = ENOMEM;
3189 goto tx_fail;
3190 }
3191
3192 /* Next allocate the RX */
3193 if (!(adapter->rx_rings =
3194 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3195 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3196 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
3197 error = ENOMEM;
3198 goto rx_fail;
3199 }
3200
3201 /* For the ring itself */
3202 tsize = roundup2(adapter->num_tx_desc *
3203 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
3204
3205 /*
3206 * Now set up the TX queues, txconf is needed to handle the
3207 * possibility that things fail midcourse and we need to
3208 * undo memory gracefully
3209 */
3210 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3211 /* Set up some basics */
3212 txr = &adapter->tx_rings[i];
3213 txr->adapter = adapter;
3214 txr->me = i;
3215 txr->num_desc = adapter->num_tx_desc;
3216
3217 /* Initialize the TX side lock */
3218 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3219 device_xname(dev), txr->me);
3220 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
3221
3222 if (ixgbe_dma_malloc(adapter, tsize,
3223 &txr->txdma, BUS_DMA_NOWAIT)) {
3224 aprint_error_dev(dev,
3225 "Unable to allocate TX Descriptor memory\n");
3226 error = ENOMEM;
3227 goto err_tx_desc;
3228 }
3229 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
3230 bzero((void *)txr->tx_base, tsize);
3231
3232 /* Now allocate transmit buffers for the ring */
3233 if (ixgbe_allocate_transmit_buffers(txr)) {
3234 aprint_error_dev(dev,
3235 "Critical Failure setting up transmit buffers\n");
3236 error = ENOMEM;
3237 goto err_tx_desc;
3238 }
3239 #ifndef IXGBE_LEGACY_TX
3240 /* Allocate a buf ring */
3241 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
3242 M_WAITOK, &txr->tx_mtx);
3243 if (txr->br == NULL) {
3244 aprint_error_dev(dev,
3245 "Critical Failure setting up buf ring\n");
3246 error = ENOMEM;
3247 goto err_tx_desc;
3248 }
3249 #endif
3250 }
3251
3252 /*
3253 * Next the RX queues...
3254 */
3255 rsize = roundup2(adapter->num_rx_desc *
3256 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3257 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3258 rxr = &adapter->rx_rings[i];
3259 /* Set up some basics */
3260 rxr->adapter = adapter;
3261 rxr->me = i;
3262 rxr->num_desc = adapter->num_rx_desc;
3263
3264 /* Initialize the RX side lock */
3265 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3266 device_xname(dev), rxr->me);
3267 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
3268
3269 if (ixgbe_dma_malloc(adapter, rsize,
3270 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3271 aprint_error_dev(dev,
3272 "Unable to allocate RxDescriptor memory\n");
3273 error = ENOMEM;
3274 goto err_rx_desc;
3275 }
3276 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3277 bzero((void *)rxr->rx_base, rsize);
3278
3279 /* Allocate receive buffers for the ring*/
3280 if (ixgbe_allocate_receive_buffers(rxr)) {
3281 aprint_error_dev(dev,
3282 "Critical Failure setting up receive buffers\n");
3283 error = ENOMEM;
3284 goto err_rx_desc;
3285 }
3286 }
3287
3288 /*
3289 ** Finally set up the queue holding structs
3290 */
3291 for (int i = 0; i < adapter->num_queues; i++) {
3292 que = &adapter->queues[i];
3293 que->adapter = adapter;
3294 que->txr = &adapter->tx_rings[i];
3295 que->rxr = &adapter->rx_rings[i];
3296 }
3297
3298 return (0);
3299
3300 err_rx_desc:
3301 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3302 ixgbe_dma_free(adapter, &rxr->rxdma);
3303 err_tx_desc:
3304 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3305 ixgbe_dma_free(adapter, &txr->txdma);
3306 free(adapter->rx_rings, M_DEVBUF);
3307 rx_fail:
3308 free(adapter->tx_rings, M_DEVBUF);
3309 tx_fail:
3310 free(adapter->queues, M_DEVBUF);
3311 fail:
3312 return (error);
3313 }
3314
3315 /*********************************************************************
3316 *
3317 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3318 * the information needed to transmit a packet on the wire. This is
3319 * called only once at attach, setup is done every reset.
3320 *
3321 **********************************************************************/
3322 static int
3323 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3324 {
3325 struct adapter *adapter = txr->adapter;
3326 device_t dev = adapter->dev;
3327 struct ixgbe_tx_buf *txbuf;
3328 int error, i;
3329
3330 /*
3331 * Setup DMA descriptor areas.
3332 */
3333 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3334 1, 0, /* alignment, bounds */
3335 IXGBE_TSO_SIZE, /* maxsize */
3336 adapter->num_segs, /* nsegments */
3337 PAGE_SIZE, /* maxsegsize */
3338 0, /* flags */
3339 &txr->txtag))) {
3340 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
3341 goto fail;
3342 }
3343
3344 if (!(txr->tx_buffers =
3345 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3346 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3347 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
3348 error = ENOMEM;
3349 goto fail;
3350 }
3351
3352 /* Create the descriptor buffer dma maps */
3353 txbuf = txr->tx_buffers;
3354 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3355 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
3356 if (error != 0) {
3357 aprint_error_dev(dev,
3358 "Unable to create TX DMA map (%d)\n", error);
3359 goto fail;
3360 }
3361 }
3362
3363 return 0;
3364 fail:
3365 /* We free all, it handles case where we are in the middle */
3366 ixgbe_free_transmit_structures(adapter);
3367 return (error);
3368 }
3369
3370 /*********************************************************************
3371 *
3372 * Initialize a transmit ring.
3373 *
3374 **********************************************************************/
3375 static void
3376 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3377 {
3378 struct adapter *adapter = txr->adapter;
3379 struct ixgbe_tx_buf *txbuf;
3380 int i;
3381 #ifdef DEV_NETMAP
3382 struct netmap_adapter *na = NA(adapter->ifp);
3383 struct netmap_slot *slot;
3384 #endif /* DEV_NETMAP */
3385
3386 /* Clear the old ring contents */
3387 IXGBE_TX_LOCK(txr);
3388 #ifdef DEV_NETMAP
3389 /*
3390 * (under lock): if in netmap mode, do some consistency
3391 * checks and set slot to entry 0 of the netmap ring.
3392 */
3393 slot = netmap_reset(na, NR_TX, txr->me, 0);
3394 #endif /* DEV_NETMAP */
3395 bzero((void *)txr->tx_base,
3396 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3397 /* Reset indices */
3398 txr->next_avail_desc = 0;
3399 txr->next_to_clean = 0;
3400
3401 /* Free any existing tx buffers. */
3402 txbuf = txr->tx_buffers;
3403 for (i = 0; i < txr->num_desc; i++, txbuf++) {
3404 if (txbuf->m_head != NULL) {
3405 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
3406 0, txbuf->m_head->m_pkthdr.len,
3407 BUS_DMASYNC_POSTWRITE);
3408 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
3409 m_freem(txbuf->m_head);
3410 txbuf->m_head = NULL;
3411 }
3412 #ifdef DEV_NETMAP
3413 /*
3414 * In netmap mode, set the map for the packet buffer.
3415 * NOTE: Some drivers (not this one) also need to set
3416 * the physical buffer address in the NIC ring.
3417 * Slots in the netmap ring (indexed by "si") are
3418 * kring->nkr_hwofs positions "ahead" wrt the
3419 * corresponding slot in the NIC ring. In some drivers
3420 * (not here) nkr_hwofs can be negative. Function
3421 * netmap_idx_n2k() handles wraparounds properly.
3422 */
3423 if (slot) {
3424 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3425 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3426 }
3427 #endif /* DEV_NETMAP */
3428 /* Clear the EOP descriptor pointer */
3429 txbuf->eop = NULL;
3430 }
3431
3432 #ifdef IXGBE_FDIR
3433 /* Set the rate at which we sample packets */
3434 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3435 txr->atr_sample = atr_sample_rate;
3436 #endif
3437
3438 /* Set number of descriptors available */
3439 txr->tx_avail = adapter->num_tx_desc;
3440
3441 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3442 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3443 IXGBE_TX_UNLOCK(txr);
3444 }
3445
3446 /*********************************************************************
3447 *
3448 * Initialize all transmit rings.
3449 *
3450 **********************************************************************/
3451 static int
3452 ixgbe_setup_transmit_structures(struct adapter *adapter)
3453 {
3454 struct tx_ring *txr = adapter->tx_rings;
3455
3456 for (int i = 0; i < adapter->num_queues; i++, txr++)
3457 ixgbe_setup_transmit_ring(txr);
3458
3459 return (0);
3460 }
3461
3462 /*********************************************************************
3463 *
3464 * Enable transmit unit.
3465 *
3466 **********************************************************************/
3467 static void
3468 ixgbe_initialize_transmit_units(struct adapter *adapter)
3469 {
3470 struct tx_ring *txr = adapter->tx_rings;
3471 struct ixgbe_hw *hw = &adapter->hw;
3472
3473 /* Setup the Base and Length of the Tx Descriptor Ring */
3474
3475 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3476 u64 tdba = txr->txdma.dma_paddr;
3477 u32 txctrl;
3478
3479 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3480 (tdba & 0x00000000ffffffffULL));
3481 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3482 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3483 adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3484
3485 /* Setup the HW Tx Head and Tail descriptor pointers */
3486 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3487 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3488
3489 /* Setup Transmit Descriptor Cmd Settings */
3490 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3491 txr->queue_status = IXGBE_QUEUE_IDLE;
3492
3493 /* Set the processing limit */
3494 txr->process_limit = ixgbe_tx_process_limit;
3495
3496 /* Disable Head Writeback */
3497 switch (hw->mac.type) {
3498 case ixgbe_mac_82598EB:
3499 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3500 break;
3501 case ixgbe_mac_82599EB:
3502 case ixgbe_mac_X540:
3503 default:
3504 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3505 break;
3506 }
3507 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3508 switch (hw->mac.type) {
3509 case ixgbe_mac_82598EB:
3510 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3511 break;
3512 case ixgbe_mac_82599EB:
3513 case ixgbe_mac_X540:
3514 default:
3515 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3516 break;
3517 }
3518
3519 }
3520
3521 if (hw->mac.type != ixgbe_mac_82598EB) {
3522 u32 dmatxctl, rttdcs;
3523 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3524 dmatxctl |= IXGBE_DMATXCTL_TE;
3525 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3526 /* Disable arbiter to set MTQC */
3527 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3528 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3529 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3530 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3531 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3532 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3533 }
3534
3535 return;
3536 }
3537
3538 /*********************************************************************
3539 *
3540 * Free all transmit rings.
3541 *
3542 **********************************************************************/
3543 static void
3544 ixgbe_free_transmit_structures(struct adapter *adapter)
3545 {
3546 struct tx_ring *txr = adapter->tx_rings;
3547
3548 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3549 ixgbe_free_transmit_buffers(txr);
3550 ixgbe_dma_free(adapter, &txr->txdma);
3551 IXGBE_TX_LOCK_DESTROY(txr);
3552 }
3553 free(adapter->tx_rings, M_DEVBUF);
3554 }
3555
3556 /*********************************************************************
3557 *
3558 * Free transmit ring related data structures.
3559 *
3560 **********************************************************************/
3561 static void
3562 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3563 {
3564 struct adapter *adapter = txr->adapter;
3565 struct ixgbe_tx_buf *tx_buffer;
3566 int i;
3567
3568 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3569
3570 if (txr->tx_buffers == NULL)
3571 return;
3572
3573 tx_buffer = txr->tx_buffers;
3574 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3575 if (tx_buffer->m_head != NULL) {
3576 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
3577 0, tx_buffer->m_head->m_pkthdr.len,
3578 BUS_DMASYNC_POSTWRITE);
3579 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3580 m_freem(tx_buffer->m_head);
3581 tx_buffer->m_head = NULL;
3582 if (tx_buffer->map != NULL) {
3583 ixgbe_dmamap_destroy(txr->txtag,
3584 tx_buffer->map);
3585 tx_buffer->map = NULL;
3586 }
3587 } else if (tx_buffer->map != NULL) {
3588 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3589 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
3590 tx_buffer->map = NULL;
3591 }
3592 }
3593 #ifndef IXGBE_LEGACY_TX
3594 if (txr->br != NULL)
3595 buf_ring_free(txr->br, M_DEVBUF);
3596 #endif
3597 if (txr->tx_buffers != NULL) {
3598 free(txr->tx_buffers, M_DEVBUF);
3599 txr->tx_buffers = NULL;
3600 }
3601 if (txr->txtag != NULL) {
3602 ixgbe_dma_tag_destroy(txr->txtag);
3603 txr->txtag = NULL;
3604 }
3605 return;
3606 }
3607
3608 /*********************************************************************
3609 *
3610 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3611 *
3612 **********************************************************************/
3613
3614 static int
3615 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3616 u32 *cmd_type_len, u32 *olinfo_status)
3617 {
3618 struct m_tag *mtag;
3619 struct adapter *adapter = txr->adapter;
3620 struct ethercom *ec = &adapter->osdep.ec;
3621 struct ixgbe_adv_tx_context_desc *TXD;
3622 struct ether_vlan_header *eh;
3623 struct ip ip;
3624 struct ip6_hdr ip6;
3625 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3626 int ehdrlen, ip_hlen = 0;
3627 u16 etype;
3628 u8 ipproto __diagused = 0;
3629 int offload = TRUE;
3630 int ctxd = txr->next_avail_desc;
3631 u16 vtag = 0;
3632
3633 /* First check if TSO is to be used */
3634 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
3635 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3636
3637 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
3638 offload = FALSE;
3639
3640 /* Indicate the whole packet as payload when not doing TSO */
3641 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3642
3643 /* Now ready a context descriptor */
3644 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3645
3646 /*
3647 ** In advanced descriptors the vlan tag must
3648 ** be placed into the context descriptor. Hence
3649 ** we need to make one even if not doing offloads.
3650 */
3651 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3652 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3653 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3654 } else if (offload == FALSE) /* ... no offload to do */
3655 return 0;
3656
3657 /*
3658 * Determine where frame payload starts.
3659 * Jump over vlan headers if already present,
3660 * helpful for QinQ too.
3661 */
3662 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
3663 eh = mtod(mp, struct ether_vlan_header *);
3664 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3665 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3666 etype = ntohs(eh->evl_proto);
3667 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3668 } else {
3669 etype = ntohs(eh->evl_encap_proto);
3670 ehdrlen = ETHER_HDR_LEN;
3671 }
3672
3673 /* Set the ether header length */
3674 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3675
3676 switch (etype) {
3677 case ETHERTYPE_IP:
3678 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
3679 ip_hlen = ip.ip_hl << 2;
3680 ipproto = ip.ip_p;
3681 #if 0
3682 ip.ip_sum = 0;
3683 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
3684 #else
3685 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3686 ip.ip_sum == 0);
3687 #endif
3688 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3689 break;
3690 case ETHERTYPE_IPV6:
3691 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
3692 ip_hlen = sizeof(ip6);
3693 /* XXX-BZ this will go badly in case of ext hdrs. */
3694 ipproto = ip6.ip6_nxt;
3695 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3696 break;
3697 default:
3698 break;
3699 }
3700
3701 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
3702 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3703
3704 vlan_macip_lens |= ip_hlen;
3705 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3706
3707 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
3708 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3709 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3710 KASSERT(ipproto == IPPROTO_TCP);
3711 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
3712 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3713 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3714 KASSERT(ipproto == IPPROTO_UDP);
3715 }
3716
3717 /* Now copy bits into descriptor */
3718 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3719 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3720 TXD->seqnum_seed = htole32(0);
3721 TXD->mss_l4len_idx = htole32(0);
3722
3723 /* We've consumed the first desc, adjust counters */
3724 if (++ctxd == txr->num_desc)
3725 ctxd = 0;
3726 txr->next_avail_desc = ctxd;
3727 --txr->tx_avail;
3728
3729 return 0;
3730 }
3731
3732 /**********************************************************************
3733 *
3734 * Setup work for hardware segmentation offload (TSO) on
3735 * adapters using advanced tx descriptors
3736 *
3737 **********************************************************************/
3738 static int
3739 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3740 u32 *cmd_type_len, u32 *olinfo_status)
3741 {
3742 struct m_tag *mtag;
3743 struct adapter *adapter = txr->adapter;
3744 struct ethercom *ec = &adapter->osdep.ec;
3745 struct ixgbe_adv_tx_context_desc *TXD;
3746 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3747 u32 mss_l4len_idx = 0, paylen;
3748 u16 vtag = 0, eh_type;
3749 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3750 struct ether_vlan_header *eh;
3751 #ifdef INET6
3752 struct ip6_hdr *ip6;
3753 #endif
3754 #ifdef INET
3755 struct ip *ip;
3756 #endif
3757 struct tcphdr *th;
3758
3759
3760 /*
3761 * Determine where frame payload starts.
3762 * Jump over vlan headers if already present
3763 */
3764 eh = mtod(mp, struct ether_vlan_header *);
3765 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3766 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3767 eh_type = eh->evl_proto;
3768 } else {
3769 ehdrlen = ETHER_HDR_LEN;
3770 eh_type = eh->evl_encap_proto;
3771 }
3772
3773 switch (ntohs(eh_type)) {
3774 #ifdef INET6
3775 case ETHERTYPE_IPV6:
3776 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3777 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3778 if (ip6->ip6_nxt != IPPROTO_TCP)
3779 return (ENXIO);
3780 ip_hlen = sizeof(struct ip6_hdr);
3781 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3782 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
3783 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3784 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3785 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3786 break;
3787 #endif
3788 #ifdef INET
3789 case ETHERTYPE_IP:
3790 ip = (struct ip *)(mp->m_data + ehdrlen);
3791 if (ip->ip_p != IPPROTO_TCP)
3792 return (ENXIO);
3793 ip->ip_sum = 0;
3794 ip_hlen = ip->ip_hl << 2;
3795 th = (struct tcphdr *)((char *)ip + ip_hlen);
3796 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3797 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3798 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3799 /* Tell transmit desc to also do IPv4 checksum. */
3800 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3801 break;
3802 #endif
3803 default:
3804 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3805 __func__, ntohs(eh_type));
3806 break;
3807 }
3808
3809 ctxd = txr->next_avail_desc;
3810 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3811
3812 tcp_hlen = th->th_off << 2;
3813
3814 /* This is used in the transmit desc in encap */
3815 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3816
3817 /* VLAN MACLEN IPLEN */
3818 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3819 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3820 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3821 }
3822
3823 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3824 vlan_macip_lens |= ip_hlen;
3825 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3826
3827 /* ADV DTYPE TUCMD */
3828 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3829 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3830 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3831
3832 /* MSS L4LEN IDX */
3833 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
3834 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3835 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3836
3837 TXD->seqnum_seed = htole32(0);
3838
3839 if (++ctxd == txr->num_desc)
3840 ctxd = 0;
3841
3842 txr->tx_avail--;
3843 txr->next_avail_desc = ctxd;
3844 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3845 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3846 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3847 ++txr->tso_tx.ev_count;
3848 return (0);
3849 }
3850
3851 #ifdef IXGBE_FDIR
3852 /*
3853 ** This routine parses packet headers so that Flow
3854 ** Director can make a hashed filter table entry
3855 ** allowing traffic flows to be identified and kept
3856 ** on the same cpu. This would be a performance
3857 ** hit, but we only do it at IXGBE_FDIR_RATE of
3858 ** packets.
3859 */
3860 static void
3861 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3862 {
3863 struct adapter *adapter = txr->adapter;
3864 struct ix_queue *que;
3865 struct ip *ip;
3866 struct tcphdr *th;
3867 struct udphdr *uh;
3868 struct ether_vlan_header *eh;
3869 union ixgbe_atr_hash_dword input = {.dword = 0};
3870 union ixgbe_atr_hash_dword common = {.dword = 0};
3871 int ehdrlen, ip_hlen;
3872 u16 etype;
3873
3874 eh = mtod(mp, struct ether_vlan_header *);
3875 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3876 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3877 etype = eh->evl_proto;
3878 } else {
3879 ehdrlen = ETHER_HDR_LEN;
3880 etype = eh->evl_encap_proto;
3881 }
3882
3883 /* Only handling IPv4 */
3884 if (etype != htons(ETHERTYPE_IP))
3885 return;
3886
3887 ip = (struct ip *)(mp->m_data + ehdrlen);
3888 ip_hlen = ip->ip_hl << 2;
3889
3890 /* check if we're UDP or TCP */
3891 switch (ip->ip_p) {
3892 case IPPROTO_TCP:
3893 th = (struct tcphdr *)((char *)ip + ip_hlen);
3894 /* src and dst are inverted */
3895 common.port.dst ^= th->th_sport;
3896 common.port.src ^= th->th_dport;
3897 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3898 break;
3899 case IPPROTO_UDP:
3900 uh = (struct udphdr *)((char *)ip + ip_hlen);
3901 /* src and dst are inverted */
3902 common.port.dst ^= uh->uh_sport;
3903 common.port.src ^= uh->uh_dport;
3904 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3905 break;
3906 default:
3907 return;
3908 }
3909
3910 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3911 if (mp->m_pkthdr.ether_vtag)
3912 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3913 else
3914 common.flex_bytes ^= etype;
3915 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3916
3917 que = &adapter->queues[txr->me];
3918 /*
3919 ** This assumes the Rx queue and Tx
3920 ** queue are bound to the same CPU
3921 */
3922 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3923 input, common, que->msix);
3924 }
3925 #endif /* IXGBE_FDIR */
3926
3927 /**********************************************************************
3928 *
3929 * Examine each tx_buffer in the used queue. If the hardware is done
3930 * processing the packet then free associated resources. The
3931 * tx_buffer is put back on the free queue.
3932 *
3933 **********************************************************************/
3934 static void
3935 ixgbe_txeof(struct tx_ring *txr)
3936 {
3937 struct adapter *adapter = txr->adapter;
3938 struct ifnet *ifp = adapter->ifp;
3939 u32 work, processed = 0;
3940 u16 limit = txr->process_limit;
3941 struct ixgbe_tx_buf *buf;
3942 union ixgbe_adv_tx_desc *txd;
3943 struct timeval now, elapsed;
3944
3945 KASSERT(mutex_owned(&txr->tx_mtx));
3946
3947 #ifdef DEV_NETMAP
3948 if (ifp->if_capenable & IFCAP_NETMAP) {
3949 struct netmap_adapter *na = NA(ifp);
3950 struct netmap_kring *kring = &na->tx_rings[txr->me];
3951 txd = txr->tx_base;
3952 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3953 BUS_DMASYNC_POSTREAD);
3954 /*
3955 * In netmap mode, all the work is done in the context
3956 * of the client thread. Interrupt handlers only wake up
3957 * clients, which may be sleeping on individual rings
3958 * or on a global resource for all rings.
3959 * To implement tx interrupt mitigation, we wake up the client
3960 * thread roughly every half ring, even if the NIC interrupts
3961 * more frequently. This is implemented as follows:
3962 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3963 * the slot that should wake up the thread (nkr_num_slots
3964 * means the user thread should not be woken up);
3965 * - the driver ignores tx interrupts unless netmap_mitigate=0
3966 * or the slot has the DD bit set.
3967 */
3968 if (!netmap_mitigate ||
3969 (kring->nr_kflags < kring->nkr_num_slots &&
3970 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3971 netmap_tx_irq(ifp, txr->me);
3972 }
3973 return;
3974 }
3975 #endif /* DEV_NETMAP */
3976
3977 if (txr->tx_avail == txr->num_desc) {
3978 txr->queue_status = IXGBE_QUEUE_IDLE;
3979 return;
3980 }
3981
3982 /* Get work starting point */
3983 work = txr->next_to_clean;
3984 buf = &txr->tx_buffers[work];
3985 txd = &txr->tx_base[work];
3986 work -= txr->num_desc; /* The distance to ring end */
3987 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3988 BUS_DMASYNC_POSTREAD);
3989 do {
3990 union ixgbe_adv_tx_desc *eop= buf->eop;
3991 if (eop == NULL) /* No work */
3992 break;
3993
3994 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3995 break; /* I/O not complete */
3996
3997 if (buf->m_head) {
3998 txr->bytes +=
3999 buf->m_head->m_pkthdr.len;
4000 bus_dmamap_sync(txr->txtag->dt_dmat,
4001 buf->map,
4002 0, buf->m_head->m_pkthdr.len,
4003 BUS_DMASYNC_POSTWRITE);
4004 ixgbe_dmamap_unload(txr->txtag,
4005 buf->map);
4006 m_freem(buf->m_head);
4007 buf->m_head = NULL;
4008 /*
4009 * NetBSD: Don't override buf->map with NULL here.
4010 * It'll panic when a ring runs one lap around.
4011 */
4012 }
4013 buf->eop = NULL;
4014 ++txr->tx_avail;
4015
4016 /* We clean the range if multi segment */
4017 while (txd != eop) {
4018 ++txd;
4019 ++buf;
4020 ++work;
4021 /* wrap the ring? */
4022 if (__predict_false(!work)) {
4023 work -= txr->num_desc;
4024 buf = txr->tx_buffers;
4025 txd = txr->tx_base;
4026 }
4027 if (buf->m_head) {
4028 txr->bytes +=
4029 buf->m_head->m_pkthdr.len;
4030 bus_dmamap_sync(txr->txtag->dt_dmat,
4031 buf->map,
4032 0, buf->m_head->m_pkthdr.len,
4033 BUS_DMASYNC_POSTWRITE);
4034 ixgbe_dmamap_unload(txr->txtag,
4035 buf->map);
4036 m_freem(buf->m_head);
4037 buf->m_head = NULL;
4038 /*
4039 * NetBSD: Don't override buf->map with NULL
4040 * here. It'll panic when a ring runs one lap
4041 * around.
4042 */
4043 }
4044 ++txr->tx_avail;
4045 buf->eop = NULL;
4046
4047 }
4048 ++txr->packets;
4049 ++processed;
4050 ++ifp->if_opackets;
4051 getmicrotime(&txr->watchdog_time);
4052
4053 /* Try the next packet */
4054 ++txd;
4055 ++buf;
4056 ++work;
4057 /* reset with a wrap */
4058 if (__predict_false(!work)) {
4059 work -= txr->num_desc;
4060 buf = txr->tx_buffers;
4061 txd = txr->tx_base;
4062 }
4063 prefetch(txd);
4064 } while (__predict_true(--limit));
4065
4066 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4067 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4068
4069 work += txr->num_desc;
4070 txr->next_to_clean = work;
4071
4072 /*
4073 ** Watchdog calculation, we know there's
4074 ** work outstanding or the first return
4075 ** would have been taken, so none processed
4076 ** for too long indicates a hang.
4077 */
4078 getmicrotime(&now);
4079 timersub(&now, &txr->watchdog_time, &elapsed);
4080 if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
4081 txr->queue_status = IXGBE_QUEUE_HUNG;
4082
4083 if (txr->tx_avail == txr->num_desc)
4084 txr->queue_status = IXGBE_QUEUE_IDLE;
4085
4086 return;
4087 }
4088
4089 /*********************************************************************
4090 *
4091 * Refresh mbuf buffers for RX descriptor rings
4092 * - now keeps its own state so discards due to resource
4093 * exhaustion are unnecessary, if an mbuf cannot be obtained
4094 * it just returns, keeping its placeholder, thus it can simply
4095 * be recalled to try again.
4096 *
4097 **********************************************************************/
4098 static void
4099 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
4100 {
4101 struct adapter *adapter = rxr->adapter;
4102 struct ixgbe_rx_buf *rxbuf;
4103 struct mbuf *mp;
4104 int i, j, error;
4105 bool refreshed = false;
4106
4107 i = j = rxr->next_to_refresh;
4108 /* Control the loop with one beyond */
4109 if (++j == rxr->num_desc)
4110 j = 0;
4111
4112 while (j != limit) {
4113 rxbuf = &rxr->rx_buffers[i];
4114 if (rxbuf->buf == NULL) {
4115 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4116 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
4117 if (mp == NULL) {
4118 rxr->no_jmbuf.ev_count++;
4119 goto update;
4120 }
4121 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
4122 m_adj(mp, ETHER_ALIGN);
4123 } else
4124 mp = rxbuf->buf;
4125
4126 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4127
4128 /* If we're dealing with an mbuf that was copied rather
4129 * than replaced, there's no need to go through busdma.
4130 */
4131 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
4132 /* Get the memory mapping */
4133 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4134 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4135 if (error != 0) {
4136 printf("Refresh mbufs: payload dmamap load"
4137 " failure - %d\n", error);
4138 m_free(mp);
4139 rxbuf->buf = NULL;
4140 goto update;
4141 }
4142 rxbuf->buf = mp;
4143 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4144 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
4145 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
4146 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4147 } else {
4148 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
4149 rxbuf->flags &= ~IXGBE_RX_COPY;
4150 }
4151
4152 refreshed = true;
4153 /* Next is precalculated */
4154 i = j;
4155 rxr->next_to_refresh = i;
4156 if (++j == rxr->num_desc)
4157 j = 0;
4158 }
4159 update:
4160 if (refreshed) /* Update hardware tail index */
4161 IXGBE_WRITE_REG(&adapter->hw,
4162 IXGBE_RDT(rxr->me), rxr->next_to_refresh);
4163 return;
4164 }
4165
4166 /*********************************************************************
4167 *
4168 * Allocate memory for rx_buffer structures. Since we use one
4169 * rx_buffer per received packet, the maximum number of rx_buffer's
4170 * that we'll need is equal to the number of receive descriptors
4171 * that we've allocated.
4172 *
4173 **********************************************************************/
4174 static int
4175 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
4176 {
4177 struct adapter *adapter = rxr->adapter;
4178 device_t dev = adapter->dev;
4179 struct ixgbe_rx_buf *rxbuf;
4180 int i, bsize, error;
4181
4182 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
4183 if (!(rxr->rx_buffers =
4184 (struct ixgbe_rx_buf *) malloc(bsize,
4185 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4186 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
4187 error = ENOMEM;
4188 goto fail;
4189 }
4190
4191 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
4192 1, 0, /* alignment, bounds */
4193 MJUM16BYTES, /* maxsize */
4194 1, /* nsegments */
4195 MJUM16BYTES, /* maxsegsize */
4196 0, /* flags */
4197 &rxr->ptag))) {
4198 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
4199 goto fail;
4200 }
4201
4202 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
4203 rxbuf = &rxr->rx_buffers[i];
4204 error = ixgbe_dmamap_create(rxr->ptag,
4205 BUS_DMA_NOWAIT, &rxbuf->pmap);
4206 if (error) {
4207 aprint_error_dev(dev, "Unable to create RX dma map\n");
4208 goto fail;
4209 }
4210 }
4211
4212 return (0);
4213
4214 fail:
4215 /* Frees all, but can handle partial completion */
4216 ixgbe_free_receive_structures(adapter);
4217 return (error);
4218 }
4219
4220 /*
4221 ** Used to detect a descriptor that has
4222 ** been merged by Hardware RSC.
4223 */
4224 static inline u32
4225 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
4226 {
4227 return (le32toh(rx->wb.lower.lo_dword.data) &
4228 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
4229 }
4230
4231 /*********************************************************************
4232 *
4233 * Initialize Hardware RSC (LRO) feature on 82599
4234 * for an RX ring, this is toggled by the LRO capability
4235 * even though it is transparent to the stack.
4236 *
4237 * NOTE: since this HW feature only works with IPV4 and
4238 * our testing has shown soft LRO to be as effective
4239 * I have decided to disable this by default.
4240 *
4241 **********************************************************************/
4242 static void
4243 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
4244 {
4245 struct adapter *adapter = rxr->adapter;
4246 struct ixgbe_hw *hw = &adapter->hw;
4247 u32 rscctrl, rdrxctl;
4248
4249 /* If turning LRO/RSC off we need to disable it */
4250 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
4251 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4252 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
4253 return;
4254 }
4255
4256 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4257 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4258 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
4259 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4260 #endif /* DEV_NETMAP */
4261 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4262 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4263 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4264
4265 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4266 rscctrl |= IXGBE_RSCCTL_RSCEN;
4267 /*
4268 ** Limit the total number of descriptors that
4269 ** can be combined, so it does not exceed 64K
4270 */
4271 if (rxr->mbuf_sz == MCLBYTES)
4272 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4273 else if (rxr->mbuf_sz == MJUMPAGESIZE)
4274 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4275 else if (rxr->mbuf_sz == MJUM9BYTES)
4276 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4277 else /* Using 16K cluster */
4278 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4279
4280 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4281
4282 /* Enable TCP header recognition */
4283 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4284 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4285 IXGBE_PSRTYPE_TCPHDR));
4286
4287 /* Disable RSC for ACK packets */
4288 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4289 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4290
4291 rxr->hw_rsc = TRUE;
4292 }
4293
4294
4295 static void
4296 ixgbe_free_receive_ring(struct rx_ring *rxr)
4297 {
4298 struct ixgbe_rx_buf *rxbuf;
4299 int i;
4300
4301 for (i = 0; i < rxr->num_desc; i++) {
4302 rxbuf = &rxr->rx_buffers[i];
4303 if (rxbuf->buf != NULL) {
4304 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4305 0, rxbuf->buf->m_pkthdr.len,
4306 BUS_DMASYNC_POSTREAD);
4307 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4308 rxbuf->buf->m_flags |= M_PKTHDR;
4309 m_freem(rxbuf->buf);
4310 rxbuf->buf = NULL;
4311 rxbuf->flags = 0;
4312 }
4313 }
4314 }
4315
4316
4317 /*********************************************************************
4318 *
4319 * Initialize a receive ring and its buffers.
4320 *
4321 **********************************************************************/
4322 static int
4323 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4324 {
4325 struct adapter *adapter;
4326 struct ixgbe_rx_buf *rxbuf;
4327 #ifdef LRO
4328 struct ifnet *ifp;
4329 struct lro_ctrl *lro = &rxr->lro;
4330 #endif /* LRO */
4331 int rsize, error = 0;
4332 #ifdef DEV_NETMAP
4333 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4334 struct netmap_slot *slot;
4335 #endif /* DEV_NETMAP */
4336
4337 adapter = rxr->adapter;
4338 #ifdef LRO
4339 ifp = adapter->ifp;
4340 #endif /* LRO */
4341
4342 /* Clear the ring contents */
4343 IXGBE_RX_LOCK(rxr);
4344 #ifdef DEV_NETMAP
4345 /* same as in ixgbe_setup_transmit_ring() */
4346 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4347 #endif /* DEV_NETMAP */
4348 rsize = roundup2(adapter->num_rx_desc *
4349 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4350 bzero((void *)rxr->rx_base, rsize);
4351 /* Cache the size */
4352 rxr->mbuf_sz = adapter->rx_mbuf_sz;
4353
4354 /* Free current RX buffer structs and their mbufs */
4355 ixgbe_free_receive_ring(rxr);
4356
4357 IXGBE_RX_UNLOCK(rxr);
4358
4359 /* Now reinitialize our supply of jumbo mbufs. The number
4360 * or size of jumbo mbufs may have changed.
4361 */
4362 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
4363 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
4364
4365 IXGBE_RX_LOCK(rxr);
4366
4367 /* Now replenish the mbufs */
4368 for (int j = 0; j != rxr->num_desc; ++j) {
4369 struct mbuf *mp;
4370
4371 rxbuf = &rxr->rx_buffers[j];
4372 #ifdef DEV_NETMAP
4373 /*
4374 * In netmap mode, fill the map and set the buffer
4375 * address in the NIC ring, considering the offset
4376 * between the netmap and NIC rings (see comment in
4377 * ixgbe_setup_transmit_ring() ). No need to allocate
4378 * an mbuf, so end the block with a continue;
4379 */
4380 if (slot) {
4381 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4382 uint64_t paddr;
4383 void *addr;
4384
4385 addr = PNMB(na, slot + sj, &paddr);
4386 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4387 /* Update descriptor and the cached value */
4388 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4389 rxbuf->addr = htole64(paddr);
4390 continue;
4391 }
4392 #endif /* DEV_NETMAP */
4393 rxbuf->flags = 0;
4394 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4395 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
4396 if (rxbuf->buf == NULL) {
4397 error = ENOBUFS;
4398 goto fail;
4399 }
4400 mp = rxbuf->buf;
4401 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4402 /* Get the memory mapping */
4403 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4404 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4405 if (error != 0)
4406 goto fail;
4407 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4408 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
4409 /* Update the descriptor and the cached value */
4410 rxr->rx_base[j].read.pkt_addr =
4411 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4412 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4413 }
4414
4415
4416 /* Setup our descriptor indices */
4417 rxr->next_to_check = 0;
4418 rxr->next_to_refresh = 0;
4419 rxr->lro_enabled = FALSE;
4420 rxr->rx_copies.ev_count = 0;
4421 rxr->rx_bytes.ev_count = 0;
4422 rxr->vtag_strip = FALSE;
4423
4424 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4425 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4426
4427 /*
4428 ** Now set up the LRO interface:
4429 */
4430 if (ixgbe_rsc_enable)
4431 ixgbe_setup_hw_rsc(rxr);
4432 #ifdef LRO
4433 else if (ifp->if_capenable & IFCAP_LRO) {
4434 device_t dev = adapter->dev;
4435 int err = tcp_lro_init(lro);
4436 if (err) {
4437 device_printf(dev, "LRO Initialization failed!\n");
4438 goto fail;
4439 }
4440 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4441 rxr->lro_enabled = TRUE;
4442 lro->ifp = adapter->ifp;
4443 }
4444 #endif /* LRO */
4445
4446 IXGBE_RX_UNLOCK(rxr);
4447 return (0);
4448
4449 fail:
4450 ixgbe_free_receive_ring(rxr);
4451 IXGBE_RX_UNLOCK(rxr);
4452 return (error);
4453 }
4454
4455 /*********************************************************************
4456 *
4457 * Initialize all receive rings.
4458 *
4459 **********************************************************************/
4460 static int
4461 ixgbe_setup_receive_structures(struct adapter *adapter)
4462 {
4463 struct rx_ring *rxr = adapter->rx_rings;
4464 int j;
4465
4466 for (j = 0; j < adapter->num_queues; j++, rxr++)
4467 if (ixgbe_setup_receive_ring(rxr))
4468 goto fail;
4469
4470 return (0);
4471 fail:
4472 /*
4473 * Free RX buffers allocated so far, we will only handle
4474 * the rings that completed, the failing case will have
4475 * cleaned up for itself. 'j' failed, so its the terminus.
4476 */
4477 for (int i = 0; i < j; ++i) {
4478 rxr = &adapter->rx_rings[i];
4479 ixgbe_free_receive_ring(rxr);
4480 }
4481
4482 return (ENOBUFS);
4483 }
4484
4485 static void
4486 ixgbe_initialise_rss_mapping(struct adapter *adapter)
4487 {
4488 struct ixgbe_hw *hw = &adapter->hw;
4489 uint32_t reta;
4490 int i, j, queue_id;
4491 uint32_t rss_key[10];
4492 uint32_t mrqc;
4493 #ifdef RSS
4494 uint32_t rss_hash_config;
4495 #endif
4496
4497 /* Setup RSS */
4498 reta = 0;
4499
4500 #ifdef RSS
4501 /* Fetch the configured RSS key */
4502 rss_getkey((uint8_t *) &rss_key);
4503 #else
4504 /* set up random bits */
4505 cprng_fast(&rss_key, sizeof(rss_key));
4506 #endif
4507
4508 /* Set up the redirection table */
4509 for (i = 0, j = 0; i < 128; i++, j++) {
4510 if (j == adapter->num_queues) j = 0;
4511 #ifdef RSS
4512 /*
4513 * Fetch the RSS bucket id for the given indirection entry.
4514 * Cap it at the number of configured buckets (which is
4515 * num_queues.)
4516 */
4517 queue_id = rss_get_indirection_to_bucket(i);
4518 queue_id = queue_id % adapter->num_queues;
4519 #else
4520 queue_id = (j * 0x11);
4521 #endif
4522 /*
4523 * The low 8 bits are for hash value (n+0);
4524 * The next 8 bits are for hash value (n+1), etc.
4525 */
4526 reta = reta >> 8;
4527 reta = reta | ( ((uint32_t) queue_id) << 24);
4528 if ((i & 3) == 3) {
4529 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4530 reta = 0;
4531 }
4532 }
4533
4534 /* Now fill our hash function seeds */
4535 for (i = 0; i < 10; i++)
4536 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
4537
4538 /* Perform hash on these packet types */
4539 #ifdef RSS
4540 mrqc = IXGBE_MRQC_RSSEN;
4541 rss_hash_config = rss_gethashconfig();
4542 if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
4543 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
4544 if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
4545 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
4546 if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
4547 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
4548 if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
4549 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
4550 if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
4551 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
4552 if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
4553 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
4554 if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
4555 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
4556 if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
4557 device_printf(adapter->dev,
4558 "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
4559 "but not supported\n", __func__);
4560 if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
4561 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
4562 if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
4563 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4564 #else
4565 /*
4566 * Disable UDP - IP fragments aren't currently being handled
4567 * and so we end up with a mix of 2-tuple and 4-tuple
4568 * traffic.
4569 */
4570 mrqc = IXGBE_MRQC_RSSEN
4571 | IXGBE_MRQC_RSS_FIELD_IPV4
4572 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4573 #if 0
4574 | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4575 #endif
4576 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4577 | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4578 | IXGBE_MRQC_RSS_FIELD_IPV6
4579 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4580 #if 0
4581 | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4582 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
4583 #endif
4584 ;
4585 #endif /* RSS */
4586 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4587 }
4588
4589
4590 /*********************************************************************
4591 *
4592 * Setup receive registers and features.
4593 *
4594 **********************************************************************/
4595 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4596
4597 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4598
4599 static void
4600 ixgbe_initialize_receive_units(struct adapter *adapter)
4601 {
4602 int i;
4603 struct rx_ring *rxr = adapter->rx_rings;
4604 struct ixgbe_hw *hw = &adapter->hw;
4605 struct ifnet *ifp = adapter->ifp;
4606 u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
4607 u32 hlreg;
4608
4609
4610 /*
4611 * Make sure receives are disabled while
4612 * setting up the descriptor ring
4613 */
4614 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4615 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4616 rxctrl & ~IXGBE_RXCTRL_RXEN);
4617
4618 /* Enable broadcasts */
4619 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4620 fctrl |= IXGBE_FCTRL_BAM;
4621 fctrl |= IXGBE_FCTRL_DPF;
4622 fctrl |= IXGBE_FCTRL_PMCF;
4623 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4624
4625 /* Set for Jumbo Frames? */
4626 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4627 if (ifp->if_mtu > ETHERMTU)
4628 hlreg |= IXGBE_HLREG0_JUMBOEN;
4629 else
4630 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4631 #ifdef DEV_NETMAP
4632 /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4633 if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4634 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4635 else
4636 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4637 #endif /* DEV_NETMAP */
4638 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4639
4640 bufsz = (adapter->rx_mbuf_sz +
4641 BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4642
4643 for (i = 0; i < adapter->num_queues; i++, rxr++) {
4644 u64 rdba = rxr->rxdma.dma_paddr;
4645
4646 /* Setup the Base and Length of the Rx Descriptor Ring */
4647 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4648 (rdba & 0x00000000ffffffffULL));
4649 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4650 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4651 adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4652
4653 /* Set up the SRRCTL register */
4654 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4655 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4656 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4657 srrctl |= bufsz;
4658 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4659
4660 /*
4661 * Set DROP_EN iff we have no flow control and >1 queue.
4662 * Note that srrctl was cleared shortly before during reset,
4663 * so we do not need to clear the bit, but do it just in case
4664 * this code is moved elsewhere.
4665 */
4666 if (adapter->num_queues > 1 &&
4667 adapter->fc == ixgbe_fc_none) {
4668 srrctl |= IXGBE_SRRCTL_DROP_EN;
4669 } else {
4670 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
4671 }
4672
4673 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4674
4675 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4676 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4677 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4678
4679 /* Set the processing limit */
4680 rxr->process_limit = ixgbe_rx_process_limit;
4681 }
4682
4683 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4684 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4685 IXGBE_PSRTYPE_UDPHDR |
4686 IXGBE_PSRTYPE_IPV4HDR |
4687 IXGBE_PSRTYPE_IPV6HDR;
4688 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4689 }
4690
4691 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4692
4693 ixgbe_initialise_rss_mapping(adapter);
4694
4695 if (adapter->num_queues > 1) {
4696 /* RSS and RX IPP Checksum are mutually exclusive */
4697 rxcsum |= IXGBE_RXCSUM_PCSD;
4698 }
4699
4700 if (ifp->if_capenable & IFCAP_RXCSUM)
4701 rxcsum |= IXGBE_RXCSUM_PCSD;
4702
4703 if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4704 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4705
4706 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4707
4708 return;
4709 }
4710
4711 /*********************************************************************
4712 *
4713 * Free all receive rings.
4714 *
4715 **********************************************************************/
4716 static void
4717 ixgbe_free_receive_structures(struct adapter *adapter)
4718 {
4719 struct rx_ring *rxr = adapter->rx_rings;
4720
4721 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4722
4723 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4724 #ifdef LRO
4725 struct lro_ctrl *lro = &rxr->lro;
4726 #endif /* LRO */
4727 ixgbe_free_receive_buffers(rxr);
4728 #ifdef LRO
4729 /* Free LRO memory */
4730 tcp_lro_free(lro);
4731 #endif /* LRO */
4732 /* Free the ring memory as well */
4733 ixgbe_dma_free(adapter, &rxr->rxdma);
4734 IXGBE_RX_LOCK_DESTROY(rxr);
4735 }
4736
4737 free(adapter->rx_rings, M_DEVBUF);
4738 }
4739
4740
4741 /*********************************************************************
4742 *
4743 * Free receive ring data structures
4744 *
4745 **********************************************************************/
4746 static void
4747 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4748 {
4749 struct adapter *adapter = rxr->adapter;
4750 struct ixgbe_rx_buf *rxbuf;
4751
4752 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4753
4754 /* Cleanup any existing buffers */
4755 if (rxr->rx_buffers != NULL) {
4756 for (int i = 0; i < adapter->num_rx_desc; i++) {
4757 rxbuf = &rxr->rx_buffers[i];
4758 if (rxbuf->buf != NULL) {
4759 bus_dmamap_sync(rxr->ptag->dt_dmat,
4760 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
4761 BUS_DMASYNC_POSTREAD);
4762 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4763 rxbuf->buf->m_flags |= M_PKTHDR;
4764 m_freem(rxbuf->buf);
4765 }
4766 rxbuf->buf = NULL;
4767 if (rxbuf->pmap != NULL) {
4768 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4769 rxbuf->pmap = NULL;
4770 }
4771 }
4772 if (rxr->rx_buffers != NULL) {
4773 free(rxr->rx_buffers, M_DEVBUF);
4774 rxr->rx_buffers = NULL;
4775 }
4776 }
4777
4778 if (rxr->ptag != NULL) {
4779 ixgbe_dma_tag_destroy(rxr->ptag);
4780 rxr->ptag = NULL;
4781 }
4782
4783 return;
4784 }
4785
4786 static __inline void
4787 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4788 {
4789 int s;
4790
4791 #ifdef LRO
4792 struct adapter *adapter = ifp->if_softc;
4793 struct ethercom *ec = &adapter->osdep.ec;
4794
4795 /*
4796 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
4797 * should be computed by hardware. Also it should not have VLAN tag in
4798 * ethernet header. In case of IPv6 we do not yet support ext. hdrs.
4799 */
4800 if (rxr->lro_enabled &&
4801 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
4802 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4803 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4804 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4805 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4806 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4807 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4808 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4809 /*
4810 * Send to the stack if:
4811 ** - LRO not enabled, or
4812 ** - no LRO resources, or
4813 ** - lro enqueue fails
4814 */
4815 if (rxr->lro.lro_cnt != 0)
4816 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4817 return;
4818 }
4819 #endif /* LRO */
4820
4821 IXGBE_RX_UNLOCK(rxr);
4822
4823 s = splnet();
4824 /* Pass this up to any BPF listeners. */
4825 bpf_mtap(ifp, m);
4826 if_input(ifp, m);
4827 splx(s);
4828
4829 IXGBE_RX_LOCK(rxr);
4830 }
4831
4832 static __inline void
4833 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4834 {
4835 struct ixgbe_rx_buf *rbuf;
4836
4837 rbuf = &rxr->rx_buffers[i];
4838
4839
4840 /*
4841 ** With advanced descriptors the writeback
4842 ** clobbers the buffer addrs, so its easier
4843 ** to just free the existing mbufs and take
4844 ** the normal refresh path to get new buffers
4845 ** and mapping.
4846 */
4847
4848 if (rbuf->buf != NULL) {/* Partial chain ? */
4849 rbuf->fmp->m_flags |= M_PKTHDR;
4850 m_freem(rbuf->fmp);
4851 rbuf->fmp = NULL;
4852 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
4853 } else if (rbuf->buf) {
4854 m_free(rbuf->buf);
4855 rbuf->buf = NULL;
4856 }
4857
4858 rbuf->flags = 0;
4859
4860 return;
4861 }
4862
4863
/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  At most rxr->process_limit descriptors are handled per call; the
 *  loop also stops at the first descriptor without the DD status
 *  bit, or when the interface is no longer IFF_RUNNING.
 *
 *  Returns true if the last descriptor examined had DD set (more
 *  work may be pending), false otherwise.  With DEV_NETMAP, FALSE is
 *  returned immediately when netmap_rx_irq() reports non-zero.
 *********************************************************************/
static bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
#ifdef LRO
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
#endif /* LRO */
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	/* Per-call descriptor budget */
	u16			count = rxr->process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
#ifdef RSS
	u16			pkt_info;
#endif

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
#ifdef RSS
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
#endif

		/* Stop at the first descriptor whose DD bit is not set */
		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		/* Stop if the interface is no longer running */
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		/* Clear the status so this descriptor is not seen as done again */
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
			rxr->rx_discarded.ev_count++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before, it
		** required eliminating global chain pointers
		** in favor of what we are doing here.  -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			/* Ownership of mp moves to the chain; empty the slot */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK.  Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies.ev_count++;
					/*
					 * Tells ixgbe_refresh_mbufs() the
					 * existing mapping can be reused.
					 */
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			/* No copy was made: pass the cluster mbuf itself up */
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			rxr->rx_packets.ev_count++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				VLAN_INPUT_TAG(ifp, sendmp, vtag,
				    printf("%s: could not apply VLAN "
					"tag", __func__));
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
				ixgbe_rx_checksum(staterr, sendmp, ptype,
				   &adapter->stats);
			}
#if __FreeBSD_version >= 800000
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.lower.hi_dword.rss);
			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
				break;
			default:
				/* XXX fallthrough */
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
				break;
			}
#else /* RSS */
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif /* RSS */
#endif /* FreeBSD_version */
		}
next_desc:
		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/*
		 * Now send to the stack or do LRO.
		 * ixgbe_rx_input() may drop and re-take the RX lock, so
		 * the index is published before the call and re-read
		 * afterwards.
		 */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

#ifdef LRO
	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif /* LRO */

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	** (staterr holds the status of the last descriptor examined.)
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return true;
	else
		return false;
}
5141
5142
5143 /*********************************************************************
5144 *
5145 * Verify that the hardware indicated that the checksum is valid.
5146 * Inform the stack about the status of checksum so that stack
5147 * doesn't spend time verifying the checksum.
5148 *
5149 *********************************************************************/
5150 static void
5151 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
5152 struct ixgbe_hw_stats *stats)
5153 {
5154 u16 status = (u16) staterr;
5155 u8 errors = (u8) (staterr >> 24);
5156 #if 0
5157 bool sctp = FALSE;
5158
5159 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
5160 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
5161 sctp = TRUE;
5162 #endif
5163
5164 if (status & IXGBE_RXD_STAT_IPCS) {
5165 stats->ipcs.ev_count++;
5166 if (!(errors & IXGBE_RXD_ERR_IPE)) {
5167 /* IP Checksum Good */
5168 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
5169
5170 } else {
5171 stats->ipcs_bad.ev_count++;
5172 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
5173 }
5174 }
5175 if (status & IXGBE_RXD_STAT_L4CS) {
5176 stats->l4cs.ev_count++;
5177 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
5178 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
5179 mp->m_pkthdr.csum_flags |= type;
5180 } else {
5181 stats->l4cs_bad.ev_count++;
5182 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
5183 }
5184 }
5185 return;
5186 }
5187
5188
5189 #if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */
5190 /*
5191 ** This routine is run via an vlan config EVENT,
5192 ** it enables us to use the HW Filter table since
5193 ** we can get the vlan id. This just creates the
5194 ** entry in the soft version of the VFTA, init will
5195 ** repopulate the real table.
5196 */
5197 static void
5198 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5199 {
5200 struct adapter *adapter = ifp->if_softc;
5201 u16 index, bit;
5202
5203 if (ifp->if_softc != arg) /* Not our event */
5204 return;
5205
5206 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5207 return;
5208
5209 IXGBE_CORE_LOCK(adapter);
5210 index = (vtag >> 5) & 0x7F;
5211 bit = vtag & 0x1F;
5212 adapter->shadow_vfta[index] |= (1 << bit);
5213 ixgbe_setup_vlan_hw_support(adapter);
5214 IXGBE_CORE_UNLOCK(adapter);
5215 }
5216
5217 /*
5218 ** This routine is run via an vlan
5219 ** unconfig EVENT, remove our entry
5220 ** in the soft vfta.
5221 */
5222 static void
5223 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5224 {
5225 struct adapter *adapter = ifp->if_softc;
5226 u16 index, bit;
5227
5228 if (ifp->if_softc != arg)
5229 return;
5230
5231 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5232 return;
5233
5234 IXGBE_CORE_LOCK(adapter);
5235 index = (vtag >> 5) & 0x7F;
5236 bit = vtag & 0x1F;
5237 adapter->shadow_vfta[index] &= ~(1 << bit);
5238 /* Re-init to load the changes */
5239 ixgbe_setup_vlan_hw_support(adapter);
5240 IXGBE_CORE_UNLOCK(adapter);
5241 }
5242 #endif
5243
5244 static void
5245 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
5246 {
5247 struct ethercom *ec = &adapter->osdep.ec;
5248 struct ixgbe_hw *hw = &adapter->hw;
5249 struct rx_ring *rxr;
5250 u32 ctrl;
5251
5252
5253 /*
5254 ** We get here thru init_locked, meaning
5255 ** a soft reset, this has already cleared
5256 ** the VFTA and other state, so if there
5257 ** have been no vlan's registered do nothing.
5258 */
5259 if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
5260 return;
5261 }
5262
5263 /* Setup the queues for vlans */
5264 for (int i = 0; i < adapter->num_queues; i++) {
5265 rxr = &adapter->rx_rings[i];
5266 /* On 82599 the VLAN enable is per/queue in RXDCTL */
5267 if (hw->mac.type != ixgbe_mac_82598EB) {
5268 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
5269 ctrl |= IXGBE_RXDCTL_VME;
5270 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
5271 }
5272 rxr->vtag_strip = TRUE;
5273 }
5274
5275 if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
5276 return;
5277 /*
5278 ** A soft reset zero's out the VFTA, so
5279 ** we need to repopulate it now.
5280 */
5281 for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
5282 if (adapter->shadow_vfta[i] != 0)
5283 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
5284 adapter->shadow_vfta[i]);
5285
5286 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
5287 /* Enable the Filter Table if enabled */
5288 if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
5289 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
5290 ctrl |= IXGBE_VLNCTRL_VFE;
5291 }
5292 if (hw->mac.type == ixgbe_mac_82598EB)
5293 ctrl |= IXGBE_VLNCTRL_VME;
5294 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
5295 }
5296
5297 static void
5298 ixgbe_enable_intr(struct adapter *adapter)
5299 {
5300 struct ixgbe_hw *hw = &adapter->hw;
5301 struct ix_queue *que = adapter->queues;
5302 u32 mask, fwsm;
5303
5304 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
5305 /* Enable Fan Failure detection */
5306 if (hw->device_id == IXGBE_DEV_ID_82598AT)
5307 mask |= IXGBE_EIMS_GPI_SDP1;
5308
5309 switch (adapter->hw.mac.type) {
5310 case ixgbe_mac_82599EB:
5311 mask |= IXGBE_EIMS_ECC;
5312 mask |= IXGBE_EIMS_GPI_SDP0;
5313 mask |= IXGBE_EIMS_GPI_SDP1;
5314 mask |= IXGBE_EIMS_GPI_SDP2;
5315 #ifdef IXGBE_FDIR
5316 mask |= IXGBE_EIMS_FLOW_DIR;
5317 #endif
5318 break;
5319 case ixgbe_mac_X540:
5320 mask |= IXGBE_EIMS_ECC;
5321 /* Detect if Thermal Sensor is enabled */
5322 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
5323 if (fwsm & IXGBE_FWSM_TS_ENABLED)
5324 mask |= IXGBE_EIMS_TS;
5325 #ifdef IXGBE_FDIR
5326 mask |= IXGBE_EIMS_FLOW_DIR;
5327 #endif
5328 /* falls through */
5329 default:
5330 break;
5331 }
5332
5333 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5334
5335 /* With RSS we use auto clear */
5336 if (adapter->msix_mem) {
5337 mask = IXGBE_EIMS_ENABLE_MASK;
5338 /* Don't autoclear Link */
5339 mask &= ~IXGBE_EIMS_OTHER;
5340 mask &= ~IXGBE_EIMS_LSC;
5341 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5342 }
5343
5344 /*
5345 ** Now enable all queues, this is done separately to
5346 ** allow for handling the extended (beyond 32) MSIX
5347 ** vectors that can be used by 82599
5348 */
5349 for (int i = 0; i < adapter->num_queues; i++, que++)
5350 ixgbe_enable_queue(adapter, que->msix);
5351
5352 IXGBE_WRITE_FLUSH(hw);
5353
5354 return;
5355 }
5356
5357 static void
5358 ixgbe_disable_intr(struct adapter *adapter)
5359 {
5360 if (adapter->msix_mem)
5361 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5362 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5363 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5364 } else {
5365 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5366 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5367 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5368 }
5369 IXGBE_WRITE_FLUSH(&adapter->hw);
5370 return;
5371 }
5372
5373 u16
5374 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5375 {
5376 switch (reg % 4) {
5377 case 0:
5378 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5379 __BITS(15, 0);
5380 case 2:
5381 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
5382 reg - 2), __BITS(31, 16));
5383 default:
5384 panic("%s: invalid register (%" PRIx32, __func__, reg);
5385 break;
5386 }
5387 }
5388
5389 void
5390 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5391 {
5392 pcireg_t old;
5393
5394 switch (reg % 4) {
5395 case 0:
5396 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5397 __BITS(31, 16);
5398 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
5399 break;
5400 case 2:
5401 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
5402 __BITS(15, 0);
5403 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
5404 __SHIFTIN(value, __BITS(31, 16)) | old);
5405 break;
5406 default:
5407 panic("%s: invalid register (%" PRIx32, __func__, reg);
5408 break;
5409 }
5410
5411 return;
5412 }
5413
5414 /*
5415 ** Get the width and transaction speed of
5416 ** the slot this adapter is plugged into.
5417 */
5418 static void
5419 ixgbe_get_slot_info(struct ixgbe_hw *hw)
5420 {
5421 device_t dev = ((struct ixgbe_osdep *)hw->back)->dev;
5422 struct ixgbe_mac_info *mac = &hw->mac;
5423 u16 link;
5424
5425 /* For most devices simply call the shared code routine */
5426 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
5427 ixgbe_get_bus_info(hw);
5428 goto display;
5429 }
5430
5431 /*
5432 ** For the Quad port adapter we need to parse back
5433 ** up the PCI tree to find the speed of the expansion
5434 ** slot into which this adapter is plugged. A bit more work.
5435 */
5436 dev = device_parent(device_parent(dev));
5437 #ifdef IXGBE_DEBUG
5438 device_printf(dev, "parent pcib = %x,%x,%x\n",
5439 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5440 #endif
5441 dev = device_parent(device_parent(dev));
5442 #ifdef IXGBE_DEBUG
5443 device_printf(dev, "slot pcib = %x,%x,%x\n",
5444 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
5445 #endif
5446 /* Now get the PCI Express Capabilities offset */
5447 /* ...and read the Link Status Register */
5448 link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
5449 switch (link & IXGBE_PCI_LINK_WIDTH) {
5450 case IXGBE_PCI_LINK_WIDTH_1:
5451 hw->bus.width = ixgbe_bus_width_pcie_x1;
5452 break;
5453 case IXGBE_PCI_LINK_WIDTH_2:
5454 hw->bus.width = ixgbe_bus_width_pcie_x2;
5455 break;
5456 case IXGBE_PCI_LINK_WIDTH_4:
5457 hw->bus.width = ixgbe_bus_width_pcie_x4;
5458 break;
5459 case IXGBE_PCI_LINK_WIDTH_8:
5460 hw->bus.width = ixgbe_bus_width_pcie_x8;
5461 break;
5462 default:
5463 hw->bus.width = ixgbe_bus_width_unknown;
5464 break;
5465 }
5466
5467 switch (link & IXGBE_PCI_LINK_SPEED) {
5468 case IXGBE_PCI_LINK_SPEED_2500:
5469 hw->bus.speed = ixgbe_bus_speed_2500;
5470 break;
5471 case IXGBE_PCI_LINK_SPEED_5000:
5472 hw->bus.speed = ixgbe_bus_speed_5000;
5473 break;
5474 case IXGBE_PCI_LINK_SPEED_8000:
5475 hw->bus.speed = ixgbe_bus_speed_8000;
5476 break;
5477 default:
5478 hw->bus.speed = ixgbe_bus_speed_unknown;
5479 break;
5480 }
5481
5482 mac->ops.set_lan_id(hw);
5483
5484 display:
5485 device_printf(dev,"PCI Express Bus: Speed %s %s\n",
5486 ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
5487 (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
5488 (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
5489 (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
5490 (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
5491 (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
5492 ("Unknown"));
5493
5494 if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5495 ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
5496 (hw->bus.speed == ixgbe_bus_speed_2500))) {
5497 device_printf(dev, "PCI-Express bandwidth available"
5498 " for this card\n is not sufficient for"
5499 " optimal performance.\n");
5500 device_printf(dev, "For optimal performance a x8 "
5501 "PCIE, or x4 PCIE Gen2 slot is required.\n");
5502 }
5503 if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
5504 ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
5505 (hw->bus.speed < ixgbe_bus_speed_8000))) {
5506 device_printf(dev, "PCI-Express bandwidth available"
5507 " for this card\n is not sufficient for"
5508 " optimal performance.\n");
5509 device_printf(dev, "For optimal performance a x8 "
5510 "PCIE Gen3 slot is required.\n");
5511 }
5512
5513 return;
5514 }
5515
5516
5517 /*
5518 ** Setup the correct IVAR register for a particular MSIX interrupt
5519 ** (yes this is all very magic and confusing :)
5520 ** - entry is the register array entry
5521 ** - vector is the MSIX vector for this queue
5522 ** - type is RX/TX/MISC
5523 */
5524 static void
5525 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5526 {
5527 struct ixgbe_hw *hw = &adapter->hw;
5528 u32 ivar, index;
5529
5530 vector |= IXGBE_IVAR_ALLOC_VAL;
5531
5532 switch (hw->mac.type) {
5533
5534 case ixgbe_mac_82598EB:
5535 if (type == -1)
5536 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5537 else
5538 entry += (type * 64);
5539 index = (entry >> 2) & 0x1F;
5540 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5541 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5542 ivar |= (vector << (8 * (entry & 0x3)));
5543 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5544 break;
5545
5546 case ixgbe_mac_82599EB:
5547 case ixgbe_mac_X540:
5548 if (type == -1) { /* MISC IVAR */
5549 index = (entry & 1) * 8;
5550 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5551 ivar &= ~(0xFF << index);
5552 ivar |= (vector << index);
5553 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5554 } else { /* RX/TX IVARS */
5555 index = (16 * (entry & 1)) + (8 * type);
5556 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5557 ivar &= ~(0xFF << index);
5558 ivar |= (vector << index);
5559 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5560 }
5561
5562 default:
5563 break;
5564 }
5565 }
5566
5567 static void
5568 ixgbe_configure_ivars(struct adapter *adapter)
5569 {
5570 struct ix_queue *que = adapter->queues;
5571 u32 newitr;
5572
5573 if (ixgbe_max_interrupt_rate > 0)
5574 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5575 else
5576 newitr = 0;
5577
5578 for (int i = 0; i < adapter->num_queues; i++, que++) {
5579 /* First the RX queue entry */
5580 ixgbe_set_ivar(adapter, i, que->msix, 0);
5581 /* ... and the TX */
5582 ixgbe_set_ivar(adapter, i, que->msix, 1);
5583 /* Set an Initial EITR value */
5584 IXGBE_WRITE_REG(&adapter->hw,
5585 IXGBE_EITR(que->msix), newitr);
5586 }
5587
5588 /* For the Link interrupt */
5589 ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5590 }
5591
5592 /*
5593 ** ixgbe_sfp_probe - called in the local timer to
5594 ** determine if a port had optics inserted.
5595 */
5596 static bool ixgbe_sfp_probe(struct adapter *adapter)
5597 {
5598 struct ixgbe_hw *hw = &adapter->hw;
5599 device_t dev = adapter->dev;
5600 bool result = FALSE;
5601
5602 if ((hw->phy.type == ixgbe_phy_nl) &&
5603 (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5604 s32 ret = hw->phy.ops.identify_sfp(hw);
5605 if (ret)
5606 goto out;
5607 ret = hw->phy.ops.reset(hw);
5608 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5609 device_printf(dev,"Unsupported SFP+ module detected!");
5610 device_printf(dev, "Reload driver with supported module.\n");
5611 adapter->sfp_probe = FALSE;
5612 goto out;
5613 } else
5614 device_printf(dev,"SFP+ module detected!\n");
5615 /* We now have supported optics */
5616 adapter->sfp_probe = FALSE;
5617 /* Set the optics type so system reports correctly */
5618 ixgbe_setup_optics(adapter);
5619 result = TRUE;
5620 }
5621 out:
5622 return (result);
5623 }
5624
5625 /*
5626 ** Tasklet handler for MSIX Link interrupts
5627 ** - do outside interrupt since it might sleep
5628 */
5629 static void
5630 ixgbe_handle_link(void *context)
5631 {
5632 struct adapter *adapter = context;
5633
5634 if (ixgbe_check_link(&adapter->hw,
5635 &adapter->link_speed, &adapter->link_up, 0) == 0)
5636 ixgbe_update_link_status(adapter);
5637 }
5638
5639 /*
5640 ** Tasklet for handling SFP module interrupts
5641 */
5642 static void
5643 ixgbe_handle_mod(void *context)
5644 {
5645 struct adapter *adapter = context;
5646 struct ixgbe_hw *hw = &adapter->hw;
5647 device_t dev = adapter->dev;
5648 u32 err;
5649
5650 err = hw->phy.ops.identify_sfp(hw);
5651 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5652 device_printf(dev,
5653 "Unsupported SFP+ module type was detected.\n");
5654 return;
5655 }
5656 err = hw->mac.ops.setup_sfp(hw);
5657 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5658 device_printf(dev,
5659 "Setup failure - unsupported SFP+ module type.\n");
5660 return;
5661 }
5662 softint_schedule(adapter->msf_si);
5663 return;
5664 }
5665
5666
5667 /*
5668 ** Tasklet for handling MSF (multispeed fiber) interrupts
5669 */
5670 static void
5671 ixgbe_handle_msf(void *context)
5672 {
5673 struct adapter *adapter = context;
5674 struct ixgbe_hw *hw = &adapter->hw;
5675 u32 autoneg;
5676 bool negotiate;
5677
5678 autoneg = hw->phy.autoneg_advertised;
5679 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5680 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5681 else
5682 negotiate = 0;
5683 if (hw->mac.ops.setup_link)
5684 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5685 return;
5686 }
5687
5688 #ifdef IXGBE_FDIR
5689 /*
5690 ** Tasklet for reinitializing the Flow Director filter table
5691 */
5692 static void
5693 ixgbe_reinit_fdir(void *context)
5694 {
5695 struct adapter *adapter = context;
5696 struct ifnet *ifp = adapter->ifp;
5697
5698 if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5699 return;
5700 ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5701 adapter->fdir_reinit = 0;
5702 /* re-enable flow director interrupts */
5703 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5704 /* Restart the interface */
5705 ifp->if_flags |= IFF_RUNNING;
5706 return;
5707 }
5708 #endif
5709
5710 /**********************************************************************
5711 *
5712 * Update the board statistics counters.
5713 *
5714 **********************************************************************/
5715 static void
5716 ixgbe_update_stats_counters(struct adapter *adapter)
5717 {
5718 struct ifnet *ifp = adapter->ifp;
5719 struct ixgbe_hw *hw = &adapter->hw;
5720 u32 missed_rx = 0, bprc, lxon, lxoff, total;
5721 u64 total_missed_rx = 0;
5722 uint64_t crcerrs, rlec;
5723
5724 crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5725 adapter->stats.crcerrs.ev_count += crcerrs;
5726 adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5727 adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5728 adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5729
5730 /*
5731 ** Note: these are for the 8 possible traffic classes,
5732 ** which in current implementation is unused,
5733 ** therefore only 0 should read real data.
5734 */
5735 for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
5736 int j = i % adapter->num_queues;
5737 u32 mp;
5738 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5739 /* missed_rx tallies misses for the gprc workaround */
5740 missed_rx += mp;
5741 /* global total per queue */
5742 adapter->stats.mpc[j].ev_count += mp;
5743 /* Running comprehensive total for stats display */
5744 total_missed_rx += mp;
5745 if (hw->mac.type == ixgbe_mac_82598EB) {
5746 adapter->stats.rnbc[j] +=
5747 IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5748 adapter->stats.qbtc[j].ev_count +=
5749 IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5750 adapter->stats.qbrc[j].ev_count +=
5751 IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5752 adapter->stats.pxonrxc[j].ev_count +=
5753 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5754 } else {
5755 adapter->stats.pxonrxc[j].ev_count +=
5756 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5757 }
5758 adapter->stats.pxontxc[j].ev_count +=
5759 IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5760 adapter->stats.pxofftxc[j].ev_count +=
5761 IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5762 adapter->stats.pxoffrxc[j].ev_count +=
5763 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5764 adapter->stats.pxon2offc[j].ev_count +=
5765 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5766 }
5767 for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
5768 int j = i % adapter->num_queues;
5769 adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5770 adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5771 adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5772 }
5773 adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
5774 adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
5775 rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
5776 adapter->stats.rlec.ev_count += rlec;
5777
5778 /* Hardware workaround, gprc counts missed packets */
5779 adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
5780
5781 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5782 adapter->stats.lxontxc.ev_count += lxon;
5783 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5784 adapter->stats.lxofftxc.ev_count += lxoff;
5785 total = lxon + lxoff;
5786
5787 if (hw->mac.type != ixgbe_mac_82598EB) {
5788 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5789 ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5790 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5791 ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
5792 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
5793 ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5794 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5795 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5796 } else {
5797 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5798 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5799 /* 82598 only has a counter in the high register */
5800 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
5801 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
5802 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
5803 }
5804
5805 /*
5806 * Workaround: mprc hardware is incorrectly counting
5807 * broadcasts, so for now we subtract those.
5808 */
5809 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5810 adapter->stats.bprc.ev_count += bprc;
5811 adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
5812
5813 adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
5814 adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
5815 adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
5816 adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
5817 adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5818 adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5819
5820 adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
5821 adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
5822 adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
5823
5824 adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
5825 adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
5826 adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
5827 adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
5828 adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5829 adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5830 adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5831 adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
5832 adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
5833 adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
5834 adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
5835 adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
5836 adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5837 adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5838 adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
5839 adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
5840 adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5841 adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5842
5843 /* Only read FCOE on 82599 */
5844 if (hw->mac.type != ixgbe_mac_82598EB) {
5845 adapter->stats.fcoerpdc.ev_count +=
5846 IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5847 adapter->stats.fcoeprc.ev_count +=
5848 IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5849 adapter->stats.fcoeptc.ev_count +=
5850 IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5851 adapter->stats.fcoedwrc.ev_count +=
5852 IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5853 adapter->stats.fcoedwtc.ev_count +=
5854 IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5855 }
5856
5857 /* Fill out the OS statistics structure */
5858 /*
5859 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
5860 * adapter->stats counters. It's required to make ifconfig -z
5861 * (SOICZIFDATA) work.
5862 */
5863 ifp->if_collisions = 0;
5864
5865 /* Rx Errors */
5866 ifp->if_iqdrops += total_missed_rx;
5867 ifp->if_ierrors += crcerrs + rlec;
5868 }
5869
5870 /** ixgbe_sysctl_tdh_handler - Handler function
5871 * Retrieves the TDH value from the hardware
5872 */
5873 static int
5874 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
5875 {
5876 struct sysctlnode node;
5877 uint32_t val;
5878 struct tx_ring *txr;
5879
5880 node = *rnode;
5881 txr = (struct tx_ring *)node.sysctl_data;
5882 if (txr == NULL)
5883 return 0;
5884 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5885 node.sysctl_data = &val;
5886 return sysctl_lookup(SYSCTLFN_CALL(&node));
5887 }
5888
5889 /** ixgbe_sysctl_tdt_handler - Handler function
5890 * Retrieves the TDT value from the hardware
5891 */
5892 static int
5893 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
5894 {
5895 struct sysctlnode node;
5896 uint32_t val;
5897 struct tx_ring *txr;
5898
5899 node = *rnode;
5900 txr = (struct tx_ring *)node.sysctl_data;
5901 if (txr == NULL)
5902 return 0;
5903 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5904 node.sysctl_data = &val;
5905 return sysctl_lookup(SYSCTLFN_CALL(&node));
5906 }
5907
5908 /** ixgbe_sysctl_rdh_handler - Handler function
5909 * Retrieves the RDH value from the hardware
5910 */
5911 static int
5912 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
5913 {
5914 struct sysctlnode node;
5915 uint32_t val;
5916 struct rx_ring *rxr;
5917
5918 node = *rnode;
5919 rxr = (struct rx_ring *)node.sysctl_data;
5920 if (rxr == NULL)
5921 return 0;
5922 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5923 node.sysctl_data = &val;
5924 return sysctl_lookup(SYSCTLFN_CALL(&node));
5925 }
5926
5927 /** ixgbe_sysctl_rdt_handler - Handler function
5928 * Retrieves the RDT value from the hardware
5929 */
5930 static int
5931 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
5932 {
5933 struct sysctlnode node;
5934 uint32_t val;
5935 struct rx_ring *rxr;
5936
5937 node = *rnode;
5938 rxr = (struct rx_ring *)node.sysctl_data;
5939 if (rxr == NULL)
5940 return 0;
5941 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5942 node.sysctl_data = &val;
5943 return sysctl_lookup(SYSCTLFN_CALL(&node));
5944 }
5945
5946 static int
5947 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
5948 {
5949 int error;
5950 struct sysctlnode node;
5951 struct ix_queue *que;
5952 uint32_t reg, usec, rate;
5953
5954 node = *rnode;
5955 que = (struct ix_queue *)node.sysctl_data;
5956 if (que == NULL)
5957 return 0;
5958 reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5959 usec = ((reg & 0x0FF8) >> 3);
5960 if (usec > 0)
5961 rate = 500000 / usec;
5962 else
5963 rate = 0;
5964 node.sysctl_data = &rate;
5965 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5966 if (error)
5967 return error;
5968 reg &= ~0xfff; /* default, no limitation */
5969 ixgbe_max_interrupt_rate = 0;
5970 if (rate > 0 && rate < 500000) {
5971 if (rate < 1000)
5972 rate = 1000;
5973 ixgbe_max_interrupt_rate = rate;
5974 reg |= ((4000000/rate) & 0xff8 );
5975 }
5976 IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5977 return 0;
5978 }
5979
5980 const struct sysctlnode *
5981 ixgbe_sysctl_instance(struct adapter *adapter)
5982 {
5983 const char *dvname;
5984 struct sysctllog **log;
5985 int rc;
5986 const struct sysctlnode *rnode;
5987
5988 log = &adapter->sysctllog;
5989 dvname = device_xname(adapter->dev);
5990
5991 if ((rc = sysctl_createv(log, 0, NULL, &rnode,
5992 0, CTLTYPE_NODE, dvname,
5993 SYSCTL_DESCR("ixgbe information and settings"),
5994 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
5995 goto err;
5996
5997 return rnode;
5998 err:
5999 printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
6000 return NULL;
6001 }
6002
6003 /*
6004 * Add sysctl variables, one per statistic, to the system.
6005 */
6006 static void
6007 ixgbe_add_hw_stats(struct adapter *adapter)
6008 {
6009 device_t dev = adapter->dev;
6010 const struct sysctlnode *rnode, *cnode;
6011 struct sysctllog **log = &adapter->sysctllog;
6012 struct tx_ring *txr = adapter->tx_rings;
6013 struct rx_ring *rxr = adapter->rx_rings;
6014 struct ixgbe_hw_stats *stats = &adapter->stats;
6015
6016 /* Driver Statistics */
6017 #if 0
6018 /* These counters are not updated by the software */
6019 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
6020 CTLFLAG_RD, &adapter->dropped_pkts,
6021 "Driver dropped packets");
6022 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
6023 CTLFLAG_RD, &adapter->mbuf_header_failed,
6024 "???");
6025 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
6026 CTLFLAG_RD, &adapter->mbuf_packet_failed,
6027 "???");
6028 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
6029 CTLFLAG_RD, &adapter->no_tx_map_avail,
6030 "???");
6031 #endif
6032 evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
6033 NULL, device_xname(dev), "Handled queue in softint");
6034 evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
6035 NULL, device_xname(dev), "Requeued in softint");
6036 evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
6037 NULL, device_xname(dev), "Interrupt handler more rx");
6038 evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
6039 NULL, device_xname(dev), "Interrupt handler more tx");
6040 evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
6041 NULL, device_xname(dev), "Interrupt handler tx loops");
6042 evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
6043 NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
6044 evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
6045 NULL, device_xname(dev), "m_defrag() failed");
6046 evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
6047 NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
6048 evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
6049 NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
6050 evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
6051 NULL, device_xname(dev), "Driver tx dma hard fail other");
6052 evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
6053 NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
6054 evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
6055 NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
6056 evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
6057 NULL, device_xname(dev), "Watchdog timeouts");
6058 evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
6059 NULL, device_xname(dev), "TSO errors");
6060 evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
6061 NULL, device_xname(dev), "Link MSIX IRQ Handled");
6062
6063 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
6064 snprintf(adapter->queues[i].evnamebuf,
6065 sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
6066 device_xname(dev), i);
6067 snprintf(adapter->queues[i].namebuf,
6068 sizeof(adapter->queues[i].namebuf), "queue%d", i);
6069
6070 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
6071 aprint_error_dev(dev, "could not create sysctl root\n");
6072 break;
6073 }
6074
6075 if (sysctl_createv(log, 0, &rnode, &rnode,
6076 0, CTLTYPE_NODE,
6077 adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
6078 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
6079 break;
6080
6081 if (sysctl_createv(log, 0, &rnode, &cnode,
6082 CTLFLAG_READWRITE, CTLTYPE_INT,
6083 "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
6084 ixgbe_sysctl_interrupt_rate_handler, 0,
6085 (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
6086 break;
6087
6088 if (sysctl_createv(log, 0, &rnode, &cnode,
6089 CTLFLAG_READONLY, CTLTYPE_QUAD,
6090 "irqs", SYSCTL_DESCR("irqs on this queue"),
6091 NULL, 0, &(adapter->queues[i].irqs),
6092 0, CTL_CREATE, CTL_EOL) != 0)
6093 break;
6094
6095 if (sysctl_createv(log, 0, &rnode, &cnode,
6096 CTLFLAG_READONLY, CTLTYPE_INT,
6097 "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
6098 ixgbe_sysctl_tdh_handler, 0, (void *)txr,
6099 0, CTL_CREATE, CTL_EOL) != 0)
6100 break;
6101
6102 if (sysctl_createv(log, 0, &rnode, &cnode,
6103 CTLFLAG_READONLY, CTLTYPE_INT,
6104 "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
6105 ixgbe_sysctl_tdt_handler, 0, (void *)txr,
6106 0, CTL_CREATE, CTL_EOL) != 0)
6107 break;
6108
6109 evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
6110 NULL, device_xname(dev), "TSO");
6111 evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
6112 NULL, adapter->queues[i].evnamebuf,
6113 "Queue No Descriptor Available");
6114 evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
6115 NULL, adapter->queues[i].evnamebuf,
6116 "Queue Packets Transmitted");
6117
6118 #ifdef LRO
6119 struct lro_ctrl *lro = &rxr->lro;
6120 #endif /* LRO */
6121
6122 if (sysctl_createv(log, 0, &rnode, &cnode,
6123 CTLFLAG_READONLY,
6124 CTLTYPE_INT,
6125 "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
6126 ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
6127 CTL_CREATE, CTL_EOL) != 0)
6128 break;
6129
6130 if (sysctl_createv(log, 0, &rnode, &cnode,
6131 CTLFLAG_READONLY,
6132 CTLTYPE_INT,
6133 "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
6134 ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
6135 CTL_CREATE, CTL_EOL) != 0)
6136 break;
6137
6138 if (i < __arraycount(adapter->stats.mpc)) {
6139 evcnt_attach_dynamic(&adapter->stats.mpc[i],
6140 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6141 "Missed Packet Count");
6142 }
6143 if (i < __arraycount(adapter->stats.pxontxc)) {
6144 evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
6145 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6146 "pxontxc");
6147 evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
6148 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6149 "pxonrxc");
6150 evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
6151 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6152 "pxofftxc");
6153 evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
6154 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6155 "pxoffrxc");
6156 evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
6157 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6158 "pxon2offc");
6159 }
6160 if (i < __arraycount(adapter->stats.qprc)) {
6161 evcnt_attach_dynamic(&adapter->stats.qprc[i],
6162 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6163 "qprc");
6164 evcnt_attach_dynamic(&adapter->stats.qptc[i],
6165 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6166 "qptc");
6167 evcnt_attach_dynamic(&adapter->stats.qbrc[i],
6168 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6169 "qbrc");
6170 evcnt_attach_dynamic(&adapter->stats.qbtc[i],
6171 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6172 "qbtc");
6173 evcnt_attach_dynamic(&adapter->stats.qprdc[i],
6174 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
6175 "qprdc");
6176 }
6177
6178 evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
6179 NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
6180 evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
6181 NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
6182 evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
6183 NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
6184 evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
6185 NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
6186 evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
6187 NULL, adapter->queues[i].evnamebuf, "Rx discarded");
6188 evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
6189 NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
6190 #ifdef LRO
6191 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
6192 CTLFLAG_RD, &lro->lro_queued, 0,
6193 "LRO Queued");
6194 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
6195 CTLFLAG_RD, &lro->lro_flushed, 0,
6196 "LRO Flushed");
6197 #endif /* LRO */
6198 }
6199
6200 /* MAC stats get the own sub node */
6201
6202
6203 snprintf(stats->namebuf,
6204 sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
6205
6206 evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
6207 stats->namebuf, "rx csum offload - IP");
6208 evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
6209 stats->namebuf, "rx csum offload - L4");
6210 evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
6211 stats->namebuf, "rx csum offload - IP bad");
6212 evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
6213 stats->namebuf, "rx csum offload - L4 bad");
6214 evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
6215 stats->namebuf, "Interrupt conditions zero");
6216 evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
6217 stats->namebuf, "Legacy interrupts");
6218 evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
6219 stats->namebuf, "CRC Errors");
6220 evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
6221 stats->namebuf, "Illegal Byte Errors");
6222 evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
6223 stats->namebuf, "Byte Errors");
6224 evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
6225 stats->namebuf, "MAC Short Packets Discarded");
6226 evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
6227 stats->namebuf, "MAC Local Faults");
6228 evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
6229 stats->namebuf, "MAC Remote Faults");
6230 evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
6231 stats->namebuf, "Receive Length Errors");
6232 evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
6233 stats->namebuf, "Link XON Transmitted");
6234 evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
6235 stats->namebuf, "Link XON Received");
6236 evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
6237 stats->namebuf, "Link XOFF Transmitted");
6238 evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
6239 stats->namebuf, "Link XOFF Received");
6240
6241 /* Packet Reception Stats */
6242 evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
6243 stats->namebuf, "Total Octets Received");
6244 evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
6245 stats->namebuf, "Good Octets Received");
6246 evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
6247 stats->namebuf, "Total Packets Received");
6248 evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
6249 stats->namebuf, "Good Packets Received");
6250 evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
6251 stats->namebuf, "Multicast Packets Received");
6252 evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
6253 stats->namebuf, "Broadcast Packets Received");
6254 evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
6255 stats->namebuf, "64 byte frames received ");
6256 evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
6257 stats->namebuf, "65-127 byte frames received");
6258 evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
6259 stats->namebuf, "128-255 byte frames received");
6260 evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
6261 stats->namebuf, "256-511 byte frames received");
6262 evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
6263 stats->namebuf, "512-1023 byte frames received");
6264 evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
6265 stats->namebuf, "1023-1522 byte frames received");
6266 evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
6267 stats->namebuf, "Receive Undersized");
6268 evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
6269 stats->namebuf, "Fragmented Packets Received ");
6270 evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
6271 stats->namebuf, "Oversized Packets Received");
6272 evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
6273 stats->namebuf, "Received Jabber");
6274 evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
6275 stats->namebuf, "Management Packets Received");
6276 evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
6277 stats->namebuf, "Checksum Errors");
6278
6279 /* Packet Transmission Stats */
6280 evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
6281 stats->namebuf, "Good Octets Transmitted");
6282 evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
6283 stats->namebuf, "Total Packets Transmitted");
6284 evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
6285 stats->namebuf, "Good Packets Transmitted");
6286 evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
6287 stats->namebuf, "Broadcast Packets Transmitted");
6288 evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
6289 stats->namebuf, "Multicast Packets Transmitted");
6290 evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
6291 stats->namebuf, "Management Packets Transmitted");
6292 evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
6293 stats->namebuf, "64 byte frames transmitted ");
6294 evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
6295 stats->namebuf, "65-127 byte frames transmitted");
6296 evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
6297 stats->namebuf, "128-255 byte frames transmitted");
6298 evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
6299 stats->namebuf, "256-511 byte frames transmitted");
6300 evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
6301 stats->namebuf, "512-1023 byte frames transmitted");
6302 evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
6303 stats->namebuf, "1024-1522 byte frames transmitted");
6304 }
6305
6306 /*
6307 ** Set flow control using sysctl:
6308 ** Flow control values:
6309 ** 0 - off
6310 ** 1 - rx pause
6311 ** 2 - tx pause
6312 ** 3 - full
6313 */
6314 static int
6315 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
6316 {
6317 struct sysctlnode node;
6318 int error, last;
6319 struct adapter *adapter;
6320
6321 node = *rnode;
6322 adapter = (struct adapter *)node.sysctl_data;
6323 node.sysctl_data = &adapter->fc;
6324 last = adapter->fc;
6325 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6326 if (error != 0 || newp == NULL)
6327 return error;
6328
6329 /* Don't bother if it's not changed */
6330 if (adapter->fc == last)
6331 return (0);
6332
6333 switch (adapter->fc) {
6334 case ixgbe_fc_rx_pause:
6335 case ixgbe_fc_tx_pause:
6336 case ixgbe_fc_full:
6337 adapter->hw.fc.requested_mode = adapter->fc;
6338 if (adapter->num_queues > 1)
6339 ixgbe_disable_rx_drop(adapter);
6340 break;
6341 case ixgbe_fc_none:
6342 adapter->hw.fc.requested_mode = ixgbe_fc_none;
6343 if (adapter->num_queues > 1)
6344 ixgbe_enable_rx_drop(adapter);
6345 break;
6346 default:
6347 adapter->fc = last;
6348 return (EINVAL);
6349 }
6350 /* Don't autoneg if forcing a value */
6351 adapter->hw.fc.disable_fc_autoneg = TRUE;
6352 ixgbe_fc_enable(&adapter->hw);
6353 return 0;
6354 }
6355
6356
6357 /*
6358 ** Control link advertise speed:
6359 ** 1 - advertise only 1G
6360 ** 2 - advertise 100Mb
6361 ** 3 - advertise normal
6362 */
6363 static int
6364 ixgbe_set_advertise(SYSCTLFN_ARGS)
6365 {
6366 struct sysctlnode node;
6367 int t, error = 0;
6368 struct adapter *adapter;
6369 device_t dev;
6370 struct ixgbe_hw *hw;
6371 ixgbe_link_speed speed, last;
6372
6373 node = *rnode;
6374 adapter = (struct adapter *)node.sysctl_data;
6375 dev = adapter->dev;
6376 hw = &adapter->hw;
6377 last = adapter->advertise;
6378 t = adapter->advertise;
6379 node.sysctl_data = &t;
6380 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6381 if (error != 0 || newp == NULL)
6382 return error;
6383
6384 if (adapter->advertise == last) /* no change */
6385 return (0);
6386
6387 if (t == -1)
6388 return 0;
6389
6390 adapter->advertise = t;
6391
6392 if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
6393 (hw->phy.multispeed_fiber)))
6394 return (EINVAL);
6395
6396 if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
6397 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
6398 return (EINVAL);
6399 }
6400
6401 if (adapter->advertise == 1)
6402 speed = IXGBE_LINK_SPEED_1GB_FULL;
6403 else if (adapter->advertise == 2)
6404 speed = IXGBE_LINK_SPEED_100_FULL;
6405 else if (adapter->advertise == 3)
6406 speed = IXGBE_LINK_SPEED_1GB_FULL |
6407 IXGBE_LINK_SPEED_10GB_FULL;
6408 else { /* bogus value */
6409 adapter->advertise = last;
6410 return (EINVAL);
6411 }
6412
6413 hw->mac.autotry_restart = TRUE;
6414 hw->mac.ops.setup_link(hw, speed, TRUE);
6415
6416 return 0;
6417 }
6418
6419 /*
6420 ** Thermal Shutdown Trigger
6421 ** - cause a Thermal Overtemp IRQ
6422 ** - this now requires firmware enabling
6423 */
6424 static int
6425 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
6426 {
6427 struct sysctlnode node;
6428 int error, fire = 0;
6429 struct adapter *adapter;
6430 struct ixgbe_hw *hw;
6431
6432 node = *rnode;
6433 adapter = (struct adapter *)node.sysctl_data;
6434 hw = &adapter->hw;
6435
6436 if (hw->mac.type != ixgbe_mac_X540)
6437 return (0);
6438
6439 node.sysctl_data = &fire;
6440 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6441 if ((error) || (newp == NULL))
6442 return (error);
6443
6444 if (fire) {
6445 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
6446 reg |= IXGBE_EICR_TS;
6447 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
6448 }
6449
6450 return (0);
6451 }
6452
6453 /*
6454 ** Enable the hardware to drop packets when the buffer is
6455 ** full. This is useful when multiqueue,so that no single
6456 ** queue being full stalls the entire RX engine. We only
6457 ** enable this when Multiqueue AND when Flow Control is
6458 ** disabled.
6459 */
6460 static void
6461 ixgbe_enable_rx_drop(struct adapter *adapter)
6462 {
6463 struct ixgbe_hw *hw = &adapter->hw;
6464
6465 for (int i = 0; i < adapter->num_queues; i++) {
6466 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6467 srrctl |= IXGBE_SRRCTL_DROP_EN;
6468 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6469 }
6470 }
6471
6472 static void
6473 ixgbe_disable_rx_drop(struct adapter *adapter)
6474 {
6475 struct ixgbe_hw *hw = &adapter->hw;
6476
6477 for (int i = 0; i < adapter->num_queues; i++) {
6478 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6479 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6480 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6481 }
6482 }
6483