     1 /******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
62 /*$NetBSD: ixgbe.c,v 1.28 2015/04/24 07:00:51 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 /*********************************************************************
70 * Set this to one to display debug statistics
71 *********************************************************************/
72 int ixgbe_display_debug_stats = 0;
73
74 /*********************************************************************
75 * Driver version
76 *********************************************************************/
77 char ixgbe_driver_version[] = "2.5.8 - HEAD";
78
79 /*********************************************************************
80 * PCI Device ID Table
81 *
82 * Used by probe to select devices to load on
83 * Last field stores an index into ixgbe_strings
84 * Last entry must be all 0s
85 *
86 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
87 *********************************************************************/
88
89 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
90 {
91 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
92 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
93 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
94 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
95 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
96 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
97 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
98 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
99 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
100 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
101 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
102 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
103 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
104 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
105 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
106 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
107 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
108 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
109 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
110 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
111 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
112 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
113 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
114 {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
115 /* required last entry */
116 {0, 0, 0, 0, 0}
117 };
118
119 /*********************************************************************
120 * Table of branding strings
121 *********************************************************************/
122
123 static const char *ixgbe_strings[] = {
124 "Intel(R) PRO/10GbE PCI-Express Network Driver"
125 };
126
127 /*********************************************************************
128 * Function prototypes
129 *********************************************************************/
130 static int ixgbe_probe(device_t, cfdata_t, void *);
131 static void ixgbe_attach(device_t, device_t, void *);
132 static int ixgbe_detach(device_t, int);
133 #if 0
134 static int ixgbe_shutdown(device_t);
135 #endif
136 #if IXGBE_LEGACY_TX
137 static void ixgbe_start(struct ifnet *);
138 static void ixgbe_start_locked(struct tx_ring *, struct ifnet *);
139 #else
140 static int ixgbe_mq_start(struct ifnet *, struct mbuf *);
141 static int ixgbe_mq_start_locked(struct ifnet *,
142 struct tx_ring *, struct mbuf *);
143 static void ixgbe_qflush(struct ifnet *);
144 static void ixgbe_deferred_mq_start(void *);
145 #endif
146 static int ixgbe_ioctl(struct ifnet *, u_long, void *);
147 static void ixgbe_ifstop(struct ifnet *, int);
148 static int ixgbe_init(struct ifnet *);
149 static void ixgbe_init_locked(struct adapter *);
150 static void ixgbe_stop(void *);
151 static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
152 static int ixgbe_media_change(struct ifnet *);
153 static void ixgbe_identify_hardware(struct adapter *);
154 static int ixgbe_allocate_pci_resources(struct adapter *,
155 const struct pci_attach_args *);
156 static int ixgbe_allocate_msix(struct adapter *,
157 const struct pci_attach_args *);
158 static int ixgbe_allocate_legacy(struct adapter *,
159 const struct pci_attach_args *);
160 static int ixgbe_allocate_queues(struct adapter *);
161 static int ixgbe_setup_msix(struct adapter *);
162 static void ixgbe_free_pci_resources(struct adapter *);
163 static void ixgbe_local_timer(void *);
164 static int ixgbe_setup_interface(device_t, struct adapter *);
165 static void ixgbe_config_link(struct adapter *);
166
167 static int ixgbe_allocate_transmit_buffers(struct tx_ring *);
168 static int ixgbe_setup_transmit_structures(struct adapter *);
169 static void ixgbe_setup_transmit_ring(struct tx_ring *);
170 static void ixgbe_initialize_transmit_units(struct adapter *);
171 static void ixgbe_free_transmit_structures(struct adapter *);
172 static void ixgbe_free_transmit_buffers(struct tx_ring *);
173
174 static int ixgbe_allocate_receive_buffers(struct rx_ring *);
175 static int ixgbe_setup_receive_structures(struct adapter *);
176 static int ixgbe_setup_receive_ring(struct rx_ring *);
177 static void ixgbe_initialize_receive_units(struct adapter *);
178 static void ixgbe_free_receive_structures(struct adapter *);
179 static void ixgbe_free_receive_buffers(struct rx_ring *);
180 static void ixgbe_setup_hw_rsc(struct rx_ring *);
181
182 static void ixgbe_enable_intr(struct adapter *);
183 static void ixgbe_disable_intr(struct adapter *);
184 static void ixgbe_update_stats_counters(struct adapter *);
185 static bool ixgbe_txeof(struct tx_ring *);
186 static bool ixgbe_rxeof(struct ix_queue *);
187 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
188 struct ixgbe_hw_stats *);
189 static void ixgbe_set_promisc(struct adapter *);
190 static void ixgbe_set_multi(struct adapter *);
191 static void ixgbe_update_link_status(struct adapter *);
192 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
193 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
194 static int ixgbe_set_flowcntl(SYSCTLFN_PROTO);
195 static int ixgbe_set_advertise(SYSCTLFN_PROTO);
196 static int ixgbe_set_thermal_test(SYSCTLFN_PROTO);
197 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
198 struct ixgbe_dma_alloc *, int);
199 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
200 static int ixgbe_tx_ctx_setup(struct tx_ring *,
201 struct mbuf *, u32 *, u32 *);
202 static int ixgbe_tso_setup(struct tx_ring *,
203 struct mbuf *, u32 *, u32 *);
204 static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
205 static void ixgbe_configure_ivars(struct adapter *);
206 static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
207
208 static void ixgbe_setup_vlan_hw_support(struct adapter *);
209 #if 0
210 static void ixgbe_register_vlan(void *, struct ifnet *, u16);
211 static void ixgbe_unregister_vlan(void *, struct ifnet *, u16);
212 #endif
213
214 static void ixgbe_add_hw_stats(struct adapter *adapter);
215
216 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
217 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
218 struct mbuf *, u32);
219
220 static void ixgbe_enable_rx_drop(struct adapter *);
221 static void ixgbe_disable_rx_drop(struct adapter *);
222
223 /* Support for pluggable optic modules */
224 static bool ixgbe_sfp_probe(struct adapter *);
225 static void ixgbe_setup_optics(struct adapter *);
226
    227 /* Legacy (single vector) interrupt handler */
228 static int ixgbe_legacy_irq(void *);
229
230 #if defined(NETBSD_MSI_OR_MSIX)
231 /* The MSI/X Interrupt handlers */
232 static void ixgbe_msix_que(void *);
233 static void ixgbe_msix_link(void *);
234 #endif
235
236 /* Software interrupts for deferred work */
237 static void ixgbe_handle_que(void *);
238 static void ixgbe_handle_link(void *);
239 static void ixgbe_handle_msf(void *);
240 static void ixgbe_handle_mod(void *);
241
242 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
243 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
244
245 #ifdef IXGBE_FDIR
246 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
247 static void ixgbe_reinit_fdir(void *, int);
248 #endif
249
250 /*********************************************************************
251 * FreeBSD Device Interface Entry Points
252 *********************************************************************/
253
254 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
255 ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
256 DVF_DETACH_SHUTDOWN);
257
258 #if 0
259 devclass_t ixgbe_devclass;
260 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
261
262 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
263 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
264 #endif
265
266 /*
267 ** TUNEABLE PARAMETERS:
268 */
269
270 /*
271 ** AIM: Adaptive Interrupt Moderation
272 ** which means that the interrupt rate
273 ** is varied over time based on the
274 ** traffic for that interrupt vector
275 */
276 static int ixgbe_enable_aim = TRUE;
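/*
 * NetBSD has no FreeBSD-style loader tunables, so TUNABLE_INT is
 * stubbed out below; the values here are compile-time defaults, and
 * some (e.g. enable_aim) are also adjustable at run time through the
 * sysctls created in ixgbe_sysctl_attach().
 */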
277 #define TUNABLE_INT(__x, __y)
278 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
279
280 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
281 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
282
283 /* How many packets rxeof tries to clean at a time */
284 static int ixgbe_rx_process_limit = 256;
285 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
286
287 /* How many packets txeof tries to clean at a time */
288 static int ixgbe_tx_process_limit = 256;
289 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
290
291 /*
    292 ** Smart speed setting, default to on.
    293 ** This only works as a compile option
    294 ** right now, as it's applied during attach;
    295 ** set this to 'ixgbe_smart_speed_off' to
296 ** disable.
297 */
298 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
299
300 /*
301 * MSIX should be the default for best performance,
302 * but this allows it to be forced off for testing.
303 */
304 static int ixgbe_enable_msix = 1;
305 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
306
307 #if defined(NETBSD_MSI_OR_MSIX)
308 /*
309 * Number of Queues, can be set to 0,
310 * it then autoconfigures based on the
311 * number of cpus with a max of 8. This
    312  * can be overridden manually here.
313 */
314 static int ixgbe_num_queues = 0;
315 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
316 #endif
317
318 /*
319 ** Number of TX descriptors per ring,
320 ** setting higher than RX as this seems
321 ** the better performing choice.
322 */
323 static int ixgbe_txd = PERFORM_TXD;
324 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
325
326 /* Number of RX descriptors per ring */
327 static int ixgbe_rxd = PERFORM_RXD;
328 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
329
330 /*
331 ** HW RSC control:
332 ** this feature only works with
333 ** IPv4, and only on 82599 and later.
334 ** Also this will cause IP forwarding to
335 ** fail and that can't be controlled by
336 ** the stack as LRO can. For all these
337 ** reasons I've deemed it best to leave
338 ** this off and not bother with a tuneable
    339 ** interface; enabling it requires a
    340 ** recompile with this set to TRUE.
341 */
342 static bool ixgbe_rsc_enable = FALSE;
343
    344 /* Keep a running count of detected ports for sanity checks */
345 static int ixgbe_total_ports;
346
347 #ifdef IXGBE_FDIR
348 /*
349 ** For Flow Director: this is the
350 ** number of TX packets we sample
    351 ** for the filter pool; this means
352 ** every 20th packet will be probed.
353 **
354 ** This feature can be disabled by
355 ** setting this to 0.
356 */
357 static int atr_sample_rate = 20;
358 /*
359 ** Flow Director actually 'steals'
360 ** part of the packet buffer as its
    361 ** filter pool; this variable controls
362 ** how much it uses:
363 ** 0 = 64K, 1 = 128K, 2 = 256K
364 */
365 static int fdir_pballoc = 1;
366 #endif
367
368 #ifdef DEV_NETMAP
369 /*
370 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
371 * be a reference on how to implement netmap support in a driver.
372 * Additional comments are in ixgbe_netmap.h .
373 *
374 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
375 * that extend the standard driver.
376 */
377 #include <dev/netmap/ixgbe_netmap.h>
378 #endif /* DEV_NETMAP */
379
380 /*********************************************************************
381 * Device identification routine
382 *
383 * ixgbe_probe determines if the driver should be loaded on
    384  *  the adapter, based on the PCI vendor/device id of the adapter.
385 *
386 * return 1 on success, 0 on failure
387 *********************************************************************/
388
389 static int
390 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
391 {
392 const struct pci_attach_args *pa = aux;
393
394 return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
395 }
396
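/*
 * ixgbe_lookup: scan ixgbe_vendor_info_array for an entry matching the
 * PCI vendor/device (and, when specified, subsystem) IDs found in the
 * attach arguments; returns NULL if the device is not supported.
 */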
397 static ixgbe_vendor_info_t *
398 ixgbe_lookup(const struct pci_attach_args *pa)
399 {
400 pcireg_t subid;
401 ixgbe_vendor_info_t *ent;
402
403 INIT_DEBUGOUT("ixgbe_probe: begin");
404
405 if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
406 return NULL;
407
408 subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
409
410 for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
411 if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
412 PCI_PRODUCT(pa->pa_id) == ent->device_id &&
413
414 (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
415 ent->subvendor_id == 0) &&
416
417 (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
418 ent->subdevice_id == 0)) {
419 ++ixgbe_total_ports;
420 return ent;
421 }
422 }
423 return NULL;
424 }
425
426
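/*
 * ixgbe_sysctl_attach: create this device's sysctl subtree, with
 * read-only nodes for the descriptor and queue counts and read-write
 * nodes for flow control, interrupt moderation, advertised link speed
 * and the thermal test.
 */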
427 static void
428 ixgbe_sysctl_attach(struct adapter *adapter)
429 {
430 struct sysctllog **log;
431 const struct sysctlnode *rnode, *cnode;
432 device_t dev;
433
434 dev = adapter->dev;
435 log = &adapter->sysctllog;
436
437 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
438 aprint_error_dev(dev, "could not create sysctl root\n");
439 return;
440 }
441
442 if (sysctl_createv(log, 0, &rnode, &cnode,
443 CTLFLAG_READONLY, CTLTYPE_INT,
444 "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
445 NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
446 aprint_error_dev(dev, "could not create sysctl\n");
447
448 if (sysctl_createv(log, 0, &rnode, &cnode,
449 CTLFLAG_READONLY, CTLTYPE_INT,
450 "num_queues", SYSCTL_DESCR("Number of queues"),
451 NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
452 aprint_error_dev(dev, "could not create sysctl\n");
453
454 if (sysctl_createv(log, 0, &rnode, &cnode,
455 CTLFLAG_READWRITE, CTLTYPE_INT,
456 "fc", SYSCTL_DESCR("Flow Control"),
457 ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
458 aprint_error_dev(dev, "could not create sysctl\n");
459
460 /* XXX This is an *instance* sysctl controlling a *global* variable.
461 * XXX It's that way in the FreeBSD driver that this derives from.
462 */
463 if (sysctl_createv(log, 0, &rnode, &cnode,
464 CTLFLAG_READWRITE, CTLTYPE_INT,
465 "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
466 NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
467 aprint_error_dev(dev, "could not create sysctl\n");
468
469 if (sysctl_createv(log, 0, &rnode, &cnode,
470 CTLFLAG_READWRITE, CTLTYPE_INT,
471 "advertise_speed", SYSCTL_DESCR("Link Speed"),
472 ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
473 aprint_error_dev(dev, "could not create sysctl\n");
474
475 if (sysctl_createv(log, 0, &rnode, &cnode,
476 CTLFLAG_READWRITE, CTLTYPE_INT,
477 "ts", SYSCTL_DESCR("Thermal Test"),
478 ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
479 aprint_error_dev(dev, "could not create sysctl\n");
480 }
481
482 /*********************************************************************
483 * Device initialization routine
484 *
485 * The attach entry point is called when the driver is being loaded.
486 * This routine identifies the type of hardware, allocates all resources
487 * and initializes the hardware.
488 *
489 * return 0 on success, positive on failure
490 *********************************************************************/
491
492 static void
493 ixgbe_attach(device_t parent, device_t dev, void *aux)
494 {
495 struct adapter *adapter;
496 struct ixgbe_hw *hw;
497 int error = 0;
498 u16 csum;
499 u32 ctrl_ext;
500 ixgbe_vendor_info_t *ent;
501 const struct pci_attach_args *pa = aux;
502
503 INIT_DEBUGOUT("ixgbe_attach: begin");
504
505 /* Allocate, clear, and link in our adapter structure */
506 adapter = device_private(dev);
507 adapter->dev = adapter->osdep.dev = dev;
508 hw = &adapter->hw;
509 adapter->osdep.pc = pa->pa_pc;
510 adapter->osdep.tag = pa->pa_tag;
511 adapter->osdep.dmat = pa->pa_dmat;
512
513 ent = ixgbe_lookup(pa);
514
515 KASSERT(ent != NULL);
516
517 aprint_normal(": %s, Version - %s\n",
518 ixgbe_strings[ent->index], ixgbe_driver_version);
519
520 /* Core Lock Init*/
521 IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
522
523 /* SYSCTL APIs */
524
525 ixgbe_sysctl_attach(adapter);
526
527 /* Set up the timer callout */
528 callout_init(&adapter->timer, 0);
529
530 /* Determine hardware revision */
531 ixgbe_identify_hardware(adapter);
532
533 /* Do base PCI setup - map BAR0 */
534 if (ixgbe_allocate_pci_resources(adapter, pa)) {
535 aprint_error_dev(dev, "Allocation of PCI resources failed\n");
536 error = ENXIO;
537 goto err_out;
538 }
539
540 /* Do descriptor calc and sanity checks */
541 if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
542 ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
543 aprint_error_dev(dev, "TXD config issue, using default!\n");
544 adapter->num_tx_desc = DEFAULT_TXD;
545 } else
546 adapter->num_tx_desc = ixgbe_txd;
547
548 /*
549 ** With many RX rings it is easy to exceed the
550 ** system mbuf allocation. Tuning nmbclusters
551 ** can alleviate this.
552 */
    553 	if (nmbclusters > 0) {
554 int s;
555 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
556 if (s > nmbclusters) {
557 aprint_error_dev(dev, "RX Descriptors exceed "
558 "system mbuf max, using default instead!\n");
559 ixgbe_rxd = DEFAULT_RXD;
560 }
561 }
562
563 if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
    564 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
565 aprint_error_dev(dev, "RXD config issue, using default!\n");
566 adapter->num_rx_desc = DEFAULT_RXD;
567 } else
568 adapter->num_rx_desc = ixgbe_rxd;
569
570 /* Allocate our TX/RX Queues */
571 if (ixgbe_allocate_queues(adapter)) {
572 error = ENOMEM;
573 goto err_out;
574 }
575
576 /* Allocate multicast array memory. */
577 adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
578 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
579 if (adapter->mta == NULL) {
580 aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
581 error = ENOMEM;
582 goto err_late;
583 }
584
585 /* Initialize the shared code */
586 error = ixgbe_init_shared_code(hw);
587 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
588 /*
589 ** No optics in this port, set up
590 ** so the timer routine will probe
591 ** for later insertion.
592 */
593 adapter->sfp_probe = TRUE;
594 error = 0;
595 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
596 aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
597 error = EIO;
598 goto err_late;
599 } else if (error) {
600 aprint_error_dev(dev,"Unable to initialize the shared code\n");
601 error = EIO;
602 goto err_late;
603 }
604
605 /* Make sure we have a good EEPROM before we read from it */
606 if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
607 aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
608 error = EIO;
609 goto err_late;
610 }
611
612 error = ixgbe_init_hw(hw);
613 switch (error) {
614 case IXGBE_ERR_EEPROM_VERSION:
615 aprint_error_dev(dev, "This device is a pre-production adapter/"
616 "LOM. Please be aware there may be issues associated "
617 "with your hardware.\n If you are experiencing problems "
618 "please contact your Intel or hardware representative "
619 "who provided you with this hardware.\n");
620 break;
621 case IXGBE_ERR_SFP_NOT_SUPPORTED:
622 aprint_error_dev(dev,"Unsupported SFP+ Module\n");
623 error = EIO;
624 aprint_error_dev(dev,"Hardware Initialization Failure\n");
625 goto err_late;
626 case IXGBE_ERR_SFP_NOT_PRESENT:
627 device_printf(dev,"No SFP+ Module found\n");
628 /* falls thru */
629 default:
630 break;
631 }
632
633 /* Detect and set physical type */
634 ixgbe_setup_optics(adapter);
635
636 if ((adapter->msix > 1) && (ixgbe_enable_msix))
637 error = ixgbe_allocate_msix(adapter, pa);
638 else
639 error = ixgbe_allocate_legacy(adapter, pa);
640 if (error)
641 goto err_late;
642
643 /* Setup OS specific network interface */
644 if (ixgbe_setup_interface(dev, adapter) != 0)
645 goto err_late;
646
647 /* Initialize statistics */
648 ixgbe_update_stats_counters(adapter);
649
650 /* Print PCIE bus type/speed/width info */
651 ixgbe_get_bus_info(hw);
652 aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
653 ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
654 (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
655 (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
656 (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
657 (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
658 ("Unknown"));
659
660 if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
661 (hw->bus.speed == ixgbe_bus_speed_2500)) {
662 aprint_error_dev(dev, "PCI-Express bandwidth available"
663 " for this card\n is not sufficient for"
664 " optimal performance.\n");
    665 		aprint_error_dev(dev, "For optimal performance an x8 "
666 "PCIE, or x4 PCIE 2 slot is required.\n");
667 }
668
669 /* Set an initial default flow control value */
670 adapter->fc = ixgbe_fc_full;
671
672 /* let hardware know driver is loaded */
673 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
674 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
675 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
676
677 ixgbe_add_hw_stats(adapter);
678
679 #ifdef DEV_NETMAP
680 ixgbe_netmap_attach(adapter);
681 #endif /* DEV_NETMAP */
682 INIT_DEBUGOUT("ixgbe_attach: end");
683 return;
684 err_late:
685 ixgbe_free_transmit_structures(adapter);
686 ixgbe_free_receive_structures(adapter);
687 err_out:
688 if (adapter->ifp != NULL)
689 if_free(adapter->ifp);
690 ixgbe_free_pci_resources(adapter);
691 if (adapter->mta != NULL)
692 free(adapter->mta, M_DEVBUF);
693 return;
694
695 }
696
697 /*********************************************************************
698 * Device removal routine
699 *
700 * The detach entry point is called when the driver is being removed.
701 * This routine stops the adapter and deallocates all the resources
702 * that were allocated for driver operation.
703 *
704 * return 0 on success, positive on failure
705 *********************************************************************/
706
707 static int
708 ixgbe_detach(device_t dev, int flags)
709 {
710 struct adapter *adapter = device_private(dev);
711 struct rx_ring *rxr = adapter->rx_rings;
712 struct ixgbe_hw_stats *stats = &adapter->stats;
713 struct ix_queue *que = adapter->queues;
714 struct tx_ring *txr = adapter->tx_rings;
715 u32 ctrl_ext;
716
717 INIT_DEBUGOUT("ixgbe_detach: begin");
718
719 /* Make sure VLANs are not using driver */
720 if (!VLAN_ATTACHED(&adapter->osdep.ec))
721 ; /* nothing to do: no VLANs */
722 else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
723 vlan_ifdetach(adapter->ifp);
724 else {
725 aprint_error_dev(dev, "VLANs in use\n");
726 return EBUSY;
727 }
728
729 IXGBE_CORE_LOCK(adapter);
730 ixgbe_stop(adapter);
731 IXGBE_CORE_UNLOCK(adapter);
732
733 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
734 #ifndef IXGBE_LEGACY_TX
735 softint_disestablish(txr->txq_si);
736 #endif
737 softint_disestablish(que->que_si);
738 }
739
740 /* Drain the Link queue */
741 softint_disestablish(adapter->link_si);
742 softint_disestablish(adapter->mod_si);
743 softint_disestablish(adapter->msf_si);
744 #ifdef IXGBE_FDIR
745 softint_disestablish(adapter->fdir_si);
746 #endif
747
748 /* let hardware know driver is unloading */
749 ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
750 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
751 IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
752
753 ether_ifdetach(adapter->ifp);
754 callout_halt(&adapter->timer, NULL);
755 #ifdef DEV_NETMAP
756 netmap_detach(adapter->ifp);
757 #endif /* DEV_NETMAP */
758 ixgbe_free_pci_resources(adapter);
759 #if 0 /* XXX the NetBSD port is probably missing something here */
760 bus_generic_detach(dev);
761 #endif
762 if_detach(adapter->ifp);
763
764 sysctl_teardown(&adapter->sysctllog);
765 evcnt_detach(&adapter->handleq);
766 evcnt_detach(&adapter->req);
767 evcnt_detach(&adapter->morerx);
768 evcnt_detach(&adapter->moretx);
769 evcnt_detach(&adapter->txloops);
770 evcnt_detach(&adapter->efbig_tx_dma_setup);
771 evcnt_detach(&adapter->m_defrag_failed);
772 evcnt_detach(&adapter->efbig2_tx_dma_setup);
773 evcnt_detach(&adapter->einval_tx_dma_setup);
774 evcnt_detach(&adapter->other_tx_dma_setup);
775 evcnt_detach(&adapter->eagain_tx_dma_setup);
776 evcnt_detach(&adapter->enomem_tx_dma_setup);
777 evcnt_detach(&adapter->watchdog_events);
778 evcnt_detach(&adapter->tso_err);
779 evcnt_detach(&adapter->link_irq);
780
781 txr = adapter->tx_rings;
782 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
783 evcnt_detach(&txr->no_desc_avail);
784 evcnt_detach(&txr->total_packets);
785 evcnt_detach(&txr->tso_tx);
786
787 if (i < __arraycount(adapter->stats.mpc)) {
788 evcnt_detach(&adapter->stats.mpc[i]);
789 }
790 if (i < __arraycount(adapter->stats.pxontxc)) {
791 evcnt_detach(&adapter->stats.pxontxc[i]);
792 evcnt_detach(&adapter->stats.pxonrxc[i]);
793 evcnt_detach(&adapter->stats.pxofftxc[i]);
794 evcnt_detach(&adapter->stats.pxoffrxc[i]);
795 evcnt_detach(&adapter->stats.pxon2offc[i]);
796 }
797 if (i < __arraycount(adapter->stats.qprc)) {
798 evcnt_detach(&adapter->stats.qprc[i]);
799 evcnt_detach(&adapter->stats.qptc[i]);
800 evcnt_detach(&adapter->stats.qbrc[i]);
801 evcnt_detach(&adapter->stats.qbtc[i]);
802 evcnt_detach(&adapter->stats.qprdc[i]);
803 }
804
805 evcnt_detach(&rxr->rx_packets);
806 evcnt_detach(&rxr->rx_bytes);
807 evcnt_detach(&rxr->no_jmbuf);
808 evcnt_detach(&rxr->rx_discarded);
809 evcnt_detach(&rxr->rx_irq);
810 }
811 evcnt_detach(&stats->ipcs);
812 evcnt_detach(&stats->l4cs);
813 evcnt_detach(&stats->ipcs_bad);
814 evcnt_detach(&stats->l4cs_bad);
815 evcnt_detach(&stats->intzero);
816 evcnt_detach(&stats->legint);
817 evcnt_detach(&stats->crcerrs);
818 evcnt_detach(&stats->illerrc);
819 evcnt_detach(&stats->errbc);
820 evcnt_detach(&stats->mspdc);
821 evcnt_detach(&stats->mlfc);
822 evcnt_detach(&stats->mrfc);
823 evcnt_detach(&stats->rlec);
824 evcnt_detach(&stats->lxontxc);
825 evcnt_detach(&stats->lxonrxc);
826 evcnt_detach(&stats->lxofftxc);
827 evcnt_detach(&stats->lxoffrxc);
828
829 /* Packet Reception Stats */
830 evcnt_detach(&stats->tor);
831 evcnt_detach(&stats->gorc);
832 evcnt_detach(&stats->tpr);
833 evcnt_detach(&stats->gprc);
834 evcnt_detach(&stats->mprc);
835 evcnt_detach(&stats->bprc);
836 evcnt_detach(&stats->prc64);
837 evcnt_detach(&stats->prc127);
838 evcnt_detach(&stats->prc255);
839 evcnt_detach(&stats->prc511);
840 evcnt_detach(&stats->prc1023);
841 evcnt_detach(&stats->prc1522);
842 evcnt_detach(&stats->ruc);
843 evcnt_detach(&stats->rfc);
844 evcnt_detach(&stats->roc);
845 evcnt_detach(&stats->rjc);
846 evcnt_detach(&stats->mngprc);
847 evcnt_detach(&stats->xec);
848
849 /* Packet Transmission Stats */
850 evcnt_detach(&stats->gotc);
851 evcnt_detach(&stats->tpt);
852 evcnt_detach(&stats->gptc);
853 evcnt_detach(&stats->bptc);
854 evcnt_detach(&stats->mptc);
855 evcnt_detach(&stats->mngptc);
856 evcnt_detach(&stats->ptc64);
857 evcnt_detach(&stats->ptc127);
858 evcnt_detach(&stats->ptc255);
859 evcnt_detach(&stats->ptc511);
860 evcnt_detach(&stats->ptc1023);
861 evcnt_detach(&stats->ptc1522);
862
863 ixgbe_free_transmit_structures(adapter);
864 ixgbe_free_receive_structures(adapter);
865 free(adapter->mta, M_DEVBUF);
866
867 IXGBE_CORE_LOCK_DESTROY(adapter);
868 return (0);
869 }
870
871 /*********************************************************************
872 *
873 * Shutdown entry point
874 *
875 **********************************************************************/
876
877 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
878 static int
879 ixgbe_shutdown(device_t dev)
880 {
881 struct adapter *adapter = device_private(dev);
882 IXGBE_CORE_LOCK(adapter);
883 ixgbe_stop(adapter);
884 IXGBE_CORE_UNLOCK(adapter);
885 return (0);
886 }
887 #endif
888
889
890 #ifdef IXGBE_LEGACY_TX
891 /*********************************************************************
892 * Transmit entry point
893 *
894 * ixgbe_start is called by the stack to initiate a transmit.
895 * The driver will remain in this routine as long as there are
896 * packets to transmit and transmit resources are available.
    897  *  In case resources are not available, the stack is notified and
898 * the packet is requeued.
899 **********************************************************************/
900
901 static void
902 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
903 {
904 int rc;
905 struct mbuf *m_head;
906 struct adapter *adapter = txr->adapter;
907
908 IXGBE_TX_LOCK_ASSERT(txr);
909
910 if ((ifp->if_flags & IFF_RUNNING) == 0)
911 return;
912 if (!adapter->link_active)
913 return;
914
915 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
916 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
917 break;
918
919 IFQ_POLL(&ifp->if_snd, m_head);
920 if (m_head == NULL)
921 break;
922
923 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
924 break;
925 }
926 IFQ_DEQUEUE(&ifp->if_snd, m_head);
927 if (rc == EFBIG) {
928 struct mbuf *mtmp;
929
930 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
931 m_head = mtmp;
932 rc = ixgbe_xmit(txr, m_head);
933 if (rc != 0)
934 adapter->efbig2_tx_dma_setup.ev_count++;
935 } else
936 adapter->m_defrag_failed.ev_count++;
937 }
938 if (rc != 0) {
939 m_freem(m_head);
940 continue;
941 }
942
943 /* Send a copy of the frame to the BPF listener */
944 bpf_mtap(ifp, m_head);
945
946 /* Set watchdog on */
947 getmicrotime(&txr->watchdog_time);
948 txr->queue_status = IXGBE_QUEUE_WORKING;
949
950 }
951 return;
952 }
953
954 /*
955 * Legacy TX start - called by the stack, this
956 * always uses the first tx ring, and should
957 * not be used with multiqueue tx enabled.
958 */
959 static void
960 ixgbe_start(struct ifnet *ifp)
961 {
962 struct adapter *adapter = ifp->if_softc;
963 struct tx_ring *txr = adapter->tx_rings;
964
965 if (ifp->if_flags & IFF_RUNNING) {
966 IXGBE_TX_LOCK(txr);
967 ixgbe_start_locked(txr, ifp);
968 IXGBE_TX_UNLOCK(txr);
969 }
970 return;
971 }
972
973 #else /* ! IXGBE_LEGACY_TX */
974
975 /*
976 ** Multiqueue Transmit driver
977 **
978 */
979 static int
980 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
981 {
982 struct adapter *adapter = ifp->if_softc;
983 struct ix_queue *que;
984 struct tx_ring *txr;
985 int i = 0, err = 0;
986
987 /* Which queue to use */
988 if ((m->m_flags & M_FLOWID) != 0)
989 i = m->m_pkthdr.flowid % adapter->num_queues;
990 else
991 i = cpu_index(curcpu()) % adapter->num_queues;
992
993 txr = &adapter->tx_rings[i];
994 que = &adapter->queues[i];
995
996 if (IXGBE_TX_TRYLOCK(txr)) {
997 err = ixgbe_mq_start_locked(ifp, txr, m);
998 IXGBE_TX_UNLOCK(txr);
999 } else {
1000 err = drbr_enqueue(ifp, txr->br, m);
1001 softint_schedule(txr->txq_si);
1002 }
1003
1004 return (err);
1005 }
1006
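/*
 * ixgbe_mq_start_locked: with the TX lock held, enqueue the new mbuf
 * (if any) on the ring's buf_ring and then drain the ring; a failed
 * transmit puts the mbuf back for a later retry, and descriptors are
 * reclaimed with ixgbe_txeof() once the free count drops below the
 * cleanup thresholds.
 */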
1007 static int
1008 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
1009 {
1010 struct adapter *adapter = txr->adapter;
1011 struct mbuf *next;
1012 int enqueued, err = 0;
1013
1014 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
1015 adapter->link_active == 0) {
1016 if (m != NULL)
1017 err = drbr_enqueue(ifp, txr->br, m);
1018 return (err);
1019 }
1020
1021 enqueued = 0;
1022 if (m != NULL) {
1023 err = drbr_enqueue(ifp, txr->br, m);
1024 if (err) {
1025 return (err);
1026 }
1027 }
1028
1029 /* Process the queue */
1030 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1031 if ((err = ixgbe_xmit(txr, &next)) != 0) {
1032 if (next == NULL) {
1033 drbr_advance(ifp, txr->br);
1034 } else {
1035 drbr_putback(ifp, txr->br, next);
1036 }
1037 break;
1038 }
1039 drbr_advance(ifp, txr->br);
1040 enqueued++;
1041 /* Send a copy of the frame to the BPF listener */
1042 bpf_mtap(ifp, next);
1043 if ((ifp->if_flags & IFF_RUNNING) == 0)
1044 break;
1045 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
1046 ixgbe_txeof(txr);
1047 }
1048
1049 if (enqueued > 0) {
1050 /* Set watchdog on */
1051 txr->queue_status = IXGBE_QUEUE_WORKING;
1052 getmicrotime(&txr->watchdog_time);
1053 }
1054
1055 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
1056 ixgbe_txeof(txr);
1057
1058 return (err);
1059 }
1060
1061 /*
   1062  * Called from a softint to drain queued transmit packets.
1063 */
1064 static void
1065 ixgbe_deferred_mq_start(void *arg)
1066 {
1067 struct tx_ring *txr = arg;
1068 struct adapter *adapter = txr->adapter;
1069 struct ifnet *ifp = adapter->ifp;
1070
1071 IXGBE_TX_LOCK(txr);
1072 if (!drbr_empty(ifp, txr->br))
1073 ixgbe_mq_start_locked(ifp, txr, NULL);
1074 IXGBE_TX_UNLOCK(txr);
1075 }
1076
1077 /*
1078 ** Flush all ring buffers
1079 */
1080 static void
1081 ixgbe_qflush(struct ifnet *ifp)
1082 {
1083 struct adapter *adapter = ifp->if_softc;
1084 struct tx_ring *txr = adapter->tx_rings;
1085 struct mbuf *m;
1086
1087 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1088 IXGBE_TX_LOCK(txr);
1089 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1090 m_freem(m);
1091 IXGBE_TX_UNLOCK(txr);
1092 }
1093 if_qflush(ifp);
1094 }
1095 #endif /* IXGBE_LEGACY_TX */
1096
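/*
 * ixgbe_ifflags_cb: ethercom callback invoked when the interface flags
 * change (SIOCSIFFLAGS). Promiscuous/allmulti changes are applied in
 * place; anything beyond that returns ENETRESET so the interface gets
 * reinitialized.
 */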
1097 static int
1098 ixgbe_ifflags_cb(struct ethercom *ec)
1099 {
1100 struct ifnet *ifp = &ec->ec_if;
1101 struct adapter *adapter = ifp->if_softc;
1102 int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
1103
1104 IXGBE_CORE_LOCK(adapter);
1105
1106 if (change != 0)
1107 adapter->if_flags = ifp->if_flags;
1108
1109 if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
1110 rc = ENETRESET;
1111 else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
1112 ixgbe_set_promisc(adapter);
1113
1114 /* Set up VLAN support and filter */
1115 ixgbe_setup_vlan_hw_support(adapter);
1116
1117 IXGBE_CORE_UNLOCK(adapter);
1118
1119 return rc;
1120 }
1121
1122 /*********************************************************************
1123 * Ioctl entry point
1124 *
1125 * ixgbe_ioctl is called when the user wants to configure the
1126 * interface.
1127 *
1128 * return 0 on success, positive on failure
1129 **********************************************************************/
1130
1131 static int
1132 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
1133 {
1134 struct adapter *adapter = ifp->if_softc;
1135 struct ixgbe_hw *hw = &adapter->hw;
1136 struct ifcapreq *ifcr = data;
1137 struct ifreq *ifr = data;
1138 int error = 0;
1139 int l4csum_en;
1140 const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
1141 IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
1142
1143 switch (command) {
1144 case SIOCSIFFLAGS:
1145 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
1146 break;
1147 case SIOCADDMULTI:
1148 case SIOCDELMULTI:
1149 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1150 break;
1151 case SIOCSIFMEDIA:
1152 case SIOCGIFMEDIA:
1153 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1154 break;
1155 case SIOCSIFCAP:
1156 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1157 break;
1158 case SIOCSIFMTU:
1159 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
1160 break;
1161 default:
1162 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1163 break;
1164 }
1165
1166 switch (command) {
1167 case SIOCSIFMEDIA:
1168 case SIOCGIFMEDIA:
1169 return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1170 case SIOCGI2C:
1171 {
1172 struct ixgbe_i2c_req i2c;
1173 IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1174 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1175 if (error)
1176 break;
   1177 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
1178 error = EINVAL;
1179 break;
1180 }
1181 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1182 i2c.dev_addr, i2c.data);
1183 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1184 break;
1185 }
1186 case SIOCSIFCAP:
1187 /* Layer-4 Rx checksum offload has to be turned on and
1188 * off as a unit.
1189 */
1190 l4csum_en = ifcr->ifcr_capenable & l4csum;
1191 if (l4csum_en != l4csum && l4csum_en != 0)
1192 return EINVAL;
1193 /*FALLTHROUGH*/
1194 case SIOCADDMULTI:
1195 case SIOCDELMULTI:
1196 case SIOCSIFFLAGS:
1197 case SIOCSIFMTU:
1198 default:
1199 if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
1200 return error;
1201 if ((ifp->if_flags & IFF_RUNNING) == 0)
1202 ;
1203 else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
1204 IXGBE_CORE_LOCK(adapter);
1205 ixgbe_init_locked(adapter);
1206 IXGBE_CORE_UNLOCK(adapter);
1207 } else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
1208 /*
1209 * Multicast list has changed; set the hardware filter
1210 * accordingly.
1211 */
1212 IXGBE_CORE_LOCK(adapter);
1213 ixgbe_disable_intr(adapter);
1214 ixgbe_set_multi(adapter);
1215 ixgbe_enable_intr(adapter);
1216 IXGBE_CORE_UNLOCK(adapter);
1217 }
1218 return 0;
1219 }
1220
1221 return error;
1222 }
1223
1224 /*********************************************************************
1225 * Init entry point
1226 *
1227 * This routine is used in two ways. It is used by the stack as
   1228  *  the init entry point in the network interface structure. It is also
   1229  *  used by the driver as a hw/sw initialization routine to get to a
1230 * consistent state.
1231 *
1232 * return 0 on success, positive on failure
1233 **********************************************************************/
1234 #define IXGBE_MHADD_MFS_SHIFT 16
1235
1236 static void
1237 ixgbe_init_locked(struct adapter *adapter)
1238 {
1239 struct ifnet *ifp = adapter->ifp;
1240 device_t dev = adapter->dev;
1241 struct ixgbe_hw *hw = &adapter->hw;
1242 u32 k, txdctl, mhadd, gpie;
1243 u32 rxdctl, rxctrl;
1244
1245 /* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
1246
1247 KASSERT(mutex_owned(&adapter->core_mtx));
1248 INIT_DEBUGOUT("ixgbe_init: begin");
1249 hw->adapter_stopped = FALSE;
1250 ixgbe_stop_adapter(hw);
1251 callout_stop(&adapter->timer);
1252
1253 /* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
1254 adapter->max_frame_size =
1255 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1256
1257 /* reprogram the RAR[0] in case user changed it. */
1258 ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1259
1260 /* Get the latest mac address, User can use a LAA */
1261 memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
1262 IXGBE_ETH_LENGTH_OF_ADDRESS);
1263 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1264 hw->addr_ctrl.rar_used_count = 1;
1265
1266 /* Prepare transmit descriptors and buffers */
1267 if (ixgbe_setup_transmit_structures(adapter)) {
1268 device_printf(dev,"Could not setup transmit structures\n");
1269 ixgbe_stop(adapter);
1270 return;
1271 }
1272
1273 ixgbe_init_hw(hw);
1274 ixgbe_initialize_transmit_units(adapter);
1275
1276 /* Setup Multicast table */
1277 ixgbe_set_multi(adapter);
1278
1279 /*
1280 ** Determine the correct mbuf pool
1281 ** for doing jumbo frames
1282 */
1283 if (adapter->max_frame_size <= 2048)
1284 adapter->rx_mbuf_sz = MCLBYTES;
1285 else if (adapter->max_frame_size <= 4096)
1286 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1287 else if (adapter->max_frame_size <= 9216)
1288 adapter->rx_mbuf_sz = MJUM9BYTES;
1289 else
1290 adapter->rx_mbuf_sz = MJUM16BYTES;
1291
1292 /* Prepare receive descriptors and buffers */
1293 if (ixgbe_setup_receive_structures(adapter)) {
1294 device_printf(dev,"Could not setup receive structures\n");
1295 ixgbe_stop(adapter);
1296 return;
1297 }
1298
1299 /* Configure RX settings */
1300 ixgbe_initialize_receive_units(adapter);
1301
1302 gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1303
1304 /* Enable Fan Failure Interrupt */
1305 gpie |= IXGBE_SDP1_GPIEN;
1306
1307 /* Add for Thermal detection */
1308 if (hw->mac.type == ixgbe_mac_82599EB)
1309 gpie |= IXGBE_SDP2_GPIEN;
1310
1311 /* Thermal Failure Detection */
1312 if (hw->mac.type == ixgbe_mac_X540)
1313 gpie |= IXGBE_SDP0_GPIEN;
1314
1315 if (adapter->msix > 1) {
1316 /* Enable Enhanced MSIX mode */
1317 gpie |= IXGBE_GPIE_MSIX_MODE;
1318 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1319 IXGBE_GPIE_OCD;
1320 }
1321 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1322
1323 /* Set MTU size */
1324 if (ifp->if_mtu > ETHERMTU) {
1325 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1326 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1327 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1328 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1329 }
1330
1331 /* Now enable all the queues */
1332
1333 for (int i = 0; i < adapter->num_queues; i++) {
1334 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1335 txdctl |= IXGBE_TXDCTL_ENABLE;
1336 /* Set WTHRESH to 8, burst writeback */
1337 txdctl |= (8 << 16);
1338 /*
1339 * When the internal queue falls below PTHRESH (32),
1340 * start prefetching as long as there are at least
1341 * HTHRESH (1) buffers ready. The values are taken
1342 * from the Intel linux driver 3.8.21.
1343 * Prefetching enables tx line rate even with 1 queue.
1344 */
1345 txdctl |= (32 << 0) | (1 << 8);
1346 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1347 }
1348
1349 for (int i = 0; i < adapter->num_queues; i++) {
1350 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1351 if (hw->mac.type == ixgbe_mac_82598EB) {
1352 /*
   1353 			** PTHRESH = 32 (0x20, as encoded in 0x080420 below)
1354 ** HTHRESH = 4
1355 ** WTHRESH = 8
1356 */
1357 rxdctl &= ~0x3FFFFF;
1358 rxdctl |= 0x080420;
1359 }
1360 rxdctl |= IXGBE_RXDCTL_ENABLE;
1361 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1362 /* XXX I don't trust this loop, and I don't trust the
1363 * XXX memory barrier. What is this meant to do? --dyoung
1364 */
1365 for (k = 0; k < 10; k++) {
1366 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1367 IXGBE_RXDCTL_ENABLE)
1368 break;
1369 else
1370 msec_delay(1);
1371 }
1372 wmb();
1373 #ifdef DEV_NETMAP
1374 /*
1375 * In netmap mode, we must preserve the buffers made
1376 * available to userspace before the if_init()
1377 * (this is true by default on the TX side, because
1378 * init makes all buffers available to userspace).
1379 *
1380 * netmap_reset() and the device specific routines
1381 * (e.g. ixgbe_setup_receive_rings()) map these
1382 * buffers at the end of the NIC ring, so here we
1383 * must set the RDT (tail) register to make sure
1384 * they are not overwritten.
1385 *
1386 * In this driver the NIC ring starts at RDH = 0,
1387 * RDT points to the last slot available for reception (?),
1388 * so RDT = num_rx_desc - 1 means the whole ring is available.
1389 */
1390 if (ifp->if_capenable & IFCAP_NETMAP) {
1391 struct netmap_adapter *na = NA(adapter->ifp);
1392 struct netmap_kring *kring = &na->rx_rings[i];
1393 int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1394
1395 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1396 } else
1397 #endif /* DEV_NETMAP */
1398 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1399 }
1400
1401 /* Set up VLAN support and filter */
1402 ixgbe_setup_vlan_hw_support(adapter);
1403
1404 /* Enable Receive engine */
1405 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1406 if (hw->mac.type == ixgbe_mac_82598EB)
1407 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1408 rxctrl |= IXGBE_RXCTRL_RXEN;
1409 ixgbe_enable_rx_dma(hw, rxctrl);
1410
1411 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1412
1413 /* Set up MSI/X routing */
1414 if (ixgbe_enable_msix) {
1415 ixgbe_configure_ivars(adapter);
1416 /* Set up auto-mask */
1417 if (hw->mac.type == ixgbe_mac_82598EB)
1418 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1419 else {
1420 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1421 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1422 }
1423 } else { /* Simple settings for Legacy/MSI */
1424 ixgbe_set_ivar(adapter, 0, 0, 0);
1425 ixgbe_set_ivar(adapter, 0, 0, 1);
1426 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1427 }
1428
1429 #ifdef IXGBE_FDIR
1430 /* Init Flow director */
1431 if (hw->mac.type != ixgbe_mac_82598EB) {
1432 u32 hdrm = 32 << fdir_pballoc;
1433
1434 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1435 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1436 }
1437 #endif
1438
1439 /*
1440 ** Check on any SFP devices that
1441 ** need to be kick-started
1442 */
1443 if (hw->phy.type == ixgbe_phy_none) {
1444 int err = hw->phy.ops.identify(hw);
1445 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1446 device_printf(dev,
1447 "Unsupported SFP+ module type was detected.\n");
1448 return;
1449 }
1450 }
1451
1452 /* Set moderation on the Link interrupt */
1453 IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1454
1455 /* Config/Enable Link */
1456 ixgbe_config_link(adapter);
1457
1458 /* Hardware Packet Buffer & Flow Control setup */
1459 {
1460 u32 rxpb, frame, size, tmp;
1461
1462 frame = adapter->max_frame_size;
1463
1464 /* Calculate High Water */
1465 if (hw->mac.type == ixgbe_mac_X540)
1466 tmp = IXGBE_DV_X540(frame, frame);
1467 else
1468 tmp = IXGBE_DV(frame, frame);
1469 size = IXGBE_BT2KB(tmp);
1470 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1471 hw->fc.high_water[0] = rxpb - size;
1472
1473 /* Now calculate Low Water */
1474 if (hw->mac.type == ixgbe_mac_X540)
1475 tmp = IXGBE_LOW_DV_X540(frame);
1476 else
1477 tmp = IXGBE_LOW_DV(frame);
1478 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1479
1480 hw->fc.requested_mode = adapter->fc;
1481 hw->fc.pause_time = IXGBE_FC_PAUSE;
1482 hw->fc.send_xon = TRUE;
1483 }
1484 /* Initialize the FC settings */
1485 ixgbe_start_hw(hw);
1486
1487 /* And now turn on interrupts */
1488 ixgbe_enable_intr(adapter);
1489
1490 /* Now inform the stack we're ready */
1491 ifp->if_flags |= IFF_RUNNING;
1492
1493 return;
1494 }
1495
1496 static int
1497 ixgbe_init(struct ifnet *ifp)
1498 {
1499 struct adapter *adapter = ifp->if_softc;
1500
1501 IXGBE_CORE_LOCK(adapter);
1502 ixgbe_init_locked(adapter);
1503 IXGBE_CORE_UNLOCK(adapter);
1504 return 0; /* XXX ixgbe_init_locked cannot fail? really? */
1505 }
1506
1507
1508 /*
1509 **
1510 ** MSIX Interrupt Handlers and Tasklets
1511 **
1512 */
1513
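/*
 * Per-queue interrupt enable/disable. Each queue vector is one bit in
 * a 64-bit mask: the 82598 uses the single 32-bit EIMS/EIMC registers,
 * while 82599/X540 split the mask across the EIMS_EX/EIMC_EX register
 * pairs (low and high 32 bits).
 */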
1514 static inline void
1515 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1516 {
1517 struct ixgbe_hw *hw = &adapter->hw;
1518 u64 queue = (u64)(1ULL << vector);
1519 u32 mask;
1520
1521 if (hw->mac.type == ixgbe_mac_82598EB) {
1522 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1523 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1524 } else {
1525 mask = (queue & 0xFFFFFFFF);
1526 if (mask)
1527 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1528 mask = (queue >> 32);
1529 if (mask)
1530 IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1531 }
1532 }
1533
1534 __unused static inline void
1535 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1536 {
1537 struct ixgbe_hw *hw = &adapter->hw;
1538 u64 queue = (u64)(1ULL << vector);
1539 u32 mask;
1540
1541 if (hw->mac.type == ixgbe_mac_82598EB) {
1542 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1543 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1544 } else {
1545 mask = (queue & 0xFFFFFFFF);
1546 if (mask)
1547 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1548 mask = (queue >> 32);
1549 if (mask)
1550 IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1551 }
1552 }
1553
1554 static inline void
1555 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1556 {
1557 u32 mask;
1558
1559 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1560 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1561 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1562 } else {
1563 mask = (queues & 0xFFFFFFFF);
1564 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1565 mask = (queues >> 32);
1566 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1567 }
1568 }
1569
1570
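/*
 * ixgbe_handle_que: deferred (softint) queue service. Drains completed
 * RX and TX work, restarts transmission if packets are queued, and
 * either reschedules itself when more RX work remains or re-enables
 * the queue interrupt.
 */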
1571 static void
1572 ixgbe_handle_que(void *context)
1573 {
1574 struct ix_queue *que = context;
1575 struct adapter *adapter = que->adapter;
1576 struct tx_ring *txr = que->txr;
1577 struct ifnet *ifp = adapter->ifp;
1578 bool more;
1579
1580 adapter->handleq.ev_count++;
1581
1582 if (ifp->if_flags & IFF_RUNNING) {
1583 more = ixgbe_rxeof(que);
1584 IXGBE_TX_LOCK(txr);
1585 ixgbe_txeof(txr);
1586 #ifndef IXGBE_LEGACY_TX
1587 if (!drbr_empty(ifp, txr->br))
1588 ixgbe_mq_start_locked(ifp, txr, NULL);
1589 #else
1590 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1591 ixgbe_start_locked(txr, ifp);
1592 #endif
1593 IXGBE_TX_UNLOCK(txr);
1594 if (more) {
1595 adapter->req.ev_count++;
1596 softint_schedule(que->que_si);
1597 return;
1598 }
1599 }
1600
1601 /* Reenable this interrupt */
1602 ixgbe_enable_queue(adapter, que->msix);
1603 return;
1604 }
1605
1606
1607 /*********************************************************************
1608 *
1609 * Legacy Interrupt Service routine
1610 *
1611 **********************************************************************/
1612
1613 static int
1614 ixgbe_legacy_irq(void *arg)
1615 {
1616 struct ix_queue *que = arg;
1617 struct adapter *adapter = que->adapter;
1618 struct ifnet *ifp = adapter->ifp;
1619 struct ixgbe_hw *hw = &adapter->hw;
1620 struct tx_ring *txr = adapter->tx_rings;
1621 bool more_tx = false, more_rx = false;
1622 u32 reg_eicr, loop = MAX_LOOP;
1623
1624 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1625
1626 adapter->stats.legint.ev_count++;
1627 ++que->irqs;
1628 if (reg_eicr == 0) {
1629 adapter->stats.intzero.ev_count++;
1630 if ((ifp->if_flags & IFF_UP) != 0)
1631 ixgbe_enable_intr(adapter);
1632 return 0;
1633 }
1634
1635 if ((ifp->if_flags & IFF_RUNNING) != 0) {
1636 more_rx = ixgbe_rxeof(que);
1637
1638 IXGBE_TX_LOCK(txr);
1639 do {
1640 adapter->txloops.ev_count++;
1641 more_tx = ixgbe_txeof(txr);
1642 } while (loop-- && more_tx);
1643 IXGBE_TX_UNLOCK(txr);
1644 }
1645
1646 if (more_rx || more_tx) {
1647 if (more_rx)
1648 adapter->morerx.ev_count++;
1649 if (more_tx)
1650 adapter->moretx.ev_count++;
1651 softint_schedule(que->que_si);
1652 }
1653
1654 /* Check for fan failure */
1655 if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1656 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1657 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1658 "REPLACE IMMEDIATELY!!\n");
1659 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1660 }
1661
1662 /* Link status change */
1663 if (reg_eicr & IXGBE_EICR_LSC)
1664 softint_schedule(adapter->link_si);
1665
1666 ixgbe_enable_intr(adapter);
1667 return 1;
1668 }
1669
1670
1671 #if defined(NETBSD_MSI_OR_MSIX)
1672 /*********************************************************************
1673 *
1674 * MSIX Queue Interrupt Service routine
1675 *
1676 **********************************************************************/
1677 void
1678 ixgbe_msix_que(void *arg)
1679 {
1680 struct ix_queue *que = arg;
1681 struct adapter *adapter = que->adapter;
1682 struct tx_ring *txr = que->txr;
1683 struct rx_ring *rxr = que->rxr;
1684 bool more_tx, more_rx;
1685 u32 newitr = 0;
1686
1687 ixgbe_disable_queue(adapter, que->msix);
1688 ++que->irqs;
1689
1690 more_rx = ixgbe_rxeof(que);
1691
1692 IXGBE_TX_LOCK(txr);
1693 more_tx = ixgbe_txeof(txr);
1694 /*
1695 ** Make certain that if the stack
1696 ** has anything queued the task gets
1697 ** scheduled to handle it.
1698 */
1699 #ifdef IXGBE_LEGACY_TX
1700 if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
1701 #else
1702 if (!drbr_empty(adapter->ifp, txr->br))
1703 #endif
1704 more_tx = 1;
1705 IXGBE_TX_UNLOCK(txr);
1706
1707 /* Do AIM now? */
1708
1709 if (ixgbe_enable_aim == FALSE)
1710 goto no_calc;
1711 /*
1712 ** Do Adaptive Interrupt Moderation:
1713 ** - Write out last calculated setting
1714 ** - Calculate based on average size over
1715 ** the last interval.
1716 */
1717 if (que->eitr_setting)
1718 IXGBE_WRITE_REG(&adapter->hw,
1719 IXGBE_EITR(que->msix), que->eitr_setting);
1720
1721 que->eitr_setting = 0;
1722
1723 /* Idle, do nothing */
1724 if ((txr->bytes == 0) && (rxr->bytes == 0))
1725 goto no_calc;
1726
1727 if ((txr->bytes) && (txr->packets))
1728 newitr = txr->bytes/txr->packets;
1729 if ((rxr->bytes) && (rxr->packets))
1730 newitr = max(newitr,
1731 (rxr->bytes / rxr->packets));
1732 newitr += 24; /* account for hardware frame, crc */
1733
1734 /* set an upper boundary */
1735 newitr = min(newitr, 3000);
1736
1737 /* Be nice to the mid range */
1738 if ((newitr > 300) && (newitr < 1200))
1739 newitr = (newitr / 3);
1740 else
1741 newitr = (newitr / 2);
1742
1743 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1744 newitr |= newitr << 16;
1745 else
1746 newitr |= IXGBE_EITR_CNT_WDIS;
1747
1748 /* save for next interrupt */
1749 que->eitr_setting = newitr;
1750
1751 /* Reset state */
1752 txr->bytes = 0;
1753 txr->packets = 0;
1754 rxr->bytes = 0;
1755 rxr->packets = 0;
1756
1757 no_calc:
1758 if (more_tx || more_rx)
1759 softint_schedule(que->que_si);
1760 else /* Reenable this interrupt */
1761 ixgbe_enable_queue(adapter, que->msix);
1762 return;
1763 }
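/*
 * Illustrative sketch (kept out of the build with #if 0, not part of the
 * original driver): it restates the Adaptive Interrupt Moderation arithmetic
 * from ixgbe_msix_que() above in isolation, with one assumed worked example,
 * so the intent of the EITR update is easier to follow.
 */
#if 0
static u32
ixgbe_aim_sketch(struct tx_ring *txr, struct rx_ring *rxr, bool is_82598)
{
	u32 newitr = 0;

	/* Average frame size seen on each ring since the last interrupt */
	if (txr->bytes && txr->packets)
		newitr = txr->bytes / txr->packets;
	if (rxr->bytes && rxr->packets)
		newitr = max(newitr, rxr->bytes / rxr->packets);
	newitr += 24;			/* hardware framing + CRC */
	newitr = min(newitr, 3000);	/* upper bound */

	/* Be nice to the mid range */
	if ((newitr > 300) && (newitr < 1200))
		newitr /= 3;
	else
		newitr /= 2;

	/*
	 * Example: 1500-byte average frames give (1500 + 24) / 2 = 762,
	 * which is saved and written to EITR on the next interrupt.
	 */
	if (is_82598)
		newitr |= newitr << 16;
	else
		newitr |= IXGBE_EITR_CNT_WDIS;
	return newitr;
}
#endif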
1764
1765
1766 static void
1767 ixgbe_msix_link(void *arg)
1768 {
1769 struct adapter *adapter = arg;
1770 struct ixgbe_hw *hw = &adapter->hw;
1771 u32 reg_eicr;
1772
1773 ++adapter->link_irq.ev_count;
1774
1775 /* First get the cause */
1776 reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1777 /* Clear interrupt with write */
1778 IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1779
1780 /* Link status change */
1781 if (reg_eicr & IXGBE_EICR_LSC)
1782 softint_schedule(adapter->link_si);
1783
1784 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1785 #ifdef IXGBE_FDIR
1786 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1787 /* This is probably overkill :) */
1788 if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1789 return;
1790 /* Disable the interrupt */
1791 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1792 softint_schedule(adapter->fdir_si);
1793 } else
1794 #endif
1795 if (reg_eicr & IXGBE_EICR_ECC) {
1796 device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1797 "Please Reboot!!\n");
1798 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1799 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1802 /* Clear the interrupt */
1803 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1804 softint_schedule(adapter->msf_si);
1805 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1806 /* Clear the interrupt */
1807 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1808 softint_schedule(adapter->mod_si);
1809 }
1810 }
1811
1812 /* Check for fan failure */
1813 if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1814 (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1815 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1816 "REPLACE IMMEDIATELY!!\n");
1817 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1818 }
1819
1820 /* Check for over temp condition */
1821 if ((hw->mac.type == ixgbe_mac_X540) &&
1822 (reg_eicr & IXGBE_EICR_TS)) {
1823 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1824 "PHY IS SHUT DOWN!!\n");
1825 device_printf(adapter->dev, "System shutdown required\n");
1826 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1827 }
1828
1829 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1830 return;
1831 }
1832 #endif
1833
1834 /*********************************************************************
1835 *
1836 * Media Ioctl callback
1837 *
1838 * This routine is called whenever the user queries the status of
1839 * the interface using ifconfig.
1840 *
1841 **********************************************************************/
1842 static void
1843 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1844 {
1845 struct adapter *adapter = ifp->if_softc;
1846
1847 INIT_DEBUGOUT("ixgbe_media_status: begin");
1848 IXGBE_CORE_LOCK(adapter);
1849 ixgbe_update_link_status(adapter);
1850
1851 ifmr->ifm_status = IFM_AVALID;
1852 ifmr->ifm_active = IFM_ETHER;
1853
1854 if (!adapter->link_active) {
1855 IXGBE_CORE_UNLOCK(adapter);
1856 return;
1857 }
1858
1859 ifmr->ifm_status |= IFM_ACTIVE;
1860
1861 switch (adapter->link_speed) {
1862 case IXGBE_LINK_SPEED_100_FULL:
1863 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1864 break;
1865 case IXGBE_LINK_SPEED_1GB_FULL:
1866 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1867 break;
1868 case IXGBE_LINK_SPEED_10GB_FULL:
1869 ifmr->ifm_active |= adapter->optics | IFM_FDX;
1870 break;
1871 }
1872
1873 IXGBE_CORE_UNLOCK(adapter);
1874
1875 return;
1876 }
1877
1878 /*********************************************************************
1879 *
1880 * Media Ioctl callback
1881 *
1882 * This routine is called when the user changes speed/duplex using
1883  *  media/mediaopt options with ifconfig.
1884 *
1885 **********************************************************************/
1886 static int
1887 ixgbe_media_change(struct ifnet * ifp)
1888 {
1889 struct adapter *adapter = ifp->if_softc;
1890 struct ifmedia *ifm = &adapter->media;
1891
1892 INIT_DEBUGOUT("ixgbe_media_change: begin");
1893
1894 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1895 return (EINVAL);
1896
1897 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1898 case IFM_AUTO:
1899 adapter->hw.phy.autoneg_advertised =
1900 IXGBE_LINK_SPEED_100_FULL |
1901 IXGBE_LINK_SPEED_1GB_FULL |
1902 IXGBE_LINK_SPEED_10GB_FULL;
1903 break;
1904 default:
1905 device_printf(adapter->dev, "Only auto media type\n");
1906 return (EINVAL);
1907 }
1908
1909 return (0);
1910 }
1911
1912 /*********************************************************************
1913 *
1914 * This routine maps the mbufs to tx descriptors, allowing the
1915 * TX engine to transmit the packets.
1916 * - return 0 on success, positive on failure
1917 *
1918 **********************************************************************/
1919
1920 static int
1921 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
1922 {
1923 struct m_tag *mtag;
1924 struct adapter *adapter = txr->adapter;
1925 struct ethercom *ec = &adapter->osdep.ec;
1926 u32 olinfo_status = 0, cmd_type_len;
1927 int i, j, error;
1928 int first;
1929 bus_dmamap_t map;
1930 struct ixgbe_tx_buf *txbuf;
1931 union ixgbe_adv_tx_desc *txd = NULL;
1932
1933 /* Basic descriptor defines */
1934 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1935 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1936
1937 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
1938 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1939
1940 /*
1941 * Important to capture the first descriptor
1942 * used because it will contain the index of
1943 * the one we tell the hardware to report back
1944 */
1945 first = txr->next_avail_desc;
1946 txbuf = &txr->tx_buffers[first];
1947 map = txbuf->map;
1948
1949 /*
1950 * Map the packet for DMA.
1951 */
1952 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
1953 m_head, BUS_DMA_NOWAIT);
1954
1955 if (__predict_false(error)) {
1956
1957 switch (error) {
1958 case EAGAIN:
1959 adapter->eagain_tx_dma_setup.ev_count++;
1960 return EAGAIN;
1961 case ENOMEM:
1962 adapter->enomem_tx_dma_setup.ev_count++;
1963 return EAGAIN;
1964 case EFBIG:
1965 /*
1966 * XXX Try it again?
1967 * do m_defrag() and retry bus_dmamap_load_mbuf().
1968 */
1969 adapter->efbig_tx_dma_setup.ev_count++;
1970 return error;
1971 case EINVAL:
1972 adapter->einval_tx_dma_setup.ev_count++;
1973 return error;
1974 default:
1975 adapter->other_tx_dma_setup.ev_count++;
1976 return error;
1977 case 0:
1978 break;
1979 }
1980 }
1981
1982 /* Make certain there are enough descriptors */
1983 if (map->dm_nsegs > txr->tx_avail - 2) {
1984 txr->no_desc_avail.ev_count++;
1985 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
1986 return EAGAIN;
1987 }
1988
1989 /*
1990 	** Set up the appropriate offload context;
1991 	** this will consume the first descriptor.
1992 */
1993 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1994 if (__predict_false(error)) {
1995 return (error);
1996 }
1997
1998 #ifdef IXGBE_FDIR
1999 /* Do the flow director magic */
2000 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
2001 ++txr->atr_count;
2002 if (txr->atr_count >= atr_sample_rate) {
2003 ixgbe_atr(txr, m_head);
2004 txr->atr_count = 0;
2005 }
2006 }
2007 #endif
2008
2009 i = txr->next_avail_desc;
2010 for (j = 0; j < map->dm_nsegs; j++) {
2011 bus_size_t seglen;
2012 bus_addr_t segaddr;
2013
2014 txbuf = &txr->tx_buffers[i];
2015 txd = &txr->tx_base[i];
2016 seglen = map->dm_segs[j].ds_len;
2017 segaddr = htole64(map->dm_segs[j].ds_addr);
2018
2019 txd->read.buffer_addr = segaddr;
2020 txd->read.cmd_type_len = htole32(txr->txd_cmd |
2021 cmd_type_len |seglen);
2022 txd->read.olinfo_status = htole32(olinfo_status);
2023
2024 if (++i == txr->num_desc)
2025 i = 0;
2026 }
2027
2028 txd->read.cmd_type_len |=
2029 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
2030 txr->tx_avail -= map->dm_nsegs;
2031 txr->next_avail_desc = i;
2032
2033 txbuf->m_head = m_head;
2034 /*
2035 ** Here we swap the map so the last descriptor,
2036 	** which gets the completion interrupt, has the
2037 ** real map, and the first descriptor gets the
2038 ** unused map from this descriptor.
2039 */
2040 txr->tx_buffers[first].map = txbuf->map;
2041 txbuf->map = map;
2042 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
2043 BUS_DMASYNC_PREWRITE);
2044
2045 /* Set the EOP descriptor that will be marked done */
2046 txbuf = &txr->tx_buffers[first];
2047 txbuf->eop = txd;
2048
2049 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2050 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2051 /*
2052 * Advance the Transmit Descriptor Tail (Tdt), this tells the
2053 * hardware that this frame is available to transmit.
2054 */
2055 ++txr->total_packets.ev_count;
2056 IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
2057
2058 return 0;
2059 }
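/*
 * Illustrative sketch (kept out of the build with #if 0): it works through
 * the descriptor accounting that ixgbe_xmit() above performs for a single
 * frame; the numbers are an assumed example, not driver state.
 */
#if 0
	/*
	 * Example: tx_avail = 100 and a frame that maps to 3 DMA segments
	 * with checksum offload requested:
	 *   - ixgbe_tx_ctx_setup() consumes 1 context descriptor
	 *     (tx_avail -> 99),
	 *   - the segment loop fills 3 data descriptors, the last of which
	 *     gets EOP|RS set (tx_avail -> 96),
	 *   - next_avail_desc advances by 4 (modulo num_desc) and TDT is
	 *     bumped to hand the frame to the hardware.
	 */
	int nsegs = 3;					/* from bus_dmamap_load_mbuf() */
	int used = 1 /* context */ + nsegs /* data */;	/* 4 descriptors total */
#endif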
2060
2061 static void
2062 ixgbe_set_promisc(struct adapter *adapter)
2063 {
2064 struct ether_multi *enm;
2065 struct ether_multistep step;
2066 u_int32_t reg_rctl;
2067 struct ethercom *ec = &adapter->osdep.ec;
2068 struct ifnet *ifp = adapter->ifp;
2069 int mcnt = 0;
2070
2071 reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2072 reg_rctl &= (~IXGBE_FCTRL_UPE);
2073 if (ifp->if_flags & IFF_ALLMULTI)
2074 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2075 else {
2076 ETHER_FIRST_MULTI(step, ec, enm);
2077 while (enm != NULL) {
2078 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2079 break;
2080 mcnt++;
2081 ETHER_NEXT_MULTI(step, enm);
2082 }
2083 }
2084 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2085 reg_rctl &= (~IXGBE_FCTRL_MPE);
2086 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2087
2088 if (ifp->if_flags & IFF_PROMISC) {
2089 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2090 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2091 } else if (ifp->if_flags & IFF_ALLMULTI) {
2092 reg_rctl |= IXGBE_FCTRL_MPE;
2093 reg_rctl &= ~IXGBE_FCTRL_UPE;
2094 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
2095 }
2096 return;
2097 }
2098
2099
2100 /*********************************************************************
2101 * Multicast Update
2102 *
2103  * This routine is called whenever the multicast address list is updated.
2104 *
2105 **********************************************************************/
2106 #define IXGBE_RAR_ENTRIES 16
2107
2108 static void
2109 ixgbe_set_multi(struct adapter *adapter)
2110 {
2111 struct ether_multi *enm;
2112 struct ether_multistep step;
2113 u32 fctrl;
2114 u8 *mta;
2115 u8 *update_ptr;
2116 int mcnt = 0;
2117 struct ethercom *ec = &adapter->osdep.ec;
2118 struct ifnet *ifp = adapter->ifp;
2119
2120 IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
2121
2122 mta = adapter->mta;
2123 bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
2124 MAX_NUM_MULTICAST_ADDRESSES);
2125
2126 ifp->if_flags &= ~IFF_ALLMULTI;
2127 ETHER_FIRST_MULTI(step, ec, enm);
2128 while (enm != NULL) {
2129 if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
2130 (memcmp(enm->enm_addrlo, enm->enm_addrhi,
2131 ETHER_ADDR_LEN) != 0)) {
2132 ifp->if_flags |= IFF_ALLMULTI;
2133 break;
2134 }
2135 bcopy(enm->enm_addrlo,
2136 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
2137 IXGBE_ETH_LENGTH_OF_ADDRESS);
2138 mcnt++;
2139 ETHER_NEXT_MULTI(step, enm);
2140 }
2141
2142 fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
2143 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2144 if (ifp->if_flags & IFF_PROMISC)
2145 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2146 else if (ifp->if_flags & IFF_ALLMULTI) {
2147 fctrl |= IXGBE_FCTRL_MPE;
2148 }
2149
2150 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2151
2152 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2153 update_ptr = mta;
2154 ixgbe_update_mc_addr_list(&adapter->hw,
2155 update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2156 }
2157
2158 return;
2159 }
2160
2161 /*
2162 * This is an iterator function now needed by the multicast
2163 * shared code. It simply feeds the shared code routine the
2164 * addresses in the array of ixgbe_set_multi() one by one.
2165 */
2166 static u8 *
2167 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2168 {
2169 u8 *addr = *update_ptr;
2170 u8 *newptr;
2171 *vmdq = 0;
2172
2173 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2174 *update_ptr = newptr;
2175 return addr;
2176 }
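/*
 * Illustrative sketch (kept out of the build with #if 0): it shows how the
 * shared code is assumed to consume ixgbe_mc_array_itr() above; the iterator
 * hands back one address per call and advances the cursor itself.  The
 * consumer name used here is hypothetical.
 */
#if 0
	u8 *ptr = adapter->mta;	/* flat array filled by ixgbe_set_multi() */
	u32 vmdq;

	for (u32 i = 0; i < mcnt; i++) {
		u8 *addr = ixgbe_mc_array_itr(&adapter->hw, &ptr, &vmdq);
		/* addr points at one IXGBE_ETH_LENGTH_OF_ADDRESS-byte entry */
		program_mta_entry(&adapter->hw, addr);	/* hypothetical consumer */
	}
#endif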
2177
2178
2179 /*********************************************************************
2180 * Timer routine
2181 *
2182  *  This routine checks for link status, updates statistics,
2183 * and runs the watchdog check.
2184 *
2185 **********************************************************************/
2186
2187 static void
2188 ixgbe_local_timer1(void *arg)
2189 {
2190 struct adapter *adapter = arg;
2191 device_t dev = adapter->dev;
2192 struct ix_queue *que = adapter->queues;
2193 struct tx_ring *txr = adapter->tx_rings;
2194 int hung = 0, paused = 0;
2195
2196 KASSERT(mutex_owned(&adapter->core_mtx));
2197
2198 /* Check for pluggable optics */
2199 if (adapter->sfp_probe)
2200 if (!ixgbe_sfp_probe(adapter))
2201 goto out; /* Nothing to do */
2202
2203 ixgbe_update_link_status(adapter);
2204 ixgbe_update_stats_counters(adapter);
2205
2206 /*
2207 * If the interface has been paused
2208 * then don't do the watchdog check
2209 */
2210 if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2211 paused = 1;
2212
2213 /*
2214 ** Check the TX queues status
2215 ** - watchdog only if all queues show hung
2216 */
2217 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2218 if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2219 (paused == 0))
2220 ++hung;
2221 else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2222 softint_schedule(que->que_si);
2223 }
2224 	/* Only truly watchdog if all queues show hung */
2225 if (hung == adapter->num_queues)
2226 goto watchdog;
2227
2228 out:
2229 ixgbe_rearm_queues(adapter, adapter->que_mask);
2230 callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2231 return;
2232
2233 watchdog:
2234 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2235 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2236 IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2237 IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2238 	device_printf(dev, "TX(%d) desc avail = %d, "
2239 	    "Next TX to Clean = %d\n",
2240 txr->me, txr->tx_avail, txr->next_to_clean);
2241 adapter->ifp->if_flags &= ~IFF_RUNNING;
2242 adapter->watchdog_events.ev_count++;
2243 ixgbe_init_locked(adapter);
2244 }
2245
2246 static void
2247 ixgbe_local_timer(void *arg)
2248 {
2249 struct adapter *adapter = arg;
2250
2251 IXGBE_CORE_LOCK(adapter);
2252 ixgbe_local_timer1(adapter);
2253 IXGBE_CORE_UNLOCK(adapter);
2254 }
2255
2256 /*
2257 ** Note: this routine updates the OS on the link state;
2258 ** the real check of the hardware only happens with
2259 ** a link interrupt.
2260 */
2261 static void
2262 ixgbe_update_link_status(struct adapter *adapter)
2263 {
2264 struct ifnet *ifp = adapter->ifp;
2265 device_t dev = adapter->dev;
2266
2267
2268 if (adapter->link_up){
2269 if (adapter->link_active == FALSE) {
2270 if (bootverbose)
2271 device_printf(dev,"Link is up %d Gbps %s \n",
2272 ((adapter->link_speed == 128)? 10:1),
2273 "Full Duplex");
2274 adapter->link_active = TRUE;
2275 /* Update any Flow Control changes */
2276 ixgbe_fc_enable(&adapter->hw);
2277 if_link_state_change(ifp, LINK_STATE_UP);
2278 }
2279 } else { /* Link down */
2280 if (adapter->link_active == TRUE) {
2281 if (bootverbose)
2282 device_printf(dev,"Link is Down\n");
2283 if_link_state_change(ifp, LINK_STATE_DOWN);
2284 adapter->link_active = FALSE;
2285 }
2286 }
2287
2288 return;
2289 }
2290
2291
2292 static void
2293 ixgbe_ifstop(struct ifnet *ifp, int disable)
2294 {
2295 struct adapter *adapter = ifp->if_softc;
2296
2297 IXGBE_CORE_LOCK(adapter);
2298 ixgbe_stop(adapter);
2299 IXGBE_CORE_UNLOCK(adapter);
2300 }
2301
2302 /*********************************************************************
2303 *
2304 * This routine disables all traffic on the adapter by issuing a
2305 * global reset on the MAC and deallocates TX/RX buffers.
2306 *
2307 **********************************************************************/
2308
2309 static void
2310 ixgbe_stop(void *arg)
2311 {
2312 struct ifnet *ifp;
2313 struct adapter *adapter = arg;
2314 struct ixgbe_hw *hw = &adapter->hw;
2315 ifp = adapter->ifp;
2316
2317 KASSERT(mutex_owned(&adapter->core_mtx));
2318
2319 INIT_DEBUGOUT("ixgbe_stop: begin\n");
2320 ixgbe_disable_intr(adapter);
2321 callout_stop(&adapter->timer);
2322
2323 /* Let the stack know...*/
2324 ifp->if_flags &= ~IFF_RUNNING;
2325
2326 ixgbe_reset_hw(hw);
2327 hw->adapter_stopped = FALSE;
2328 ixgbe_stop_adapter(hw);
2329 /* Turn off the laser */
2330 if (hw->phy.multispeed_fiber)
2331 ixgbe_disable_tx_laser(hw);
2332
2333 /* reprogram the RAR[0] in case user changed it. */
2334 ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2335
2336 return;
2337 }
2338
2339
2340 /*********************************************************************
2341 *
2342 * Determine hardware revision.
2343 *
2344 **********************************************************************/
2345 static void
2346 ixgbe_identify_hardware(struct adapter *adapter)
2347 {
2348 pcitag_t tag;
2349 pci_chipset_tag_t pc;
2350 pcireg_t subid, id;
2351 struct ixgbe_hw *hw = &adapter->hw;
2352
2353 pc = adapter->osdep.pc;
2354 tag = adapter->osdep.tag;
2355
2356 id = pci_conf_read(pc, tag, PCI_ID_REG);
2357 subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
2358
2359 /* Save off the information about this board */
2360 hw->vendor_id = PCI_VENDOR(id);
2361 hw->device_id = PCI_PRODUCT(id);
2362 hw->revision_id =
2363 PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
2364 hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
2365 hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
2366
2367 /* We need this here to set the num_segs below */
2368 ixgbe_set_mac_type(hw);
2369
2370 /* Pick up the 82599 and VF settings */
2371 if (hw->mac.type != ixgbe_mac_82598EB) {
2372 hw->phy.smart_speed = ixgbe_smart_speed;
2373 adapter->num_segs = IXGBE_82599_SCATTER;
2374 } else
2375 adapter->num_segs = IXGBE_82598_SCATTER;
2376
2377 return;
2378 }
2379
2380 /*********************************************************************
2381 *
2382 * Determine optic type
2383 *
2384 **********************************************************************/
2385 static void
2386 ixgbe_setup_optics(struct adapter *adapter)
2387 {
2388 struct ixgbe_hw *hw = &adapter->hw;
2389 int layer;
2390
2391 layer = ixgbe_get_supported_physical_layer(hw);
2392
2393 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2394 adapter->optics = IFM_10G_T;
2395 return;
2396 }
2397
2398 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2399 adapter->optics = IFM_1000_T;
2400 return;
2401 }
2402
2403 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2404 adapter->optics = IFM_1000_SX;
2405 return;
2406 }
2407
2408 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2409 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2410 adapter->optics = IFM_10G_LR;
2411 return;
2412 }
2413
2414 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2415 adapter->optics = IFM_10G_SR;
2416 return;
2417 }
2418
2419 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2420 adapter->optics = IFM_10G_TWINAX;
2421 return;
2422 }
2423
2424 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2425 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2426 adapter->optics = IFM_10G_CX4;
2427 return;
2428 }
2429
2430 /* If we get here just set the default */
2431 adapter->optics = IFM_ETHER | IFM_AUTO;
2432 return;
2433 }
2434
2435 /*********************************************************************
2436 *
2437 * Setup the Legacy or MSI Interrupt handler
2438 *
2439 **********************************************************************/
2440 static int
2441 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
2442 {
2443 device_t dev = adapter->dev;
2444 struct ix_queue *que = adapter->queues;
2445 #ifndef IXGBE_LEGACY_TX
2446 struct tx_ring *txr = adapter->tx_rings;
2447 #endif
2448 char intrbuf[PCI_INTRSTR_LEN];
2449 #if 0
2450 int rid = 0;
2451
2452 /* MSI RID at 1 */
2453 if (adapter->msix == 1)
2454 rid = 1;
2455 #endif
2456
2457 /* We allocate a single interrupt resource */
2458 if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
2459 aprint_error_dev(dev, "unable to map interrupt\n");
2460 return ENXIO;
2461 } else {
2462 aprint_normal_dev(dev, "interrupting at %s\n",
2463 pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
2464 intrbuf, sizeof(intrbuf)));
2465 }
2466
2467 /*
2468 * Try allocating a fast interrupt and the associated deferred
2469 * processing contexts.
2470 */
2471 #ifndef IXGBE_LEGACY_TX
2472 txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
2473 txr);
2474 #endif
2475 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
2476
2477 /* Tasklets for Link, SFP and Multispeed Fiber */
2478 adapter->link_si =
2479 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2480 adapter->mod_si =
2481 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2482 adapter->msf_si =
2483 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2484
2485 #ifdef IXGBE_FDIR
2486 adapter->fdir_si =
2487 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2488 #endif
2489 if (que->que_si == NULL ||
2490 adapter->link_si == NULL ||
2491 adapter->mod_si == NULL ||
2492 #ifdef IXGBE_FDIR
2493 adapter->fdir_si == NULL ||
2494 #endif
2495 adapter->msf_si == NULL) {
2496 aprint_error_dev(dev,
2497 "could not establish software interrupts\n");
2498 return ENXIO;
2499 }
2500
2501 adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
2502 adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
2503 if (adapter->osdep.intr == NULL) {
2504 aprint_error_dev(dev, "failed to register interrupt handler\n");
2505 softint_disestablish(que->que_si);
2506 softint_disestablish(adapter->link_si);
2507 softint_disestablish(adapter->mod_si);
2508 softint_disestablish(adapter->msf_si);
2509 #ifdef IXGBE_FDIR
2510 softint_disestablish(adapter->fdir_si);
2511 #endif
2512 return ENXIO;
2513 }
2514 /* For simplicity in the handlers */
2515 adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2516
2517 return (0);
2518 }
2519
2520
2521 /*********************************************************************
2522 *
2523 * Setup MSIX Interrupt resources and handlers
2524 *
2525 **********************************************************************/
2526 static int
2527 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
2528 {
2529 #if !defined(NETBSD_MSI_OR_MSIX)
2530 return 0;
2531 #else
2532 device_t dev = adapter->dev;
2533 struct ix_queue *que = adapter->queues;
2534 struct tx_ring *txr = adapter->tx_rings;
2535 int error, rid, vector = 0;
2536
2537 for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2538 rid = vector + 1;
2539 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2540 RF_SHAREABLE | RF_ACTIVE);
2541 if (que->res == NULL) {
2542 aprint_error_dev(dev,"Unable to allocate"
2543 " bus resource: que interrupt [%d]\n", vector);
2544 return (ENXIO);
2545 }
2546 /* Set the handler function */
2547 error = bus_setup_intr(dev, que->res,
2548 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2549 ixgbe_msix_que, que, &que->tag);
2550 if (error) {
2551 que->res = NULL;
2552 aprint_error_dev(dev,
2553 "Failed to register QUE handler\n");
2554 return error;
2555 }
2556 #if __FreeBSD_version >= 800504
2557 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2558 #endif
2559 que->msix = vector;
2560 adapter->que_mask |= (u64)(1 << que->msix);
2561 /*
2562 ** Bind the msix vector, and thus the
2563 ** ring to the corresponding cpu.
2564 */
2565 if (adapter->num_queues > 1)
2566 bus_bind_intr(dev, que->res, i);
2567
2568 #ifndef IXGBE_LEGACY_TX
2569 txr->txq_si = softint_establish(SOFTINT_NET,
2570 ixgbe_deferred_mq_start, txr);
2571 #endif
2572 que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
2573 que);
2574 if (que->que_si == NULL) {
2575 aprint_error_dev(dev,
2576 "could not establish software interrupt\n");
2577 }
2578 }
2579
2580 /* and Link */
2581 rid = vector + 1;
2582 adapter->res = bus_alloc_resource_any(dev,
2583 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2584 if (!adapter->res) {
2585 aprint_error_dev(dev,"Unable to allocate bus resource: "
2586 "Link interrupt [%d]\n", rid);
2587 return (ENXIO);
2588 }
2589 /* Set the link handler function */
2590 error = bus_setup_intr(dev, adapter->res,
2591 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2592 ixgbe_msix_link, adapter, &adapter->tag);
2593 if (error) {
2594 adapter->res = NULL;
2595 aprint_error_dev(dev, "Failed to register LINK handler\n");
2596 return (error);
2597 }
2598 #if __FreeBSD_version >= 800504
2599 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2600 #endif
2601 adapter->linkvec = vector;
2602 /* Tasklets for Link, SFP and Multispeed Fiber */
2603 adapter->link_si =
2604 softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
2605 adapter->mod_si =
2606 softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
2607 adapter->msf_si =
2608 softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
2609 #ifdef IXGBE_FDIR
2610 adapter->fdir_si =
2611 softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
2612 #endif
2613
2614 return (0);
2615 #endif
2616 }
2617
2618 /*
2619 * Setup Either MSI/X or MSI
2620 */
2621 static int
2622 ixgbe_setup_msix(struct adapter *adapter)
2623 {
2624 #if !defined(NETBSD_MSI_OR_MSIX)
2625 return 0;
2626 #else
2627 device_t dev = adapter->dev;
2628 int rid, want, queues, msgs;
2629
2630 /* Override by tuneable */
2631 if (ixgbe_enable_msix == 0)
2632 goto msi;
2633
2634 /* First try MSI/X */
2635 rid = PCI_BAR(MSIX_82598_BAR);
2636 adapter->msix_mem = bus_alloc_resource_any(dev,
2637 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2638 if (!adapter->msix_mem) {
2639 rid += 4; /* 82599 maps in higher BAR */
2640 adapter->msix_mem = bus_alloc_resource_any(dev,
2641 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2642 }
2643 if (!adapter->msix_mem) {
2644 /* May not be enabled */
2645 device_printf(adapter->dev,
2646 "Unable to map MSIX table \n");
2647 goto msi;
2648 }
2649
2650 msgs = pci_msix_count(dev);
2651 if (msgs == 0) { /* system has msix disabled */
2652 bus_release_resource(dev, SYS_RES_MEMORY,
2653 rid, adapter->msix_mem);
2654 adapter->msix_mem = NULL;
2655 goto msi;
2656 }
2657
2658 /* Figure out a reasonable auto config value */
2659 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2660
2661 if (ixgbe_num_queues != 0)
2662 queues = ixgbe_num_queues;
2663 /* Set max queues to 8 when autoconfiguring */
2664 else if ((ixgbe_num_queues == 0) && (queues > 8))
2665 queues = 8;
2666
2667 /*
2668 ** Want one vector (RX/TX pair) per queue
2669 ** plus an additional for Link.
2670 */
2671 want = queues + 1;
2672 if (msgs >= want)
2673 msgs = want;
2674 else {
2675 device_printf(adapter->dev,
2676 "MSIX Configuration Problem, "
2677 "%d vectors but %d queues wanted!\n",
2678 msgs, want);
2679 return (0); /* Will go to Legacy setup */
2680 }
2681 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2682 device_printf(adapter->dev,
2683 "Using MSIX interrupts with %d vectors\n", msgs);
2684 adapter->num_queues = queues;
2685 return (msgs);
2686 }
2687 msi:
2688 msgs = pci_msi_count(dev);
2689 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2690 device_printf(adapter->dev,"Using an MSI interrupt\n");
2691 else
2692 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2693 return (msgs);
2694 #endif
2695 }
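/*
 * Illustrative sketch (kept out of the build with #if 0): it works through
 * the vector-count arithmetic in ixgbe_setup_msix() above with one assumed
 * example configuration.
 */
#if 0
	/*
	 * Example: 16 CPUs, 18 MSI-X messages advertised, no tuneables set:
	 *   queues = min(mp_ncpus, msgs - 1) = 16, capped at 8 when
	 *   autoconfiguring, so queues = 8;
	 *   want   = queues + 1 = 9 (one RX/TX vector per queue plus Link);
	 *   9 <= 18, so 9 vectors are requested from the PCI layer.
	 */
	int queues = (mp_ncpus > (msgs - 1)) ? (msgs - 1) : mp_ncpus;
	if (queues > 8)
		queues = 8;
	int want = queues + 1;
#endif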
2696
2697
2698 static int
2699 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
2700 {
2701 pcireg_t memtype;
2702 device_t dev = adapter->dev;
2703 bus_addr_t addr;
2704 int flags;
2705
2706 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
2707 switch (memtype) {
2708 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
2709 case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
2710 adapter->osdep.mem_bus_space_tag = pa->pa_memt;
2711 if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
2712 memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
2713 goto map_err;
2714 if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
2715 aprint_normal_dev(dev, "clearing prefetchable bit\n");
2716 flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
2717 }
2718 if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
2719 adapter->osdep.mem_size, flags,
2720 &adapter->osdep.mem_bus_space_handle) != 0) {
2721 map_err:
2722 adapter->osdep.mem_size = 0;
2723 aprint_error_dev(dev, "unable to map BAR0\n");
2724 return ENXIO;
2725 }
2726 break;
2727 default:
2728 aprint_error_dev(dev, "unexpected type on BAR0\n");
2729 return ENXIO;
2730 }
2731
2732 /* Legacy defaults */
2733 adapter->num_queues = 1;
2734 adapter->hw.back = &adapter->osdep;
2735
2736 /*
2737 ** Now setup MSI or MSI/X, should
2738 ** return us the number of supported
2739 ** vectors. (Will be 1 for MSI)
2740 */
2741 adapter->msix = ixgbe_setup_msix(adapter);
2742 return (0);
2743 }
2744
2745 static void
2746 ixgbe_free_pci_resources(struct adapter * adapter)
2747 {
2748 #if defined(NETBSD_MSI_OR_MSIX)
2749 struct ix_queue *que = adapter->queues;
2750 device_t dev = adapter->dev;
2751 #endif
2752 int rid;
2753
2754 #if defined(NETBSD_MSI_OR_MSIX)
2755 int memrid;
2756 if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2757 memrid = PCI_BAR(MSIX_82598_BAR);
2758 else
2759 memrid = PCI_BAR(MSIX_82599_BAR);
2760
2761 /*
2762 ** There is a slight possibility of a failure mode
2763 ** in attach that will result in entering this function
2764 ** before interrupt resources have been initialized, and
2765 	** in that case we do not want to execute the loops below.
2766 	** We can detect this reliably by the state of the adapter's
2767 ** res pointer.
2768 */
2769 if (adapter->res == NULL)
2770 goto mem;
2771
2772 /*
2773 ** Release all msix queue resources:
2774 */
2775 for (int i = 0; i < adapter->num_queues; i++, que++) {
2776 rid = que->msix + 1;
2777 if (que->tag != NULL) {
2778 bus_teardown_intr(dev, que->res, que->tag);
2779 que->tag = NULL;
2780 }
2781 if (que->res != NULL)
2782 bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2783 }
2784 #endif
2785
2786 /* Clean the Legacy or Link interrupt last */
2787 if (adapter->linkvec) /* we are doing MSIX */
2788 rid = adapter->linkvec + 1;
2789 else
2790 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2791
2792 pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
2793 adapter->osdep.intr = NULL;
2794
2795 #if defined(NETBSD_MSI_OR_MSIX)
2796 mem:
2797 if (adapter->msix)
2798 pci_release_msi(dev);
2799
2800 if (adapter->msix_mem != NULL)
2801 bus_release_resource(dev, SYS_RES_MEMORY,
2802 memrid, adapter->msix_mem);
2803 #endif
2804
2805 if (adapter->osdep.mem_size != 0) {
2806 bus_space_unmap(adapter->osdep.mem_bus_space_tag,
2807 adapter->osdep.mem_bus_space_handle,
2808 adapter->osdep.mem_size);
2809 }
2810
2811 return;
2812 }
2813
2814 /*********************************************************************
2815 *
2816 * Setup networking device structure and register an interface.
2817 *
2818 **********************************************************************/
2819 static int
2820 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2821 {
2822 struct ethercom *ec = &adapter->osdep.ec;
2823 struct ixgbe_hw *hw = &adapter->hw;
2824 struct ifnet *ifp;
2825
2826 INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2827
2828 ifp = adapter->ifp = &ec->ec_if;
2829 strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
2830 ifp->if_baudrate = IF_Gbps(10);
2831 ifp->if_init = ixgbe_init;
2832 ifp->if_stop = ixgbe_ifstop;
2833 ifp->if_softc = adapter;
2834 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2835 ifp->if_ioctl = ixgbe_ioctl;
2836 #ifndef IXGBE_LEGACY_TX
2837 ifp->if_transmit = ixgbe_mq_start;
2838 ifp->if_qflush = ixgbe_qflush;
2839 #else
2840 ifp->if_start = ixgbe_start;
2841 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2842 #endif
2843
2844 if_attach(ifp);
2845 ether_ifattach(ifp, adapter->hw.mac.addr);
2846 ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
2847
2848 adapter->max_frame_size =
2849 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2850
2851 /*
2852 * Tell the upper layer(s) we support long frames.
2853 */
2854 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
2855
2856 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
2857 ifp->if_capenable = 0;
2858
2859 ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
2860 ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
2861 ifp->if_capabilities |= IFCAP_LRO;
2862 ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
2863 | ETHERCAP_VLAN_MTU;
2864 ec->ec_capenable = ec->ec_capabilities;
2865
2866 /*
2867 	** Don't turn this on by default; if vlans are
2868 	** created on another pseudo device (e.g. lagg)
2869 	** then vlan events are not passed through, breaking
2870 ** operation, but with HW FILTER off it works. If
2871 ** using vlans directly on the ixgbe driver you can
2872 ** enable this and get full hardware tag filtering.
2873 */
2874 ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
2875
2876 /*
2877 * Specify the media types supported by this adapter and register
2878 * callbacks to update media and link information
2879 */
2880 ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2881 ixgbe_media_status);
2882 ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2883 ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2884 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2885 ifmedia_add(&adapter->media,
2886 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2887 ifmedia_add(&adapter->media,
2888 IFM_ETHER | IFM_1000_T, 0, NULL);
2889 }
2890 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2891 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2892
2893 return (0);
2894 }
2895
2896 static void
2897 ixgbe_config_link(struct adapter *adapter)
2898 {
2899 struct ixgbe_hw *hw = &adapter->hw;
2900 u32 autoneg, err = 0;
2901 bool sfp, negotiate;
2902
2903 sfp = ixgbe_is_sfp(hw);
2904
2905 if (sfp) {
2906 void *ip;
2907
2908 if (hw->phy.multispeed_fiber) {
2909 hw->mac.ops.setup_sfp(hw);
2910 ixgbe_enable_tx_laser(hw);
2911 ip = adapter->msf_si;
2912 } else {
2913 ip = adapter->mod_si;
2914 }
2915
2916 kpreempt_disable();
2917 softint_schedule(ip);
2918 kpreempt_enable();
2919 } else {
2920 if (hw->mac.ops.check_link)
2921 err = ixgbe_check_link(hw, &adapter->link_speed,
2922 &adapter->link_up, FALSE);
2923 if (err)
2924 goto out;
2925 autoneg = hw->phy.autoneg_advertised;
2926 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2927 err = hw->mac.ops.get_link_capabilities(hw,
2928 &autoneg, &negotiate);
2929 else
2930 negotiate = 0;
2931 if (err)
2932 goto out;
2933 if (hw->mac.ops.setup_link)
2934 err = hw->mac.ops.setup_link(hw,
2935 autoneg, adapter->link_up);
2936 }
2937 out:
2938 return;
2939 }
2940
2941 /********************************************************************
2942 * Manage DMA'able memory.
2943 *******************************************************************/
2944
2945 static int
2946 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2947 struct ixgbe_dma_alloc *dma, const int mapflags)
2948 {
2949 device_t dev = adapter->dev;
2950 int r, rsegs;
2951
2952 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2953 DBA_ALIGN, 0, /* alignment, bounds */
2954 size, /* maxsize */
2955 1, /* nsegments */
2956 size, /* maxsegsize */
2957 BUS_DMA_ALLOCNOW, /* flags */
2958 &dma->dma_tag);
2959 if (r != 0) {
2960 aprint_error_dev(dev,
2961 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2962 goto fail_0;
2963 }
2964
2965 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2966 size,
2967 dma->dma_tag->dt_alignment,
2968 dma->dma_tag->dt_boundary,
2969 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2970 if (r != 0) {
2971 aprint_error_dev(dev,
2972 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2973 goto fail_1;
2974 }
2975
2976 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2977 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2978 if (r != 0) {
2979 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2980 __func__, r);
2981 goto fail_2;
2982 }
2983
2984 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2985 if (r != 0) {
2986 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2987 __func__, r);
2988 goto fail_3;
2989 }
2990
2991 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2992 size,
2993 NULL,
2994 mapflags | BUS_DMA_NOWAIT);
2995 if (r != 0) {
2996 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2997 __func__, r);
2998 goto fail_4;
2999 }
3000 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
3001 dma->dma_size = size;
3002 return 0;
3003 fail_4:
3004 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
3005 fail_3:
3006 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
3007 fail_2:
3008 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
3009 fail_1:
3010 ixgbe_dma_tag_destroy(dma->dma_tag);
3011 fail_0:
3012 return r;
3013 }
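/*
 * Illustrative sketch (kept out of the build with #if 0): it shows the
 * typical caller pairing for ixgbe_dma_malloc()/ixgbe_dma_free() above, as
 * used for the descriptor rings in ixgbe_allocate_queues() below.
 */
#if 0
	struct ixgbe_dma_alloc dma;
	int tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
		/* dma.dma_vaddr is the kernel mapping of the ring;
		 * dma.dma_paddr is the bus address written to TDBAL/TDBAH. */
		bzero(dma.dma_vaddr, tsize);
		/* ... use the ring ... */
		ixgbe_dma_free(adapter, &dma);
	}
#endif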
3014
3015 static void
3016 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
3017 {
3018 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
3019 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3020 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
3021 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
3022 ixgbe_dma_tag_destroy(dma->dma_tag);
3023 }
3024
3025
3026 /*********************************************************************
3027 *
3028 * Allocate memory for the transmit and receive rings, and then
3029 * the descriptors associated with each, called only once at attach.
3030 *
3031 **********************************************************************/
3032 static int
3033 ixgbe_allocate_queues(struct adapter *adapter)
3034 {
3035 device_t dev = adapter->dev;
3036 struct ix_queue *que;
3037 struct tx_ring *txr;
3038 struct rx_ring *rxr;
3039 int rsize, tsize, error = IXGBE_SUCCESS;
3040 int txconf = 0, rxconf = 0;
3041
3042 /* First allocate the top level queue structs */
3043 if (!(adapter->queues =
3044 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
3045 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3046 aprint_error_dev(dev, "Unable to allocate queue memory\n");
3047 error = ENOMEM;
3048 goto fail;
3049 }
3050
3051 /* First allocate the TX ring struct memory */
3052 if (!(adapter->tx_rings =
3053 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3054 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3055 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
3056 error = ENOMEM;
3057 goto tx_fail;
3058 }
3059
3060 /* Next allocate the RX */
3061 if (!(adapter->rx_rings =
3062 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3063 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3064 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
3065 error = ENOMEM;
3066 goto rx_fail;
3067 }
3068
3069 /* For the ring itself */
3070 tsize = roundup2(adapter->num_tx_desc *
3071 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
3072
3073 /*
3074 	 * Now set up the TX queues; txconf is needed to handle the
3075 	 * possibility that things fail midcourse and we need to
3076 	 * undo memory gracefully.
3077 */
3078 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3079 /* Set up some basics */
3080 txr = &adapter->tx_rings[i];
3081 txr->adapter = adapter;
3082 txr->me = i;
3083 txr->num_desc = adapter->num_tx_desc;
3084
3085 /* Initialize the TX side lock */
3086 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3087 device_xname(dev), txr->me);
3088 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
3089
3090 if (ixgbe_dma_malloc(adapter, tsize,
3091 &txr->txdma, BUS_DMA_NOWAIT)) {
3092 aprint_error_dev(dev,
3093 "Unable to allocate TX Descriptor memory\n");
3094 error = ENOMEM;
3095 goto err_tx_desc;
3096 }
3097 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
3098 bzero((void *)txr->tx_base, tsize);
3099
3100 /* Now allocate transmit buffers for the ring */
3101 if (ixgbe_allocate_transmit_buffers(txr)) {
3102 aprint_error_dev(dev,
3103 "Critical Failure setting up transmit buffers\n");
3104 error = ENOMEM;
3105 goto err_tx_desc;
3106 }
3107 #ifndef IXGBE_LEGACY_TX
3108 /* Allocate a buf ring */
3109 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
3110 M_WAITOK, &txr->tx_mtx);
3111 if (txr->br == NULL) {
3112 aprint_error_dev(dev,
3113 "Critical Failure setting up buf ring\n");
3114 error = ENOMEM;
3115 goto err_tx_desc;
3116 }
3117 #endif
3118 }
3119
3120 /*
3121 * Next the RX queues...
3122 */
3123 rsize = roundup2(adapter->num_rx_desc *
3124 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3125 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3126 rxr = &adapter->rx_rings[i];
3127 /* Set up some basics */
3128 rxr->adapter = adapter;
3129 rxr->me = i;
3130 rxr->num_desc = adapter->num_rx_desc;
3131
3132 /* Initialize the RX side lock */
3133 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3134 device_xname(dev), rxr->me);
3135 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
3136
3137 if (ixgbe_dma_malloc(adapter, rsize,
3138 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3139 aprint_error_dev(dev,
3140 			    "Unable to allocate Rx Descriptor memory\n");
3141 error = ENOMEM;
3142 goto err_rx_desc;
3143 }
3144 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3145 bzero((void *)rxr->rx_base, rsize);
3146
3147 /* Allocate receive buffers for the ring*/
3148 if (ixgbe_allocate_receive_buffers(rxr)) {
3149 aprint_error_dev(dev,
3150 "Critical Failure setting up receive buffers\n");
3151 error = ENOMEM;
3152 goto err_rx_desc;
3153 }
3154 }
3155
3156 /*
3157 ** Finally set up the queue holding structs
3158 */
3159 for (int i = 0; i < adapter->num_queues; i++) {
3160 que = &adapter->queues[i];
3161 que->adapter = adapter;
3162 que->txr = &adapter->tx_rings[i];
3163 que->rxr = &adapter->rx_rings[i];
3164 }
3165
3166 return (0);
3167
3168 err_rx_desc:
3169 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3170 ixgbe_dma_free(adapter, &rxr->rxdma);
3171 err_tx_desc:
3172 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3173 ixgbe_dma_free(adapter, &txr->txdma);
3174 free(adapter->rx_rings, M_DEVBUF);
3175 rx_fail:
3176 free(adapter->tx_rings, M_DEVBUF);
3177 tx_fail:
3178 free(adapter->queues, M_DEVBUF);
3179 fail:
3180 return (error);
3181 }
3182
3183 /*********************************************************************
3184 *
3185 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3186  *  the information needed to transmit a packet on the wire. This is
3187  *  called only once at attach; setup is done on every reset.
3188 *
3189 **********************************************************************/
3190 static int
3191 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
3192 {
3193 struct adapter *adapter = txr->adapter;
3194 device_t dev = adapter->dev;
3195 struct ixgbe_tx_buf *txbuf;
3196 int error, i;
3197
3198 /*
3199 * Setup DMA descriptor areas.
3200 */
3201 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
3202 1, 0, /* alignment, bounds */
3203 IXGBE_TSO_SIZE, /* maxsize */
3204 adapter->num_segs, /* nsegments */
3205 PAGE_SIZE, /* maxsegsize */
3206 0, /* flags */
3207 &txr->txtag))) {
3208 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
3209 goto fail;
3210 }
3211
3212 if (!(txr->tx_buffers =
3213 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3214 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3215 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
3216 error = ENOMEM;
3217 goto fail;
3218 }
3219
3220 /* Create the descriptor buffer dma maps */
3221 txbuf = txr->tx_buffers;
3222 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3223 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
3224 if (error != 0) {
3225 aprint_error_dev(dev,
3226 "Unable to create TX DMA map (%d)\n", error);
3227 goto fail;
3228 }
3229 }
3230
3231 return 0;
3232 fail:
3233 	/* We free all; this handles the case where we are in the middle */
3234 ixgbe_free_transmit_structures(adapter);
3235 return (error);
3236 }
3237
3238 /*********************************************************************
3239 *
3240 * Initialize a transmit ring.
3241 *
3242 **********************************************************************/
3243 static void
3244 ixgbe_setup_transmit_ring(struct tx_ring *txr)
3245 {
3246 struct adapter *adapter = txr->adapter;
3247 struct ixgbe_tx_buf *txbuf;
3248 int i;
3249 #ifdef DEV_NETMAP
3250 struct netmap_adapter *na = NA(adapter->ifp);
3251 struct netmap_slot *slot;
3252 #endif /* DEV_NETMAP */
3253
3254 /* Clear the old ring contents */
3255 IXGBE_TX_LOCK(txr);
3256 #ifdef DEV_NETMAP
3257 /*
3258 * (under lock): if in netmap mode, do some consistency
3259 * checks and set slot to entry 0 of the netmap ring.
3260 */
3261 slot = netmap_reset(na, NR_TX, txr->me, 0);
3262 #endif /* DEV_NETMAP */
3263 bzero((void *)txr->tx_base,
3264 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3265 /* Reset indices */
3266 txr->next_avail_desc = 0;
3267 txr->next_to_clean = 0;
3268
3269 /* Free any existing tx buffers. */
3270 txbuf = txr->tx_buffers;
3271 for (i = 0; i < txr->num_desc; i++, txbuf++) {
3272 if (txbuf->m_head != NULL) {
3273 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
3274 0, txbuf->m_head->m_pkthdr.len,
3275 BUS_DMASYNC_POSTWRITE);
3276 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
3277 m_freem(txbuf->m_head);
3278 txbuf->m_head = NULL;
3279 }
3280 #ifdef DEV_NETMAP
3281 /*
3282 * In netmap mode, set the map for the packet buffer.
3283 * NOTE: Some drivers (not this one) also need to set
3284 * the physical buffer address in the NIC ring.
3285 * Slots in the netmap ring (indexed by "si") are
3286 * kring->nkr_hwofs positions "ahead" wrt the
3287 * corresponding slot in the NIC ring. In some drivers
3288 * (not here) nkr_hwofs can be negative. Function
3289 * netmap_idx_n2k() handles wraparounds properly.
3290 */
3291 if (slot) {
3292 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3293 netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3294 }
3295 #endif /* DEV_NETMAP */
3296 /* Clear the EOP descriptor pointer */
3297 txbuf->eop = NULL;
3298 }
3299
3300 #ifdef IXGBE_FDIR
3301 /* Set the rate at which we sample packets */
3302 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3303 txr->atr_sample = atr_sample_rate;
3304 #endif
3305
3306 /* Set number of descriptors available */
3307 txr->tx_avail = adapter->num_tx_desc;
3308
3309 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3310 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3311 IXGBE_TX_UNLOCK(txr);
3312 }
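/*
 * Illustrative sketch (kept out of the build with #if 0), expanding on the
 * netmap note inside ixgbe_setup_transmit_ring() above.  The wrap handling
 * shown here is an assumption about what netmap_idx_n2k() does; the driver
 * itself always goes through that helper.
 */
#if 0
	/* Slot si in the netmap ring corresponds to NIC ring entry i,
	 * shifted by nkr_hwofs and wrapped at the ring size. */
	int si = i + na->tx_rings[txr->me].nkr_hwofs;
	if (si >= txr->num_desc)
		si -= txr->num_desc;
	else if (si < 0)
		si += txr->num_desc;
#endif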
3313
3314 /*********************************************************************
3315 *
3316 * Initialize all transmit rings.
3317 *
3318 **********************************************************************/
3319 static int
3320 ixgbe_setup_transmit_structures(struct adapter *adapter)
3321 {
3322 struct tx_ring *txr = adapter->tx_rings;
3323
3324 for (int i = 0; i < adapter->num_queues; i++, txr++)
3325 ixgbe_setup_transmit_ring(txr);
3326
3327 return (0);
3328 }
3329
3330 /*********************************************************************
3331 *
3332 * Enable transmit unit.
3333 *
3334 **********************************************************************/
3335 static void
3336 ixgbe_initialize_transmit_units(struct adapter *adapter)
3337 {
3338 struct tx_ring *txr = adapter->tx_rings;
3339 struct ixgbe_hw *hw = &adapter->hw;
3340
3341 /* Setup the Base and Length of the Tx Descriptor Ring */
3342
3343 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3344 u64 tdba = txr->txdma.dma_paddr;
3345 u32 txctrl;
3346
3347 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3348 (tdba & 0x00000000ffffffffULL));
3349 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3350 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3351 adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3352
3353 /* Setup the HW Tx Head and Tail descriptor pointers */
3354 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3355 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3356
3357 /* Setup Transmit Descriptor Cmd Settings */
3358 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3359 txr->queue_status = IXGBE_QUEUE_IDLE;
3360
3361 /* Set the processing limit */
3362 txr->process_limit = ixgbe_tx_process_limit;
3363
3364 /* Disable Head Writeback */
3365 switch (hw->mac.type) {
3366 case ixgbe_mac_82598EB:
3367 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3368 break;
3369 case ixgbe_mac_82599EB:
3370 case ixgbe_mac_X540:
3371 default:
3372 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3373 break;
3374 }
3375 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3376 switch (hw->mac.type) {
3377 case ixgbe_mac_82598EB:
3378 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3379 break;
3380 case ixgbe_mac_82599EB:
3381 case ixgbe_mac_X540:
3382 default:
3383 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3384 break;
3385 }
3386
3387 }
3388
3389 if (hw->mac.type != ixgbe_mac_82598EB) {
3390 u32 dmatxctl, rttdcs;
3391 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3392 dmatxctl |= IXGBE_DMATXCTL_TE;
3393 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3394 /* Disable arbiter to set MTQC */
3395 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3396 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3397 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3398 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3399 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3400 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3401 }
3402
3403 return;
3404 }
3405
3406 /*********************************************************************
3407 *
3408 * Free all transmit rings.
3409 *
3410 **********************************************************************/
3411 static void
3412 ixgbe_free_transmit_structures(struct adapter *adapter)
3413 {
3414 struct tx_ring *txr = adapter->tx_rings;
3415
3416 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3417 ixgbe_free_transmit_buffers(txr);
3418 ixgbe_dma_free(adapter, &txr->txdma);
3419 IXGBE_TX_LOCK_DESTROY(txr);
3420 }
3421 free(adapter->tx_rings, M_DEVBUF);
3422 }
3423
3424 /*********************************************************************
3425 *
3426 * Free transmit ring related data structures.
3427 *
3428 **********************************************************************/
3429 static void
3430 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3431 {
3432 struct adapter *adapter = txr->adapter;
3433 struct ixgbe_tx_buf *tx_buffer;
3434 int i;
3435
3436 INIT_DEBUGOUT("free_transmit_ring: begin");
3437
3438 if (txr->tx_buffers == NULL)
3439 return;
3440
3441 tx_buffer = txr->tx_buffers;
3442 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3443 if (tx_buffer->m_head != NULL) {
3444 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
3445 0, tx_buffer->m_head->m_pkthdr.len,
3446 BUS_DMASYNC_POSTWRITE);
3447 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3448 m_freem(tx_buffer->m_head);
3449 tx_buffer->m_head = NULL;
3450 if (tx_buffer->map != NULL) {
3451 ixgbe_dmamap_destroy(txr->txtag,
3452 tx_buffer->map);
3453 tx_buffer->map = NULL;
3454 }
3455 } else if (tx_buffer->map != NULL) {
3456 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
3457 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
3458 tx_buffer->map = NULL;
3459 }
3460 }
3461 #ifndef IXGBE_LEGACY_TX
3462 if (txr->br != NULL)
3463 buf_ring_free(txr->br, M_DEVBUF);
3464 #endif
3465 if (txr->tx_buffers != NULL) {
3466 free(txr->tx_buffers, M_DEVBUF);
3467 txr->tx_buffers = NULL;
3468 }
3469 if (txr->txtag != NULL) {
3470 ixgbe_dma_tag_destroy(txr->txtag);
3471 txr->txtag = NULL;
3472 }
3473 return;
3474 }
3475
3476 /*********************************************************************
3477 *
3478 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3479 *
3480 **********************************************************************/
3481
3482 static int
3483 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3484 u32 *cmd_type_len, u32 *olinfo_status)
3485 {
3486 struct m_tag *mtag;
3487 struct adapter *adapter = txr->adapter;
3488 struct ethercom *ec = &adapter->osdep.ec;
3489 struct ixgbe_adv_tx_context_desc *TXD;
3490 struct ether_vlan_header *eh;
3491 struct ip ip;
3492 struct ip6_hdr ip6;
3493 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3494 int ehdrlen, ip_hlen = 0;
3495 u16 etype;
3496 u8 ipproto __diagused = 0;
3497 int offload = TRUE;
3498 int ctxd = txr->next_avail_desc;
3499 u16 vtag = 0;
3500
3501 /* First check if TSO is to be used */
3502 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
3503 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3504
3505 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
3506 offload = FALSE;
3507
3508 /* Indicate the whole packet as payload when not doing TSO */
3509 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3510
3511 /* Now ready a context descriptor */
3512 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3513
3514 /*
3515 ** In advanced descriptors the vlan tag must
3516 ** be placed into the context descriptor. Hence
3517 ** we need to make one even if not doing offloads.
3518 */
3519 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3520 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3521 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3522 } else if (offload == FALSE) /* ... no offload to do */
3523 return 0;
3524
3525 /*
3526 * Determine where frame payload starts.
3527 * Jump over vlan headers if already present,
3528 * helpful for QinQ too.
3529 */
3530 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
3531 eh = mtod(mp, struct ether_vlan_header *);
3532 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3533 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3534 etype = ntohs(eh->evl_proto);
3535 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3536 } else {
3537 etype = ntohs(eh->evl_encap_proto);
3538 ehdrlen = ETHER_HDR_LEN;
3539 }
3540
3541 /* Set the ether header length */
3542 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3543
3544 switch (etype) {
3545 case ETHERTYPE_IP:
3546 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
3547 ip_hlen = ip.ip_hl << 2;
3548 ipproto = ip.ip_p;
3549 #if 0
3550 ip.ip_sum = 0;
3551 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
3552 #else
3553 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3554 ip.ip_sum == 0);
3555 #endif
3556 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3557 break;
3558 case ETHERTYPE_IPV6:
3559 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
3560 ip_hlen = sizeof(ip6);
3561 /* XXX-BZ this will go badly in case of ext hdrs. */
3562 ipproto = ip6.ip6_nxt;
3563 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3564 break;
3565 default:
3566 break;
3567 }
3568
3569 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
3570 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3571
3572 vlan_macip_lens |= ip_hlen;
3573 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3574
3575 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
3576 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3577 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3578 KASSERT(ipproto == IPPROTO_TCP);
3579 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
3580 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3581 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3582 KASSERT(ipproto == IPPROTO_UDP);
3583 }
3584
3585 /* Now copy bits into descriptor */
3586 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3587 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3588 TXD->seqnum_seed = htole32(0);
3589 TXD->mss_l4len_idx = htole32(0);
3590
3591 /* We've consumed the first desc, adjust counters */
3592 if (++ctxd == txr->num_desc)
3593 ctxd = 0;
3594 txr->next_avail_desc = ctxd;
3595 --txr->tx_avail;
3596
3597 return 0;
3598 }
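/*
 * Illustrative sketch (kept out of the build with #if 0): it shows the field
 * packing that ixgbe_tx_ctx_setup() above produces for the common case of an
 * untagged TCP/IPv4 frame with checksum offload; the values are an assumed
 * example.
 */
#if 0
	/* 14-byte Ethernet header, 20-byte IPv4 header (no options) */
	u32 ex_vlan_macip_lens =
	    (ETHER_HDR_LEN << IXGBE_ADVTXD_MACLEN_SHIFT) |	/* MACLEN = 14 */
	    sizeof(struct ip);					/* IPLEN  = 20 */
	u32 ex_type_tucmd_mlhl = IXGBE_ADVTXD_DCMD_DEXT |
	    IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_TUCMD_IPV4 |
	    IXGBE_ADVTXD_TUCMD_L4T_TCP;
	/* olinfo_status additionally gets IXGBE_TXD_POPTS_TXSM << 8 so the
	 * data descriptors request TCP checksum insertion. */
#endif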
3599
3600 /**********************************************************************
3601 *
3602 * Setup work for hardware segmentation offload (TSO) on
3603 * adapters using advanced tx descriptors
3604 *
3605 **********************************************************************/
3606 static int
3607 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3608 u32 *cmd_type_len, u32 *olinfo_status)
3609 {
3610 struct m_tag *mtag;
3611 struct adapter *adapter = txr->adapter;
3612 struct ethercom *ec = &adapter->osdep.ec;
3613 struct ixgbe_adv_tx_context_desc *TXD;
3614 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3615 u32 mss_l4len_idx = 0, paylen;
3616 u16 vtag = 0, eh_type;
3617 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3618 struct ether_vlan_header *eh;
3619 #ifdef INET6
3620 struct ip6_hdr *ip6;
3621 #endif
3622 #ifdef INET
3623 struct ip *ip;
3624 #endif
3625 struct tcphdr *th;
3626
3627
3628 /*
3629 * Determine where frame payload starts.
3630 * Jump over vlan headers if already present
3631 */
3632 eh = mtod(mp, struct ether_vlan_header *);
3633 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3634 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3635 eh_type = eh->evl_proto;
3636 } else {
3637 ehdrlen = ETHER_HDR_LEN;
3638 eh_type = eh->evl_encap_proto;
3639 }
3640
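	/*
	 * In both cases below the TCP checksum field is pre-seeded with
	 * the pseudo-header checksum (computed without the length) via
	 * in_cksum_phdr()/in6_cksum_phdr(); the hardware uses that seed
	 * when it rebuilds the checksum for each segment.
	 */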
3641 switch (ntohs(eh_type)) {
3642 #ifdef INET6
3643 case ETHERTYPE_IPV6:
3644 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3645 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3646 if (ip6->ip6_nxt != IPPROTO_TCP)
3647 return (ENXIO);
3648 ip_hlen = sizeof(struct ip6_hdr);
3650 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
3651 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
3652 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
3653 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3654 break;
3655 #endif
3656 #ifdef INET
3657 case ETHERTYPE_IP:
3658 ip = (struct ip *)(mp->m_data + ehdrlen);
3659 if (ip->ip_p != IPPROTO_TCP)
3660 return (ENXIO);
3661 ip->ip_sum = 0;
3662 ip_hlen = ip->ip_hl << 2;
3663 th = (struct tcphdr *)((char *)ip + ip_hlen);
3664 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
3665 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3666 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3667 /* Tell transmit desc to also do IPv4 checksum. */
3668 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3669 break;
3670 #endif
3671 default:
3672 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3673 __func__, ntohs(eh_type));
3674 break;
3675 }
3676
3677 ctxd = txr->next_avail_desc;
3678 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3679
3680 tcp_hlen = th->th_off << 2;
3681
3682 /* This is used in the transmit desc in encap */
3683 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3684
3685 /* VLAN MACLEN IPLEN */
3686 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
3687 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
3688 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3689 }
3690
3691 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3692 vlan_macip_lens |= ip_hlen;
3693 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3694
3695 /* ADV DTYPE TUCMD */
3696 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3697 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3698 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3699
3700 /* MSS L4LEN IDX */
3701 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
3702 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3703 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3704
3705 TXD->seqnum_seed = htole32(0);
3706
3707 if (++ctxd == txr->num_desc)
3708 ctxd = 0;
3709
3710 txr->tx_avail--;
3711 txr->next_avail_desc = ctxd;
3712 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3713 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3714 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3715 ++txr->tso_tx.ev_count;
3716 return (0);
3717 }
3718
3719 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. Doing this for every packet would
** be a performance hit, so we only sample one in every
** IXGBE_FDIR_RATE packets.
*/
3728 static void
3729 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3730 {
3731 struct adapter *adapter = txr->adapter;
3732 struct ix_queue *que;
3733 struct ip *ip;
3734 struct tcphdr *th;
3735 struct udphdr *uh;
3736 struct ether_vlan_header *eh;
3737 union ixgbe_atr_hash_dword input = {.dword = 0};
3738 union ixgbe_atr_hash_dword common = {.dword = 0};
3739 int ehdrlen, ip_hlen;
3740 u16 etype;
3741
3742 eh = mtod(mp, struct ether_vlan_header *);
3743 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3744 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3745 etype = eh->evl_proto;
3746 } else {
3747 ehdrlen = ETHER_HDR_LEN;
3748 etype = eh->evl_encap_proto;
3749 }
3750
3751 /* Only handling IPv4 */
3752 if (etype != htons(ETHERTYPE_IP))
3753 return;
3754
3755 ip = (struct ip *)(mp->m_data + ehdrlen);
3756 ip_hlen = ip->ip_hl << 2;
3757
3758 /* check if we're UDP or TCP */
3759 switch (ip->ip_p) {
3760 case IPPROTO_TCP:
3761 th = (struct tcphdr *)((char *)ip + ip_hlen);
3762 /* src and dst are inverted */
3763 common.port.dst ^= th->th_sport;
3764 common.port.src ^= th->th_dport;
3765 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3766 break;
3767 case IPPROTO_UDP:
3768 uh = (struct udphdr *)((char *)ip + ip_hlen);
3769 /* src and dst are inverted */
3770 common.port.dst ^= uh->uh_sport;
3771 common.port.src ^= uh->uh_dport;
3772 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3773 break;
3774 default:
3775 return;
3776 }
3777
3778 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3779 if (mp->m_pkthdr.ether_vtag)
3780 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3781 else
3782 common.flex_bytes ^= etype;
3783 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3784
3785 que = &adapter->queues[txr->me];
3786 /*
3787 ** This assumes the Rx queue and Tx
3788 ** queue are bound to the same CPU
3789 */
3790 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3791 input, common, que->msix);
3792 }
3793 #endif /* IXGBE_FDIR */
3794
3795 /**********************************************************************
3796 *
3797 * Examine each tx_buffer in the used queue. If the hardware is done
3798 * processing the packet then free associated resources. The
3799 * tx_buffer is put back on the free queue.
3800 *
3801 **********************************************************************/
3802 static bool
3803 ixgbe_txeof(struct tx_ring *txr)
3804 {
3805 struct adapter *adapter = txr->adapter;
3806 struct ifnet *ifp = adapter->ifp;
3807 u32 work, processed = 0;
3808 u16 limit = txr->process_limit;
3809 struct ixgbe_tx_buf *buf;
3810 union ixgbe_adv_tx_desc *txd;
3811 struct timeval now, elapsed;
3812
3813 KASSERT(mutex_owned(&txr->tx_mtx));
3814
3815 #ifdef DEV_NETMAP
3816 if (ifp->if_capenable & IFCAP_NETMAP) {
3817 struct netmap_adapter *na = NA(ifp);
3818 struct netmap_kring *kring = &na->tx_rings[txr->me];
3819 txd = txr->tx_base;
3820 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3821 BUS_DMASYNC_POSTREAD);
3822 /*
3823 * In netmap mode, all the work is done in the context
3824 * of the client thread. Interrupt handlers only wake up
3825 * clients, which may be sleeping on individual rings
3826 * or on a global resource for all rings.
3827 * To implement tx interrupt mitigation, we wake up the client
3828 * thread roughly every half ring, even if the NIC interrupts
3829 * more frequently. This is implemented as follows:
3830 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3831 * the slot that should wake up the thread (nkr_num_slots
3832 * means the user thread should not be woken up);
3833 * - the driver ignores tx interrupts unless netmap_mitigate=0
3834 * or the slot has the DD bit set.
3835 *
3836 * When the driver has separate locks, we need to
3837 * release and re-acquire txlock to avoid deadlocks.
3838 * XXX see if we can find a better way.
3839 */
3840 if (!netmap_mitigate ||
3841 (kring->nr_kflags < kring->nkr_num_slots &&
3842 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3843 netmap_tx_irq(ifp, txr->me |
3844 (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
3845 }
3846 return FALSE;
3847 }
3848 #endif /* DEV_NETMAP */
3849
3850 if (txr->tx_avail == txr->num_desc) {
3851 txr->queue_status = IXGBE_QUEUE_IDLE;
3852 return false;
3853 }
3854
3855 /* Get work starting point */
3856 work = txr->next_to_clean;
3857 buf = &txr->tx_buffers[work];
3858 txd = &txr->tx_base[work];
3859 work -= txr->num_desc; /* The distance to ring end */
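	/*
	 * 'work' is biased by -num_desc so that it reaches zero exactly
	 * at the ring wrap; the wrap checks below then reduce to a cheap
	 * (!work) test.
	 */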
3860 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3861 BUS_DMASYNC_POSTREAD);
3862 do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
3864 if (eop == NULL) /* No work */
3865 break;
3866
3867 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3868 break; /* I/O not complete */
3869
3870 if (buf->m_head) {
3871 txr->bytes +=
3872 buf->m_head->m_pkthdr.len;
3873 bus_dmamap_sync(txr->txtag->dt_dmat,
3874 buf->map,
3875 0, buf->m_head->m_pkthdr.len,
3876 BUS_DMASYNC_POSTWRITE);
3877 ixgbe_dmamap_unload(txr->txtag,
3878 buf->map);
3879 m_freem(buf->m_head);
3880 buf->m_head = NULL;
3881 /*
3882 * NetBSD: Don't override buf->map with NULL here.
3883 * It'll panic when a ring runs one lap around.
3884 */
3885 }
3886 buf->eop = NULL;
3887 ++txr->tx_avail;
3888
3889 /* We clean the range if multi segment */
3890 while (txd != eop) {
3891 ++txd;
3892 ++buf;
3893 ++work;
3894 /* wrap the ring? */
3895 if (__predict_false(!work)) {
3896 work -= txr->num_desc;
3897 buf = txr->tx_buffers;
3898 txd = txr->tx_base;
3899 }
3900 if (buf->m_head) {
3901 txr->bytes +=
3902 buf->m_head->m_pkthdr.len;
3903 bus_dmamap_sync(txr->txtag->dt_dmat,
3904 buf->map,
3905 0, buf->m_head->m_pkthdr.len,
3906 BUS_DMASYNC_POSTWRITE);
3907 ixgbe_dmamap_unload(txr->txtag,
3908 buf->map);
3909 m_freem(buf->m_head);
3910 buf->m_head = NULL;
3911 /*
3912 * NetBSD: Don't override buf->map with NULL
3913 * here. It'll panic when a ring runs one lap
3914 * around.
3915 */
3916 }
3917 ++txr->tx_avail;
3918 buf->eop = NULL;
3919
3920 }
3921 ++txr->packets;
3922 ++processed;
3923 ++ifp->if_opackets;
3924 getmicrotime(&txr->watchdog_time);
3925
3926 /* Try the next packet */
3927 ++txd;
3928 ++buf;
3929 ++work;
3930 /* reset with a wrap */
3931 if (__predict_false(!work)) {
3932 work -= txr->num_desc;
3933 buf = txr->tx_buffers;
3934 txd = txr->tx_base;
3935 }
3936 prefetch(txd);
3937 } while (__predict_true(--limit));
3938
3939 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3940 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3941
3942 work += txr->num_desc;
3943 txr->next_to_clean = work;
3944
	/*
	** Watchdog calculation: we know there's work outstanding
	** or the first return would have been taken, so nothing
	** processed for too long indicates a hang.
	*/
3951 getmicrotime(&now);
3952 timersub(&now, &txr->watchdog_time, &elapsed);
3953 if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
3954 txr->queue_status = IXGBE_QUEUE_HUNG;
3955
3956 if (txr->tx_avail == txr->num_desc) {
3957 txr->queue_status = IXGBE_QUEUE_IDLE;
3958 return false;
3959 }
3960
3961 return true;
3962 }
3963
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again to retry.
 *
 **********************************************************************/
3973 static void
3974 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3975 {
3976 struct adapter *adapter = rxr->adapter;
3977 struct ixgbe_rx_buf *rxbuf;
3978 struct mbuf *mp;
3979 int i, j, error;
3980 bool refreshed = false;
3981
3982 i = j = rxr->next_to_refresh;
3983 /* Control the loop with one beyond */
3984 if (++j == rxr->num_desc)
3985 j = 0;
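	/*
	 * 'j' runs one slot ahead of 'i': the loop stops short of 'limit',
	 * and next_to_refresh (and hence the RDT tail written below) only
	 * advances once slot 'i' has a valid buffer and mapping.
	 */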
3986
3987 while (j != limit) {
3988 rxbuf = &rxr->rx_buffers[i];
3989 if (rxbuf->buf == NULL) {
3990 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
3991 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
3992 if (mp == NULL) {
3993 rxr->no_jmbuf.ev_count++;
3994 goto update;
3995 }
3996 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3997 m_adj(mp, ETHER_ALIGN);
3998 } else
3999 mp = rxbuf->buf;
4000
4001 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4002 /* If we're dealing with an mbuf that was copied rather
4003 * than replaced, there's no need to go through busdma.
4004 */
4005 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
4006 /* Get the memory mapping */
4007 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4008 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4009 if (error != 0) {
4010 printf("Refresh mbufs: payload dmamap load"
4011 " failure - %d\n", error);
4012 m_free(mp);
4013 rxbuf->buf = NULL;
4014 goto update;
4015 }
4016 rxbuf->buf = mp;
4017 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4018 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
4019 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
4020 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4021 } else {
4022 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
4023 rxbuf->flags &= ~IXGBE_RX_COPY;
4024 }
4025
4026 refreshed = true;
4027 /* Next is precalculated */
4028 i = j;
4029 rxr->next_to_refresh = i;
4030 if (++j == rxr->num_desc)
4031 j = 0;
4032 }
4033 update:
4034 if (refreshed) /* Update hardware tail index */
4035 IXGBE_WRITE_REG(&adapter->hw,
4036 IXGBE_RDT(rxr->me), rxr->next_to_refresh);
4037 return;
4038 }
4039
4040 /*********************************************************************
4041 *
4042 * Allocate memory for rx_buffer structures. Since we use one
4043 * rx_buffer per received packet, the maximum number of rx_buffer's
4044 * that we'll need is equal to the number of receive descriptors
4045 * that we've allocated.
4046 *
4047 **********************************************************************/
4048 static int
4049 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
4050 {
4051 struct adapter *adapter = rxr->adapter;
4052 device_t dev = adapter->dev;
4053 struct ixgbe_rx_buf *rxbuf;
4054 int i, bsize, error;
4055
4056 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
4057 if (!(rxr->rx_buffers =
4058 (struct ixgbe_rx_buf *) malloc(bsize,
4059 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4060 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
4061 error = ENOMEM;
4062 goto fail;
4063 }
4064
4065 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
4066 1, 0, /* alignment, bounds */
4067 MJUM16BYTES, /* maxsize */
4068 1, /* nsegments */
4069 MJUM16BYTES, /* maxsegsize */
4070 0, /* flags */
4071 &rxr->ptag))) {
4072 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
4073 goto fail;
4074 }
4075
4076 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
4077 rxbuf = &rxr->rx_buffers[i];
4078 error = ixgbe_dmamap_create(rxr->ptag,
4079 BUS_DMA_NOWAIT, &rxbuf->pmap);
4080 if (error) {
4081 aprint_error_dev(dev, "Unable to create RX dma map\n");
4082 goto fail;
4083 }
4084 }
4085
4086 return (0);
4087
4088 fail:
4089 /* Frees all, but can handle partial completion */
4090 ixgbe_free_receive_structures(adapter);
4091 return (error);
4092 }
4093
4094 /*
4095 ** Used to detect a descriptor that has
4096 ** been merged by Hardware RSC.
4097 */
4098 static inline u32
4099 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
4100 {
4101 return (le32toh(rx->wb.lower.lo_dword.data) &
4102 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
4103 }
4104
/*********************************************************************
 *
 *  Initialize the Hardware RSC (LRO) feature on 82599
 *  for an RX ring; this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
4116 static void
4117 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
4118 {
4119 struct adapter *adapter = rxr->adapter;
4120 struct ixgbe_hw *hw = &adapter->hw;
4121 u32 rscctrl, rdrxctl;
4122
4123 /* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared value back so RSC is really disabled */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
4129
4130 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4131 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4132 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
4133 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4134 #endif /* DEV_NETMAP */
4135 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4136 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4137 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4138
4139 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
4140 rscctrl |= IXGBE_RSCCTL_RSCEN;
4141 /*
4142 ** Limit the total number of descriptors that
4143 ** can be combined, so it does not exceed 64K
4144 */
4145 if (rxr->mbuf_sz == MCLBYTES)
4146 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
4147 else if (rxr->mbuf_sz == MJUMPAGESIZE)
4148 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
4149 else if (rxr->mbuf_sz == MJUM9BYTES)
4150 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
4151 else /* Using 16K cluster */
4152 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
4153
4154 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
4155
4156 /* Enable TCP header recognition */
4157 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
4158 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
4159 IXGBE_PSRTYPE_TCPHDR));
4160
4161 /* Disable RSC for ACK packets */
4162 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
4163 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
4164
4165 rxr->hw_rsc = TRUE;
4166 }
4167
4168
4169 static void
4170 ixgbe_free_receive_ring(struct rx_ring *rxr)
4171 {
4172 struct ixgbe_rx_buf *rxbuf;
4173 int i;
4174
4175 for (i = 0; i < rxr->num_desc; i++) {
4176 rxbuf = &rxr->rx_buffers[i];
4177 if (rxbuf->buf != NULL) {
4178 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4179 0, rxbuf->buf->m_pkthdr.len,
4180 BUS_DMASYNC_POSTREAD);
4181 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4182 rxbuf->buf->m_flags |= M_PKTHDR;
4183 m_freem(rxbuf->buf);
4184 rxbuf->buf = NULL;
4185 }
4186 }
4187 }
4188
4189
4190 /*********************************************************************
4191 *
4192 * Initialize a receive ring and its buffers.
4193 *
4194 **********************************************************************/
4195 static int
4196 ixgbe_setup_receive_ring(struct rx_ring *rxr)
4197 {
4198 struct adapter *adapter;
4199 struct ixgbe_rx_buf *rxbuf;
4200 #ifdef LRO
4201 struct ifnet *ifp;
4202 struct lro_ctrl *lro = &rxr->lro;
4203 #endif /* LRO */
4204 int rsize, error = 0;
4205 #ifdef DEV_NETMAP
4206 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4207 struct netmap_slot *slot;
4208 #endif /* DEV_NETMAP */
4209
4210 adapter = rxr->adapter;
4211 #ifdef LRO
4212 ifp = adapter->ifp;
4213 #endif /* LRO */
4214
4215 /* Clear the ring contents */
4216 IXGBE_RX_LOCK(rxr);
4217 #ifdef DEV_NETMAP
4218 /* same as in ixgbe_setup_transmit_ring() */
4219 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4220 #endif /* DEV_NETMAP */
4221 rsize = roundup2(adapter->num_rx_desc *
4222 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4223 bzero((void *)rxr->rx_base, rsize);
4224 /* Cache the size */
4225 rxr->mbuf_sz = adapter->rx_mbuf_sz;
4226
4227 /* Free current RX buffer structs and their mbufs */
4228 ixgbe_free_receive_ring(rxr);
4229
4230 IXGBE_RX_UNLOCK(rxr);
4231
4232 /* Now reinitialize our supply of jumbo mbufs. The number
4233 * or size of jumbo mbufs may have changed.
4234 */
4235 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
4236 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
4237
4238 IXGBE_RX_LOCK(rxr);
4239
4240 /* Now replenish the mbufs */
4241 for (int j = 0; j != rxr->num_desc; ++j) {
4242 struct mbuf *mp;
4243
4244 rxbuf = &rxr->rx_buffers[j];
4245 #ifdef DEV_NETMAP
4246 /*
4247 * In netmap mode, fill the map and set the buffer
4248 * address in the NIC ring, considering the offset
4249 * between the netmap and NIC rings (see comment in
4250 * ixgbe_setup_transmit_ring() ). No need to allocate
4251 * an mbuf, so end the block with a continue;
4252 */
4253 if (slot) {
4254 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4255 uint64_t paddr;
4256 void *addr;
4257
4258 addr = PNMB(slot + sj, &paddr);
4259 netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4260 /* Update descriptor */
4261 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4262 continue;
4263 }
4264 #endif /* DEV_NETMAP */
4265 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
4266 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
4267 if (rxbuf->buf == NULL) {
4268 error = ENOBUFS;
4269 goto fail;
4270 }
4271 mp = rxbuf->buf;
4272 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4273 /* Get the memory mapping */
4274 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
4275 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
4276 if (error != 0)
4277 goto fail;
4278 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
4279 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
4280 /* Update descriptor */
4281 rxr->rx_base[j].read.pkt_addr =
4282 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
4283 }
4284
4285
4286 /* Setup our descriptor indices */
4287 rxr->next_to_check = 0;
4288 rxr->next_to_refresh = 0;
4289 rxr->lro_enabled = FALSE;
4290 rxr->rx_copies.ev_count = 0;
4291 rxr->rx_bytes.ev_count = 0;
4292 rxr->discard = FALSE;
4293 rxr->vtag_strip = FALSE;
4294
4295 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4296 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4297
4298 /*
4299 ** Now set up the LRO interface:
4300 */
4301 if (ixgbe_rsc_enable)
4302 ixgbe_setup_hw_rsc(rxr);
4303 #ifdef LRO
4304 else if (ifp->if_capenable & IFCAP_LRO) {
4305 device_t dev = adapter->dev;
4306 int err = tcp_lro_init(lro);
4307 if (err) {
4308 device_printf(dev, "LRO Initialization failed!\n");
4309 goto fail;
4310 }
4311 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4312 rxr->lro_enabled = TRUE;
4313 lro->ifp = adapter->ifp;
4314 }
4315 #endif /* LRO */
4316
4317 IXGBE_RX_UNLOCK(rxr);
4318 return (0);
4319
4320 fail:
4321 ixgbe_free_receive_ring(rxr);
4322 IXGBE_RX_UNLOCK(rxr);
4323 return (error);
4324 }
4325
4326 /*********************************************************************
4327 *
4328 * Initialize all receive rings.
4329 *
4330 **********************************************************************/
4331 static int
4332 ixgbe_setup_receive_structures(struct adapter *adapter)
4333 {
4334 struct rx_ring *rxr = adapter->rx_rings;
4335 int j;
4336
4337 for (j = 0; j < adapter->num_queues; j++, rxr++)
4338 if (ixgbe_setup_receive_ring(rxr))
4339 goto fail;
4340
4341 return (0);
4342 fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
4348 for (int i = 0; i < j; ++i) {
4349 rxr = &adapter->rx_rings[i];
4350 ixgbe_free_receive_ring(rxr);
4351 }
4352
4353 return (ENOBUFS);
4354 }
4355
4356 /*********************************************************************
4357 *
4358 * Setup receive registers and features.
4359 *
4360 **********************************************************************/
4361 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4362
4363 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4364
4365 static void
4366 ixgbe_initialize_receive_units(struct adapter *adapter)
4367 {
4368 int i;
4369 struct rx_ring *rxr = adapter->rx_rings;
4370 struct ixgbe_hw *hw = &adapter->hw;
4371 struct ifnet *ifp = adapter->ifp;
4372 u32 bufsz, rxctrl, fctrl, srrctl, rxcsum;
4373 u32 reta, mrqc = 0, hlreg, r[10];
4374
4375
4376 /*
4377 * Make sure receives are disabled while
4378 * setting up the descriptor ring
4379 */
4380 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4381 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4382 rxctrl & ~IXGBE_RXCTRL_RXEN);
4383
4384 /* Enable broadcasts */
4385 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4386 fctrl |= IXGBE_FCTRL_BAM;
4387 fctrl |= IXGBE_FCTRL_DPF;
4388 fctrl |= IXGBE_FCTRL_PMCF;
4389 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4390
4391 /* Set for Jumbo Frames? */
4392 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4393 if (ifp->if_mtu > ETHERMTU)
4394 hlreg |= IXGBE_HLREG0_JUMBOEN;
4395 else
4396 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4397 #ifdef DEV_NETMAP
4398 /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4399 if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4400 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4401 else
4402 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4403 #endif /* DEV_NETMAP */
4404 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4405
4406 bufsz = (adapter->rx_mbuf_sz +
4407 BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
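	/*
	 * SRRCTL's BSIZEPKT field is in units of
	 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes, so round the cluster
	 * size up before shifting to make sure a full mbuf always fits.
	 */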
4408
4409 for (i = 0; i < adapter->num_queues; i++, rxr++) {
4410 u64 rdba = rxr->rxdma.dma_paddr;
4411
4412 /* Setup the Base and Length of the Rx Descriptor Ring */
4413 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4414 (rdba & 0x00000000ffffffffULL));
4415 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4416 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4417 adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4418
4419 /* Set up the SRRCTL register */
4420 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4421 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4422 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4423 srrctl |= bufsz;
4424 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4425 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4426
4427 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4428 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4429 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4430
4431 /* Set the processing limit */
4432 rxr->process_limit = ixgbe_rx_process_limit;
4433 }
4434
4435 if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4436 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4437 IXGBE_PSRTYPE_UDPHDR |
4438 IXGBE_PSRTYPE_IPV4HDR |
4439 IXGBE_PSRTYPE_IPV6HDR;
4440 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4441 }
4442
4443 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4444
4445 /* Setup RSS */
4446 if (adapter->num_queues > 1) {
4447 int j;
4448 reta = 0;
4449
4450 /* set up random bits */
4451 cprng_fast(&r, sizeof(r));
4452
4453 /* Set up the redirection table */
4454 for (i = 0, j = 0; i < 128; i++, j++) {
4455 if (j == adapter->num_queues) j = 0;
4456 reta = (reta << 8) | (j * 0x11);
4457 if ((i & 3) == 3)
4458 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4459 }
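		/*
		 * Each 32-bit RETA register packs four one-byte entries,
		 * so a register is written once every fourth iteration;
		 * the 128 entries round-robin over the active queues.
		 */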
4460
4461 /* Now fill our hash function seeds */
4462 for (i = 0; i < 10; i++)
4463 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
4464
4465 /* Perform hash on these packet types */
4466 mrqc = IXGBE_MRQC_RSSEN
4467 | IXGBE_MRQC_RSS_FIELD_IPV4
4468 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4469 | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4470 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4471 | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4472 | IXGBE_MRQC_RSS_FIELD_IPV6
4473 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4474 | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4475 | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4476 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4477
4478 /* RSS and RX IPP Checksum are mutually exclusive */
4479 rxcsum |= IXGBE_RXCSUM_PCSD;
4480 }
4481
4482 if (ifp->if_capenable & IFCAP_RXCSUM)
4483 rxcsum |= IXGBE_RXCSUM_PCSD;
4484
4485 if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4486 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4487
4488 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4489
4490 return;
4491 }
4492
4493 /*********************************************************************
4494 *
4495 * Free all receive rings.
4496 *
4497 **********************************************************************/
4498 static void
4499 ixgbe_free_receive_structures(struct adapter *adapter)
4500 {
4501 struct rx_ring *rxr = adapter->rx_rings;
4502
4503 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4504 #ifdef LRO
4505 struct lro_ctrl *lro = &rxr->lro;
4506 #endif /* LRO */
4507 ixgbe_free_receive_buffers(rxr);
4508 #ifdef LRO
4509 /* Free LRO memory */
4510 tcp_lro_free(lro);
4511 #endif /* LRO */
4512 /* Free the ring memory as well */
4513 ixgbe_dma_free(adapter, &rxr->rxdma);
4514 IXGBE_RX_LOCK_DESTROY(rxr);
4515 }
4516
4517 free(adapter->rx_rings, M_DEVBUF);
4518 }
4519
4520
4521 /*********************************************************************
4522 *
4523 * Free receive ring data structures
4524 *
4525 **********************************************************************/
4526 static void
4527 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4528 {
4529 struct adapter *adapter = rxr->adapter;
4530 struct ixgbe_rx_buf *rxbuf;
4531
4532 INIT_DEBUGOUT("free_receive_structures: begin");
4533
4534 /* Cleanup any existing buffers */
4535 if (rxr->rx_buffers != NULL) {
4536 for (int i = 0; i < adapter->num_rx_desc; i++) {
4537 rxbuf = &rxr->rx_buffers[i];
4538 if (rxbuf->buf != NULL) {
4539 bus_dmamap_sync(rxr->ptag->dt_dmat,
4540 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
4541 BUS_DMASYNC_POSTREAD);
4542 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
4543 rxbuf->buf->m_flags |= M_PKTHDR;
4544 m_freem(rxbuf->buf);
4545 }
4546 rxbuf->buf = NULL;
4547 if (rxbuf->pmap != NULL) {
4548 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4549 rxbuf->pmap = NULL;
4550 }
4551 }
4552 if (rxr->rx_buffers != NULL) {
4553 free(rxr->rx_buffers, M_DEVBUF);
4554 rxr->rx_buffers = NULL;
4555 }
4556 }
4557
4558 if (rxr->ptag != NULL) {
4559 ixgbe_dma_tag_destroy(rxr->ptag);
4560 rxr->ptag = NULL;
4561 }
4562
4563 return;
4564 }
4565
4566 static __inline void
4567 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4568 {
4569 int s;
4570
4571 #ifdef LRO
4572 struct adapter *adapter = ifp->if_softc;
4573 struct ethercom *ec = &adapter->osdep.ec;
4574
	/*
	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
	 * of the packet should be computed by hardware. Also it should not
	 * have a VLAN tag in the ethernet header. In case of IPv6 we do not
	 * yet support ext. hdrs.
	 */
4580 if (rxr->lro_enabled &&
4581 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
4582 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4583 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4584 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4585 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4586 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4587 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4588 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
4595 if (rxr->lro.lro_cnt != 0)
4596 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4597 return;
4598 }
4599 #endif /* LRO */
4600
4601 IXGBE_RX_UNLOCK(rxr);
4602
4603 s = splnet();
4604 /* Pass this up to any BPF listeners. */
4605 bpf_mtap(ifp, m);
4606 (*ifp->if_input)(ifp, m);
4607 splx(s);
4608
4609 IXGBE_RX_LOCK(rxr);
4610 }
4611
4612 static __inline void
4613 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4614 {
4615 struct ixgbe_rx_buf *rbuf;
4616
4617 rbuf = &rxr->rx_buffers[i];
4618
4619 if (rbuf->fmp != NULL) {/* Partial chain ? */
4620 rbuf->fmp->m_flags |= M_PKTHDR;
4621 m_freem(rbuf->fmp);
4622 rbuf->fmp = NULL;
4623 }
4624
	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
4632 if (rbuf->buf) {
4633 m_free(rbuf->buf);
4634 rbuf->buf = NULL;
4635 }
4636
4637 return;
4638 }
4639
4640
/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  We loop at most 'count' times (the ring's process limit) before
 *  returning.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
4652 static bool
4653 ixgbe_rxeof(struct ix_queue *que)
4654 {
4655 struct adapter *adapter = que->adapter;
4656 struct rx_ring *rxr = que->rxr;
4657 struct ifnet *ifp = adapter->ifp;
4658 #ifdef LRO
4659 struct lro_ctrl *lro = &rxr->lro;
4660 struct lro_entry *queued;
4661 #endif /* LRO */
4662 int i, nextp, processed = 0;
4663 u32 staterr = 0;
4664 u16 count = rxr->process_limit;
4665 union ixgbe_adv_rx_desc *cur;
4666 struct ixgbe_rx_buf *rbuf, *nbuf;
4667
4668 IXGBE_RX_LOCK(rxr);
4669
4670 #ifdef DEV_NETMAP
4671 /* Same as the txeof routine: wakeup clients on intr. */
4672 if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4673 return (FALSE);
4674 #endif /* DEV_NETMAP */
4675 for (i = rxr->next_to_check; count != 0;) {
4676 struct mbuf *sendmp, *mp;
4677 u32 rsc, ptype;
4678 u16 len;
4679 u16 vtag = 0;
4680 bool eop;
4681
4682 /* Sync the ring. */
4683 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4684 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4685
4686 cur = &rxr->rx_base[i];
4687 staterr = le32toh(cur->wb.upper.status_error);
4688
4689 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4690 break;
4691 if ((ifp->if_flags & IFF_RUNNING) == 0)
4692 break;
4693
4694 count--;
4695 sendmp = NULL;
4696 nbuf = NULL;
4697 rsc = 0;
4698 cur->wb.upper.status_error = 0;
4699 rbuf = &rxr->rx_buffers[i];
4700 mp = rbuf->buf;
4701
4702 len = le16toh(cur->wb.upper.length);
4703 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4704 IXGBE_RXDADV_PKTTYPE_MASK;
4705 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4706
4707 /* Make sure bad packets are discarded */
4708 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4709 (rxr->discard)) {
4710 rxr->rx_discarded.ev_count++;
4711 if (eop)
4712 rxr->discard = FALSE;
4713 else
4714 rxr->discard = TRUE;
4715 ixgbe_rx_discard(rxr, i);
4716 goto next_desc;
4717 }
4718
		/*
		** On 82599, which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors; rather, the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
4731 if (!eop) {
4732 /*
4733 ** Figure out the next descriptor
4734 ** of this frame.
4735 */
4736 if (rxr->hw_rsc == TRUE) {
4737 rsc = ixgbe_rsc_count(cur);
4738 rxr->rsc_num += (rsc - 1);
4739 }
4740 if (rsc) { /* Get hardware index */
4741 nextp = ((staterr &
4742 IXGBE_RXDADV_NEXTP_MASK) >>
4743 IXGBE_RXDADV_NEXTP_SHIFT);
4744 } else { /* Just sequential */
4745 nextp = i + 1;
4746 if (nextp == adapter->num_rx_desc)
4747 nextp = 0;
4748 }
4749 nbuf = &rxr->rx_buffers[nextp];
4750 prefetch(nbuf);
4751 }
4752 /*
4753 ** Rather than using the fmp/lmp global pointers
4754 ** we now keep the head of a packet chain in the
4755 ** buffer struct and pass this along from one
4756 ** descriptor to the next, until we get EOP.
4757 */
4758 mp->m_len = len;
		/*
		** See if there is a stored head from a
		** previous descriptor of this frame
		*/
4763 sendmp = rbuf->fmp;
4764
4765 if (sendmp != NULL) { /* secondary frag */
4766 rbuf->buf = rbuf->fmp = NULL;
4767 mp->m_flags &= ~M_PKTHDR;
4768 sendmp->m_pkthdr.len += mp->m_len;
4769 } else {
4770 /*
4771 * Optimize. This might be a small packet,
4772 * maybe just a TCP ACK. Do a fast copy that
4773 * is cache aligned into a new mbuf, and
4774 * leave the old mbuf+cluster for re-use.
4775 */
4776 if (eop && len <= IXGBE_RX_COPY_LEN) {
4777 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4778 if (sendmp != NULL) {
4779 sendmp->m_data +=
4780 IXGBE_RX_COPY_ALIGN;
4781 ixgbe_bcopy(mp->m_data,
4782 sendmp->m_data, len);
4783 sendmp->m_len = len;
4784 rxr->rx_copies.ev_count++;
4785 rbuf->flags |= IXGBE_RX_COPY;
4786 }
4787 }
4788 if (sendmp == NULL) {
4789 rbuf->buf = rbuf->fmp = NULL;
4790 sendmp = mp;
4791 }
4792
4793 /* first desc of a non-ps chain */
4794 sendmp->m_flags |= M_PKTHDR;
4795 sendmp->m_pkthdr.len = mp->m_len;
4796 }
4797 ++processed;
4798 /* Pass the head pointer on */
4799 if (eop == 0) {
4800 nbuf->fmp = sendmp;
4801 sendmp = NULL;
4802 mp->m_next = nbuf->buf;
4803 } else { /* Sending this frame */
4804 sendmp->m_pkthdr.rcvif = ifp;
4805 ifp->if_ipackets++;
4806 rxr->rx_packets.ev_count++;
4807 /* capture data for AIM */
4808 rxr->bytes += sendmp->m_pkthdr.len;
4809 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
4810 /* Process vlan info */
4811 if ((rxr->vtag_strip) &&
4812 (staterr & IXGBE_RXD_STAT_VP))
4813 vtag = le16toh(cur->wb.upper.vlan);
4814 if (vtag) {
4815 VLAN_INPUT_TAG(ifp, sendmp, vtag,
4816 printf("%s: could not apply VLAN "
4817 "tag", __func__));
4818 }
4819 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
4820 ixgbe_rx_checksum(staterr, sendmp, ptype,
4821 &adapter->stats);
4822 }
4823 #if __FreeBSD_version >= 800000
4824 sendmp->m_pkthdr.flowid = que->msix;
4825 sendmp->m_flags |= M_FLOWID;
4826 #endif
4827 }
4828 next_desc:
4829 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4830 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4831
4832 /* Advance our pointers to the next descriptor. */
4833 if (++i == rxr->num_desc)
4834 i = 0;
4835
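		/*
		 * ixgbe_rx_input() drops and re-takes the RX lock around
		 * if_input(), so our position is published in next_to_check
		 * before the call and re-read when it returns.
		 */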
4836 /* Now send to the stack or do LRO */
4837 if (sendmp != NULL) {
4838 rxr->next_to_check = i;
4839 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4840 i = rxr->next_to_check;
4841 }
4842
4843 /* Every 8 descriptors we go to refresh mbufs */
4844 if (processed == 8) {
4845 ixgbe_refresh_mbufs(rxr, i);
4846 processed = 0;
4847 }
4848 }
4849
4850 /* Refresh any remaining buf structs */
4851 if (ixgbe_rx_unrefreshed(rxr))
4852 ixgbe_refresh_mbufs(rxr, i);
4853
4854 rxr->next_to_check = i;
4855
4856 #ifdef LRO
4857 /*
4858 * Flush any outstanding LRO work
4859 */
4860 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4861 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4862 tcp_lro_flush(lro, queued);
4863 }
4864 #endif /* LRO */
4865
4866 IXGBE_RX_UNLOCK(rxr);
4867
4868 /*
4869 ** We still have cleaning to do?
4870 ** Schedule another interrupt if so.
4871 */
4872 if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4873 ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
4874 return true;
4875 }
4876
4877 return false;
4878 }
4879
4880
/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying the checksum.
 *
 *********************************************************************/
4888 static void
4889 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
4890 struct ixgbe_hw_stats *stats)
4891 {
4892 u16 status = (u16) staterr;
4893 u8 errors = (u8) (staterr >> 24);
4894 #if 0
4895 bool sctp = FALSE;
4896
4897 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4898 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4899 sctp = TRUE;
4900 #endif
4901
4902 if (status & IXGBE_RXD_STAT_IPCS) {
4903 stats->ipcs.ev_count++;
4904 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4905 /* IP Checksum Good */
4906 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
4907
4908 } else {
4909 stats->ipcs_bad.ev_count++;
4910 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
4911 }
4912 }
4913 if (status & IXGBE_RXD_STAT_L4CS) {
4914 stats->l4cs.ev_count++;
4915 u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
4916 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4917 mp->m_pkthdr.csum_flags |= type;
4918 } else {
4919 stats->l4cs_bad.ev_count++;
4920 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
4921 }
4922 }
4923 return;
4924 }
4925
4926
4927 #if 0 /* XXX Badly need to overhaul vlan(4) on NetBSD. */
/*
** This routine is run via a vlan config EVENT;
** it enables us to use the HW Filter table since
** we can get the vlan id. This just creates the
** entry in the soft version of the VFTA; init will
** repopulate the real table.
*/
4935 static void
4936 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4937 {
4938 struct adapter *adapter = ifp->if_softc;
4939 u16 index, bit;
4940
4941 if (ifp->if_softc != arg) /* Not our event */
4942 return;
4943
4944 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4945 return;
4946
4947 IXGBE_CORE_LOCK(adapter);
4948 index = (vtag >> 5) & 0x7F;
4949 bit = vtag & 0x1F;
4950 adapter->shadow_vfta[index] |= (1 << bit);
4951 ixgbe_init_locked(adapter);
4952 IXGBE_CORE_UNLOCK(adapter);
4953 }
4954
/*
** This routine is run via a vlan
** unconfig EVENT; it removes our entry
** from the soft vfta.
*/
4960 static void
4961 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4962 {
4963 struct adapter *adapter = ifp->if_softc;
4964 u16 index, bit;
4965
4966 if (ifp->if_softc != arg)
4967 return;
4968
4969 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4970 return;
4971
4972 IXGBE_CORE_LOCK(adapter);
4973 index = (vtag >> 5) & 0x7F;
4974 bit = vtag & 0x1F;
4975 adapter->shadow_vfta[index] &= ~(1 << bit);
4976 /* Re-init to load the changes */
4977 ixgbe_init_locked(adapter);
4978 IXGBE_CORE_UNLOCK(adapter);
4979 }
4980 #endif
4981
4982 static void
4983 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4984 {
4985 struct ethercom *ec = &adapter->osdep.ec;
4986 struct ixgbe_hw *hw = &adapter->hw;
4987 struct rx_ring *rxr;
4988 u32 ctrl;
4989
	/*
	** We get here through init_locked, meaning
	** a soft reset; this has already cleared
	** the VFTA and other state, so if there
	** have been no vlans registered do nothing.
	*/
4996 if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
4997 return;
4998 }
4999
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
	*/
5004 for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
5005 if (adapter->shadow_vfta[i] != 0)
5006 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
5007 adapter->shadow_vfta[i]);
5008
5009 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
5010 /* Enable the Filter Table if enabled */
5011 if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
5012 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
5013 ctrl |= IXGBE_VLNCTRL_VFE;
5014 }
5015 if (hw->mac.type == ixgbe_mac_82598EB)
5016 ctrl |= IXGBE_VLNCTRL_VME;
5017 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
5018
5019 /* Setup the queues for vlans */
5020 for (int i = 0; i < adapter->num_queues; i++) {
5021 rxr = &adapter->rx_rings[i];
		/* On 82599 the VLAN enable is per-queue in RXDCTL */
5023 if (hw->mac.type != ixgbe_mac_82598EB) {
5024 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
5025 ctrl |= IXGBE_RXDCTL_VME;
5026 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
5027 }
5028 rxr->vtag_strip = TRUE;
5029 }
5030 }
5031
5032 static void
5033 ixgbe_enable_intr(struct adapter *adapter)
5034 {
5035 struct ixgbe_hw *hw = &adapter->hw;
5036 struct ix_queue *que = adapter->queues;
5037 u32 mask, fwsm;
5038
5039 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
5040 /* Enable Fan Failure detection */
5041 if (hw->device_id == IXGBE_DEV_ID_82598AT)
5042 mask |= IXGBE_EIMS_GPI_SDP1;
5043
5044 switch (adapter->hw.mac.type) {
5045 case ixgbe_mac_82599EB:
5046 mask |= IXGBE_EIMS_ECC;
5047 mask |= IXGBE_EIMS_GPI_SDP0;
5048 mask |= IXGBE_EIMS_GPI_SDP1;
5049 mask |= IXGBE_EIMS_GPI_SDP2;
5050 #ifdef IXGBE_FDIR
5051 mask |= IXGBE_EIMS_FLOW_DIR;
5052 #endif
5053 break;
5054 case ixgbe_mac_X540:
5055 mask |= IXGBE_EIMS_ECC;
5056 /* Detect if Thermal Sensor is enabled */
5057 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
5058 if (fwsm & IXGBE_FWSM_TS_ENABLED)
5059 mask |= IXGBE_EIMS_TS;
5060 #ifdef IXGBE_FDIR
5061 mask |= IXGBE_EIMS_FLOW_DIR;
5062 #endif
5063 /* falls through */
5064 default:
5065 break;
5066 }
5067
5068 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
5069
5070 /* With RSS we use auto clear */
5071 if (adapter->msix_mem) {
5072 mask = IXGBE_EIMS_ENABLE_MASK;
5073 /* Don't autoclear Link */
5074 mask &= ~IXGBE_EIMS_OTHER;
5075 mask &= ~IXGBE_EIMS_LSC;
5076 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
5077 }
5078
	/*
	** Now enable all queues; this is done separately to
	** allow for handling the extended (beyond 32) MSIX
	** vectors that can be used by the 82599.
	*/
5084 for (int i = 0; i < adapter->num_queues; i++, que++)
5085 ixgbe_enable_queue(adapter, que->msix);
5086
5087 IXGBE_WRITE_FLUSH(hw);
5088
5089 return;
5090 }
5091
5092 static void
5093 ixgbe_disable_intr(struct adapter *adapter)
5094 {
5095 if (adapter->msix_mem)
5096 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
5097 if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
5098 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
5099 } else {
5100 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
5101 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
5102 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
5103 }
5104 IXGBE_WRITE_FLUSH(&adapter->hw);
5105 return;
5106 }
5107
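/*
 * NetBSD's pci_conf_read()/pci_conf_write() operate on 32-bit aligned
 * registers, so the two helpers below synthesize the 16-bit config
 * space accesses expected by the shared Intel code from (or into) the
 * containing 32-bit word.
 */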
5108 u16
5109 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
5110 {
5111 switch (reg % 4) {
5112 case 0:
5113 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5114 __BITS(15, 0);
5115 case 2:
5116 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
5117 reg - 2), __BITS(31, 16));
5118 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5120 break;
5121 }
5122 }
5123
5124 void
5125 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
5126 {
5127 pcireg_t old;
5128
5129 switch (reg % 4) {
5130 case 0:
5131 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
5132 __BITS(31, 16);
5133 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
5134 break;
5135 case 2:
5136 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
5137 __BITS(15, 0);
5138 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
5139 __SHIFTIN(value, __BITS(31, 16)) | old);
5140 break;
5141 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
5143 break;
5144 }
5145
5146 return;
5147 }
5148
5149 /*
5150 ** Setup the correct IVAR register for a particular MSIX interrupt
5151 ** (yes this is all very magic and confusing :)
5152 ** - entry is the register array entry
5153 ** - vector is the MSIX vector for this queue
5154 ** - type is RX/TX/MISC
5155 */
5156 static void
5157 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
5158 {
5159 struct ixgbe_hw *hw = &adapter->hw;
5160 u32 ivar, index;
5161
5162 vector |= IXGBE_IVAR_ALLOC_VAL;
5163
5164 switch (hw->mac.type) {
5165
5166 case ixgbe_mac_82598EB:
5167 if (type == -1)
5168 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5169 else
5170 entry += (type * 64);
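		/*
		 * 82598: each IVAR register holds four 8-bit entries and
		 * the TX causes live 64 entries above the RX ones, hence
		 * the index/shift arithmetic below.
		 */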
5171 index = (entry >> 2) & 0x1F;
5172 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5173 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5174 ivar |= (vector << (8 * (entry & 0x3)));
5175 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5176 break;
5177
5178 case ixgbe_mac_82599EB:
5179 case ixgbe_mac_X540:
5180 if (type == -1) { /* MISC IVAR */
5181 index = (entry & 1) * 8;
5182 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5183 ivar &= ~(0xFF << index);
5184 ivar |= (vector << index);
5185 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5186 } else { /* RX/TX IVARS */
5187 index = (16 * (entry & 1)) + (8 * type);
5188 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5189 ivar &= ~(0xFF << index);
5190 ivar |= (vector << index);
5191 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
		}
		break;

5194 default:
5195 break;
5196 }
5197 }
5198
5199 static void
5200 ixgbe_configure_ivars(struct adapter *adapter)
5201 {
5202 struct ix_queue *que = adapter->queues;
5203 u32 newitr;
5204
5205 if (ixgbe_max_interrupt_rate > 0)
5206 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5207 else
5208 newitr = 0;
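	/*
	 * Derive the EITR interval from the requested maximum interrupt
	 * rate and mask it to the register's interval field; 0 is used
	 * when no rate limit was requested.
	 */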
5209
5210 for (int i = 0; i < adapter->num_queues; i++, que++) {
5211 /* First the RX queue entry */
5212 ixgbe_set_ivar(adapter, i, que->msix, 0);
5213 /* ... and the TX */
5214 ixgbe_set_ivar(adapter, i, que->msix, 1);
5215 /* Set an Initial EITR value */
5216 IXGBE_WRITE_REG(&adapter->hw,
5217 IXGBE_EITR(que->msix), newitr);
5218 }
5219
5220 /* For the Link interrupt */
5221 ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5222 }
5223
5224 /*
5225 ** ixgbe_sfp_probe - called in the local timer to
5226 ** determine if a port had optics inserted.
5227 */
static bool
ixgbe_sfp_probe(struct adapter *adapter)
5229 {
5230 struct ixgbe_hw *hw = &adapter->hw;
5231 device_t dev = adapter->dev;
5232 bool result = FALSE;
5233
5234 if ((hw->phy.type == ixgbe_phy_nl) &&
5235 (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5236 s32 ret = hw->phy.ops.identify_sfp(hw);
5237 if (ret)
5238 goto out;
5239 ret = hw->phy.ops.reset(hw);
5240 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
			device_printf(dev,
			    "Unsupported SFP+ module detected!\n");
			device_printf(dev,
			    "Reload driver with supported module.\n");
5243 adapter->sfp_probe = FALSE;
5244 goto out;
5245 } else
			device_printf(dev, "SFP+ module detected!\n");
5247 /* We now have supported optics */
5248 adapter->sfp_probe = FALSE;
5249 /* Set the optics type so system reports correctly */
5250 ixgbe_setup_optics(adapter);
5251 result = TRUE;
5252 }
5253 out:
5254 return (result);
5255 }
5256
5257 /*
5258 ** Tasklet handler for MSIX Link interrupts
5259 ** - do outside interrupt since it might sleep
5260 */
5261 static void
5262 ixgbe_handle_link(void *context)
5263 {
5264 struct adapter *adapter = context;
5265
5266 if (ixgbe_check_link(&adapter->hw,
5267 &adapter->link_speed, &adapter->link_up, 0) == 0)
5268 ixgbe_update_link_status(adapter);
5269 }
5270
5271 /*
5272 ** Tasklet for handling SFP module interrupts
5273 */
5274 static void
5275 ixgbe_handle_mod(void *context)
5276 {
5277 struct adapter *adapter = context;
5278 struct ixgbe_hw *hw = &adapter->hw;
5279 device_t dev = adapter->dev;
5280 u32 err;
5281
5282 err = hw->phy.ops.identify_sfp(hw);
5283 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5284 device_printf(dev,
5285 "Unsupported SFP+ module type was detected.\n");
5286 return;
5287 }
5288 err = hw->mac.ops.setup_sfp(hw);
5289 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5290 device_printf(dev,
5291 "Setup failure - unsupported SFP+ module type.\n");
5292 return;
5293 }
5294 softint_schedule(adapter->msf_si);
5295 return;
5296 }
5297
5298
5299 /*
5300 ** Tasklet for handling MSF (multispeed fiber) interrupts
5301 */
5302 static void
5303 ixgbe_handle_msf(void *context)
5304 {
5305 struct adapter *adapter = context;
5306 struct ixgbe_hw *hw = &adapter->hw;
5307 u32 autoneg;
5308 bool negotiate;
5309
5310 autoneg = hw->phy.autoneg_advertised;
5311 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5312 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5313 else
5314 negotiate = 0;
5315 if (hw->mac.ops.setup_link)
5316 hw->mac.ops.setup_link(hw, autoneg, TRUE);
5317 return;
5318 }
5319
5320 #ifdef IXGBE_FDIR
5321 /*
5322 ** Tasklet for reinitializing the Flow Director filter table
5323 */
5324 static void
5325 ixgbe_reinit_fdir(void *context)
5326 {
5327 struct adapter *adapter = context;
5328 struct ifnet *ifp = adapter->ifp;
5329
5330 if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5331 return;
5332 ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5333 adapter->fdir_reinit = 0;
5334 /* re-enable flow director interrupts */
5335 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5336 /* Restart the interface */
5337 ifp->if_flags |= IFF_RUNNING;
5338 return;
5339 }
5340 #endif
5341
5342 /**********************************************************************
5343 *
5344 * Update the board statistics counters.
5345 *
5346 **********************************************************************/
5347 static void
5348 ixgbe_update_stats_counters(struct adapter *adapter)
5349 {
5350 struct ifnet *ifp = adapter->ifp;
5351 struct ixgbe_hw *hw = &adapter->hw;
5352 u32 missed_rx = 0, bprc, lxon, lxoff, total;
5353 u64 total_missed_rx = 0;
5354 uint64_t crcerrs, rlec;
5355
5356 crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5357 adapter->stats.crcerrs.ev_count += crcerrs;
5358 adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5359 adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5360 adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5361
	/*
	** Note: these are for the 8 possible traffic classes,
	** which in the current implementation are unused,
	** therefore only class 0 should read real data.
	*/
5367 for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
5368 int j = i % adapter->num_queues;
5369 u32 mp;
5370 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5371 /* missed_rx tallies misses for the gprc workaround */
5372 missed_rx += mp;
5373 /* global total per queue */
5374 adapter->stats.mpc[j].ev_count += mp;
5375 /* Running comprehensive total for stats display */
5376 total_missed_rx += mp;
5377 if (hw->mac.type == ixgbe_mac_82598EB) {
5378 adapter->stats.rnbc[j] +=
5379 IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5380 adapter->stats.qbtc[j].ev_count +=
5381 IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5382 adapter->stats.qbrc[j].ev_count +=
5383 IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5384 adapter->stats.pxonrxc[j].ev_count +=
5385 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5386 } else {
5387 adapter->stats.pxonrxc[j].ev_count +=
5388 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5389 }
5390 adapter->stats.pxontxc[j].ev_count +=
5391 IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5392 adapter->stats.pxofftxc[j].ev_count +=
5393 IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5394 adapter->stats.pxoffrxc[j].ev_count +=
5395 IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5396 adapter->stats.pxon2offc[j].ev_count +=
5397 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5398 }
5399 for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
5400 int j = i % adapter->num_queues;
5401 adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5402 adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5403 adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5404 }
5405 adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
5406 adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
5407 rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
5408 adapter->stats.rlec.ev_count += rlec;
5409
5410 /* Hardware workaround, gprc counts missed packets */
5411 adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
5412
5413 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5414 adapter->stats.lxontxc.ev_count += lxon;
5415 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5416 adapter->stats.lxofftxc.ev_count += lxoff;
5417 total = lxon + lxoff;
5418
5419 if (hw->mac.type != ixgbe_mac_82598EB) {
5420 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5421 ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5422 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5423 ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
5424 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
5425 ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5426 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5427 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5428 } else {
5429 adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5430 adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5431 /* 82598 only has a counter in the high register */
5432 adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
5433 adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
5434 adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
5435 }
5436
5437 /*
5438 * Workaround: mprc hardware is incorrectly counting
5439 * broadcasts, so for now we subtract those.
5440 */
5441 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5442 adapter->stats.bprc.ev_count += bprc;
5443 adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
5444
5445 adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
5446 adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
5447 adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
5448 adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
5449 adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5450 adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5451
5452 adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
5453 adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
5454 adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
5455
5456 adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
5457 adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
5458 adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
5459 adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
5460 adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5461 adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5462 adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5463 adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
5464 adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
5465 adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
5466 adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
5467 adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
5468 adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5469 adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5470 adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
5471 adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
5472 adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5473 adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5474
5475 	/* Only read FCoE counters on 82599 and newer */
5476 if (hw->mac.type != ixgbe_mac_82598EB) {
5477 adapter->stats.fcoerpdc.ev_count +=
5478 IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5479 adapter->stats.fcoeprc.ev_count +=
5480 IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5481 adapter->stats.fcoeptc.ev_count +=
5482 IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5483 adapter->stats.fcoedwrc.ev_count +=
5484 IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5485 adapter->stats.fcoedwtc.ev_count +=
5486 IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5487 }
5488
5489 /* Fill out the OS statistics structure */
5490 /*
5491 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
5492 * adapter->stats counters. It's required to make ifconfig -z
5493 	 * (SIOCZIFDATA) work.
5494 */
5495 ifp->if_collisions = 0;
5496
5497 /* Rx Errors */
5498 ifp->if_iqdrops += total_missed_rx;
5499 ifp->if_ierrors += crcerrs + rlec;
5500 }
5501
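/*
 * The four descriptor-pointer handlers below (TDH/TDT/RDH/RDT) read the
 * corresponding ring register each time the sysctl node is queried, which
 * is handy for spotting a stalled ring.  They are attached read-only per
 * queue in ixgbe_add_hw_stats() as txd_head/txd_tail/rxd_head/rxd_tail.
 */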
5502 /** ixgbe_sysctl_tdh_handler - Handler function
5503 * Retrieves the TDH value from the hardware
5504 */
5505 static int
5506 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
5507 {
5508 struct sysctlnode node;
5509 uint32_t val;
5510 struct tx_ring *txr;
5511
5512 node = *rnode;
5513 txr = (struct tx_ring *)node.sysctl_data;
5514 if (txr == NULL)
5515 return 0;
5516 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5517 node.sysctl_data = &val;
5518 return sysctl_lookup(SYSCTLFN_CALL(&node));
5519 }
5520
5521 /** ixgbe_sysctl_tdt_handler - Handler function
5522 * Retrieves the TDT value from the hardware
5523 */
5524 static int
5525 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
5526 {
5527 struct sysctlnode node;
5528 uint32_t val;
5529 struct tx_ring *txr;
5530
5531 node = *rnode;
5532 txr = (struct tx_ring *)node.sysctl_data;
5533 if (txr == NULL)
5534 return 0;
5535 val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5536 node.sysctl_data = &val;
5537 return sysctl_lookup(SYSCTLFN_CALL(&node));
5538 }
5539
5540 /** ixgbe_sysctl_rdh_handler - Handler function
5541 * Retrieves the RDH value from the hardware
5542 */
5543 static int
5544 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
5545 {
5546 struct sysctlnode node;
5547 uint32_t val;
5548 struct rx_ring *rxr;
5549
5550 node = *rnode;
5551 rxr = (struct rx_ring *)node.sysctl_data;
5552 if (rxr == NULL)
5553 return 0;
5554 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5555 node.sysctl_data = &val;
5556 return sysctl_lookup(SYSCTLFN_CALL(&node));
5557 }
5558
5559 /** ixgbe_sysctl_rdt_handler - Handler function
5560 * Retrieves the RDT value from the hardware
5561 */
5562 static int
5563 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
5564 {
5565 struct sysctlnode node;
5566 uint32_t val;
5567 struct rx_ring *rxr;
5568
5569 node = *rnode;
5570 rxr = (struct rx_ring *)node.sysctl_data;
5571 if (rxr == NULL)
5572 return 0;
5573 val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5574 node.sysctl_data = &val;
5575 return sysctl_lookup(SYSCTLFN_CALL(&node));
5576 }
5577
5578 static int
5579 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
5580 {
5581 int error;
5582 struct sysctlnode node;
5583 struct ix_queue *que;
5584 uint32_t reg, usec, rate;
5585
5586 node = *rnode;
5587 que = (struct ix_queue *)node.sysctl_data;
5588 if (que == NULL)
5589 return 0;
5590 reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
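	/*
	 * EITR keeps the throttle interval in bits [11:3].  The driver
	 * treats each unit as 2 microseconds, so the reported rate is
	 * 500000 / interval interrupts per second (0 means unthrottled).
	 */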
5591 usec = ((reg & 0x0FF8) >> 3);
5592 if (usec > 0)
5593 rate = 500000 / usec;
5594 else
5595 rate = 0;
5596 node.sysctl_data = &rate;
5597 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5598 if (error)
5599 return error;
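	/*
	 * Writing a new rate re-encodes the interval: clear bits [11:0],
	 * clamp the request to the [1000, 500000) ints/sec range and store
	 * (4000000 / rate), i.e. the 2us interval already shifted into
	 * bits [11:3].  A rate of 0 leaves the queue unthrottled.
	 */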
5600 reg &= ~0xfff; /* default, no limitation */
5601 ixgbe_max_interrupt_rate = 0;
5602 if (rate > 0 && rate < 500000) {
5603 if (rate < 1000)
5604 rate = 1000;
5605 ixgbe_max_interrupt_rate = rate;
5606 reg |= ((4000000/rate) & 0xff8 );
5607 }
5608 IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5609 return 0;
5610 }
5611
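/*
 * Create the per-device sysctl root node under the hw tree, i.e.
 * hw.<devname> (for example hw.ixg0 -- name shown for illustration only),
 * logging it in adapter->sysctllog.
 */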
5612 const struct sysctlnode *
5613 ixgbe_sysctl_instance(struct adapter *adapter)
5614 {
5615 const char *dvname;
5616 struct sysctllog **log;
5617 int rc;
5618 const struct sysctlnode *rnode;
5619
5620 log = &adapter->sysctllog;
5621 dvname = device_xname(adapter->dev);
5622
5623 if ((rc = sysctl_createv(log, 0, NULL, &rnode,
5624 0, CTLTYPE_NODE, dvname,
5625 SYSCTL_DESCR("ixgbe information and settings"),
5626 NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
5627 goto err;
5628
5629 return rnode;
5630 err:
5631 printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
5632 return NULL;
5633 }
5634
5635 /*
5636  * Add sysctl variables and event counters, one per statistic, to the system.
5637 */
5638 static void
5639 ixgbe_add_hw_stats(struct adapter *adapter)
5640 {
5641 device_t dev = adapter->dev;
5642 const struct sysctlnode *rnode, *cnode;
5643 struct sysctllog **log = &adapter->sysctllog;
5644 struct tx_ring *txr = adapter->tx_rings;
5645 struct rx_ring *rxr = adapter->rx_rings;
5646 struct ixgbe_hw_stats *stats = &adapter->stats;
5647
5648 /* Driver Statistics */
5649 #if 0
5650 /* These counters are not updated by the software */
5651 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5652 CTLFLAG_RD, &adapter->dropped_pkts,
5653 "Driver dropped packets");
5654 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
5655 CTLFLAG_RD, &adapter->mbuf_header_failed,
5656 "???");
5657 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
5658 CTLFLAG_RD, &adapter->mbuf_packet_failed,
5659 "???");
5660 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
5661 CTLFLAG_RD, &adapter->no_tx_map_avail,
5662 "???");
5663 #endif
5664 evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
5665 NULL, device_xname(dev), "Handled queue in softint");
5666 evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
5667 NULL, device_xname(dev), "Requeued in softint");
5668 evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
5669 NULL, device_xname(dev), "Interrupt handler more rx");
5670 evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
5671 NULL, device_xname(dev), "Interrupt handler more tx");
5672 evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
5673 NULL, device_xname(dev), "Interrupt handler tx loops");
5674 evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
5675 NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
5676 evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
5677 NULL, device_xname(dev), "m_defrag() failed");
5678 evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
5679 NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
5680 evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
5681 NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
5682 evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
5683 NULL, device_xname(dev), "Driver tx dma hard fail other");
5684 evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
5685 NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
5686 evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
5687 NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
5688 evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
5689 NULL, device_xname(dev), "Watchdog timeouts");
5690 evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
5691 NULL, device_xname(dev), "TSO errors");
5692 evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
5693 NULL, device_xname(dev), "Link MSIX IRQ Handled");
5694
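	/*
	 * Per-queue statistics: each queue gets a sysctl subtree
	 * (hw.<devname>.queue<N>) plus event counters grouped under
	 * evnamebuf ("<devname> queue<N>").
	 */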
5695 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5696 snprintf(adapter->queues[i].evnamebuf,
5697 sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
5698 device_xname(dev), i);
5699 snprintf(adapter->queues[i].namebuf,
5700 sizeof(adapter->queues[i].namebuf), "queue%d", i);
5701
5702 if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
5703 aprint_error_dev(dev, "could not create sysctl root\n");
5704 break;
5705 }
5706
5707 if (sysctl_createv(log, 0, &rnode, &rnode,
5708 0, CTLTYPE_NODE,
5709 adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
5710 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
5711 break;
5712
5713 if (sysctl_createv(log, 0, &rnode, &cnode,
5714 CTLFLAG_READWRITE, CTLTYPE_INT,
5715 "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
5716 ixgbe_sysctl_interrupt_rate_handler, 0,
5717 (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
5718 break;
5719
5720 if (sysctl_createv(log, 0, &rnode, &cnode,
5721 CTLFLAG_READONLY, CTLTYPE_QUAD,
5722 "irqs", SYSCTL_DESCR("irqs on this queue"),
5723 NULL, 0, &(adapter->queues[i].irqs),
5724 0, CTL_CREATE, CTL_EOL) != 0)
5725 break;
5726
5727 if (sysctl_createv(log, 0, &rnode, &cnode,
5728 CTLFLAG_READONLY, CTLTYPE_INT,
5729 "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
5730 ixgbe_sysctl_tdh_handler, 0, (void *)txr,
5731 0, CTL_CREATE, CTL_EOL) != 0)
5732 break;
5733
5734 if (sysctl_createv(log, 0, &rnode, &cnode,
5735 CTLFLAG_READONLY, CTLTYPE_INT,
5736 "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
5737 ixgbe_sysctl_tdt_handler, 0, (void *)txr,
5738 0, CTL_CREATE, CTL_EOL) != 0)
5739 break;
5740
5741 evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
5742 NULL, device_xname(dev), "TSO");
5743 evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
5744 NULL, adapter->queues[i].evnamebuf,
5745 "Queue No Descriptor Available");
5746 evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
5747 NULL, adapter->queues[i].evnamebuf,
5748 "Queue Packets Transmitted");
5749
5750 #ifdef LRO
5751 struct lro_ctrl *lro = &rxr->lro;
5752 #endif /* LRO */
5753
5754 if (sysctl_createv(log, 0, &rnode, &cnode,
5755 CTLFLAG_READONLY,
5756 CTLTYPE_INT,
5757 "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
5758 ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
5759 CTL_CREATE, CTL_EOL) != 0)
5760 break;
5761
5762 if (sysctl_createv(log, 0, &rnode, &cnode,
5763 CTLFLAG_READONLY,
5764 CTLTYPE_INT,
5765 "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
5766 ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
5767 CTL_CREATE, CTL_EOL) != 0)
5768 break;
5769
5770 if (i < __arraycount(adapter->stats.mpc)) {
5771 evcnt_attach_dynamic(&adapter->stats.mpc[i],
5772 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5773 "Missed Packet Count");
5774 }
5775 if (i < __arraycount(adapter->stats.pxontxc)) {
5776 evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
5777 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5778 "pxontxc");
5779 evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
5780 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5781 "pxonrxc");
5782 evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
5783 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5784 "pxofftxc");
5785 evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
5786 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5787 "pxoffrxc");
5788 evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
5789 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5790 "pxon2offc");
5791 }
5792 if (i < __arraycount(adapter->stats.qprc)) {
5793 evcnt_attach_dynamic(&adapter->stats.qprc[i],
5794 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5795 "qprc");
5796 evcnt_attach_dynamic(&adapter->stats.qptc[i],
5797 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5798 "qptc");
5799 evcnt_attach_dynamic(&adapter->stats.qbrc[i],
5800 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5801 "qbrc");
5802 evcnt_attach_dynamic(&adapter->stats.qbtc[i],
5803 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5804 "qbtc");
5805 evcnt_attach_dynamic(&adapter->stats.qprdc[i],
5806 EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
5807 "qprdc");
5808 }
5809
5810 evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
5811 NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
5812 evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
5813 NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
5814 evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
5815 NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
5816 evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
5817 NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
5818 evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
5819 NULL, adapter->queues[i].evnamebuf, "Rx discarded");
5820 evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
5821 NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
5822 #ifdef LRO
5823 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5824 CTLFLAG_RD, &lro->lro_queued, 0,
5825 "LRO Queued");
5826 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5827 CTLFLAG_RD, &lro->lro_flushed, 0,
5828 "LRO Flushed");
5829 #endif /* LRO */
5830 }
5831
5832 	/* MAC stats get their own sub node */
5833
5834
5835 snprintf(stats->namebuf,
5836 sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
5837
5838 evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
5839 stats->namebuf, "rx csum offload - IP");
5840 evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
5841 stats->namebuf, "rx csum offload - L4");
5842 evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
5843 stats->namebuf, "rx csum offload - IP bad");
5844 evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
5845 stats->namebuf, "rx csum offload - L4 bad");
5846 evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
5847 stats->namebuf, "Interrupt conditions zero");
5848 evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
5849 stats->namebuf, "Legacy interrupts");
5850 evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
5851 stats->namebuf, "CRC Errors");
5852 evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
5853 stats->namebuf, "Illegal Byte Errors");
5854 evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
5855 stats->namebuf, "Byte Errors");
5856 evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
5857 stats->namebuf, "MAC Short Packets Discarded");
5858 evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
5859 stats->namebuf, "MAC Local Faults");
5860 evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
5861 stats->namebuf, "MAC Remote Faults");
5862 evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
5863 stats->namebuf, "Receive Length Errors");
5864 evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
5865 stats->namebuf, "Link XON Transmitted");
5866 evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
5867 stats->namebuf, "Link XON Received");
5868 evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
5869 stats->namebuf, "Link XOFF Transmitted");
5870 evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
5871 stats->namebuf, "Link XOFF Received");
5872
5873 /* Packet Reception Stats */
5874 evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
5875 stats->namebuf, "Total Octets Received");
5876 evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
5877 stats->namebuf, "Good Octets Received");
5878 evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
5879 stats->namebuf, "Total Packets Received");
5880 evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
5881 stats->namebuf, "Good Packets Received");
5882 evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
5883 stats->namebuf, "Multicast Packets Received");
5884 evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
5885 stats->namebuf, "Broadcast Packets Received");
5886 evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
5887 	    stats->namebuf, "64 byte frames received");
5888 evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
5889 stats->namebuf, "65-127 byte frames received");
5890 evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
5891 stats->namebuf, "128-255 byte frames received");
5892 evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
5893 stats->namebuf, "256-511 byte frames received");
5894 evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
5895 stats->namebuf, "512-1023 byte frames received");
5896 evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
5897 	    stats->namebuf, "1024-1522 byte frames received");
5898 evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
5899 stats->namebuf, "Receive Undersized");
5900 evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
5901 	    stats->namebuf, "Fragmented Packets Received");
5902 evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
5903 stats->namebuf, "Oversized Packets Received");
5904 evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
5905 stats->namebuf, "Received Jabber");
5906 evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
5907 stats->namebuf, "Management Packets Received");
5908 evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
5909 stats->namebuf, "Checksum Errors");
5910
5911 /* Packet Transmission Stats */
5912 evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
5913 stats->namebuf, "Good Octets Transmitted");
5914 evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
5915 stats->namebuf, "Total Packets Transmitted");
5916 evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
5917 stats->namebuf, "Good Packets Transmitted");
5918 evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
5919 stats->namebuf, "Broadcast Packets Transmitted");
5920 evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
5921 stats->namebuf, "Multicast Packets Transmitted");
5922 evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
5923 stats->namebuf, "Management Packets Transmitted");
5924 evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
5925 	    stats->namebuf, "64 byte frames transmitted");
5926 evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
5927 stats->namebuf, "65-127 byte frames transmitted");
5928 evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
5929 stats->namebuf, "128-255 byte frames transmitted");
5930 evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
5931 stats->namebuf, "256-511 byte frames transmitted");
5932 evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
5933 stats->namebuf, "512-1023 byte frames transmitted");
5934 evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
5935 stats->namebuf, "1024-1522 byte frames transmitted");
5936 }
5937
5938 /*
5939 ** Set flow control using sysctl:
5940 ** Flow control values:
5941 ** 0 - off
5942 ** 1 - rx pause
5943 ** 2 - tx pause
5944 ** 3 - full
5945 */
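/*
 * Example (the sysctl node name is illustrative only; it depends on how
 * this handler is attached):
 *	sysctl -w hw.ixg0.fc=3	-> request full rx/tx pause
 */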
5946 static int
5947 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
5948 {
5949 struct sysctlnode node;
5950 int error, last;
5951 struct adapter *adapter;
5952
5953 node = *rnode;
5954 adapter = (struct adapter *)node.sysctl_data;
5955 node.sysctl_data = &adapter->fc;
5956 last = adapter->fc;
5957 error = sysctl_lookup(SYSCTLFN_CALL(&node));
5958 if (error != 0 || newp == NULL)
5959 return error;
5960
5961 /* Don't bother if it's not changed */
5962 if (adapter->fc == last)
5963 return (0);
5964
5965 switch (adapter->fc) {
5966 case ixgbe_fc_rx_pause:
5967 case ixgbe_fc_tx_pause:
5968 case ixgbe_fc_full:
5969 adapter->hw.fc.requested_mode = adapter->fc;
5970 if (adapter->num_queues > 1)
5971 ixgbe_disable_rx_drop(adapter);
5972 break;
5973 case ixgbe_fc_none:
5974 adapter->hw.fc.requested_mode = ixgbe_fc_none;
5975 if (adapter->num_queues > 1)
5976 ixgbe_enable_rx_drop(adapter);
5977 break;
5978 default:
5979 adapter->fc = last;
5980 return (EINVAL);
5981 }
5982 /* Don't autoneg if forcing a value */
5983 adapter->hw.fc.disable_fc_autoneg = TRUE;
5984 ixgbe_fc_enable(&adapter->hw);
5985 return 0;
5986 }
5987
5988 /*
5989 ** Control link advertise speed:
5990 ** 1 - advertise only 1G
5991 ** 2 - advertise 100Mb
5992 **	3 - advertise normal (1G + 10G)
5993 */
5994 static int
5995 ixgbe_set_advertise(SYSCTLFN_ARGS)
5996 {
5997 struct sysctlnode node;
5998 int t, error = 0;
5999 struct adapter *adapter;
6000 device_t dev;
6001 struct ixgbe_hw *hw;
6002 ixgbe_link_speed speed, last;
6003
6004 node = *rnode;
6005 adapter = (struct adapter *)node.sysctl_data;
6006 dev = adapter->dev;
6007 hw = &adapter->hw;
6008 last = adapter->advertise;
6009 t = adapter->advertise;
6010 node.sysctl_data = &t;
6011 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6012 if (error != 0 || newp == NULL)
6013 return error;
6014
6015 if (adapter->advertise == last) /* no change */
6016 return (0);
6017
6018 if (t == -1)
6019 return 0;
6020
6021 adapter->advertise = t;
6022
6023 if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
6024 (hw->phy.multispeed_fiber)))
6025 return (EINVAL);
6026
6027 if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
6028 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
6029 return (EINVAL);
6030 }
6031
6032 if (adapter->advertise == 1)
6033 speed = IXGBE_LINK_SPEED_1GB_FULL;
6034 else if (adapter->advertise == 2)
6035 speed = IXGBE_LINK_SPEED_100_FULL;
6036 else if (adapter->advertise == 3)
6037 speed = IXGBE_LINK_SPEED_1GB_FULL |
6038 IXGBE_LINK_SPEED_10GB_FULL;
6039 else {/* bogus value */
6040 adapter->advertise = last;
6041 return (EINVAL);
6042 }
6043
6044 hw->mac.autotry_restart = TRUE;
6045 hw->mac.ops.setup_link(hw, speed, TRUE);
6046
6047 return 0;
6048 }
6049
6050 /*
6051 ** Thermal Shutdown Trigger
6052 ** - cause a Thermal Overtemp IRQ
6053 */
6054 static int
6055 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
6056 {
6057 struct sysctlnode node;
6058 int error, fire = 0;
6059 struct adapter *adapter;
6060 struct ixgbe_hw *hw;
6061
6062 node = *rnode;
6063 adapter = (struct adapter *)node.sysctl_data;
6064 hw = &adapter->hw;
6065
6066 if (hw->mac.type != ixgbe_mac_X540)
6067 return (0);
6068
6069 node.sysctl_data = &fire;
6070 error = sysctl_lookup(SYSCTLFN_CALL(&node));
6071 if ((error) || (newp == NULL))
6072 return (error);
6073
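	/*
	 * Setting the thermal-sensor bit in EICS makes the hardware raise
	 * the corresponding interrupt, exercising the overtemp handling
	 * path without an actual overtemperature event.
	 */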
6074 if (fire) {
6075 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
6076 reg |= IXGBE_EICR_TS;
6077 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
6078 }
6079
6080 return (0);
6081 }
6082
6083 /*
6084 ** Enable the hardware to drop packets when the buffer is
6085 ** full. This is useful with multiple queues, so that no
6086 ** single full queue stalls the entire RX engine. We only
6087 ** enable this when multiple queues are in use AND Flow
6088 ** Control is disabled.
6089 */
6090 static void
6091 ixgbe_enable_rx_drop(struct adapter *adapter)
6092 {
6093 struct ixgbe_hw *hw = &adapter->hw;
6094
6095 for (int i = 0; i < adapter->num_queues; i++) {
6096 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6097 srrctl |= IXGBE_SRRCTL_DROP_EN;
6098 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6099 }
6100 }
6101
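/*
 * Undo ixgbe_enable_rx_drop(): clear SRRCTL_DROP_EN on every queue so a
 * full receive queue back-pressures the MAC again (used when flow control
 * is enabled).
 */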
6102 static void
6103 ixgbe_disable_rx_drop(struct adapter *adapter)
6104 {
6105 struct ixgbe_hw *hw = &adapter->hw;
6106
6107 for (int i = 0; i < adapter->num_queues; i++) {
6108 u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
6109 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
6110 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
6111 }
6112 }
6113