ixgbe.c revision 1.36
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
     62 /*$NetBSD: ixgbe.c,v 1.36 2015/08/17 06:16:03 knakahara Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 #include "vlan.h"
     69 
     70 #include <sys/cprng.h>
     71 
     72 /*********************************************************************
     73  *  Set this to one to display debug statistics
     74  *********************************************************************/
     75 int             ixgbe_display_debug_stats = 0;
     76 
     77 /*********************************************************************
     78  *  Driver version
     79  *********************************************************************/
     80 char ixgbe_driver_version[] = "2.5.15";
     81 
     82 /*********************************************************************
     83  *  PCI Device ID Table
     84  *
     85  *  Used by probe to select devices to load on
     86  *  Last field stores an index into ixgbe_strings
     87  *  Last entry must be all 0s
     88  *
     89  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     90  *********************************************************************/
     91 
     92 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     93 {
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    115 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    116 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    117 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    118 	/* required last entry */
    119 	{0, 0, 0, 0, 0}
    120 };
    121 
    122 /*********************************************************************
    123  *  Table of branding strings
    124  *********************************************************************/
    125 
    126 static const char    *ixgbe_strings[] = {
    127 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
    128 };
    129 
    130 /*********************************************************************
    131  *  Function prototypes
    132  *********************************************************************/
    133 static int      ixgbe_probe(device_t, cfdata_t, void *);
    134 static void     ixgbe_attach(device_t, device_t, void *);
    135 static int      ixgbe_detach(device_t, int);
    136 #if 0
    137 static int      ixgbe_shutdown(device_t);
    138 #endif
    139 #ifdef IXGBE_LEGACY_TX
    140 static void     ixgbe_start(struct ifnet *);
    141 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    142 #else /* ! IXGBE_LEGACY_TX */
    143 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    144 static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
    145 static void	ixgbe_qflush(struct ifnet *);
    146 static void	ixgbe_deferred_mq_start(void *, int);
    147 #endif /* IXGBE_LEGACY_TX */
    148 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    149 static void	ixgbe_ifstop(struct ifnet *, int);
    150 static int	ixgbe_init(struct ifnet *);
    151 static void	ixgbe_init_locked(struct adapter *);
    152 static void     ixgbe_stop(void *);
    153 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    154 static int      ixgbe_media_change(struct ifnet *);
    155 static void     ixgbe_identify_hardware(struct adapter *);
    156 static int      ixgbe_allocate_pci_resources(struct adapter *,
    157 		    const struct pci_attach_args *);
    158 static void	ixgbe_get_slot_info(struct ixgbe_hw *);
    159 static int      ixgbe_allocate_msix(struct adapter *,
    160 		    const struct pci_attach_args *);
    161 static int      ixgbe_allocate_legacy(struct adapter *,
    162 		    const struct pci_attach_args *);
    163 static int	ixgbe_allocate_queues(struct adapter *);
    164 static int	ixgbe_setup_msix(struct adapter *);
    165 static void	ixgbe_free_pci_resources(struct adapter *);
    166 static void	ixgbe_local_timer(void *);
    167 static int	ixgbe_setup_interface(device_t, struct adapter *);
    168 static void	ixgbe_config_link(struct adapter *);
    169 
    170 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    171 static int	ixgbe_setup_transmit_structures(struct adapter *);
    172 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    173 static void     ixgbe_initialize_transmit_units(struct adapter *);
    174 static void     ixgbe_free_transmit_structures(struct adapter *);
    175 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    176 
    177 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    178 static int      ixgbe_setup_receive_structures(struct adapter *);
    179 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    180 static void     ixgbe_initialize_receive_units(struct adapter *);
    181 static void     ixgbe_free_receive_structures(struct adapter *);
    182 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    183 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    184 
    185 static void     ixgbe_enable_intr(struct adapter *);
    186 static void     ixgbe_disable_intr(struct adapter *);
    187 static void     ixgbe_update_stats_counters(struct adapter *);
    188 static void	ixgbe_txeof(struct tx_ring *);
    189 static bool	ixgbe_rxeof(struct ix_queue *);
    190 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    191 		    struct ixgbe_hw_stats *);
    192 static void     ixgbe_set_promisc(struct adapter *);
    193 static void     ixgbe_set_multi(struct adapter *);
    194 static void     ixgbe_update_link_status(struct adapter *);
    195 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    196 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    197 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    198 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    199 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    200 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    201 		    struct ixgbe_dma_alloc *, int);
    202 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    203 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    204 		    struct mbuf *, u32 *, u32 *);
    205 static int	ixgbe_tso_setup(struct tx_ring *,
    206 		    struct mbuf *, u32 *, u32 *);
    207 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    208 static void	ixgbe_configure_ivars(struct adapter *);
    209 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    210 
    211 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    212 #if 0
    213 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    214 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    215 #endif
    216 
    217 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    218 
    219 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    220 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    221 		    struct mbuf *, u32);
    222 
    223 static void	ixgbe_enable_rx_drop(struct adapter *);
    224 static void	ixgbe_disable_rx_drop(struct adapter *);
    225 
    226 /* Support for pluggable optic modules */
    227 static bool	ixgbe_sfp_probe(struct adapter *);
    228 static void	ixgbe_setup_optics(struct adapter *);
    229 
     230 /* Legacy (single vector) interrupt handler */
    231 static int	ixgbe_legacy_irq(void *);
    232 
    233 #if defined(NETBSD_MSI_OR_MSIX)
    234 /* The MSI/X Interrupt handlers */
    235 static int	ixgbe_msix_que(void *);
    236 static int	ixgbe_msix_link(void *);
    237 #endif
    238 
    239 /* Software interrupts for deferred work */
    240 static void	ixgbe_handle_que(void *);
    241 static void	ixgbe_handle_link(void *);
    242 static void	ixgbe_handle_msf(void *);
    243 static void	ixgbe_handle_mod(void *);
    244 
    245 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    246 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    247 
    248 #ifdef IXGBE_FDIR
    249 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    250 static void	ixgbe_reinit_fdir(void *, int);
    251 #endif
    252 
    253 /* Missing shared code prototype */
    254 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
    255 
    256 /*********************************************************************
     257  *  Device Interface Entry Points
    258  *********************************************************************/
    259 
    260 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    261     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    262     DVF_DETACH_SHUTDOWN);
    263 
    264 #if 0
    265 devclass_t ixgbe_devclass;
    266 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    267 
    268 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    269 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    270 #endif
    271 
    272 /*
    273 ** TUNEABLE PARAMETERS:
    274 */
    275 
    276 /*
    277 ** AIM: Adaptive Interrupt Moderation
    278 ** which means that the interrupt rate
    279 ** is varied over time based on the
    280 ** traffic for that interrupt vector
    281 */
    282 static int ixgbe_enable_aim = TRUE;
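         /*
          * NetBSD has no FreeBSD-style SYSCTL_INT()/loader tunables, so
          * the macro is stubbed out below and these calls compile to
          * nothing; some of the knobs (e.g. enable_aim) are re-exported
          * per instance by ixgbe_sysctl_attach() instead.
          */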
    283 #define SYSCTL_INT(__x, __y)
    284 SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
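         /*
          * A minimal sketch of the AIM recalculation (illustrative only:
          * the real logic lives in the MSI-X queue handler, and
          * BULK_THRESHOLD is a made-up name).  Large average frames mean
          * bulk traffic, so the vector is throttled harder; small frames
          * get a higher interrupt rate:
          *
          *	avg = bytes / packets;
          *	newitr = (avg > BULK_THRESHOLD) ? bulk_itr : low_latency_itr;
          *	IXGBE_WRITE_REG(hw, IXGBE_EITR(vector), newitr);
          */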
    285 
    286 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    287 SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
    288 
    289 /* How many packets rxeof tries to clean at a time */
    290 static int ixgbe_rx_process_limit = 256;
    291 SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    292 
    293 /* How many packets txeof tries to clean at a time */
    294 static int ixgbe_tx_process_limit = 256;
    295 SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    296 
    297 /*
     298 ** Smart speed setting, default to on.
     299 ** This only works as a compile option
     300 ** right now, since it is set during attach;
     301 ** set this to 'ixgbe_smart_speed_off' to
     302 ** disable.
    303 */
    304 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    305 
    306 /*
    307  * MSIX should be the default for best performance,
    308  * but this allows it to be forced off for testing.
    309  */
    310 static int ixgbe_enable_msix = 1;
    311 SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    312 
    313 #if defined(NETBSD_MSI_OR_MSIX)
    314 /*
     315  * Number of queues; if set to 0, it
     316  * autoconfigures based on the number
     317  * of CPUs, with a max of 8. This can
     318  * be overridden manually here.
    319  */
    320 static int ixgbe_num_queues = 1;
    321 SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    322 #endif
    323 
    324 /*
    325 ** Number of TX descriptors per ring,
     326 ** set higher than RX as this seems to
     327 ** be the better-performing choice.
    328 */
    329 static int ixgbe_txd = PERFORM_TXD;
    330 SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);
    331 
    332 /* Number of RX descriptors per ring */
    333 static int ixgbe_rxd = PERFORM_RXD;
    334 SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    335 
    336 /*
     337 ** Setting this on will allow the use
     338 ** of unsupported SFP+ modules; note that
     339 ** if you do so, you are on your own :)
    340 */
    341 static int allow_unsupported_sfp = false;
    342 SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
    343 
    344 /*
    345 ** HW RSC control:
    346 **  this feature only works with
    347 **  IPv4, and only on 82599 and later.
     348 **  Also, this will cause IP forwarding to
     349 **  fail, and that can't be controlled by
     350 **  the stack the way LRO can. For all these
     351 **  reasons I've deemed it best to leave
     352 **  this off; there is no tuneable
     353 **  interface, so enabling it requires
     354 **  recompiling the driver.
    355 */
    356 static bool ixgbe_rsc_enable = FALSE;
    357 
     358 /* Keep a running tab on total ports for a sanity check */
    359 static int ixgbe_total_ports;
    360 
    361 #ifdef IXGBE_FDIR
    362 /*
    363 ** For Flow Director: this is the
    364 ** number of TX packets we sample
     365 ** for the filter pool; this means
    366 ** every 20th packet will be probed.
    367 **
    368 ** This feature can be disabled by
    369 ** setting this to 0.
    370 */
    371 static int atr_sample_rate = 20;
    372 /*
    373 ** Flow Director actually 'steals'
    374 ** part of the packet buffer as its
     375 ** filter pool; this variable controls
    376 ** how much it uses:
    377 **  0 = 64K, 1 = 128K, 2 = 256K
    378 */
    379 static int fdir_pballoc = 1;
    380 #endif
    381 
    382 #ifdef DEV_NETMAP
    383 /*
    384  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    385  * be a reference on how to implement netmap support in a driver.
    386  * Additional comments are in ixgbe_netmap.h .
    387  *
    388  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    389  * that extend the standard driver.
    390  */
    391 #include <dev/netmap/ixgbe_netmap.h>
    392 #endif /* DEV_NETMAP */
    393 
    394 /*********************************************************************
    395  *  Device identification routine
    396  *
     397  *  ixgbe_probe determines if the driver should be loaded for
     398  *  an adapter, based on its PCI vendor/device ID.
    399  *
    400  *  return 1 on success, 0 on failure
    401  *********************************************************************/
    402 
    403 static int
    404 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    405 {
    406 	const struct pci_attach_args *pa = aux;
    407 
    408 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    409 }
    410 
    411 static ixgbe_vendor_info_t *
    412 ixgbe_lookup(const struct pci_attach_args *pa)
    413 {
    414 	pcireg_t subid;
    415 	ixgbe_vendor_info_t *ent;
    416 
    417 	INIT_DEBUGOUT("ixgbe_probe: begin");
    418 
    419 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    420 		return NULL;
    421 
    422 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    423 
    424 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    425 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    426 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    427 
    428 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    429 		     ent->subvendor_id == 0) &&
    430 
    431 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    432 		     ent->subdevice_id == 0)) {
    433 			++ixgbe_total_ports;
    434 			return ent;
    435 		}
    436 	}
    437 	return NULL;
    438 }
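         /*
          * A zero subvendor or subdevice ID in ixgbe_vendor_info_array
          * acts as a wildcard above, so the stock entries match any
          * subsystem.
          */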
    439 
    440 
    441 static void
    442 ixgbe_sysctl_attach(struct adapter *adapter)
    443 {
    444 	struct sysctllog **log;
    445 	const struct sysctlnode *rnode, *cnode;
    446 	device_t dev;
    447 
    448 	dev = adapter->dev;
    449 	log = &adapter->sysctllog;
    450 
    451 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    452 		aprint_error_dev(dev, "could not create sysctl root\n");
    453 		return;
    454 	}
    455 
    456 	if (sysctl_createv(log, 0, &rnode, &cnode,
    457 	    CTLFLAG_READONLY, CTLTYPE_INT,
    458 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    459 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    460 		aprint_error_dev(dev, "could not create sysctl\n");
    461 
    462 	if (sysctl_createv(log, 0, &rnode, &cnode,
    463 	    CTLFLAG_READONLY, CTLTYPE_INT,
    464 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    465 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    466 		aprint_error_dev(dev, "could not create sysctl\n");
    467 
    468 	if (sysctl_createv(log, 0, &rnode, &cnode,
    469 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    470 	    "fc", SYSCTL_DESCR("Flow Control"),
    471 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    472 		aprint_error_dev(dev, "could not create sysctl\n");
    473 
    474 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    475 	 * XXX It's that way in the FreeBSD driver that this derives from.
    476 	 */
    477 	if (sysctl_createv(log, 0, &rnode, &cnode,
    478 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    479 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    480 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    481 		aprint_error_dev(dev, "could not create sysctl\n");
    482 
    483 	if (sysctl_createv(log, 0, &rnode, &cnode,
    484 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    485 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    486 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    487 		aprint_error_dev(dev, "could not create sysctl\n");
    488 
    489 	if (sysctl_createv(log, 0, &rnode, &cnode,
    490 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    491 	    "ts", SYSCTL_DESCR("Thermal Test"),
    492 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    493 		aprint_error_dev(dev, "could not create sysctl\n");
    494 }
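         /*
          * Usage sketch (illustrative; the exact node path depends on
          * ixgbe_sysctl_instance()): assuming the instance tree is rooted
          * at the device name, e.g. ixg0, the knobs above would be used
          * from userland as
          *
          *	sysctl hw.ixg0.num_queues
          *	sysctl -w hw.ixg0.fc=3
          */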
    495 
    496 /*********************************************************************
    497  *  Device initialization routine
    498  *
    499  *  The attach entry point is called when the driver is being loaded.
    500  *  This routine identifies the type of hardware, allocates all resources
    501  *  and initializes the hardware.
    502  *
     503  *  (the NetBSD attach entry point itself returns void)
    504  *********************************************************************/
    505 
    506 static void
    507 ixgbe_attach(device_t parent, device_t dev, void *aux)
    508 {
    509 	struct adapter *adapter;
    510 	struct ixgbe_hw *hw;
    511 	int             error = -1;
    512 	u16		csum;
    513 	u32		ctrl_ext;
    514 	ixgbe_vendor_info_t *ent;
    515 	const struct pci_attach_args *pa = aux;
    516 
    517 	INIT_DEBUGOUT("ixgbe_attach: begin");
    518 
    519 	/* Allocate, clear, and link in our adapter structure */
    520 	adapter = device_private(dev);
    521 	adapter->dev = adapter->osdep.dev = dev;
    522 	hw = &adapter->hw;
    523 	adapter->osdep.pc = pa->pa_pc;
    524 	adapter->osdep.tag = pa->pa_tag;
    525 	adapter->osdep.dmat = pa->pa_dmat;
    526 	adapter->osdep.attached = false;
    527 
    528 	ent = ixgbe_lookup(pa);
    529 
    530 	KASSERT(ent != NULL);
    531 
    532 	aprint_normal(": %s, Version - %s\n",
    533 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    534 
    535 	/* Core Lock Init*/
    536 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    537 
    538 	/* SYSCTL APIs */
    539 
    540 	ixgbe_sysctl_attach(adapter);
    541 
    542 	/* Set up the timer callout */
    543 	callout_init(&adapter->timer, 0);
    544 
    545 	/* Determine hardware revision */
    546 	ixgbe_identify_hardware(adapter);
    547 
    548 	/* Do base PCI setup - map BAR0 */
    549 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    550 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    551 		error = ENXIO;
    552 		goto err_out;
    553 	}
    554 
    555 	/* Do descriptor calc and sanity checks */
    556 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    557 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    558 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    559 		adapter->num_tx_desc = DEFAULT_TXD;
    560 	} else
    561 		adapter->num_tx_desc = ixgbe_txd;
    562 
    563 	/*
    564 	** With many RX rings it is easy to exceed the
    565 	** system mbuf allocation. Tuning nmbclusters
    566 	** can alleviate this.
    567 	*/
     568 	if (nmbclusters > 0) {
    569 		int s;
    570 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    571 		if (s > nmbclusters) {
    572 			aprint_error_dev(dev, "RX Descriptors exceed "
    573 			    "system mbuf max, using default instead!\n");
    574 			ixgbe_rxd = DEFAULT_RXD;
    575 		}
    576 	}
    577 
    578 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
    579 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
    580 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    581 		adapter->num_rx_desc = DEFAULT_RXD;
    582 	} else
    583 		adapter->num_rx_desc = ixgbe_rxd;
    584 
    585 	/* Allocate our TX/RX Queues */
    586 	if (ixgbe_allocate_queues(adapter)) {
    587 		error = ENOMEM;
    588 		goto err_out;
    589 	}
    590 
    591 	/* Allocate multicast array memory. */
    592 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    593 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    594 	if (adapter->mta == NULL) {
    595 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    596 		error = ENOMEM;
    597 		goto err_late;
    598 	}
    599 
    600 	/* Initialize the shared code */
    601 	hw->allow_unsupported_sfp = allow_unsupported_sfp;
    602 	error = ixgbe_init_shared_code(hw);
    603 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    604 		/*
    605 		** No optics in this port, set up
    606 		** so the timer routine will probe
    607 		** for later insertion.
    608 		*/
    609 		adapter->sfp_probe = TRUE;
    610 		error = 0;
    611 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    612 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    613 		error = EIO;
    614 		goto err_late;
    615 	} else if (error) {
    616 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    617 		error = EIO;
    618 		goto err_late;
    619 	}
    620 
    621 	/* Make sure we have a good EEPROM before we read from it */
    622 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    623 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    624 		error = EIO;
    625 		goto err_late;
    626 	}
    627 
    628 	error = ixgbe_init_hw(hw);
    629 	switch (error) {
    630 	case IXGBE_ERR_EEPROM_VERSION:
    631 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    632 		    "LOM.  Please be aware there may be issues associated "
    633 		    "with your hardware.\n If you are experiencing problems "
    634 		    "please contact your Intel or hardware representative "
    635 		    "who provided you with this hardware.\n");
    636 		break;
    637 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    638 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    639 		error = EIO;
    640 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    641 		goto err_late;
    642 	case IXGBE_ERR_SFP_NOT_PRESENT:
    643 		device_printf(dev,"No SFP+ Module found\n");
    644 		/* falls thru */
    645 	default:
    646 		break;
    647 	}
    648 
    649 	/* Detect and set physical type */
    650 	ixgbe_setup_optics(adapter);
    651 
    652 	error = -1;
    653 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    654 		error = ixgbe_allocate_msix(adapter, pa);
    655 	if (error != 0)
    656 		error = ixgbe_allocate_legacy(adapter, pa);
    657 	if (error)
    658 		goto err_late;
    659 
    660 	/* Setup OS specific network interface */
    661 	if (ixgbe_setup_interface(dev, adapter) != 0)
    662 		goto err_late;
    663 
    664 	/* Initialize statistics */
    665 	ixgbe_update_stats_counters(adapter);
    666 
    667 	/*
    668 	** Check PCIE slot type/speed/width
    669 	*/
    670 	ixgbe_get_slot_info(hw);
    671 
    672 	/* Set an initial default flow control value */
    673 	adapter->fc =  ixgbe_fc_full;
    674 
    675 	/* let hardware know driver is loaded */
    676 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    677 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    678 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    679 
    680 	ixgbe_add_hw_stats(adapter);
    681 
    682 #ifdef DEV_NETMAP
    683 	ixgbe_netmap_attach(adapter);
    684 #endif /* DEV_NETMAP */
    685 	INIT_DEBUGOUT("ixgbe_attach: end");
    686 	adapter->osdep.attached = true;
    687 	return;
    688 err_late:
    689 	ixgbe_free_transmit_structures(adapter);
    690 	ixgbe_free_receive_structures(adapter);
    691 err_out:
    692 	if (adapter->ifp != NULL)
    693 		if_free(adapter->ifp);
    694 	ixgbe_free_pci_resources(adapter);
    695 	if (adapter->mta != NULL)
    696 		free(adapter->mta, M_DEVBUF);
    697 	return;
    698 
    699 }
    700 
    701 /*********************************************************************
    702  *  Device removal routine
    703  *
    704  *  The detach entry point is called when the driver is being removed.
    705  *  This routine stops the adapter and deallocates all the resources
    706  *  that were allocated for driver operation.
    707  *
    708  *  return 0 on success, positive on failure
    709  *********************************************************************/
    710 
    711 static int
    712 ixgbe_detach(device_t dev, int flags)
    713 {
    714 	struct adapter *adapter = device_private(dev);
    715 	struct rx_ring *rxr = adapter->rx_rings;
    716 	struct ixgbe_hw_stats *stats = &adapter->stats;
    717 	struct ix_queue *que = adapter->queues;
    718 	struct tx_ring *txr = adapter->tx_rings;
    719 	u32	ctrl_ext;
    720 
    721 	INIT_DEBUGOUT("ixgbe_detach: begin");
    722 	if (adapter->osdep.attached == false)
    723 		return 0;
    724 
    725 #if NVLAN > 0
    726 	/* Make sure VLANs are not using driver */
    727 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    728 		;	/* nothing to do: no VLANs */
    729 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    730 		vlan_ifdetach(adapter->ifp);
    731 	else {
    732 		aprint_error_dev(dev, "VLANs in use\n");
    733 		return EBUSY;
    734 	}
    735 #endif
    736 
    737 	IXGBE_CORE_LOCK(adapter);
    738 	ixgbe_stop(adapter);
    739 	IXGBE_CORE_UNLOCK(adapter);
    740 
    741 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    742 #ifndef IXGBE_LEGACY_TX
    743 		softint_disestablish(txr->txq_si);
    744 #endif
    745 		softint_disestablish(que->que_si);
    746 	}
    747 
    748 	/* Drain the Link queue */
    749 	softint_disestablish(adapter->link_si);
    750 	softint_disestablish(adapter->mod_si);
    751 	softint_disestablish(adapter->msf_si);
    752 #ifdef IXGBE_FDIR
    753 	softint_disestablish(adapter->fdir_si);
    754 #endif
    755 
    756 	/* let hardware know driver is unloading */
    757 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    758 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    759 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    760 
    761 	ether_ifdetach(adapter->ifp);
    762 	callout_halt(&adapter->timer, NULL);
    763 #ifdef DEV_NETMAP
    764 	netmap_detach(adapter->ifp);
    765 #endif /* DEV_NETMAP */
    766 	ixgbe_free_pci_resources(adapter);
    767 #if 0	/* XXX the NetBSD port is probably missing something here */
    768 	bus_generic_detach(dev);
    769 #endif
    770 	if_detach(adapter->ifp);
    771 
    772 	sysctl_teardown(&adapter->sysctllog);
    773 	evcnt_detach(&adapter->handleq);
    774 	evcnt_detach(&adapter->req);
    775 	evcnt_detach(&adapter->morerx);
    776 	evcnt_detach(&adapter->moretx);
    777 	evcnt_detach(&adapter->txloops);
    778 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    779 	evcnt_detach(&adapter->m_defrag_failed);
    780 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    781 	evcnt_detach(&adapter->einval_tx_dma_setup);
    782 	evcnt_detach(&adapter->other_tx_dma_setup);
    783 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    784 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    785 	evcnt_detach(&adapter->watchdog_events);
    786 	evcnt_detach(&adapter->tso_err);
    787 	evcnt_detach(&adapter->link_irq);
    788 
    789 	txr = adapter->tx_rings;
    790 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    791 		evcnt_detach(&txr->no_desc_avail);
    792 		evcnt_detach(&txr->total_packets);
    793 		evcnt_detach(&txr->tso_tx);
    794 
    795 		if (i < __arraycount(adapter->stats.mpc)) {
    796 			evcnt_detach(&adapter->stats.mpc[i]);
    797 		}
    798 		if (i < __arraycount(adapter->stats.pxontxc)) {
    799 			evcnt_detach(&adapter->stats.pxontxc[i]);
    800 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    801 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    802 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    803 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    804 		}
    805 		if (i < __arraycount(adapter->stats.qprc)) {
    806 			evcnt_detach(&adapter->stats.qprc[i]);
    807 			evcnt_detach(&adapter->stats.qptc[i]);
    808 			evcnt_detach(&adapter->stats.qbrc[i]);
    809 			evcnt_detach(&adapter->stats.qbtc[i]);
    810 			evcnt_detach(&adapter->stats.qprdc[i]);
    811 		}
    812 
    813 		evcnt_detach(&rxr->rx_packets);
    814 		evcnt_detach(&rxr->rx_bytes);
    815 		evcnt_detach(&rxr->rx_copies);
    816 		evcnt_detach(&rxr->no_jmbuf);
    817 		evcnt_detach(&rxr->rx_discarded);
    818 		evcnt_detach(&rxr->rx_irq);
    819 	}
    820 	evcnt_detach(&stats->ipcs);
    821 	evcnt_detach(&stats->l4cs);
    822 	evcnt_detach(&stats->ipcs_bad);
    823 	evcnt_detach(&stats->l4cs_bad);
    824 	evcnt_detach(&stats->intzero);
    825 	evcnt_detach(&stats->legint);
    826 	evcnt_detach(&stats->crcerrs);
    827 	evcnt_detach(&stats->illerrc);
    828 	evcnt_detach(&stats->errbc);
    829 	evcnt_detach(&stats->mspdc);
    830 	evcnt_detach(&stats->mlfc);
    831 	evcnt_detach(&stats->mrfc);
    832 	evcnt_detach(&stats->rlec);
    833 	evcnt_detach(&stats->lxontxc);
    834 	evcnt_detach(&stats->lxonrxc);
    835 	evcnt_detach(&stats->lxofftxc);
    836 	evcnt_detach(&stats->lxoffrxc);
    837 
    838 	/* Packet Reception Stats */
    839 	evcnt_detach(&stats->tor);
    840 	evcnt_detach(&stats->gorc);
    841 	evcnt_detach(&stats->tpr);
    842 	evcnt_detach(&stats->gprc);
    843 	evcnt_detach(&stats->mprc);
    844 	evcnt_detach(&stats->bprc);
    845 	evcnt_detach(&stats->prc64);
    846 	evcnt_detach(&stats->prc127);
    847 	evcnt_detach(&stats->prc255);
    848 	evcnt_detach(&stats->prc511);
    849 	evcnt_detach(&stats->prc1023);
    850 	evcnt_detach(&stats->prc1522);
    851 	evcnt_detach(&stats->ruc);
    852 	evcnt_detach(&stats->rfc);
    853 	evcnt_detach(&stats->roc);
    854 	evcnt_detach(&stats->rjc);
    855 	evcnt_detach(&stats->mngprc);
    856 	evcnt_detach(&stats->xec);
    857 
    858 	/* Packet Transmission Stats */
    859 	evcnt_detach(&stats->gotc);
    860 	evcnt_detach(&stats->tpt);
    861 	evcnt_detach(&stats->gptc);
    862 	evcnt_detach(&stats->bptc);
    863 	evcnt_detach(&stats->mptc);
    864 	evcnt_detach(&stats->mngptc);
    865 	evcnt_detach(&stats->ptc64);
    866 	evcnt_detach(&stats->ptc127);
    867 	evcnt_detach(&stats->ptc255);
    868 	evcnt_detach(&stats->ptc511);
    869 	evcnt_detach(&stats->ptc1023);
    870 	evcnt_detach(&stats->ptc1522);
    871 
    872 	ixgbe_free_transmit_structures(adapter);
    873 	ixgbe_free_receive_structures(adapter);
    874 	free(adapter->mta, M_DEVBUF);
    875 
    876 	IXGBE_CORE_LOCK_DESTROY(adapter);
    877 	return (0);
    878 }
    879 
    880 /*********************************************************************
    881  *
    882  *  Shutdown entry point
    883  *
    884  **********************************************************************/
    885 
    886 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
    887 static int
    888 ixgbe_shutdown(device_t dev)
    889 {
    890 	struct adapter *adapter = device_private(dev);
    891 	IXGBE_CORE_LOCK(adapter);
    892 	ixgbe_stop(adapter);
    893 	IXGBE_CORE_UNLOCK(adapter);
    894 	return (0);
    895 }
    896 #endif
    897 
    898 
    899 #ifdef IXGBE_LEGACY_TX
    900 /*********************************************************************
    901  *  Transmit entry point
    902  *
    903  *  ixgbe_start is called by the stack to initiate a transmit.
    904  *  The driver will remain in this routine as long as there are
    905  *  packets to transmit and transmit resources are available.
     906  *  In case resources are not available, the stack is notified
     907  *  and the packet is requeued.
    908  **********************************************************************/
    909 
    910 static void
    911 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    912 {
    913 	int rc;
    914 	struct mbuf    *m_head;
    915 	struct adapter *adapter = txr->adapter;
    916 
    917 	IXGBE_TX_LOCK_ASSERT(txr);
    918 
    919 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    920 		return;
    921 	if (!adapter->link_active)
    922 		return;
    923 
    924 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    925 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    926 			break;
    927 
    928 		IFQ_POLL(&ifp->if_snd, m_head);
    929 		if (m_head == NULL)
    930 			break;
    931 
    932 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    933 			break;
    934 		}
    935 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    936 		if (rc == EFBIG) {
    937 			struct mbuf *mtmp;
    938 
    939 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    940 				m_head = mtmp;
    941 				rc = ixgbe_xmit(txr, m_head);
    942 				if (rc != 0)
    943 					adapter->efbig2_tx_dma_setup.ev_count++;
    944 			} else
    945 				adapter->m_defrag_failed.ev_count++;
    946 		}
    947 		if (rc != 0) {
    948 			m_freem(m_head);
    949 			continue;
    950 		}
    951 
    952 		/* Send a copy of the frame to the BPF listener */
    953 		bpf_mtap(ifp, m_head);
    954 
    955 		/* Set watchdog on */
    956 		getmicrotime(&txr->watchdog_time);
    957 		txr->queue_status = IXGBE_QUEUE_WORKING;
    958 
    959 	}
    960 	return;
    961 }
    962 
    963 /*
     964  * Legacy TX start - called by the stack; this
     965  * always uses the first tx ring and should
     966  * not be used with multiqueue tx enabled.
    967  */
    968 static void
    969 ixgbe_start(struct ifnet *ifp)
    970 {
    971 	struct adapter *adapter = ifp->if_softc;
    972 	struct tx_ring	*txr = adapter->tx_rings;
    973 
    974 	if (ifp->if_flags & IFF_RUNNING) {
    975 		IXGBE_TX_LOCK(txr);
    976 		ixgbe_start_locked(txr, ifp);
    977 		IXGBE_TX_UNLOCK(txr);
    978 	}
    979 	return;
    980 }
    981 
    982 #else /* ! IXGBE_LEGACY_TX */
    983 
    984 /*
    985 ** Multiqueue Transmit driver
    986 **
    987 */
    988 static int
    989 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    990 {
    991 	struct adapter	*adapter = ifp->if_softc;
    992 	struct ix_queue	*que;
    993 	struct tx_ring	*txr;
    994 	int 		i, err = 0;
    995 #ifdef	RSS
    996 	uint32_t bucket_id;
    997 #endif
    998 
    999 	/* Which queue to use */
   1000 	/*
   1001 	 * When doing RSS, map it to the same outbound queue
   1002 	 * as the incoming flow would be mapped to.
   1003 	 *
    1004 	 * If everything is set up correctly, it should be the
    1005 	 * same bucket as the CPU we're currently on.
   1006 	 */
   1007 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
   1008 #ifdef	RSS
   1009 		if (rss_hash2bucket(m->m_pkthdr.flowid,
   1010 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
   1011 			/* XXX TODO: spit out something if bucket_id > num_queues? */
   1012 			i = bucket_id % adapter->num_queues;
   1013 		} else {
   1014 #endif
   1015 			i = m->m_pkthdr.flowid % adapter->num_queues;
   1016 #ifdef	RSS
   1017 		}
   1018 #endif
   1019 	} else {
    1020 		i = cpu_index(curcpu()) % adapter->num_queues;
   1021 	}
   1022 
   1023 	txr = &adapter->tx_rings[i];
   1024 	que = &adapter->queues[i];
   1025 
   1026 	err = drbr_enqueue(ifp, txr->br, m);
   1027 	if (err)
   1028 		return (err);
   1029 	if (IXGBE_TX_TRYLOCK(txr)) {
   1030 		ixgbe_mq_start_locked(ifp, txr);
   1031 		IXGBE_TX_UNLOCK(txr);
   1032 	} else
   1033 		softint_schedule(txr->txq_si);
   1034 
   1035 	return (0);
   1036 }
   1037 
   1038 static int
   1039 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
   1040 {
   1041 	struct adapter  *adapter = txr->adapter;
   1042 	struct mbuf     *next;
   1043 	int             enqueued = 0, err = 0;
   1044 
   1045 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1046 	    adapter->link_active == 0)
   1047 		return (ENETDOWN);
   1048 
   1049 	/* Process the queue */
   1050 #if __FreeBSD_version < 901504
   1051 	next = drbr_dequeue(ifp, txr->br);
   1052 	while (next != NULL) {
   1053 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1054 			if (next != NULL)
   1055 				err = drbr_enqueue(ifp, txr->br, next);
   1056 #else
   1057 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1058 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1059 			if (next == NULL) {
   1060 				drbr_advance(ifp, txr->br);
   1061 			} else {
   1062 				drbr_putback(ifp, txr->br, next);
   1063 			}
   1064 #endif
   1065 			break;
   1066 		}
   1067 #if __FreeBSD_version >= 901504
   1068 		drbr_advance(ifp, txr->br);
   1069 #endif
   1070 		enqueued++;
   1071 		/* Send a copy of the frame to the BPF listener */
   1072 		bpf_mtap(ifp, next);
   1073 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1074 			break;
   1075 #if __FreeBSD_version < 901504
   1076 		next = drbr_dequeue(ifp, txr->br);
   1077 #endif
   1078 	}
   1079 
   1080 	if (enqueued > 0) {
   1081 		/* Set watchdog on */
   1082 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1083 		getmicrotime(&txr->watchdog_time);
   1084 	}
   1085 
   1086 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1087 		ixgbe_txeof(txr);
   1088 
   1089 	return (err);
   1090 }
   1091 
   1092 /*
    1093  * Called from a softint to drain queued transmit packets.
   1094  */
   1095 static void
   1096 ixgbe_deferred_mq_start(void *arg, int pending)
   1097 {
   1098 	struct tx_ring *txr = arg;
   1099 	struct adapter *adapter = txr->adapter;
   1100 	struct ifnet *ifp = adapter->ifp;
   1101 
   1102 	IXGBE_TX_LOCK(txr);
   1103 	if (!drbr_empty(ifp, txr->br))
   1104 		ixgbe_mq_start_locked(ifp, txr);
   1105 	IXGBE_TX_UNLOCK(txr);
   1106 }
   1107 
   1108 /*
   1109 ** Flush all ring buffers
   1110 */
   1111 static void
   1112 ixgbe_qflush(struct ifnet *ifp)
   1113 {
   1114 	struct adapter	*adapter = ifp->if_softc;
   1115 	struct tx_ring	*txr = adapter->tx_rings;
   1116 	struct mbuf	*m;
   1117 
   1118 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1119 		IXGBE_TX_LOCK(txr);
   1120 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1121 			m_freem(m);
   1122 		IXGBE_TX_UNLOCK(txr);
   1123 	}
   1124 	if_qflush(ifp);
   1125 }
   1126 #endif /* IXGBE_LEGACY_TX */
   1127 
   1128 static int
   1129 ixgbe_ifflags_cb(struct ethercom *ec)
   1130 {
   1131 	struct ifnet *ifp = &ec->ec_if;
   1132 	struct adapter *adapter = ifp->if_softc;
   1133 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1134 
   1135 	IXGBE_CORE_LOCK(adapter);
   1136 
   1137 	if (change != 0)
   1138 		adapter->if_flags = ifp->if_flags;
   1139 
   1140 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1141 		rc = ENETRESET;
   1142 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1143 		ixgbe_set_promisc(adapter);
   1144 
   1145 	/* Set up VLAN support and filter */
   1146 	ixgbe_setup_vlan_hw_support(adapter);
   1147 
   1148 	IXGBE_CORE_UNLOCK(adapter);
   1149 
   1150 	return rc;
   1151 }
   1152 
   1153 /*********************************************************************
   1154  *  Ioctl entry point
   1155  *
   1156  *  ixgbe_ioctl is called when the user wants to configure the
   1157  *  interface.
   1158  *
   1159  *  return 0 on success, positive on failure
   1160  **********************************************************************/
   1161 
   1162 static int
   1163 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1164 {
   1165 	struct adapter	*adapter = ifp->if_softc;
   1166 	struct ixgbe_hw *hw = &adapter->hw;
   1167 	struct ifcapreq *ifcr = data;
   1168 	struct ifreq	*ifr = data;
   1169 	int             error = 0;
   1170 	int l4csum_en;
   1171 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1172 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1173 
   1174 	switch (command) {
   1175 	case SIOCSIFFLAGS:
   1176 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1177 		break;
   1178 	case SIOCADDMULTI:
   1179 	case SIOCDELMULTI:
   1180 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1181 		break;
   1182 	case SIOCSIFMEDIA:
   1183 	case SIOCGIFMEDIA:
   1184 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1185 		break;
   1186 	case SIOCSIFCAP:
   1187 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1188 		break;
   1189 	case SIOCSIFMTU:
   1190 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1191 		break;
   1192 	default:
   1193 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1194 		break;
   1195 	}
   1196 
   1197 	switch (command) {
   1198 	case SIOCSIFMEDIA:
   1199 	case SIOCGIFMEDIA:
   1200 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1201 	case SIOCGI2C:
   1202 	{
   1203 		struct ixgbe_i2c_req	i2c;
   1204 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1205 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1206 		if (error != 0)
   1207 			break;
   1208 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
   1209 			error = EINVAL;
   1210 			break;
   1211 		}
   1212 		if (i2c.len > sizeof(i2c.data)) {
   1213 			error = EINVAL;
   1214 			break;
   1215 		}
   1216 
    1217 		for (int i = 0; i < i2c.len; i++)
    1218 			hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, i2c.dev_addr, &i2c.data[i]);
   1219 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1220 		break;
   1221 	}
   1222 	case SIOCSIFCAP:
   1223 		/* Layer-4 Rx checksum offload has to be turned on and
   1224 		 * off as a unit.
   1225 		 */
   1226 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1227 		if (l4csum_en != l4csum && l4csum_en != 0)
   1228 			return EINVAL;
   1229 		/*FALLTHROUGH*/
   1230 	case SIOCADDMULTI:
   1231 	case SIOCDELMULTI:
   1232 	case SIOCSIFFLAGS:
   1233 	case SIOCSIFMTU:
   1234 	default:
   1235 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1236 			return error;
   1237 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1238 			;
   1239 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1240 			IXGBE_CORE_LOCK(adapter);
   1241 			ixgbe_init_locked(adapter);
   1242 			IXGBE_CORE_UNLOCK(adapter);
   1243 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1244 			/*
   1245 			 * Multicast list has changed; set the hardware filter
   1246 			 * accordingly.
   1247 			 */
   1248 			IXGBE_CORE_LOCK(adapter);
   1249 			ixgbe_disable_intr(adapter);
   1250 			ixgbe_set_multi(adapter);
   1251 			ixgbe_enable_intr(adapter);
   1252 			IXGBE_CORE_UNLOCK(adapter);
   1253 		}
   1254 		return 0;
   1255 	}
   1256 
   1257 	return error;
   1258 }
   1259 
   1260 /*********************************************************************
   1261  *  Init entry point
   1262  *
   1263  *  This routine is used in two ways. It is used by the stack as
   1264  *  init entry point in network interface structure. It is also used
   1265  *  by the driver as a hw/sw initialization routine to get to a
   1266  *  consistent state.
   1267  *
   1268  *  return 0 on success, positive on failure
   1269  **********************************************************************/
   1270 #define IXGBE_MHADD_MFS_SHIFT 16
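         /* MHADD keeps the maximum frame size (MFS) in its upper 16 bits. */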
   1271 
   1272 static void
   1273 ixgbe_init_locked(struct adapter *adapter)
   1274 {
   1275 	struct ifnet   *ifp = adapter->ifp;
   1276 	device_t 	dev = adapter->dev;
   1277 	struct ixgbe_hw *hw = &adapter->hw;
   1278 	u32		k, txdctl, mhadd, gpie;
   1279 	u32		rxdctl, rxctrl;
   1280 
   1281 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1282 
   1283 	KASSERT(mutex_owned(&adapter->core_mtx));
   1284 	INIT_DEBUGOUT("ixgbe_init_locked: begin");
   1285 	hw->adapter_stopped = FALSE;
   1286 	ixgbe_stop_adapter(hw);
    1287 	callout_stop(&adapter->timer);
   1288 
   1289 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1290 	adapter->max_frame_size =
   1291 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1292 
    1293 	/* reprogram the RAR[0] in case user changed it. */
    1294 	ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1295 
   1296 	/* Get the latest mac address, User can use a LAA */
   1297 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1298 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1299 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1300 	hw->addr_ctrl.rar_used_count = 1;
   1301 
   1302 	/* Prepare transmit descriptors and buffers */
   1303 	if (ixgbe_setup_transmit_structures(adapter)) {
   1304 		device_printf(dev,"Could not setup transmit structures\n");
   1305 		ixgbe_stop(adapter);
   1306 		return;
   1307 	}
   1308 
   1309 	ixgbe_init_hw(hw);
   1310 	ixgbe_initialize_transmit_units(adapter);
   1311 
   1312 	/* Setup Multicast table */
   1313 	ixgbe_set_multi(adapter);
   1314 
   1315 	/*
   1316 	** Determine the correct mbuf pool
   1317 	** for doing jumbo frames
   1318 	*/
   1319 	if (adapter->max_frame_size <= 2048)
   1320 		adapter->rx_mbuf_sz = MCLBYTES;
   1321 	else if (adapter->max_frame_size <= 4096)
   1322 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1323 	else if (adapter->max_frame_size <= 9216)
   1324 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1325 	else
   1326 		adapter->rx_mbuf_sz = MJUM16BYTES;
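         	/*
         	 * For example, a 9000-byte MTU gives a max frame of 9018
         	 * bytes, which falls into the MJUM9BYTES (9216-byte)
         	 * cluster pool above.
         	 */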
   1327 
   1328 	/* Prepare receive descriptors and buffers */
   1329 	if (ixgbe_setup_receive_structures(adapter)) {
   1330 		device_printf(dev,"Could not setup receive structures\n");
   1331 		ixgbe_stop(adapter);
   1332 		return;
   1333 	}
   1334 
   1335 	/* Configure RX settings */
   1336 	ixgbe_initialize_receive_units(adapter);
   1337 
   1338 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1339 
   1340 	/* Enable Fan Failure Interrupt */
   1341 	gpie |= IXGBE_SDP1_GPIEN;
   1342 
   1343 	/* Add for Module detection */
   1344 	if (hw->mac.type == ixgbe_mac_82599EB)
   1345 		gpie |= IXGBE_SDP2_GPIEN;
   1346 
   1347 	/* Thermal Failure Detection */
   1348 	if (hw->mac.type == ixgbe_mac_X540)
   1349 		gpie |= IXGBE_SDP0_GPIEN;
   1350 
   1351 	if (adapter->msix > 1) {
   1352 		/* Enable Enhanced MSIX mode */
   1353 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1354 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1355 		    IXGBE_GPIE_OCD;
   1356 	}
   1357 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1358 
   1359 	/* Set MTU size */
   1360 	if (ifp->if_mtu > ETHERMTU) {
   1361 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1362 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1363 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1364 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1365 	}
   1366 
   1367 	/* Now enable all the queues */
   1368 
   1369 	for (int i = 0; i < adapter->num_queues; i++) {
   1370 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1371 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1372 		/* Set WTHRESH to 8, burst writeback */
   1373 		txdctl |= (8 << 16);
   1374 		/*
   1375 		 * When the internal queue falls below PTHRESH (32),
   1376 		 * start prefetching as long as there are at least
   1377 		 * HTHRESH (1) buffers ready. The values are taken
   1378 		 * from the Intel linux driver 3.8.21.
   1379 		 * Prefetching enables tx line rate even with 1 queue.
   1380 		 */
   1381 		txdctl |= (32 << 0) | (1 << 8);
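         		/*
         		 * TXDCTL field layout (82599/X540 datasheets): PTHRESH
         		 * is bits 6:0, HTHRESH bits 14:8, WTHRESH bits 22:16;
         		 * hence the (32 << 0), (1 << 8) and (8 << 16) above.
         		 */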
   1382 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1383 	}
   1384 
   1385 	for (int i = 0; i < adapter->num_queues; i++) {
   1386 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1387 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1388 			/*
   1389 			** PTHRESH = 21
    1390 			** PTHRESH = 32 (0x20)
   1391 			** WTHRESH = 8
   1392 			*/
   1393 			rxdctl &= ~0x3FFFFF;
   1394 			rxdctl |= 0x080420;
   1395 		}
   1396 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1397 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1398 		/* XXX I don't trust this loop, and I don't trust the
   1399 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1400 		 */
   1401 		for (k = 0; k < 10; k++) {
   1402 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1403 			    IXGBE_RXDCTL_ENABLE)
   1404 				break;
   1405 			else
   1406 				msec_delay(1);
   1407 		}
   1408 		wmb();
   1409 #ifdef DEV_NETMAP
   1410 		/*
   1411 		 * In netmap mode, we must preserve the buffers made
   1412 		 * available to userspace before the if_init()
   1413 		 * (this is true by default on the TX side, because
   1414 		 * init makes all buffers available to userspace).
   1415 		 *
   1416 		 * netmap_reset() and the device specific routines
   1417 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1418 		 * buffers at the end of the NIC ring, so here we
   1419 		 * must set the RDT (tail) register to make sure
   1420 		 * they are not overwritten.
   1421 		 *
   1422 		 * In this driver the NIC ring starts at RDH = 0,
   1423 		 * RDT points to the last slot available for reception (?),
   1424 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1425 		 */
   1426 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1427 			struct netmap_adapter *na = NA(adapter->ifp);
   1428 			struct netmap_kring *kring = &na->rx_rings[i];
   1429 			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
   1430 
   1431 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1432 		} else
   1433 #endif /* DEV_NETMAP */
   1434 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1435 	}
   1436 
   1437 	/* Enable Receive engine */
   1438 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1439 	if (hw->mac.type == ixgbe_mac_82598EB)
   1440 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1441 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1442 	ixgbe_enable_rx_dma(hw, rxctrl);
   1443 
   1444 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1445 
   1446 	/* Set up MSI/X routing */
   1447 	if (ixgbe_enable_msix)  {
   1448 		ixgbe_configure_ivars(adapter);
   1449 		/* Set up auto-mask */
   1450 		if (hw->mac.type == ixgbe_mac_82598EB)
   1451 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1452 		else {
   1453 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1454 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1455 		}
   1456 	} else {  /* Simple settings for Legacy/MSI */
    1457 		ixgbe_set_ivar(adapter, 0, 0, 0);
    1458 		ixgbe_set_ivar(adapter, 0, 0, 1);
   1459 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1460 	}
   1461 
   1462 #ifdef IXGBE_FDIR
   1463 	/* Init Flow director */
   1464 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1465 		u32 hdrm = 32 << fdir_pballoc;
   1466 
   1467 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1468 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1469 	}
   1470 #endif
   1471 
   1472 	/*
   1473 	** Check on any SFP devices that
   1474 	** need to be kick-started
   1475 	*/
   1476 	if (hw->phy.type == ixgbe_phy_none) {
   1477 		int err = hw->phy.ops.identify(hw);
   1478 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1479                 	device_printf(dev,
   1480 			    "Unsupported SFP+ module type was detected.\n");
   1481 			return;
   1482         	}
   1483 	}
   1484 
   1485 	/* Set moderation on the Link interrupt */
   1486 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1487 
   1488 	/* Config/Enable Link */
   1489 	ixgbe_config_link(adapter);
   1490 
   1491 	/* Hardware Packet Buffer & Flow Control setup */
   1492 	{
   1493 		u32 rxpb, frame, size, tmp;
   1494 
   1495 		frame = adapter->max_frame_size;
   1496 
   1497 		/* Calculate High Water */
   1498 		if (hw->mac.type == ixgbe_mac_X540)
   1499 			tmp = IXGBE_DV_X540(frame, frame);
   1500 		else
   1501 			tmp = IXGBE_DV(frame, frame);
   1502 		size = IXGBE_BT2KB(tmp);
   1503 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1504 		hw->fc.high_water[0] = rxpb - size;
   1505 
   1506 		/* Now calculate Low Water */
   1507 		if (hw->mac.type == ixgbe_mac_X540)
   1508 			tmp = IXGBE_LOW_DV_X540(frame);
   1509 		else
   1510 			tmp = IXGBE_LOW_DV(frame);
   1511 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
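         		/*
         		 * Both watermarks end up in kilobyte units: IXGBE_BT2KB
         		 * converts the byte-time delay value, and RXPBSIZE is
         		 * read shifted right by 10.  As an illustrative example,
         		 * a 512KB packet buffer with a 64KB delay value gives
         		 * high_water[0] = 512 - 64 = 448.
         		 */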
   1512 
   1513 		hw->fc.requested_mode = adapter->fc;
   1514 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1515 		hw->fc.send_xon = TRUE;
   1516 	}
   1517 	/* Initialize the FC settings */
   1518 	ixgbe_start_hw(hw);
   1519 
   1520 	/* Set up VLAN support and filter */
   1521 	ixgbe_setup_vlan_hw_support(adapter);
   1522 
   1523 	/* And now turn on interrupts */
   1524 	ixgbe_enable_intr(adapter);
   1525 
   1526 	/* Now inform the stack we're ready */
   1527 	ifp->if_flags |= IFF_RUNNING;
   1528 
   1529 	return;
   1530 }
   1531 
   1532 static int
   1533 ixgbe_init(struct ifnet *ifp)
   1534 {
   1535 	struct adapter *adapter = ifp->if_softc;
   1536 
   1537 	IXGBE_CORE_LOCK(adapter);
   1538 	ixgbe_init_locked(adapter);
   1539 	IXGBE_CORE_UNLOCK(adapter);
   1540 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1541 }
   1542 
   1543 
   1544 /*
   1545 **
   1546 ** MSIX Interrupt Handlers and Tasklets
   1547 **
   1548 */
   1549 
   1550 static inline void
   1551 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1552 {
   1553 	struct ixgbe_hw *hw = &adapter->hw;
   1554 	u64	queue = (u64)(1ULL << vector);
   1555 	u32	mask;
   1556 
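         	/*
         	 * The 64-bit queue bitmap is split across two 32-bit EIMS_EX
         	 * registers on MACs newer than the 82598; e.g. vector 35
         	 * yields a zero low word, so only bit 3 of EIMS_EX(1) is
         	 * written.
         	 */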
   1557 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1558                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1559                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1560 	} else {
   1561                 mask = (queue & 0xFFFFFFFF);
   1562                 if (mask)
   1563                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1564                 mask = (queue >> 32);
   1565                 if (mask)
   1566                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1567 	}
   1568 }
   1569 
   1570 __unused static inline void
   1571 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1572 {
   1573 	struct ixgbe_hw *hw = &adapter->hw;
   1574 	u64	queue = (u64)(1ULL << vector);
   1575 	u32	mask;
   1576 
   1577 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1578                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1579                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1580 	} else {
   1581                 mask = (queue & 0xFFFFFFFF);
   1582                 if (mask)
   1583                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1584                 mask = (queue >> 32);
   1585                 if (mask)
   1586                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1587 	}
   1588 }
   1589 
   1590 static void
   1591 ixgbe_handle_que(void *context)
   1592 {
   1593 	struct ix_queue *que = context;
   1594 	struct adapter  *adapter = que->adapter;
   1595 	struct tx_ring  *txr = que->txr;
   1596 	struct ifnet    *ifp = adapter->ifp;
   1597 
   1598 	adapter->handleq.ev_count++;
   1599 
   1600 	if (ifp->if_flags & IFF_RUNNING) {
   1601 		ixgbe_rxeof(que);
   1602 		IXGBE_TX_LOCK(txr);
   1603 		ixgbe_txeof(txr);
   1604 #ifndef IXGBE_LEGACY_TX
   1605 		if (!drbr_empty(ifp, txr->br))
   1606 			ixgbe_mq_start_locked(ifp, txr);
   1607 #else
   1608 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1609 			ixgbe_start_locked(txr, ifp);
   1610 #endif
   1611 		IXGBE_TX_UNLOCK(txr);
   1612 	}
   1613 
   1614 	/* Reenable this interrupt */
   1615 	if (que->res != NULL)
   1616 		ixgbe_enable_queue(adapter, que->msix);
   1617 	else
   1618 		ixgbe_enable_intr(adapter);
   1619 	return;
   1620 }
   1621 
   1622 
   1623 /*********************************************************************
   1624  *
   1625  *  Legacy Interrupt Service routine
   1626  *
   1627  **********************************************************************/
   1628 
   1629 static int
   1630 ixgbe_legacy_irq(void *arg)
   1631 {
   1632 	struct ix_queue *que = arg;
   1633 	struct adapter	*adapter = que->adapter;
   1634 	struct ixgbe_hw	*hw = &adapter->hw;
   1635 	struct ifnet    *ifp = adapter->ifp;
   1636 	struct 		tx_ring *txr = adapter->tx_rings;
   1637 	bool		more = false;
   1638 	u32       	reg_eicr;
   1639 
   1640 
   1641 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1642 
   1643 	adapter->stats.legint.ev_count++;
   1644 	++que->irqs;
   1645 	if (reg_eicr == 0) {
   1646 		adapter->stats.intzero.ev_count++;
   1647 		if ((ifp->if_flags & IFF_UP) != 0)
   1648 			ixgbe_enable_intr(adapter);
   1649 		return 0;
   1650 	}
   1651 
   1652 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1653 		more = ixgbe_rxeof(que);
   1654 
   1655 		IXGBE_TX_LOCK(txr);
   1656 		ixgbe_txeof(txr);
   1657 #ifdef IXGBE_LEGACY_TX
   1658 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1659 			ixgbe_start_locked(txr, ifp);
   1660 #else
   1661 		if (!drbr_empty(ifp, txr->br))
   1662 			ixgbe_mq_start_locked(ifp, txr);
   1663 #endif
   1664 		IXGBE_TX_UNLOCK(txr);
   1665 	}
   1666 
   1667 	/* Check for fan failure */
   1668 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1669 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1670                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1671 		    "REPLACE IMMEDIATELY!!\n");
   1672 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1673 	}
   1674 
   1675 	/* Link status change */
   1676 	if (reg_eicr & IXGBE_EICR_LSC)
   1677 		softint_schedule(adapter->link_si);
   1678 
   1679 	if (more)
   1680 #ifndef IXGBE_LEGACY_TX
   1681 		softint_schedule(txr->txq_si);
   1682 #else
   1683 		softint_schedule(que->que_si);
   1684 #endif
   1685 	else
   1686 		ixgbe_enable_intr(adapter);
   1687 	return 1;
   1688 }
   1689 
   1690 
   1691 #if defined(NETBSD_MSI_OR_MSIX)
   1692 /*********************************************************************
   1693  *
   1694  *  MSIX Queue Interrupt Service routine
   1695  *
   1696  **********************************************************************/
   1697 static int
   1698 ixgbe_msix_que(void *arg)
   1699 {
   1700 	struct ix_queue	*que = arg;
   1701 	struct adapter  *adapter = que->adapter;
   1702 	struct ifnet    *ifp = adapter->ifp;
   1703 	struct tx_ring	*txr = que->txr;
   1704 	struct rx_ring	*rxr = que->rxr;
   1705 	bool		more;
   1706 	u32		newitr = 0;
   1707 
   1708 	/* Protect against spurious interrupts */
   1709 	if ((ifp->if_flags & IFF_RUNNING) == 0)
   1710 		return 0;
   1711 
   1712 	ixgbe_disable_queue(adapter, que->msix);
   1713 	++que->irqs;
   1714 
   1715 	more = ixgbe_rxeof(que);
   1716 
   1717 	IXGBE_TX_LOCK(txr);
   1718 	ixgbe_txeof(txr);
   1719 #ifdef IXGBE_LEGACY_TX
   1720 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1721 		ixgbe_start_locked(txr, ifp);
   1722 #else
   1723 	if (!drbr_empty(ifp, txr->br))
   1724 		ixgbe_mq_start_locked(ifp, txr);
   1725 #endif
   1726 	IXGBE_TX_UNLOCK(txr);
   1727 
   1728 	/* Do AIM now? */
   1729 
   1730 	if (ixgbe_enable_aim == FALSE)
   1731 		goto no_calc;
   1732 	/*
   1733 	** Do Adaptive Interrupt Moderation:
    1734 	**  - Write out last calculated setting
   1735 	**  - Calculate based on average size over
   1736 	**    the last interval.
   1737 	*/
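         	/*
         	 * Worked example (illustrative): an interval averaging
         	 * 1500-byte frames gives newitr = 1500 + 24 = 1524; that is
         	 * above the (300,1200) mid range, so it is halved to 762
         	 * before being written to EITR on the next interrupt.
         	 */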
   1738         if (que->eitr_setting)
   1739                 IXGBE_WRITE_REG(&adapter->hw,
   1740                     IXGBE_EITR(que->msix), que->eitr_setting);
   1741 
   1742         que->eitr_setting = 0;
   1743 
   1744         /* Idle, do nothing */
   1745         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1746                 goto no_calc;
   1747 
   1748 	if ((txr->bytes) && (txr->packets))
   1749                	newitr = txr->bytes/txr->packets;
   1750 	if ((rxr->bytes) && (rxr->packets))
   1751 		newitr = max(newitr,
   1752 		    (rxr->bytes / rxr->packets));
   1753 	newitr += 24; /* account for hardware frame, crc */
   1754 
   1755 	/* set an upper boundary */
   1756 	newitr = min(newitr, 3000);
   1757 
   1758 	/* Be nice to the mid range */
   1759 	if ((newitr > 300) && (newitr < 1200))
   1760 		newitr = (newitr / 3);
   1761 	else
   1762 		newitr = (newitr / 2);
   1763 
   1764         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1765                 newitr |= newitr << 16;
   1766         else
   1767                 newitr |= IXGBE_EITR_CNT_WDIS;
   1768 
   1769         /* save for next interrupt */
   1770         que->eitr_setting = newitr;
   1771 
   1772         /* Reset state */
   1773         txr->bytes = 0;
   1774         txr->packets = 0;
   1775         rxr->bytes = 0;
   1776         rxr->packets = 0;
   1777 
   1778 no_calc:
   1779 	if (more)
   1780 		softint_schedule(que->que_si);
   1781 	else
   1782 		ixgbe_enable_queue(adapter, que->msix);
   1783 	return 1;
   1784 }
   1785 
   1786 
   1787 static int
   1788 ixgbe_msix_link(void *arg)
   1789 {
   1790 	struct adapter	*adapter = arg;
   1791 	struct ixgbe_hw *hw = &adapter->hw;
   1792 	u32		reg_eicr;
   1793 
   1794 	++adapter->link_irq.ev_count;
   1795 
   1796 	/* First get the cause */
   1797 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1798 	/* Be sure the queue bits are not cleared */
   1799 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
   1800 	/* Clear interrupt with write */
   1801 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1802 
   1803 	/* Link status change */
   1804 	if (reg_eicr & IXGBE_EICR_LSC)
   1805 		softint_schedule(adapter->link_si);
   1806 
   1807 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1808 #ifdef IXGBE_FDIR
   1809 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1810 			/* This is probably overkill :) */
   1811 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1812 				return 1;
   1813                 	/* Disable the interrupt */
   1814 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1815 			softint_schedule(adapter->fdir_si);
   1816 		} else
   1817 #endif
   1818 		if (reg_eicr & IXGBE_EICR_ECC) {
   1819                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1820 			    "Please Reboot!!\n");
   1821 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
    1822 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1825                 	/* Clear the interrupt */
   1826                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1827 			softint_schedule(adapter->msf_si);
   1828         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1829                 	/* Clear the interrupt */
   1830                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1831 			softint_schedule(adapter->mod_si);
   1832 		}
   1833         }
   1834 
   1835 	/* Check for fan failure */
   1836 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1837 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1838                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1839 		    "REPLACE IMMEDIATELY!!\n");
   1840 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1841 	}
   1842 
   1843 	/* Check for over temp condition */
   1844 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1845 	    (reg_eicr & IXGBE_EICR_TS)) {
   1846 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1847 		    "PHY IS SHUT DOWN!!\n");
   1848 		device_printf(adapter->dev, "System shutdown required\n");
   1849 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1850 	}
   1851 
   1852 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1853 	return 1;
   1854 }
   1855 #endif
   1856 
   1857 /*********************************************************************
   1858  *
   1859  *  Media Ioctl callback
   1860  *
   1861  *  This routine is called whenever the user queries the status of
   1862  *  the interface using ifconfig.
   1863  *
   1864  **********************************************************************/
   1865 static void
   1866 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1867 {
   1868 	struct adapter *adapter = ifp->if_softc;
   1869 	struct ixgbe_hw *hw = &adapter->hw;
   1870 
   1871 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1872 	IXGBE_CORE_LOCK(adapter);
   1873 	ixgbe_update_link_status(adapter);
   1874 
   1875 	ifmr->ifm_status = IFM_AVALID;
   1876 	ifmr->ifm_active = IFM_ETHER;
   1877 
   1878 	if (!adapter->link_active) {
   1879 		IXGBE_CORE_UNLOCK(adapter);
   1880 		return;
   1881 	}
   1882 
   1883 	ifmr->ifm_status |= IFM_ACTIVE;
   1884 
   1885 	/*
    1886 	 * Not all NICs are 1000baseSX; the X540T, for example, is not.
    1887 	 * We must set the media properly based on the NIC model.
   1888 	 */
   1889 	switch (hw->device_id) {
   1890 	case IXGBE_DEV_ID_X540T:
   1891 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1892 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1893 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1894 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
   1895 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1896 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1897 		break;
   1898 	default:
   1899 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1900 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1901 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1902 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1903 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1904 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1905 		break;
   1906 	}
   1907 
   1908 	IXGBE_CORE_UNLOCK(adapter);
   1909 
   1910 	return;
   1911 }
   1912 
   1913 /*********************************************************************
   1914  *
   1915  *  Media Ioctl callback
   1916  *
   1917  *  This routine is called when the user changes speed/duplex using
    1918  *  the media/mediaopt options with ifconfig.
   1919  *
   1920  **********************************************************************/
   1921 static int
   1922 ixgbe_media_change(struct ifnet * ifp)
   1923 {
   1924 	struct adapter *adapter = ifp->if_softc;
   1925 	struct ifmedia *ifm = &adapter->media;
   1926 
   1927 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1928 
   1929 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1930 		return (EINVAL);
   1931 
   1932         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1933 	case IFM_10G_T:
   1934 	case IFM_AUTO:
   1935 		adapter->hw.phy.autoneg_advertised =
   1936 		    IXGBE_LINK_SPEED_100_FULL |
   1937 		    IXGBE_LINK_SPEED_1GB_FULL |
   1938 		    IXGBE_LINK_SPEED_10GB_FULL;
   1939                 break;
   1940         default:
   1941                 device_printf(adapter->dev, "Only auto media type\n");
   1942 		return (EINVAL);
   1943         }
   1944 
   1945 	return (0);
   1946 }
   1947 
   1948 /*********************************************************************
   1949  *
   1950  *  This routine maps the mbufs to tx descriptors, allowing the
   1951  *  TX engine to transmit the packets.
   1952  *  	- return 0 on success, positive on failure
   1953  *
   1954  **********************************************************************/
   1955 
   1956 static int
   1957 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1958 {
   1959 	struct m_tag *mtag;
   1960 	struct adapter  *adapter = txr->adapter;
   1961 	struct ethercom *ec = &adapter->osdep.ec;
   1962 	u32		olinfo_status = 0, cmd_type_len;
   1963 	int             i, j, error;
   1964 	int		first;
   1965 	bus_dmamap_t	map;
   1966 	struct ixgbe_tx_buf *txbuf;
   1967 	union ixgbe_adv_tx_desc *txd = NULL;
   1968 
   1969 	/* Basic descriptor defines */
   1970         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1971 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1972 
   1973 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1974         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1975 
   1976         /*
   1977          * Important to capture the first descriptor
   1978          * used because it will contain the index of
   1979          * the one we tell the hardware to report back
   1980          */
   1981         first = txr->next_avail_desc;
   1982 	txbuf = &txr->tx_buffers[first];
   1983 	map = txbuf->map;
   1984 
   1985 	/*
   1986 	 * Map the packet for DMA.
   1987 	 */
   1988 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1989 	    m_head, BUS_DMA_NOWAIT);
   1990 
   1991 	if (__predict_false(error)) {
   1992 
   1993 		switch (error) {
   1994 		case EAGAIN:
   1995 			adapter->eagain_tx_dma_setup.ev_count++;
   1996 			return EAGAIN;
   1997 		case ENOMEM:
   1998 			adapter->enomem_tx_dma_setup.ev_count++;
   1999 			return EAGAIN;
   2000 		case EFBIG:
   2001 			/*
   2002 			 * XXX Try it again?
   2003 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   2004 			 */
   2005 			adapter->efbig_tx_dma_setup.ev_count++;
   2006 			return error;
   2007 		case EINVAL:
   2008 			adapter->einval_tx_dma_setup.ev_count++;
   2009 			return error;
   2010 		default:
   2011 			adapter->other_tx_dma_setup.ev_count++;
   2012 			return error;
   2013 		}
   2014 	}
   2015 
    2016 	/* Make certain there are enough descriptors (two spare: one for the offload context below, one slack) */
   2017 	if (map->dm_nsegs > txr->tx_avail - 2) {
   2018 		txr->no_desc_avail.ev_count++;
   2019 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   2020 		return EAGAIN;
   2021 	}
   2022 
   2023 	/*
    2024 	** Set up the appropriate offload context;
    2025 	** this will consume the first descriptor.
   2026 	*/
   2027 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   2028 	if (__predict_false(error)) {
   2029 		return (error);
   2030 	}
   2031 
   2032 #ifdef IXGBE_FDIR
   2033 	/* Do the flow director magic */
   2034 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2035 		++txr->atr_count;
   2036 		if (txr->atr_count >= atr_sample_rate) {
   2037 			ixgbe_atr(txr, m_head);
   2038 			txr->atr_count = 0;
   2039 		}
   2040 	}
   2041 #endif
   2042 
   2043 	i = txr->next_avail_desc;
   2044 	for (j = 0; j < map->dm_nsegs; j++) {
   2045 		bus_size_t seglen;
   2046 		bus_addr_t segaddr;
   2047 
   2048 		txbuf = &txr->tx_buffers[i];
   2049 		txd = &txr->tx_base[i];
   2050 		seglen = map->dm_segs[j].ds_len;
   2051 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2052 
   2053 		txd->read.buffer_addr = segaddr;
   2054 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2055 		    cmd_type_len | seglen);
   2056 		txd->read.olinfo_status = htole32(olinfo_status);
   2057 
   2058 		if (++i == txr->num_desc)
   2059 			i = 0;
   2060 	}
   2061 
   2062 	txd->read.cmd_type_len |=
   2063 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2064 	txr->tx_avail -= map->dm_nsegs;
   2065 	txr->next_avail_desc = i;
   2066 
   2067 	txbuf->m_head = m_head;
   2068 	/*
   2069 	** Here we swap the map so the last descriptor,
    2070 	** which gets the completion interrupt, has the
   2071 	** real map, and the first descriptor gets the
   2072 	** unused map from this descriptor.
   2073 	*/
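         	/*
         	 * Example (hypothetical 3-segment frame): the loaded DMA map
         	 * ends up on the last buffer touched, while tx_buffers[first]
         	 * keeps the spare map plus the EOP pointer set just below.
         	 */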
   2074 	txr->tx_buffers[first].map = txbuf->map;
   2075 	txbuf->map = map;
   2076 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2077 	    BUS_DMASYNC_PREWRITE);
   2078 
   2079         /* Set the EOP descriptor that will be marked done */
   2080         txbuf = &txr->tx_buffers[first];
   2081 	txbuf->eop = txd;
   2082 
   2083         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2084 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2085 	/*
   2086 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2087 	 * hardware that this frame is available to transmit.
   2088 	 */
   2089 	++txr->total_packets.ev_count;
   2090 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2091 
   2092 	return 0;
   2093 }
   2094 
   2095 static void
   2096 ixgbe_set_promisc(struct adapter *adapter)
   2097 {
   2098 	struct ether_multi *enm;
   2099 	struct ether_multistep step;
   2100 	u_int32_t       reg_rctl;
   2101 	struct ethercom *ec = &adapter->osdep.ec;
   2102 	struct ifnet   *ifp = adapter->ifp;
   2103 	int		mcnt = 0;
   2104 
   2105 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2106 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2107 	if (ifp->if_flags & IFF_ALLMULTI)
   2108 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2109 	else {
   2110 		ETHER_FIRST_MULTI(step, ec, enm);
   2111 		while (enm != NULL) {
   2112 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2113 				break;
   2114 			mcnt++;
   2115 			ETHER_NEXT_MULTI(step, enm);
   2116 		}
   2117 	}
   2118 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2119 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2120 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2121 
   2122 	if (ifp->if_flags & IFF_PROMISC) {
   2123 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2124 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2125 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2126 		reg_rctl |= IXGBE_FCTRL_MPE;
   2127 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2128 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2129 	}
   2130 	return;
   2131 }
   2132 
   2133 
   2134 /*********************************************************************
   2135  *  Multicast Update
   2136  *
    2137  *  This routine is called whenever the multicast address list is updated.
   2138  *
   2139  **********************************************************************/
   2140 #define IXGBE_RAR_ENTRIES 16
   2141 
   2142 static void
   2143 ixgbe_set_multi(struct adapter *adapter)
   2144 {
   2145 	struct ether_multi *enm;
   2146 	struct ether_multistep step;
   2147 	u32	fctrl;
   2148 	u8	*mta;
   2149 	u8	*update_ptr;
   2150 	int	mcnt = 0;
   2151 	struct ethercom *ec = &adapter->osdep.ec;
   2152 	struct ifnet   *ifp = adapter->ifp;
   2153 
   2154 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2155 
   2156 	mta = adapter->mta;
   2157 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2158 	    MAX_NUM_MULTICAST_ADDRESSES);
   2159 
   2160 	ifp->if_flags &= ~IFF_ALLMULTI;
   2161 	ETHER_FIRST_MULTI(step, ec, enm);
   2162 	while (enm != NULL) {
   2163 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2164 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2165 			ETHER_ADDR_LEN) != 0)) {
   2166 			ifp->if_flags |= IFF_ALLMULTI;
   2167 			break;
   2168 		}
   2169 		bcopy(enm->enm_addrlo,
   2170 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2171 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2172 		mcnt++;
   2173 		ETHER_NEXT_MULTI(step, enm);
   2174 	}
   2175 
   2176 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2177 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2178 	if (ifp->if_flags & IFF_PROMISC)
   2179 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2180 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2181 		fctrl |= IXGBE_FCTRL_MPE;
   2182 	}
   2183 
   2184 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2185 
   2186 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2187 		update_ptr = mta;
   2188 		ixgbe_update_mc_addr_list(&adapter->hw,
   2189 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2190 	}
   2191 
   2192 	return;
   2193 }
   2194 
   2195 /*
    2196  * This is an iterator function needed by the multicast shared code.
    2197  * It simply feeds the shared code routine the addresses in the
    2198  * array built by ixgbe_set_multi(), one at a time.
   2199  */
   2200 static u8 *
   2201 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2202 {
   2203 	u8 *addr = *update_ptr;
   2204 	u8 *newptr;
   2205 	*vmdq = 0;
   2206 
   2207 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2208 	*update_ptr = newptr;
   2209 	return addr;
   2210 }
   2211 
   2212 
   2213 /*********************************************************************
   2214  *  Timer routine
   2215  *
    2216  *  This routine checks for link status, updates statistics,
   2217  *  and runs the watchdog check.
   2218  *
   2219  **********************************************************************/
   2220 
   2221 static void
   2222 ixgbe_local_timer1(void *arg)
   2223 {
   2224 	struct adapter	*adapter = arg;
   2225 	device_t	dev = adapter->dev;
   2226 	struct ix_queue *que = adapter->queues;
   2227 	struct tx_ring	*txr = adapter->tx_rings;
   2228 	int		hung = 0, paused = 0;
   2229 
   2230 	KASSERT(mutex_owned(&adapter->core_mtx));
   2231 
   2232 	/* Check for pluggable optics */
   2233 	if (adapter->sfp_probe)
   2234 		if (!ixgbe_sfp_probe(adapter))
   2235 			goto out; /* Nothing to do */
   2236 
   2237 	ixgbe_update_link_status(adapter);
   2238 	ixgbe_update_stats_counters(adapter);
   2239 
   2240 	/*
   2241 	 * If the interface has been paused
   2242 	 * then don't do the watchdog check
   2243 	 */
   2244 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2245 		paused = 1;
   2246 
   2247 	/*
   2248 	** Check the TX queues status
   2249 	**      - watchdog only if all queues show hung
   2250 	*/
   2251 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2252 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2253 		    (paused == 0))
   2254 			++hung;
   2255 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2256 #ifndef IXGBE_LEGACY_TX
   2257 			softint_schedule(txr->txq_si);
   2258 #else
   2259 			softint_schedule(que->que_si);
   2260 #endif
   2261 	}
    2262 	/* Only truly watchdog if all queues show hung */
   2263 	if (hung == adapter->num_queues)
   2264 		goto watchdog;
   2265 
   2266 out:
   2267 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2268 	return;
   2269 
   2270 watchdog:
   2271 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
   2272 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2273 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2274 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2275 	device_printf(dev, "TX(%d) desc avail = %d, "
   2276 	    "Next TX to Clean = %d\n",
   2277 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2278 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2279 	adapter->watchdog_events.ev_count++;
   2280 	ixgbe_init_locked(adapter);
   2281 }
   2282 
   2283 static void
   2284 ixgbe_local_timer(void *arg)
   2285 {
   2286 	struct adapter *adapter = arg;
   2287 
   2288 	IXGBE_CORE_LOCK(adapter);
   2289 	ixgbe_local_timer1(adapter);
   2290 	IXGBE_CORE_UNLOCK(adapter);
   2291 }
   2292 
   2293 /*
    2294 ** Note: this routine updates the OS on the link state;
   2295 **	the real check of the hardware only happens with
   2296 **	a link interrupt.
   2297 */
   2298 static void
   2299 ixgbe_update_link_status(struct adapter *adapter)
   2300 {
   2301 	struct ifnet	*ifp = adapter->ifp;
   2302 	device_t dev = adapter->dev;
   2303 
   2304 
    2305 	if (adapter->link_up) {
   2306 		if (adapter->link_active == FALSE) {
   2307 			if (bootverbose)
    2308 				device_printf(dev, "Link is up %d Gbps %s\n",
   2309 				    ((adapter->link_speed == 128)? 10:1),
   2310 				    "Full Duplex");
   2311 			adapter->link_active = TRUE;
   2312 			/* Update any Flow Control changes */
   2313 			ixgbe_fc_enable(&adapter->hw);
   2314 			if_link_state_change(ifp, LINK_STATE_UP);
   2315 		}
   2316 	} else { /* Link down */
   2317 		if (adapter->link_active == TRUE) {
   2318 			if (bootverbose)
   2319 				device_printf(dev,"Link is Down\n");
   2320 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2321 			adapter->link_active = FALSE;
   2322 		}
   2323 	}
   2324 
   2325 	return;
   2326 }
   2327 
   2328 
   2329 static void
   2330 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2331 {
   2332 	struct adapter *adapter = ifp->if_softc;
   2333 
   2334 	IXGBE_CORE_LOCK(adapter);
   2335 	ixgbe_stop(adapter);
   2336 	IXGBE_CORE_UNLOCK(adapter);
   2337 }
   2338 
   2339 /*********************************************************************
   2340  *
   2341  *  This routine disables all traffic on the adapter by issuing a
    2342  *  global reset on the MAC and deallocating TX/RX buffers.
   2343  *
   2344  **********************************************************************/
   2345 
   2346 static void
   2347 ixgbe_stop(void *arg)
   2348 {
   2349 	struct ifnet   *ifp;
   2350 	struct adapter *adapter = arg;
   2351 	struct ixgbe_hw *hw = &adapter->hw;
   2352 	ifp = adapter->ifp;
   2353 
   2354 	KASSERT(mutex_owned(&adapter->core_mtx));
   2355 
   2356 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2357 	ixgbe_disable_intr(adapter);
   2358 	callout_stop(&adapter->timer);
   2359 
   2360 	/* Let the stack know...*/
   2361 	ifp->if_flags &= ~IFF_RUNNING;
   2362 
   2363 	ixgbe_reset_hw(hw);
   2364 	hw->adapter_stopped = FALSE;
   2365 	ixgbe_stop_adapter(hw);
   2366 	if (hw->mac.type == ixgbe_mac_82599EB)
   2367 		ixgbe_stop_mac_link_on_d3_82599(hw);
   2368 	/* Turn off the laser - noop with no optics */
   2369 	ixgbe_disable_tx_laser(hw);
   2370 
   2371 	/* Update the stack */
   2372 	adapter->link_up = FALSE;
   2373 	ixgbe_update_link_status(adapter);
   2374 
   2375 	/* reprogram the RAR[0] in case user changed it. */
   2376 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2377 
   2378 	return;
   2379 }
   2380 
   2381 
   2382 /*********************************************************************
   2383  *
   2384  *  Determine hardware revision.
   2385  *
   2386  **********************************************************************/
   2387 static void
   2388 ixgbe_identify_hardware(struct adapter *adapter)
   2389 {
   2390 	pcitag_t tag;
   2391 	pci_chipset_tag_t pc;
   2392 	pcireg_t subid, id;
   2393 	struct ixgbe_hw *hw = &adapter->hw;
   2394 
   2395 	pc = adapter->osdep.pc;
   2396 	tag = adapter->osdep.tag;
   2397 
   2398 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2399 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2400 
   2401 	/* Save off the information about this board */
   2402 	hw->vendor_id = PCI_VENDOR(id);
   2403 	hw->device_id = PCI_PRODUCT(id);
   2404 	hw->revision_id =
   2405 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2406 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2407 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2408 
   2409 	/* We need this here to set the num_segs below */
   2410 	ixgbe_set_mac_type(hw);
   2411 
   2412 	/* Pick up the 82599 and VF settings */
   2413 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2414 		hw->phy.smart_speed = ixgbe_smart_speed;
   2415 		adapter->num_segs = IXGBE_82599_SCATTER;
   2416 	} else
   2417 		adapter->num_segs = IXGBE_82598_SCATTER;
   2418 
   2419 	return;
   2420 }
   2421 
   2422 /*********************************************************************
   2423  *
   2424  *  Determine optic type
   2425  *
   2426  **********************************************************************/
   2427 static void
   2428 ixgbe_setup_optics(struct adapter *adapter)
   2429 {
   2430 	struct ixgbe_hw *hw = &adapter->hw;
   2431 	int		layer;
   2432 
   2433 	layer = ixgbe_get_supported_physical_layer(hw);
   2434 
   2435 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2436 		adapter->optics = IFM_10G_T;
   2437 		return;
   2438 	}
   2439 
   2440 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2441 		adapter->optics = IFM_1000_T;
   2442 		return;
   2443 	}
   2444 
   2445 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2446 		adapter->optics = IFM_1000_SX;
   2447 		return;
   2448 	}
   2449 
   2450 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2451 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2452 		adapter->optics = IFM_10G_LR;
   2453 		return;
   2454 	}
   2455 
   2456 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2457 		adapter->optics = IFM_10G_SR;
   2458 		return;
   2459 	}
   2460 
   2461 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2462 		adapter->optics = IFM_10G_TWINAX;
   2463 		return;
   2464 	}
   2465 
   2466 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2467 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2468 		adapter->optics = IFM_10G_CX4;
   2469 		return;
   2470 	}
   2471 
   2472 	/* If we get here just set the default */
   2473 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2474 	return;
   2475 }
   2476 
   2477 /*********************************************************************
   2478  *
   2479  *  Setup the Legacy or MSI Interrupt handler
   2480  *
   2481  **********************************************************************/
   2482 static int
   2483 ixgbe_allocate_legacy(struct adapter *adapter,
   2484     const struct pci_attach_args *pa)
   2485 {
   2486 	device_t	dev = adapter->dev;
   2487 	struct		ix_queue *que = adapter->queues;
   2488 #ifndef IXGBE_LEGACY_TX
   2489 	struct tx_ring		*txr = adapter->tx_rings;
   2490 #endif
   2491 #ifndef NETBSD_MSI_OR_MSIX
   2492 	pci_intr_handle_t	ih;
   2493 #else
   2494 	int		counts[PCI_INTR_TYPE_SIZE];
   2495 	pci_intr_type_t intr_type, max_type;
   2496 #endif
   2497 	char intrbuf[PCI_INTRSTR_LEN];
   2498 	const char	*intrstr = NULL;
   2499 
   2500 #ifndef NETBSD_MSI_OR_MSIX
   2501 	/* We allocate a single interrupt resource */
   2502  	if (pci_intr_map(pa, &ih) != 0) {
   2503 		aprint_error_dev(dev, "unable to map interrupt\n");
   2504 		return ENXIO;
   2505 	} else {
   2506 		intrstr = pci_intr_string(adapter->osdep.pc, ih, intrbuf,
   2507 		    sizeof(intrbuf));
   2508 	}
   2509 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, ih,
   2510 	    IPL_NET, ixgbe_legacy_irq, que);
   2511 #else
   2512 	/* Allocation settings */
   2513 	max_type = PCI_INTR_TYPE_MSI;
   2514 	counts[PCI_INTR_TYPE_MSIX] = 0;
   2515 	counts[PCI_INTR_TYPE_MSI] = 1;
   2516 	counts[PCI_INTR_TYPE_INTX] = 1;
   2517 
   2518 alloc_retry:
   2519 	if (pci_intr_alloc(pa, &adapter->osdep.intrs, counts, max_type) != 0) {
   2520 		aprint_error_dev(dev, "couldn't alloc interrupt\n");
   2521 		return ENXIO;
   2522 	}
   2523 	adapter->osdep.nintrs = 1;
   2524 	intrstr = pci_intr_string(adapter->osdep.pc, adapter->osdep.intrs[0],
   2525 	    intrbuf, sizeof(intrbuf));
   2526 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc,
   2527 	    adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que);
   2528 	if (adapter->osdep.ihs[0] == NULL) {
   2529 		intr_type = pci_intr_type(adapter->osdep.intrs[0]);
   2530 		aprint_error_dev(dev,"unable to establish %s\n",
   2531 		    (intr_type == PCI_INTR_TYPE_MSI) ? "MSI" : "INTx");
   2532 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2533 		switch (intr_type) {
   2534 		case PCI_INTR_TYPE_MSI:
   2535 			/* The next try is for INTx: Disable MSI */
   2536 			max_type = PCI_INTR_TYPE_INTX;
   2537 			counts[PCI_INTR_TYPE_INTX] = 1;
   2538 			goto alloc_retry;
   2539 		case PCI_INTR_TYPE_INTX:
   2540 		default:
   2541 			/* See below */
   2542 			break;
   2543 		}
   2544 	}
   2545 #endif
   2546 	if (adapter->osdep.ihs[0] == NULL) {
   2547 		aprint_error_dev(dev,
   2548 		    "couldn't establish interrupt%s%s\n",
   2549 		    intrstr ? " at " : "", intrstr ? intrstr : "");
   2550 #ifdef NETBSD_MSI_OR_MSIX
   2551 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2552 #endif
   2553 		return ENXIO;
   2554 	}
   2555 	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
   2556 	/*
   2557 	 * Try allocating a fast interrupt and the associated deferred
   2558 	 * processing contexts.
   2559 	 */
   2560 #ifndef IXGBE_LEGACY_TX
   2561 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2562 	    txr);
   2563 #endif
   2564 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2565 
   2566 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2567 	adapter->link_si =
   2568 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2569 	adapter->mod_si =
   2570 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2571 	adapter->msf_si =
   2572 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2573 
   2574 #ifdef IXGBE_FDIR
   2575 	adapter->fdir_si =
   2576 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2577 #endif
   2578 	if (que->que_si == NULL ||
   2579 	    adapter->link_si == NULL ||
   2580 	    adapter->mod_si == NULL ||
   2581 #ifdef IXGBE_FDIR
   2582 	    adapter->fdir_si == NULL ||
   2583 #endif
   2584 	    adapter->msf_si == NULL) {
   2585 		aprint_error_dev(dev,
   2586 		    "could not establish software interrupts\n");
   2587 		return ENXIO;
   2588 	}
   2589 
   2590 	/* For simplicity in the handlers */
   2591 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2592 
   2593 	return (0);
   2594 }
   2595 
   2596 
   2597 /*********************************************************************
   2598  *
   2599  *  Setup MSIX Interrupt resources and handlers
   2600  *
   2601  **********************************************************************/
   2602 static int
   2603 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2604 {
   2605 #if !defined(NETBSD_MSI_OR_MSIX)
   2606 	return 0;
   2607 #else
   2608 	device_t        dev = adapter->dev;
   2609 	struct 		ix_queue *que = adapter->queues;
   2610 	struct  	tx_ring *txr = adapter->tx_rings;
   2611 	pci_chipset_tag_t pc;
   2612 	char		intrbuf[PCI_INTRSTR_LEN];
   2613 	const char	*intrstr = NULL;
   2614 	int 		error, vector = 0;
   2615 	int		cpu_id = 0;
   2616 	kcpuset_t	*affinity;
   2617 
   2618 	pc = adapter->osdep.pc;
   2619 #ifdef	RSS
   2620 	cpuset_t cpu_mask;
   2621 	/*
   2622 	 * If we're doing RSS, the number of queues needs to
   2623 	 * match the number of RSS buckets that are configured.
   2624 	 *
   2625 	 * + If there's more queues than RSS buckets, we'll end
   2626 	 *   up with queues that get no traffic.
   2627 	 *
   2628 	 * + If there's more RSS buckets than queues, we'll end
   2629 	 *   up having multiple RSS buckets map to the same queue,
   2630 	 *   so there'll be some contention.
   2631 	 */
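         	/* Illustrative: 4 RSS buckets but 8 queues leaves queues 4-7 idle. */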
   2632 	if (adapter->num_queues != rss_getnumbuckets()) {
   2633 		device_printf(dev,
   2634 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
   2635 		    "; performance will be impacted.\n",
   2636 		    __func__,
   2637 		    adapter->num_queues,
   2638 		    rss_getnumbuckets());
   2639 	}
   2640 #endif
   2641 
   2642 	adapter->osdep.nintrs = adapter->num_queues + 1;
   2643 	if (pci_msix_alloc_exact(pa, &adapter->osdep.intrs,
   2644 	    adapter->osdep.nintrs) != 0) {
   2645 		aprint_error_dev(dev,
   2646 		    "failed to allocate MSI-X interrupt\n");
   2647 		return (ENXIO);
   2648 	}
   2649 
   2650 	kcpuset_create(&affinity, false);
   2651 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2652 		intrstr = pci_intr_string(pc, adapter->osdep.intrs[i], intrbuf,
   2653 		    sizeof(intrbuf));
   2654 #ifdef IXG_MPSAFE
    2655 		pci_intr_setattr(pc, &adapter->osdep.intrs[i], PCI_INTR_MPSAFE,
   2656 		    true);
   2657 #endif
   2658 		/* Set the handler function */
   2659 		que->res = adapter->osdep.ihs[i] = pci_intr_establish(pc,
   2660 		    adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que);
   2661 		if (que->res == NULL) {
   2662 			pci_intr_release(pc, adapter->osdep.intrs,
   2663 			    adapter->osdep.nintrs);
   2664 			aprint_error_dev(dev,
   2665 			    "Failed to register QUE handler\n");
   2666 			kcpuset_destroy(affinity);
   2667 			return ENXIO;
   2668 		}
   2669 		que->msix = vector;
    2670         	adapter->que_mask |= (u64)(1ULL << que->msix);
   2671 #ifdef	RSS
   2672 		/*
   2673 		 * The queue ID is used as the RSS layer bucket ID.
   2674 		 * We look up the queue ID -> RSS CPU ID and select
   2675 		 * that.
   2676 		 */
   2677 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
   2678 #else
   2679 		/*
   2680 		 * Bind the msix vector, and thus the
   2681 		 * rings to the corresponding cpu.
   2682 		 *
   2683 		 * This just happens to match the default RSS round-robin
   2684 		 * bucket -> queue -> CPU allocation.
   2685 		 */
   2686 		if (adapter->num_queues > 1)
   2687 			cpu_id = i;
   2688 #endif
   2689 		/* Round-robin affinity */
   2690 		kcpuset_zero(affinity);
   2691 		kcpuset_set(affinity, cpu_id % ncpu);
   2692 		error = interrupt_distribute(adapter->osdep.ihs[i], affinity,
   2693 		    NULL);
   2694 		aprint_normal_dev(dev, "for TX/RX, interrupting at %s",
   2695 		    intrstr);
   2696 		if (error == 0) {
   2697 #ifdef	RSS
    2698 			aprint_normal(", bound RSS bucket %d to CPU %d\n",
   2699 			    i, cpu_id);
   2700 #else
   2701 			aprint_normal(", bound queue %d to cpu %d\n",
   2702 			    i, cpu_id);
   2703 #endif
   2704 		} else
   2705 			aprint_normal("\n");
   2706 
   2707 #ifndef IXGBE_LEGACY_TX
   2708 		txr->txq_si = softint_establish(SOFTINT_NET,
   2709 		    ixgbe_deferred_mq_start, txr);
   2710 #endif
   2711 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2712 		    que);
   2713 		if (que->que_si == NULL) {
   2714 			aprint_error_dev(dev,
   2715 			    "could not establish software interrupt\n");
   2716 		}
   2717 	}
   2718 
   2719 	/* and Link */
   2720 	cpu_id++;
   2721 	intrstr = pci_intr_string(pc, adapter->osdep.intrs[vector], intrbuf,
   2722 	    sizeof(intrbuf));
   2723 #ifdef IXG_MPSAFE
   2724 	pci_intr_setattr(pc, &adapter->osdep.intrs[vector], PCI_INTR_MPSAFE,
   2725 	    true);
   2726 #endif
   2727 	/* Set the link handler function */
   2728 	adapter->osdep.ihs[vector] = pci_intr_establish(pc,
   2729 	    adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter);
   2730 	if (adapter->osdep.ihs[vector] == NULL) {
   2731 		adapter->res = NULL;
   2732 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2733 		kcpuset_destroy(affinity);
   2734 		return (ENXIO);
   2735 	}
   2736 	/* Round-robin affinity */
   2737 	kcpuset_zero(affinity);
   2738 	kcpuset_set(affinity, cpu_id % ncpu);
    2739 	error = interrupt_distribute(adapter->osdep.ihs[vector], affinity, NULL);
   2740 
   2741 	aprint_normal_dev(dev,
   2742 	    "for link, interrupting at %s", intrstr);
   2743 	if (error == 0)
   2744 		aprint_normal(", affinity to cpu %d\n", cpu_id);
   2745 	else
   2746 		aprint_normal("\n");
   2747 
   2748 	adapter->linkvec = vector;
   2749 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2750 	adapter->link_si =
   2751 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2752 	adapter->mod_si =
   2753 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2754 	adapter->msf_si =
   2755 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2756 #ifdef IXGBE_FDIR
   2757 	adapter->fdir_si =
   2758 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2759 #endif
   2760 
   2761 	kcpuset_destroy(affinity);
   2762 	return (0);
   2763 #endif
   2764 }
   2765 
   2766 /*
   2767  * Setup Either MSI/X or MSI
   2768  */
   2769 static int
   2770 ixgbe_setup_msix(struct adapter *adapter)
   2771 {
   2772 #if !defined(NETBSD_MSI_OR_MSIX)
   2773 	return 0;
   2774 #else
   2775 	device_t dev = adapter->dev;
   2776 	int want, queues, msgs;
   2777 
   2778 	/* Override by tuneable */
   2779 	if (ixgbe_enable_msix == 0)
   2780 		goto msi;
   2781 
   2782 	/* First try MSI/X */
   2783 	msgs = pci_msix_count(adapter->osdep.pc, adapter->osdep.tag);
   2784 	if (msgs < IXG_MSIX_NINTR)
   2785 		goto msi;
   2786 
   2787 	adapter->msix_mem = (void *)1; /* XXX */
   2788 
   2789 	/* Figure out a reasonable auto config value */
   2790 	queues = (ncpu > (msgs-1)) ? (msgs-1) : ncpu;
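         	/* e.g. (illustrative) 16 CPUs but 10 MSI-X messages -> 9 queues */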
   2791 
   2792 	/* Override based on tuneable */
   2793 	if (ixgbe_num_queues != 0)
   2794 		queues = ixgbe_num_queues;
   2795 
   2796 #ifdef	RSS
   2797 	/* If we're doing RSS, clamp at the number of RSS buckets */
   2798 	if (queues > rss_getnumbuckets())
   2799 		queues = rss_getnumbuckets();
   2800 #endif
   2801 
   2802 	/* reflect correct sysctl value */
   2803 	ixgbe_num_queues = queues;
   2804 
   2805 	/*
   2806 	** Want one vector (RX/TX pair) per queue
   2807 	** plus an additional for Link.
   2808 	*/
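         	/* e.g. (illustrative) 8 queues -> want = 9 vectors */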
   2809 	want = queues + 1;
   2810 	if (msgs >= want)
   2811 		msgs = want;
   2812 	else {
   2813                	aprint_error_dev(dev,
   2814 		    "MSIX Configuration Problem, "
    2815 		    "%d vectors available but %d wanted!\n",
   2816 		    msgs, want);
   2817 		goto msi;
   2818 	}
   2819 	device_printf(dev,
   2820 	    "Using MSIX interrupts with %d vectors\n", msgs);
   2821 	adapter->num_queues = queues;
   2822 	return (msgs);
   2823 
   2824 	/*
   2825 	** If MSIX alloc failed or provided us with
   2826 	** less than needed, free and fall through to MSI
   2827 	*/
   2828 msi:
   2829        	msgs = pci_msi_count(adapter->osdep.pc, adapter->osdep.tag);
   2830 	adapter->msix_mem = NULL; /* XXX */
   2831        	msgs = 1;
   2832 	aprint_normal_dev(dev,"Using an MSI interrupt\n");
   2833 	return (msgs);
   2834 #endif
   2835 }
   2836 
   2837 
   2838 static int
   2839 ixgbe_allocate_pci_resources(struct adapter *adapter,
   2840     const struct pci_attach_args *pa)
   2841 {
   2842 	pcireg_t	memtype;
   2843 	device_t        dev = adapter->dev;
   2844 	bus_addr_t addr;
   2845 	int flags;
   2846 
   2847 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2848 	switch (memtype) {
   2849 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2850 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2851 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2852 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2853 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2854 			goto map_err;
   2855 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2856 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2857 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2858 		}
   2859 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2860 		     adapter->osdep.mem_size, flags,
   2861 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2862 map_err:
   2863 			adapter->osdep.mem_size = 0;
   2864 			aprint_error_dev(dev, "unable to map BAR0\n");
   2865 			return ENXIO;
   2866 		}
   2867 		break;
   2868 	default:
   2869 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2870 		return ENXIO;
   2871 	}
   2872 
   2873 	/* Legacy defaults */
   2874 	adapter->num_queues = 1;
   2875 	adapter->hw.back = &adapter->osdep;
   2876 
   2877 	/*
   2878 	** Now setup MSI or MSI/X, should
   2879 	** return us the number of supported
   2880 	** vectors. (Will be 1 for MSI)
   2881 	*/
   2882 	adapter->msix = ixgbe_setup_msix(adapter);
   2883 	return (0);
   2884 }
   2885 
   2886 static void
   2887 ixgbe_free_pci_resources(struct adapter * adapter)
   2888 {
   2889 #if defined(NETBSD_MSI_OR_MSIX)
   2890 	struct 		ix_queue *que = adapter->queues;
   2891 #endif
   2892 	int		rid;
   2893 
   2894 #if defined(NETBSD_MSI_OR_MSIX)
   2895 	/*
   2896 	**  Release all msix queue resources:
   2897 	*/
   2898 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2899 		if (que->res != NULL)
   2900 			pci_intr_disestablish(adapter->osdep.pc,
   2901 			    adapter->osdep.ihs[i]);
   2902 	}
   2903 #endif
   2904 
   2905 	/* Clean the Legacy or Link interrupt last */
   2906 	if (adapter->linkvec) /* we are doing MSIX */
   2907 		rid = adapter->linkvec;
   2908 	else
   2909 		rid = 0;
   2910 
   2911 	if (adapter->osdep.ihs[rid] != NULL) {
   2912 		pci_intr_disestablish(adapter->osdep.pc,
   2913 		    adapter->osdep.ihs[rid]);
   2914 		adapter->osdep.ihs[rid] = NULL;
   2915 	}
   2916 
   2917 #if defined(NETBSD_MSI_OR_MSIX)
   2918 	pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs,
   2919 	    adapter->osdep.nintrs);
   2920 #endif
   2921 
   2922 	if (adapter->osdep.mem_size != 0) {
   2923 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2924 		    adapter->osdep.mem_bus_space_handle,
   2925 		    adapter->osdep.mem_size);
   2926 	}
   2927 
   2928 	return;
   2929 }
   2930 
   2931 /*********************************************************************
   2932  *
   2933  *  Setup networking device structure and register an interface.
   2934  *
   2935  **********************************************************************/
   2936 static int
   2937 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2938 {
   2939 	struct ethercom *ec = &adapter->osdep.ec;
   2940 	struct ixgbe_hw *hw = &adapter->hw;
   2941 	struct ifnet   *ifp;
   2942 
   2943 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2944 
   2945 	ifp = adapter->ifp = &ec->ec_if;
   2946 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2947 	ifp->if_baudrate = IF_Gbps(10);
   2948 	ifp->if_init = ixgbe_init;
   2949 	ifp->if_stop = ixgbe_ifstop;
   2950 	ifp->if_softc = adapter;
   2951 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2952 	ifp->if_ioctl = ixgbe_ioctl;
   2953 #ifndef IXGBE_LEGACY_TX
   2954 	ifp->if_transmit = ixgbe_mq_start;
   2955 	ifp->if_qflush = ixgbe_qflush;
   2956 #else
   2957 	ifp->if_start = ixgbe_start;
   2958 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2959 #if 0
   2960 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
   2961 #endif
   2962 	IFQ_SET_READY(&ifp->if_snd);
   2963 #endif
   2964 
   2965 	if_attach(ifp);
   2966 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2967 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2968 
   2969 	adapter->max_frame_size =
   2970 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
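         	/* e.g. the default 1500-byte MTU gives 1500 + 14 + 4 = 1518 bytes */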
   2971 
   2972 	/*
   2973 	 * Tell the upper layer(s) we support long frames.
   2974 	 */
   2975 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2976 
   2977 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2978 	ifp->if_capenable = 0;
   2979 
   2980 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2981 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2982 	ifp->if_capabilities |= IFCAP_LRO;
   2983 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2984 	    		    | ETHERCAP_VLAN_MTU;
   2985 	ec->ec_capenable = ec->ec_capabilities;
   2986 
   2987 	/*
    2988 	** Don't turn this on by default: if vlans are
    2989 	** created on another pseudo device (e.g. lagg),
    2990 	** vlan events are not passed through, breaking
    2991 	** operation, but with HW FILTER off it works. If
    2992 	** you use vlans directly on the ixgbe driver you can
    2993 	** enable this and get full hardware tag filtering.
   2994 	*/
   2995 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2996 
   2997 	/*
   2998 	 * Specify the media types supported by this adapter and register
   2999 	 * callbacks to update media and link information
   3000 	 */
   3001 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   3002 		     ixgbe_media_status);
   3003 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   3004 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   3005 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   3006 		ifmedia_add(&adapter->media,
   3007 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   3008 		ifmedia_add(&adapter->media,
   3009 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   3010 	}
   3011 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   3012 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   3013 
   3014 	return (0);
   3015 }
   3016 
   3017 static void
   3018 ixgbe_config_link(struct adapter *adapter)
   3019 {
   3020 	struct ixgbe_hw *hw = &adapter->hw;
   3021 	u32	autoneg, err = 0;
   3022 	bool	sfp, negotiate;
   3023 
   3024 	sfp = ixgbe_is_sfp(hw);
   3025 
   3026 	if (sfp) {
   3027 		void *ip;
   3028 
   3029 		if (hw->phy.multispeed_fiber) {
   3030 			hw->mac.ops.setup_sfp(hw);
   3031 			ixgbe_enable_tx_laser(hw);
   3032 			ip = adapter->msf_si;
   3033 		} else {
   3034 			ip = adapter->mod_si;
   3035 		}
   3036 
   3037 		kpreempt_disable();
   3038 		softint_schedule(ip);
   3039 		kpreempt_enable();
   3040 	} else {
   3041 		if (hw->mac.ops.check_link)
   3042 			err = ixgbe_check_link(hw, &adapter->link_speed,
   3043 			    &adapter->link_up, FALSE);
   3044 		if (err)
   3045 			goto out;
   3046 		autoneg = hw->phy.autoneg_advertised;
   3047 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   3048                 	err  = hw->mac.ops.get_link_capabilities(hw,
   3049 			    &autoneg, &negotiate);
   3050 		else
   3051 			negotiate = 0;
   3052 		if (err)
   3053 			goto out;
   3054 		if (hw->mac.ops.setup_link)
   3055                 	err = hw->mac.ops.setup_link(hw,
   3056 			    autoneg, adapter->link_up);
   3057 	}
   3058 out:
   3059 	return;
   3060 }
   3061 
   3062 /********************************************************************
   3063  * Manage DMA'able memory.
   3064  *******************************************************************/
   3065 
   3066 static int
   3067 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   3068 		struct ixgbe_dma_alloc *dma, const int mapflags)
   3069 {
   3070 	device_t dev = adapter->dev;
   3071 	int             r, rsegs;
   3072 
   3073 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3074 			       DBA_ALIGN, 0,	/* alignment, bounds */
   3075 			       size,	/* maxsize */
   3076 			       1,	/* nsegments */
   3077 			       size,	/* maxsegsize */
   3078 			       BUS_DMA_ALLOCNOW,	/* flags */
   3079 			       &dma->dma_tag);
   3080 	if (r != 0) {
   3081 		aprint_error_dev(dev,
   3082 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   3083 		goto fail_0;
   3084 	}
   3085 
   3086 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   3087 		size,
   3088 		dma->dma_tag->dt_alignment,
   3089 		dma->dma_tag->dt_boundary,
   3090 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   3091 	if (r != 0) {
   3092 		aprint_error_dev(dev,
   3093 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   3094 		goto fail_1;
   3095 	}
   3096 
   3097 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   3098 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   3099 	if (r != 0) {
   3100 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   3101 		    __func__, r);
   3102 		goto fail_2;
   3103 	}
   3104 
   3105 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   3106 	if (r != 0) {
    3107 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   3108 		    __func__, r);
   3109 		goto fail_3;
   3110 	}
   3111 
   3112 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   3113 			    size,
   3114 			    NULL,
   3115 			    mapflags | BUS_DMA_NOWAIT);
   3116 	if (r != 0) {
   3117 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3118 		    __func__, r);
   3119 		goto fail_4;
   3120 	}
   3121 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3122 	dma->dma_size = size;
   3123 	return 0;
   3124 fail_4:
   3125 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3126 fail_3:
   3127 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3128 fail_2:
   3129 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3130 fail_1:
   3131 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3132 fail_0:
   3133 	return r;
   3134 }
   3135 
   3136 static void
   3137 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3138 {
   3139 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3140 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3141 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3142 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3143 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3144 }
   3145 
   3146 
   3147 /*********************************************************************
   3148  *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each; called only once at attach.
   3151  *
   3152  **********************************************************************/
   3153 static int
   3154 ixgbe_allocate_queues(struct adapter *adapter)
   3155 {
   3156 	device_t	dev = adapter->dev;
   3157 	struct ix_queue	*que;
   3158 	struct tx_ring	*txr;
   3159 	struct rx_ring	*rxr;
   3160 	int rsize, tsize, error = IXGBE_SUCCESS;
   3161 	int txconf = 0, rxconf = 0;
   3162 
	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}
   3171 
	/* Next allocate the TX ring struct memory */
   3173 	if (!(adapter->tx_rings =
   3174 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3175 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3176 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3177 		error = ENOMEM;
   3178 		goto tx_fail;
   3179 	}
   3180 
	/* Then allocate the RX ring struct memory */
   3182 	if (!(adapter->rx_rings =
   3183 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3184 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3185 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3186 		error = ENOMEM;
   3187 		goto rx_fail;
   3188 	}
   3189 
   3190 	/* For the ring itself */
   3191 	tsize = roundup2(adapter->num_tx_desc *
   3192 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
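
	/*
	 * Worked example (illustrative, assuming DBA_ALIGN is 128 and
	 * a 16-byte advanced descriptor): 1010 descriptors * 16 bytes
	 * = 16160, rounded up to the next multiple of 128 = 16256.
	 * The hardware requires descriptor rings to be 128-byte
	 * aligned, hence the roundup2().
	 */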
   3193 
	/*
	 * Now set up the TX queues; txconf counts how many rings have
	 * been configured so that, if setup fails partway through, we
	 * can unwind the allocations gracefully.
	 */
   3199 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3200 		/* Set up some basics */
   3201 		txr = &adapter->tx_rings[i];
   3202 		txr->adapter = adapter;
   3203 		txr->me = i;
   3204 		txr->num_desc = adapter->num_tx_desc;
   3205 
   3206 		/* Initialize the TX side lock */
   3207 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3208 		    device_xname(dev), txr->me);
   3209 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3210 
   3211 		if (ixgbe_dma_malloc(adapter, tsize,
   3212 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3213 			aprint_error_dev(dev,
   3214 			    "Unable to allocate TX Descriptor memory\n");
   3215 			error = ENOMEM;
   3216 			goto err_tx_desc;
   3217 		}
   3218 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3219 		bzero((void *)txr->tx_base, tsize);
   3220 
		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			aprint_error_dev(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			aprint_error_dev(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
   3239 	}
   3240 
   3241 	/*
   3242 	 * Next the RX queues...
   3243 	 */
   3244 	rsize = roundup2(adapter->num_rx_desc *
   3245 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3246 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3247 		rxr = &adapter->rx_rings[i];
   3248 		/* Set up some basics */
   3249 		rxr->adapter = adapter;
   3250 		rxr->me = i;
   3251 		rxr->num_desc = adapter->num_rx_desc;
   3252 
   3253 		/* Initialize the RX side lock */
   3254 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3255 		    device_xname(dev), rxr->me);
   3256 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3257 
   3258 		if (ixgbe_dma_malloc(adapter, rsize,
   3259 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3260 			aprint_error_dev(dev,
			    "Unable to allocate RX Descriptor memory\n");
   3262 			error = ENOMEM;
   3263 			goto err_rx_desc;
   3264 		}
   3265 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3266 		bzero((void *)rxr->rx_base, rsize);
   3267 
		/* Allocate receive buffers for the ring */
   3269 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3270 			aprint_error_dev(dev,
   3271 			    "Critical Failure setting up receive buffers\n");
   3272 			error = ENOMEM;
   3273 			goto err_rx_desc;
   3274 		}
   3275 	}
   3276 
   3277 	/*
   3278 	** Finally set up the queue holding structs
   3279 	*/
   3280 	for (int i = 0; i < adapter->num_queues; i++) {
   3281 		que = &adapter->queues[i];
   3282 		que->adapter = adapter;
   3283 		que->txr = &adapter->tx_rings[i];
   3284 		que->rxr = &adapter->rx_rings[i];
   3285 	}
   3286 
   3287 	return (0);
   3288 
   3289 err_rx_desc:
   3290 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3291 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3292 err_tx_desc:
   3293 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3294 		ixgbe_dma_free(adapter, &txr->txdma);
   3295 	free(adapter->rx_rings, M_DEVBUF);
   3296 rx_fail:
   3297 	free(adapter->tx_rings, M_DEVBUF);
   3298 tx_fail:
   3299 	free(adapter->queues, M_DEVBUF);
   3300 fail:
   3301 	return (error);
   3302 }
   3303 
   3304 /*********************************************************************
   3305  *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
   3309  *
   3310  **********************************************************************/
   3311 static int
   3312 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3313 {
   3314 	struct adapter *adapter = txr->adapter;
   3315 	device_t dev = adapter->dev;
   3316 	struct ixgbe_tx_buf *txbuf;
   3317 	int error, i;
   3318 
   3319 	/*
   3320 	 * Setup DMA descriptor areas.
   3321 	 */
   3322 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3323 			       1, 0,		/* alignment, bounds */
   3324 			       IXGBE_TSO_SIZE,		/* maxsize */
   3325 			       adapter->num_segs,	/* nsegments */
   3326 			       PAGE_SIZE,		/* maxsegsize */
   3327 			       0,			/* flags */
   3328 			       &txr->txtag))) {
   3329 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
   3330 		goto fail;
   3331 	}
   3332 
   3333 	if (!(txr->tx_buffers =
   3334 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3335 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3336 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3337 		error = ENOMEM;
   3338 		goto fail;
   3339 	}
   3340 
	/* Create the descriptor buffer DMA maps */
   3342 	txbuf = txr->tx_buffers;
   3343 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3344 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3345 		if (error != 0) {
   3346 			aprint_error_dev(dev,
   3347 			    "Unable to create TX DMA map (%d)\n", error);
   3348 			goto fail;
   3349 		}
   3350 	}
   3351 
   3352 	return 0;
   3353 fail:
	/* Free everything; this handles the case where we failed partway */
   3355 	ixgbe_free_transmit_structures(adapter);
   3356 	return (error);
   3357 }
   3358 
   3359 /*********************************************************************
   3360  *
   3361  *  Initialize a transmit ring.
   3362  *
   3363  **********************************************************************/
   3364 static void
   3365 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3366 {
   3367 	struct adapter *adapter = txr->adapter;
   3368 	struct ixgbe_tx_buf *txbuf;
   3369 	int i;
   3370 #ifdef DEV_NETMAP
   3371 	struct netmap_adapter *na = NA(adapter->ifp);
   3372 	struct netmap_slot *slot;
   3373 #endif /* DEV_NETMAP */
   3374 
   3375 	/* Clear the old ring contents */
   3376 	IXGBE_TX_LOCK(txr);
   3377 #ifdef DEV_NETMAP
   3378 	/*
   3379 	 * (under lock): if in netmap mode, do some consistency
   3380 	 * checks and set slot to entry 0 of the netmap ring.
   3381 	 */
   3382 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3383 #endif /* DEV_NETMAP */
   3384 	bzero((void *)txr->tx_base,
   3385 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3386 	/* Reset indices */
   3387 	txr->next_avail_desc = 0;
   3388 	txr->next_to_clean = 0;
   3389 
	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
   3392 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3393 		if (txbuf->m_head != NULL) {
   3394 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3395 			    0, txbuf->m_head->m_pkthdr.len,
   3396 			    BUS_DMASYNC_POSTWRITE);
   3397 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3398 			m_freem(txbuf->m_head);
   3399 			txbuf->m_head = NULL;
   3400 		}
   3401 #ifdef DEV_NETMAP
   3402 		/*
   3403 		 * In netmap mode, set the map for the packet buffer.
   3404 		 * NOTE: Some drivers (not this one) also need to set
   3405 		 * the physical buffer address in the NIC ring.
   3406 		 * Slots in the netmap ring (indexed by "si") are
   3407 		 * kring->nkr_hwofs positions "ahead" wrt the
   3408 		 * corresponding slot in the NIC ring. In some drivers
   3409 		 * (not here) nkr_hwofs can be negative. Function
   3410 		 * netmap_idx_n2k() handles wraparounds properly.
   3411 		 */
   3412 		if (slot) {
   3413 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3414 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
   3415 		}
   3416 #endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}
   3420 
   3421 #ifdef IXGBE_FDIR
   3422 	/* Set the rate at which we sample packets */
   3423 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3424 		txr->atr_sample = atr_sample_rate;
   3425 #endif
   3426 
   3427 	/* Set number of descriptors available */
   3428 	txr->tx_avail = adapter->num_tx_desc;
   3429 
   3430 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3431 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3432 	IXGBE_TX_UNLOCK(txr);
   3433 }
   3434 
   3435 /*********************************************************************
   3436  *
   3437  *  Initialize all transmit rings.
   3438  *
   3439  **********************************************************************/
   3440 static int
   3441 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3442 {
   3443 	struct tx_ring *txr = adapter->tx_rings;
   3444 
   3445 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3446 		ixgbe_setup_transmit_ring(txr);
   3447 
   3448 	return (0);
   3449 }
   3450 
   3451 /*********************************************************************
   3452  *
   3453  *  Enable transmit unit.
   3454  *
   3455  **********************************************************************/
   3456 static void
   3457 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3458 {
   3459 	struct tx_ring	*txr = adapter->tx_rings;
   3460 	struct ixgbe_hw	*hw = &adapter->hw;
   3461 
   3462 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3463 
   3464 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3465 		u64	tdba = txr->txdma.dma_paddr;
   3466 		u32	txctrl;
   3467 
   3468 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3469 		       (tdba & 0x00000000ffffffffULL));
   3470 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3471 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3472 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
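
		/*
		 * Illustrative: with tdba = 0x0000001234567000, TDBAL
		 * gets 0x34567000 and TDBAH gets 0x00000012; TDLEN is
		 * the ring length in bytes and must be 128-byte aligned.
		 */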
   3473 
   3474 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3475 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3476 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3477 
   3478 		/* Setup Transmit Descriptor Cmd Settings */
   3479 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3480 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3481 
   3482 		/* Set the processing limit */
   3483 		txr->process_limit = ixgbe_tx_process_limit;
   3484 
   3485 		/* Disable Head Writeback */
   3486 		switch (hw->mac.type) {
   3487 		case ixgbe_mac_82598EB:
   3488 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3489 			break;
   3490 		case ixgbe_mac_82599EB:
   3491 		case ixgbe_mac_X540:
   3492 		default:
   3493 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3494 			break;
		}
   3496 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3497 		switch (hw->mac.type) {
   3498 		case ixgbe_mac_82598EB:
   3499 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3500 			break;
   3501 		case ixgbe_mac_82599EB:
   3502 		case ixgbe_mac_X540:
   3503 		default:
   3504 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3505 			break;
   3506 		}
   3508 	}
   3509 
   3510 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3511 		u32 dmatxctl, rttdcs;
   3512 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3513 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3514 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3515 		/* Disable arbiter to set MTQC */
   3516 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3517 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3518 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3519 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3520 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3521 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3522 	}
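
	/*
	 * Note (per the 82599/X540 datasheets): MTQC may only be
	 * reprogrammed while the DCB transmit arbiter is disabled,
	 * hence the ARBDIS set / MTQC write / ARBDIS clear bracket
	 * above.
	 */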
   3523 
   3524 	return;
   3525 }
   3526 
   3527 /*********************************************************************
   3528  *
   3529  *  Free all transmit rings.
   3530  *
   3531  **********************************************************************/
   3532 static void
   3533 ixgbe_free_transmit_structures(struct adapter *adapter)
   3534 {
   3535 	struct tx_ring *txr = adapter->tx_rings;
   3536 
   3537 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3538 		ixgbe_free_transmit_buffers(txr);
   3539 		ixgbe_dma_free(adapter, &txr->txdma);
   3540 		IXGBE_TX_LOCK_DESTROY(txr);
   3541 	}
   3542 	free(adapter->tx_rings, M_DEVBUF);
   3543 }
   3544 
   3545 /*********************************************************************
   3546  *
   3547  *  Free transmit ring related data structures.
   3548  *
   3549  **********************************************************************/
   3550 static void
   3551 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3552 {
   3553 	struct adapter *adapter = txr->adapter;
   3554 	struct ixgbe_tx_buf *tx_buffer;
   3555 	int             i;
   3556 
   3557 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
   3558 
   3559 	if (txr->tx_buffers == NULL)
   3560 		return;
   3561 
   3562 	tx_buffer = txr->tx_buffers;
   3563 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3564 		if (tx_buffer->m_head != NULL) {
   3565 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3566 			    0, tx_buffer->m_head->m_pkthdr.len,
   3567 			    BUS_DMASYNC_POSTWRITE);
   3568 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3569 			m_freem(tx_buffer->m_head);
   3570 			tx_buffer->m_head = NULL;
   3571 			if (tx_buffer->map != NULL) {
   3572 				ixgbe_dmamap_destroy(txr->txtag,
   3573 				    tx_buffer->map);
   3574 				tx_buffer->map = NULL;
   3575 			}
   3576 		} else if (tx_buffer->map != NULL) {
   3577 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3578 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3579 			tx_buffer->map = NULL;
   3580 		}
   3581 	}
   3582 #ifndef IXGBE_LEGACY_TX
   3583 	if (txr->br != NULL)
   3584 		buf_ring_free(txr->br, M_DEVBUF);
   3585 #endif
   3586 	if (txr->tx_buffers != NULL) {
   3587 		free(txr->tx_buffers, M_DEVBUF);
   3588 		txr->tx_buffers = NULL;
   3589 	}
   3590 	if (txr->txtag != NULL) {
   3591 		ixgbe_dma_tag_destroy(txr->txtag);
   3592 		txr->txtag = NULL;
   3593 	}
   3594 	return;
   3595 }
   3596 
   3597 /*********************************************************************
   3598  *
   3599  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3600  *
   3601  **********************************************************************/
   3602 
   3603 static int
   3604 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3605     u32 *cmd_type_len, u32 *olinfo_status)
   3606 {
   3607 	struct m_tag *mtag;
   3608 	struct adapter *adapter = txr->adapter;
   3609 	struct ethercom *ec = &adapter->osdep.ec;
   3610 	struct ixgbe_adv_tx_context_desc *TXD;
   3611 	struct ether_vlan_header *eh;
   3612 	struct ip ip;
   3613 	struct ip6_hdr ip6;
   3614 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3615 	int	ehdrlen, ip_hlen = 0;
   3616 	u16	etype;
   3617 	u8	ipproto __diagused = 0;
   3618 	int	offload = TRUE;
   3619 	int	ctxd = txr->next_avail_desc;
   3620 	u16	vtag = 0;
   3621 
   3622 	/* First check if TSO is to be used */
   3623 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3624 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3625 
   3626 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3627 		offload = FALSE;
   3628 
   3629 	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3631 
   3632 	/* Now ready a context descriptor */
   3633 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3634 
   3635 	/*
   3636 	** In advanced descriptors the vlan tag must
   3637 	** be placed into the context descriptor. Hence
   3638 	** we need to make one even if not doing offloads.
   3639 	*/
   3640 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3641 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3642 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3643 	} else if (offload == FALSE) /* ... no offload to do */
   3644 		return 0;
   3645 
   3646 	/*
   3647 	 * Determine where frame payload starts.
   3648 	 * Jump over vlan headers if already present,
   3649 	 * helpful for QinQ too.
   3650 	 */
   3651 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3652 	eh = mtod(mp, struct ether_vlan_header *);
   3653 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3654 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3655 		etype = ntohs(eh->evl_proto);
   3656 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3657 	} else {
   3658 		etype = ntohs(eh->evl_encap_proto);
   3659 		ehdrlen = ETHER_HDR_LEN;
   3660 	}
   3661 
   3662 	/* Set the ether header length */
   3663 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
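
	/*
	 * Field layout reminder (illustrative, per the 82599 datasheet):
	 * vlan_macip_lens packs VLAN in [31:16], MAC header length in
	 * [15:9] and IP header length in [8:0].  An untagged IPv4 frame
	 * with no options gives (14 << 9) | 20 = 0x1C14.
	 */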
   3664 
   3665 	switch (etype) {
   3666 	case ETHERTYPE_IP:
   3667 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3668 		ip_hlen = ip.ip_hl << 2;
   3669 		ipproto = ip.ip_p;
   3670 #if 0
   3671 		ip.ip_sum = 0;
   3672 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3673 #else
   3674 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3675 		    ip.ip_sum == 0);
   3676 #endif
   3677 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3678 		break;
   3679 	case ETHERTYPE_IPV6:
   3680 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3681 		ip_hlen = sizeof(ip6);
   3682 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3683 		ipproto = ip6.ip6_nxt;
   3684 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3685 		break;
   3686 	default:
   3687 		break;
   3688 	}
   3689 
   3690 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3691 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3692 
   3693 	vlan_macip_lens |= ip_hlen;
   3694 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3695 
   3696 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3697 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3698 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3699 		KASSERT(ipproto == IPPROTO_TCP);
   3700 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3701 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3702 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3703 		KASSERT(ipproto == IPPROTO_UDP);
   3704 	}
   3705 
   3706 	/* Now copy bits into descriptor */
   3707 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3708 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3709 	TXD->seqnum_seed = htole32(0);
   3710 	TXD->mss_l4len_idx = htole32(0);
   3711 
   3712 	/* We've consumed the first desc, adjust counters */
   3713 	if (++ctxd == txr->num_desc)
   3714 		ctxd = 0;
   3715 	txr->next_avail_desc = ctxd;
   3716 	--txr->tx_avail;
   3717 
	return 0;
   3719 }
   3720 
   3721 /**********************************************************************
   3722  *
   3723  *  Setup work for hardware segmentation offload (TSO) on
   3724  *  adapters using advanced tx descriptors
   3725  *
   3726  **********************************************************************/
   3727 static int
   3728 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3729     u32 *cmd_type_len, u32 *olinfo_status)
   3730 {
   3731 	struct m_tag *mtag;
   3732 	struct adapter *adapter = txr->adapter;
   3733 	struct ethercom *ec = &adapter->osdep.ec;
   3734 	struct ixgbe_adv_tx_context_desc *TXD;
   3735 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3736 	u32 mss_l4len_idx = 0, paylen;
   3737 	u16 vtag = 0, eh_type;
   3738 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3739 	struct ether_vlan_header *eh;
   3740 #ifdef INET6
   3741 	struct ip6_hdr *ip6;
   3742 #endif
   3743 #ifdef INET
   3744 	struct ip *ip;
   3745 #endif
   3746 	struct tcphdr *th;
   3747 
   3749 	/*
   3750 	 * Determine where frame payload starts.
   3751 	 * Jump over vlan headers if already present
   3752 	 */
   3753 	eh = mtod(mp, struct ether_vlan_header *);
   3754 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3755 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3756 		eh_type = eh->evl_proto;
   3757 	} else {
   3758 		ehdrlen = ETHER_HDR_LEN;
   3759 		eh_type = eh->evl_encap_proto;
   3760 	}
   3761 
   3762 	switch (ntohs(eh_type)) {
   3763 #ifdef INET6
   3764 	case ETHERTYPE_IPV6:
   3765 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3766 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3767 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3768 			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
   3771 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3772 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3773 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3774 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3775 		break;
   3776 #endif
   3777 #ifdef INET
   3778 	case ETHERTYPE_IP:
   3779 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3780 		if (ip->ip_p != IPPROTO_TCP)
   3781 			return (ENXIO);
   3782 		ip->ip_sum = 0;
   3783 		ip_hlen = ip->ip_hl << 2;
   3784 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3785 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3786 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3787 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3788 		/* Tell transmit desc to also do IPv4 checksum. */
   3789 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3790 		break;
   3791 #endif
   3792 	default:
   3793 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3794 		    __func__, ntohs(eh_type));
   3795 		break;
   3796 	}
   3797 
   3798 	ctxd = txr->next_avail_desc;
   3799 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3800 
   3801 	tcp_hlen = th->th_off << 2;
   3802 
   3803 	/* This is used in the transmit desc in encap */
   3804 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3805 
   3806 	/* VLAN MACLEN IPLEN */
   3807 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3808 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3810 	}
   3811 
   3812 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3813 	vlan_macip_lens |= ip_hlen;
   3814 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3815 
   3816 	/* ADV DTYPE TUCMD */
   3817 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3818 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3819 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3820 
   3821 	/* MSS L4LEN IDX */
   3822 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3823 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3824 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
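
	/*
	 * Example (illustrative): a 1500-byte-MTU IPv4/TCP flow with
	 * no options has segsz = 1460 and tcp_hlen = 20, giving
	 * mss_l4len_idx = (1460 << 16) | (20 << 8) = 0x05B41400.
	 */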
   3825 
   3826 	TXD->seqnum_seed = htole32(0);
   3827 
   3828 	if (++ctxd == txr->num_desc)
   3829 		ctxd = 0;
   3830 
   3831 	txr->tx_avail--;
   3832 	txr->next_avail_desc = ctxd;
   3833 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3834 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3835 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3836 	++txr->tso_tx.ev_count;
   3837 	return (0);
   3838 }
   3839 
   3840 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu.  Parsing every packet would be a
** performance hit, so we only do it on a sample of
** one in every IXGBE_FDIR_RATE packets.
*/
   3849 static void
   3850 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3851 {
   3852 	struct adapter			*adapter = txr->adapter;
   3853 	struct ix_queue			*que;
   3854 	struct ip			*ip;
   3855 	struct tcphdr			*th;
   3856 	struct udphdr			*uh;
   3857 	struct ether_vlan_header	*eh;
   3858 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3859 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3860 	int  				ehdrlen, ip_hlen;
   3861 	u16				etype;
   3862 
   3863 	eh = mtod(mp, struct ether_vlan_header *);
   3864 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3865 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3866 		etype = eh->evl_proto;
   3867 	} else {
   3868 		ehdrlen = ETHER_HDR_LEN;
   3869 		etype = eh->evl_encap_proto;
   3870 	}
   3871 
   3872 	/* Only handling IPv4 */
   3873 	if (etype != htons(ETHERTYPE_IP))
   3874 		return;
   3875 
   3876 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3877 	ip_hlen = ip->ip_hl << 2;
   3878 
   3879 	/* check if we're UDP or TCP */
   3880 	switch (ip->ip_p) {
   3881 	case IPPROTO_TCP:
   3882 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3883 		/* src and dst are inverted */
   3884 		common.port.dst ^= th->th_sport;
   3885 		common.port.src ^= th->th_dport;
   3886 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3887 		break;
   3888 	case IPPROTO_UDP:
   3889 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3890 		/* src and dst are inverted */
   3891 		common.port.dst ^= uh->uh_sport;
   3892 		common.port.src ^= uh->uh_dport;
   3893 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3894 		break;
   3895 	default:
   3896 		return;
   3897 	}
   3898 
   3899 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3900 	if (mp->m_pkthdr.ether_vtag)
   3901 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3902 	else
   3903 		common.flex_bytes ^= etype;
   3904 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3905 
   3906 	que = &adapter->queues[txr->me];
   3907 	/*
   3908 	** This assumes the Rx queue and Tx
   3909 	** queue are bound to the same CPU
   3910 	*/
   3911 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3912 	    input, common, que->msix);
   3913 }
   3914 #endif /* IXGBE_FDIR */
   3915 
   3916 /**********************************************************************
   3917  *
   3918  *  Examine each tx_buffer in the used queue. If the hardware is done
   3919  *  processing the packet then free associated resources. The
   3920  *  tx_buffer is put back on the free queue.
   3921  *
   3922  **********************************************************************/
   3923 static void
   3924 ixgbe_txeof(struct tx_ring *txr)
   3925 {
   3926 	struct adapter		*adapter = txr->adapter;
   3927 	struct ifnet		*ifp = adapter->ifp;
   3928 	u32			work, processed = 0;
   3929 	u16			limit = txr->process_limit;
   3930 	struct ixgbe_tx_buf	*buf;
   3931 	union ixgbe_adv_tx_desc *txd;
   3932 	struct timeval now, elapsed;
   3933 
   3934 	KASSERT(mutex_owned(&txr->tx_mtx));
   3935 
   3936 #ifdef DEV_NETMAP
   3937 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3938 		struct netmap_adapter *na = NA(ifp);
   3939 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3940 		txd = txr->tx_base;
   3941 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3942 		    BUS_DMASYNC_POSTREAD);
   3943 		/*
   3944 		 * In netmap mode, all the work is done in the context
   3945 		 * of the client thread. Interrupt handlers only wake up
   3946 		 * clients, which may be sleeping on individual rings
   3947 		 * or on a global resource for all rings.
   3948 		 * To implement tx interrupt mitigation, we wake up the client
   3949 		 * thread roughly every half ring, even if the NIC interrupts
   3950 		 * more frequently. This is implemented as follows:
   3951 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3952 		 *   the slot that should wake up the thread (nkr_num_slots
   3953 		 *   means the user thread should not be woken up);
   3954 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3955 		 *   or the slot has the DD bit set.
   3956 		 */
   3957 		if (!netmap_mitigate ||
   3958 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3959 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3960 			netmap_tx_irq(ifp, txr->me);
   3961 		}
   3962 		return;
   3963 	}
   3964 #endif /* DEV_NETMAP */
   3965 
   3966 	if (txr->tx_avail == txr->num_desc) {
   3967 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3968 		return;
   3969 	}
   3970 
   3971 	/* Get work starting point */
   3972 	work = txr->next_to_clean;
   3973 	buf = &txr->tx_buffers[work];
   3974 	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* Now the negative distance to the ring end */
	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
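
	/*
	 * Illustrative walk-through: with num_desc = 1024 and
	 * next_to_clean = 1000, 'work' becomes -24 modulo 2^32 (it is
	 * unsigned); it counts up and reaches 0 exactly when the scan
	 * wraps past the ring end, at which point it is reset and the
	 * buf/txd pointers rewind to the ring base.
	 */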
   3978 	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
   3980 		if (eop == NULL) /* No work */
   3981 			break;
   3982 
   3983 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3984 			break;	/* I/O not complete */
   3985 
   3986 		if (buf->m_head) {
   3987 			txr->bytes +=
   3988 			    buf->m_head->m_pkthdr.len;
   3989 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3990 			    buf->map,
   3991 			    0, buf->m_head->m_pkthdr.len,
   3992 			    BUS_DMASYNC_POSTWRITE);
   3993 			ixgbe_dmamap_unload(txr->txtag,
   3994 			    buf->map);
   3995 			m_freem(buf->m_head);
   3996 			buf->m_head = NULL;
   3997 			/*
   3998 			 * NetBSD: Don't override buf->map with NULL here.
   3999 			 * It'll panic when a ring runs one lap around.
   4000 			 */
   4001 		}
   4002 		buf->eop = NULL;
   4003 		++txr->tx_avail;
   4004 
   4005 		/* We clean the range if multi segment */
   4006 		while (txd != eop) {
   4007 			++txd;
   4008 			++buf;
   4009 			++work;
   4010 			/* wrap the ring? */
   4011 			if (__predict_false(!work)) {
   4012 				work -= txr->num_desc;
   4013 				buf = txr->tx_buffers;
   4014 				txd = txr->tx_base;
   4015 			}
   4016 			if (buf->m_head) {
   4017 				txr->bytes +=
   4018 				    buf->m_head->m_pkthdr.len;
   4019 				bus_dmamap_sync(txr->txtag->dt_dmat,
   4020 				    buf->map,
   4021 				    0, buf->m_head->m_pkthdr.len,
   4022 				    BUS_DMASYNC_POSTWRITE);
   4023 				ixgbe_dmamap_unload(txr->txtag,
   4024 				    buf->map);
   4025 				m_freem(buf->m_head);
   4026 				buf->m_head = NULL;
   4027 				/*
   4028 				 * NetBSD: Don't override buf->map with NULL
   4029 				 * here. It'll panic when a ring runs one lap
   4030 				 * around.
   4031 				 */
   4032 			}
   4033 			++txr->tx_avail;
   4034 			buf->eop = NULL;
		}
   4037 		++txr->packets;
   4038 		++processed;
   4039 		++ifp->if_opackets;
   4040 		getmicrotime(&txr->watchdog_time);
   4041 
   4042 		/* Try the next packet */
   4043 		++txd;
   4044 		++buf;
   4045 		++work;
   4046 		/* reset with a wrap */
   4047 		if (__predict_false(!work)) {
   4048 			work -= txr->num_desc;
   4049 			buf = txr->tx_buffers;
   4050 			txd = txr->tx_base;
   4051 		}
   4052 		prefetch(txd);
   4053 	} while (__predict_true(--limit));
   4054 
   4055 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   4056 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4057 
   4058 	work += txr->num_desc;
   4059 	txr->next_to_clean = work;
   4060 
	/*
	** Watchdog calculation: we know there is work outstanding
	** (or the early return above would have been taken), so if
	** nothing was processed for too long we declare the queue
	** hung.
	*/
   4067 	getmicrotime(&now);
   4068 	timersub(&now, &txr->watchdog_time, &elapsed);
   4069 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   4070 		txr->queue_status = IXGBE_QUEUE_HUNG;
   4071 
   4072 	if (txr->tx_avail == txr->num_desc)
   4073 		txr->queue_status = IXGBE_QUEUE_IDLE;
   4074 
   4075 	return;
   4076 }
   4077 
   4078 /*********************************************************************
   4079  *
 *  Refresh mbuf buffers for RX descriptor rings.
 *   - The ring keeps its own state, so discards due to resource
 *     exhaustion are unnecessary: if an mbuf cannot be obtained the
 *     routine just returns, keeping its placeholder, and can simply
 *     be called again to retry.
   4085  *
   4086  **********************************************************************/
   4087 static void
   4088 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   4089 {
   4090 	struct adapter		*adapter = rxr->adapter;
   4091 	struct ixgbe_rx_buf	*rxbuf;
   4092 	struct mbuf		*mp;
   4093 	int			i, j, error;
   4094 	bool			refreshed = false;
   4095 
   4096 	i = j = rxr->next_to_refresh;
   4097 	/* Control the loop with one beyond */
	/* 'j' runs one slot ahead of 'i' to control the loop */
   4099 		j = 0;
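
	/*
	 * Illustrative: with num_desc = 4 and next_to_refresh = 3,
	 * i starts at 3 and j at 0; each pass refreshes slot i and
	 * then advances i to j, so on exit next_to_refresh again
	 * names the first slot that has not yet been refreshed.
	 */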
   4100 
   4101 	while (j != limit) {
   4102 		rxbuf = &rxr->rx_buffers[i];
   4103 		if (rxbuf->buf == NULL) {
   4104 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4105 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   4106 			if (mp == NULL) {
   4107 				rxr->no_jmbuf.ev_count++;
   4108 				goto update;
   4109 			}
   4110 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4111 				m_adj(mp, ETHER_ALIGN);
   4112 		} else
   4113 			mp = rxbuf->buf;
   4114 
   4115 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4116 
		/*
		 * If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
   4120 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4121 			/* Get the memory mapping */
   4122 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4123 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
			if (error != 0) {
				aprint_error_dev(adapter->dev,
				    "Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
   4127 				m_free(mp);
   4128 				rxbuf->buf = NULL;
   4129 				goto update;
   4130 			}
   4131 			rxbuf->buf = mp;
   4132 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4133 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4134 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4135 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4136 		} else {
   4137 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4138 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4139 		}
   4140 
   4141 		refreshed = true;
   4142 		/* Next is precalculated */
   4143 		i = j;
   4144 		rxr->next_to_refresh = i;
   4145 		if (++j == rxr->num_desc)
   4146 			j = 0;
   4147 	}
   4148 update:
   4149 	if (refreshed) /* Update hardware tail index */
   4150 		IXGBE_WRITE_REG(&adapter->hw,
   4151 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4152 	return;
   4153 }
   4154 
   4155 /*********************************************************************
   4156  *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
 *  we'll need equals the number of receive descriptors
 *  we've allocated.
   4161  *
   4162  **********************************************************************/
   4163 static int
   4164 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4165 {
   4166 	struct	adapter 	*adapter = rxr->adapter;
   4167 	device_t 		dev = adapter->dev;
   4168 	struct ixgbe_rx_buf 	*rxbuf;
   4169 	int             	i, bsize, error;
   4170 
   4171 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4172 	if (!(rxr->rx_buffers =
   4173 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4174 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4175 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4176 		error = ENOMEM;
   4177 		goto fail;
   4178 	}
   4179 
   4180 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4181 				   1, 0,	/* alignment, bounds */
   4182 				   MJUM16BYTES,		/* maxsize */
   4183 				   1,			/* nsegments */
   4184 				   MJUM16BYTES,		/* maxsegsize */
   4185 				   0,			/* flags */
   4186 				   &rxr->ptag))) {
   4187 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4188 		goto fail;
   4189 	}
   4190 
	for (i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
   4193 		error = ixgbe_dmamap_create(rxr->ptag,
   4194 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4195 		if (error) {
   4196 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4197 			goto fail;
   4198 		}
   4199 	}
   4200 
   4201 	return (0);
   4202 
   4203 fail:
   4204 	/* Frees all, but can handle partial completion */
   4205 	ixgbe_free_receive_structures(adapter);
   4206 	return (error);
   4207 }
   4208 
   4209 /*
   4210 ** Used to detect a descriptor that has
   4211 ** been merged by Hardware RSC.
   4212 */
   4213 static inline u32
   4214 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4215 {
   4216 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4217 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4218 }
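
/*
 * Illustrative (assuming the standard mask 0x001E0000 and shift 17):
 * a raw writeback lo_dword of 0x00060000 decodes to an RSC count of
 * 3 descriptors merged into this one.
 */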
   4219 
   4220 /*********************************************************************
   4221  *
 *  Initialize the hardware RSC (LRO) feature on 82599
 *  for an RX ring; it is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4 and
 *        our testing has shown soft LRO to be as effective,
 *        it is disabled by default.
   4229  *
   4230  **********************************************************************/
   4231 static void
   4232 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4233 {
   4234 	struct	adapter 	*adapter = rxr->adapter;
   4235 	struct	ixgbe_hw	*hw = &adapter->hw;
   4236 	u32			rscctrl, rdrxctl;
   4237 
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back to the hardware */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
   4244 
   4245 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4246 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4247 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4248 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4249 #endif /* DEV_NETMAP */
   4250 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4251 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4252 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4253 
   4254 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4255 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4256 	/*
   4257 	** Limit the total number of descriptors that
   4258 	** can be combined, so it does not exceed 64K
   4259 	*/
   4260 	if (rxr->mbuf_sz == MCLBYTES)
   4261 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4262 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4263 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4264 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4265 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4266 	else  /* Using 16K cluster */
   4267 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
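
	/*
	 * Sanity check (illustrative): 16 * 2K, 8 * 4K, 4 * 9K and
	 * 1 * 16K clusters all keep a merged RSC comfortably below
	 * the 64KB ceiling noted above.
	 */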
   4268 
   4269 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4270 
   4271 	/* Enable TCP header recognition */
   4272 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4273 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4274 	    IXGBE_PSRTYPE_TCPHDR));
   4275 
   4276 	/* Disable RSC for ACK packets */
   4277 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4278 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4279 
   4280 	rxr->hw_rsc = TRUE;
   4281 }
   4282 
   4283 
   4284 static void
   4285 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4286 {
   4287 	struct ixgbe_rx_buf       *rxbuf;
   4288 	int i;
   4289 
   4290 	for (i = 0; i < rxr->num_desc; i++) {
   4291 		rxbuf = &rxr->rx_buffers[i];
   4292 		if (rxbuf->buf != NULL) {
   4293 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4294 			    0, rxbuf->buf->m_pkthdr.len,
   4295 			    BUS_DMASYNC_POSTREAD);
   4296 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4297 			rxbuf->buf->m_flags |= M_PKTHDR;
   4298 			m_freem(rxbuf->buf);
   4299 			rxbuf->buf = NULL;
   4300 			rxbuf->flags = 0;
   4301 		}
   4302 	}
   4303 }
   4304 
   4305 
   4306 /*********************************************************************
   4307  *
   4308  *  Initialize a receive ring and its buffers.
   4309  *
   4310  **********************************************************************/
   4311 static int
   4312 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4313 {
   4314 	struct	adapter 	*adapter;
   4315 	struct ixgbe_rx_buf	*rxbuf;
   4316 #ifdef LRO
   4317 	struct ifnet		*ifp;
   4318 	struct lro_ctrl		*lro = &rxr->lro;
   4319 #endif /* LRO */
   4320 	int			rsize, error = 0;
   4321 #ifdef DEV_NETMAP
   4322 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4323 	struct netmap_slot *slot;
   4324 #endif /* DEV_NETMAP */
   4325 
   4326 	adapter = rxr->adapter;
   4327 #ifdef LRO
   4328 	ifp = adapter->ifp;
   4329 #endif /* LRO */
   4330 
   4331 	/* Clear the ring contents */
   4332 	IXGBE_RX_LOCK(rxr);
   4333 #ifdef DEV_NETMAP
   4334 	/* same as in ixgbe_setup_transmit_ring() */
   4335 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4336 #endif /* DEV_NETMAP */
   4337 	rsize = roundup2(adapter->num_rx_desc *
   4338 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4339 	bzero((void *)rxr->rx_base, rsize);
   4340 	/* Cache the size */
   4341 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4342 
   4343 	/* Free current RX buffer structs and their mbufs */
   4344 	ixgbe_free_receive_ring(rxr);
   4345 
   4346 	IXGBE_RX_UNLOCK(rxr);
   4347 
	/*
	 * Now reinitialize our supply of jumbo mbufs.  The number
	 * or size of jumbo mbufs may have changed.
	 */
   4351 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4352 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4353 
   4354 	IXGBE_RX_LOCK(rxr);
   4355 
   4356 	/* Now replenish the mbufs */
   4357 	for (int j = 0; j != rxr->num_desc; ++j) {
   4358 		struct mbuf	*mp;
   4359 
   4360 		rxbuf = &rxr->rx_buffers[j];
   4361 #ifdef DEV_NETMAP
   4362 		/*
   4363 		 * In netmap mode, fill the map and set the buffer
   4364 		 * address in the NIC ring, considering the offset
   4365 		 * between the netmap and NIC rings (see comment in
   4366 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4367 		 * an mbuf, so end the block with a continue;
   4368 		 */
   4369 		if (slot) {
   4370 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4371 			uint64_t paddr;
   4372 			void *addr;
   4373 
   4374 			addr = PNMB(na, slot + sj, &paddr);
   4375 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   4376 			/* Update descriptor and the cached value */
   4377 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4378 			rxbuf->addr = htole64(paddr);
   4379 			continue;
   4380 		}
   4381 #endif /* DEV_NETMAP */
   4382 		rxbuf->flags = 0;
   4383 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4384 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
   4389 		mp = rxbuf->buf;
   4390 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4391 		/* Get the memory mapping */
   4392 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4393 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
   4396 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4397 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4398 		/* Update the descriptor and the cached value */
   4399 		rxr->rx_base[j].read.pkt_addr =
   4400 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4401 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4402 	}
   4403 
   4405 	/* Setup our descriptor indices */
   4406 	rxr->next_to_check = 0;
   4407 	rxr->next_to_refresh = 0;
   4408 	rxr->lro_enabled = FALSE;
   4409 	rxr->rx_copies.ev_count = 0;
   4410 	rxr->rx_bytes.ev_count = 0;
   4411 	rxr->vtag_strip = FALSE;
   4412 
   4413 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4414 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4415 
   4416 	/*
   4417 	** Now set up the LRO interface:
   4418 	*/
   4419 	if (ixgbe_rsc_enable)
   4420 		ixgbe_setup_hw_rsc(rxr);
   4421 #ifdef LRO
   4422 	else if (ifp->if_capenable & IFCAP_LRO) {
   4423 		device_t dev = adapter->dev;
   4424 		int err = tcp_lro_init(lro);
   4425 		if (err) {
   4426 			device_printf(dev, "LRO Initialization failed!\n");
   4427 			goto fail;
   4428 		}
   4429 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4430 		rxr->lro_enabled = TRUE;
   4431 		lro->ifp = adapter->ifp;
   4432 	}
   4433 #endif /* LRO */
   4434 
   4435 	IXGBE_RX_UNLOCK(rxr);
   4436 	return (0);
   4437 
   4438 fail:
   4439 	ixgbe_free_receive_ring(rxr);
   4440 	IXGBE_RX_UNLOCK(rxr);
   4441 	return (error);
   4442 }
   4443 
   4444 /*********************************************************************
   4445  *
   4446  *  Initialize all receive rings.
   4447  *
   4448  **********************************************************************/
   4449 static int
   4450 ixgbe_setup_receive_structures(struct adapter *adapter)
   4451 {
   4452 	struct rx_ring *rxr = adapter->rx_rings;
   4453 	int j;
   4454 
   4455 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4456 		if (ixgbe_setup_receive_ring(rxr))
   4457 			goto fail;
   4458 
   4459 	return (0);
   4460 fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself. Ring 'j' failed, so it is the terminus.
	 */
   4466 	for (int i = 0; i < j; ++i) {
   4467 		rxr = &adapter->rx_rings[i];
   4468 		ixgbe_free_receive_ring(rxr);
   4469 	}
   4470 
   4471 	return (ENOBUFS);
   4472 }
   4473 
   4474 static void
   4475 ixgbe_initialise_rss_mapping(struct adapter *adapter)
   4476 {
   4477 	struct ixgbe_hw	*hw = &adapter->hw;
   4478 	uint32_t reta;
   4479 	int i, j, queue_id;
   4480 	uint32_t rss_key[10];
   4481 	uint32_t mrqc;
   4482 #ifdef	RSS
   4483 	uint32_t rss_hash_config;
   4484 #endif
   4485 
   4486 	/* Setup RSS */
   4487 	reta = 0;
   4488 
   4489 #ifdef	RSS
   4490 	/* Fetch the configured RSS key */
   4491 	rss_getkey((uint8_t *) &rss_key);
   4492 #else
   4493 	/* set up random bits */
   4494 	cprng_fast(&rss_key, sizeof(rss_key));
   4495 #endif
   4496 
   4497 	/* Set up the redirection table */
   4498 	for (i = 0, j = 0; i < 128; i++, j++) {
		if (j == adapter->num_queues)
			j = 0;
   4500 #ifdef	RSS
   4501 		/*
   4502 		 * Fetch the RSS bucket id for the given indirection entry.
   4503 		 * Cap it at the number of configured buckets (which is
   4504 		 * num_queues.)
   4505 		 */
   4506 		queue_id = rss_get_indirection_to_bucket(i);
   4507 		queue_id = queue_id % adapter->num_queues;
   4508 #else
   4509 		queue_id = (j * 0x11);
   4510 #endif
   4511 		/*
   4512 		 * The low 8 bits are for hash value (n+0);
   4513 		 * The next 8 bits are for hash value (n+1), etc.
   4514 		 */
   4515 		reta = reta >> 8;
		reta = reta | (((uint32_t) queue_id) << 24);
   4517 		if ((i & 3) == 3) {
   4518 			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4519 			reta = 0;
   4520 		}
   4521 	}
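
	/*
	 * Worked example (illustrative, 4 queues, RSS option off): the
	 * first four queue_ids are 0x00, 0x11, 0x22 and 0x33, so the
	 * first RETA register written (at i == 3) holds 0x33221100;
	 * entry n of the 128-entry table picks the queue for packets
	 * whose RSS hash has low bits equal to n.
	 */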
   4522 
   4523 	/* Now fill our hash function seeds */
   4524 	for (i = 0; i < 10; i++)
   4525 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
   4526 
   4527 	/* Perform hash on these packet types */
   4528 #ifdef	RSS
   4529 	mrqc = IXGBE_MRQC_RSSEN;
   4530 	rss_hash_config = rss_gethashconfig();
   4531 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
   4532 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
   4533 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
   4534 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
   4535 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
   4536 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
   4537 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
   4538 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   4539 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
   4540 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
   4541 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
   4542 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
   4543 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
   4544 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   4545 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
   4546 		device_printf(adapter->dev,
   4547 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
   4548 		    "but not supported\n", __func__);
   4549 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
   4550 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   4551 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
   4552 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4553 #else
   4554 	/*
   4555 	 * Disable UDP - IP fragments aren't currently being handled
   4556 	 * and so we end up with a mix of 2-tuple and 4-tuple
   4557 	 * traffic.
   4558 	 */
   4559 	mrqc = IXGBE_MRQC_RSSEN
   4560 	     | IXGBE_MRQC_RSS_FIELD_IPV4
   4561 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4562 #if 0
   4563 	     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4564 #endif
   4565 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4566 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4567 	     | IXGBE_MRQC_RSS_FIELD_IPV6
   4568 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4569 #if 0
   4570 	     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4571 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
   4572 #endif
   4573 	;
   4574 #endif /* RSS */
   4575 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4576 }
   4577 
   4578 
   4579 /*********************************************************************
   4580  *
   4581  *  Setup receive registers and features.
   4582  *
   4583  **********************************************************************/
   4584 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4585 
   4586 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
   4587 
   4588 static void
   4589 ixgbe_initialize_receive_units(struct adapter *adapter)
   4590 {
   4591 	int i;
   4592 	struct	rx_ring	*rxr = adapter->rx_rings;
   4593 	struct ixgbe_hw	*hw = &adapter->hw;
   4594 	struct ifnet   *ifp = adapter->ifp;
   4595 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4596 	u32		hlreg;
   4597 
   4598 
   4599 	/*
   4600 	 * Make sure receives are disabled while
   4601 	 * setting up the descriptor ring
   4602 	 */
   4603 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4604 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4605 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4606 
   4607 	/* Enable broadcasts */
   4608 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4609 	fctrl |= IXGBE_FCTRL_BAM;
   4610 	fctrl |= IXGBE_FCTRL_DPF;
   4611 	fctrl |= IXGBE_FCTRL_PMCF;
   4612 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4613 
   4614 	/* Set for Jumbo Frames? */
   4615 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4616 	if (ifp->if_mtu > ETHERMTU)
   4617 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4618 	else
   4619 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4620 #ifdef DEV_NETMAP
   4621 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4622 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4623 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4624 	else
   4625 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4626 #endif /* DEV_NETMAP */
   4627 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4628 
   4629 	bufsz = (adapter->rx_mbuf_sz +
   4630 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
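
	/*
	 * Illustrative (assuming IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10):
	 * with 2KB clusters, bufsz = (2048 + 1023) >> 10 = 2, i.e.
	 * SRRCTL expresses the packet buffer size in 1KB units,
	 * rounded up.
	 */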
   4631 
   4632 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4633 		u64 rdba = rxr->rxdma.dma_paddr;
   4634 
   4635 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4636 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4637 			       (rdba & 0x00000000ffffffffULL));
   4638 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4639 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4640 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4641 
   4642 		/* Set up the SRRCTL register */
   4643 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4644 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4645 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4646 		srrctl |= bufsz;
   4647 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4648 
   4649 		/*
   4650 		 * Set DROP_EN iff we have no flow control and >1 queue.
   4651 		 * Note that srrctl was cleared shortly before during reset,
   4652 		 * so we do not need to clear the bit, but do it just in case
   4653 		 * this code is moved elsewhere.
   4654 		 */
   4655 		if (adapter->num_queues > 1 &&
   4656 		    adapter->fc == ixgbe_fc_none) {
   4657 			srrctl |= IXGBE_SRRCTL_DROP_EN;
   4658 		} else {
   4659 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   4660 		}
   4661 
   4662 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4663 
   4664 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4665 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4666 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4667 
   4668 		/* Set the processing limit */
   4669 		rxr->process_limit = ixgbe_rx_process_limit;
   4670 	}
   4671 
   4672 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4673 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4674 			      IXGBE_PSRTYPE_UDPHDR |
   4675 			      IXGBE_PSRTYPE_IPV4HDR |
   4676 			      IXGBE_PSRTYPE_IPV6HDR;
   4677 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4678 	}
   4679 
   4680 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4681 
   4682 	ixgbe_initialise_rss_mapping(adapter);
   4683 
   4684 	if (adapter->num_queues > 1) {
   4685 		/* RSS and RX IPP Checksum are mutually exclusive */
   4686 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4687 	}
   4688 
   4689 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4690 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4691 
   4692 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4693 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4694 
   4695 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4696 
   4697 	return;
   4698 }
   4699 
   4700 /*********************************************************************
   4701  *
   4702  *  Free all receive rings.
   4703  *
   4704  **********************************************************************/
   4705 static void
   4706 ixgbe_free_receive_structures(struct adapter *adapter)
   4707 {
   4708 	struct rx_ring *rxr = adapter->rx_rings;
   4709 
   4710 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   4711 
   4712 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4713 #ifdef LRO
   4714 		struct lro_ctrl		*lro = &rxr->lro;
   4715 #endif /* LRO */
   4716 		ixgbe_free_receive_buffers(rxr);
   4717 #ifdef LRO
   4718 		/* Free LRO memory */
   4719 		tcp_lro_free(lro);
   4720 #endif /* LRO */
   4721 		/* Free the ring memory as well */
   4722 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4723 		IXGBE_RX_LOCK_DESTROY(rxr);
   4724 	}
   4725 
   4726 	free(adapter->rx_rings, M_DEVBUF);
   4727 }
   4728 
   4729 
   4730 /*********************************************************************
   4731  *
   4732  *  Free receive ring data structures
   4733  *
   4734  **********************************************************************/
   4735 static void
   4736 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4737 {
   4738 	struct adapter		*adapter = rxr->adapter;
   4739 	struct ixgbe_rx_buf	*rxbuf;
   4740 
   4741 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   4742 
   4743 	/* Cleanup any existing buffers */
   4744 	if (rxr->rx_buffers != NULL) {
   4745 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4746 			rxbuf = &rxr->rx_buffers[i];
   4747 			if (rxbuf->buf != NULL) {
   4748 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4749 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4750 				    BUS_DMASYNC_POSTREAD);
   4751 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4752 				rxbuf->buf->m_flags |= M_PKTHDR;
   4753 				m_freem(rxbuf->buf);
   4754 			}
   4755 			rxbuf->buf = NULL;
   4756 			if (rxbuf->pmap != NULL) {
   4757 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4758 				rxbuf->pmap = NULL;
   4759 			}
   4760 		}
   4761 		if (rxr->rx_buffers != NULL) {
   4762 			free(rxr->rx_buffers, M_DEVBUF);
   4763 			rxr->rx_buffers = NULL;
   4764 		}
   4765 	}
   4766 
   4767 	if (rxr->ptag != NULL) {
   4768 		ixgbe_dma_tag_destroy(rxr->ptag);
   4769 		rxr->ptag = NULL;
   4770 	}
   4771 
   4772 	return;
   4773 }
   4774 
   4775 static __inline void
   4776 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4777 {
   4778 	int s;
   4779 
   4780 #ifdef LRO
   4781 	struct adapter	*adapter = ifp->if_softc;
   4782 	struct ethercom *ec = &adapter->osdep.ec;
   4783 
    4784 	/*
    4785 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4786 	 * was computed by hardware, with no VLAN tag in the ethernet
    4787 	 * header.  For IPv6 we do not yet support extension headers.
    4788 	 */
    4789 	if (rxr->lro_enabled &&
    4790 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    4791 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    4792 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4793 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    4794 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4795 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    4796 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    4797 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4798 		/*
    4799 		 * Send to the stack if:
    4800 		 *  - LRO not enabled, or
    4801 		 *  - no LRO resources, or
    4802 		 *  - the lro enqueue fails
    4803 		 */
    4804 		if (rxr->lro.lro_cnt != 0)
    4805 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    4806 				return;
    4807 	}
   4808 #endif /* LRO */
   4809 
   4810 	IXGBE_RX_UNLOCK(rxr);
   4811 
   4812 	s = splnet();
   4813 	/* Pass this up to any BPF listeners. */
   4814 	bpf_mtap(ifp, m);
   4815 	(*ifp->if_input)(ifp, m);
   4816 	splx(s);
   4817 
   4818 	IXGBE_RX_LOCK(rxr);
   4819 }
   4820 
   4821 static __inline void
   4822 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4823 {
   4824 	struct ixgbe_rx_buf	*rbuf;
   4825 
   4826 	rbuf = &rxr->rx_buffers[i];
    4827 
   4829 	/*
   4830 	** With advanced descriptors the writeback
   4831 	** clobbers the buffer addrs, so its easier
   4832 	** to just free the existing mbufs and take
   4833 	** the normal refresh path to get new buffers
   4834 	** and mapping.
   4835 	*/
   4836 
    4837 	if (rbuf->fmp != NULL) {	/* Partial chain? */
    4838 		rbuf->fmp->m_flags |= M_PKTHDR;
    4839 		m_freem(rbuf->fmp);
    4840 		rbuf->fmp = NULL;
    4841 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    4842 	} else if (rbuf->buf != NULL) {
    4843 		m_free(rbuf->buf);
    4844 		rbuf->buf = NULL;
    4845 	}
   4846 
   4847 	rbuf->flags = 0;
   4848 
   4849 	return;
   4850 }
   4851 
   4852 
   4853 /*********************************************************************
   4854  *
   4855  *  This routine executes in interrupt context. It replenishes
   4856  *  the mbufs in the descriptor and sends data which has been
   4857  *  dma'ed into host memory to upper layer.
   4858  *
   4859  *  We loop at most count times if count is > 0, or until done if
   4860  *  count < 0.
   4861  *
   4862  *  Return TRUE for more work, FALSE for all clean.
   4863  *********************************************************************/
   4864 static bool
   4865 ixgbe_rxeof(struct ix_queue *que)
   4866 {
   4867 	struct adapter		*adapter = que->adapter;
   4868 	struct rx_ring		*rxr = que->rxr;
   4869 	struct ifnet		*ifp = adapter->ifp;
   4870 #ifdef LRO
   4871 	struct lro_ctrl		*lro = &rxr->lro;
   4872 	struct lro_entry	*queued;
   4873 #endif /* LRO */
   4874 	int			i, nextp, processed = 0;
   4875 	u32			staterr = 0;
   4876 	u16			count = rxr->process_limit;
   4877 	union ixgbe_adv_rx_desc	*cur;
   4878 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4879 #ifdef RSS
   4880 	u16			pkt_info;
   4881 #endif
   4882 
   4883 	IXGBE_RX_LOCK(rxr);
   4884 
   4885 #ifdef DEV_NETMAP
   4886 	/* Same as the txeof routine: wakeup clients on intr. */
   4887 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   4888 		IXGBE_RX_UNLOCK(rxr);
   4889 		return (FALSE);
   4890 	}
   4891 #endif /* DEV_NETMAP */
   4892 
   4893 	for (i = rxr->next_to_check; count != 0;) {
   4894 		struct mbuf	*sendmp, *mp;
   4895 		u32		rsc, ptype;
   4896 		u16		len;
   4897 		u16		vtag = 0;
   4898 		bool		eop;
   4899 
   4900 		/* Sync the ring. */
   4901 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4902 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4903 
   4904 		cur = &rxr->rx_base[i];
   4905 		staterr = le32toh(cur->wb.upper.status_error);
   4906 #ifdef RSS
   4907 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   4908 #endif
   4909 
   4910 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4911 			break;
   4912 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4913 			break;
   4914 
   4915 		count--;
   4916 		sendmp = NULL;
   4917 		nbuf = NULL;
   4918 		rsc = 0;
   4919 		cur->wb.upper.status_error = 0;
   4920 		rbuf = &rxr->rx_buffers[i];
   4921 		mp = rbuf->buf;
   4922 
   4923 		len = le16toh(cur->wb.upper.length);
   4924 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4925 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4926 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4927 
   4928 		/* Make sure bad packets are discarded */
   4929 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   4930 			rxr->rx_discarded.ev_count++;
   4931 			ixgbe_rx_discard(rxr, i);
   4932 			goto next_desc;
   4933 		}
   4934 
    4935 		/*
    4936 		** On the 82599, which supports a hardware
    4937 		** LRO (called HW RSC), packets need not
    4938 		** be fragmented across sequential
    4939 		** descriptors; rather, the next descriptor
    4940 		** is indicated in bits of the descriptor.
    4941 		** This also means that we might process
    4942 		** more than one packet at a time, something
    4943 		** that was never true before; it required
    4944 		** eliminating global chain pointers in
    4945 		** favor of what we are doing here.  -jfv
    4946 		*/
   4947 		if (!eop) {
   4948 			/*
   4949 			** Figure out the next descriptor
   4950 			** of this frame.
   4951 			*/
   4952 			if (rxr->hw_rsc == TRUE) {
   4953 				rsc = ixgbe_rsc_count(cur);
   4954 				rxr->rsc_num += (rsc - 1);
   4955 			}
   4956 			if (rsc) { /* Get hardware index */
   4957 				nextp = ((staterr &
   4958 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4959 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4960 			} else { /* Just sequential */
   4961 				nextp = i + 1;
   4962 				if (nextp == adapter->num_rx_desc)
   4963 					nextp = 0;
   4964 			}
   4965 			nbuf = &rxr->rx_buffers[nextp];
   4966 			prefetch(nbuf);
   4967 		}
   4968 		/*
   4969 		** Rather than using the fmp/lmp global pointers
   4970 		** we now keep the head of a packet chain in the
   4971 		** buffer struct and pass this along from one
   4972 		** descriptor to the next, until we get EOP.
   4973 		*/
   4974 		mp->m_len = len;
   4975 		/*
   4976 		** See if there is a stored head
   4977 		** that determines what we are
   4978 		*/
   4979 		sendmp = rbuf->fmp;
   4980 		if (sendmp != NULL) {  /* secondary frag */
   4981 			rbuf->buf = rbuf->fmp = NULL;
   4982 			mp->m_flags &= ~M_PKTHDR;
   4983 			sendmp->m_pkthdr.len += mp->m_len;
   4984 		} else {
   4985 			/*
   4986 			 * Optimize.  This might be a small packet,
   4987 			 * maybe just a TCP ACK.  Do a fast copy that
   4988 			 * is cache aligned into a new mbuf, and
   4989 			 * leave the old mbuf+cluster for re-use.
   4990 			 */
   4991 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4992 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4993 				if (sendmp != NULL) {
   4994 					sendmp->m_data +=
   4995 					    IXGBE_RX_COPY_ALIGN;
   4996 					ixgbe_bcopy(mp->m_data,
   4997 					    sendmp->m_data, len);
   4998 					sendmp->m_len = len;
   4999 					rxr->rx_copies.ev_count++;
   5000 					rbuf->flags |= IXGBE_RX_COPY;
   5001 				}
   5002 			}
   5003 			if (sendmp == NULL) {
   5004 				rbuf->buf = rbuf->fmp = NULL;
   5005 				sendmp = mp;
   5006 			}
   5007 
   5008 			/* first desc of a non-ps chain */
   5009 			sendmp->m_flags |= M_PKTHDR;
   5010 			sendmp->m_pkthdr.len = mp->m_len;
   5011 		}
   5012 		++processed;
   5013 
   5014 		/* Pass the head pointer on */
   5015 		if (eop == 0) {
   5016 			nbuf->fmp = sendmp;
   5017 			sendmp = NULL;
   5018 			mp->m_next = nbuf->buf;
   5019 		} else { /* Sending this frame */
   5020 			sendmp->m_pkthdr.rcvif = ifp;
   5021 			ifp->if_ipackets++;
   5022 			rxr->rx_packets.ev_count++;
   5023 			/* capture data for AIM */
   5024 			rxr->bytes += sendmp->m_pkthdr.len;
   5025 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   5026 			/* Process vlan info */
   5027 			if ((rxr->vtag_strip) &&
   5028 			    (staterr & IXGBE_RXD_STAT_VP))
   5029 				vtag = le16toh(cur->wb.upper.vlan);
   5030 			if (vtag) {
   5031 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   5032 				    printf("%s: could not apply VLAN "
   5033 					"tag", __func__));
   5034 			}
   5035 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   5036 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   5037 				   &adapter->stats);
   5038 			}
   5039 #if __FreeBSD_version >= 800000
   5040 #ifdef RSS
   5041 			sendmp->m_pkthdr.flowid =
   5042 			    le32toh(cur->wb.lower.hi_dword.rss);
   5043 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   5044 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   5045 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   5046 				break;
   5047 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   5048 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   5049 				break;
   5050 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   5051 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   5052 				break;
   5053 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   5054 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   5055 				break;
   5056 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   5057 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   5058 				break;
   5059 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   5060 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   5061 				break;
   5062 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   5063 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   5064 				break;
   5065 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   5066 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   5067 				break;
   5068 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   5069 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   5070 				break;
   5071 			default:
    5072 				/* Unknown RSS hash type */
   5073 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5074 				break;
   5075 			}
   5076 #else /* RSS */
   5077 			sendmp->m_pkthdr.flowid = que->msix;
   5078 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5079 #endif /* RSS */
   5080 #endif /* FreeBSD_version */
   5081 		}
   5082 next_desc:
   5083 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   5084 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   5085 
   5086 		/* Advance our pointers to the next descriptor. */
   5087 		if (++i == rxr->num_desc)
   5088 			i = 0;
   5089 
   5090 		/* Now send to the stack or do LRO */
   5091 		if (sendmp != NULL) {
   5092 			rxr->next_to_check = i;
   5093 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   5094 			i = rxr->next_to_check;
   5095 		}
   5096 
    5097 		/* Every 8 descriptors we go to refresh mbufs */
   5098 		if (processed == 8) {
   5099 			ixgbe_refresh_mbufs(rxr, i);
   5100 			processed = 0;
   5101 		}
   5102 	}
   5103 
   5104 	/* Refresh any remaining buf structs */
   5105 	if (ixgbe_rx_unrefreshed(rxr))
   5106 		ixgbe_refresh_mbufs(rxr, i);
   5107 
   5108 	rxr->next_to_check = i;
   5109 
   5110 #ifdef LRO
   5111 	/*
   5112 	 * Flush any outstanding LRO work
   5113 	 */
   5114 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   5115 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   5116 		tcp_lro_flush(lro, queued);
   5117 	}
   5118 #endif /* LRO */
   5119 
   5120 	IXGBE_RX_UNLOCK(rxr);
   5121 
   5122 	/*
   5123 	** Still have cleaning to do?
   5124 	*/
    5125 	return ((staterr & IXGBE_RXD_STAT_DD) != 0);
   5129 }
   5130 
   5131 
   5132 /*********************************************************************
   5133  *
   5134  *  Verify that the hardware indicated that the checksum is valid.
   5135  *  Inform the stack about the status of checksum so that stack
   5136  *  doesn't spend time verifying the checksum.
   5137  *
   5138  *********************************************************************/
   5139 static void
   5140 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   5141     struct ixgbe_hw_stats *stats)
   5142 {
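         	/*
         	 * The low 16 bits of staterr carry the descriptor status
         	 * bits; the error bits live in the top byte (bits 31:24).
         	 */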
   5143 	u16	status = (u16) staterr;
   5144 	u8	errors = (u8) (staterr >> 24);
   5145 #if 0
   5146 	bool	sctp = FALSE;
   5147 
   5148 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   5149 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   5150 		sctp = TRUE;
   5151 #endif
   5152 
   5153 	if (status & IXGBE_RXD_STAT_IPCS) {
   5154 		stats->ipcs.ev_count++;
   5155 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   5156 			/* IP Checksum Good */
   5157 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   5158 
   5159 		} else {
   5160 			stats->ipcs_bad.ev_count++;
   5161 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   5162 		}
   5163 	}
   5164 	if (status & IXGBE_RXD_STAT_L4CS) {
   5165 		stats->l4cs.ev_count++;
   5166 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   5167 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   5168 			mp->m_pkthdr.csum_flags |= type;
   5169 		} else {
   5170 			stats->l4cs_bad.ev_count++;
   5171 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   5172 		}
   5173 	}
   5174 	return;
   5175 }
   5176 
   5177 
   5178 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   5179 /*
   5180 ** This routine is run via an vlan config EVENT,
   5181 ** it enables us to use the HW Filter table since
   5182 ** we can get the vlan id. This just creates the
   5183 ** entry in the soft version of the VFTA, init will
   5184 ** repopulate the real table.
   5185 */
   5186 static void
   5187 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5188 {
   5189 	struct adapter	*adapter = ifp->if_softc;
   5190 	u16		index, bit;
   5191 
    5192 	if (ifp->if_softc != arg)	/* Not our event */
   5193 		return;
   5194 
   5195 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5196 		return;
   5197 
   5198 	IXGBE_CORE_LOCK(adapter);
   5199 	index = (vtag >> 5) & 0x7F;
   5200 	bit = vtag & 0x1F;
   5201 	adapter->shadow_vfta[index] |= (1 << bit);
   5202 	ixgbe_setup_vlan_hw_support(adapter);
   5203 	IXGBE_CORE_UNLOCK(adapter);
   5204 }
   5205 
   5206 /*
   5207 ** This routine is run via an vlan
   5208 ** unconfig EVENT, remove our entry
   5209 ** in the soft vfta.
   5210 */
   5211 static void
   5212 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5213 {
   5214 	struct adapter	*adapter = ifp->if_softc;
   5215 	u16		index, bit;
   5216 
    5217 	if (ifp->if_softc != arg)
   5218 		return;
   5219 
   5220 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5221 		return;
   5222 
   5223 	IXGBE_CORE_LOCK(adapter);
   5224 	index = (vtag >> 5) & 0x7F;
   5225 	bit = vtag & 0x1F;
   5226 	adapter->shadow_vfta[index] &= ~(1 << bit);
   5227 	/* Re-init to load the changes */
   5228 	ixgbe_setup_vlan_hw_support(adapter);
   5229 	IXGBE_CORE_UNLOCK(adapter);
   5230 }
   5231 #endif
   5232 
   5233 static void
   5234 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   5235 {
   5236 	struct ethercom *ec = &adapter->osdep.ec;
   5237 	struct ixgbe_hw *hw = &adapter->hw;
   5238 	struct rx_ring	*rxr;
   5239 	u32		ctrl;
    5240 
    5242 	/*
    5243 	** We get here thru init_locked, meaning
    5244 	** a soft reset; that has already cleared
    5245 	** the VFTA and other state, so if no
    5246 	** vlans have been registered, do nothing.
    5247 	*/
    5248 	if (!VLAN_ATTACHED(ec))
    5249 		return;
   5251 
   5252 	/* Setup the queues for vlans */
   5253 	for (int i = 0; i < adapter->num_queues; i++) {
   5254 		rxr = &adapter->rx_rings[i];
   5255 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5256 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5257 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5258 			ctrl |= IXGBE_RXDCTL_VME;
   5259 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5260 		}
   5261 		rxr->vtag_strip = TRUE;
   5262 	}
   5263 
   5264 	if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
   5265 		return;
    5266 	/*
    5267 	** A soft reset zeroes out the VFTA, so
    5268 	** we need to repopulate it now.
    5269 	*/
   5270 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5271 		if (adapter->shadow_vfta[i] != 0)
   5272 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5273 			    adapter->shadow_vfta[i]);
   5274 
   5275 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5276 	/* Enable the Filter Table if enabled */
   5277 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5278 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5279 		ctrl |= IXGBE_VLNCTRL_VFE;
   5280 	}
   5281 	if (hw->mac.type == ixgbe_mac_82598EB)
   5282 		ctrl |= IXGBE_VLNCTRL_VME;
   5283 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5284 }
   5285 
   5286 static void
   5287 ixgbe_enable_intr(struct adapter *adapter)
   5288 {
   5289 	struct ixgbe_hw	*hw = &adapter->hw;
   5290 	struct ix_queue	*que = adapter->queues;
   5291 	u32		mask, fwsm;
   5292 
   5293 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5294 	/* Enable Fan Failure detection */
    5295 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5296 		mask |= IXGBE_EIMS_GPI_SDP1;
   5297 
   5298 	switch (adapter->hw.mac.type) {
   5299 		case ixgbe_mac_82599EB:
   5300 			mask |= IXGBE_EIMS_ECC;
   5301 			mask |= IXGBE_EIMS_GPI_SDP0;
   5302 			mask |= IXGBE_EIMS_GPI_SDP1;
   5303 			mask |= IXGBE_EIMS_GPI_SDP2;
   5304 #ifdef IXGBE_FDIR
   5305 			mask |= IXGBE_EIMS_FLOW_DIR;
   5306 #endif
   5307 			break;
   5308 		case ixgbe_mac_X540:
   5309 			mask |= IXGBE_EIMS_ECC;
   5310 			/* Detect if Thermal Sensor is enabled */
   5311 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5312 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5313 				mask |= IXGBE_EIMS_TS;
   5314 #ifdef IXGBE_FDIR
   5315 			mask |= IXGBE_EIMS_FLOW_DIR;
   5316 #endif
   5317 		/* falls through */
   5318 		default:
   5319 			break;
   5320 	}
   5321 
   5322 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5323 
   5324 	/* With RSS we use auto clear */
   5325 	if (adapter->msix_mem) {
   5326 		mask = IXGBE_EIMS_ENABLE_MASK;
   5327 		/* Don't autoclear Link */
   5328 		mask &= ~IXGBE_EIMS_OTHER;
   5329 		mask &= ~IXGBE_EIMS_LSC;
   5330 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5331 	}
   5332 
   5333 	/*
   5334 	** Now enable all queues, this is done separately to
   5335 	** allow for handling the extended (beyond 32) MSIX
   5336 	** vectors that can be used by 82599
   5337 	*/
    5338 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5339 		ixgbe_enable_queue(adapter, que->msix);
   5340 
   5341 	IXGBE_WRITE_FLUSH(hw);
   5342 
   5343 	return;
   5344 }
   5345 
   5346 static void
   5347 ixgbe_disable_intr(struct adapter *adapter)
   5348 {
   5349 	if (adapter->msix_mem)
   5350 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5351 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5352 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5353 	} else {
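         		/*
         		 * Newer MACs keep the additional queue vectors in the
         		 * EIMC_EX pair, so mask those as well.
         		 */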
   5354 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5355 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5356 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5357 	}
   5358 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5359 	return;
   5360 }
   5361 
   5362 u16
   5363 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5364 {
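         	/*
         	 * pci_conf_read(9) operates on aligned 32-bit words, so a
         	 * 16-bit config read is synthesized from the containing dword.
         	 */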
   5365 	switch (reg % 4) {
   5366 	case 0:
   5367 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5368 		    __BITS(15, 0);
   5369 	case 2:
   5370 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5371 		    reg - 2), __BITS(31, 16));
   5372 	default:
    5373 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5374 		break;
   5375 	}
   5376 }
   5377 
   5378 void
   5379 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5380 {
   5381 	pcireg_t old;
   5382 
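         	/*
         	 * Likewise, a 16-bit config write must read-modify-write the
         	 * containing 32-bit word so the other half is preserved.
         	 */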
   5383 	switch (reg % 4) {
   5384 	case 0:
   5385 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5386 		    __BITS(31, 16);
   5387 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5388 		break;
   5389 	case 2:
   5390 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5391 		    __BITS(15, 0);
   5392 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5393 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5394 		break;
   5395 	default:
    5396 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5397 		break;
   5398 	}
   5399 
   5400 	return;
   5401 }
   5402 
   5403 /*
   5404 ** Get the width and transaction speed of
   5405 ** the slot this adapter is plugged into.
   5406 */
   5407 static void
   5408 ixgbe_get_slot_info(struct ixgbe_hw *hw)
   5409 {
   5410 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
   5411 	struct ixgbe_mac_info	*mac = &hw->mac;
   5412 	u16			link;
   5413 
   5414 	/* For most devices simply call the shared code routine */
   5415 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
   5416 		ixgbe_get_bus_info(hw);
   5417 		goto display;
   5418 	}
   5419 
   5420 	/*
   5421 	** For the Quad port adapter we need to parse back
   5422 	** up the PCI tree to find the speed of the expansion
   5423 	** slot into which this adapter is plugged. A bit more work.
   5424 	*/
   5425 	dev = device_parent(device_parent(dev));
   5426 #ifdef IXGBE_DEBUG
   5427 	device_printf(dev, "parent pcib = %x,%x,%x\n",
   5428 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5429 #endif
   5430 	dev = device_parent(device_parent(dev));
   5431 #ifdef IXGBE_DEBUG
   5432 	device_printf(dev, "slot pcib = %x,%x,%x\n",
   5433 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5434 #endif
   5435 	/* Now get the PCI Express Capabilities offset */
   5436 	/* ...and read the Link Status Register */
   5437 	link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
   5438 	switch (link & IXGBE_PCI_LINK_WIDTH) {
   5439 	case IXGBE_PCI_LINK_WIDTH_1:
   5440 		hw->bus.width = ixgbe_bus_width_pcie_x1;
   5441 		break;
   5442 	case IXGBE_PCI_LINK_WIDTH_2:
   5443 		hw->bus.width = ixgbe_bus_width_pcie_x2;
   5444 		break;
   5445 	case IXGBE_PCI_LINK_WIDTH_4:
   5446 		hw->bus.width = ixgbe_bus_width_pcie_x4;
   5447 		break;
   5448 	case IXGBE_PCI_LINK_WIDTH_8:
   5449 		hw->bus.width = ixgbe_bus_width_pcie_x8;
   5450 		break;
   5451 	default:
   5452 		hw->bus.width = ixgbe_bus_width_unknown;
   5453 		break;
   5454 	}
   5455 
   5456 	switch (link & IXGBE_PCI_LINK_SPEED) {
   5457 	case IXGBE_PCI_LINK_SPEED_2500:
   5458 		hw->bus.speed = ixgbe_bus_speed_2500;
   5459 		break;
   5460 	case IXGBE_PCI_LINK_SPEED_5000:
   5461 		hw->bus.speed = ixgbe_bus_speed_5000;
   5462 		break;
   5463 	case IXGBE_PCI_LINK_SPEED_8000:
   5464 		hw->bus.speed = ixgbe_bus_speed_8000;
   5465 		break;
   5466 	default:
   5467 		hw->bus.speed = ixgbe_bus_speed_unknown;
   5468 		break;
   5469 	}
   5470 
   5471 	mac->ops.set_lan_id(hw);
   5472 
   5473 display:
   5474 	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
   5475 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
   5476 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
   5477 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
   5478 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
   5479 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
   5480 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
   5481 	    ("Unknown"));
   5482 
   5483 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5484 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
   5485 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
   5486 		device_printf(dev, "PCI-Express bandwidth available"
   5487 		    " for this card\n     is not sufficient for"
   5488 		    " optimal performance.\n");
   5489 		device_printf(dev, "For optimal performance a x8 "
   5490 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
    5491 	}
   5492 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5493 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
   5494 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
   5495 		device_printf(dev, "PCI-Express bandwidth available"
   5496 		    " for this card\n     is not sufficient for"
   5497 		    " optimal performance.\n");
   5498 		device_printf(dev, "For optimal performance a x8 "
   5499 		    "PCIE Gen3 slot is required.\n");
    5500 	}
   5501 
   5502 	return;
   5503 }
   5504 
   5505 
   5506 /*
   5507 ** Setup the correct IVAR register for a particular MSIX interrupt
   5508 **   (yes this is all very magic and confusing :)
   5509 **  - entry is the register array entry
   5510 **  - vector is the MSIX vector for this queue
   5511 **  - type is RX/TX/MISC
   5512 */
   5513 static void
   5514 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5515 {
   5516 	struct ixgbe_hw *hw = &adapter->hw;
   5517 	u32 ivar, index;
   5518 
   5519 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5520 
   5521 	switch (hw->mac.type) {
   5522 
   5523 	case ixgbe_mac_82598EB:
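         		/*
         		 * Each IVAR register packs four 8-bit entries: pick the
         		 * register with (entry >> 2) and the byte lane within it
         		 * with (entry & 0x3).
         		 */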
   5524 		if (type == -1)
   5525 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5526 		else
   5527 			entry += (type * 64);
   5528 		index = (entry >> 2) & 0x1F;
   5529 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5530 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5531 		ivar |= (vector << (8 * (entry & 0x3)));
   5532 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5533 		break;
   5534 
   5535 	case ixgbe_mac_82599EB:
   5536 	case ixgbe_mac_X540:
   5537 		if (type == -1) { /* MISC IVAR */
   5538 			index = (entry & 1) * 8;
   5539 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5540 			ivar &= ~(0xFF << index);
   5541 			ivar |= (vector << index);
   5542 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5543 		} else {	/* RX/TX IVARS */
   5544 			index = (16 * (entry & 1)) + (8 * type);
   5545 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5546 			ivar &= ~(0xFF << index);
   5547 			ivar |= (vector << index);
   5548 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
    5549 		}
    5550 		break;
   5551 	default:
   5552 		break;
   5553 	}
   5554 }
   5555 
   5556 static void
   5557 ixgbe_configure_ivars(struct adapter *adapter)
   5558 {
   5559 	struct  ix_queue *que = adapter->queues;
   5560 	u32 newitr;
   5561 
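         	/*
         	 * Convert the interrupts/sec cap into the EITR interval
         	 * encoding; the interval field occupies bits 11:3 of the
         	 * register, hence the 0x0FF8 mask.
         	 */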
   5562 	if (ixgbe_max_interrupt_rate > 0)
   5563 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5564 	else
   5565 		newitr = 0;
   5566 
    5567 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5568 		/* First the RX queue entry */
    5569 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5570 		/* ... and the TX */
    5571 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5572 		/* Set an Initial EITR value */
    5573 		IXGBE_WRITE_REG(&adapter->hw,
    5574 		    IXGBE_EITR(que->msix), newitr);
    5575 	}
    5576 
    5577 	/* For the Link interrupt */
    5578 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5579 }
   5580 
   5581 /*
   5582 ** ixgbe_sfp_probe - called in the local timer to
   5583 ** determine if a port had optics inserted.
   5584 */
    5585 static bool
         ixgbe_sfp_probe(struct adapter *adapter)
   5586 {
   5587 	struct ixgbe_hw	*hw = &adapter->hw;
   5588 	device_t	dev = adapter->dev;
   5589 	bool		result = FALSE;
   5590 
   5591 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5592 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5593 		s32 ret = hw->phy.ops.identify_sfp(hw);
    5594 		if (ret)
    5595 			goto out;
    5596 		ret = hw->phy.ops.reset(hw);
    5597 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5598 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5599 			device_printf(dev, "Reload driver with supported module.\n");
    5600 			adapter->sfp_probe = FALSE;
    5601 			goto out;
    5602 		} else
    5603 			device_printf(dev, "SFP+ module detected!\n");
   5604 		/* We now have supported optics */
   5605 		adapter->sfp_probe = FALSE;
   5606 		/* Set the optics type so system reports correctly */
   5607 		ixgbe_setup_optics(adapter);
   5608 		result = TRUE;
   5609 	}
   5610 out:
   5611 	return (result);
   5612 }
   5613 
   5614 /*
   5615 ** Tasklet handler for MSIX Link interrupts
   5616 **  - do outside interrupt since it might sleep
   5617 */
   5618 static void
   5619 ixgbe_handle_link(void *context)
   5620 {
   5621 	struct adapter  *adapter = context;
   5622 
    5623 	if (ixgbe_check_link(&adapter->hw,
    5624 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
    5625 		ixgbe_update_link_status(adapter);
   5626 }
   5627 
   5628 /*
   5629 ** Tasklet for handling SFP module interrupts
   5630 */
   5631 static void
   5632 ixgbe_handle_mod(void *context)
   5633 {
   5634 	struct adapter  *adapter = context;
   5635 	struct ixgbe_hw *hw = &adapter->hw;
   5636 	device_t	dev = adapter->dev;
   5637 	u32 err;
   5638 
   5639 	err = hw->phy.ops.identify_sfp(hw);
   5640 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5641 		device_printf(dev,
   5642 		    "Unsupported SFP+ module type was detected.\n");
   5643 		return;
   5644 	}
   5645 	err = hw->mac.ops.setup_sfp(hw);
   5646 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5647 		device_printf(dev,
   5648 		    "Setup failure - unsupported SFP+ module type.\n");
   5649 		return;
   5650 	}
   5651 	softint_schedule(adapter->msf_si);
   5652 	return;
   5653 }
   5654 
   5655 
   5656 /*
   5657 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5658 */
   5659 static void
   5660 ixgbe_handle_msf(void *context)
   5661 {
   5662 	struct adapter  *adapter = context;
   5663 	struct ixgbe_hw *hw = &adapter->hw;
   5664 	u32 autoneg;
   5665 	bool negotiate;
   5666 
   5667 	autoneg = hw->phy.autoneg_advertised;
   5668 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5669 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5670 	else
   5671 		negotiate = 0;
   5672 	if (hw->mac.ops.setup_link)
   5673 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5674 	return;
   5675 }
   5676 
   5677 #ifdef IXGBE_FDIR
   5678 /*
   5679 ** Tasklet for reinitializing the Flow Director filter table
   5680 */
   5681 static void
   5682 ixgbe_reinit_fdir(void *context)
   5683 {
   5684 	struct adapter  *adapter = context;
   5685 	struct ifnet   *ifp = adapter->ifp;
   5686 
   5687 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5688 		return;
   5689 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5690 	adapter->fdir_reinit = 0;
   5691 	/* re-enable flow director interrupts */
   5692 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5693 	/* Restart the interface */
   5694 	ifp->if_flags |= IFF_RUNNING;
   5695 	return;
   5696 }
   5697 #endif
   5698 
   5699 /**********************************************************************
   5700  *
   5701  *  Update the board statistics counters.
   5702  *
   5703  **********************************************************************/
   5704 static void
   5705 ixgbe_update_stats_counters(struct adapter *adapter)
   5706 {
   5707 	struct ifnet   *ifp = adapter->ifp;
   5708 	struct ixgbe_hw *hw = &adapter->hw;
   5709 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5710 	u64  total_missed_rx = 0;
   5711 	uint64_t crcerrs, rlec;
   5712 
   5713 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5714 	adapter->stats.crcerrs.ev_count += crcerrs;
   5715 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5716 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5717 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5718 
   5719 	/*
   5720 	** Note: these are for the 8 possible traffic classes,
    5721 	**	 which the current implementation does not use,
    5722 	**	 therefore only class 0 should read real data.
   5723 	*/
   5724 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5725 		int j = i % adapter->num_queues;
   5726 		u32 mp;
   5727 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5728 		/* missed_rx tallies misses for the gprc workaround */
   5729 		missed_rx += mp;
   5730 		/* global total per queue */
    5731 		adapter->stats.mpc[j].ev_count += mp;
   5732 		/* Running comprehensive total for stats display */
   5733 		total_missed_rx += mp;
   5734 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5735 			adapter->stats.rnbc[j] +=
   5736 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5737 			adapter->stats.qbtc[j].ev_count +=
   5738 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5739 			adapter->stats.qbrc[j].ev_count +=
   5740 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5741 			adapter->stats.pxonrxc[j].ev_count +=
   5742 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5743 		} else {
   5744 			adapter->stats.pxonrxc[j].ev_count +=
   5745 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5746 		}
   5747 		adapter->stats.pxontxc[j].ev_count +=
   5748 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5749 		adapter->stats.pxofftxc[j].ev_count +=
   5750 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5751 		adapter->stats.pxoffrxc[j].ev_count +=
   5752 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5753 		adapter->stats.pxon2offc[j].ev_count +=
   5754 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5755 	}
   5756 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5757 		int j = i % adapter->num_queues;
   5758 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5759 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5760 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5761 	}
   5762 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5763 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5764 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5765 	adapter->stats.rlec.ev_count += rlec;
   5766 
   5767 	/* Hardware workaround, gprc counts missed packets */
   5768 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5769 
   5770 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5771 	adapter->stats.lxontxc.ev_count += lxon;
   5772 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5773 	adapter->stats.lxofftxc.ev_count += lxoff;
   5774 	total = lxon + lxoff;
   5775 
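         	/*
         	 * Pause frames are minimum-sized and are counted by the
         	 * good octet/packet counters; they get subtracted out of
         	 * the totals below.
         	 */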
   5776 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5777 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5778 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5779 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5780 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5781 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5782 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5783 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5784 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5785 	} else {
   5786 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5787 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5788 		/* 82598 only has a counter in the high register */
   5789 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5790 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5791 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5792 	}
   5793 
   5794 	/*
   5795 	 * Workaround: mprc hardware is incorrectly counting
   5796 	 * broadcasts, so for now we subtract those.
   5797 	 */
   5798 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5799 	adapter->stats.bprc.ev_count += bprc;
   5800 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5801 
   5802 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5803 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5804 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5805 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5806 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5807 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5808 
   5809 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5810 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5811 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5812 
   5813 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5814 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5815 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5816 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5817 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5818 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5819 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5820 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5821 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5822 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5823 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5824 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5825 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5826 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5827 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5828 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5829 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5830 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5831 
   5832 	/* Only read FCOE on 82599 */
   5833 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5834 		adapter->stats.fcoerpdc.ev_count +=
   5835 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5836 		adapter->stats.fcoeprc.ev_count +=
   5837 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5838 		adapter->stats.fcoeptc.ev_count +=
   5839 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5840 		adapter->stats.fcoedwrc.ev_count +=
   5841 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5842 		adapter->stats.fcoedwtc.ev_count +=
   5843 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5844 	}
   5845 
   5846 	/* Fill out the OS statistics structure */
   5847 	/*
   5848 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5849 	 * adapter->stats counters. It's required to make ifconfig -z
   5850 	 * (SOICZIFDATA) work.
   5851 	 */
   5852 	ifp->if_collisions = 0;
   5853 
   5854 	/* Rx Errors */
   5855 	ifp->if_iqdrops += total_missed_rx;
   5856 	ifp->if_ierrors += crcerrs + rlec;
   5857 }
   5858 
   5859 /** ixgbe_sysctl_tdh_handler - Handler function
   5860  *  Retrieves the TDH value from the hardware
   5861  */
   5862 static int
   5863 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5864 {
   5865 	struct sysctlnode node;
   5866 	uint32_t val;
   5867 	struct tx_ring *txr;
   5868 
   5869 	node = *rnode;
   5870 	txr = (struct tx_ring *)node.sysctl_data;
   5871 	if (txr == NULL)
   5872 		return 0;
   5873 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5874 	node.sysctl_data = &val;
   5875 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5876 }
   5877 
   5878 /** ixgbe_sysctl_tdt_handler - Handler function
   5879  *  Retrieves the TDT value from the hardware
   5880  */
   5881 static int
   5882 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5883 {
   5884 	struct sysctlnode node;
   5885 	uint32_t val;
   5886 	struct tx_ring *txr;
   5887 
   5888 	node = *rnode;
   5889 	txr = (struct tx_ring *)node.sysctl_data;
   5890 	if (txr == NULL)
   5891 		return 0;
   5892 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5893 	node.sysctl_data = &val;
   5894 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5895 }
   5896 
   5897 /** ixgbe_sysctl_rdh_handler - Handler function
   5898  *  Retrieves the RDH value from the hardware
   5899  */
   5900 static int
   5901 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5902 {
   5903 	struct sysctlnode node;
   5904 	uint32_t val;
   5905 	struct rx_ring *rxr;
   5906 
   5907 	node = *rnode;
   5908 	rxr = (struct rx_ring *)node.sysctl_data;
   5909 	if (rxr == NULL)
   5910 		return 0;
   5911 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5912 	node.sysctl_data = &val;
   5913 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5914 }
   5915 
   5916 /** ixgbe_sysctl_rdt_handler - Handler function
   5917  *  Retrieves the RDT value from the hardware
   5918  */
   5919 static int
   5920 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5921 {
   5922 	struct sysctlnode node;
   5923 	uint32_t val;
   5924 	struct rx_ring *rxr;
   5925 
   5926 	node = *rnode;
   5927 	rxr = (struct rx_ring *)node.sysctl_data;
   5928 	if (rxr == NULL)
   5929 		return 0;
   5930 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5931 	node.sysctl_data = &val;
   5932 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5933 }
   5934 
   5935 static int
   5936 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5937 {
   5938 	int error;
   5939 	struct sysctlnode node;
   5940 	struct ix_queue *que;
   5941 	uint32_t reg, usec, rate;
   5942 
   5943 	node = *rnode;
   5944 	que = (struct ix_queue *)node.sysctl_data;
   5945 	if (que == NULL)
   5946 		return 0;
   5947 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
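         	/*
         	 * The EITR interval lives in bits 11:3; an interval of n
         	 * corresponds to roughly 500000/n interrupts per second,
         	 * which is what we report.
         	 */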
   5948 	usec = ((reg & 0x0FF8) >> 3);
   5949 	if (usec > 0)
   5950 		rate = 500000 / usec;
   5951 	else
   5952 		rate = 0;
   5953 	node.sysctl_data = &rate;
   5954 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5955 	if (error)
   5956 		return error;
   5957 	reg &= ~0xfff; /* default, no limitation */
   5958 	ixgbe_max_interrupt_rate = 0;
   5959 	if (rate > 0 && rate < 500000) {
   5960 		if (rate < 1000)
   5961 			rate = 1000;
   5962 		ixgbe_max_interrupt_rate = rate;
   5963 		reg |= ((4000000/rate) & 0xff8 );
   5964 	}
   5965 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5966 	return 0;
   5967 }
   5968 
   5969 const struct sysctlnode *
   5970 ixgbe_sysctl_instance(struct adapter *adapter)
   5971 {
   5972 	const char *dvname;
   5973 	struct sysctllog **log;
   5974 	int rc;
   5975 	const struct sysctlnode *rnode;
   5976 
   5977 	log = &adapter->sysctllog;
   5978 	dvname = device_xname(adapter->dev);
   5979 
   5980 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5981 	    0, CTLTYPE_NODE, dvname,
   5982 	    SYSCTL_DESCR("ixgbe information and settings"),
   5983 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5984 		goto err;
   5985 
   5986 	return rnode;
   5987 err:
   5988 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5989 	return NULL;
   5990 }
   5991 
   5992 /*
   5993  * Add sysctl variables, one per statistic, to the system.
   5994  */
   5995 static void
   5996 ixgbe_add_hw_stats(struct adapter *adapter)
   5997 {
   5998 	device_t dev = adapter->dev;
   5999 	const struct sysctlnode *rnode, *cnode;
   6000 	struct sysctllog **log = &adapter->sysctllog;
   6001 	struct tx_ring *txr = adapter->tx_rings;
   6002 	struct rx_ring *rxr = adapter->rx_rings;
   6003 	struct ixgbe_hw_stats *stats = &adapter->stats;
   6004 
   6005 	/* Driver Statistics */
   6006 #if 0
   6007 	/* These counters are not updated by the software */
   6008 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   6009 			CTLFLAG_RD, &adapter->dropped_pkts,
   6010 			"Driver dropped packets");
   6011 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   6012 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   6013 			"???");
   6014 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   6015 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   6016 			"???");
   6017 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   6018 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   6019 			"???");
   6020 #endif
   6021 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   6022 	    NULL, device_xname(dev), "Handled queue in softint");
   6023 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   6024 	    NULL, device_xname(dev), "Requeued in softint");
   6025 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   6026 	    NULL, device_xname(dev), "Interrupt handler more rx");
   6027 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   6028 	    NULL, device_xname(dev), "Interrupt handler more tx");
   6029 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   6030 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   6031 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   6032 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   6033 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   6034 	    NULL, device_xname(dev), "m_defrag() failed");
   6035 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   6036 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   6037 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   6038 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   6039 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   6040 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   6041 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   6042 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   6043 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   6044 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   6045 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   6046 	    NULL, device_xname(dev), "Watchdog timeouts");
   6047 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   6048 	    NULL, device_xname(dev), "TSO errors");
   6049 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   6050 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   6051 
   6052 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   6053 		snprintf(adapter->queues[i].evnamebuf,
   6054 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   6055 		    device_xname(dev), i);
   6056 		snprintf(adapter->queues[i].namebuf,
   6057 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   6058 
   6059 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   6060 			aprint_error_dev(dev, "could not create sysctl root\n");
   6061 			break;
   6062 		}
   6063 
   6064 		if (sysctl_createv(log, 0, &rnode, &rnode,
   6065 		    0, CTLTYPE_NODE,
   6066 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   6067 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   6068 			break;
   6069 
   6070 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6071 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   6072 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   6073 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   6074 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   6075 			break;
   6076 
   6077 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6078 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   6079 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
		    NULL, 0, &(adapter->queues[i].irqs),
   6081 		    0, CTL_CREATE, CTL_EOL) != 0)
   6082 			break;
   6083 
   6084 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6085 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6086 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   6087 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   6088 		    0, CTL_CREATE, CTL_EOL) != 0)
   6089 			break;
   6090 
   6091 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6092 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6093 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   6094 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   6095 		    0, CTL_CREATE, CTL_EOL) != 0)
   6096 			break;
   6097 
   6098 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   6099 		    NULL, device_xname(dev), "TSO");
   6100 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   6101 		    NULL, adapter->queues[i].evnamebuf,
   6102 		    "Queue No Descriptor Available");
   6103 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   6104 		    NULL, adapter->queues[i].evnamebuf,
   6105 		    "Queue Packets Transmitted");
   6106 
   6107 #ifdef LRO
   6108 		struct lro_ctrl *lro = &rxr->lro;
   6109 #endif /* LRO */
   6110 
   6111 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6112 		    CTLFLAG_READONLY,
   6113 		    CTLTYPE_INT,
   6114 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   6115 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   6116 		    CTL_CREATE, CTL_EOL) != 0)
   6117 			break;
   6118 
   6119 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6120 		    CTLFLAG_READONLY,
   6121 		    CTLTYPE_INT,
   6122 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   6123 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   6124 		    CTL_CREATE, CTL_EOL) != 0)
   6125 			break;
   6126 
   6127 		if (i < __arraycount(adapter->stats.mpc)) {
   6128 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   6129 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6130 			    "Missed Packet Count");
   6131 		}
   6132 		if (i < __arraycount(adapter->stats.pxontxc)) {
   6133 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   6134 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6135 			    "pxontxc");
   6136 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   6137 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6138 			    "pxonrxc");
   6139 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   6140 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6141 			    "pxofftxc");
   6142 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   6143 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6144 			    "pxoffrxc");
   6145 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   6146 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6147 			    "pxon2offc");
   6148 		}
   6149 		if (i < __arraycount(adapter->stats.qprc)) {
   6150 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   6151 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6152 			    "qprc");
   6153 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   6154 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6155 			    "qptc");
   6156 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   6157 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6158 			    "qbrc");
   6159 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   6160 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6161 			    "qbtc");
   6162 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   6163 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6164 			    "qprdc");
   6165 		}
   6166 
   6167 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   6168 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   6169 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   6170 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   6171 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   6172 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   6173 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   6174 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   6175 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   6176 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   6177 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   6178 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   6179 #ifdef LRO
   6180 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   6181 				CTLFLAG_RD, &lro->lro_queued, 0,
   6182 				"LRO Queued");
   6183 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   6184 				CTLFLAG_RD, &lro->lro_flushed, 0,
   6185 				"LRO Flushed");
   6186 #endif /* LRO */
   6187 	}
   6188 
    6189 	/* MAC stats get their own sub node */
    6190 
   6192 	snprintf(stats->namebuf,
   6193 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   6194 
	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP");
	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4");
	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP bad");
	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4 bad");
	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Interrupt conditions zero");
	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Legacy interrupts");
	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "CRC Errors");
	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Illegal Byte Errors");
	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Byte Errors");
	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Short Packets Discarded");
	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Local Faults");
	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Remote Faults");
	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Length Errors");
	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Transmitted");
	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Received");
	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Transmitted");
	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Received");

	/* Packet Reception Stats */
	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Octets Received");
	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Received");
	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Received");
	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Received");
	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Received");
	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Received");
	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames received");
	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames received");
	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames received");
	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames received");
	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames received");
	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames received");
	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Undersized");
	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Fragmented Packets Received");
	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Oversized Packets Received");
	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Received Jabber");
	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Received");
	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Checksum Errors");

	/* Packet Transmission Stats */
	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Transmitted");
	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Transmitted");
	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Transmitted");
	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Transmitted");
	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames transmitted");
}

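/*
 * Example: counters attached with evcnt_attach_dynamic(9) above show
 * up in the kernel event-counter list and can be read from userland
 * with vmstat(1), e.g.:
 *
 *	vmstat -e
 *
 * Entries are keyed by the evnamebuf/namebuf strings built above,
 * such as "<ifname> MAC Statistics".
 */
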
/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
ixgbe_set_flowcntl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, last;
	struct adapter *adapter;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	node.sysctl_data = &adapter->fc;
	last = adapter->fc;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	/* Don't bother if it's not changed */
	if (adapter->fc == last)
		return (0);

	switch (adapter->fc) {
	case ixgbe_fc_rx_pause:
	case ixgbe_fc_tx_pause:
	case ixgbe_fc_full:
		adapter->hw.fc.requested_mode = adapter->fc;
		if (adapter->num_queues > 1)
			ixgbe_disable_rx_drop(adapter);
		break;
	case ixgbe_fc_none:
		adapter->hw.fc.requested_mode = ixgbe_fc_none;
		if (adapter->num_queues > 1)
			ixgbe_enable_rx_drop(adapter);
		break;
	default:
		adapter->fc = last;
		return (EINVAL);
	}
	/* Don't autoneg if forcing a value */
	adapter->hw.fc.disable_fc_autoneg = TRUE;
	ixgbe_fc_enable(&adapter->hw);
	return 0;
}
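
/*
 * Example: assuming this handler is attached as an integer node under
 * the driver's sysctl subtree (the node name below is illustrative;
 * see where ixgbe_set_flowcntl is registered for the real one), full
 * flow control could be requested with:
 *
 *	sysctl -w hw.ixg0.flow_control=3
 *
 * Writing back the current value is a no-op, and values outside 0-3
 * return EINVAL.
 */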

/*
** Control link advertise speed:
**	1 - advertise only 1G
**	2 - advertise 100Mb
**	3 - advertise normal (1G/10G)
*/
static int
ixgbe_set_advertise(SYSCTLFN_ARGS)
{
	struct sysctlnode	node;
	int			t, error = 0;
	struct adapter		*adapter;
	device_t		dev;
	struct ixgbe_hw		*hw;
	ixgbe_link_speed	speed, last;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	dev = adapter->dev;
	hw = &adapter->hw;
	last = adapter->advertise;
	t = adapter->advertise;
	node.sysctl_data = &t;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	/*
	 * sysctl_lookup() wrote the new value into t, not into the
	 * adapter, so compare t against the saved value.
	 */
	if (t == last) /* no change */
		return (0);

	if (t == -1)
		return 0;

	adapter->advertise = t;

	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
	    (hw->phy.multispeed_fiber)))
		return (EINVAL);

	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
		return (EINVAL);
	}

	if (adapter->advertise == 1)
		speed = IXGBE_LINK_SPEED_1GB_FULL;
	else if (adapter->advertise == 2)
		speed = IXGBE_LINK_SPEED_100_FULL;
	else if (adapter->advertise == 3)
		speed = IXGBE_LINK_SPEED_1GB_FULL |
			IXGBE_LINK_SPEED_10GB_FULL;
	else {	/* bogus value */
		adapter->advertise = last;
		return (EINVAL);
	}

	hw->mac.autotry_restart = TRUE;
	hw->mac.ops.setup_link(hw, speed, TRUE);

	return 0;
}

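/*
 * Example: the advertise values map onto link-speed masks as follows
 * (sysctl node name illustrative, as above):
 *
 *	sysctl -w hw.ixg0.advertise_speed=1	-> 1GB_FULL only
 *	sysctl -w hw.ixg0.advertise_speed=2	-> 100_FULL (X540 only)
 *	sysctl -w hw.ixg0.advertise_speed=3	-> 1GB_FULL | 10GB_FULL
 *
 * Anything else restores the previous value and returns EINVAL; the
 * setting applies only to copper or multispeed-fiber PHYs.
 */
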
/*
** Thermal Shutdown Trigger
**   - causes a Thermal Overtemp IRQ
**   - requires firmware to have enabled the thermal sensor
*/
static int
ixgbe_set_thermal_test(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int		error, fire = 0;
	struct adapter	*adapter;
	struct ixgbe_hw *hw;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	hw = &adapter->hw;

	if (hw->mac.type != ixgbe_mac_X540)
		return (0);

	node.sysctl_data = &fire;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return (error);

	if (fire) {
		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
		reg |= IXGBE_EICR_TS;
		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
	}

	return (0);
}

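/*
 * Example: on X540, writing any non-zero value to the corresponding
 * sysctl node (name illustrative) sets IXGBE_EICR_TS in EICS, which
 * raises the thermal overtemp interrupt so the handler path can be
 * tested:
 *
 *	sysctl -w hw.ixg0.ts=1
 *
 * On other MAC types the handler returns without touching hardware.
 */
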
/*
** Enable the hardware to drop packets when the buffer is
** full.  This is useful in multiqueue operation, so that a
** single full queue does not stall the entire RX engine.  We
** only enable this when multiqueue is in use AND flow control
** is disabled.
*/
static void
ixgbe_enable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl |= IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}

static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
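
/*
 * Debugging sketch (hypothetical helper, not referenced elsewhere in
 * the driver): dump the per-queue DROP_EN state so the effect of the
 * two helpers above can be verified.  Compiled only if a (likewise
 * hypothetical) IXGBE_DEBUG_RX_DROP option is defined.
 */
#ifdef IXGBE_DEBUG_RX_DROP
static void
ixgbe_print_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));

		device_printf(adapter->dev, "queue %d: rx drop %s\n",
		    i, (srrctl & IXGBE_SRRCTL_DROP_EN) ? "on" : "off");
	}
}
#endif /* IXGBE_DEBUG_RX_DROP */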