/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Coyote Point Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
/*$NetBSD: ixgbe.c,v 1.14.2.6 2016/06/14 08:42:34 snj Exp $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#include "ixgbe.h"
#include "vlan.h"

#include <sys/cprng.h>
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into ixgbe_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static const char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t, cfdata_t, void *);
static void     ixgbe_attach(device_t, device_t, void *);
static int      ixgbe_detach(device_t, int);
#if 0
static int      ixgbe_shutdown(device_t);
#endif
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else /* ! IXGBE_LEGACY_TX */
static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	ixgbe_qflush(struct ifnet *);
static void	ixgbe_deferred_mq_start(void *, int);
#endif /* IXGBE_LEGACY_TX */
static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
static void	ixgbe_ifstop(struct ifnet *, int);
static int	ixgbe_init(struct ifnet *);
static void	ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *,
		    const struct pci_attach_args *);
static void	ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *,
		    const struct pci_attach_args *);
static int      ixgbe_allocate_legacy(struct adapter *,
		    const struct pci_attach_args *);
static int	ixgbe_allocate_queues(struct adapter *);
static int	ixgbe_setup_msix(struct adapter *);
static void	ixgbe_free_pci_resources(struct adapter *);
static void	ixgbe_local_timer(void *);
static int	ixgbe_setup_interface(device_t, struct adapter *);
static void	ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_transmit_structures(struct adapter *);
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void	ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void	ixgbe_txeof(struct tx_ring *);
static bool	ixgbe_rxeof(struct ix_queue *);
static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
		    struct ixgbe_hw_stats *);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
		    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void	ixgbe_configure_ivars(struct adapter *);
static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void	ixgbe_setup_vlan_hw_support(struct adapter *);
#if 0
static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
#endif

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

static void	ixgbe_enable_rx_drop(struct adapter *);
static void	ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool	ixgbe_sfp_probe(struct adapter *);
static void	ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static int	ixgbe_legacy_irq(void *);

#if defined(NETBSD_MSI_OR_MSIX)
/* The MSI/X Interrupt handlers */
static void	ixgbe_msix_que(void *);
static void	ixgbe_msix_link(void *);
#endif

/* Software interrupts for deferred work */
static void	ixgbe_handle_que(void *);
static void	ixgbe_handle_link(void *);
static void	ixgbe_handle_msf(void *);
static void	ixgbe_handle_mod(void *);

const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);

#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
static void	ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

#if 0
devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
#endif

/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int ixgbe_enable_aim = TRUE;
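
/*
 * NB: the SYSCTL_INT() macro is stubbed out below, so the SYSCTL_INT()
 * lines in this file merely record the FreeBSD tunable names; on NetBSD
 * some of these values are instead exposed through the sysctl nodes
 * created in ixgbe_sysctl_attach().
 */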
#define SYSCTL_INT(__x, __y)
SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);

/*
** Smart speed setting, default to on.
** This only works as a compile option
** right now as it is set during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

#if defined(NETBSD_MSI_OR_MSIX)
/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8.  This
 * can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
#endif

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/*
** Setting this on allows the use
** of unsupported SFP+ modules; note
** that in doing so you are on your own :)
*/
static int allow_unsupported_sfp = false;
SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It will also cause IP forwarding to
**  fail, and that can't be controlled by
**  the stack as it can with LRO.  For all
**  these reasons I've deemed it best to
**  leave this off and not bother with a
**  tuneable interface; this would need to
**  be compiled in to enable.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab on them for sanity checking */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return 1 on success, 0 on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
{
	const struct pci_attach_args *pa = aux;

	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
}

static ixgbe_vendor_info_t *
ixgbe_lookup(const struct pci_attach_args *pa)
{
	pcireg_t subid;
	ixgbe_vendor_info_t *ent;

	INIT_DEBUGOUT("ixgbe_probe: begin");

	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
		return NULL;

	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);

	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&

		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
		     ent->subvendor_id == 0) &&

		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
		     ent->subdevice_id == 0)) {
			++ixgbe_total_ports;
			return ent;
		}
	}
	return NULL;
}

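/*
 * Register the per-instance sysctl nodes.  A minimal sketch of the
 * result, assuming the instance root created by ixgbe_sysctl_instance()
 * sits at hw.<devname> (e.g. hw.ixg0):
 *
 *	sysctl -w hw.ixg0.fc=3		# request full flow control
 *	sysctl hw.ixg0.num_queues	# read-only queue count
 */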
static void
ixgbe_sysctl_attach(struct adapter *adapter)
{
	struct sysctllog **log;
	const struct sysctlnode *rnode, *cnode;
	device_t dev;

	dev = adapter->dev;
	log = &adapter->sysctllog;

	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
		aprint_error_dev(dev, "could not create sysctl root\n");
		return;
	}

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_queues", SYSCTL_DESCR("Number of queues"),
	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "fc", SYSCTL_DESCR("Flow Control"),
	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	/* XXX This is an *instance* sysctl controlling a *global* variable.
	 * XXX It's that way in the FreeBSD driver that this derives from.
	 */
	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "ts", SYSCTL_DESCR("Thermal Test"),
	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  (Unlike the FreeBSD original, the NetBSD attach routine returns void.)
 *********************************************************************/

static void
ixgbe_attach(device_t parent, device_t dev, void *aux)
{
	struct adapter *adapter;
	struct ixgbe_hw *hw;
	int             error = 0;
	u16		csum;
	u32		ctrl_ext;
	ixgbe_vendor_info_t *ent;
	const struct pci_attach_args *pa = aux;

	INIT_DEBUGOUT("ixgbe_attach: begin");

	/* Allocate, clear, and link in our adapter structure */
	adapter = device_private(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	adapter->osdep.pc = pa->pa_pc;
	adapter->osdep.tag = pa->pa_tag;
	adapter->osdep.dmat = pa->pa_dmat;
	adapter->osdep.attached = false;

	ent = ixgbe_lookup(pa);

	KASSERT(ent != NULL);

	aprint_normal(": %s, Version - %s\n",
	    ixgbe_strings[ent->index], ixgbe_driver_version);

	/* Core Lock Init */
	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));

	/* SYSCTL APIs */
	ixgbe_sysctl_attach(adapter);

	/* Set up the timer callout */
	callout_init(&adapter->timer, 0);

	/* Determine hardware revision */
	ixgbe_identify_hardware(adapter);

	/* Do base PCI setup - map BAR0 */
	if (ixgbe_allocate_pci_resources(adapter, pa)) {
		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_out;
	}

	/* Do descriptor calc and sanity checks */
	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
		aprint_error_dev(dev, "TXD config issue, using default!\n");
		adapter->num_tx_desc = DEFAULT_TXD;
	} else
		adapter->num_tx_desc = ixgbe_txd;

	/*
	** With many RX rings it is easy to exceed the
	** system mbuf allocation. Tuning nmbclusters
	** can alleviate this.
	*/
	if (nmbclusters > 0) {
		int s;
		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
		if (s > nmbclusters) {
			aprint_error_dev(dev, "RX Descriptors exceed "
			    "system mbuf max, using default instead!\n");
			ixgbe_rxd = DEFAULT_RXD;
		}
	}

	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
		aprint_error_dev(dev, "RXD config issue, using default!\n");
		adapter->num_rx_desc = DEFAULT_RXD;
	} else
		adapter->num_rx_desc = ixgbe_rxd;

	/* Allocate our TX/RX Queues */
	if (ixgbe_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_out;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Initialize the shared code */
	hw->allow_unsupported_sfp = allow_unsupported_sfp;
	error = ixgbe_init_shared_code(hw);
	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		/*
		** No optics in this port, set up
		** so the timer routine will probe
		** for later insertion.
		*/
		adapter->sfp_probe = TRUE;
		error = 0;
	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
		error = EIO;
		goto err_late;
	} else if (error) {
		aprint_error_dev(dev,"Unable to initialize the shared code\n");
		error = EIO;
		goto err_late;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
		error = EIO;
		goto err_late;
	}

	error = ixgbe_init_hw(hw);
	switch (error) {
	case IXGBE_ERR_EEPROM_VERSION:
		aprint_error_dev(dev, "This device is a pre-production adapter/"
		    "LOM.  Please be aware there may be issues associated "
		    "with your hardware.\n If you are experiencing problems "
		    "please contact your Intel or hardware representative "
		    "who provided you with this hardware.\n");
		break;
	case IXGBE_ERR_SFP_NOT_SUPPORTED:
		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
		error = EIO;
		aprint_error_dev(dev,"Hardware Initialization Failure\n");
		goto err_late;
	case IXGBE_ERR_SFP_NOT_PRESENT:
		device_printf(dev,"No SFP+ Module found\n");
		/* falls thru */
	default:
		break;
	}

	/* Detect and set physical type */
	ixgbe_setup_optics(adapter);

	if ((adapter->msix > 1) && (ixgbe_enable_msix))
		error = ixgbe_allocate_msix(adapter, pa);
	else
		error = ixgbe_allocate_legacy(adapter, pa);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (ixgbe_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Initialize statistics */
	ixgbe_update_stats_counters(adapter);

	/*
	** Check PCIE slot type/speed/width
	*/
	ixgbe_get_slot_info(hw);

	/* Set an initial default flow control value */
	adapter->fc = ixgbe_fc_full;

	/* let hardware know driver is loaded */
	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

	ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
	ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("ixgbe_attach: end");
	adapter->osdep.attached = true;
	return;
err_late:
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
err_out:
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	ixgbe_free_pci_resources(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);
	return;
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev, int flags)
{
	struct adapter *adapter = device_private(dev);
	struct rx_ring *rxr = adapter->rx_rings;
	struct ixgbe_hw_stats *stats = &adapter->stats;
	struct ix_queue *que = adapter->queues;
	struct tx_ring *txr = adapter->tx_rings;
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");
	if (adapter->osdep.attached == false)
		return 0;

#if NVLAN > 0
	/* Make sure VLANs are not using driver */
	if (!VLAN_ATTACHED(&adapter->osdep.ec))
		;	/* nothing to do: no VLANs */
	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
		vlan_ifdetach(adapter->ifp);
	else {
		aprint_error_dev(dev, "VLANs in use\n");
		return EBUSY;
	}
#endif

	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
#ifndef IXGBE_LEGACY_TX
		softint_disestablish(txr->txq_si);
#endif
		softint_disestablish(que->que_si);
	}

	/* Drain the Link queue */
	softint_disestablish(adapter->link_si);
	softint_disestablish(adapter->mod_si);
	softint_disestablish(adapter->msf_si);
#ifdef IXGBE_FDIR
	softint_disestablish(adapter->fdir_si);
#endif

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	ether_ifdetach(adapter->ifp);
	callout_halt(&adapter->timer, NULL);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
#if 0	/* XXX the NetBSD port is probably missing something here */
	bus_generic_detach(dev);
#endif
	if_detach(adapter->ifp);

	sysctl_teardown(&adapter->sysctllog);
	evcnt_detach(&adapter->handleq);
	evcnt_detach(&adapter->req);
	evcnt_detach(&adapter->morerx);
	evcnt_detach(&adapter->moretx);
	evcnt_detach(&adapter->txloops);
	evcnt_detach(&adapter->efbig_tx_dma_setup);
	evcnt_detach(&adapter->m_defrag_failed);
	evcnt_detach(&adapter->efbig2_tx_dma_setup);
	evcnt_detach(&adapter->einval_tx_dma_setup);
	evcnt_detach(&adapter->other_tx_dma_setup);
	evcnt_detach(&adapter->eagain_tx_dma_setup);
	evcnt_detach(&adapter->enomem_tx_dma_setup);
	evcnt_detach(&adapter->watchdog_events);
	evcnt_detach(&adapter->tso_err);
	evcnt_detach(&adapter->link_irq);

	txr = adapter->tx_rings;
	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		evcnt_detach(&txr->no_desc_avail);
		evcnt_detach(&txr->total_packets);
		evcnt_detach(&txr->tso_tx);

		if (i < __arraycount(adapter->stats.mpc)) {
			evcnt_detach(&adapter->stats.mpc[i]);
		}
		if (i < __arraycount(adapter->stats.pxontxc)) {
			evcnt_detach(&adapter->stats.pxontxc[i]);
			evcnt_detach(&adapter->stats.pxonrxc[i]);
			evcnt_detach(&adapter->stats.pxofftxc[i]);
			evcnt_detach(&adapter->stats.pxoffrxc[i]);
			evcnt_detach(&adapter->stats.pxon2offc[i]);
		}
		if (i < __arraycount(adapter->stats.qprc)) {
			evcnt_detach(&adapter->stats.qprc[i]);
			evcnt_detach(&adapter->stats.qptc[i]);
			evcnt_detach(&adapter->stats.qbrc[i]);
			evcnt_detach(&adapter->stats.qbtc[i]);
			evcnt_detach(&adapter->stats.qprdc[i]);
		}

		evcnt_detach(&rxr->rx_packets);
		evcnt_detach(&rxr->rx_bytes);
		evcnt_detach(&rxr->rx_copies);
		evcnt_detach(&rxr->no_jmbuf);
		evcnt_detach(&rxr->rx_discarded);
		evcnt_detach(&rxr->rx_irq);
	}
	evcnt_detach(&stats->ipcs);
	evcnt_detach(&stats->l4cs);
	evcnt_detach(&stats->ipcs_bad);
	evcnt_detach(&stats->l4cs_bad);
	evcnt_detach(&stats->intzero);
	evcnt_detach(&stats->legint);
	evcnt_detach(&stats->crcerrs);
	evcnt_detach(&stats->illerrc);
	evcnt_detach(&stats->errbc);
	evcnt_detach(&stats->mspdc);
	evcnt_detach(&stats->mlfc);
	evcnt_detach(&stats->mrfc);
	evcnt_detach(&stats->rlec);
	evcnt_detach(&stats->lxontxc);
	evcnt_detach(&stats->lxonrxc);
	evcnt_detach(&stats->lxofftxc);
	evcnt_detach(&stats->lxoffrxc);

	/* Packet Reception Stats */
	evcnt_detach(&stats->tor);
	evcnt_detach(&stats->gorc);
	evcnt_detach(&stats->tpr);
	evcnt_detach(&stats->gprc);
	evcnt_detach(&stats->mprc);
	evcnt_detach(&stats->bprc);
	evcnt_detach(&stats->prc64);
	evcnt_detach(&stats->prc127);
	evcnt_detach(&stats->prc255);
	evcnt_detach(&stats->prc511);
	evcnt_detach(&stats->prc1023);
	evcnt_detach(&stats->prc1522);
	evcnt_detach(&stats->ruc);
	evcnt_detach(&stats->rfc);
	evcnt_detach(&stats->roc);
	evcnt_detach(&stats->rjc);
	evcnt_detach(&stats->mngprc);
	evcnt_detach(&stats->xec);

	/* Packet Transmission Stats */
	evcnt_detach(&stats->gotc);
	evcnt_detach(&stats->tpt);
	evcnt_detach(&stats->gptc);
	evcnt_detach(&stats->bptc);
	evcnt_detach(&stats->mptc);
	evcnt_detach(&stats->mngptc);
	evcnt_detach(&stats->ptc64);
	evcnt_detach(&stats->ptc127);
	evcnt_detach(&stats->ptc255);
	evcnt_detach(&stats->ptc511);
	evcnt_detach(&stats->ptc1023);
	evcnt_detach(&stats->ptc1522);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *adapter = device_private(dev);
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);
	return (0);
}
#endif

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	int rc;
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_POLL(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m_head);
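		/*
		 * EFBIG from ixgbe_xmit() normally means the mbuf chain
		 * had too many DMA segments; m_defrag() packs it into
		 * fewer clusters, so one retry is worthwhile.
		 */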
		if (rc == EFBIG) {
			struct mbuf *mtmp;

			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
				m_head = mtmp;
				rc = ixgbe_xmit(txr, m_head);
				if (rc != 0)
					adapter->efbig2_tx_dma_setup.ev_count++;
			} else
				adapter->m_defrag_failed.ev_count++;
		}
		if (rc != 0) {
			m_freem(m_head);
			continue;
		}

		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, m_head);

		/* Set watchdog on */
		getmicrotime(&txr->watchdog_time);
		txr->queue_status = IXGBE_QUEUE_WORKING;
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_flags & IFF_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int 		i, err = 0;
#ifdef	RSS
	uint32_t bucket_id;
#endif

	/* Which queue to use */
	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef	RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			/* XXX TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		} else {
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef	RSS
		}
#endif
	} else {
		/* XXX NetBSD: index the fallback queue by the current CPU */
		i = cpu_index(curcpu()) % adapter->num_queues;
	}

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		softint_schedule(txr->txq_si);

	return (0);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             enqueued = 0, err = 0;

	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (enqueued > 0) {
		/* Set watchdog on */
		txr->queue_status = IXGBE_QUEUE_WORKING;
		getmicrotime(&txr->watchdog_time);
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a softint (FreeBSD: taskqueue) to drain queued
 * transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

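/*
 * if_flags change callback, presumably registered with
 * ether_set_ifflags_cb() when the interface is set up; returning
 * ENETRESET from here tells ether_ioctl()/ixgbe_ioctl() that the flag
 * change could not be absorbed by just reprogramming the filters.
 */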
static int
ixgbe_ifflags_cb(struct ethercom *ec)
{
	struct ifnet *ifp = &ec->ec_if;
	struct adapter *adapter = ifp->if_softc;
	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;

	IXGBE_CORE_LOCK(adapter);

	if (change != 0)
		adapter->if_flags = ifp->if_flags;

	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
		rc = ENETRESET;
	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
		ixgbe_set_promisc(adapter);

	/* Set up VLAN support and filter */
	ixgbe_setup_vlan_hw_support(adapter);

	IXGBE_CORE_UNLOCK(adapter);

	return rc;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifcapreq *ifcr = data;
	struct ifreq	*ifr = data;
	int             error = 0;
	int l4csum_en;
	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;

	switch (command) {
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
		break;
	case SIOCSIFCAP:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
		break;
	case SIOCSIFMTU:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
		break;
	default:
		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
		break;
	}

	switch (command) {
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
	case SIOCGI2C:
	{
		struct ixgbe_i2c_req	i2c;
		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			error = EINVAL;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
		    i2c.dev_addr, i2c.data);
		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}
	case SIOCSIFCAP:
		/* Layer-4 Rx checksum offload has to be turned on and
		 * off as a unit.
		 */
		l4csum_en = ifcr->ifcr_capenable & l4csum;
		if (l4csum_en != l4csum && l4csum_en != 0)
			return EINVAL;
		/*FALLTHROUGH*/
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFFLAGS:
	case SIOCSIFMTU:
	default:
		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
			return error;
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			;
		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
			IXGBE_CORE_LOCK(adapter);
			ixgbe_init_locked(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
			/*
			 * Multicast list has changed; set the hardware filter
			 * accordingly.
			 */
			IXGBE_CORE_LOCK(adapter);
			ixgbe_disable_intr(adapter);
			ixgbe_set_multi(adapter);
			ixgbe_enable_intr(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		}
		return 0;
	}

	return error;
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
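/*
 * The MFS (Maximum Frame Size) field lives in the upper 16 bits of the
 * MHADD register; ixgbe_init_locked() below shifts max_frame_size up by
 * this amount when the configured MTU exceeds the standard Ethernet MTU.
 */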
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
	struct ifnet   *ifp = adapter->ifp;
	device_t 	dev = adapter->dev;
	struct ixgbe_hw *hw = &adapter->hw;
	u32		k, txdctl, mhadd, gpie;
	u32		rxdctl, rxctrl;

	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */

	KASSERT(mutex_owned(&adapter->core_mtx));
	INIT_DEBUGOUT("ixgbe_init_locked: begin");
	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	callout_stop(&adapter->timer);

	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
	adapter->max_frame_size =
		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

	/* Get the latest mac address, User can use a LAA */
	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
	    IXGBE_ETH_LENGTH_OF_ADDRESS);
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
	hw->addr_ctrl.rar_used_count = 1;

	/* Prepare transmit descriptors and buffers */
	if (ixgbe_setup_transmit_structures(adapter)) {
		device_printf(dev,"Could not setup transmit structures\n");
		ixgbe_stop(adapter);
		return;
	}

	ixgbe_init_hw(hw);
	ixgbe_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	ixgbe_set_multi(adapter);

	/*
	** Determine the correct mbuf pool
	** for doing jumbo frames
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else if (adapter->max_frame_size <= 9216)
		adapter->rx_mbuf_sz = MJUM9BYTES;
	else
		adapter->rx_mbuf_sz = MJUM16BYTES;

	/* Prepare receive descriptors and buffers */
	if (ixgbe_setup_receive_structures(adapter)) {
		device_printf(dev,"Could not setup receive structures\n");
		ixgbe_stop(adapter);
		return;
	}

	/* Configure RX settings */
	ixgbe_initialize_receive_units(adapter);

	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

	/* Enable Fan Failure Interrupt */
	gpie |= IXGBE_SDP1_GPIEN;

	/* Add for Module detection */
	if (hw->mac.type == ixgbe_mac_82599EB)
		gpie |= IXGBE_SDP2_GPIEN;

	/* Thermal Failure Detection */
	if (hw->mac.type == ixgbe_mac_X540)
		gpie |= IXGBE_SDP0_GPIEN;

	if (adapter->msix > 1) {
		/* Enable Enhanced MSIX mode */
		gpie |= IXGBE_GPIE_MSIX_MODE;
		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
		    IXGBE_GPIE_OCD;
	}
	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

	/* Set MTU size */
	if (ifp->if_mtu > ETHERMTU) {
		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
		mhadd &= ~IXGBE_MHADD_MFS_MASK;
		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
	}

	/* Now enable all the queues */

	for (int i = 0; i < adapter->num_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		/* Set WTHRESH to 8, burst writeback */
		txdctl |= (8 << 16);
		/*
		 * When the internal queue falls below PTHRESH (32),
		 * start prefetching as long as there are at least
		 * HTHRESH (1) buffers ready. The values are taken
		 * from the Intel linux driver 3.8.21.
		 * Prefetching enables tx line rate even with 1 queue.
		 */
		txdctl |= (32 << 0) | (1 << 8);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}

	for (int i = 0; i < adapter->num_queues; i++) {
		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		if (hw->mac.type == ixgbe_mac_82598EB) {
			/*
			** PTHRESH = 21
			** HTHRESH = 4
			** WTHRESH = 8
			*/
			rxdctl &= ~0x3FFFFF;
			rxdctl |= 0x080420;
		}
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
		/* XXX I don't trust this loop, and I don't trust the
		 * XXX memory barrier.  What is this meant to do? --dyoung
		 */
		for (k = 0; k < 10; k++) {
			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
			    IXGBE_RXDCTL_ENABLE)
				break;
			else
				msec_delay(1);
		}
		wmb();
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, we must preserve the buffers made
		 * available to userspace before the if_init()
		 * (this is true by default on the TX side, because
		 * init makes all buffers available to userspace).
		 *
		 * netmap_reset() and the device specific routines
		 * (e.g. ixgbe_setup_receive_rings()) map these
		 * buffers at the end of the NIC ring, so here we
		 * must set the RDT (tail) register to make sure
		 * they are not overwritten.
		 *
		 * In this driver the NIC ring starts at RDH = 0,
		 * RDT points to the last slot available for reception (?),
		 * so RDT = num_rx_desc - 1 means the whole ring is available.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(adapter->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);

			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
		} else
#endif /* DEV_NETMAP */
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
	}
   1435 
   1436 	/* Enable Receive engine */
   1437 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1438 	if (hw->mac.type == ixgbe_mac_82598EB)
   1439 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1440 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1441 	ixgbe_enable_rx_dma(hw, rxctrl);
   1442 
   1443 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1444 
   1445 	/* Set up MSI/X routing */
   1446 	if (ixgbe_enable_msix)  {
   1447 		ixgbe_configure_ivars(adapter);
   1448 		/* Set up auto-mask */
   1449 		if (hw->mac.type == ixgbe_mac_82598EB)
   1450 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1451 		else {
   1452 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1453 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1454 		}
   1455 	} else {  /* Simple settings for Legacy/MSI */
   1456                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1457                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1458 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1459 	}
   1460 
   1461 #ifdef IXGBE_FDIR
   1462 	/* Init Flow director */
   1463 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1464 		u32 hdrm = 32 << fdir_pballoc;
   1465 
   1466 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1467 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1468 	}
   1469 #endif
   1470 
   1471 	/*
   1472 	** Check on any SFP devices that
   1473 	** need to be kick-started
   1474 	*/
   1475 	if (hw->phy.type == ixgbe_phy_none) {
   1476 		int err = hw->phy.ops.identify(hw);
   1477 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1478                 	device_printf(dev,
   1479 			    "Unsupported SFP+ module type was detected.\n");
   1480 			return;
   1481         	}
   1482 	}
   1483 
   1484 	/* Set moderation on the Link interrupt */
   1485 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1486 
   1487 	/* Config/Enable Link */
   1488 	ixgbe_config_link(adapter);
   1489 
   1490 	/* Hardware Packet Buffer & Flow Control setup */
   1491 	{
   1492 		u32 rxpb, frame, size, tmp;
   1493 
   1494 		frame = adapter->max_frame_size;
   1495 
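         		/*
         		 * The IXGBE_DV/IXGBE_LOW_DV macros yield delay values
         		 * in bits; IXGBE_BT2KB converts bits to kilobytes so
         		 * they are comparable with the packet buffer size
         		 * (RXPBSIZE is read in bytes, hence the >> 10 below).
         		 */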
   1496 		/* Calculate High Water */
   1497 		if (hw->mac.type == ixgbe_mac_X540)
   1498 			tmp = IXGBE_DV_X540(frame, frame);
   1499 		else
   1500 			tmp = IXGBE_DV(frame, frame);
   1501 		size = IXGBE_BT2KB(tmp);
   1502 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1503 		hw->fc.high_water[0] = rxpb - size;
   1504 
   1505 		/* Now calculate Low Water */
   1506 		if (hw->mac.type == ixgbe_mac_X540)
   1507 			tmp = IXGBE_LOW_DV_X540(frame);
   1508 		else
   1509 			tmp = IXGBE_LOW_DV(frame);
   1510 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1511 
   1512 		hw->fc.requested_mode = adapter->fc;
   1513 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1514 		hw->fc.send_xon = TRUE;
   1515 	}
   1516 	/* Initialize the FC settings */
   1517 	ixgbe_start_hw(hw);
   1518 
   1519 	/* Set up VLAN support and filter */
   1520 	ixgbe_setup_vlan_hw_support(adapter);
   1521 
   1522 	/* And now turn on interrupts */
   1523 	ixgbe_enable_intr(adapter);
   1524 
   1525 	/* Now inform the stack we're ready */
   1526 	ifp->if_flags |= IFF_RUNNING;
   1527 
   1528 	return;
   1529 }
   1530 
   1531 static int
   1532 ixgbe_init(struct ifnet *ifp)
   1533 {
   1534 	struct adapter *adapter = ifp->if_softc;
   1535 
   1536 	IXGBE_CORE_LOCK(adapter);
   1537 	ixgbe_init_locked(adapter);
   1538 	IXGBE_CORE_UNLOCK(adapter);
   1539 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1540 }
   1541 
   1542 
   1543 /*
   1544 **
   1545 ** MSIX Interrupt Handlers and Tasklets
   1546 **
   1547 */
   1548 
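         /*
          * A note on the masking below: the 82598 keeps all of its queue
          * interrupt bits in a single 32-bit EIMS register, while newer
          * MACs spread 64 queue bits across EIMS_EX(0) and EIMS_EX(1),
          * so the 64-bit queue mask must be split into 32-bit halves
          * (likewise for EIMC in the disable path).
          */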
   1549 static inline void
   1550 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1551 {
   1552 	struct ixgbe_hw *hw = &adapter->hw;
   1553 	u64	queue = (u64)(1ULL << vector);
   1554 	u32	mask;
   1555 
   1556 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1557                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1558                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1559 	} else {
   1560                 mask = (queue & 0xFFFFFFFF);
   1561                 if (mask)
   1562                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1563                 mask = (queue >> 32);
   1564                 if (mask)
   1565                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1566 	}
   1567 }
   1568 
   1569 __unused static inline void
   1570 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1571 {
   1572 	struct ixgbe_hw *hw = &adapter->hw;
   1573 	u64	queue = (u64)(1ULL << vector);
   1574 	u32	mask;
   1575 
   1576 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1577                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1578                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1579 	} else {
   1580                 mask = (queue & 0xFFFFFFFF);
   1581                 if (mask)
   1582                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1583                 mask = (queue >> 32);
   1584                 if (mask)
   1585                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1586 	}
   1587 }
   1588 
   1589 static void
   1590 ixgbe_handle_que(void *context)
   1591 {
   1592 	struct ix_queue *que = context;
   1593 	struct adapter  *adapter = que->adapter;
   1594 	struct tx_ring  *txr = que->txr;
   1595 	struct ifnet    *ifp = adapter->ifp;
   1596 
   1597 	adapter->handleq.ev_count++;
   1598 
   1599 	if (ifp->if_flags & IFF_RUNNING) {
   1600 		ixgbe_rxeof(que);
   1601 		IXGBE_TX_LOCK(txr);
   1602 		ixgbe_txeof(txr);
   1603 #ifndef IXGBE_LEGACY_TX
   1604 		if (!drbr_empty(ifp, txr->br))
   1605 			ixgbe_mq_start_locked(ifp, txr);
   1606 #else
   1607 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1608 			ixgbe_start_locked(txr, ifp);
   1609 #endif
   1610 		IXGBE_TX_UNLOCK(txr);
   1611 	}
   1612 
   1613 	/* Reenable this interrupt */
   1614 	if (que->res != NULL)
   1615 		ixgbe_enable_queue(adapter, que->msix);
   1616 	else
   1617 		ixgbe_enable_intr(adapter);
   1618 	return;
   1619 }
   1620 
   1621 
   1622 /*********************************************************************
   1623  *
   1624  *  Legacy Interrupt Service routine
   1625  *
   1626  **********************************************************************/
   1627 
   1628 static int
   1629 ixgbe_legacy_irq(void *arg)
   1630 {
   1631 	struct ix_queue *que = arg;
   1632 	struct adapter	*adapter = que->adapter;
   1633 	struct ixgbe_hw	*hw = &adapter->hw;
   1634 	struct ifnet    *ifp = adapter->ifp;
   1635 	struct 		tx_ring *txr = adapter->tx_rings;
   1636 	bool		more = false;
   1637 	u32       	reg_eicr;
   1638 
   1639 
   1640 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1641 
   1642 	adapter->stats.legint.ev_count++;
   1643 	++que->irqs;
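         	/*
         	 * EICR is read-to-clear; a value of zero on a (possibly
         	 * shared) legacy interrupt line means the interrupt was
         	 * not ours.
         	 */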
   1644 	if (reg_eicr == 0) {
   1645 		adapter->stats.intzero.ev_count++;
   1646 		if ((ifp->if_flags & IFF_UP) != 0)
   1647 			ixgbe_enable_intr(adapter);
   1648 		return 0;
   1649 	}
   1650 
   1651 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1652 		more = ixgbe_rxeof(que);
   1653 
   1654 		IXGBE_TX_LOCK(txr);
   1655 		ixgbe_txeof(txr);
   1656 #ifdef IXGBE_LEGACY_TX
   1657 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1658 			ixgbe_start_locked(txr, ifp);
   1659 #else
   1660 		if (!drbr_empty(ifp, txr->br))
   1661 			ixgbe_mq_start_locked(ifp, txr);
   1662 #endif
   1663 		IXGBE_TX_UNLOCK(txr);
   1664 	}
   1665 
   1666 	/* Check for fan failure */
   1667 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1668 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1669                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1670 		    "REPLACE IMMEDIATELY!!\n");
   1671 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1672 	}
   1673 
   1674 	/* Link status change */
   1675 	if (reg_eicr & IXGBE_EICR_LSC)
   1676 		softint_schedule(adapter->link_si);
   1677 
    1678 	if (more)
    1679 		softint_schedule(que->que_si);	/* finish RX/TX cleanup */
   1684 	else
   1685 		ixgbe_enable_intr(adapter);
   1686 	return 1;
   1687 }
   1688 
   1689 
   1690 #if defined(NETBSD_MSI_OR_MSIX)
   1691 /*********************************************************************
   1692  *
   1693  *  MSIX Queue Interrupt Service routine
   1694  *
   1695  **********************************************************************/
   1696 void
   1697 ixgbe_msix_que(void *arg)
   1698 {
   1699 	struct ix_queue	*que = arg;
   1700 	struct adapter  *adapter = que->adapter;
   1701 	struct ifnet    *ifp = adapter->ifp;
   1702 	struct tx_ring	*txr = que->txr;
   1703 	struct rx_ring	*rxr = que->rxr;
   1704 	bool		more;
   1705 	u32		newitr = 0;
   1706 
   1707 	/* Protect against spurious interrupts */
   1708 	if ((ifp->if_flags & IFF_RUNNING) == 0)
   1709 		return;
   1710 
   1711 	ixgbe_disable_queue(adapter, que->msix);
   1712 	++que->irqs;
   1713 
   1714 	more = ixgbe_rxeof(que);
   1715 
   1716 	IXGBE_TX_LOCK(txr);
   1717 	ixgbe_txeof(txr);
    1718 #ifdef IXGBE_LEGACY_TX
    1719 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
         		ixgbe_start_locked(txr, ifp);
    1720 #else
    1721 	if (!drbr_empty(ifp, txr->br))
    1722 		ixgbe_mq_start_locked(ifp, txr);
    1723 #endif
   1724 	IXGBE_TX_UNLOCK(txr);
   1725 
   1726 	/* Do AIM now? */
   1727 
   1728 	if (ixgbe_enable_aim == FALSE)
   1729 		goto no_calc;
   1730 	/*
   1731 	** Do Adaptive Interrupt Moderation:
   1732         **  - Write out last calculated setting
   1733 	**  - Calculate based on average size over
   1734 	**    the last interval.
   1735 	*/
   1736         if (que->eitr_setting)
   1737                 IXGBE_WRITE_REG(&adapter->hw,
   1738                     IXGBE_EITR(que->msix), que->eitr_setting);
   1739 
   1740         que->eitr_setting = 0;
   1741 
   1742         /* Idle, do nothing */
   1743         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1744                 goto no_calc;
   1745 
   1746 	if ((txr->bytes) && (txr->packets))
   1747                	newitr = txr->bytes/txr->packets;
   1748 	if ((rxr->bytes) && (rxr->packets))
   1749 		newitr = max(newitr,
   1750 		    (rxr->bytes / rxr->packets));
    1751 	newitr += 24; /* account for preamble, IFG and CRC: 8 + 12 + 4 bytes */
   1752 
   1753 	/* set an upper boundary */
   1754 	newitr = min(newitr, 3000);
   1755 
   1756 	/* Be nice to the mid range */
   1757 	if ((newitr > 300) && (newitr < 1200))
   1758 		newitr = (newitr / 3);
   1759 	else
   1760 		newitr = (newitr / 2);
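         	/*
         	 * Illustrative example: full-sized 1500-byte frames average
         	 * 1500 bytes/packet, so newitr = 1500 + 24 = 1524; that is
         	 * above the mid range, and 1524 / 2 = 762 becomes the EITR
         	 * value written back on the next interrupt.
         	 */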
   1761 
   1762         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1763                 newitr |= newitr << 16;
   1764         else
   1765                 newitr |= IXGBE_EITR_CNT_WDIS;
   1766 
   1767         /* save for next interrupt */
   1768         que->eitr_setting = newitr;
   1769 
   1770         /* Reset state */
   1771         txr->bytes = 0;
   1772         txr->packets = 0;
   1773         rxr->bytes = 0;
   1774         rxr->packets = 0;
   1775 
   1776 no_calc:
   1777 	if (more)
   1778 		softint_schedule(que->que_si);
   1779 	else
   1780 		ixgbe_enable_queue(adapter, que->msix);
   1781 	return;
   1782 }
   1783 
   1784 
   1785 static void
   1786 ixgbe_msix_link(void *arg)
   1787 {
   1788 	struct adapter	*adapter = arg;
   1789 	struct ixgbe_hw *hw = &adapter->hw;
   1790 	u32		reg_eicr;
   1791 
   1792 	++adapter->link_irq.ev_count;
   1793 
   1794 	/* First get the cause */
   1795 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1796 	/* Be sure the queue bits are not cleared */
   1797 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
   1798 	/* Clear interrupt with write */
   1799 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1800 
   1801 	/* Link status change */
   1802 	if (reg_eicr & IXGBE_EICR_LSC)
   1803 		softint_schedule(adapter->link_si);
   1804 
   1805 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1806 #ifdef IXGBE_FDIR
   1807 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1808 			/* This is probably overkill :) */
   1809 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1810 				return;
   1811                 	/* Disable the interrupt */
   1812 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1813 			softint_schedule(adapter->fdir_si);
   1814 		} else
   1815 #endif
   1816 		if (reg_eicr & IXGBE_EICR_ECC) {
   1817                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1818 			    "Please Reboot!!\n");
   1819 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
    1820 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1823                 	/* Clear the interrupt */
   1824                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1825 			softint_schedule(adapter->msf_si);
   1826         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1827                 	/* Clear the interrupt */
   1828                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1829 			softint_schedule(adapter->mod_si);
   1830 		}
   1831         }
   1832 
   1833 	/* Check for fan failure */
   1834 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1835 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1836                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1837 		    "REPLACE IMMEDIATELY!!\n");
   1838 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1839 	}
   1840 
   1841 	/* Check for over temp condition */
   1842 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1843 	    (reg_eicr & IXGBE_EICR_TS)) {
   1844 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1845 		    "PHY IS SHUT DOWN!!\n");
   1846 		device_printf(adapter->dev, "System shutdown required\n");
   1847 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1848 	}
   1849 
   1850 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1851 	return;
   1852 }
   1853 #endif
   1854 
   1855 /*********************************************************************
   1856  *
   1857  *  Media Ioctl callback
   1858  *
   1859  *  This routine is called whenever the user queries the status of
   1860  *  the interface using ifconfig.
   1861  *
   1862  **********************************************************************/
   1863 static void
   1864 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1865 {
   1866 	struct adapter *adapter = ifp->if_softc;
   1867 	struct ixgbe_hw *hw = &adapter->hw;
   1868 
   1869 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1870 	IXGBE_CORE_LOCK(adapter);
   1871 	ixgbe_update_link_status(adapter);
   1872 
   1873 	ifmr->ifm_status = IFM_AVALID;
   1874 	ifmr->ifm_active = IFM_ETHER;
   1875 
   1876 	if (!adapter->link_active) {
   1877 		IXGBE_CORE_UNLOCK(adapter);
   1878 		return;
   1879 	}
   1880 
   1881 	ifmr->ifm_status |= IFM_ACTIVE;
   1882 
   1883 	/*
    1884 	 * Not every NIC is 1000baseSX; the X540T, for example, is not.
    1885 	 * Set the media properly based on the NIC model.
   1886 	 */
   1887 	switch (hw->device_id) {
   1888 	case IXGBE_DEV_ID_X540T:
   1889 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1890 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1891 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1892 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
   1893 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1894 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1895 		break;
   1896 	default:
   1897 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1898 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1899 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1900 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1901 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1902 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1903 		break;
   1904 	}
   1905 
   1906 	IXGBE_CORE_UNLOCK(adapter);
   1907 
   1908 	return;
   1909 }
   1910 
   1911 /*********************************************************************
   1912  *
   1913  *  Media Ioctl callback
   1914  *
   1915  *  This routine is called when the user changes speed/duplex using
    1916 	 *  the media/mediaopt options with ifconfig.
   1917  *
   1918  **********************************************************************/
   1919 static int
   1920 ixgbe_media_change(struct ifnet * ifp)
   1921 {
   1922 	struct adapter *adapter = ifp->if_softc;
   1923 	struct ifmedia *ifm = &adapter->media;
   1924 
   1925 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1926 
   1927 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1928 		return (EINVAL);
   1929 
   1930         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1931 	case IFM_10G_T:
   1932 	case IFM_AUTO:
   1933 		adapter->hw.phy.autoneg_advertised =
   1934 		    IXGBE_LINK_SPEED_100_FULL |
   1935 		    IXGBE_LINK_SPEED_1GB_FULL |
   1936 		    IXGBE_LINK_SPEED_10GB_FULL;
   1937                 break;
   1938         default:
   1939                 device_printf(adapter->dev, "Only auto media type\n");
   1940 		return (EINVAL);
   1941         }
   1942 
   1943 	return (0);
   1944 }
   1945 
   1946 /*********************************************************************
   1947  *
   1948  *  This routine maps the mbufs to tx descriptors, allowing the
   1949  *  TX engine to transmit the packets.
   1950  *  	- return 0 on success, positive on failure
   1951  *
   1952  **********************************************************************/
   1953 
   1954 static int
   1955 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1956 {
   1957 	struct m_tag *mtag;
   1958 	struct adapter  *adapter = txr->adapter;
   1959 	struct ethercom *ec = &adapter->osdep.ec;
   1960 	u32		olinfo_status = 0, cmd_type_len;
   1961 	int             i, j, error;
   1962 	int		first;
   1963 	bus_dmamap_t	map;
   1964 	struct ixgbe_tx_buf *txbuf;
   1965 	union ixgbe_adv_tx_desc *txd = NULL;
   1966 
   1967 	/* Basic descriptor defines */
   1968         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1969 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1970 
   1971 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1972         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1973 
   1974         /*
   1975          * Important to capture the first descriptor
   1976          * used because it will contain the index of
   1977          * the one we tell the hardware to report back
   1978          */
   1979         first = txr->next_avail_desc;
   1980 	txbuf = &txr->tx_buffers[first];
   1981 	map = txbuf->map;
   1982 
   1983 	/*
   1984 	 * Map the packet for DMA.
   1985 	 */
   1986 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1987 	    m_head, BUS_DMA_NOWAIT);
   1988 
   1989 	if (__predict_false(error)) {
   1990 
   1991 		switch (error) {
   1992 		case EAGAIN:
   1993 			adapter->eagain_tx_dma_setup.ev_count++;
   1994 			return EAGAIN;
   1995 		case ENOMEM:
   1996 			adapter->enomem_tx_dma_setup.ev_count++;
   1997 			return EAGAIN;
   1998 		case EFBIG:
   1999 			/*
   2000 			 * XXX Try it again?
   2001 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   2002 			 */
   2003 			adapter->efbig_tx_dma_setup.ev_count++;
   2004 			return error;
   2005 		case EINVAL:
   2006 			adapter->einval_tx_dma_setup.ev_count++;
   2007 			return error;
   2008 		default:
   2009 			adapter->other_tx_dma_setup.ev_count++;
   2010 			return error;
   2011 		}
   2012 	}
   2013 
   2014 	/* Make certain there are enough descriptors */
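         	/* The slack of 2 leaves room for the offload context
         	 * descriptor that ixgbe_tx_ctx_setup() consumes below,
         	 * plus one spare slot. */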
   2015 	if (map->dm_nsegs > txr->tx_avail - 2) {
   2016 		txr->no_desc_avail.ev_count++;
   2017 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   2018 		return EAGAIN;
   2019 	}
   2020 
   2021 	/*
   2022 	** Set up the appropriate offload context
   2023 	** this will consume the first descriptor
   2024 	*/
   2025 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   2026 	if (__predict_false(error)) {
   2027 		return (error);
   2028 	}
   2029 
   2030 #ifdef IXGBE_FDIR
   2031 	/* Do the flow director magic */
   2032 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2033 		++txr->atr_count;
   2034 		if (txr->atr_count >= atr_sample_rate) {
   2035 			ixgbe_atr(txr, m_head);
   2036 			txr->atr_count = 0;
   2037 		}
   2038 	}
   2039 #endif
   2040 
   2041 	i = txr->next_avail_desc;
   2042 	for (j = 0; j < map->dm_nsegs; j++) {
   2043 		bus_size_t seglen;
   2044 		bus_addr_t segaddr;
   2045 
   2046 		txbuf = &txr->tx_buffers[i];
   2047 		txd = &txr->tx_base[i];
   2048 		seglen = map->dm_segs[j].ds_len;
   2049 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2050 
   2051 		txd->read.buffer_addr = segaddr;
   2052 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2053 		    cmd_type_len | seglen);
   2054 		txd->read.olinfo_status = htole32(olinfo_status);
   2055 
   2056 		if (++i == txr->num_desc)
   2057 			i = 0;
   2058 	}
   2059 
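         	/*
         	 * The final descriptor of the frame carries EOP (end of
         	 * packet) and RS (report status): the hardware writes back
         	 * completion status only for this descriptor, and it is
         	 * recorded as the eop pointer below so ixgbe_txeof() can
         	 * recognize the finished frame.
         	 */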
   2060 	txd->read.cmd_type_len |=
   2061 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2062 	txr->tx_avail -= map->dm_nsegs;
   2063 	txr->next_avail_desc = i;
   2064 
   2065 	txbuf->m_head = m_head;
   2066 	/*
   2067 	** Here we swap the map so the last descriptor,
    2068 	** which gets the completion interrupt, has the
   2069 	** real map, and the first descriptor gets the
   2070 	** unused map from this descriptor.
   2071 	*/
   2072 	txr->tx_buffers[first].map = txbuf->map;
   2073 	txbuf->map = map;
   2074 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2075 	    BUS_DMASYNC_PREWRITE);
   2076 
   2077         /* Set the EOP descriptor that will be marked done */
   2078         txbuf = &txr->tx_buffers[first];
   2079 	txbuf->eop = txd;
   2080 
   2081         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2082 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2083 	/*
   2084 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2085 	 * hardware that this frame is available to transmit.
   2086 	 */
   2087 	++txr->total_packets.ev_count;
   2088 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2089 
   2090 	return 0;
   2091 }
   2092 
   2093 static void
   2094 ixgbe_set_promisc(struct adapter *adapter)
   2095 {
   2096 	struct ether_multi *enm;
   2097 	struct ether_multistep step;
   2098 	u_int32_t       reg_rctl;
   2099 	struct ethercom *ec = &adapter->osdep.ec;
   2100 	struct ifnet   *ifp = adapter->ifp;
   2101 	int		mcnt = 0;
   2102 
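         	/*
         	 * FCTRL.UPE is unicast promiscuous mode and FCTRL.MPE
         	 * multicast promiscuous mode; the multicast list is counted
         	 * first so that MPE is only left set when the list overflows
         	 * or IFF_ALLMULTI/IFF_PROMISC is requested.
         	 */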
   2103 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2104 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2105 	if (ifp->if_flags & IFF_ALLMULTI)
   2106 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2107 	else {
   2108 		ETHER_FIRST_MULTI(step, ec, enm);
   2109 		while (enm != NULL) {
   2110 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2111 				break;
   2112 			mcnt++;
   2113 			ETHER_NEXT_MULTI(step, enm);
   2114 		}
   2115 	}
   2116 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2117 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2118 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2119 
   2120 	if (ifp->if_flags & IFF_PROMISC) {
   2121 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2122 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2123 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2124 		reg_rctl |= IXGBE_FCTRL_MPE;
   2125 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2126 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2127 	}
   2128 	return;
   2129 }
   2130 
   2131 
   2132 /*********************************************************************
   2133  *  Multicast Update
   2134  *
    2135 	 *  This routine is called whenever the multicast address list is updated.
   2136  *
   2137  **********************************************************************/
   2138 #define IXGBE_RAR_ENTRIES 16
   2139 
   2140 static void
   2141 ixgbe_set_multi(struct adapter *adapter)
   2142 {
   2143 	struct ether_multi *enm;
   2144 	struct ether_multistep step;
   2145 	u32	fctrl;
   2146 	u8	*mta;
   2147 	u8	*update_ptr;
   2148 	int	mcnt = 0;
   2149 	struct ethercom *ec = &adapter->osdep.ec;
   2150 	struct ifnet   *ifp = adapter->ifp;
   2151 
   2152 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2153 
   2154 	mta = adapter->mta;
   2155 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2156 	    MAX_NUM_MULTICAST_ADDRESSES);
   2157 
   2158 	ifp->if_flags &= ~IFF_ALLMULTI;
   2159 	ETHER_FIRST_MULTI(step, ec, enm);
   2160 	while (enm != NULL) {
   2161 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2162 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2163 			ETHER_ADDR_LEN) != 0)) {
   2164 			ifp->if_flags |= IFF_ALLMULTI;
   2165 			break;
   2166 		}
   2167 		bcopy(enm->enm_addrlo,
   2168 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2169 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2170 		mcnt++;
   2171 		ETHER_NEXT_MULTI(step, enm);
   2172 	}
   2173 
   2174 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2175 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2176 	if (ifp->if_flags & IFF_PROMISC)
   2177 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2178 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2179 		fctrl |= IXGBE_FCTRL_MPE;
   2180 	}
   2181 
   2182 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2183 
   2184 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2185 		update_ptr = mta;
   2186 		ixgbe_update_mc_addr_list(&adapter->hw,
   2187 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2188 	}
   2189 
   2190 	return;
   2191 }
   2192 
   2193 /*
    2194  * This is an iterator function needed by the multicast
    2195  * shared code. It simply feeds the shared code routine the
    2196  * addresses in the mta array built by ixgbe_set_multi(), one by one.
   2197  */
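         /*
          * Example (illustrative): with mcnt == 2, ixgbe_update_mc_addr_list()
          * invokes this iterator twice; the first call returns &mta[0] and
          * advances *update_ptr by IXGBE_ETH_LENGTH_OF_ADDRESS (6) bytes,
          * the second returns &mta[6].
          */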
   2198 static u8 *
   2199 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2200 {
   2201 	u8 *addr = *update_ptr;
   2202 	u8 *newptr;
   2203 	*vmdq = 0;
   2204 
   2205 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2206 	*update_ptr = newptr;
   2207 	return addr;
   2208 }
   2209 
   2210 
   2211 /*********************************************************************
   2212  *  Timer routine
   2213  *
    2214 	 *  This routine checks for link status, updates statistics,
   2215  *  and runs the watchdog check.
   2216  *
   2217  **********************************************************************/
   2218 
   2219 static void
   2220 ixgbe_local_timer1(void *arg)
   2221 {
   2222 	struct adapter	*adapter = arg;
   2223 	device_t	dev = adapter->dev;
   2224 	struct ix_queue *que = adapter->queues;
   2225 	struct tx_ring	*txr = adapter->tx_rings;
   2226 	int		hung = 0, paused = 0;
   2227 
   2228 	KASSERT(mutex_owned(&adapter->core_mtx));
   2229 
   2230 	/* Check for pluggable optics */
   2231 	if (adapter->sfp_probe)
   2232 		if (!ixgbe_sfp_probe(adapter))
   2233 			goto out; /* Nothing to do */
   2234 
   2235 	ixgbe_update_link_status(adapter);
   2236 	ixgbe_update_stats_counters(adapter);
   2237 
   2238 	/*
   2239 	 * If the interface has been paused
   2240 	 * then don't do the watchdog check
   2241 	 */
   2242 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2243 		paused = 1;
   2244 
   2245 	/*
   2246 	** Check the TX queues status
   2247 	**      - watchdog only if all queues show hung
   2248 	*/
   2249 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2250 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2251 		    (paused == 0))
   2252 			++hung;
   2253 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2254 #ifndef IXGBE_LEGACY_TX
   2255 			softint_schedule(txr->txq_si);
   2256 #else
   2257 			softint_schedule(que->que_si);
   2258 #endif
   2259 	}
    2260 	/* Only truly watchdog if all queues show hung */
   2261 	if (hung == adapter->num_queues)
   2262 		goto watchdog;
   2263 
   2264 out:
   2265 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2266 	return;
   2267 
   2268 watchdog:
   2269 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
    2270 	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2271 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2272 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2273 	device_printf(dev, "TX(%d) desc avail = %d, "
    2274 	    "Next TX to Clean = %d\n",
   2275 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2276 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2277 	adapter->watchdog_events.ev_count++;
   2278 	ixgbe_init_locked(adapter);
   2279 }
   2280 
   2281 static void
   2282 ixgbe_local_timer(void *arg)
   2283 {
   2284 	struct adapter *adapter = arg;
   2285 
   2286 	IXGBE_CORE_LOCK(adapter);
   2287 	ixgbe_local_timer1(adapter);
   2288 	IXGBE_CORE_UNLOCK(adapter);
   2289 }
   2290 
   2291 /*
   2292 ** Note: this routine updates the OS on the link state
   2293 **	the real check of the hardware only happens with
   2294 **	a link interrupt.
   2295 */
   2296 static void
   2297 ixgbe_update_link_status(struct adapter *adapter)
   2298 {
   2299 	struct ifnet	*ifp = adapter->ifp;
   2300 	device_t dev = adapter->dev;
   2301 
   2302 
    2303 	if (adapter->link_up) {
    2304 		if (adapter->link_active == FALSE) {
    2305 			if (bootverbose)
    2306 				device_printf(dev, "Link is up %d Gbps %s\n",
    2307 				    ((adapter->link_speed ==
         				      IXGBE_LINK_SPEED_10GB_FULL) ? 10 : 1),
    2308 				    "Full Duplex");
   2309 			adapter->link_active = TRUE;
   2310 			/* Update any Flow Control changes */
   2311 			ixgbe_fc_enable(&adapter->hw);
   2312 			if_link_state_change(ifp, LINK_STATE_UP);
   2313 		}
   2314 	} else { /* Link down */
   2315 		if (adapter->link_active == TRUE) {
   2316 			if (bootverbose)
    2317 				device_printf(dev, "Link is Down\n");
   2318 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2319 			adapter->link_active = FALSE;
   2320 		}
   2321 	}
   2322 
   2323 	return;
   2324 }
   2325 
   2326 
   2327 static void
   2328 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2329 {
   2330 	struct adapter *adapter = ifp->if_softc;
   2331 
   2332 	IXGBE_CORE_LOCK(adapter);
   2333 	ixgbe_stop(adapter);
   2334 	IXGBE_CORE_UNLOCK(adapter);
   2335 }
   2336 
   2337 /*********************************************************************
   2338  *
   2339  *  This routine disables all traffic on the adapter by issuing a
   2340  *  global reset on the MAC and deallocates TX/RX buffers.
   2341  *
   2342  **********************************************************************/
   2343 
   2344 static void
   2345 ixgbe_stop(void *arg)
   2346 {
   2347 	struct ifnet   *ifp;
   2348 	struct adapter *adapter = arg;
   2349 	struct ixgbe_hw *hw = &adapter->hw;
   2350 	ifp = adapter->ifp;
   2351 
   2352 	KASSERT(mutex_owned(&adapter->core_mtx));
   2353 
   2354 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2355 	ixgbe_disable_intr(adapter);
   2356 	callout_stop(&adapter->timer);
   2357 
   2358 	/* Let the stack know...*/
   2359 	ifp->if_flags &= ~IFF_RUNNING;
   2360 
   2361 	ixgbe_reset_hw(hw);
   2362 	hw->adapter_stopped = FALSE;
   2363 	ixgbe_stop_adapter(hw);
   2364 	if (hw->mac.type == ixgbe_mac_82599EB)
   2365 		ixgbe_stop_mac_link_on_d3_82599(hw);
   2366 	/* Turn off the laser - noop with no optics */
   2367 	ixgbe_disable_tx_laser(hw);
   2368 
   2369 	/* Update the stack */
   2370 	adapter->link_up = FALSE;
   2371 	ixgbe_update_link_status(adapter);
   2372 
   2373 	/* reprogram the RAR[0] in case user changed it. */
   2374 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2375 
   2376 	return;
   2377 }
   2378 
   2379 
   2380 /*********************************************************************
   2381  *
   2382  *  Determine hardware revision.
   2383  *
   2384  **********************************************************************/
   2385 static void
   2386 ixgbe_identify_hardware(struct adapter *adapter)
   2387 {
   2388 	pcitag_t tag;
   2389 	pci_chipset_tag_t pc;
   2390 	pcireg_t subid, id;
   2391 	struct ixgbe_hw *hw = &adapter->hw;
   2392 
   2393 	pc = adapter->osdep.pc;
   2394 	tag = adapter->osdep.tag;
   2395 
   2396 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2397 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2398 
   2399 	/* Save off the information about this board */
   2400 	hw->vendor_id = PCI_VENDOR(id);
   2401 	hw->device_id = PCI_PRODUCT(id);
   2402 	hw->revision_id =
   2403 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2404 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2405 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2406 
   2407 	/* We need this here to set the num_segs below */
   2408 	ixgbe_set_mac_type(hw);
   2409 
   2410 	/* Pick up the 82599 and VF settings */
   2411 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2412 		hw->phy.smart_speed = ixgbe_smart_speed;
   2413 		adapter->num_segs = IXGBE_82599_SCATTER;
   2414 	} else
   2415 		adapter->num_segs = IXGBE_82598_SCATTER;
   2416 
   2417 	return;
   2418 }
   2419 
   2420 /*********************************************************************
   2421  *
   2422  *  Determine optic type
   2423  *
   2424  **********************************************************************/
   2425 static void
   2426 ixgbe_setup_optics(struct adapter *adapter)
   2427 {
   2428 	struct ixgbe_hw *hw = &adapter->hw;
   2429 	int		layer;
   2430 
   2431 	layer = ixgbe_get_supported_physical_layer(hw);
   2432 
   2433 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2434 		adapter->optics = IFM_10G_T;
   2435 		return;
   2436 	}
   2437 
   2438 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2439 		adapter->optics = IFM_1000_T;
   2440 		return;
   2441 	}
   2442 
   2443 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2444 		adapter->optics = IFM_1000_SX;
   2445 		return;
   2446 	}
   2447 
   2448 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2449 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2450 		adapter->optics = IFM_10G_LR;
   2451 		return;
   2452 	}
   2453 
   2454 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2455 		adapter->optics = IFM_10G_SR;
   2456 		return;
   2457 	}
   2458 
   2459 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2460 		adapter->optics = IFM_10G_TWINAX;
   2461 		return;
   2462 	}
   2463 
   2464 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2465 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2466 		adapter->optics = IFM_10G_CX4;
   2467 		return;
   2468 	}
   2469 
   2470 	/* If we get here just set the default */
   2471 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2472 	return;
   2473 }
   2474 
   2475 /*********************************************************************
   2476  *
   2477  *  Setup the Legacy or MSI Interrupt handler
   2478  *
   2479  **********************************************************************/
   2480 static int
   2481 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2482 {
   2483 	device_t	dev = adapter->dev;
   2484 	struct		ix_queue *que = adapter->queues;
   2485 #ifndef IXGBE_LEGACY_TX
   2486 	struct tx_ring		*txr = adapter->tx_rings;
   2487 #endif
   2488 	char intrbuf[PCI_INTRSTR_LEN];
   2489 #if 0
   2490 	int		rid = 0;
   2491 
   2492 	/* MSI RID at 1 */
   2493 	if (adapter->msix == 1)
   2494 		rid = 1;
   2495 #endif
   2496 
   2497 	/* We allocate a single interrupt resource */
   2498  	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2499 		aprint_error_dev(dev, "unable to map interrupt\n");
   2500 		return ENXIO;
   2501 	} else {
   2502 		aprint_normal_dev(dev, "interrupting at %s\n",
   2503 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2504 			intrbuf, sizeof(intrbuf)));
   2505 	}
   2506 
   2507 	/*
   2508 	 * Try allocating a fast interrupt and the associated deferred
   2509 	 * processing contexts.
   2510 	 */
   2511 #ifndef IXGBE_LEGACY_TX
   2512 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2513 	    txr);
   2514 #endif
   2515 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2516 
   2517 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2518 	adapter->link_si =
   2519 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2520 	adapter->mod_si =
   2521 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2522 	adapter->msf_si =
   2523 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2524 
   2525 #ifdef IXGBE_FDIR
   2526 	adapter->fdir_si =
   2527 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2528 #endif
   2529 	if (que->que_si == NULL ||
   2530 	    adapter->link_si == NULL ||
   2531 	    adapter->mod_si == NULL ||
   2532 #ifdef IXGBE_FDIR
   2533 	    adapter->fdir_si == NULL ||
   2534 #endif
   2535 	    adapter->msf_si == NULL) {
   2536 		aprint_error_dev(dev,
   2537 		    "could not establish software interrupts\n");
   2538 		return ENXIO;
   2539 	}
   2540 
   2541 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2542 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2543 	if (adapter->osdep.intr == NULL) {
   2544 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2545 		softint_disestablish(que->que_si);
   2546 		softint_disestablish(adapter->link_si);
   2547 		softint_disestablish(adapter->mod_si);
   2548 		softint_disestablish(adapter->msf_si);
   2549 #ifdef IXGBE_FDIR
   2550 		softint_disestablish(adapter->fdir_si);
   2551 #endif
   2552 		return ENXIO;
   2553 	}
   2554 	/* For simplicity in the handlers */
   2555 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2556 
   2557 	return (0);
   2558 }
   2559 
   2560 
   2561 /*********************************************************************
   2562  *
   2563  *  Setup MSIX Interrupt resources and handlers
   2564  *
   2565  **********************************************************************/
   2566 static int
   2567 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2568 {
   2569 #if !defined(NETBSD_MSI_OR_MSIX)
   2570 	return 0;
   2571 #else
   2572 	device_t        dev = adapter->dev;
   2573 	struct 		ix_queue *que = adapter->queues;
   2574 	struct  	tx_ring *txr = adapter->tx_rings;
   2575 	int 		error, rid, vector = 0;
   2576 	int		cpu_id = 0;
   2577 #ifdef	RSS
   2578 	cpuset_t cpu_mask;
   2579 #endif
   2580 
   2581 #ifdef	RSS
   2582 	/*
   2583 	 * If we're doing RSS, the number of queues needs to
   2584 	 * match the number of RSS buckets that are configured.
   2585 	 *
   2586 	 * + If there's more queues than RSS buckets, we'll end
   2587 	 *   up with queues that get no traffic.
   2588 	 *
   2589 	 * + If there's more RSS buckets than queues, we'll end
   2590 	 *   up having multiple RSS buckets map to the same queue,
   2591 	 *   so there'll be some contention.
   2592 	 */
   2593 	if (adapter->num_queues != rss_getnumbuckets()) {
   2594 		device_printf(dev,
   2595 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
   2596 		    "; performance will be impacted.\n",
   2597 		    __func__,
   2598 		    adapter->num_queues,
   2599 		    rss_getnumbuckets());
   2600 	}
   2601 #endif
   2602 
   2603 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2604 		rid = vector + 1;
   2605 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
   2606 		    RF_SHAREABLE | RF_ACTIVE);
   2607 		if (que->res == NULL) {
   2608 			aprint_error_dev(dev,"Unable to allocate"
   2609 		    	    " bus resource: que interrupt [%d]\n", vector);
   2610 			return (ENXIO);
   2611 		}
   2612 		/* Set the handler function */
   2613 		error = bus_setup_intr(dev, que->res,
   2614 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2615 		    ixgbe_msix_que, que, &que->tag);
   2616 		if (error) {
   2617 			que->res = NULL;
   2618 			aprint_error_dev(dev,
   2619 			    "Failed to register QUE handler\n");
   2620 			return error;
   2621 		}
   2622 #if __FreeBSD_version >= 800504
   2623 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
   2624 #endif
   2625 		que->msix = vector;
    2626         	adapter->que_mask |= (u64)(1ULL << que->msix);
   2627 #ifdef	RSS
   2628 		/*
   2629 		 * The queue ID is used as the RSS layer bucket ID.
   2630 		 * We look up the queue ID -> RSS CPU ID and select
   2631 		 * that.
   2632 		 */
   2633 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
   2634 #else
   2635 		/*
   2636 		 * Bind the msix vector, and thus the
   2637 		 * rings to the corresponding cpu.
   2638 		 *
   2639 		 * This just happens to match the default RSS round-robin
   2640 		 * bucket -> queue -> CPU allocation.
   2641 		 */
   2642 		if (adapter->num_queues > 1)
   2643 			cpu_id = i;
   2644 #endif
   2645 		if (adapter->num_queues > 1)
   2646 			bus_bind_intr(dev, que->res, cpu_id);
   2647 
   2648 #ifdef	RSS
   2649 		device_printf(dev,
   2650 		    "Bound RSS bucket %d to CPU %d\n",
   2651 		    i, cpu_id);
   2652 #else
   2653 		device_printf(dev,
   2654 		    "Bound queue %d to cpu %d\n",
   2655 		    i, cpu_id);
   2656 #endif
   2657 
   2658 #ifndef IXGBE_LEGACY_TX
   2659 		txr->txq_si = softint_establish(SOFTINT_NET,
   2660 		    ixgbe_deferred_mq_start, txr);
   2661 #endif
   2662 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2663 		    que);
   2664 		if (que->que_si == NULL) {
   2665 			aprint_error_dev(dev,
   2666 			    "could not establish software interrupt\n");
   2667 		}
   2668 	}
   2669 
   2670 	/* and Link */
   2671 	rid = vector + 1;
   2672 	adapter->res = bus_alloc_resource_any(dev,
   2673     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
   2674 	if (!adapter->res) {
   2675 		aprint_error_dev(dev,"Unable to allocate bus resource: "
   2676 		    "Link interrupt [%d]\n", rid);
   2677 		return (ENXIO);
   2678 	}
   2679 	/* Set the link handler function */
   2680 	error = bus_setup_intr(dev, adapter->res,
   2681 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2682 	    ixgbe_msix_link, adapter, &adapter->tag);
   2683 	if (error) {
   2684 		adapter->res = NULL;
   2685 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2686 		return (error);
   2687 	}
   2688 #if __FreeBSD_version >= 800504
   2689 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
   2690 #endif
   2691 	adapter->linkvec = vector;
   2692 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2693 	adapter->link_si =
   2694 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2695 	adapter->mod_si =
   2696 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2697 	adapter->msf_si =
   2698 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2699 #ifdef IXGBE_FDIR
   2700 	adapter->fdir_si =
   2701 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2702 #endif
   2703 
   2704 	return (0);
   2705 #endif
   2706 }
   2707 
   2708 /*
   2709  * Setup Either MSI/X or MSI
   2710  */
   2711 static int
   2712 ixgbe_setup_msix(struct adapter *adapter)
   2713 {
   2714 #if !defined(NETBSD_MSI_OR_MSIX)
   2715 	return 0;
   2716 #else
   2717 	device_t dev = adapter->dev;
   2718 	int rid, want, queues, msgs;
   2719 
   2720 	/* Override by tuneable */
   2721 	if (ixgbe_enable_msix == 0)
   2722 		goto msi;
   2723 
   2724 	/* First try MSI/X */
   2725 	msgs = pci_msix_count(dev);
   2726 	if (msgs == 0)
   2727 		goto msi;
   2728 	rid = PCI_BAR(MSIX_82598_BAR);
   2729 	adapter->msix_mem = bus_alloc_resource_any(dev,
   2730 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2731        	if (adapter->msix_mem == NULL) {
   2732 		rid += 4;	/* 82599 maps in higher BAR */
   2733 		adapter->msix_mem = bus_alloc_resource_any(dev,
   2734 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2735 	}
   2736        	if (adapter->msix_mem == NULL) {
   2737 		/* May not be enabled */
   2738 		device_printf(adapter->dev,
    2739 		    "Unable to map MSIX table\n");
   2740 		goto msi;
   2741 	}
   2742 
   2743 	/* Figure out a reasonable auto config value */
   2744 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
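         	/* i.e. queues = min(mp_ncpus, msgs - 1); one message is
         	 * held back for the link interrupt allocated below. */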
   2745 
   2746 	/* Override based on tuneable */
   2747 	if (ixgbe_num_queues != 0)
   2748 		queues = ixgbe_num_queues;
   2749 
   2750 #ifdef	RSS
   2751 	/* If we're doing RSS, clamp at the number of RSS buckets */
   2752 	if (queues > rss_getnumbuckets())
   2753 		queues = rss_getnumbuckets();
   2754 #endif
   2755 
   2756 	/* reflect correct sysctl value */
   2757 	ixgbe_num_queues = queues;
   2758 
   2759 	/*
   2760 	** Want one vector (RX/TX pair) per queue
   2761 	** plus an additional for Link.
   2762 	*/
   2763 	want = queues + 1;
   2764 	if (msgs >= want)
   2765 		msgs = want;
   2766 	else {
   2767                	device_printf(adapter->dev,
   2768 		    "MSIX Configuration Problem, "
    2769 		    "only %d vectors available but %d wanted!\n",
   2770 		    msgs, want);
   2771 		goto msi;
   2772 	}
   2773 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
   2774                	device_printf(adapter->dev,
   2775 		    "Using MSIX interrupts with %d vectors\n", msgs);
   2776 		adapter->num_queues = queues;
   2777 		return (msgs);
   2778 	}
   2779 	/*
   2780 	** If MSIX alloc failed or provided us with
   2781 	** less than needed, free and fall through to MSI
   2782 	*/
   2783 	pci_release_msi(dev);
   2784 
   2785 msi:
   2786        	msgs = pci_msi_count(dev);
   2787        	if (adapter->msix_mem != NULL) {
   2788 		bus_release_resource(dev, SYS_RES_MEMORY,
   2789 		    rid, adapter->msix_mem);
   2790 		adapter->msix_mem = NULL;
   2791 	}
   2792        	msgs = 1;
   2793        	if (pci_alloc_msi(dev, &msgs) == 0) {
   2794                	device_printf(adapter->dev,"Using an MSI interrupt\n");
   2795 		return (msgs);
   2796 	}
   2797 	device_printf(adapter->dev,"Using a Legacy interrupt\n");
   2798 	return (0);
   2799 #endif
   2800 }
   2801 
   2802 
   2803 static int
   2804 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2805 {
   2806 	pcireg_t	memtype;
   2807 	device_t        dev = adapter->dev;
   2808 	bus_addr_t addr;
   2809 	int flags;
   2810 
   2811 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2812 	switch (memtype) {
   2813 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2814 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2815 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2816 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2817 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2818 			goto map_err;
   2819 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2820 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2821 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2822 		}
   2823 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2824 		     adapter->osdep.mem_size, flags,
   2825 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2826 map_err:
   2827 			adapter->osdep.mem_size = 0;
   2828 			aprint_error_dev(dev, "unable to map BAR0\n");
   2829 			return ENXIO;
   2830 		}
   2831 		break;
   2832 	default:
   2833 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2834 		return ENXIO;
   2835 	}
   2836 
   2837 	/* Legacy defaults */
   2838 	adapter->num_queues = 1;
   2839 	adapter->hw.back = &adapter->osdep;
   2840 
   2841 	/*
    2842 	** Now set up MSI or MSI/X; this should
    2843 	** return the number of supported
    2844 	** vectors (1 for MSI).
   2845 	*/
   2846 	adapter->msix = ixgbe_setup_msix(adapter);
   2847 	return (0);
   2848 }
   2849 
   2850 static void
   2851 ixgbe_free_pci_resources(struct adapter * adapter)
   2852 {
   2853 #if defined(NETBSD_MSI_OR_MSIX)
   2854 	struct 		ix_queue *que = adapter->queues;
   2855 	device_t	dev = adapter->dev;
   2856 #endif
   2857 	int		rid;
   2858 
   2859 #if defined(NETBSD_MSI_OR_MSIX)
   2860 	int		 memrid;
   2861 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2862 		memrid = PCI_BAR(MSIX_82598_BAR);
   2863 	else
   2864 		memrid = PCI_BAR(MSIX_82599_BAR);
   2865 
   2866 	/*
   2867 	** There is a slight possibility of a failure mode
   2868 	** in attach that will result in entering this function
   2869 	** before interrupt resources have been initialized, and
    2870 	** in that case we do not want to execute the loops below.
    2871 	** We can detect this reliably by the state of the adapter's
    2872 	** res pointer.
   2873 	*/
   2874 	if (adapter->res == NULL)
   2875 		goto mem;
   2876 
   2877 	/*
   2878 	**  Release all msix queue resources:
   2879 	*/
   2880 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2881 		rid = que->msix + 1;
   2882 		if (que->tag != NULL) {
   2883 			bus_teardown_intr(dev, que->res, que->tag);
   2884 			que->tag = NULL;
   2885 		}
   2886 		if (que->res != NULL)
   2887 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2888 	}
   2889 #endif
   2890 
   2891 	/* Clean the Legacy or Link interrupt last */
   2892 	if (adapter->linkvec) /* we are doing MSIX */
   2893 		rid = adapter->linkvec + 1;
   2894 	else
    2895 		rid = (adapter->msix != 0) ? 1 : 0;
   2896 
   2897 	if (adapter->osdep.intr != NULL)
   2898 		pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2899 	adapter->osdep.intr = NULL;
   2900 
   2901 #if defined(NETBSD_MSI_OR_MSIX)
   2902 mem:
   2903 	if (adapter->msix)
   2904 		pci_release_msi(dev);
   2905 
   2906 	if (adapter->msix_mem != NULL)
   2907 		bus_release_resource(dev, SYS_RES_MEMORY,
   2908 		    memrid, adapter->msix_mem);
   2909 #endif
   2910 
   2911 	if (adapter->osdep.mem_size != 0) {
   2912 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2913 		    adapter->osdep.mem_bus_space_handle,
   2914 		    adapter->osdep.mem_size);
   2915 	}
   2916 
   2917 	return;
   2918 }
   2919 
   2920 /*********************************************************************
   2921  *
   2922  *  Setup networking device structure and register an interface.
   2923  *
   2924  **********************************************************************/
   2925 static int
   2926 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2927 {
   2928 	struct ethercom *ec = &adapter->osdep.ec;
   2929 	struct ixgbe_hw *hw = &adapter->hw;
   2930 	struct ifnet   *ifp;
   2931 
   2932 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2933 
   2934 	ifp = adapter->ifp = &ec->ec_if;
   2935 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2936 	ifp->if_baudrate = IF_Gbps(10);
   2937 	ifp->if_init = ixgbe_init;
   2938 	ifp->if_stop = ixgbe_ifstop;
   2939 	ifp->if_softc = adapter;
   2940 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2941 	ifp->if_ioctl = ixgbe_ioctl;
   2942 #ifndef IXGBE_LEGACY_TX
   2943 	ifp->if_transmit = ixgbe_mq_start;
   2944 	ifp->if_qflush = ixgbe_qflush;
   2945 #else
   2946 	ifp->if_start = ixgbe_start;
   2947 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2948 #if 0
   2949 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
   2950 #endif
   2951 	IFQ_SET_READY(&ifp->if_snd);
   2952 #endif
   2953 
   2954 	if_attach(ifp);
   2955 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2956 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2957 
   2958 	adapter->max_frame_size =
   2959 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2960 
   2961 	/*
   2962 	 * Tell the upper layer(s) we support long frames.
   2963 	 */
   2964 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2965 
   2966 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2967 	ifp->if_capenable = 0;
   2968 
   2969 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2970 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2971 	ifp->if_capabilities |= IFCAP_LRO;
   2972 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2973 	    		    | ETHERCAP_VLAN_MTU;
   2974 	ec->ec_capenable = ec->ec_capabilities;
   2975 
   2976 	/*
    2977 	** Don't turn this on by default: if VLANs are
    2978 	** created on another pseudo device (e.g. lagg),
    2979 	** VLAN events are not passed through and operation
    2980 	** breaks, whereas with HW FILTER off it works. If
    2981 	** you use VLANs directly on the ixgbe driver you
    2982 	** can enable this and get full hardware tag filtering.
   2983 	*/
   2984 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2985 
   2986 	/*
   2987 	 * Specify the media types supported by this adapter and register
   2988 	 * callbacks to update media and link information
   2989 	 */
   2990 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2991 		     ixgbe_media_status);
   2992 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2993 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2994 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2995 		ifmedia_add(&adapter->media,
   2996 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2997 		ifmedia_add(&adapter->media,
   2998 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2999 	}
   3000 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   3001 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   3002 
   3003 	return (0);
   3004 }
   3005 
   3006 static void
   3007 ixgbe_config_link(struct adapter *adapter)
   3008 {
   3009 	struct ixgbe_hw *hw = &adapter->hw;
   3010 	u32	autoneg, err = 0;
   3011 	bool	sfp, negotiate;
   3012 
   3013 	sfp = ixgbe_is_sfp(hw);
   3014 
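         	/*
         	 * For SFP+ ports, link bring-up is deferred to the module
         	 * (mod_si) or multispeed-fiber (msf_si) soft interrupt;
         	 * other PHYs negotiate synchronously below.
         	 */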
   3015 	if (sfp) {
   3016 		void *ip;
   3017 
   3018 		if (hw->phy.multispeed_fiber) {
   3019 			hw->mac.ops.setup_sfp(hw);
   3020 			ixgbe_enable_tx_laser(hw);
   3021 			ip = adapter->msf_si;
   3022 		} else {
   3023 			ip = adapter->mod_si;
   3024 		}
   3025 
   3026 		kpreempt_disable();
   3027 		softint_schedule(ip);
   3028 		kpreempt_enable();
   3029 	} else {
   3030 		if (hw->mac.ops.check_link)
   3031 			err = ixgbe_check_link(hw, &adapter->link_speed,
   3032 			    &adapter->link_up, FALSE);
   3033 		if (err)
   3034 			goto out;
   3035 		autoneg = hw->phy.autoneg_advertised;
   3036 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   3037                 	err  = hw->mac.ops.get_link_capabilities(hw,
   3038 			    &autoneg, &negotiate);
   3039 		else
   3040 			negotiate = 0;
   3041 		if (err)
   3042 			goto out;
   3043 		if (hw->mac.ops.setup_link)
   3044                 	err = hw->mac.ops.setup_link(hw,
   3045 			    autoneg, adapter->link_up);
   3046 	}
   3047 out:
   3048 	return;
   3049 }
   3050 
   3051 /********************************************************************
   3052  * Manage DMA'able memory.
   3053  *******************************************************************/
   3054 
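         /*
          * ixgbe_dma_malloc() follows the usual NetBSD bus_dma sequence:
          * create a tag, allocate raw memory (bus_dmamem_alloc), map it
          * into kernel virtual space (bus_dmamem_map), create a DMA map,
          * and load the map to obtain the device-visible bus address.
          * Each fail_N label unwinds exactly the steps that succeeded.
          */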
   3055 static int
   3056 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   3057 		struct ixgbe_dma_alloc *dma, const int mapflags)
   3058 {
   3059 	device_t dev = adapter->dev;
   3060 	int             r, rsegs;
   3061 
   3062 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3063 			       DBA_ALIGN, 0,	/* alignment, bounds */
   3064 			       size,	/* maxsize */
   3065 			       1,	/* nsegments */
   3066 			       size,	/* maxsegsize */
   3067 			       BUS_DMA_ALLOCNOW,	/* flags */
   3068 			       &dma->dma_tag);
   3069 	if (r != 0) {
   3070 		aprint_error_dev(dev,
   3071 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   3072 		goto fail_0;
   3073 	}
   3074 
   3075 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   3076 		size,
   3077 		dma->dma_tag->dt_alignment,
   3078 		dma->dma_tag->dt_boundary,
   3079 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   3080 	if (r != 0) {
   3081 		aprint_error_dev(dev,
   3082 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   3083 		goto fail_1;
   3084 	}
   3085 
   3086 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   3087 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   3088 	if (r != 0) {
   3089 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   3090 		    __func__, r);
   3091 		goto fail_2;
   3092 	}
   3093 
   3094 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   3095 	if (r != 0) {
    3096 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   3097 		    __func__, r);
   3098 		goto fail_3;
   3099 	}
   3100 
   3101 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   3102 			    size,
   3103 			    NULL,
   3104 			    mapflags | BUS_DMA_NOWAIT);
   3105 	if (r != 0) {
   3106 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3107 		    __func__, r);
   3108 		goto fail_4;
   3109 	}
   3110 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3111 	dma->dma_size = size;
   3112 	return 0;
   3113 fail_4:
   3114 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3115 fail_3:
   3116 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3117 fail_2:
   3118 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3119 fail_1:
   3120 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3121 fail_0:
   3122 	return r;
   3123 }
   3124 
   3125 static void
   3126 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3127 {
   3128 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3129 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3130 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3131 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3132 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3133 }
   3134 
   3135 
   3136 /*********************************************************************
   3137  *
   3138  *  Allocate memory for the transmit and receive rings, and then
   3139  *  the descriptors associated with each, called only once at attach.
   3140  *
   3141  **********************************************************************/
   3142 static int
   3143 ixgbe_allocate_queues(struct adapter *adapter)
   3144 {
   3145 	device_t	dev = adapter->dev;
   3146 	struct ix_queue	*que;
   3147 	struct tx_ring	*txr;
   3148 	struct rx_ring	*rxr;
   3149 	int rsize, tsize, error = IXGBE_SUCCESS;
   3150 	int txconf = 0, rxconf = 0;
   3151 
    3152 	/* First allocate the top level queue structs */
    3153 	if (!(adapter->queues =
    3154 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    3155 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    3156 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    3157 		error = ENOMEM;
    3158 		goto fail;
    3159 	}
   3160 
    3161 	/* Next allocate the TX ring struct memory */
   3162 	if (!(adapter->tx_rings =
   3163 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3164 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3165 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3166 		error = ENOMEM;
   3167 		goto tx_fail;
   3168 	}
   3169 
    3170 	/* Then allocate the RX ring struct memory */
   3171 	if (!(adapter->rx_rings =
   3172 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3173 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3174 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3175 		error = ENOMEM;
   3176 		goto rx_fail;
   3177 	}
   3178 
   3179 	/* For the ring itself */
   3180 	tsize = roundup2(adapter->num_tx_desc *
   3181 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
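         	/*
         	 * Round the ring size up to a DBA_ALIGN boundary; the tag
         	 * created in ixgbe_dma_malloc() uses the same constant for
         	 * the base address alignment the hardware requires.
         	 */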
   3182 
    3183 	/*
    3184 	 * Now set up the TX queues; txconf counts how many have been
    3185 	 * configured, so that if anything fails midcourse we can
    3186 	 * unwind just the completed ones gracefully.
    3187 	 */
   3188 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3189 		/* Set up some basics */
   3190 		txr = &adapter->tx_rings[i];
   3191 		txr->adapter = adapter;
   3192 		txr->me = i;
   3193 		txr->num_desc = adapter->num_tx_desc;
   3194 
   3195 		/* Initialize the TX side lock */
   3196 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3197 		    device_xname(dev), txr->me);
   3198 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3199 
   3200 		if (ixgbe_dma_malloc(adapter, tsize,
   3201 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3202 			aprint_error_dev(dev,
   3203 			    "Unable to allocate TX Descriptor memory\n");
   3204 			error = ENOMEM;
   3205 			goto err_tx_desc;
   3206 		}
   3207 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3208 		bzero((void *)txr->tx_base, tsize);
   3209 
    3210 		/* Now allocate transmit buffers for the ring */
    3211 		if (ixgbe_allocate_transmit_buffers(txr)) {
    3212 			aprint_error_dev(dev,
    3213 			    "Critical Failure setting up transmit buffers\n");
    3214 			error = ENOMEM;
    3215 			goto err_tx_desc;
    3216 		}
   3217 #ifndef IXGBE_LEGACY_TX
   3218 		/* Allocate a buf ring */
   3219 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3220 		    M_WAITOK, &txr->tx_mtx);
   3221 		if (txr->br == NULL) {
   3222 			aprint_error_dev(dev,
   3223 			    "Critical Failure setting up buf ring\n");
   3224 			error = ENOMEM;
   3225 			goto err_tx_desc;
    3226 		}
   3227 #endif
   3228 	}
   3229 
   3230 	/*
   3231 	 * Next the RX queues...
   3232 	 */
   3233 	rsize = roundup2(adapter->num_rx_desc *
   3234 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3235 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3236 		rxr = &adapter->rx_rings[i];
   3237 		/* Set up some basics */
   3238 		rxr->adapter = adapter;
   3239 		rxr->me = i;
   3240 		rxr->num_desc = adapter->num_rx_desc;
   3241 
   3242 		/* Initialize the RX side lock */
   3243 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3244 		    device_xname(dev), rxr->me);
   3245 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3246 
   3247 		if (ixgbe_dma_malloc(adapter, rsize,
   3248 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3249 			aprint_error_dev(dev,
    3250 			    "Unable to allocate RX Descriptor memory\n");
   3251 			error = ENOMEM;
   3252 			goto err_rx_desc;
   3253 		}
   3254 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3255 		bzero((void *)rxr->rx_base, rsize);
   3256 
    3257 		/* Allocate receive buffers for the ring */
   3258 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3259 			aprint_error_dev(dev,
   3260 			    "Critical Failure setting up receive buffers\n");
   3261 			error = ENOMEM;
   3262 			goto err_rx_desc;
   3263 		}
   3264 	}
   3265 
   3266 	/*
   3267 	** Finally set up the queue holding structs
   3268 	*/
   3269 	for (int i = 0; i < adapter->num_queues; i++) {
   3270 		que = &adapter->queues[i];
   3271 		que->adapter = adapter;
   3272 		que->txr = &adapter->tx_rings[i];
   3273 		que->rxr = &adapter->rx_rings[i];
   3274 	}
   3275 
   3276 	return (0);
   3277 
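         /*
          * Error unwinding: the labels below run in reverse order of
          * allocation, and txconf/rxconf record how many rings were fully
          * set up, so only completed work is torn down.
          */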
   3278 err_rx_desc:
   3279 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3280 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3281 err_tx_desc:
   3282 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3283 		ixgbe_dma_free(adapter, &txr->txdma);
   3284 	free(adapter->rx_rings, M_DEVBUF);
   3285 rx_fail:
   3286 	free(adapter->tx_rings, M_DEVBUF);
   3287 tx_fail:
   3288 	free(adapter->queues, M_DEVBUF);
   3289 fail:
   3290 	return (error);
   3291 }
   3292 
   3293 /*********************************************************************
   3294  *
   3295  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3296  *  the information needed to transmit a packet on the wire. This is
   3297  *  called only once at attach, setup is done every reset.
   3298  *
   3299  **********************************************************************/
   3300 static int
   3301 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3302 {
   3303 	struct adapter *adapter = txr->adapter;
   3304 	device_t dev = adapter->dev;
   3305 	struct ixgbe_tx_buf *txbuf;
   3306 	int error, i;
   3307 
   3308 	/*
   3309 	 * Setup DMA descriptor areas.
   3310 	 */
   3311 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3312 			       1, 0,		/* alignment, bounds */
   3313 			       IXGBE_TSO_SIZE,		/* maxsize */
   3314 			       adapter->num_segs,	/* nsegments */
   3315 			       PAGE_SIZE,		/* maxsegsize */
   3316 			       0,			/* flags */
   3317 			       &txr->txtag))) {
   3318 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
   3319 		goto fail;
   3320 	}
   3321 
   3322 	if (!(txr->tx_buffers =
   3323 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3324 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3325 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3326 		error = ENOMEM;
   3327 		goto fail;
   3328 	}
   3329 
    3330 	/* Create the descriptor buffer dma maps */
   3331 	txbuf = txr->tx_buffers;
   3332 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3333 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3334 		if (error != 0) {
   3335 			aprint_error_dev(dev,
   3336 			    "Unable to create TX DMA map (%d)\n", error);
   3337 			goto fail;
   3338 		}
   3339 	}
   3340 
   3341 	return 0;
   3342 fail:
    3343 	/* Free everything; this handles the case where we failed partway */
   3344 	ixgbe_free_transmit_structures(adapter);
   3345 	return (error);
   3346 }
   3347 
   3348 /*********************************************************************
   3349  *
   3350  *  Initialize a transmit ring.
   3351  *
   3352  **********************************************************************/
   3353 static void
   3354 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3355 {
   3356 	struct adapter *adapter = txr->adapter;
   3357 	struct ixgbe_tx_buf *txbuf;
   3358 	int i;
   3359 #ifdef DEV_NETMAP
   3360 	struct netmap_adapter *na = NA(adapter->ifp);
   3361 	struct netmap_slot *slot;
   3362 #endif /* DEV_NETMAP */
   3363 
   3364 	/* Clear the old ring contents */
   3365 	IXGBE_TX_LOCK(txr);
   3366 #ifdef DEV_NETMAP
   3367 	/*
   3368 	 * (under lock): if in netmap mode, do some consistency
   3369 	 * checks and set slot to entry 0 of the netmap ring.
   3370 	 */
   3371 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3372 #endif /* DEV_NETMAP */
   3373 	bzero((void *)txr->tx_base,
   3374 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3375 	/* Reset indices */
   3376 	txr->next_avail_desc = 0;
   3377 	txr->next_to_clean = 0;
   3378 
   3379 	/* Free any existing tx buffers. */
    3380 	txbuf = txr->tx_buffers;
   3381 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3382 		if (txbuf->m_head != NULL) {
   3383 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3384 			    0, txbuf->m_head->m_pkthdr.len,
   3385 			    BUS_DMASYNC_POSTWRITE);
   3386 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3387 			m_freem(txbuf->m_head);
   3388 			txbuf->m_head = NULL;
   3389 		}
   3390 #ifdef DEV_NETMAP
   3391 		/*
   3392 		 * In netmap mode, set the map for the packet buffer.
   3393 		 * NOTE: Some drivers (not this one) also need to set
   3394 		 * the physical buffer address in the NIC ring.
   3395 		 * Slots in the netmap ring (indexed by "si") are
   3396 		 * kring->nkr_hwofs positions "ahead" wrt the
   3397 		 * corresponding slot in the NIC ring. In some drivers
   3398 		 * (not here) nkr_hwofs can be negative. Function
   3399 		 * netmap_idx_n2k() handles wraparounds properly.
   3400 		 */
   3401 		if (slot) {
   3402 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3403 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
   3404 		}
   3405 #endif /* DEV_NETMAP */
   3406 		/* Clear the EOP descriptor pointer */
   3407 		txbuf->eop = NULL;
    3408 	}
   3409 
   3410 #ifdef IXGBE_FDIR
   3411 	/* Set the rate at which we sample packets */
   3412 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3413 		txr->atr_sample = atr_sample_rate;
   3414 #endif
   3415 
   3416 	/* Set number of descriptors available */
   3417 	txr->tx_avail = adapter->num_tx_desc;
   3418 
   3419 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3420 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3421 	IXGBE_TX_UNLOCK(txr);
   3422 }
   3423 
   3424 /*********************************************************************
   3425  *
   3426  *  Initialize all transmit rings.
   3427  *
   3428  **********************************************************************/
   3429 static int
   3430 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3431 {
   3432 	struct tx_ring *txr = adapter->tx_rings;
   3433 
   3434 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3435 		ixgbe_setup_transmit_ring(txr);
   3436 
   3437 	return (0);
   3438 }
   3439 
   3440 /*********************************************************************
   3441  *
   3442  *  Enable transmit unit.
   3443  *
   3444  **********************************************************************/
   3445 static void
   3446 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3447 {
   3448 	struct tx_ring	*txr = adapter->tx_rings;
   3449 	struct ixgbe_hw	*hw = &adapter->hw;
   3450 
   3451 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3452 
   3453 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3454 		u64	tdba = txr->txdma.dma_paddr;
   3455 		u32	txctrl;
   3456 
   3457 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3458 		       (tdba & 0x00000000ffffffffULL));
   3459 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3460 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3461 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3462 
   3463 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3464 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3465 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3466 
   3467 		/* Setup Transmit Descriptor Cmd Settings */
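         		/* IFCS: have the hardware insert the Ethernet FCS
         		 * (CRC) on every packet sent from this ring. */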
   3468 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3469 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3470 
   3471 		/* Set the processing limit */
   3472 		txr->process_limit = ixgbe_tx_process_limit;
   3473 
   3474 		/* Disable Head Writeback */
   3475 		switch (hw->mac.type) {
   3476 		case ixgbe_mac_82598EB:
   3477 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3478 			break;
   3479 		case ixgbe_mac_82599EB:
   3480 		case ixgbe_mac_X540:
   3481 		default:
   3482 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3483 			break;
    3484 		}
   3485 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3486 		switch (hw->mac.type) {
   3487 		case ixgbe_mac_82598EB:
   3488 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3489 			break;
   3490 		case ixgbe_mac_82599EB:
   3491 		case ixgbe_mac_X540:
   3492 		default:
   3493 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3494 			break;
   3495 		}
   3496 
   3497 	}
   3498 
   3499 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3500 		u32 dmatxctl, rttdcs;
   3501 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3502 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3503 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3504 		/* Disable arbiter to set MTQC */
   3505 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3506 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3507 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3508 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3509 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3510 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3511 	}
   3512 
   3513 	return;
   3514 }
   3515 
   3516 /*********************************************************************
   3517  *
   3518  *  Free all transmit rings.
   3519  *
   3520  **********************************************************************/
   3521 static void
   3522 ixgbe_free_transmit_structures(struct adapter *adapter)
   3523 {
   3524 	struct tx_ring *txr = adapter->tx_rings;
   3525 
   3526 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3527 		ixgbe_free_transmit_buffers(txr);
   3528 		ixgbe_dma_free(adapter, &txr->txdma);
   3529 		IXGBE_TX_LOCK_DESTROY(txr);
   3530 	}
   3531 	free(adapter->tx_rings, M_DEVBUF);
   3532 }
   3533 
   3534 /*********************************************************************
   3535  *
   3536  *  Free transmit ring related data structures.
   3537  *
   3538  **********************************************************************/
   3539 static void
   3540 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3541 {
   3542 	struct adapter *adapter = txr->adapter;
   3543 	struct ixgbe_tx_buf *tx_buffer;
   3544 	int             i;
   3545 
   3546 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
   3547 
   3548 	if (txr->tx_buffers == NULL)
   3549 		return;
   3550 
   3551 	tx_buffer = txr->tx_buffers;
   3552 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3553 		if (tx_buffer->m_head != NULL) {
   3554 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3555 			    0, tx_buffer->m_head->m_pkthdr.len,
   3556 			    BUS_DMASYNC_POSTWRITE);
   3557 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3558 			m_freem(tx_buffer->m_head);
   3559 			tx_buffer->m_head = NULL;
   3560 			if (tx_buffer->map != NULL) {
   3561 				ixgbe_dmamap_destroy(txr->txtag,
   3562 				    tx_buffer->map);
   3563 				tx_buffer->map = NULL;
   3564 			}
   3565 		} else if (tx_buffer->map != NULL) {
   3566 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3567 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3568 			tx_buffer->map = NULL;
   3569 		}
   3570 	}
   3571 #ifndef IXGBE_LEGACY_TX
   3572 	if (txr->br != NULL)
   3573 		buf_ring_free(txr->br, M_DEVBUF);
   3574 #endif
   3575 	if (txr->tx_buffers != NULL) {
   3576 		free(txr->tx_buffers, M_DEVBUF);
   3577 		txr->tx_buffers = NULL;
   3578 	}
   3579 	if (txr->txtag != NULL) {
   3580 		ixgbe_dma_tag_destroy(txr->txtag);
   3581 		txr->txtag = NULL;
   3582 	}
   3583 	return;
   3584 }
   3585 
   3586 /*********************************************************************
   3587  *
   3588  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3589  *
   3590  **********************************************************************/
   3591 
   3592 static int
   3593 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3594     u32 *cmd_type_len, u32 *olinfo_status)
   3595 {
   3596 	struct m_tag *mtag;
   3597 	struct adapter *adapter = txr->adapter;
   3598 	struct ethercom *ec = &adapter->osdep.ec;
   3599 	struct ixgbe_adv_tx_context_desc *TXD;
   3600 	struct ether_vlan_header *eh;
   3601 	struct ip ip;
   3602 	struct ip6_hdr ip6;
   3603 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3604 	int	ehdrlen, ip_hlen = 0;
   3605 	u16	etype;
   3606 	u8	ipproto __diagused = 0;
   3607 	int	offload = TRUE;
   3608 	int	ctxd = txr->next_avail_desc;
   3609 	u16	vtag = 0;
   3610 
   3611 	/* First check if TSO is to be used */
   3612 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3613 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3614 
   3615 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3616 		offload = FALSE;
   3617 
   3618 	/* Indicate the whole packet as payload when not doing TSO */
    3619 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3620 
   3621 	/* Now ready a context descriptor */
   3622 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3623 
   3624 	/*
   3625 	** In advanced descriptors the vlan tag must
   3626 	** be placed into the context descriptor. Hence
   3627 	** we need to make one even if not doing offloads.
   3628 	*/
   3629 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3630 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3631 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3632 	} else if (offload == FALSE) /* ... no offload to do */
   3633 		return 0;
   3634 
   3635 	/*
   3636 	 * Determine where frame payload starts.
   3637 	 * Jump over vlan headers if already present,
   3638 	 * helpful for QinQ too.
   3639 	 */
   3640 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3641 	eh = mtod(mp, struct ether_vlan_header *);
   3642 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3643 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3644 		etype = ntohs(eh->evl_proto);
   3645 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3646 	} else {
   3647 		etype = ntohs(eh->evl_encap_proto);
   3648 		ehdrlen = ETHER_HDR_LEN;
   3649 	}
   3650 
   3651 	/* Set the ether header length */
   3652 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3653 
   3654 	switch (etype) {
   3655 	case ETHERTYPE_IP:
   3656 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3657 		ip_hlen = ip.ip_hl << 2;
   3658 		ipproto = ip.ip_p;
   3659 #if 0
   3660 		ip.ip_sum = 0;
   3661 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3662 #else
   3663 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3664 		    ip.ip_sum == 0);
   3665 #endif
   3666 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3667 		break;
   3668 	case ETHERTYPE_IPV6:
   3669 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3670 		ip_hlen = sizeof(ip6);
   3671 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3672 		ipproto = ip6.ip6_nxt;
   3673 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3674 		break;
   3675 	default:
   3676 		break;
   3677 	}
   3678 
   3679 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3680 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3681 
   3682 	vlan_macip_lens |= ip_hlen;
   3683 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3684 
   3685 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3686 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3687 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3688 		KASSERT(ipproto == IPPROTO_TCP);
   3689 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3690 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3691 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3692 		KASSERT(ipproto == IPPROTO_UDP);
   3693 	}
   3694 
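         	/*
         	 * Layout of vlan_macip_lens as assembled above, per the shift
         	 * constants: bits 31:16 VLAN tag, bits 15:9 MAC header length,
         	 * bits 8:0 IP header length.
         	 */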
   3695 	/* Now copy bits into descriptor */
   3696 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3697 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3698 	TXD->seqnum_seed = htole32(0);
   3699 	TXD->mss_l4len_idx = htole32(0);
   3700 
   3701 	/* We've consumed the first desc, adjust counters */
   3702 	if (++ctxd == txr->num_desc)
   3703 		ctxd = 0;
   3704 	txr->next_avail_desc = ctxd;
   3705 	--txr->tx_avail;
   3706 
    3707 	return 0;
   3708 }
   3709 
   3710 /**********************************************************************
   3711  *
   3712  *  Setup work for hardware segmentation offload (TSO) on
   3713  *  adapters using advanced tx descriptors
   3714  *
   3715  **********************************************************************/
   3716 static int
   3717 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3718     u32 *cmd_type_len, u32 *olinfo_status)
   3719 {
   3720 	struct m_tag *mtag;
   3721 	struct adapter *adapter = txr->adapter;
   3722 	struct ethercom *ec = &adapter->osdep.ec;
   3723 	struct ixgbe_adv_tx_context_desc *TXD;
   3724 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3725 	u32 mss_l4len_idx = 0, paylen;
   3726 	u16 vtag = 0, eh_type;
   3727 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3728 	struct ether_vlan_header *eh;
   3729 #ifdef INET6
   3730 	struct ip6_hdr *ip6;
   3731 #endif
   3732 #ifdef INET
   3733 	struct ip *ip;
   3734 #endif
   3735 	struct tcphdr *th;
   3736 
   3737 
   3738 	/*
   3739 	 * Determine where frame payload starts.
   3740 	 * Jump over vlan headers if already present
   3741 	 */
   3742 	eh = mtod(mp, struct ether_vlan_header *);
   3743 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3744 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3745 		eh_type = eh->evl_proto;
   3746 	} else {
   3747 		ehdrlen = ETHER_HDR_LEN;
   3748 		eh_type = eh->evl_encap_proto;
   3749 	}
   3750 
   3751 	switch (ntohs(eh_type)) {
   3752 #ifdef INET6
   3753 	case ETHERTYPE_IPV6:
   3754 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3755 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3756 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3757 			return (ENXIO);
   3758 		ip_hlen = sizeof(struct ip6_hdr);
   3759 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3760 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3761 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3762 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3763 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3764 		break;
   3765 #endif
   3766 #ifdef INET
   3767 	case ETHERTYPE_IP:
   3768 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3769 		if (ip->ip_p != IPPROTO_TCP)
   3770 			return (ENXIO);
   3771 		ip->ip_sum = 0;
   3772 		ip_hlen = ip->ip_hl << 2;
   3773 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3774 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3775 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
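         		/*
         		 * For TSO the TCP checksum is seeded with just the
         		 * pseudo-header (no length field); the hardware
         		 * completes it for each segment it carves off.
         		 */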
   3776 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3777 		/* Tell transmit desc to also do IPv4 checksum. */
   3778 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3779 		break;
   3780 #endif
   3781 	default:
   3782 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3783 		    __func__, ntohs(eh_type));
   3784 		break;
   3785 	}
   3786 
   3787 	ctxd = txr->next_avail_desc;
   3788 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3789 
   3790 	tcp_hlen = th->th_off << 2;
   3791 
   3792 	/* This is used in the transmit desc in encap */
   3793 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3794 
   3795 	/* VLAN MACLEN IPLEN */
   3796 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3797 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    3798 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3799 	}
   3800 
   3801 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3802 	vlan_macip_lens |= ip_hlen;
   3803 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3804 
   3805 	/* ADV DTYPE TUCMD */
   3806 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3807 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3808 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3809 
   3810 	/* MSS L4LEN IDX */
   3811 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3812 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3813 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3814 
   3815 	TXD->seqnum_seed = htole32(0);
   3816 
   3817 	if (++ctxd == txr->num_desc)
   3818 		ctxd = 0;
   3819 
   3820 	txr->tx_avail--;
   3821 	txr->next_avail_desc = ctxd;
   3822 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3823 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3824 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3825 	++txr->tso_tx.ev_count;
   3826 	return (0);
   3827 }
   3828 
   3829 #ifdef IXGBE_FDIR
   3830 /*
   3831 ** This routine parses packet headers so that Flow
   3832 ** Director can make a hashed filter table entry
   3833 ** allowing traffic flows to be identified and kept
   3834 ** on the same cpu.  This would be a performance
   3835 ** hit, but we only do it at IXGBE_FDIR_RATE of
   3836 ** packets.
   3837 */
   3838 static void
   3839 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3840 {
   3841 	struct adapter			*adapter = txr->adapter;
   3842 	struct ix_queue			*que;
   3843 	struct ip			*ip;
   3844 	struct tcphdr			*th;
   3845 	struct udphdr			*uh;
   3846 	struct ether_vlan_header	*eh;
   3847 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3848 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3849 	int  				ehdrlen, ip_hlen;
   3850 	u16				etype;
   3851 
   3852 	eh = mtod(mp, struct ether_vlan_header *);
   3853 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3854 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3855 		etype = eh->evl_proto;
   3856 	} else {
   3857 		ehdrlen = ETHER_HDR_LEN;
   3858 		etype = eh->evl_encap_proto;
   3859 	}
   3860 
   3861 	/* Only handling IPv4 */
   3862 	if (etype != htons(ETHERTYPE_IP))
   3863 		return;
   3864 
   3865 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3866 	ip_hlen = ip->ip_hl << 2;
   3867 
   3868 	/* check if we're UDP or TCP */
   3869 	switch (ip->ip_p) {
   3870 	case IPPROTO_TCP:
   3871 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3872 		/* src and dst are inverted */
   3873 		common.port.dst ^= th->th_sport;
   3874 		common.port.src ^= th->th_dport;
   3875 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3876 		break;
   3877 	case IPPROTO_UDP:
   3878 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3879 		/* src and dst are inverted */
   3880 		common.port.dst ^= uh->uh_sport;
   3881 		common.port.src ^= uh->uh_dport;
   3882 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3883 		break;
   3884 	default:
   3885 		return;
   3886 	}
   3887 
   3888 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3889 	if (mp->m_pkthdr.ether_vtag)
   3890 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3891 	else
   3892 		common.flex_bytes ^= etype;
   3893 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3894 
   3895 	que = &adapter->queues[txr->me];
   3896 	/*
   3897 	** This assumes the Rx queue and Tx
   3898 	** queue are bound to the same CPU
   3899 	*/
   3900 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3901 	    input, common, que->msix);
   3902 }
   3903 #endif /* IXGBE_FDIR */
   3904 
   3905 /**********************************************************************
   3906  *
   3907  *  Examine each tx_buffer in the used queue. If the hardware is done
   3908  *  processing the packet then free associated resources. The
   3909  *  tx_buffer is put back on the free queue.
   3910  *
   3911  **********************************************************************/
   3912 static void
   3913 ixgbe_txeof(struct tx_ring *txr)
   3914 {
   3915 	struct adapter		*adapter = txr->adapter;
   3916 	struct ifnet		*ifp = adapter->ifp;
   3917 	u32			work, processed = 0;
   3918 	u16			limit = txr->process_limit;
   3919 	struct ixgbe_tx_buf	*buf;
   3920 	union ixgbe_adv_tx_desc *txd;
   3921 	struct timeval now, elapsed;
   3922 
   3923 	KASSERT(mutex_owned(&txr->tx_mtx));
   3924 
   3925 #ifdef DEV_NETMAP
   3926 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3927 		struct netmap_adapter *na = NA(ifp);
   3928 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3929 		txd = txr->tx_base;
   3930 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3931 		    BUS_DMASYNC_POSTREAD);
   3932 		/*
   3933 		 * In netmap mode, all the work is done in the context
   3934 		 * of the client thread. Interrupt handlers only wake up
   3935 		 * clients, which may be sleeping on individual rings
   3936 		 * or on a global resource for all rings.
   3937 		 * To implement tx interrupt mitigation, we wake up the client
   3938 		 * thread roughly every half ring, even if the NIC interrupts
   3939 		 * more frequently. This is implemented as follows:
   3940 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3941 		 *   the slot that should wake up the thread (nkr_num_slots
   3942 		 *   means the user thread should not be woken up);
   3943 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3944 		 *   or the slot has the DD bit set.
   3945 		 */
   3946 		if (!netmap_mitigate ||
   3947 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3948 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3949 			netmap_tx_irq(ifp, txr->me);
   3950 		}
   3951 		return;
   3952 	}
   3953 #endif /* DEV_NETMAP */
   3954 
   3955 	if (txr->tx_avail == txr->num_desc) {
   3956 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3957 		return;
   3958 	}
   3959 
   3960 	/* Get work starting point */
   3961 	work = txr->next_to_clean;
   3962 	buf = &txr->tx_buffers[work];
   3963 	txd = &txr->tx_base[work];
   3964 	work -= txr->num_desc; /* The distance to ring end */
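         	/*
         	 * 'work' now counts up from -num_desc toward zero; the
         	 * '!work' tests below detect when the index wraps past
         	 * the end of the ring.
         	 */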
    3965 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3966 	    BUS_DMASYNC_POSTREAD);
   3967 	do {
    3968 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3969 		if (eop == NULL) /* No work */
   3970 			break;
   3971 
   3972 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3973 			break;	/* I/O not complete */
   3974 
   3975 		if (buf->m_head) {
   3976 			txr->bytes +=
   3977 			    buf->m_head->m_pkthdr.len;
   3978 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3979 			    buf->map,
   3980 			    0, buf->m_head->m_pkthdr.len,
   3981 			    BUS_DMASYNC_POSTWRITE);
   3982 			ixgbe_dmamap_unload(txr->txtag,
   3983 			    buf->map);
   3984 			m_freem(buf->m_head);
   3985 			buf->m_head = NULL;
   3986 			/*
   3987 			 * NetBSD: Don't override buf->map with NULL here.
   3988 			 * It'll panic when a ring runs one lap around.
   3989 			 */
   3990 		}
   3991 		buf->eop = NULL;
   3992 		++txr->tx_avail;
   3993 
   3994 		/* We clean the range if multi segment */
   3995 		while (txd != eop) {
   3996 			++txd;
   3997 			++buf;
   3998 			++work;
   3999 			/* wrap the ring? */
   4000 			if (__predict_false(!work)) {
   4001 				work -= txr->num_desc;
   4002 				buf = txr->tx_buffers;
   4003 				txd = txr->tx_base;
   4004 			}
   4005 			if (buf->m_head) {
   4006 				txr->bytes +=
   4007 				    buf->m_head->m_pkthdr.len;
   4008 				bus_dmamap_sync(txr->txtag->dt_dmat,
   4009 				    buf->map,
   4010 				    0, buf->m_head->m_pkthdr.len,
   4011 				    BUS_DMASYNC_POSTWRITE);
   4012 				ixgbe_dmamap_unload(txr->txtag,
   4013 				    buf->map);
   4014 				m_freem(buf->m_head);
   4015 				buf->m_head = NULL;
   4016 				/*
   4017 				 * NetBSD: Don't override buf->map with NULL
   4018 				 * here. It'll panic when a ring runs one lap
   4019 				 * around.
   4020 				 */
   4021 			}
   4022 			++txr->tx_avail;
   4023 			buf->eop = NULL;
   4024 
   4025 		}
   4026 		++txr->packets;
   4027 		++processed;
   4028 		++ifp->if_opackets;
   4029 		getmicrotime(&txr->watchdog_time);
   4030 
   4031 		/* Try the next packet */
   4032 		++txd;
   4033 		++buf;
   4034 		++work;
   4035 		/* reset with a wrap */
   4036 		if (__predict_false(!work)) {
   4037 			work -= txr->num_desc;
   4038 			buf = txr->tx_buffers;
   4039 			txd = txr->tx_base;
   4040 		}
   4041 		prefetch(txd);
   4042 	} while (__predict_true(--limit));
   4043 
   4044 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   4045 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4046 
   4047 	work += txr->num_desc;
   4048 	txr->next_to_clean = work;
   4049 
    4050 	/*
    4051 	** Watchdog calculation: we know there is work
    4052 	** outstanding, or the early return above would
    4053 	** have been taken, so if nothing was processed
    4054 	** for too long it indicates a hang.
    4055 	*/
   4056 	getmicrotime(&now);
   4057 	timersub(&now, &txr->watchdog_time, &elapsed);
   4058 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   4059 		txr->queue_status = IXGBE_QUEUE_HUNG;
   4060 
   4061 	if (txr->tx_avail == txr->num_desc)
   4062 		txr->queue_status = IXGBE_QUEUE_IDLE;
   4063 
   4064 	return;
   4065 }
   4066 
   4067 /*********************************************************************
   4068  *
    4069  *  Refresh mbuf buffers for RX descriptor rings.
    4070  *   - keeps its own state, so discards due to resource exhaustion
    4071  *     are unnecessary; if an mbuf cannot be obtained the routine
    4072  *     just returns, keeping its placeholder, and can simply be
    4073  *     called again later to retry.
   4074  *
   4075  **********************************************************************/
   4076 static void
   4077 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   4078 {
   4079 	struct adapter		*adapter = rxr->adapter;
   4080 	struct ixgbe_rx_buf	*rxbuf;
   4081 	struct mbuf		*mp;
   4082 	int			i, j, error;
   4083 	bool			refreshed = false;
   4084 
   4085 	i = j = rxr->next_to_refresh;
   4086 	/* Control the loop with one beyond */
   4087 	if (++j == rxr->num_desc)
   4088 		j = 0;
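         	/* j stays one slot ahead of i, so the loop below stops just
         	 * short of 'limit'. */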
   4089 
   4090 	while (j != limit) {
   4091 		rxbuf = &rxr->rx_buffers[i];
   4092 		if (rxbuf->buf == NULL) {
   4093 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4094 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   4095 			if (mp == NULL) {
   4096 				rxr->no_jmbuf.ev_count++;
   4097 				goto update;
   4098 			}
   4099 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4100 				m_adj(mp, ETHER_ALIGN);
   4101 		} else
   4102 			mp = rxbuf->buf;
   4103 
   4104 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4105 
   4106 		/* If we're dealing with an mbuf that was copied rather
   4107 		 * than replaced, there's no need to go through busdma.
   4108 		 */
   4109 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4110 			/* Get the memory mapping */
   4111 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4112 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4113 			if (error != 0) {
   4114 				printf("Refresh mbufs: payload dmamap load"
   4115 				    " failure - %d\n", error);
   4116 				m_free(mp);
   4117 				rxbuf->buf = NULL;
   4118 				goto update;
   4119 			}
   4120 			rxbuf->buf = mp;
   4121 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4122 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4123 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4124 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4125 		} else {
   4126 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4127 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4128 		}
   4129 
   4130 		refreshed = true;
   4131 		/* Next is precalculated */
   4132 		i = j;
   4133 		rxr->next_to_refresh = i;
   4134 		if (++j == rxr->num_desc)
   4135 			j = 0;
   4136 	}
   4137 update:
   4138 	if (refreshed) /* Update hardware tail index */
   4139 		IXGBE_WRITE_REG(&adapter->hw,
   4140 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4141 	return;
   4142 }
   4143 
   4144 /*********************************************************************
   4145  *
   4146  *  Allocate memory for rx_buffer structures. Since we use one
    4147  *  rx_buffer per received packet, the maximum number of rx_buffers
   4148  *  that we'll need is equal to the number of receive descriptors
   4149  *  that we've allocated.
   4150  *
   4151  **********************************************************************/
   4152 static int
   4153 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4154 {
   4155 	struct	adapter 	*adapter = rxr->adapter;
   4156 	device_t 		dev = adapter->dev;
   4157 	struct ixgbe_rx_buf 	*rxbuf;
   4158 	int             	i, bsize, error;
   4159 
   4160 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4161 	if (!(rxr->rx_buffers =
   4162 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4163 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4164 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4165 		error = ENOMEM;
   4166 		goto fail;
   4167 	}
   4168 
   4169 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4170 				   1, 0,	/* alignment, bounds */
   4171 				   MJUM16BYTES,		/* maxsize */
   4172 				   1,			/* nsegments */
   4173 				   MJUM16BYTES,		/* maxsegsize */
   4174 				   0,			/* flags */
   4175 				   &rxr->ptag))) {
   4176 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4177 		goto fail;
   4178 	}
   4179 
    4180 	for (i = 0; i < rxr->num_desc; i++) {
   4181 		rxbuf = &rxr->rx_buffers[i];
   4182 		error = ixgbe_dmamap_create(rxr->ptag,
   4183 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4184 		if (error) {
   4185 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4186 			goto fail;
   4187 		}
   4188 	}
   4189 
   4190 	return (0);
   4191 
   4192 fail:
   4193 	/* Frees all, but can handle partial completion */
   4194 	ixgbe_free_receive_structures(adapter);
   4195 	return (error);
   4196 }
   4197 
   4198 /*
   4199 ** Used to detect a descriptor that has
   4200 ** been merged by Hardware RSC.
   4201 */
   4202 static inline u32
   4203 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4204 {
   4205 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4206 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4207 }
   4208 
   4209 /*********************************************************************
   4210  *
   4211  *  Initialize Hardware RSC (LRO) feature on 82599
   4212  *  for an RX ring, this is toggled by the LRO capability
   4213  *  even though it is transparent to the stack.
   4214  *
    4215  *  NOTE: since this HW feature only works with IPv4, and our
    4216  *        testing has shown soft LRO to be just as effective,
    4217  *        it is disabled by default.
   4218  *
   4219  **********************************************************************/
   4220 static void
   4221 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4222 {
   4223 	struct	adapter 	*adapter = rxr->adapter;
   4224 	struct	ixgbe_hw	*hw = &adapter->hw;
   4225 	u32			rscctrl, rdrxctl;
   4226 
    4227 	/* If LRO/RSC is being turned off, disable it in the hardware */
    4228 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    4229 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    4230 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared enable bit back; without this the
         		 * ring would silently keep its previous RSC setting. */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    4231 		return;
    4232 	}
   4233 
   4234 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4235 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4236 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4237 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4238 #endif /* DEV_NETMAP */
   4239 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4240 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4241 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4242 
   4243 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4244 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4245 	/*
   4246 	** Limit the total number of descriptors that
   4247 	** can be combined, so it does not exceed 64K
   4248 	*/
   4249 	if (rxr->mbuf_sz == MCLBYTES)
   4250 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4251 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4252 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4253 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4254 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4255 	else  /* Using 16K cluster */
   4256 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4257 
   4258 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4259 
   4260 	/* Enable TCP header recognition */
   4261 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4262 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4263 	    IXGBE_PSRTYPE_TCPHDR));
   4264 
   4265 	/* Disable RSC for ACK packets */
   4266 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4267 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4268 
   4269 	rxr->hw_rsc = TRUE;
   4270 }
   4271 
   4272 
   4273 static void
   4274 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4275 {
   4276 	struct ixgbe_rx_buf       *rxbuf;
   4277 	int i;
   4278 
   4279 	for (i = 0; i < rxr->num_desc; i++) {
   4280 		rxbuf = &rxr->rx_buffers[i];
   4281 		if (rxbuf->buf != NULL) {
   4282 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4283 			    0, rxbuf->buf->m_pkthdr.len,
   4284 			    BUS_DMASYNC_POSTREAD);
   4285 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4286 			rxbuf->buf->m_flags |= M_PKTHDR;
   4287 			m_freem(rxbuf->buf);
   4288 			rxbuf->buf = NULL;
   4289 			rxbuf->flags = 0;
   4290 		}
   4291 	}
   4292 }
   4293 
   4294 
   4295 /*********************************************************************
   4296  *
   4297  *  Initialize a receive ring and its buffers.
   4298  *
   4299  **********************************************************************/
   4300 static int
   4301 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4302 {
   4303 	struct	adapter 	*adapter;
   4304 	struct ixgbe_rx_buf	*rxbuf;
   4305 #ifdef LRO
   4306 	struct ifnet		*ifp;
   4307 	struct lro_ctrl		*lro = &rxr->lro;
   4308 #endif /* LRO */
   4309 	int			rsize, error = 0;
   4310 #ifdef DEV_NETMAP
   4311 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4312 	struct netmap_slot *slot;
   4313 #endif /* DEV_NETMAP */
   4314 
   4315 	adapter = rxr->adapter;
   4316 #ifdef LRO
   4317 	ifp = adapter->ifp;
   4318 #endif /* LRO */
   4319 
   4320 	/* Clear the ring contents */
   4321 	IXGBE_RX_LOCK(rxr);
   4322 #ifdef DEV_NETMAP
   4323 	/* same as in ixgbe_setup_transmit_ring() */
   4324 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4325 #endif /* DEV_NETMAP */
   4326 	rsize = roundup2(adapter->num_rx_desc *
   4327 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4328 	bzero((void *)rxr->rx_base, rsize);
   4329 	/* Cache the size */
   4330 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4331 
   4332 	/* Free current RX buffer structs and their mbufs */
   4333 	ixgbe_free_receive_ring(rxr);
   4334 
   4335 	IXGBE_RX_UNLOCK(rxr);
   4336 
   4337 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4338 	 * or size of jumbo mbufs may have changed.
   4339 	 */
   4340 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4341 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4342 
   4343 	IXGBE_RX_LOCK(rxr);
   4344 
   4345 	/* Now replenish the mbufs */
   4346 	for (int j = 0; j != rxr->num_desc; ++j) {
   4347 		struct mbuf	*mp;
   4348 
   4349 		rxbuf = &rxr->rx_buffers[j];
   4350 #ifdef DEV_NETMAP
   4351 		/*
   4352 		 * In netmap mode, fill the map and set the buffer
   4353 		 * address in the NIC ring, considering the offset
   4354 		 * between the netmap and NIC rings (see comment in
   4355 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4356 		 * an mbuf, so end the block with a continue;
   4357 		 */
   4358 		if (slot) {
   4359 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4360 			uint64_t paddr;
   4361 			void *addr;
   4362 
   4363 			addr = PNMB(na, slot + sj, &paddr);
   4364 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   4365 			/* Update descriptor and the cached value */
   4366 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4367 			rxbuf->addr = htole64(paddr);
   4368 			continue;
   4369 		}
   4370 #endif /* DEV_NETMAP */
   4371 		rxbuf->flags = 0;
   4372 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4373 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
    4374 		if (rxbuf->buf == NULL) {
    4375 			error = ENOBUFS;
    4376 			goto fail;
    4377 		}
   4378 		mp = rxbuf->buf;
   4379 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4380 		/* Get the memory mapping */
   4381 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4382 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
    4383 		if (error != 0)
    4384 			goto fail;
   4385 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4386 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4387 		/* Update the descriptor and the cached value */
   4388 		rxr->rx_base[j].read.pkt_addr =
   4389 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4390 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4391 	}
   4392 
   4393 
   4394 	/* Setup our descriptor indices */
   4395 	rxr->next_to_check = 0;
   4396 	rxr->next_to_refresh = 0;
   4397 	rxr->lro_enabled = FALSE;
   4398 	rxr->rx_copies.ev_count = 0;
   4399 	rxr->rx_bytes.ev_count = 0;
   4400 	rxr->vtag_strip = FALSE;
   4401 
   4402 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4403 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4404 
   4405 	/*
   4406 	** Now set up the LRO interface:
   4407 	*/
   4408 	if (ixgbe_rsc_enable)
   4409 		ixgbe_setup_hw_rsc(rxr);
   4410 #ifdef LRO
   4411 	else if (ifp->if_capenable & IFCAP_LRO) {
   4412 		device_t dev = adapter->dev;
   4413 		int err = tcp_lro_init(lro);
   4414 		if (err) {
   4415 			device_printf(dev, "LRO Initialization failed!\n");
   4416 			goto fail;
   4417 		}
   4418 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4419 		rxr->lro_enabled = TRUE;
   4420 		lro->ifp = adapter->ifp;
   4421 	}
   4422 #endif /* LRO */
   4423 
   4424 	IXGBE_RX_UNLOCK(rxr);
   4425 	return (0);
   4426 
   4427 fail:
   4428 	ixgbe_free_receive_ring(rxr);
   4429 	IXGBE_RX_UNLOCK(rxr);
   4430 	return (error);
   4431 }
   4432 
   4433 /*********************************************************************
   4434  *
   4435  *  Initialize all receive rings.
   4436  *
   4437  **********************************************************************/
   4438 static int
   4439 ixgbe_setup_receive_structures(struct adapter *adapter)
   4440 {
   4441 	struct rx_ring *rxr = adapter->rx_rings;
   4442 	int j;
   4443 
   4444 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4445 		if (ixgbe_setup_receive_ring(rxr))
   4446 			goto fail;
   4447 
   4448 	return (0);
   4449 fail:
   4450 	/*
   4451 	 * Free RX buffers allocated so far, we will only handle
   4452 	 * the rings that completed, the failing case will have
    4453 	 * cleaned up for itself. 'j' failed, so it's the terminus.
   4454 	 */
   4455 	for (int i = 0; i < j; ++i) {
   4456 		rxr = &adapter->rx_rings[i];
   4457 		ixgbe_free_receive_ring(rxr);
   4458 	}
   4459 
   4460 	return (ENOBUFS);
   4461 }
   4462 
   4463 static void
   4464 ixgbe_initialise_rss_mapping(struct adapter *adapter)
   4465 {
   4466 	struct ixgbe_hw	*hw = &adapter->hw;
   4467 	uint32_t reta;
   4468 	int i, j, queue_id;
   4469 	uint32_t rss_key[10];
   4470 	uint32_t mrqc;
   4471 #ifdef	RSS
   4472 	uint32_t rss_hash_config;
   4473 #endif
   4474 
   4475 	/* Setup RSS */
   4476 	reta = 0;
   4477 
   4478 #ifdef	RSS
   4479 	/* Fetch the configured RSS key */
   4480 	rss_getkey((uint8_t *) &rss_key);
   4481 #else
   4482 	/* set up random bits */
   4483 	cprng_fast(&rss_key, sizeof(rss_key));
   4484 #endif
   4485 
   4486 	/* Set up the redirection table */
   4487 	for (i = 0, j = 0; i < 128; i++, j++) {
    4488 		if (j == adapter->num_queues)
         			j = 0;
   4489 #ifdef	RSS
   4490 		/*
   4491 		 * Fetch the RSS bucket id for the given indirection entry.
   4492 		 * Cap it at the number of configured buckets (which is
   4493 		 * num_queues.)
   4494 		 */
   4495 		queue_id = rss_get_indirection_to_bucket(i);
   4496 		queue_id = queue_id % adapter->num_queues;
   4497 #else
   4498 		queue_id = (j * 0x11);
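         		/*
         		 * (j * 0x11) replicates the queue index into both
         		 * nibbles of the byte, presumably so the value is
         		 * right whichever nibble width the MAC consumes.
         		 */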
   4499 #endif
   4500 		/*
   4501 		 * The low 8 bits are for hash value (n+0);
   4502 		 * The next 8 bits are for hash value (n+1), etc.
   4503 		 */
   4504 		reta = reta >> 8;
   4505 		reta = reta | ( ((uint32_t) queue_id) << 24);
   4506 		if ((i & 3) == 3) {
   4507 			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4508 			reta = 0;
   4509 		}
   4510 	}
   4511 
   4512 	/* Now fill our hash function seeds */
   4513 	for (i = 0; i < 10; i++)
   4514 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
   4515 
   4516 	/* Perform hash on these packet types */
   4517 #ifdef	RSS
   4518 	mrqc = IXGBE_MRQC_RSSEN;
   4519 	rss_hash_config = rss_gethashconfig();
   4520 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
   4521 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
   4522 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
   4523 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
   4524 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
   4525 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
   4526 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
   4527 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   4528 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
   4529 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
   4530 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
   4531 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
   4532 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
   4533 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   4534 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
   4535 		device_printf(adapter->dev,
   4536 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
   4537 		    "but not supported\n", __func__);
   4538 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
   4539 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   4540 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
   4541 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4542 #else
   4543 	/*
   4544 	 * Disable UDP - IP fragments aren't currently being handled
   4545 	 * and so we end up with a mix of 2-tuple and 4-tuple
   4546 	 * traffic.
   4547 	 */
   4548 	mrqc = IXGBE_MRQC_RSSEN
   4549 	     | IXGBE_MRQC_RSS_FIELD_IPV4
   4550 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4551 #if 0
   4552 	     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4553 #endif
   4554 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4555 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4556 	     | IXGBE_MRQC_RSS_FIELD_IPV6
   4557 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4558 #if 0
   4559 	     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4560 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
   4561 #endif
   4562 	;
   4563 #endif /* RSS */
   4564 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4565 }
   4566 
   4567 
   4568 /*********************************************************************
   4569  *
   4570  *  Setup receive registers and features.
   4571  *
   4572  **********************************************************************/
   4573 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4574 
   4575 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
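         /*
          * Round the buffer size up so that, after the shift below, bufsz
          * is expressed in the (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT)-byte
          * units the SRRCTL BSIZEPKT field expects.
          */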
   4576 
   4577 static void
   4578 ixgbe_initialize_receive_units(struct adapter *adapter)
   4579 {
   4580 	int i;
   4581 	struct	rx_ring	*rxr = adapter->rx_rings;
   4582 	struct ixgbe_hw	*hw = &adapter->hw;
   4583 	struct ifnet   *ifp = adapter->ifp;
   4584 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4585 	u32		hlreg;
   4586 
   4587 
   4588 	/*
   4589 	 * Make sure receives are disabled while
   4590 	 * setting up the descriptor ring
   4591 	 */
   4592 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4593 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4594 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4595 
   4596 	/* Enable broadcasts */
   4597 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4598 	fctrl |= IXGBE_FCTRL_BAM;
   4599 	fctrl |= IXGBE_FCTRL_DPF;
   4600 	fctrl |= IXGBE_FCTRL_PMCF;
   4601 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4602 
   4603 	/* Set for Jumbo Frames? */
   4604 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4605 	if (ifp->if_mtu > ETHERMTU)
   4606 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4607 	else
   4608 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4609 #ifdef DEV_NETMAP
   4610 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4611 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4612 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4613 	else
   4614 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4615 #endif /* DEV_NETMAP */
   4616 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4617 
   4618 	bufsz = (adapter->rx_mbuf_sz +
   4619 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4620 
   4621 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4622 		u64 rdba = rxr->rxdma.dma_paddr;
   4623 
   4624 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4625 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4626 			       (rdba & 0x00000000ffffffffULL));
   4627 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4628 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4629 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4630 
   4631 		/* Set up the SRRCTL register */
   4632 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4633 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4634 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4635 		srrctl |= bufsz;
   4636 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4637 
   4638 		/*
   4639 		 * Set DROP_EN iff we have no flow control and >1 queue.
   4640 		 * Note that srrctl was cleared shortly before during reset,
   4641 		 * so we do not need to clear the bit, but do it just in case
   4642 		 * this code is moved elsewhere.
   4643 		 */
   4644 		if (adapter->num_queues > 1 &&
   4645 		    adapter->fc == ixgbe_fc_none) {
   4646 			srrctl |= IXGBE_SRRCTL_DROP_EN;
   4647 		} else {
   4648 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   4649 		}
   4650 
   4651 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4652 
   4653 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4654 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4655 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4656 
   4657 		/* Set the processing limit */
   4658 		rxr->process_limit = ixgbe_rx_process_limit;
   4659 	}
   4660 
   4661 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4662 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4663 			      IXGBE_PSRTYPE_UDPHDR |
   4664 			      IXGBE_PSRTYPE_IPV4HDR |
   4665 			      IXGBE_PSRTYPE_IPV6HDR;
   4666 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4667 	}
   4668 
   4669 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4670 
   4671 	ixgbe_initialise_rss_mapping(adapter);
   4672 
   4673 	if (adapter->num_queues > 1) {
   4674 		/* RSS and RX IPP Checksum are mutually exclusive */
   4675 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4676 	}
   4677 
   4678 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4679 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4680 
   4681 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4682 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4683 
   4684 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4685 
   4686 	return;
   4687 }
   4688 
   4689 /*********************************************************************
   4690  *
   4691  *  Free all receive rings.
   4692  *
   4693  **********************************************************************/
   4694 static void
   4695 ixgbe_free_receive_structures(struct adapter *adapter)
   4696 {
   4697 	struct rx_ring *rxr = adapter->rx_rings;
   4698 
   4699 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   4700 
   4701 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4702 #ifdef LRO
   4703 		struct lro_ctrl		*lro = &rxr->lro;
   4704 #endif /* LRO */
   4705 		ixgbe_free_receive_buffers(rxr);
   4706 #ifdef LRO
   4707 		/* Free LRO memory */
   4708 		tcp_lro_free(lro);
   4709 #endif /* LRO */
   4710 		/* Free the ring memory as well */
   4711 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4712 		IXGBE_RX_LOCK_DESTROY(rxr);
   4713 	}
   4714 
   4715 	free(adapter->rx_rings, M_DEVBUF);
   4716 }
   4717 
   4718 
   4719 /*********************************************************************
   4720  *
   4721  *  Free receive ring data structures
   4722  *
   4723  **********************************************************************/
   4724 static void
   4725 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4726 {
   4727 	struct adapter		*adapter = rxr->adapter;
   4728 	struct ixgbe_rx_buf	*rxbuf;
   4729 
   4730 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   4731 
   4732 	/* Cleanup any existing buffers */
   4733 	if (rxr->rx_buffers != NULL) {
   4734 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4735 			rxbuf = &rxr->rx_buffers[i];
   4736 			if (rxbuf->buf != NULL) {
   4737 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4738 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4739 				    BUS_DMASYNC_POSTREAD);
   4740 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4741 				rxbuf->buf->m_flags |= M_PKTHDR;
   4742 				m_freem(rxbuf->buf);
   4743 			}
   4744 			rxbuf->buf = NULL;
   4745 			if (rxbuf->pmap != NULL) {
   4746 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4747 				rxbuf->pmap = NULL;
   4748 			}
   4749 		}
    4750 		/* Non-NULL here, checked by the enclosing test */
    4751 		free(rxr->rx_buffers, M_DEVBUF);
    4752 		rxr->rx_buffers = NULL;
   4754 	}
   4755 
   4756 	if (rxr->ptag != NULL) {
   4757 		ixgbe_dma_tag_destroy(rxr->ptag);
   4758 		rxr->ptag = NULL;
   4759 	}
   4760 
   4761 	return;
   4762 }
   4763 
   4764 static __inline void
   4765 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4766 {
   4767 	int s;
   4768 
   4769 #ifdef LRO
   4770 	struct adapter	*adapter = ifp->if_softc;
   4771 	struct ethercom *ec = &adapter->osdep.ec;
   4772 
   4773         /*
    4774          * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4775          * has been verified by hardware, with no VLAN tag in the ethernet
    4776          * header. For IPv6 we do not yet support extension headers.
   4777          */
   4778         if (rxr->lro_enabled &&
   4779             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   4780             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4781             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4782             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   4783             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4784             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   4785             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   4786             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   4787                 /*
   4788                  * Send to the stack if:
   4789                  **  - LRO not enabled, or
   4790                  **  - no LRO resources, or
   4791                  **  - lro enqueue fails
   4792                  */
   4793                 if (rxr->lro.lro_cnt != 0)
   4794                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   4795                                 return;
   4796         }
   4797 #endif /* LRO */
   4798 
   4799 	IXGBE_RX_UNLOCK(rxr);
   4800 
   4801 	s = splnet();
   4802 	/* Pass this up to any BPF listeners. */
   4803 	bpf_mtap(ifp, m);
   4804 	(*ifp->if_input)(ifp, m);
   4805 	splx(s);
   4806 
   4807 	IXGBE_RX_LOCK(rxr);
   4808 }
   4809 
   4810 static __inline void
   4811 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4812 {
   4813 	struct ixgbe_rx_buf	*rbuf;
   4814 
   4815 	rbuf = &rxr->rx_buffers[i];
   4816 
   4817 
   4818 	/*
   4819 	** With advanced descriptors the writeback
    4820 	** clobbers the buffer addrs, so it's easier
   4821 	** to just free the existing mbufs and take
   4822 	** the normal refresh path to get new buffers
   4823 	** and mapping.
   4824 	*/
   4825 
    4826 	if (rbuf->fmp != NULL) {/* Partial chain ? */
    4827 		rbuf->fmp->m_flags |= M_PKTHDR;
    4828 		m_freem(rbuf->fmp);
    4829 		rbuf->fmp = NULL;
    4830 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    4831 	} else if (rbuf->buf != NULL) {
    4832 		m_free(rbuf->buf);
    4833 		rbuf->buf = NULL;
    4834 	}
   4835 
   4836 	rbuf->flags = 0;
   4837 
   4838 	return;
   4839 }
   4840 
   4841 
   4842 /*********************************************************************
   4843  *
   4844  *  This routine executes in interrupt context. It replenishes
   4845  *  the mbufs in the descriptor and sends data which has been
   4846  *  dma'ed into host memory to upper layer.
   4847  *
   4848  *  We loop at most count times if count is > 0, or until done if
   4849  *  count < 0.
   4850  *
   4851  *  Return TRUE for more work, FALSE for all clean.
   4852  *********************************************************************/
   4853 static bool
   4854 ixgbe_rxeof(struct ix_queue *que)
   4855 {
   4856 	struct adapter		*adapter = que->adapter;
   4857 	struct rx_ring		*rxr = que->rxr;
   4858 	struct ifnet		*ifp = adapter->ifp;
   4859 #ifdef LRO
   4860 	struct lro_ctrl		*lro = &rxr->lro;
   4861 	struct lro_entry	*queued;
   4862 #endif /* LRO */
   4863 	int			i, nextp, processed = 0;
   4864 	u32			staterr = 0;
   4865 	u16			count = rxr->process_limit;
   4866 	union ixgbe_adv_rx_desc	*cur;
   4867 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4868 #ifdef RSS
   4869 	u16			pkt_info;
   4870 #endif
   4871 
   4872 	IXGBE_RX_LOCK(rxr);
   4873 
   4874 #ifdef DEV_NETMAP
   4875 	/* Same as the txeof routine: wakeup clients on intr. */
   4876 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   4877 		IXGBE_RX_UNLOCK(rxr);
   4878 		return (FALSE);
   4879 	}
   4880 #endif /* DEV_NETMAP */
   4881 
   4882 	for (i = rxr->next_to_check; count != 0;) {
   4883 		struct mbuf	*sendmp, *mp;
   4884 		u32		rsc, ptype;
   4885 		u16		len;
   4886 		u16		vtag = 0;
   4887 		bool		eop;
   4888 
   4889 		/* Sync the ring. */
   4890 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4891 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4892 
   4893 		cur = &rxr->rx_base[i];
   4894 		staterr = le32toh(cur->wb.upper.status_error);
   4895 #ifdef RSS
   4896 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   4897 #endif
   4898 
   4899 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4900 			break;
   4901 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4902 			break;
   4903 
   4904 		count--;
   4905 		sendmp = NULL;
   4906 		nbuf = NULL;
   4907 		rsc = 0;
   4908 		cur->wb.upper.status_error = 0;
   4909 		rbuf = &rxr->rx_buffers[i];
   4910 		mp = rbuf->buf;
   4911 
   4912 		len = le16toh(cur->wb.upper.length);
   4913 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4914 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4915 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4916 
   4917 		/* Make sure bad packets are discarded */
   4918 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   4919 			rxr->rx_discarded.ev_count++;
   4920 			ixgbe_rx_discard(rxr, i);
   4921 			goto next_desc;
   4922 		}
   4923 
   4924 		/*
   4925 		** On 82599 which supports a hardware
   4926 		** LRO (called HW RSC), packets need
   4927 		** not be fragmented across sequential
   4928 		** descriptors, rather the next descriptor
   4929 		** is indicated in bits of the descriptor.
    4930 		** This also means that we might process
    4931 		** more than one packet at a time, something
    4932 		** that has never been true before; it
   4933 		** required eliminating global chain pointers
   4934 		** in favor of what we are doing here.  -jfv
   4935 		*/
   4936 		if (!eop) {
   4937 			/*
   4938 			** Figure out the next descriptor
   4939 			** of this frame.
   4940 			*/
   4941 			if (rxr->hw_rsc == TRUE) {
   4942 				rsc = ixgbe_rsc_count(cur);
   4943 				rxr->rsc_num += (rsc - 1);
   4944 			}
   4945 			if (rsc) { /* Get hardware index */
   4946 				nextp = ((staterr &
   4947 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4948 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4949 			} else { /* Just sequential */
   4950 				nextp = i + 1;
   4951 				if (nextp == adapter->num_rx_desc)
   4952 					nextp = 0;
   4953 			}
   4954 			nbuf = &rxr->rx_buffers[nextp];
   4955 			prefetch(nbuf);
   4956 		}
   4957 		/*
   4958 		** Rather than using the fmp/lmp global pointers
   4959 		** we now keep the head of a packet chain in the
   4960 		** buffer struct and pass this along from one
   4961 		** descriptor to the next, until we get EOP.
   4962 		*/
   4963 		mp->m_len = len;
   4964 		/*
    4965 		** See if there is a stored head:
    4966 		** if so, this is a secondary fragment
   4967 		*/
   4968 		sendmp = rbuf->fmp;
   4969 		if (sendmp != NULL) {  /* secondary frag */
   4970 			rbuf->buf = rbuf->fmp = NULL;
   4971 			mp->m_flags &= ~M_PKTHDR;
   4972 			sendmp->m_pkthdr.len += mp->m_len;
   4973 		} else {
   4974 			/*
   4975 			 * Optimize.  This might be a small packet,
   4976 			 * maybe just a TCP ACK.  Do a fast copy that
   4977 			 * is cache aligned into a new mbuf, and
   4978 			 * leave the old mbuf+cluster for re-use.
   4979 			 */
   4980 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4981 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4982 				if (sendmp != NULL) {
   4983 					sendmp->m_data +=
   4984 					    IXGBE_RX_COPY_ALIGN;
   4985 					ixgbe_bcopy(mp->m_data,
   4986 					    sendmp->m_data, len);
   4987 					sendmp->m_len = len;
   4988 					rxr->rx_copies.ev_count++;
   4989 					rbuf->flags |= IXGBE_RX_COPY;
   4990 				}
   4991 			}
   4992 			if (sendmp == NULL) {
   4993 				rbuf->buf = rbuf->fmp = NULL;
   4994 				sendmp = mp;
   4995 			}
   4996 
   4997 			/* first desc of a non-ps chain */
   4998 			sendmp->m_flags |= M_PKTHDR;
   4999 			sendmp->m_pkthdr.len = mp->m_len;
   5000 		}
   5001 		++processed;
   5002 
   5003 		/* Pass the head pointer on */
   5004 		if (eop == 0) {
   5005 			nbuf->fmp = sendmp;
   5006 			sendmp = NULL;
   5007 			mp->m_next = nbuf->buf;
   5008 		} else { /* Sending this frame */
   5009 			sendmp->m_pkthdr.rcvif = ifp;
   5010 			ifp->if_ipackets++;
   5011 			rxr->rx_packets.ev_count++;
   5012 			/* capture data for AIM */
   5013 			rxr->bytes += sendmp->m_pkthdr.len;
   5014 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   5015 			/* Process vlan info */
   5016 			if ((rxr->vtag_strip) &&
   5017 			    (staterr & IXGBE_RXD_STAT_VP))
   5018 				vtag = le16toh(cur->wb.upper.vlan);
   5019 			if (vtag) {
   5020 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   5021 				    printf("%s: could not apply VLAN "
   5022 					"tag", __func__));
   5023 			}
   5024 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   5025 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   5026 				   &adapter->stats);
   5027 			}
   5028 #if __FreeBSD_version >= 800000
   5029 #ifdef RSS
   5030 			sendmp->m_pkthdr.flowid =
   5031 			    le32toh(cur->wb.lower.hi_dword.rss);
   5032 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   5033 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   5034 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   5035 				break;
   5036 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   5037 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   5038 				break;
   5039 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   5040 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   5041 				break;
   5042 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   5043 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   5044 				break;
   5045 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   5046 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   5047 				break;
   5048 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   5049 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   5050 				break;
   5051 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   5052 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   5053 				break;
   5054 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   5055 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   5056 				break;
   5057 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   5058 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   5059 				break;
   5060 			default:
   5061 				/* XXX fallthrough */
   5062 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5063 				break;
   5064 			}
   5065 #else /* RSS */
   5066 			sendmp->m_pkthdr.flowid = que->msix;
   5067 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5068 #endif /* RSS */
   5069 #endif /* FreeBSD_version */
   5070 		}
   5071 next_desc:
   5072 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   5073 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   5074 
   5075 		/* Advance our pointers to the next descriptor. */
   5076 		if (++i == rxr->num_desc)
   5077 			i = 0;
   5078 
   5079 		/* Now send to the stack or do LRO */
   5080 		if (sendmp != NULL) {
   5081 			rxr->next_to_check = i;
   5082 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   5083 			i = rxr->next_to_check;
   5084 		}
   5085 
    5086 		/* Refresh the mbufs every eight descriptors */
   5087 		if (processed == 8) {
   5088 			ixgbe_refresh_mbufs(rxr, i);
   5089 			processed = 0;
   5090 		}
   5091 	}
   5092 
   5093 	/* Refresh any remaining buf structs */
   5094 	if (ixgbe_rx_unrefreshed(rxr))
   5095 		ixgbe_refresh_mbufs(rxr, i);
   5096 
   5097 	rxr->next_to_check = i;
   5098 
   5099 #ifdef LRO
   5100 	/*
   5101 	 * Flush any outstanding LRO work
   5102 	 */
   5103 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   5104 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   5105 		tcp_lro_flush(lro, queued);
   5106 	}
   5107 #endif /* LRO */
   5108 
   5109 	IXGBE_RX_UNLOCK(rxr);
   5110 
   5111 	/*
   5112 	** Still have cleaning to do?
   5113 	*/
   5114 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   5115 		return true;
   5116 	else
   5117 		return false;
   5118 }
   5119 
   5120 
   5121 /*********************************************************************
   5122  *
   5123  *  Verify that the hardware indicated that the checksum is valid.
    5124  *  Inform the stack of the checksum status so that it
    5125  *  doesn't spend time verifying the checksum itself.
   5126  *
   5127  *********************************************************************/
   5128 static void
   5129 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   5130     struct ixgbe_hw_stats *stats)
   5131 {
   5132 	u16	status = (u16) staterr;
   5133 	u8	errors = (u8) (staterr >> 24);
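         	/*
         	 * The advanced descriptor write-back packs the 16 status bits
         	 * into the low word of staterr and the 8 error bits into bits
         	 * 31:24; the two casts above pull those fields apart.
         	 */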
   5134 #if 0
   5135 	bool	sctp = FALSE;
   5136 
   5137 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   5138 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   5139 		sctp = TRUE;
   5140 #endif
   5141 
   5142 	if (status & IXGBE_RXD_STAT_IPCS) {
   5143 		stats->ipcs.ev_count++;
   5144 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   5145 			/* IP Checksum Good */
   5146 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   5147 
   5148 		} else {
   5149 			stats->ipcs_bad.ev_count++;
   5150 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   5151 		}
   5152 	}
   5153 	if (status & IXGBE_RXD_STAT_L4CS) {
   5154 		stats->l4cs.ev_count++;
   5155 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   5156 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   5157 			mp->m_pkthdr.csum_flags |= type;
   5158 		} else {
   5159 			stats->l4cs_bad.ev_count++;
   5160 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   5161 		}
   5162 	}
   5163 	return;
   5164 }
   5165 
   5166 
   5167 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   5168 /*
    5169 ** This routine is run via a vlan config EVENT;
    5170 ** it enables us to use the HW Filter table since
    5171 ** we can get the vlan id. This just creates the
    5172 ** entry in the soft version of the VFTA; init will
    5173 ** repopulate the real table.
   5174 */
   5175 static void
   5176 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5177 {
   5178 	struct adapter	*adapter = ifp->if_softc;
   5179 	u16		index, bit;
   5180 
   5181 	if (ifp->if_softc !=  arg)   /* Not our event */
   5182 		return;
   5183 
   5184 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5185 		return;
   5186 
   5187 	IXGBE_CORE_LOCK(adapter);
   5188 	index = (vtag >> 5) & 0x7F;
   5189 	bit = vtag & 0x1F;
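         	/*
         	 * The VFTA is 128 32-bit words covering all 4096 vlan ids;
         	 * e.g. vtag 1234 maps to index 38, bit 18.
         	 */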
   5190 	adapter->shadow_vfta[index] |= (1 << bit);
   5191 	ixgbe_setup_vlan_hw_support(adapter);
   5192 	IXGBE_CORE_UNLOCK(adapter);
   5193 }
   5194 
   5195 /*
    5196 ** This routine is run via a vlan
    5197 ** unconfig EVENT; it removes our entry
    5198 ** from the soft vfta.
   5199 */
   5200 static void
   5201 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5202 {
   5203 	struct adapter	*adapter = ifp->if_softc;
   5204 	u16		index, bit;
   5205 
   5206 	if (ifp->if_softc !=  arg)
   5207 		return;
   5208 
   5209 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5210 		return;
   5211 
   5212 	IXGBE_CORE_LOCK(adapter);
   5213 	index = (vtag >> 5) & 0x7F;
   5214 	bit = vtag & 0x1F;
   5215 	adapter->shadow_vfta[index] &= ~(1 << bit);
   5216 	/* Re-init to load the changes */
   5217 	ixgbe_setup_vlan_hw_support(adapter);
   5218 	IXGBE_CORE_UNLOCK(adapter);
   5219 }
   5220 #endif
   5221 
   5222 static void
   5223 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   5224 {
   5225 	struct ethercom *ec = &adapter->osdep.ec;
   5226 	struct ixgbe_hw *hw = &adapter->hw;
   5227 	struct rx_ring	*rxr;
   5228 	u32		ctrl;
   5229 
   5230 
   5231 	/*
    5232 	** We get here through init_locked, meaning
    5233 	** a soft reset; that has already cleared
    5234 	** the VFTA and other state, so if no
    5235 	** vlans have been registered, do nothing.
   5236 	*/
   5237 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   5238 		return;
   5239 	}
   5240 
   5241 	/* Setup the queues for vlans */
   5242 	for (int i = 0; i < adapter->num_queues; i++) {
   5243 		rxr = &adapter->rx_rings[i];
   5244 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5245 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5246 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5247 			ctrl |= IXGBE_RXDCTL_VME;
   5248 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5249 		}
   5250 		rxr->vtag_strip = TRUE;
   5251 	}
   5252 
   5253 	if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
   5254 		return;
   5255 	/*
    5256 	** A soft reset zeroes out the VFTA, so
   5257 	** we need to repopulate it now.
   5258 	*/
   5259 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5260 		if (adapter->shadow_vfta[i] != 0)
   5261 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5262 			    adapter->shadow_vfta[i]);
   5263 
   5264 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
    5265 	/* Enable the Filter Table if HW vlan filtering is enabled */
   5266 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5267 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5268 		ctrl |= IXGBE_VLNCTRL_VFE;
   5269 	}
   5270 	if (hw->mac.type == ixgbe_mac_82598EB)
   5271 		ctrl |= IXGBE_VLNCTRL_VME;
   5272 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5273 }
   5274 
   5275 static void
   5276 ixgbe_enable_intr(struct adapter *adapter)
   5277 {
   5278 	struct ixgbe_hw	*hw = &adapter->hw;
   5279 	struct ix_queue	*que = adapter->queues;
   5280 	u32		mask, fwsm;
   5281 
   5282 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5283 	/* Enable Fan Failure detection */
   5284 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5285 		mask |= IXGBE_EIMS_GPI_SDP1;
   5286 
   5287 	switch (adapter->hw.mac.type) {
   5288 		case ixgbe_mac_82599EB:
   5289 			mask |= IXGBE_EIMS_ECC;
   5290 			mask |= IXGBE_EIMS_GPI_SDP0;
   5291 			mask |= IXGBE_EIMS_GPI_SDP1;
   5292 			mask |= IXGBE_EIMS_GPI_SDP2;
   5293 #ifdef IXGBE_FDIR
   5294 			mask |= IXGBE_EIMS_FLOW_DIR;
   5295 #endif
   5296 			break;
   5297 		case ixgbe_mac_X540:
   5298 			mask |= IXGBE_EIMS_ECC;
   5299 			/* Detect if Thermal Sensor is enabled */
   5300 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5301 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5302 				mask |= IXGBE_EIMS_TS;
   5303 #ifdef IXGBE_FDIR
   5304 			mask |= IXGBE_EIMS_FLOW_DIR;
   5305 #endif
   5306 		/* falls through */
   5307 		default:
   5308 			break;
   5309 	}
   5310 
   5311 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5312 
   5313 	/* With RSS we use auto clear */
   5314 	if (adapter->msix_mem) {
   5315 		mask = IXGBE_EIMS_ENABLE_MASK;
   5316 		/* Don't autoclear Link */
   5317 		mask &= ~IXGBE_EIMS_OTHER;
   5318 		mask &= ~IXGBE_EIMS_LSC;
   5319 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5320 	}
   5321 
   5322 	/*
   5323 	** Now enable all queues, this is done separately to
   5324 	** allow for handling the extended (beyond 32) MSIX
   5325 	** vectors that can be used by 82599
   5326 	*/
    5327 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5328 		ixgbe_enable_queue(adapter, que->msix);
   5329 
   5330 	IXGBE_WRITE_FLUSH(hw);
   5331 
   5332 	return;
   5333 }
   5334 
   5335 static void
   5336 ixgbe_disable_intr(struct adapter *adapter)
   5337 {
   5338 	if (adapter->msix_mem)
   5339 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5340 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5341 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5342 	} else {
   5343 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5344 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5345 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5346 	}
   5347 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5348 	return;
   5349 }
   5350 
   5351 u16
   5352 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5353 {
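         	/*
         	 * pci_conf_read(9) on NetBSD operates on aligned 32-bit words,
         	 * so a 16-bit config-space read is synthesized here by fetching
         	 * the containing dword and extracting the requested half.
         	 */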
   5354 	switch (reg % 4) {
   5355 	case 0:
   5356 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5357 		    __BITS(15, 0);
   5358 	case 2:
   5359 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5360 		    reg - 2), __BITS(31, 16));
   5361 	default:
    5362 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5363 		break;
   5364 	}
   5365 }
   5366 
   5367 void
   5368 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5369 {
   5370 	pcireg_t old;
   5371 
   5372 	switch (reg % 4) {
   5373 	case 0:
   5374 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5375 		    __BITS(31, 16);
   5376 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5377 		break;
   5378 	case 2:
   5379 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5380 		    __BITS(15, 0);
   5381 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5382 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5383 		break;
   5384 	default:
    5385 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5386 		break;
   5387 	}
   5388 
   5389 	return;
   5390 }
   5391 
   5392 /*
   5393 ** Get the width and transaction speed of
   5394 ** the slot this adapter is plugged into.
   5395 */
   5396 static void
   5397 ixgbe_get_slot_info(struct ixgbe_hw *hw)
   5398 {
   5399 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
   5400 	struct ixgbe_mac_info	*mac = &hw->mac;
   5401 	u16			link;
   5402 
   5403 	/* For most devices simply call the shared code routine */
   5404 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
   5405 		ixgbe_get_bus_info(hw);
   5406 		goto display;
   5407 	}
   5408 
   5409 	/*
   5410 	** For the Quad port adapter we need to parse back
   5411 	** up the PCI tree to find the speed of the expansion
   5412 	** slot into which this adapter is plugged. A bit more work.
   5413 	*/
   5414 	dev = device_parent(device_parent(dev));
   5415 #ifdef IXGBE_DEBUG
   5416 	device_printf(dev, "parent pcib = %x,%x,%x\n",
   5417 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5418 #endif
   5419 	dev = device_parent(device_parent(dev));
   5420 #ifdef IXGBE_DEBUG
   5421 	device_printf(dev, "slot pcib = %x,%x,%x\n",
   5422 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5423 #endif
   5424 	/* Now get the PCI Express Capabilities offset */
   5425 	/* ...and read the Link Status Register */
   5426 	link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
   5427 	switch (link & IXGBE_PCI_LINK_WIDTH) {
   5428 	case IXGBE_PCI_LINK_WIDTH_1:
   5429 		hw->bus.width = ixgbe_bus_width_pcie_x1;
   5430 		break;
   5431 	case IXGBE_PCI_LINK_WIDTH_2:
   5432 		hw->bus.width = ixgbe_bus_width_pcie_x2;
   5433 		break;
   5434 	case IXGBE_PCI_LINK_WIDTH_4:
   5435 		hw->bus.width = ixgbe_bus_width_pcie_x4;
   5436 		break;
   5437 	case IXGBE_PCI_LINK_WIDTH_8:
   5438 		hw->bus.width = ixgbe_bus_width_pcie_x8;
   5439 		break;
   5440 	default:
   5441 		hw->bus.width = ixgbe_bus_width_unknown;
   5442 		break;
   5443 	}
   5444 
   5445 	switch (link & IXGBE_PCI_LINK_SPEED) {
   5446 	case IXGBE_PCI_LINK_SPEED_2500:
   5447 		hw->bus.speed = ixgbe_bus_speed_2500;
   5448 		break;
   5449 	case IXGBE_PCI_LINK_SPEED_5000:
   5450 		hw->bus.speed = ixgbe_bus_speed_5000;
   5451 		break;
   5452 	case IXGBE_PCI_LINK_SPEED_8000:
   5453 		hw->bus.speed = ixgbe_bus_speed_8000;
   5454 		break;
   5455 	default:
   5456 		hw->bus.speed = ixgbe_bus_speed_unknown;
   5457 		break;
   5458 	}
   5459 
   5460 	mac->ops.set_lan_id(hw);
   5461 
   5462 display:
   5463 	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
   5464 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
   5465 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
   5466 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
   5467 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
   5468 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
   5469 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
   5470 	    ("Unknown"));
   5471 
   5472 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5473 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
   5474 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
   5475 		device_printf(dev, "PCI-Express bandwidth available"
   5476 		    " for this card\n     is not sufficient for"
   5477 		    " optimal performance.\n");
   5478 		device_printf(dev, "For optimal performance a x8 "
   5479 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
    5480 	}
   5481 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5482 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
   5483 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
   5484 		device_printf(dev, "PCI-Express bandwidth available"
   5485 		    " for this card\n     is not sufficient for"
   5486 		    " optimal performance.\n");
   5487 		device_printf(dev, "For optimal performance a x8 "
   5488 		    "PCIE Gen3 slot is required.\n");
    5489 	}
   5490 
   5491 	return;
   5492 }
   5493 
   5494 
   5495 /*
   5496 ** Setup the correct IVAR register for a particular MSIX interrupt
   5497 **   (yes this is all very magic and confusing :)
   5498 **  - entry is the register array entry
   5499 **  - vector is the MSIX vector for this queue
   5500 **  - type is RX/TX/MISC
   5501 */
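         /*
         ** Worked example for 82599/X540: each 32-bit IVAR register holds
         ** four 8-bit entries (RX, then TX, for queues 2n and 2n+1), so a
         ** TX entry for queue 3 (entry 3, type 1) lands in bits 31:24 of
         ** IVAR(1), since index = (16 * (3 & 1)) + (8 * 1) = 24.
         */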
   5502 static void
   5503 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5504 {
   5505 	struct ixgbe_hw *hw = &adapter->hw;
   5506 	u32 ivar, index;
   5507 
   5508 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5509 
   5510 	switch (hw->mac.type) {
   5511 
   5512 	case ixgbe_mac_82598EB:
   5513 		if (type == -1)
   5514 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5515 		else
   5516 			entry += (type * 64);
   5517 		index = (entry >> 2) & 0x1F;
   5518 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5519 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5520 		ivar |= (vector << (8 * (entry & 0x3)));
   5521 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5522 		break;
   5523 
   5524 	case ixgbe_mac_82599EB:
   5525 	case ixgbe_mac_X540:
   5526 		if (type == -1) { /* MISC IVAR */
   5527 			index = (entry & 1) * 8;
   5528 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5529 			ivar &= ~(0xFF << index);
   5530 			ivar |= (vector << index);
   5531 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5532 		} else {	/* RX/TX IVARS */
   5533 			index = (16 * (entry & 1)) + (8 * type);
   5534 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5535 			ivar &= ~(0xFF << index);
   5536 			ivar |= (vector << index);
   5537 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
    5538 		}
    5539 		break;
    5540 	default:
   5541 		break;
   5542 	}
   5543 }
   5544 
   5545 static void
   5546 ixgbe_configure_ivars(struct adapter *adapter)
   5547 {
   5548 	struct  ix_queue *que = adapter->queues;
   5549 	u32 newitr;
   5550 
   5551 	if (ixgbe_max_interrupt_rate > 0)
   5552 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5553 	else
   5554 		newitr = 0;
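         	/*
         	 * The EITR interval field sits in bits 11:3 and counts in
         	 * 2 usec units (cf. ixgbe_sysctl_interrupt_rate_handler, which
         	 * inverts this as rate = 500000 / (field >> 3)); e.g. a cap of
         	 * 31250 interrupts/sec gives 4000000 / 31250 = 128, i.e. a
         	 * 32 usec minimum interval.
         	 */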
   5555 
    5556 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5557 		/* First the RX queue entry */
    5558 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5559 		/* ... and the TX */
    5560 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5561 		/* Set an Initial EITR value */
    5562 		IXGBE_WRITE_REG(&adapter->hw,
    5563 		    IXGBE_EITR(que->msix), newitr);
   5564 	}
   5565 
   5566 	/* For the Link interrupt */
    5567 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5568 }
   5569 
   5570 /*
   5571 ** ixgbe_sfp_probe - called in the local timer to
   5572 ** determine if a port had optics inserted.
   5573 */
   5574 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5575 {
   5576 	struct ixgbe_hw	*hw = &adapter->hw;
   5577 	device_t	dev = adapter->dev;
   5578 	bool		result = FALSE;
   5579 
   5580 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5581 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5582 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5583 		if (ret)
    5584 			goto out;
   5585 		ret = hw->phy.ops.reset(hw);
   5586 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5587 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5588 			device_printf(dev, "Reload driver with supported module.\n");
    5589 			adapter->sfp_probe = FALSE;
    5590 			goto out;
    5591 		} else
    5592 			device_printf(dev, "SFP+ module detected!\n");
   5593 		/* We now have supported optics */
   5594 		adapter->sfp_probe = FALSE;
   5595 		/* Set the optics type so system reports correctly */
   5596 		ixgbe_setup_optics(adapter);
   5597 		result = TRUE;
   5598 	}
   5599 out:
   5600 	return (result);
   5601 }
   5602 
   5603 /*
   5604 ** Tasklet handler for MSIX Link interrupts
   5605 **  - do outside interrupt since it might sleep
   5606 */
   5607 static void
   5608 ixgbe_handle_link(void *context)
   5609 {
   5610 	struct adapter  *adapter = context;
   5611 
   5612 	if (ixgbe_check_link(&adapter->hw,
   5613 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
    5614 		ixgbe_update_link_status(adapter);
   5615 }
   5616 
   5617 /*
   5618 ** Tasklet for handling SFP module interrupts
   5619 */
   5620 static void
   5621 ixgbe_handle_mod(void *context)
   5622 {
   5623 	struct adapter  *adapter = context;
   5624 	struct ixgbe_hw *hw = &adapter->hw;
   5625 	device_t	dev = adapter->dev;
   5626 	u32 err;
   5627 
   5628 	err = hw->phy.ops.identify_sfp(hw);
   5629 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5630 		device_printf(dev,
   5631 		    "Unsupported SFP+ module type was detected.\n");
   5632 		return;
   5633 	}
   5634 	err = hw->mac.ops.setup_sfp(hw);
   5635 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5636 		device_printf(dev,
   5637 		    "Setup failure - unsupported SFP+ module type.\n");
   5638 		return;
   5639 	}
   5640 	softint_schedule(adapter->msf_si);
   5641 	return;
   5642 }
   5643 
   5644 
   5645 /*
   5646 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5647 */
   5648 static void
   5649 ixgbe_handle_msf(void *context)
   5650 {
   5651 	struct adapter  *adapter = context;
   5652 	struct ixgbe_hw *hw = &adapter->hw;
   5653 	u32 autoneg;
   5654 	bool negotiate;
   5655 
   5656 	autoneg = hw->phy.autoneg_advertised;
   5657 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5658 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5659 	else
   5660 		negotiate = 0;
   5661 	if (hw->mac.ops.setup_link)
   5662 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5663 	return;
   5664 }
   5665 
   5666 #ifdef IXGBE_FDIR
   5667 /*
   5668 ** Tasklet for reinitializing the Flow Director filter table
   5669 */
   5670 static void
   5671 ixgbe_reinit_fdir(void *context)
   5672 {
   5673 	struct adapter  *adapter = context;
   5674 	struct ifnet   *ifp = adapter->ifp;
   5675 
   5676 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5677 		return;
   5678 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5679 	adapter->fdir_reinit = 0;
   5680 	/* re-enable flow director interrupts */
   5681 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5682 	/* Restart the interface */
   5683 	ifp->if_flags |= IFF_RUNNING;
   5684 	return;
   5685 }
   5686 #endif
   5687 
   5688 /**********************************************************************
   5689  *
   5690  *  Update the board statistics counters.
   5691  *
   5692  **********************************************************************/
   5693 static void
   5694 ixgbe_update_stats_counters(struct adapter *adapter)
   5695 {
   5696 	struct ifnet   *ifp = adapter->ifp;
   5697 	struct ixgbe_hw *hw = &adapter->hw;
   5698 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5699 	u64  total_missed_rx = 0;
   5700 	uint64_t crcerrs, rlec;
   5701 
   5702 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5703 	adapter->stats.crcerrs.ev_count += crcerrs;
   5704 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5705 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5706 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5707 
   5708 	/*
   5709 	** Note: these are for the 8 possible traffic classes,
    5710 	**	 which are unused in the current implementation,
    5711 	**	 so only index 0 should read real data.
   5712 	*/
   5713 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5714 		int j = i % adapter->num_queues;
   5715 		u32 mp;
   5716 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5717 		/* missed_rx tallies misses for the gprc workaround */
   5718 		missed_rx += mp;
   5719 		/* global total per queue */
    5720 		adapter->stats.mpc[j].ev_count += mp;
   5721 		/* Running comprehensive total for stats display */
   5722 		total_missed_rx += mp;
   5723 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5724 			adapter->stats.rnbc[j] +=
   5725 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5726 			adapter->stats.qbtc[j].ev_count +=
   5727 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5728 			adapter->stats.qbrc[j].ev_count +=
   5729 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5730 			adapter->stats.pxonrxc[j].ev_count +=
   5731 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5732 		} else {
   5733 			adapter->stats.pxonrxc[j].ev_count +=
   5734 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5735 		}
   5736 		adapter->stats.pxontxc[j].ev_count +=
   5737 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5738 		adapter->stats.pxofftxc[j].ev_count +=
   5739 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5740 		adapter->stats.pxoffrxc[j].ev_count +=
   5741 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5742 		adapter->stats.pxon2offc[j].ev_count +=
   5743 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5744 	}
   5745 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5746 		int j = i % adapter->num_queues;
   5747 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5748 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5749 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5750 	}
   5751 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5752 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5753 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5754 	adapter->stats.rlec.ev_count += rlec;
   5755 
   5756 	/* Hardware workaround, gprc counts missed packets */
   5757 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5758 
   5759 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5760 	adapter->stats.lxontxc.ev_count += lxon;
   5761 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5762 	adapter->stats.lxofftxc.ev_count += lxoff;
   5763 	total = lxon + lxoff;
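         	/*
         	 * lxon + lxoff is the number of PAUSE frames the MAC sent;
         	 * these are subtracted from the good-transmit packet counters
         	 * below, and at ETHER_MIN_LEN bytes each from the octet counts.
         	 */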
   5764 
   5765 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5766 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5767 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5768 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5769 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5770 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5771 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5772 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5773 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5774 	} else {
   5775 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5776 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5777 		/* 82598 only has a counter in the high register */
   5778 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5779 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5780 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5781 	}
   5782 
   5783 	/*
   5784 	 * Workaround: mprc hardware is incorrectly counting
   5785 	 * broadcasts, so for now we subtract those.
   5786 	 */
   5787 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5788 	adapter->stats.bprc.ev_count += bprc;
   5789 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5790 
   5791 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5792 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5793 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5794 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5795 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5796 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5797 
   5798 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5799 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5800 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5801 
   5802 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5803 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5804 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5805 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5806 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5807 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5808 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5809 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5810 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5811 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5812 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5813 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5814 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5815 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5816 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5817 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5818 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5819 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5820 
   5821 	/* Only read FCOE on 82599 */
   5822 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5823 		adapter->stats.fcoerpdc.ev_count +=
   5824 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5825 		adapter->stats.fcoeprc.ev_count +=
   5826 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5827 		adapter->stats.fcoeptc.ev_count +=
   5828 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5829 		adapter->stats.fcoedwrc.ev_count +=
   5830 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5831 		adapter->stats.fcoedwtc.ev_count +=
   5832 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5833 	}
   5834 
   5835 	/* Fill out the OS statistics structure */
   5836 	/*
   5837 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5838 	 * adapter->stats counters. It's required to make ifconfig -z
    5839 	 * (SIOCZIFDATA) work.
   5840 	 */
   5841 	ifp->if_collisions = 0;
   5842 
   5843 	/* Rx Errors */
   5844 	ifp->if_iqdrops += total_missed_rx;
   5845 	ifp->if_ierrors += crcerrs + rlec;
   5846 }
   5847 
   5848 /** ixgbe_sysctl_tdh_handler - Handler function
   5849  *  Retrieves the TDH value from the hardware
   5850  */
   5851 static int
   5852 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5853 {
   5854 	struct sysctlnode node;
   5855 	uint32_t val;
   5856 	struct tx_ring *txr;
   5857 
   5858 	node = *rnode;
   5859 	txr = (struct tx_ring *)node.sysctl_data;
   5860 	if (txr == NULL)
   5861 		return 0;
   5862 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5863 	node.sysctl_data = &val;
   5864 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5865 }
   5866 
   5867 /** ixgbe_sysctl_tdt_handler - Handler function
   5868  *  Retrieves the TDT value from the hardware
   5869  */
   5870 static int
   5871 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5872 {
   5873 	struct sysctlnode node;
   5874 	uint32_t val;
   5875 	struct tx_ring *txr;
   5876 
   5877 	node = *rnode;
   5878 	txr = (struct tx_ring *)node.sysctl_data;
   5879 	if (txr == NULL)
   5880 		return 0;
   5881 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5882 	node.sysctl_data = &val;
   5883 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5884 }
   5885 
   5886 /** ixgbe_sysctl_rdh_handler - Handler function
   5887  *  Retrieves the RDH value from the hardware
   5888  */
   5889 static int
   5890 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5891 {
   5892 	struct sysctlnode node;
   5893 	uint32_t val;
   5894 	struct rx_ring *rxr;
   5895 
   5896 	node = *rnode;
   5897 	rxr = (struct rx_ring *)node.sysctl_data;
   5898 	if (rxr == NULL)
   5899 		return 0;
   5900 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5901 	node.sysctl_data = &val;
   5902 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5903 }
   5904 
   5905 /** ixgbe_sysctl_rdt_handler - Handler function
   5906  *  Retrieves the RDT value from the hardware
   5907  */
   5908 static int
   5909 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5910 {
   5911 	struct sysctlnode node;
   5912 	uint32_t val;
   5913 	struct rx_ring *rxr;
   5914 
   5915 	node = *rnode;
   5916 	rxr = (struct rx_ring *)node.sysctl_data;
   5917 	if (rxr == NULL)
   5918 		return 0;
   5919 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5920 	node.sysctl_data = &val;
   5921 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5922 }
   5923 
   5924 static int
   5925 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5926 {
   5927 	int error;
   5928 	struct sysctlnode node;
   5929 	struct ix_queue *que;
   5930 	uint32_t reg, usec, rate;
   5931 
   5932 	node = *rnode;
   5933 	que = (struct ix_queue *)node.sysctl_data;
   5934 	if (que == NULL)
   5935 		return 0;
   5936 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5937 	usec = ((reg & 0x0FF8) >> 3);
   5938 	if (usec > 0)
   5939 		rate = 500000 / usec;
   5940 	else
   5941 		rate = 0;
   5942 	node.sysctl_data = &rate;
   5943 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5944 	if (error)
   5945 		return error;
   5946 	reg &= ~0xfff; /* default, no limitation */
   5947 	ixgbe_max_interrupt_rate = 0;
   5948 	if (rate > 0 && rate < 500000) {
   5949 		if (rate < 1000)
   5950 			rate = 1000;
   5951 		ixgbe_max_interrupt_rate = rate;
   5952 		reg |= ((4000000/rate) & 0xff8 );
   5953 	}
   5954 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5955 	return 0;
   5956 }
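         /*
         ** Usage sketch: this handler is reachable per queue through the
         ** per-device node created in ixgbe_sysctl_instance(), e.g.
         ** (assuming the adapter attached as ixg0):
         **
         **	sysctl -w hw.ixg0.queue0.interrupt_rate=8000
         */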
   5957 
   5958 const struct sysctlnode *
   5959 ixgbe_sysctl_instance(struct adapter *adapter)
   5960 {
   5961 	const char *dvname;
   5962 	struct sysctllog **log;
   5963 	int rc;
   5964 	const struct sysctlnode *rnode;
   5965 
   5966 	log = &adapter->sysctllog;
   5967 	dvname = device_xname(adapter->dev);
   5968 
   5969 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5970 	    0, CTLTYPE_NODE, dvname,
   5971 	    SYSCTL_DESCR("ixgbe information and settings"),
   5972 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5973 		goto err;
   5974 
   5975 	return rnode;
   5976 err:
   5977 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5978 	return NULL;
   5979 }
   5980 
   5981 /*
   5982  * Add sysctl variables, one per statistic, to the system.
   5983  */
   5984 static void
   5985 ixgbe_add_hw_stats(struct adapter *adapter)
   5986 {
   5987 	device_t dev = adapter->dev;
   5988 	const struct sysctlnode *rnode, *cnode;
   5989 	struct sysctllog **log = &adapter->sysctllog;
   5990 	struct tx_ring *txr = adapter->tx_rings;
   5991 	struct rx_ring *rxr = adapter->rx_rings;
   5992 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5993 
   5994 	/* Driver Statistics */
   5995 #if 0
   5996 	/* These counters are not updated by the software */
   5997 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5998 			CTLFLAG_RD, &adapter->dropped_pkts,
   5999 			"Driver dropped packets");
   6000 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   6001 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   6002 			"???");
   6003 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   6004 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   6005 			"???");
   6006 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   6007 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   6008 			"???");
   6009 #endif
   6010 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   6011 	    NULL, device_xname(dev), "Handled queue in softint");
   6012 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   6013 	    NULL, device_xname(dev), "Requeued in softint");
   6014 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   6015 	    NULL, device_xname(dev), "Interrupt handler more rx");
   6016 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   6017 	    NULL, device_xname(dev), "Interrupt handler more tx");
   6018 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   6019 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   6020 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   6021 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   6022 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   6023 	    NULL, device_xname(dev), "m_defrag() failed");
   6024 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   6025 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   6026 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   6027 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   6028 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   6029 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   6030 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   6031 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   6032 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   6033 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   6034 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   6035 	    NULL, device_xname(dev), "Watchdog timeouts");
   6036 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   6037 	    NULL, device_xname(dev), "TSO errors");
   6038 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   6039 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   6040 
   6041 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   6042 		snprintf(adapter->queues[i].evnamebuf,
   6043 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   6044 		    device_xname(dev), i);
   6045 		snprintf(adapter->queues[i].namebuf,
   6046 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   6047 
   6048 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   6049 			aprint_error_dev(dev, "could not create sysctl root\n");
   6050 			break;
   6051 		}
   6052 
   6053 		if (sysctl_createv(log, 0, &rnode, &rnode,
   6054 		    0, CTLTYPE_NODE,
   6055 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   6056 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   6057 			break;
   6058 
   6059 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6060 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   6061 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   6062 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   6063 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   6064 			break;
   6065 
   6066 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6067 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   6068 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   6069 			NULL, 0, &(adapter->queues[i].irqs),
   6070 		    0, CTL_CREATE, CTL_EOL) != 0)
   6071 			break;
   6072 
   6073 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6074 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6075 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   6076 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   6077 		    0, CTL_CREATE, CTL_EOL) != 0)
   6078 			break;
   6079 
   6080 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6081 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6082 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   6083 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   6084 		    0, CTL_CREATE, CTL_EOL) != 0)
   6085 			break;
   6086 
   6087 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   6088 		    NULL, device_xname(dev), "TSO");
   6089 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   6090 		    NULL, adapter->queues[i].evnamebuf,
   6091 		    "Queue No Descriptor Available");
   6092 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   6093 		    NULL, adapter->queues[i].evnamebuf,
   6094 		    "Queue Packets Transmitted");
   6095 
   6096 #ifdef LRO
   6097 		struct lro_ctrl *lro = &rxr->lro;
   6098 #endif /* LRO */
   6099 
   6100 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6101 		    CTLFLAG_READONLY,
   6102 		    CTLTYPE_INT,
   6103 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   6104 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   6105 		    CTL_CREATE, CTL_EOL) != 0)
   6106 			break;
   6107 
   6108 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6109 		    CTLFLAG_READONLY,
   6110 		    CTLTYPE_INT,
   6111 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   6112 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   6113 		    CTL_CREATE, CTL_EOL) != 0)
   6114 			break;
   6115 
   6116 		if (i < __arraycount(adapter->stats.mpc)) {
   6117 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   6118 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6119 			    "Missed Packet Count");
   6120 		}
   6121 		if (i < __arraycount(adapter->stats.pxontxc)) {
   6122 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   6123 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6124 			    "pxontxc");
   6125 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   6126 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6127 			    "pxonrxc");
   6128 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   6129 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6130 			    "pxofftxc");
   6131 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   6132 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6133 			    "pxoffrxc");
   6134 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   6135 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6136 			    "pxon2offc");
   6137 		}
   6138 		if (i < __arraycount(adapter->stats.qprc)) {
   6139 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   6140 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6141 			    "qprc");
   6142 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   6143 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6144 			    "qptc");
   6145 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   6146 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6147 			    "qbrc");
   6148 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   6149 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6150 			    "qbtc");
   6151 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   6152 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6153 			    "qprdc");
   6154 		}
   6155 
   6156 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   6157 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   6158 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   6159 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   6160 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   6161 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   6162 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   6163 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   6164 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   6165 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   6166 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   6167 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   6168 #ifdef LRO
   6169 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   6170 				CTLFLAG_RD, &lro->lro_queued, 0,
   6171 				"LRO Queued");
   6172 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   6173 				CTLFLAG_RD, &lro->lro_flushed, 0,
   6174 				"LRO Flushed");
   6175 #endif /* LRO */
   6176 	}
   6177 
    6178 	/* MAC stats get their own sub node */
   6179 
   6180 
   6181 	snprintf(stats->namebuf,
   6182 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));

	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP");
	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4");
	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP bad");
	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4 bad");
	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Interrupt conditions zero");
	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Legacy interrupts");
	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "CRC Errors");
	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Illegal Byte Errors");
	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Byte Errors");
	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Short Packets Discarded");
	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Local Faults");
	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Remote Faults");
	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Length Errors");
	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Transmitted");
	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Received");
	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Transmitted");
	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Received");

	/* Packet Reception Stats */
	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Octets Received");
	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Received");
	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Received");
	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Received");
	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Received");
	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Received");
	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames received");
	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames received");
	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames received");
	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames received");
	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames received");
	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames received");
	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Undersized");
	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Fragmented Packets Received");
	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Oversized Packets Received");
	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Received Jabber");
	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Received");
	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Checksum Errors");

	/* Packet Transmission Stats */
	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Transmitted");
	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Transmitted");
	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Transmitted");
	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Transmitted");
	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames transmitted");
}

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
ixgbe_set_flowcntl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, last;
	struct adapter *adapter;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	node.sysctl_data = &adapter->fc;
	last = adapter->fc;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	/* Don't bother if it's not changed */
	if (adapter->fc == last)
		return (0);

	switch (adapter->fc) {
		case ixgbe_fc_rx_pause:
		case ixgbe_fc_tx_pause:
		case ixgbe_fc_full:
			adapter->hw.fc.requested_mode = adapter->fc;
			if (adapter->num_queues > 1)
				ixgbe_disable_rx_drop(adapter);
			break;
		case ixgbe_fc_none:
			adapter->hw.fc.requested_mode = ixgbe_fc_none;
			if (adapter->num_queues > 1)
				ixgbe_enable_rx_drop(adapter);
			break;
		default:
			adapter->fc = last;
			return (EINVAL);
	}
	/* Don't autoneg if forcing a value */
	adapter->hw.fc.disable_fc_autoneg = TRUE;
	ixgbe_fc_enable(&adapter->hw);
	return 0;
}
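
/*
 * Usage sketch (not compiled): the handler above is reached through
 * sysctl(3).  A minimal userland caller might look like the snippet
 * below.  The node name "hw.ixg0.fc" is an assumption for
 * illustration -- substitute whatever name the driver registered
 * when it created the node with sysctl_createv().
 */
#if 0
#include <sys/sysctl.h>
#include <err.h>

static void
example_set_flowcntl(int mode)	/* 0=off 1=rx pause 2=tx pause 3=full */
{
	/* Write-only: pass NULL oldp, supply the new value. */
	if (sysctlbyname("hw.ixg0.fc", NULL, NULL,
	    &mode, sizeof(mode)) == -1)
		err(1, "sysctlbyname(hw.ixg0.fc)");
}
#endif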

/*
** Control link advertise speed:
**	1 - advertise only 1G
**	2 - advertise 100Mb
**	3 - advertise normal (1G and 10G)
**	-1 - leave the setting unchanged
*/
static int
ixgbe_set_advertise(SYSCTLFN_ARGS)
{
	struct sysctlnode	node;
	int			t, error = 0;
	struct adapter		*adapter;
	device_t		dev;
	struct ixgbe_hw		*hw;
	ixgbe_link_speed	speed, last;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	dev = adapter->dev;
	hw = &adapter->hw;
	last = adapter->advertise;
	t = adapter->advertise;
	node.sysctl_data = &t;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	/*
	 * sysctl_lookup() stored the new value in the local copy t,
	 * not in adapter->advertise, so the no-change test must
	 * compare t against the previous setting.
	 */
	if (t == last) /* no change */
		return (0);

	if (t == -1)
		return 0;

	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
	    (hw->phy.multispeed_fiber)))
		return (EINVAL);

	if ((t == 2) && (hw->mac.type != ixgbe_mac_X540)) {
		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
		return (EINVAL);
	}

	/* Commit the new value only after it has passed validation */
	adapter->advertise = t;

	if (adapter->advertise == 1)
		speed = IXGBE_LINK_SPEED_1GB_FULL;
	else if (adapter->advertise == 2)
		speed = IXGBE_LINK_SPEED_100_FULL;
	else if (adapter->advertise == 3)
		speed = IXGBE_LINK_SPEED_1GB_FULL |
			IXGBE_LINK_SPEED_10GB_FULL;
	else {	/* bogus value */
		adapter->advertise = last;
		return (EINVAL);
	}

	hw->mac.autotry_restart = TRUE;
	hw->mac.ops.setup_link(hw, speed, TRUE);

	return 0;
}
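
/*
 * Usage sketch (not compiled): querying and changing the advertised
 * speed from userland.  The node name "hw.ixg0.advertise_speed" is
 * an assumption for illustration; the values follow the table above
 * (1 = 1G only, 2 = 100Mb, 3 = normal).
 */
#if 0
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

static void
example_advertise_1g_only(void)
{
	int prev, want = 1;	/* 1 == advertise only 1G */
	size_t len = sizeof(prev);

	/* oldp receives the previous setting, newp supplies the new one */
	if (sysctlbyname("hw.ixg0.advertise_speed", &prev, &len,
	    &want, sizeof(want)) == -1)
		err(1, "sysctlbyname(hw.ixg0.advertise_speed)");
	printf("previous advertise setting: %d\n", prev);
}
#endif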

/*
** Thermal Shutdown Trigger
**   - cause a Thermal Overtemp IRQ
**   - this now requires firmware enabling
*/
static int
ixgbe_set_thermal_test(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int		error, fire = 0;
	struct adapter	*adapter;
	struct ixgbe_hw *hw;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	hw = &adapter->hw;

	if (hw->mac.type != ixgbe_mac_X540)
		return (0);

	node.sysctl_data = &fire;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if ((error) || (newp == NULL))
		return (error);

	if (fire) {
		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
		reg |= IXGBE_EICR_TS;
		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
	}

	return (0);
}
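
/*
 * Usage sketch (not compiled): any nonzero write to the node backing
 * the handler above sets IXGBE_EICR_TS in EICS, raising the overtemp
 * interrupt on X540 parts (firmware must have the feature enabled).
 * The node name "hw.ixg0.ts" is an assumption for illustration.
 */
#if 0
#include <sys/sysctl.h>
#include <err.h>

static void
example_fire_thermal_test(void)
{
	int fire = 1;

	if (sysctlbyname("hw.ixg0.ts", NULL, NULL,
	    &fire, sizeof(fire)) == -1)
		err(1, "sysctlbyname(hw.ixg0.ts)");
}
#endif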

/*
** Enable the hardware to drop packets when a buffer is full.
** This is useful with multiqueue, so that a single full queue
** does not stall the entire RX engine. We only enable this
** when multiqueue is in use AND when flow control is disabled.
*/
static void
ixgbe_enable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl |= IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}

static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
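
/*
 * The two helpers above are invoked from ixgbe_set_flowcntl(): drop
 * is enabled when flow control is turned off on a multiqueue adapter
 * and disabled otherwise.  Should per-queue control ever be needed,
 * a variant following the same SRRCTL read-modify-write pattern
 * could look like this sketch (hypothetical, not part of the driver):
 */
#if 0
static void
ixgbe_set_rx_drop(struct adapter *adapter, int queue, bool enable)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(queue));

	if (enable)
		srrctl |= IXGBE_SRRCTL_DROP_EN;
	else
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(queue), srrctl);
}
#endif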