      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
     62 /*$NetBSD: ixgbe.c,v 1.38 2016/06/10 13:27:14 ozaki-r Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 #include "vlan.h"
     69 
     70 #include <sys/cprng.h>
     71 
     72 /*********************************************************************
     73  *  Set this to one to display debug statistics
     74  *********************************************************************/
     75 int             ixgbe_display_debug_stats = 0;
     76 
     77 /*********************************************************************
     78  *  Driver version
     79  *********************************************************************/
     80 char ixgbe_driver_version[] = "2.5.15";
     81 
     82 /*********************************************************************
     83  *  PCI Device ID Table
     84  *
     85  *  Used by probe to select devices to load on
     86  *  Last field stores an index into ixgbe_strings
     87  *  Last entry must be all 0s
     88  *
     89  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     90  *********************************************************************/
     91 
     92 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     93 {
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    115 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    116 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    117 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    118 	/* required last entry */
    119 	{0, 0, 0, 0, 0}
    120 };
    121 
    122 /*********************************************************************
    123  *  Table of branding strings
    124  *********************************************************************/
    125 
    126 static const char    *ixgbe_strings[] = {
    127 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
    128 };
    129 
    130 /*********************************************************************
    131  *  Function prototypes
    132  *********************************************************************/
    133 static int      ixgbe_probe(device_t, cfdata_t, void *);
    134 static void     ixgbe_attach(device_t, device_t, void *);
    135 static int      ixgbe_detach(device_t, int);
    136 #if 0
    137 static int      ixgbe_shutdown(device_t);
    138 #endif
    139 #ifdef IXGBE_LEGACY_TX
    140 static void     ixgbe_start(struct ifnet *);
    141 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    142 #else /* ! IXGBE_LEGACY_TX */
    143 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    144 static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
    145 static void	ixgbe_qflush(struct ifnet *);
    146 static void	ixgbe_deferred_mq_start(void *, int);
    147 #endif /* IXGBE_LEGACY_TX */
    148 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    149 static void	ixgbe_ifstop(struct ifnet *, int);
    150 static int	ixgbe_init(struct ifnet *);
    151 static void	ixgbe_init_locked(struct adapter *);
    152 static void     ixgbe_stop(void *);
    153 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    154 static int      ixgbe_media_change(struct ifnet *);
    155 static void     ixgbe_identify_hardware(struct adapter *);
    156 static int      ixgbe_allocate_pci_resources(struct adapter *,
    157 		    const struct pci_attach_args *);
    158 static void	ixgbe_get_slot_info(struct ixgbe_hw *);
    159 static int      ixgbe_allocate_msix(struct adapter *,
    160 		    const struct pci_attach_args *);
    161 static int      ixgbe_allocate_legacy(struct adapter *,
    162 		    const struct pci_attach_args *);
    163 static int	ixgbe_allocate_queues(struct adapter *);
    164 static int	ixgbe_setup_msix(struct adapter *);
    165 static void	ixgbe_free_pci_resources(struct adapter *);
    166 static void	ixgbe_local_timer(void *);
    167 static int	ixgbe_setup_interface(device_t, struct adapter *);
    168 static void	ixgbe_config_link(struct adapter *);
    169 
    170 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    171 static int	ixgbe_setup_transmit_structures(struct adapter *);
    172 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    173 static void     ixgbe_initialize_transmit_units(struct adapter *);
    174 static void     ixgbe_free_transmit_structures(struct adapter *);
    175 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    176 
    177 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    178 static int      ixgbe_setup_receive_structures(struct adapter *);
    179 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    180 static void     ixgbe_initialize_receive_units(struct adapter *);
    181 static void     ixgbe_free_receive_structures(struct adapter *);
    182 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    183 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    184 
    185 static void     ixgbe_enable_intr(struct adapter *);
    186 static void     ixgbe_disable_intr(struct adapter *);
    187 static void     ixgbe_update_stats_counters(struct adapter *);
    188 static void	ixgbe_txeof(struct tx_ring *);
    189 static bool	ixgbe_rxeof(struct ix_queue *);
    190 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    191 		    struct ixgbe_hw_stats *);
    192 static void     ixgbe_set_promisc(struct adapter *);
    193 static void     ixgbe_set_multi(struct adapter *);
    194 static void     ixgbe_update_link_status(struct adapter *);
    195 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    196 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    197 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    198 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    199 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    200 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    201 		    struct ixgbe_dma_alloc *, int);
    202 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    203 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    204 		    struct mbuf *, u32 *, u32 *);
    205 static int	ixgbe_tso_setup(struct tx_ring *,
    206 		    struct mbuf *, u32 *, u32 *);
    207 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    208 static void	ixgbe_configure_ivars(struct adapter *);
    209 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    210 
    211 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    212 #if 0
    213 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    214 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    215 #endif
    216 
    217 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    218 
    219 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    220 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    221 		    struct mbuf *, u32);
    222 
    223 static void	ixgbe_enable_rx_drop(struct adapter *);
    224 static void	ixgbe_disable_rx_drop(struct adapter *);
    225 
    226 /* Support for pluggable optic modules */
    227 static bool	ixgbe_sfp_probe(struct adapter *);
    228 static void	ixgbe_setup_optics(struct adapter *);
    229 
     230 /* Legacy (single vector) interrupt handler */
    231 static int	ixgbe_legacy_irq(void *);
    232 
    233 #if defined(NETBSD_MSI_OR_MSIX)
    234 /* The MSI/X Interrupt handlers */
    235 static int	ixgbe_msix_que(void *);
    236 static int	ixgbe_msix_link(void *);
    237 #endif
    238 
    239 /* Software interrupts for deferred work */
    240 static void	ixgbe_handle_que(void *);
    241 static void	ixgbe_handle_link(void *);
    242 static void	ixgbe_handle_msf(void *);
    243 static void	ixgbe_handle_mod(void *);
    244 
    245 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    246 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    247 
    248 #ifdef IXGBE_FDIR
    249 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    250 static void	ixgbe_reinit_fdir(void *, int);
    251 #endif
    252 
    253 /* Missing shared code prototype */
    254 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
    255 
    256 /*********************************************************************
    257  *  FreeBSD Device Interface Entry Points
    258  *********************************************************************/
    259 
    260 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    261     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    262     DVF_DETACH_SHUTDOWN);
    263 
    264 #if 0
    265 devclass_t ixgbe_devclass;
    266 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    267 
    268 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    269 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    270 #endif
    271 
    272 /*
    273 ** TUNEABLE PARAMETERS:
    274 */
    275 
    276 /*
    277 ** AIM: Adaptive Interrupt Moderation
    278 ** which means that the interrupt rate
    279 ** is varied over time based on the
    280 ** traffic for that interrupt vector
    281 */
    282 static int ixgbe_enable_aim = TRUE;
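/*
** Note (NetBSD port): SYSCTL_INT() is stubbed to a no-op just below,
** so these registrations are inert; knobs such as enable_aim are
** instead exported per instance via sysctl_createv() in
** ixgbe_sysctl_attach().
*/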
    283 #define SYSCTL_INT(__x, __y)
    284 SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
    285 
    286 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    287 SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
    288 
    289 /* How many packets rxeof tries to clean at a time */
    290 static int ixgbe_rx_process_limit = 256;
    291 SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    292 
    293 /* How many packets txeof tries to clean at a time */
    294 static int ixgbe_tx_process_limit = 256;
    295 SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    296 
    297 /*
     298 ** Smart speed setting, default to on.
     299 ** This only works as a compile-time option
     300 ** right now since it takes effect during
     301 ** attach; set this to 'ixgbe_smart_speed_off'
     302 ** to disable.
    303 */
    304 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    305 
    306 /*
    307  * MSIX should be the default for best performance,
    308  * but this allows it to be forced off for testing.
    309  */
    310 static int ixgbe_enable_msix = 1;
    311 SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    312 
    313 #if defined(NETBSD_MSI_OR_MSIX)
    314 /*
     315  * Number of queues; if set to 0 it
     316  * autoconfigures based on the number
     317  * of cpus, with a max of 8. This
     318  * can be overridden manually here.
    319  */
    320 static int ixgbe_num_queues = 1;
    321 SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    322 #endif
    323 
    324 /*
     325 ** Number of TX descriptors per ring,
     326 ** set higher than RX as this seems to
     327 ** be the better performing choice.
    328 */
    329 static int ixgbe_txd = PERFORM_TXD;
    330 SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);
    331 
    332 /* Number of RX descriptors per ring */
    333 static int ixgbe_rxd = PERFORM_RXD;
    334 SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    335 
    336 /*
     337 ** Setting this on will allow the use
     338 ** of unsupported SFP+ modules; note that
     339 ** in doing so you are on your own :)
    340 */
    341 static int allow_unsupported_sfp = false;
    342 SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
    343 
    344 /*
    345 ** HW RSC control:
     346 **  this feature only works with
     347 **  IPv4, and only on 82599 and later.
     348 **  It will also cause IP forwarding to
     349 **  fail, and that can't be controlled by
     350 **  the stack as LRO can. For all these
     351 **  reasons I've deemed it best to leave
     352 **  this off and not bother with a tuneable
     353 **  interface; enabling it requires a
     354 **  recompile.
    355 */
    356 static bool ixgbe_rsc_enable = FALSE;
    357 
     358 /* Keep a running tab on them for a sanity check */
    359 static int ixgbe_total_ports;
    360 
    361 #ifdef IXGBE_FDIR
    362 /*
    363 ** For Flow Director: this is the
    364 ** number of TX packets we sample
     365 ** for the filter pool; this means
    366 ** every 20th packet will be probed.
    367 **
    368 ** This feature can be disabled by
    369 ** setting this to 0.
    370 */
    371 static int atr_sample_rate = 20;
    372 /*
    373 ** Flow Director actually 'steals'
    374 ** part of the packet buffer as its
     375 ** filter pool; this variable controls
    376 ** how much it uses:
    377 **  0 = 64K, 1 = 128K, 2 = 256K
    378 */
    379 static int fdir_pballoc = 1;
    380 #endif
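
/*
** Illustrative sketch only (condensed; the counter field is
** hypothetical): the sampling above amounts to a simple modulo
** counter in the transmit path, so a rate of 20 really does mean
** one probed packet in twenty:
**
**	if (++txr->atr_count < atr_sample_rate)
**		return;
**	txr->atr_count = 0;
**	... build and program the signature filter ...
**
** fdir_pballoc, in turn, is expanded to header memory as
** (32 << fdir_pballoc) and handed to setup_rxpba() from
** ixgbe_init_locked().
*/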
    381 
    382 #ifdef DEV_NETMAP
    383 /*
    384  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    385  * be a reference on how to implement netmap support in a driver.
    386  * Additional comments are in ixgbe_netmap.h .
    387  *
    388  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    389  * that extend the standard driver.
    390  */
    391 #include <dev/netmap/ixgbe_netmap.h>
    392 #endif /* DEV_NETMAP */
    393 
    394 /*********************************************************************
    395  *  Device identification routine
    396  *
     397  *  ixgbe_probe determines if the driver should be loaded on
     398  *  an adapter based on its PCI vendor/device ID.
    399  *
    400  *  return 1 on success, 0 on failure
    401  *********************************************************************/
    402 
    403 static int
    404 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    405 {
    406 	const struct pci_attach_args *pa = aux;
    407 
    408 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    409 }
    410 
    411 static ixgbe_vendor_info_t *
    412 ixgbe_lookup(const struct pci_attach_args *pa)
    413 {
    414 	pcireg_t subid;
    415 	ixgbe_vendor_info_t *ent;
    416 
    417 	INIT_DEBUGOUT("ixgbe_probe: begin");
    418 
    419 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    420 		return NULL;
    421 
    422 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    423 
    424 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    425 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    426 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    427 
    428 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    429 		     ent->subvendor_id == 0) &&
    430 
    431 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    432 		     ent->subdevice_id == 0)) {
    433 			++ixgbe_total_ports;
    434 			return ent;
    435 		}
    436 	}
    437 	return NULL;
    438 }
    439 
    440 
    441 static void
    442 ixgbe_sysctl_attach(struct adapter *adapter)
    443 {
    444 	struct sysctllog **log;
    445 	const struct sysctlnode *rnode, *cnode;
    446 	device_t dev;
    447 
    448 	dev = adapter->dev;
    449 	log = &adapter->sysctllog;
    450 
    451 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    452 		aprint_error_dev(dev, "could not create sysctl root\n");
    453 		return;
    454 	}
    455 
    456 	if (sysctl_createv(log, 0, &rnode, &cnode,
    457 	    CTLFLAG_READONLY, CTLTYPE_INT,
    458 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    459 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    460 		aprint_error_dev(dev, "could not create sysctl\n");
    461 
    462 	if (sysctl_createv(log, 0, &rnode, &cnode,
    463 	    CTLFLAG_READONLY, CTLTYPE_INT,
    464 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    465 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    466 		aprint_error_dev(dev, "could not create sysctl\n");
    467 
    468 	if (sysctl_createv(log, 0, &rnode, &cnode,
    469 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    470 	    "fc", SYSCTL_DESCR("Flow Control"),
    471 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    472 		aprint_error_dev(dev, "could not create sysctl\n");
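	/* A note on values (assumption, based on the FreeBSD-derived
	 * ixgbe_set_flowcntl() handler): 0 = no flow control,
	 * 1 = rx pause, 2 = tx pause, 3 = full.
	 */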
    473 
    474 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    475 	 * XXX It's that way in the FreeBSD driver that this derives from.
    476 	 */
    477 	if (sysctl_createv(log, 0, &rnode, &cnode,
    478 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    479 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    480 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    481 		aprint_error_dev(dev, "could not create sysctl\n");
    482 
    483 	if (sysctl_createv(log, 0, &rnode, &cnode,
    484 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    485 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    486 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    487 		aprint_error_dev(dev, "could not create sysctl\n");
    488 
    489 	if (sysctl_createv(log, 0, &rnode, &cnode,
    490 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    491 	    "ts", SYSCTL_DESCR("Thermal Test"),
    492 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    493 		aprint_error_dev(dev, "could not create sysctl\n");
    494 }
    495 
    496 /*********************************************************************
    497  *  Device initialization routine
    498  *
    499  *  The attach entry point is called when the driver is being loaded.
    500  *  This routine identifies the type of hardware, allocates all resources
    501  *  and initializes the hardware.
    502  *
    503  *  return 0 on success, positive on failure
    504  *********************************************************************/
    505 
    506 static void
    507 ixgbe_attach(device_t parent, device_t dev, void *aux)
    508 {
    509 	struct adapter *adapter;
    510 	struct ixgbe_hw *hw;
    511 	int             error = -1;
    512 	u16		csum;
    513 	u32		ctrl_ext;
    514 	ixgbe_vendor_info_t *ent;
    515 	const struct pci_attach_args *pa = aux;
    516 
    517 	INIT_DEBUGOUT("ixgbe_attach: begin");
    518 
    519 	/* Allocate, clear, and link in our adapter structure */
    520 	adapter = device_private(dev);
    521 	adapter->dev = adapter->osdep.dev = dev;
    522 	hw = &adapter->hw;
    523 	adapter->osdep.pc = pa->pa_pc;
    524 	adapter->osdep.tag = pa->pa_tag;
    525 	adapter->osdep.dmat = pa->pa_dmat;
    526 	adapter->osdep.attached = false;
    527 
    528 	ent = ixgbe_lookup(pa);
    529 
    530 	KASSERT(ent != NULL);
    531 
    532 	aprint_normal(": %s, Version - %s\n",
    533 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    534 
    535 	/* Core Lock Init*/
    536 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    537 
    538 	/* SYSCTL APIs */
    539 
    540 	ixgbe_sysctl_attach(adapter);
    541 
    542 	/* Set up the timer callout */
    543 	callout_init(&adapter->timer, 0);
    544 
    545 	/* Determine hardware revision */
    546 	ixgbe_identify_hardware(adapter);
    547 
    548 	/* Do base PCI setup - map BAR0 */
    549 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    550 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    551 		error = ENXIO;
    552 		goto err_out;
    553 	}
    554 
    555 	/* Do descriptor calc and sanity checks */
    556 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    557 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    558 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    559 		adapter->num_tx_desc = DEFAULT_TXD;
    560 	} else
    561 		adapter->num_tx_desc = ixgbe_txd;
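
	/* Worked example (assuming the usual ixgbe.h values, where an
	 * advanced TX descriptor is 16 bytes and DBA_ALIGN is 128):
	 * the alignment test above passes only when the descriptor
	 * count is a multiple of 8.
	 */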
    562 
    563 	/*
    564 	** With many RX rings it is easy to exceed the
    565 	** system mbuf allocation. Tuning nmbclusters
    566 	** can alleviate this.
    567 	*/
    568 	if (nmbclusters > 0 ) {
    569 		int s;
    570 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    571 		if (s > nmbclusters) {
    572 			aprint_error_dev(dev, "RX Descriptors exceed "
    573 			    "system mbuf max, using default instead!\n");
    574 			ixgbe_rxd = DEFAULT_RXD;
    575 		}
    576 	}
    577 
    578 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
    579 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
    580 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    581 		adapter->num_rx_desc = DEFAULT_RXD;
    582 	} else
    583 		adapter->num_rx_desc = ixgbe_rxd;
    584 
    585 	/* Allocate our TX/RX Queues */
    586 	if (ixgbe_allocate_queues(adapter)) {
    587 		error = ENOMEM;
    588 		goto err_out;
    589 	}
    590 
    591 	/* Allocate multicast array memory. */
    592 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    593 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    594 	if (adapter->mta == NULL) {
    595 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    596 		error = ENOMEM;
    597 		goto err_late;
    598 	}
    599 
    600 	/* Initialize the shared code */
    601 	hw->allow_unsupported_sfp = allow_unsupported_sfp;
    602 	error = ixgbe_init_shared_code(hw);
    603 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    604 		/*
    605 		** No optics in this port, set up
    606 		** so the timer routine will probe
    607 		** for later insertion.
    608 		*/
    609 		adapter->sfp_probe = TRUE;
    610 		error = 0;
    611 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    612 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    613 		error = EIO;
    614 		goto err_late;
    615 	} else if (error) {
    616 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    617 		error = EIO;
    618 		goto err_late;
    619 	}
    620 
    621 	/* Make sure we have a good EEPROM before we read from it */
    622 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    623 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    624 		error = EIO;
    625 		goto err_late;
    626 	}
    627 
    628 	error = ixgbe_init_hw(hw);
    629 	switch (error) {
    630 	case IXGBE_ERR_EEPROM_VERSION:
    631 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    632 		    "LOM.  Please be aware there may be issues associated "
    633 		    "with your hardware.\n If you are experiencing problems "
    634 		    "please contact your Intel or hardware representative "
    635 		    "who provided you with this hardware.\n");
    636 		break;
    637 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    638 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    639 		error = EIO;
    640 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    641 		goto err_late;
    642 	case IXGBE_ERR_SFP_NOT_PRESENT:
    643 		device_printf(dev,"No SFP+ Module found\n");
     644 		/* FALLTHROUGH */
    645 	default:
    646 		break;
    647 	}
    648 
    649 	/* Detect and set physical type */
    650 	ixgbe_setup_optics(adapter);
    651 
    652 	error = -1;
    653 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    654 		error = ixgbe_allocate_msix(adapter, pa);
    655 	if (error != 0)
    656 		error = ixgbe_allocate_legacy(adapter, pa);
    657 	if (error)
    658 		goto err_late;
    659 
    660 	/* Setup OS specific network interface */
    661 	if (ixgbe_setup_interface(dev, adapter) != 0)
    662 		goto err_late;
    663 
    664 	/* Initialize statistics */
    665 	ixgbe_update_stats_counters(adapter);
    666 
    667 	/*
    668 	** Check PCIE slot type/speed/width
    669 	*/
    670 	ixgbe_get_slot_info(hw);
    671 
    672 	/* Set an initial default flow control value */
    673 	adapter->fc =  ixgbe_fc_full;
    674 
    675 	/* let hardware know driver is loaded */
    676 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    677 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    678 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    679 
    680 	ixgbe_add_hw_stats(adapter);
    681 
    682 #ifdef DEV_NETMAP
    683 	ixgbe_netmap_attach(adapter);
    684 #endif /* DEV_NETMAP */
    685 	INIT_DEBUGOUT("ixgbe_attach: end");
    686 	adapter->osdep.attached = true;
    687 	return;
    688 err_late:
    689 	ixgbe_free_transmit_structures(adapter);
    690 	ixgbe_free_receive_structures(adapter);
    691 err_out:
    692 	if (adapter->ifp != NULL)
    693 		if_free(adapter->ifp);
    694 	ixgbe_free_pci_resources(adapter);
    695 	if (adapter->mta != NULL)
    696 		free(adapter->mta, M_DEVBUF);
    697 	return;
    698 
    699 }
    700 
    701 /*********************************************************************
    702  *  Device removal routine
    703  *
    704  *  The detach entry point is called when the driver is being removed.
    705  *  This routine stops the adapter and deallocates all the resources
    706  *  that were allocated for driver operation.
    707  *
    708  *  return 0 on success, positive on failure
    709  *********************************************************************/
    710 
    711 static int
    712 ixgbe_detach(device_t dev, int flags)
    713 {
    714 	struct adapter *adapter = device_private(dev);
    715 	struct rx_ring *rxr = adapter->rx_rings;
    716 	struct ixgbe_hw_stats *stats = &adapter->stats;
    717 	struct ix_queue *que = adapter->queues;
    718 	struct tx_ring *txr = adapter->tx_rings;
    719 	u32	ctrl_ext;
    720 
    721 	INIT_DEBUGOUT("ixgbe_detach: begin");
    722 	if (adapter->osdep.attached == false)
    723 		return 0;
    724 
    725 #if NVLAN > 0
    726 	/* Make sure VLANs are not using driver */
    727 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    728 		;	/* nothing to do: no VLANs */
    729 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    730 		vlan_ifdetach(adapter->ifp);
    731 	else {
    732 		aprint_error_dev(dev, "VLANs in use\n");
    733 		return EBUSY;
    734 	}
    735 #endif
    736 
    737 	IXGBE_CORE_LOCK(adapter);
    738 	ixgbe_stop(adapter);
    739 	IXGBE_CORE_UNLOCK(adapter);
    740 
    741 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    742 #ifndef IXGBE_LEGACY_TX
    743 		softint_disestablish(txr->txq_si);
    744 #endif
    745 		softint_disestablish(que->que_si);
    746 	}
    747 
    748 	/* Drain the Link queue */
    749 	softint_disestablish(adapter->link_si);
    750 	softint_disestablish(adapter->mod_si);
    751 	softint_disestablish(adapter->msf_si);
    752 #ifdef IXGBE_FDIR
    753 	softint_disestablish(adapter->fdir_si);
    754 #endif
    755 
    756 	/* let hardware know driver is unloading */
    757 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    758 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    759 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    760 
    761 	ether_ifdetach(adapter->ifp);
    762 	callout_halt(&adapter->timer, NULL);
    763 #ifdef DEV_NETMAP
    764 	netmap_detach(adapter->ifp);
    765 #endif /* DEV_NETMAP */
    766 	ixgbe_free_pci_resources(adapter);
    767 #if 0	/* XXX the NetBSD port is probably missing something here */
    768 	bus_generic_detach(dev);
    769 #endif
    770 	if_detach(adapter->ifp);
    771 
    772 	sysctl_teardown(&adapter->sysctllog);
    773 	evcnt_detach(&adapter->handleq);
    774 	evcnt_detach(&adapter->req);
    775 	evcnt_detach(&adapter->morerx);
    776 	evcnt_detach(&adapter->moretx);
    777 	evcnt_detach(&adapter->txloops);
    778 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    779 	evcnt_detach(&adapter->m_defrag_failed);
    780 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    781 	evcnt_detach(&adapter->einval_tx_dma_setup);
    782 	evcnt_detach(&adapter->other_tx_dma_setup);
    783 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    784 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    785 	evcnt_detach(&adapter->watchdog_events);
    786 	evcnt_detach(&adapter->tso_err);
    787 	evcnt_detach(&adapter->link_irq);
    788 
    789 	txr = adapter->tx_rings;
    790 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    791 		evcnt_detach(&txr->no_desc_avail);
    792 		evcnt_detach(&txr->total_packets);
    793 		evcnt_detach(&txr->tso_tx);
    794 
    795 		if (i < __arraycount(adapter->stats.mpc)) {
    796 			evcnt_detach(&adapter->stats.mpc[i]);
    797 		}
    798 		if (i < __arraycount(adapter->stats.pxontxc)) {
    799 			evcnt_detach(&adapter->stats.pxontxc[i]);
    800 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    801 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    802 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    803 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    804 		}
    805 		if (i < __arraycount(adapter->stats.qprc)) {
    806 			evcnt_detach(&adapter->stats.qprc[i]);
    807 			evcnt_detach(&adapter->stats.qptc[i]);
    808 			evcnt_detach(&adapter->stats.qbrc[i]);
    809 			evcnt_detach(&adapter->stats.qbtc[i]);
    810 			evcnt_detach(&adapter->stats.qprdc[i]);
    811 		}
    812 
    813 		evcnt_detach(&rxr->rx_packets);
    814 		evcnt_detach(&rxr->rx_bytes);
    815 		evcnt_detach(&rxr->rx_copies);
    816 		evcnt_detach(&rxr->no_jmbuf);
    817 		evcnt_detach(&rxr->rx_discarded);
    818 		evcnt_detach(&rxr->rx_irq);
    819 	}
    820 	evcnt_detach(&stats->ipcs);
    821 	evcnt_detach(&stats->l4cs);
    822 	evcnt_detach(&stats->ipcs_bad);
    823 	evcnt_detach(&stats->l4cs_bad);
    824 	evcnt_detach(&stats->intzero);
    825 	evcnt_detach(&stats->legint);
    826 	evcnt_detach(&stats->crcerrs);
    827 	evcnt_detach(&stats->illerrc);
    828 	evcnt_detach(&stats->errbc);
    829 	evcnt_detach(&stats->mspdc);
    830 	evcnt_detach(&stats->mlfc);
    831 	evcnt_detach(&stats->mrfc);
    832 	evcnt_detach(&stats->rlec);
    833 	evcnt_detach(&stats->lxontxc);
    834 	evcnt_detach(&stats->lxonrxc);
    835 	evcnt_detach(&stats->lxofftxc);
    836 	evcnt_detach(&stats->lxoffrxc);
    837 
    838 	/* Packet Reception Stats */
    839 	evcnt_detach(&stats->tor);
    840 	evcnt_detach(&stats->gorc);
    841 	evcnt_detach(&stats->tpr);
    842 	evcnt_detach(&stats->gprc);
    843 	evcnt_detach(&stats->mprc);
    844 	evcnt_detach(&stats->bprc);
    845 	evcnt_detach(&stats->prc64);
    846 	evcnt_detach(&stats->prc127);
    847 	evcnt_detach(&stats->prc255);
    848 	evcnt_detach(&stats->prc511);
    849 	evcnt_detach(&stats->prc1023);
    850 	evcnt_detach(&stats->prc1522);
    851 	evcnt_detach(&stats->ruc);
    852 	evcnt_detach(&stats->rfc);
    853 	evcnt_detach(&stats->roc);
    854 	evcnt_detach(&stats->rjc);
    855 	evcnt_detach(&stats->mngprc);
    856 	evcnt_detach(&stats->xec);
    857 
    858 	/* Packet Transmission Stats */
    859 	evcnt_detach(&stats->gotc);
    860 	evcnt_detach(&stats->tpt);
    861 	evcnt_detach(&stats->gptc);
    862 	evcnt_detach(&stats->bptc);
    863 	evcnt_detach(&stats->mptc);
    864 	evcnt_detach(&stats->mngptc);
    865 	evcnt_detach(&stats->ptc64);
    866 	evcnt_detach(&stats->ptc127);
    867 	evcnt_detach(&stats->ptc255);
    868 	evcnt_detach(&stats->ptc511);
    869 	evcnt_detach(&stats->ptc1023);
    870 	evcnt_detach(&stats->ptc1522);
    871 
    872 	ixgbe_free_transmit_structures(adapter);
    873 	ixgbe_free_receive_structures(adapter);
    874 	free(adapter->mta, M_DEVBUF);
    875 
    876 	IXGBE_CORE_LOCK_DESTROY(adapter);
    877 	return (0);
    878 }
    879 
    880 /*********************************************************************
    881  *
    882  *  Shutdown entry point
    883  *
    884  **********************************************************************/
    885 
    886 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
    887 static int
    888 ixgbe_shutdown(device_t dev)
    889 {
    890 	struct adapter *adapter = device_private(dev);
    891 	IXGBE_CORE_LOCK(adapter);
    892 	ixgbe_stop(adapter);
    893 	IXGBE_CORE_UNLOCK(adapter);
    894 	return (0);
    895 }
    896 #endif
    897 
    898 
    899 #ifdef IXGBE_LEGACY_TX
    900 /*********************************************************************
    901  *  Transmit entry point
    902  *
    903  *  ixgbe_start is called by the stack to initiate a transmit.
    904  *  The driver will remain in this routine as long as there are
    905  *  packets to transmit and transmit resources are available.
     906  *  In case resources are not available, the stack is notified and
    907  *  the packet is requeued.
    908  **********************************************************************/
    909 
    910 static void
    911 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    912 {
    913 	int rc;
    914 	struct mbuf    *m_head;
    915 	struct adapter *adapter = txr->adapter;
    916 
    917 	IXGBE_TX_LOCK_ASSERT(txr);
    918 
    919 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    920 		return;
    921 	if (!adapter->link_active)
    922 		return;
    923 
    924 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    925 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    926 			break;
    927 
    928 		IFQ_POLL(&ifp->if_snd, m_head);
    929 		if (m_head == NULL)
    930 			break;
    931 
    932 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    933 			break;
    934 		}
    935 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    936 		if (rc == EFBIG) {
    937 			struct mbuf *mtmp;
    938 
    939 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    940 				m_head = mtmp;
    941 				rc = ixgbe_xmit(txr, m_head);
    942 				if (rc != 0)
    943 					adapter->efbig2_tx_dma_setup.ev_count++;
    944 			} else
    945 				adapter->m_defrag_failed.ev_count++;
    946 		}
    947 		if (rc != 0) {
    948 			m_freem(m_head);
    949 			continue;
    950 		}
    951 
    952 		/* Send a copy of the frame to the BPF listener */
    953 		bpf_mtap(ifp, m_head);
    954 
    955 		/* Set watchdog on */
    956 		getmicrotime(&txr->watchdog_time);
    957 		txr->queue_status = IXGBE_QUEUE_WORKING;
    958 
    959 	}
    960 	return;
    961 }
    962 
    963 /*
    964  * Legacy TX start - called by the stack, this
    965  * always uses the first tx ring, and should
    966  * not be used with multiqueue tx enabled.
    967  */
    968 static void
    969 ixgbe_start(struct ifnet *ifp)
    970 {
    971 	struct adapter *adapter = ifp->if_softc;
    972 	struct tx_ring	*txr = adapter->tx_rings;
    973 
    974 	if (ifp->if_flags & IFF_RUNNING) {
    975 		IXGBE_TX_LOCK(txr);
    976 		ixgbe_start_locked(txr, ifp);
    977 		IXGBE_TX_UNLOCK(txr);
    978 	}
    979 	return;
    980 }
    981 
    982 #else /* ! IXGBE_LEGACY_TX */
    983 
    984 /*
    985 ** Multiqueue Transmit driver
    986 **
    987 */
    988 static int
    989 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    990 {
    991 	struct adapter	*adapter = ifp->if_softc;
    992 	struct ix_queue	*que;
    993 	struct tx_ring	*txr;
    994 	int 		i, err = 0;
    995 #ifdef	RSS
    996 	uint32_t bucket_id;
    997 #endif
    998 
    999 	/* Which queue to use */
   1000 	/*
   1001 	 * When doing RSS, map it to the same outbound queue
   1002 	 * as the incoming flow would be mapped to.
   1003 	 *
    1004 	 * If everything is set up correctly, it should be the
    1005 	 * same bucket as the one the current CPU is in.
   1006 	 */
   1007 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
   1008 #ifdef	RSS
   1009 		if (rss_hash2bucket(m->m_pkthdr.flowid,
   1010 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
   1011 			/* XXX TODO: spit out something if bucket_id > num_queues? */
   1012 			i = bucket_id % adapter->num_queues;
   1013 		} else {
   1014 #endif
   1015 			i = m->m_pkthdr.flowid % adapter->num_queues;
   1016 #ifdef	RSS
   1017 		}
   1018 #endif
   1019 	} else {
   1020 		i = curcpu % adapter->num_queues;
   1021 	}
   1022 
   1023 	txr = &adapter->tx_rings[i];
   1024 	que = &adapter->queues[i];
   1025 
   1026 	err = drbr_enqueue(ifp, txr->br, m);
   1027 	if (err)
   1028 		return (err);
   1029 	if (IXGBE_TX_TRYLOCK(txr)) {
   1030 		ixgbe_mq_start_locked(ifp, txr);
   1031 		IXGBE_TX_UNLOCK(txr);
   1032 	} else
   1033 		softint_schedule(txr->txq_si);
   1034 
   1035 	return (0);
   1036 }
   1037 
   1038 static int
   1039 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
   1040 {
   1041 	struct adapter  *adapter = txr->adapter;
   1042 	struct mbuf     *next;
   1043 	int             enqueued = 0, err = 0;
   1044 
   1045 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1046 	    adapter->link_active == 0)
   1047 		return (ENETDOWN);
   1048 
   1049 	/* Process the queue */
   1050 #if __FreeBSD_version < 901504
   1051 	next = drbr_dequeue(ifp, txr->br);
   1052 	while (next != NULL) {
   1053 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1054 			if (next != NULL)
   1055 				err = drbr_enqueue(ifp, txr->br, next);
   1056 #else
   1057 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1058 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1059 			if (next == NULL) {
   1060 				drbr_advance(ifp, txr->br);
   1061 			} else {
   1062 				drbr_putback(ifp, txr->br, next);
   1063 			}
   1064 #endif
   1065 			break;
   1066 		}
   1067 #if __FreeBSD_version >= 901504
   1068 		drbr_advance(ifp, txr->br);
   1069 #endif
   1070 		enqueued++;
   1071 		/* Send a copy of the frame to the BPF listener */
   1072 		bpf_mtap(ifp, next);
   1073 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1074 			break;
   1075 #if __FreeBSD_version < 901504
   1076 		next = drbr_dequeue(ifp, txr->br);
   1077 #endif
   1078 	}
   1079 
   1080 	if (enqueued > 0) {
   1081 		/* Set watchdog on */
   1082 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1083 		getmicrotime(&txr->watchdog_time);
   1084 	}
   1085 
   1086 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1087 		ixgbe_txeof(txr);
   1088 
   1089 	return (err);
   1090 }
   1091 
   1092 /*
    1093  * Called from a softint to drain queued transmit packets.
   1094  */
   1095 static void
   1096 ixgbe_deferred_mq_start(void *arg, int pending)
   1097 {
   1098 	struct tx_ring *txr = arg;
   1099 	struct adapter *adapter = txr->adapter;
   1100 	struct ifnet *ifp = adapter->ifp;
   1101 
   1102 	IXGBE_TX_LOCK(txr);
   1103 	if (!drbr_empty(ifp, txr->br))
   1104 		ixgbe_mq_start_locked(ifp, txr);
   1105 	IXGBE_TX_UNLOCK(txr);
   1106 }
   1107 
   1108 /*
   1109 ** Flush all ring buffers
   1110 */
   1111 static void
   1112 ixgbe_qflush(struct ifnet *ifp)
   1113 {
   1114 	struct adapter	*adapter = ifp->if_softc;
   1115 	struct tx_ring	*txr = adapter->tx_rings;
   1116 	struct mbuf	*m;
   1117 
   1118 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1119 		IXGBE_TX_LOCK(txr);
   1120 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1121 			m_freem(m);
   1122 		IXGBE_TX_UNLOCK(txr);
   1123 	}
   1124 	if_qflush(ifp);
   1125 }
   1126 #endif /* IXGBE_LEGACY_TX */
   1127 
   1128 static int
   1129 ixgbe_ifflags_cb(struct ethercom *ec)
   1130 {
   1131 	struct ifnet *ifp = &ec->ec_if;
   1132 	struct adapter *adapter = ifp->if_softc;
   1133 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1134 
   1135 	IXGBE_CORE_LOCK(adapter);
   1136 
   1137 	if (change != 0)
   1138 		adapter->if_flags = ifp->if_flags;
   1139 
   1140 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1141 		rc = ENETRESET;
   1142 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1143 		ixgbe_set_promisc(adapter);
   1144 
   1145 	/* Set up VLAN support and filter */
   1146 	ixgbe_setup_vlan_hw_support(adapter);
   1147 
   1148 	IXGBE_CORE_UNLOCK(adapter);
   1149 
   1150 	return rc;
   1151 }
   1152 
   1153 /*********************************************************************
   1154  *  Ioctl entry point
   1155  *
   1156  *  ixgbe_ioctl is called when the user wants to configure the
   1157  *  interface.
   1158  *
   1159  *  return 0 on success, positive on failure
   1160  **********************************************************************/
   1161 
   1162 static int
   1163 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1164 {
   1165 	struct adapter	*adapter = ifp->if_softc;
   1166 	struct ixgbe_hw *hw = &adapter->hw;
   1167 	struct ifcapreq *ifcr = data;
   1168 	struct ifreq	*ifr = data;
   1169 	int             error = 0;
   1170 	int l4csum_en;
   1171 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1172 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1173 
   1174 	switch (command) {
   1175 	case SIOCSIFFLAGS:
   1176 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1177 		break;
   1178 	case SIOCADDMULTI:
   1179 	case SIOCDELMULTI:
   1180 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1181 		break;
   1182 	case SIOCSIFMEDIA:
   1183 	case SIOCGIFMEDIA:
   1184 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1185 		break;
   1186 	case SIOCSIFCAP:
   1187 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1188 		break;
   1189 	case SIOCSIFMTU:
   1190 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1191 		break;
   1192 	default:
   1193 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1194 		break;
   1195 	}
   1196 
   1197 	switch (command) {
   1198 	case SIOCSIFMEDIA:
   1199 	case SIOCGIFMEDIA:
   1200 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1201 	case SIOCGI2C:
   1202 	{
   1203 		struct ixgbe_i2c_req	i2c;
   1204 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1205 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1206 		if (error != 0)
   1207 			break;
   1208 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
   1209 			error = EINVAL;
   1210 			break;
   1211 		}
   1212 		if (i2c.len > sizeof(i2c.data)) {
   1213 			error = EINVAL;
   1214 			break;
   1215 		}
   1216 
   1217 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
   1218 		    i2c.dev_addr, i2c.data);
   1219 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1220 		break;
   1221 	}
   1222 	case SIOCSIFCAP:
   1223 		/* Layer-4 Rx checksum offload has to be turned on and
   1224 		 * off as a unit.
   1225 		 */
   1226 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1227 		if (l4csum_en != l4csum && l4csum_en != 0)
   1228 			return EINVAL;
   1229 		/*FALLTHROUGH*/
   1230 	case SIOCADDMULTI:
   1231 	case SIOCDELMULTI:
   1232 	case SIOCSIFFLAGS:
   1233 	case SIOCSIFMTU:
   1234 	default:
   1235 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1236 			return error;
   1237 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1238 			;
   1239 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1240 			IXGBE_CORE_LOCK(adapter);
   1241 			ixgbe_init_locked(adapter);
   1242 			IXGBE_CORE_UNLOCK(adapter);
   1243 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1244 			/*
   1245 			 * Multicast list has changed; set the hardware filter
   1246 			 * accordingly.
   1247 			 */
   1248 			IXGBE_CORE_LOCK(adapter);
   1249 			ixgbe_disable_intr(adapter);
   1250 			ixgbe_set_multi(adapter);
   1251 			ixgbe_enable_intr(adapter);
   1252 			IXGBE_CORE_UNLOCK(adapter);
   1253 		}
   1254 		return 0;
   1255 	}
   1256 
   1257 	return error;
   1258 }
   1259 
   1260 /*********************************************************************
   1261  *  Init entry point
   1262  *
   1263  *  This routine is used in two ways. It is used by the stack as
   1264  *  init entry point in network interface structure. It is also used
   1265  *  by the driver as a hw/sw initialization routine to get to a
   1266  *  consistent state.
   1267  *
   1268  *  return 0 on success, positive on failure
   1269  **********************************************************************/
   1270 #define IXGBE_MHADD_MFS_SHIFT 16
   1271 
   1272 static void
   1273 ixgbe_init_locked(struct adapter *adapter)
   1274 {
   1275 	struct ifnet   *ifp = adapter->ifp;
   1276 	device_t 	dev = adapter->dev;
   1277 	struct ixgbe_hw *hw = &adapter->hw;
   1278 	u32		k, txdctl, mhadd, gpie;
   1279 	u32		rxdctl, rxctrl;
   1280 
   1281 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1282 
   1283 	KASSERT(mutex_owned(&adapter->core_mtx));
   1284 	INIT_DEBUGOUT("ixgbe_init_locked: begin");
   1285 	hw->adapter_stopped = FALSE;
   1286 	ixgbe_stop_adapter(hw);
   1287         callout_stop(&adapter->timer);
   1288 
   1289 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1290 	adapter->max_frame_size =
   1291 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1292 
   1293         /* reprogram the RAR[0] in case user changed it. */
   1294         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1295 
   1296 	/* Get the latest mac address, User can use a LAA */
   1297 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1298 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1299 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1300 	hw->addr_ctrl.rar_used_count = 1;
   1301 
   1302 	/* Prepare transmit descriptors and buffers */
   1303 	if (ixgbe_setup_transmit_structures(adapter)) {
   1304 		device_printf(dev,"Could not setup transmit structures\n");
   1305 		ixgbe_stop(adapter);
   1306 		return;
   1307 	}
   1308 
   1309 	ixgbe_init_hw(hw);
   1310 	ixgbe_initialize_transmit_units(adapter);
   1311 
   1312 	/* Setup Multicast table */
   1313 	ixgbe_set_multi(adapter);
   1314 
   1315 	/*
   1316 	** Determine the correct mbuf pool
   1317 	** for doing jumbo frames
   1318 	*/
   1319 	if (adapter->max_frame_size <= 2048)
   1320 		adapter->rx_mbuf_sz = MCLBYTES;
   1321 	else if (adapter->max_frame_size <= 4096)
   1322 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1323 	else if (adapter->max_frame_size <= 9216)
   1324 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1325 	else
   1326 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1327 
   1328 	/* Prepare receive descriptors and buffers */
   1329 	if (ixgbe_setup_receive_structures(adapter)) {
   1330 		device_printf(dev,"Could not setup receive structures\n");
   1331 		ixgbe_stop(adapter);
   1332 		return;
   1333 	}
   1334 
   1335 	/* Configure RX settings */
   1336 	ixgbe_initialize_receive_units(adapter);
   1337 
   1338 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1339 
   1340 	/* Enable Fan Failure Interrupt */
   1341 	gpie |= IXGBE_SDP1_GPIEN;
   1342 
   1343 	/* Add for Module detection */
   1344 	if (hw->mac.type == ixgbe_mac_82599EB)
   1345 		gpie |= IXGBE_SDP2_GPIEN;
   1346 
   1347 	/* Thermal Failure Detection */
   1348 	if (hw->mac.type == ixgbe_mac_X540)
   1349 		gpie |= IXGBE_SDP0_GPIEN;
   1350 
   1351 	if (adapter->msix > 1) {
   1352 		/* Enable Enhanced MSIX mode */
   1353 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1354 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1355 		    IXGBE_GPIE_OCD;
   1356 	}
   1357 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1358 
   1359 	/* Set MTU size */
   1360 	if (ifp->if_mtu > ETHERMTU) {
   1361 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1362 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1363 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1364 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1365 	}
   1366 
   1367 	/* Now enable all the queues */
   1368 
   1369 	for (int i = 0; i < adapter->num_queues; i++) {
   1370 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1371 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1372 		/* Set WTHRESH to 8, burst writeback */
   1373 		txdctl |= (8 << 16);
   1374 		/*
   1375 		 * When the internal queue falls below PTHRESH (32),
   1376 		 * start prefetching as long as there are at least
   1377 		 * HTHRESH (1) buffers ready. The values are taken
   1378 		 * from the Intel linux driver 3.8.21.
   1379 		 * Prefetching enables tx line rate even with 1 queue.
   1380 		 */
   1381 		txdctl |= (32 << 0) | (1 << 8);
   1382 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1383 	}
   1384 
   1385 	for (int i = 0; i < adapter->num_queues; i++) {
   1386 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1387 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1388 			/*
   1389 			** PTHRESH = 21
   1390 			** HTHRESH = 4
   1391 			** WTHRESH = 8
   1392 			*/
   1393 			rxdctl &= ~0x3FFFFF;
   1394 			rxdctl |= 0x080420;
   1395 		}
   1396 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1397 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1398 		/* XXX I don't trust this loop, and I don't trust the
   1399 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1400 		 */
   1401 		for (k = 0; k < 10; k++) {
   1402 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1403 			    IXGBE_RXDCTL_ENABLE)
   1404 				break;
   1405 			else
   1406 				msec_delay(1);
   1407 		}
   1408 		wmb();
   1409 #ifdef DEV_NETMAP
   1410 		/*
   1411 		 * In netmap mode, we must preserve the buffers made
   1412 		 * available to userspace before the if_init()
   1413 		 * (this is true by default on the TX side, because
   1414 		 * init makes all buffers available to userspace).
   1415 		 *
   1416 		 * netmap_reset() and the device specific routines
   1417 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1418 		 * buffers at the end of the NIC ring, so here we
   1419 		 * must set the RDT (tail) register to make sure
   1420 		 * they are not overwritten.
   1421 		 *
   1422 		 * In this driver the NIC ring starts at RDH = 0,
   1423 		 * RDT points to the last slot available for reception (?),
   1424 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1425 		 */
   1426 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1427 			struct netmap_adapter *na = NA(adapter->ifp);
   1428 			struct netmap_kring *kring = &na->rx_rings[i];
   1429 			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
   1430 
   1431 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1432 		} else
   1433 #endif /* DEV_NETMAP */
   1434 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1435 	}
   1436 
   1437 	/* Enable Receive engine */
   1438 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1439 	if (hw->mac.type == ixgbe_mac_82598EB)
   1440 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1441 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1442 	ixgbe_enable_rx_dma(hw, rxctrl);
   1443 
   1444 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1445 
   1446 	/* Set up MSI/X routing */
   1447 	if (ixgbe_enable_msix)  {
   1448 		ixgbe_configure_ivars(adapter);
   1449 		/* Set up auto-mask */
   1450 		if (hw->mac.type == ixgbe_mac_82598EB)
   1451 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1452 		else {
   1453 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1454 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1455 		}
   1456 	} else {  /* Simple settings for Legacy/MSI */
   1457                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1458                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1459 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1460 	}
   1461 
   1462 #ifdef IXGBE_FDIR
   1463 	/* Init Flow director */
   1464 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1465 		u32 hdrm = 32 << fdir_pballoc;
   1466 
   1467 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1468 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1469 	}
   1470 #endif
   1471 
   1472 	/*
   1473 	** Check on any SFP devices that
   1474 	** need to be kick-started
   1475 	*/
   1476 	if (hw->phy.type == ixgbe_phy_none) {
   1477 		int err = hw->phy.ops.identify(hw);
   1478 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1479                 	device_printf(dev,
   1480 			    "Unsupported SFP+ module type was detected.\n");
   1481 			return;
   1482         	}
   1483 	}
   1484 
   1485 	/* Set moderation on the Link interrupt */
   1486 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1487 
   1488 	/* Config/Enable Link */
   1489 	ixgbe_config_link(adapter);
   1490 
   1491 	/* Hardware Packet Buffer & Flow Control setup */
   1492 	{
   1493 		u32 rxpb, frame, size, tmp;
   1494 
   1495 		frame = adapter->max_frame_size;
   1496 
   1497 		/* Calculate High Water */
   1498 		if (hw->mac.type == ixgbe_mac_X540)
   1499 			tmp = IXGBE_DV_X540(frame, frame);
   1500 		else
   1501 			tmp = IXGBE_DV(frame, frame);
   1502 		size = IXGBE_BT2KB(tmp);
   1503 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1504 		hw->fc.high_water[0] = rxpb - size;
   1505 
   1506 		/* Now calculate Low Water */
   1507 		if (hw->mac.type == ixgbe_mac_X540)
   1508 			tmp = IXGBE_LOW_DV_X540(frame);
   1509 		else
   1510 			tmp = IXGBE_LOW_DV(frame);
   1511 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1512 
   1513 		hw->fc.requested_mode = adapter->fc;
   1514 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1515 		hw->fc.send_xon = TRUE;
   1516 	}
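        	/*
        	 * In short (per the macros above): high water is the RX
        	 * packet buffer size in KB minus the worst-case delay value
        	 * for one maximum frame converted by IXGBE_BT2KB; low water
        	 * comes from the corresponding LOW_DV macro.  Only traffic
        	 * class 0 is programmed here.
        	 */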
   1517 	/* Initialize the FC settings */
   1518 	ixgbe_start_hw(hw);
   1519 
   1520 	/* Set up VLAN support and filter */
   1521 	ixgbe_setup_vlan_hw_support(adapter);
   1522 
   1523 	/* And now turn on interrupts */
   1524 	ixgbe_enable_intr(adapter);
   1525 
   1526 	/* Now inform the stack we're ready */
   1527 	ifp->if_flags |= IFF_RUNNING;
   1528 
   1529 	return;
   1530 }
   1531 
   1532 static int
   1533 ixgbe_init(struct ifnet *ifp)
   1534 {
   1535 	struct adapter *adapter = ifp->if_softc;
   1536 
   1537 	IXGBE_CORE_LOCK(adapter);
   1538 	ixgbe_init_locked(adapter);
   1539 	IXGBE_CORE_UNLOCK(adapter);
   1540 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1541 }
   1542 
   1543 
   1544 /*
   1545 **
   1546 ** MSIX Interrupt Handlers and Tasklets
   1547 **
   1548 */
   1549 
   1550 static inline void
   1551 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1552 {
   1553 	struct ixgbe_hw *hw = &adapter->hw;
   1554 	u64	queue = (u64)(1ULL << vector);
   1555 	u32	mask;
   1556 
   1557 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1558                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1559                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1560 	} else {
   1561                 mask = (queue & 0xFFFFFFFF);
   1562                 if (mask)
   1563                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1564                 mask = (queue >> 32);
   1565                 if (mask)
   1566                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1567 	}
   1568 }
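        
        /*
         * Note: on the newer MACs the 64 possible queue vector bits are
         * split across two 32-bit registers (EIMS_EX/EIMC_EX(0) for
         * vectors 0-31, (1) for 32-63), hence the mask splitting above
         * and in ixgbe_disable_queue() below.
         */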
   1569 
   1570 __unused static inline void
   1571 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1572 {
   1573 	struct ixgbe_hw *hw = &adapter->hw;
   1574 	u64	queue = (u64)(1ULL << vector);
   1575 	u32	mask;
   1576 
   1577 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1578                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1579                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1580 	} else {
   1581                 mask = (queue & 0xFFFFFFFF);
   1582                 if (mask)
   1583                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1584                 mask = (queue >> 32);
   1585                 if (mask)
   1586                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1587 	}
   1588 }
   1589 
   1590 static void
   1591 ixgbe_handle_que(void *context)
   1592 {
   1593 	struct ix_queue *que = context;
   1594 	struct adapter  *adapter = que->adapter;
   1595 	struct tx_ring  *txr = que->txr;
   1596 	struct ifnet    *ifp = adapter->ifp;
   1597 
   1598 	adapter->handleq.ev_count++;
   1599 
   1600 	if (ifp->if_flags & IFF_RUNNING) {
   1601 		ixgbe_rxeof(que);
   1602 		IXGBE_TX_LOCK(txr);
   1603 		ixgbe_txeof(txr);
   1604 #ifndef IXGBE_LEGACY_TX
   1605 		if (!drbr_empty(ifp, txr->br))
   1606 			ixgbe_mq_start_locked(ifp, txr);
   1607 #else
   1608 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1609 			ixgbe_start_locked(txr, ifp);
   1610 #endif
   1611 		IXGBE_TX_UNLOCK(txr);
   1612 	}
   1613 
   1614 	/* Reenable this interrupt */
   1615 	if (que->res != NULL)
   1616 		ixgbe_enable_queue(adapter, que->msix);
   1617 	else
   1618 		ixgbe_enable_intr(adapter);
   1619 	return;
   1620 }
   1621 
   1622 
   1623 /*********************************************************************
   1624  *
   1625  *  Legacy Interrupt Service routine
   1626  *
   1627  **********************************************************************/
   1628 
   1629 static int
   1630 ixgbe_legacy_irq(void *arg)
   1631 {
   1632 	struct ix_queue *que = arg;
   1633 	struct adapter	*adapter = que->adapter;
   1634 	struct ixgbe_hw	*hw = &adapter->hw;
   1635 	struct ifnet    *ifp = adapter->ifp;
   1636 	struct 		tx_ring *txr = adapter->tx_rings;
   1637 	bool		more = false;
   1638 	u32       	reg_eicr;
   1639 
   1640 
   1641 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1642 
   1643 	adapter->stats.legint.ev_count++;
   1644 	++que->irqs;
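        	/*
        	 * A zero EICR presumably means the interrupt was not ours,
        	 * e.g. another device sharing the INTx line (reading EICR
        	 * clears the asserted cause bits).
        	 */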
   1645 	if (reg_eicr == 0) {
   1646 		adapter->stats.intzero.ev_count++;
   1647 		if ((ifp->if_flags & IFF_UP) != 0)
   1648 			ixgbe_enable_intr(adapter);
   1649 		return 0;
   1650 	}
   1651 
   1652 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1653 #ifdef __NetBSD__
   1654 		/* Don't run ixgbe_rxeof in interrupt context */
   1655 		more = true;
   1656 #else
   1657 		more = ixgbe_rxeof(que);
   1658 #endif
   1659 
   1660 		IXGBE_TX_LOCK(txr);
   1661 		ixgbe_txeof(txr);
   1662 #ifdef IXGBE_LEGACY_TX
   1663 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1664 			ixgbe_start_locked(txr, ifp);
   1665 #else
   1666 		if (!drbr_empty(ifp, txr->br))
   1667 			ixgbe_mq_start_locked(ifp, txr);
   1668 #endif
   1669 		IXGBE_TX_UNLOCK(txr);
   1670 	}
   1671 
   1672 	/* Check for fan failure */
   1673 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1674 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1675                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1676 		    "REPLACE IMMEDIATELY!!\n");
   1677 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1678 	}
   1679 
   1680 	/* Link status change */
   1681 	if (reg_eicr & IXGBE_EICR_LSC)
   1682 		softint_schedule(adapter->link_si);
   1683 
   1684 	if (more)
   1685 #ifndef IXGBE_LEGACY_TX
   1686 		softint_schedule(txr->txq_si);
   1687 #else
   1688 		softint_schedule(que->que_si);
   1689 #endif
   1690 	else
   1691 		ixgbe_enable_intr(adapter);
   1692 	return 1;
   1693 }
   1694 
   1695 
   1696 #if defined(NETBSD_MSI_OR_MSIX)
   1697 /*********************************************************************
   1698  *
   1699  *  MSIX Queue Interrupt Service routine
   1700  *
   1701  **********************************************************************/
   1702 static int
   1703 ixgbe_msix_que(void *arg)
   1704 {
   1705 	struct ix_queue	*que = arg;
   1706 	struct adapter  *adapter = que->adapter;
   1707 	struct ifnet    *ifp = adapter->ifp;
   1708 	struct tx_ring	*txr = que->txr;
   1709 	struct rx_ring	*rxr = que->rxr;
   1710 	bool		more;
   1711 	u32		newitr = 0;
   1712 
   1713 	/* Protect against spurious interrupts */
   1714 	if ((ifp->if_flags & IFF_RUNNING) == 0)
   1715 		return 0;
   1716 
   1717 	ixgbe_disable_queue(adapter, que->msix);
   1718 	++que->irqs;
   1719 
   1720 #ifdef __NetBSD__
   1721 	/* Don't run ixgbe_rxeof in interrupt context */
   1722 	more = true;
   1723 #else
   1724 	more = ixgbe_rxeof(que);
   1725 #endif
   1726 
   1727 	IXGBE_TX_LOCK(txr);
   1728 	ixgbe_txeof(txr);
   1729 #ifdef IXGBE_LEGACY_TX
   1730 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1731 		ixgbe_start_locked(txr, ifp);
   1732 #else
   1733 	if (!drbr_empty(ifp, txr->br))
   1734 		ixgbe_mq_start_locked(ifp, txr);
   1735 #endif
   1736 	IXGBE_TX_UNLOCK(txr);
   1737 
   1738 	/* Do AIM now? */
   1739 
   1740 	if (ixgbe_enable_aim == FALSE)
   1741 		goto no_calc;
   1742 	/*
   1743 	** Do Adaptive Interrupt Moderation:
   1744         **  - Write out last calculated setting
   1745 	**  - Calculate based on average size over
   1746 	**    the last interval.
   1747 	*/
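        	/*
        	** Illustrative example: an average of 1500 bytes/packet
        	** gives newitr = 1500 + 24 = 1524; that is clamped to at
        	** most 3000 and, falling outside the 300-1200 mid range,
        	** halved to 762 before being written on the next pass.
        	*/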
   1748         if (que->eitr_setting)
   1749                 IXGBE_WRITE_REG(&adapter->hw,
   1750                     IXGBE_EITR(que->msix), que->eitr_setting);
   1751 
   1752         que->eitr_setting = 0;
   1753 
   1754         /* Idle, do nothing */
   1755         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1756                 goto no_calc;
   1757 
   1758 	if ((txr->bytes) && (txr->packets))
   1759                	newitr = txr->bytes/txr->packets;
   1760 	if ((rxr->bytes) && (rxr->packets))
   1761 		newitr = max(newitr,
   1762 		    (rxr->bytes / rxr->packets));
   1763 	newitr += 24; /* account for hardware frame, crc */
   1764 
   1765 	/* set an upper boundary */
   1766 	newitr = min(newitr, 3000);
   1767 
   1768 	/* Be nice to the mid range */
   1769 	if ((newitr > 300) && (newitr < 1200))
   1770 		newitr = (newitr / 3);
   1771 	else
   1772 		newitr = (newitr / 2);
   1773 
   1774         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1775                 newitr |= newitr << 16;
   1776         else
   1777                 newitr |= IXGBE_EITR_CNT_WDIS;
   1778 
   1779         /* save for next interrupt */
   1780         que->eitr_setting = newitr;
   1781 
   1782         /* Reset state */
   1783         txr->bytes = 0;
   1784         txr->packets = 0;
   1785         rxr->bytes = 0;
   1786         rxr->packets = 0;
   1787 
   1788 no_calc:
   1789 	if (more)
   1790 		softint_schedule(que->que_si);
   1791 	else
   1792 		ixgbe_enable_queue(adapter, que->msix);
   1793 	return 1;
   1794 }
   1795 
   1796 
   1797 static int
   1798 ixgbe_msix_link(void *arg)
   1799 {
   1800 	struct adapter	*adapter = arg;
   1801 	struct ixgbe_hw *hw = &adapter->hw;
   1802 	u32		reg_eicr;
   1803 
   1804 	++adapter->link_irq.ev_count;
   1805 
   1806 	/* First get the cause */
   1807 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1808 	/* Be sure the queue bits are not cleared */
   1809 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
   1810 	/* Clear interrupt with write */
   1811 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1812 
   1813 	/* Link status change */
   1814 	if (reg_eicr & IXGBE_EICR_LSC)
   1815 		softint_schedule(adapter->link_si);
   1816 
   1817 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1818 #ifdef IXGBE_FDIR
   1819 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1820 			/* This is probably overkill :) */
   1821 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1822 				return 1;
   1823                 	/* Disable the interrupt */
   1824 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1825 			softint_schedule(adapter->fdir_si);
   1826 		} else
   1827 #endif
   1828 		if (reg_eicr & IXGBE_EICR_ECC) {
   1829                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1830 			    "Please Reboot!!\n");
   1831 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   1832 		} else
   1833 
   1834 		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1835                 	/* Clear the interrupt */
   1836                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1837 			softint_schedule(adapter->msf_si);
   1838         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1839                 	/* Clear the interrupt */
   1840                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1841 			softint_schedule(adapter->mod_si);
   1842 		}
   1843         }
   1844 
   1845 	/* Check for fan failure */
   1846 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1847 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1848                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1849 		    "REPLACE IMMEDIATELY!!\n");
   1850 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1851 	}
   1852 
   1853 	/* Check for over temp condition */
   1854 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1855 	    (reg_eicr & IXGBE_EICR_TS)) {
   1856 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1857 		    "PHY IS SHUT DOWN!!\n");
   1858 		device_printf(adapter->dev, "System shutdown required\n");
   1859 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1860 	}
   1861 
   1862 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1863 	return 1;
   1864 }
   1865 #endif
   1866 
   1867 /*********************************************************************
   1868  *
   1869  *  Media Ioctl callback
   1870  *
   1871  *  This routine is called whenever the user queries the status of
   1872  *  the interface using ifconfig.
   1873  *
   1874  **********************************************************************/
   1875 static void
   1876 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1877 {
   1878 	struct adapter *adapter = ifp->if_softc;
   1879 	struct ixgbe_hw *hw = &adapter->hw;
   1880 
   1881 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1882 	IXGBE_CORE_LOCK(adapter);
   1883 	ixgbe_update_link_status(adapter);
   1884 
   1885 	ifmr->ifm_status = IFM_AVALID;
   1886 	ifmr->ifm_active = IFM_ETHER;
   1887 
   1888 	if (!adapter->link_active) {
   1889 		IXGBE_CORE_UNLOCK(adapter);
   1890 		return;
   1891 	}
   1892 
   1893 	ifmr->ifm_status |= IFM_ACTIVE;
   1894 
   1895 	/*
   1896 	 * Not every NIC is 1000baseSX; the X540T, for example, is not.
   1897 	 * We must set the media correctly based on the NIC model.
   1898 	 */
   1899 	switch (hw->device_id) {
   1900 	case IXGBE_DEV_ID_X540T:
   1901 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1902 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1903 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1904 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
   1905 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1906 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1907 		break;
   1908 	default:
   1909 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1910 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1911 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1912 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1913 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1914 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1915 		break;
   1916 	}
   1917 
   1918 	IXGBE_CORE_UNLOCK(adapter);
   1919 
   1920 	return;
   1921 }
   1922 
   1923 /*********************************************************************
   1924  *
   1925  *  Media Ioctl callback
   1926  *
   1927  *  This routine is called when the user changes speed/duplex using
   1928  *  the media/mediaopt options with ifconfig.
   1929  *
   1930  **********************************************************************/
   1931 static int
   1932 ixgbe_media_change(struct ifnet * ifp)
   1933 {
   1934 	struct adapter *adapter = ifp->if_softc;
   1935 	struct ifmedia *ifm = &adapter->media;
   1936 
   1937 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1938 
   1939 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1940 		return (EINVAL);
   1941 
   1942         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1943 	case IFM_10G_T:
   1944 	case IFM_AUTO:
   1945 		adapter->hw.phy.autoneg_advertised =
   1946 		    IXGBE_LINK_SPEED_100_FULL |
   1947 		    IXGBE_LINK_SPEED_1GB_FULL |
   1948 		    IXGBE_LINK_SPEED_10GB_FULL;
   1949                 break;
   1950         default:
   1951                 device_printf(adapter->dev, "Only auto media type\n");
   1952 		return (EINVAL);
   1953         }
   1954 
   1955 	return (0);
   1956 }
   1957 
   1958 /*********************************************************************
   1959  *
   1960  *  This routine maps the mbufs to tx descriptors, allowing the
   1961  *  TX engine to transmit the packets.
   1962  *  	- return 0 on success, positive on failure
   1963  *
   1964  **********************************************************************/
   1965 
   1966 static int
   1967 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1968 {
   1969 	struct m_tag *mtag;
   1970 	struct adapter  *adapter = txr->adapter;
   1971 	struct ethercom *ec = &adapter->osdep.ec;
   1972 	u32		olinfo_status = 0, cmd_type_len;
   1973 	int             i, j, error;
   1974 	int		first;
   1975 	bus_dmamap_t	map;
   1976 	struct ixgbe_tx_buf *txbuf;
   1977 	union ixgbe_adv_tx_desc *txd = NULL;
   1978 
   1979 	/* Basic descriptor defines */
   1980         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1981 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1982 
   1983 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1984         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1985 
   1986         /*
   1987          * Important to capture the first descriptor
   1988          * used because it will contain the index of
   1989          * the one we tell the hardware to report back
   1990          */
   1991         first = txr->next_avail_desc;
   1992 	txbuf = &txr->tx_buffers[first];
   1993 	map = txbuf->map;
   1994 
   1995 	/*
   1996 	 * Map the packet for DMA.
   1997 	 */
   1998 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1999 	    m_head, BUS_DMA_NOWAIT);
   2000 
   2001 	if (__predict_false(error)) {
   2002 
   2003 		switch (error) {
   2004 		case EAGAIN:
   2005 			adapter->eagain_tx_dma_setup.ev_count++;
   2006 			return EAGAIN;
   2007 		case ENOMEM:
   2008 			adapter->enomem_tx_dma_setup.ev_count++;
   2009 			return EAGAIN;
   2010 		case EFBIG:
   2011 			/*
   2012 			 * XXX Try it again?
   2013 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   2014 			 */
   2015 			adapter->efbig_tx_dma_setup.ev_count++;
   2016 			return error;
   2017 		case EINVAL:
   2018 			adapter->einval_tx_dma_setup.ev_count++;
   2019 			return error;
   2020 		default:
   2021 			adapter->other_tx_dma_setup.ev_count++;
   2022 			return error;
   2023 		}
   2024 	}
   2025 
   2026 	/* Make certain there are enough descriptors */
   2027 	if (map->dm_nsegs > txr->tx_avail - 2) {
   2028 		txr->no_desc_avail.ev_count++;
   2029 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   2030 		return EAGAIN;
   2031 	}
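        	/*
        	 * The "- 2" above presumably leaves room for the offload
        	 * context descriptor consumed by ixgbe_tx_ctx_setup() below.
        	 */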
   2032 
   2033 	/*
   2034 	** Set up the appropriate offload context;
   2035 	** this will consume the first descriptor.
   2036 	*/
   2037 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   2038 	if (__predict_false(error)) {
   2039 		return (error);
   2040 	}
   2041 
   2042 #ifdef IXGBE_FDIR
   2043 	/* Do the flow director magic */
   2044 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2045 		++txr->atr_count;
   2046 		if (txr->atr_count >= atr_sample_rate) {
   2047 			ixgbe_atr(txr, m_head);
   2048 			txr->atr_count = 0;
   2049 		}
   2050 	}
   2051 #endif
   2052 
   2053 	i = txr->next_avail_desc;
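        	/* Fill one advanced data descriptor per DMA segment. */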
   2054 	for (j = 0; j < map->dm_nsegs; j++) {
   2055 		bus_size_t seglen;
   2056 		bus_addr_t segaddr;
   2057 
   2058 		txbuf = &txr->tx_buffers[i];
   2059 		txd = &txr->tx_base[i];
   2060 		seglen = map->dm_segs[j].ds_len;
   2061 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2062 
   2063 		txd->read.buffer_addr = segaddr;
   2064 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
   2065 		    cmd_type_len |seglen);
   2066 		txd->read.olinfo_status = htole32(olinfo_status);
   2067 
   2068 		if (++i == txr->num_desc)
   2069 			i = 0;
   2070 	}
   2071 
   2072 	txd->read.cmd_type_len |=
   2073 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2074 	txr->tx_avail -= map->dm_nsegs;
   2075 	txr->next_avail_desc = i;
   2076 
   2077 	txbuf->m_head = m_head;
   2078 	/*
   2079 	** Here we swap the maps so the last descriptor,
   2080 	** which gets the completion interrupt, has the
   2081 	** real map, and the first descriptor gets the
   2082 	** unused map from this last descriptor.
   2083 	*/
   2084 	txr->tx_buffers[first].map = txbuf->map;
   2085 	txbuf->map = map;
   2086 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2087 	    BUS_DMASYNC_PREWRITE);
   2088 
   2089         /* Set the EOP descriptor that will be marked done */
   2090         txbuf = &txr->tx_buffers[first];
   2091 	txbuf->eop = txd;
   2092 
   2093         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2094 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2095 	/*
   2096 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
   2097 	 * hardware that this frame is available to transmit.
   2098 	 */
   2099 	++txr->total_packets.ev_count;
   2100 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2101 
   2102 	return 0;
   2103 }
   2104 
   2105 static void
   2106 ixgbe_set_promisc(struct adapter *adapter)
   2107 {
   2108 	struct ether_multi *enm;
   2109 	struct ether_multistep step;
   2110 	u_int32_t       reg_rctl;
   2111 	struct ethercom *ec = &adapter->osdep.ec;
   2112 	struct ifnet   *ifp = adapter->ifp;
   2113 	int		mcnt = 0;
   2114 
   2115 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2116 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2117 	if (ifp->if_flags & IFF_ALLMULTI)
   2118 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2119 	else {
   2120 		ETHER_FIRST_MULTI(step, ec, enm);
   2121 		while (enm != NULL) {
   2122 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2123 				break;
   2124 			mcnt++;
   2125 			ETHER_NEXT_MULTI(step, enm);
   2126 		}
   2127 	}
   2128 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2129 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2130 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2131 
   2132 	if (ifp->if_flags & IFF_PROMISC) {
   2133 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2134 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2135 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2136 		reg_rctl |= IXGBE_FCTRL_MPE;
   2137 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2138 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2139 	}
   2140 	return;
   2141 }
   2142 
   2143 
   2144 /*********************************************************************
   2145  *  Multicast Update
   2146  *
   2147  *  This routine is called whenever the multicast address list is updated.
   2148  *
   2149  **********************************************************************/
   2150 #define IXGBE_RAR_ENTRIES 16
   2151 
   2152 static void
   2153 ixgbe_set_multi(struct adapter *adapter)
   2154 {
   2155 	struct ether_multi *enm;
   2156 	struct ether_multistep step;
   2157 	u32	fctrl;
   2158 	u8	*mta;
   2159 	u8	*update_ptr;
   2160 	int	mcnt = 0;
   2161 	struct ethercom *ec = &adapter->osdep.ec;
   2162 	struct ifnet   *ifp = adapter->ifp;
   2163 
   2164 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2165 
   2166 	mta = adapter->mta;
   2167 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2168 	    MAX_NUM_MULTICAST_ADDRESSES);
   2169 
   2170 	ifp->if_flags &= ~IFF_ALLMULTI;
   2171 	ETHER_FIRST_MULTI(step, ec, enm);
   2172 	while (enm != NULL) {
   2173 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2174 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2175 			ETHER_ADDR_LEN) != 0)) {
   2176 			ifp->if_flags |= IFF_ALLMULTI;
   2177 			break;
   2178 		}
   2179 		bcopy(enm->enm_addrlo,
   2180 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2181 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2182 		mcnt++;
   2183 		ETHER_NEXT_MULTI(step, enm);
   2184 	}
   2185 
   2186 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2187 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2188 	if (ifp->if_flags & IFF_PROMISC)
   2189 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2190 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2191 		fctrl |= IXGBE_FCTRL_MPE;
   2192 	}
   2193 
   2194 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2195 
   2196 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2197 		update_ptr = mta;
   2198 		ixgbe_update_mc_addr_list(&adapter->hw,
   2199 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2200 	}
   2201 
   2202 	return;
   2203 }
   2204 
   2205 /*
   2206  * This is an iterator function needed by the multicast
   2207  * shared code. It feeds the shared code routine the
   2208  * addresses from ixgbe_set_multi()'s array one by one.
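         * It is handed to ixgbe_update_mc_addr_list(), which calls it
         * back once per address.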
   2209  */
   2210 static u8 *
   2211 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2212 {
   2213 	u8 *addr = *update_ptr;
   2214 	u8 *newptr;
   2215 	*vmdq = 0;
   2216 
   2217 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2218 	*update_ptr = newptr;
   2219 	return addr;
   2220 }
   2221 
   2222 
   2223 /*********************************************************************
   2224  *  Timer routine
   2225  *
   2226  *  This routine checks for link status, updates statistics,
   2227  *  and runs the watchdog check.
   2228  *
   2229  **********************************************************************/
   2230 
   2231 static void
   2232 ixgbe_local_timer1(void *arg)
   2233 {
   2234 	struct adapter	*adapter = arg;
   2235 	device_t	dev = adapter->dev;
   2236 	struct ix_queue *que = adapter->queues;
   2237 	struct tx_ring	*txr = adapter->tx_rings;
   2238 	int		hung = 0, paused = 0;
   2239 
   2240 	KASSERT(mutex_owned(&adapter->core_mtx));
   2241 
   2242 	/* Check for pluggable optics */
   2243 	if (adapter->sfp_probe)
   2244 		if (!ixgbe_sfp_probe(adapter))
   2245 			goto out; /* Nothing to do */
   2246 
   2247 	ixgbe_update_link_status(adapter);
   2248 	ixgbe_update_stats_counters(adapter);
   2249 
   2250 	/*
   2251 	 * If the interface has been paused
   2252 	 * then don't do the watchdog check
   2253 	 */
   2254 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2255 		paused = 1;
   2256 
   2257 	/*
   2258 	** Check the TX queues status
   2259 	**      - watchdog only if all queues show hung
   2260 	*/
   2261 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2262 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2263 		    (paused == 0))
   2264 			++hung;
   2265 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2266 #ifndef IXGBE_LEGACY_TX
   2267 			softint_schedule(txr->txq_si);
   2268 #else
   2269 			softint_schedule(que->que_si);
   2270 #endif
   2271 	}
   2272 	/* Only truly watchdog if all queues show hung */
   2273 	if (hung == adapter->num_queues)
   2274 		goto watchdog;
   2275 
   2276 out:
   2277 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2278 	return;
   2279 
   2280 watchdog:
   2281 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
   2282 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2283 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2284 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
   2285 	device_printf(dev,"TX(%d) desc avail = %d, "
   2286 	    "Next TX to Clean = %d\n",
   2287 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2288 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2289 	adapter->watchdog_events.ev_count++;
   2290 	ixgbe_init_locked(adapter);
   2291 }
   2292 
   2293 static void
   2294 ixgbe_local_timer(void *arg)
   2295 {
   2296 	struct adapter *adapter = arg;
   2297 
   2298 	IXGBE_CORE_LOCK(adapter);
   2299 	ixgbe_local_timer1(adapter);
   2300 	IXGBE_CORE_UNLOCK(adapter);
   2301 }
   2302 
   2303 /*
   2304 ** Note: this routine updates the OS on the link state;
   2305 **	the real check of the hardware only happens with
   2306 **	a link interrupt.
   2307 */
   2308 static void
   2309 ixgbe_update_link_status(struct adapter *adapter)
   2310 {
   2311 	struct ifnet	*ifp = adapter->ifp;
   2312 	device_t dev = adapter->dev;
   2313 
   2314 
   2315 	if (adapter->link_up){
   2316 		if (adapter->link_active == FALSE) {
   2317 			if (bootverbose)
   2318 				device_printf(dev,"Link is up %d Gbps %s\n",
   2319 				    ((adapter->link_speed == 128)? 10:1),
   2320 				    "Full Duplex");
   2321 			adapter->link_active = TRUE;
   2322 			/* Update any Flow Control changes */
   2323 			ixgbe_fc_enable(&adapter->hw);
   2324 			if_link_state_change(ifp, LINK_STATE_UP);
   2325 		}
   2326 	} else { /* Link down */
   2327 		if (adapter->link_active == TRUE) {
   2328 			if (bootverbose)
   2329 				device_printf(dev,"Link is Down\n");
   2330 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2331 			adapter->link_active = FALSE;
   2332 		}
   2333 	}
   2334 
   2335 	return;
   2336 }
   2337 
   2338 
   2339 static void
   2340 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2341 {
   2342 	struct adapter *adapter = ifp->if_softc;
   2343 
   2344 	IXGBE_CORE_LOCK(adapter);
   2345 	ixgbe_stop(adapter);
   2346 	IXGBE_CORE_UNLOCK(adapter);
   2347 }
   2348 
   2349 /*********************************************************************
   2350  *
   2351  *  This routine disables all traffic on the adapter by issuing a
   2352  *  global reset on the MAC and deallocating TX/RX buffers.
   2353  *
   2354  **********************************************************************/
   2355 
   2356 static void
   2357 ixgbe_stop(void *arg)
   2358 {
   2359 	struct ifnet   *ifp;
   2360 	struct adapter *adapter = arg;
   2361 	struct ixgbe_hw *hw = &adapter->hw;
   2362 	ifp = adapter->ifp;
   2363 
   2364 	KASSERT(mutex_owned(&adapter->core_mtx));
   2365 
   2366 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2367 	ixgbe_disable_intr(adapter);
   2368 	callout_stop(&adapter->timer);
   2369 
   2370 	/* Let the stack know...*/
   2371 	ifp->if_flags &= ~IFF_RUNNING;
   2372 
   2373 	ixgbe_reset_hw(hw);
   2374 	hw->adapter_stopped = FALSE;
   2375 	ixgbe_stop_adapter(hw);
   2376 	if (hw->mac.type == ixgbe_mac_82599EB)
   2377 		ixgbe_stop_mac_link_on_d3_82599(hw);
   2378 	/* Turn off the laser - noop with no optics */
   2379 	ixgbe_disable_tx_laser(hw);
   2380 
   2381 	/* Update the stack */
   2382 	adapter->link_up = FALSE;
   2383 	ixgbe_update_link_status(adapter);
   2384 
   2385 	/* reprogram RAR[0] in case the user changed it. */
   2386 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2387 
   2388 	return;
   2389 }
   2390 
   2391 
   2392 /*********************************************************************
   2393  *
   2394  *  Determine hardware revision.
   2395  *
   2396  **********************************************************************/
   2397 static void
   2398 ixgbe_identify_hardware(struct adapter *adapter)
   2399 {
   2400 	pcitag_t tag;
   2401 	pci_chipset_tag_t pc;
   2402 	pcireg_t subid, id;
   2403 	struct ixgbe_hw *hw = &adapter->hw;
   2404 
   2405 	pc = adapter->osdep.pc;
   2406 	tag = adapter->osdep.tag;
   2407 
   2408 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2409 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2410 
   2411 	/* Save off the information about this board */
   2412 	hw->vendor_id = PCI_VENDOR(id);
   2413 	hw->device_id = PCI_PRODUCT(id);
   2414 	hw->revision_id =
   2415 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2416 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2417 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2418 
   2419 	/* We need this here to set the num_segs below */
   2420 	ixgbe_set_mac_type(hw);
   2421 
   2422 	/* Pick up the 82599 and VF settings */
   2423 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2424 		hw->phy.smart_speed = ixgbe_smart_speed;
   2425 		adapter->num_segs = IXGBE_82599_SCATTER;
   2426 	} else
   2427 		adapter->num_segs = IXGBE_82598_SCATTER;
   2428 
   2429 	return;
   2430 }
   2431 
   2432 /*********************************************************************
   2433  *
   2434  *  Determine optic type
   2435  *
   2436  **********************************************************************/
   2437 static void
   2438 ixgbe_setup_optics(struct adapter *adapter)
   2439 {
   2440 	struct ixgbe_hw *hw = &adapter->hw;
   2441 	int		layer;
   2442 
   2443 	layer = ixgbe_get_supported_physical_layer(hw);
   2444 
   2445 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2446 		adapter->optics = IFM_10G_T;
   2447 		return;
   2448 	}
   2449 
   2450 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2451 		adapter->optics = IFM_1000_T;
   2452 		return;
   2453 	}
   2454 
   2455 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2456 		adapter->optics = IFM_1000_SX;
   2457 		return;
   2458 	}
   2459 
   2460 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2461 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2462 		adapter->optics = IFM_10G_LR;
   2463 		return;
   2464 	}
   2465 
   2466 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2467 		adapter->optics = IFM_10G_SR;
   2468 		return;
   2469 	}
   2470 
   2471 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2472 		adapter->optics = IFM_10G_TWINAX;
   2473 		return;
   2474 	}
   2475 
   2476 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2477 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2478 		adapter->optics = IFM_10G_CX4;
   2479 		return;
   2480 	}
   2481 
   2482 	/* If we get here just set the default */
   2483 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2484 	return;
   2485 }
   2486 
   2487 /*********************************************************************
   2488  *
   2489  *  Setup the Legacy or MSI Interrupt handler
   2490  *
   2491  **********************************************************************/
   2492 static int
   2493 ixgbe_allocate_legacy(struct adapter *adapter,
   2494     const struct pci_attach_args *pa)
   2495 {
   2496 	device_t	dev = adapter->dev;
   2497 	struct		ix_queue *que = adapter->queues;
   2498 #ifndef IXGBE_LEGACY_TX
   2499 	struct tx_ring		*txr = adapter->tx_rings;
   2500 #endif
   2501 #ifndef NETBSD_MSI_OR_MSIX
   2502 	pci_intr_handle_t	ih;
   2503 #else
   2504 	int		counts[PCI_INTR_TYPE_SIZE];
   2505 	pci_intr_type_t intr_type, max_type;
   2506 #endif
   2507 	char intrbuf[PCI_INTRSTR_LEN];
   2508 	const char	*intrstr = NULL;
   2509 
   2510 #ifndef NETBSD_MSI_OR_MSIX
   2511 	/* We allocate a single interrupt resource */
   2512  	if (pci_intr_map(pa, &ih) != 0) {
   2513 		aprint_error_dev(dev, "unable to map interrupt\n");
   2514 		return ENXIO;
   2515 	} else {
   2516 		intrstr = pci_intr_string(adapter->osdep.pc, ih, intrbuf,
   2517 		    sizeof(intrbuf));
   2518 	}
   2519 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, ih,
   2520 	    IPL_NET, ixgbe_legacy_irq, que);
   2521 #else
   2522 	/* Allocation settings */
   2523 	max_type = PCI_INTR_TYPE_MSI;
   2524 	counts[PCI_INTR_TYPE_MSIX] = 0;
   2525 	counts[PCI_INTR_TYPE_MSI] = 1;
   2526 	counts[PCI_INTR_TYPE_INTX] = 1;
   2527 
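        	/*
        	 * Try MSI first; if the handler cannot be established, the
        	 * vector is released and we retry with INTx (the switch
        	 * below lowers max_type before jumping back here).
        	 */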
   2528 alloc_retry:
   2529 	if (pci_intr_alloc(pa, &adapter->osdep.intrs, counts, max_type) != 0) {
   2530 		aprint_error_dev(dev, "couldn't alloc interrupt\n");
   2531 		return ENXIO;
   2532 	}
   2533 	adapter->osdep.nintrs = 1;
   2534 	intrstr = pci_intr_string(adapter->osdep.pc, adapter->osdep.intrs[0],
   2535 	    intrbuf, sizeof(intrbuf));
   2536 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc,
   2537 	    adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que);
   2538 	if (adapter->osdep.ihs[0] == NULL) {
   2539 		intr_type = pci_intr_type(adapter->osdep.intrs[0]);
   2540 		aprint_error_dev(dev,"unable to establish %s\n",
   2541 		    (intr_type == PCI_INTR_TYPE_MSI) ? "MSI" : "INTx");
   2542 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2543 		switch (intr_type) {
   2544 		case PCI_INTR_TYPE_MSI:
   2545 			/* The next try is for INTx: Disable MSI */
   2546 			max_type = PCI_INTR_TYPE_INTX;
   2547 			counts[PCI_INTR_TYPE_INTX] = 1;
   2548 			goto alloc_retry;
   2549 		case PCI_INTR_TYPE_INTX:
   2550 		default:
   2551 			/* See below */
   2552 			break;
   2553 		}
   2554 	}
   2555 #endif
   2556 	if (adapter->osdep.ihs[0] == NULL) {
   2557 		aprint_error_dev(dev,
   2558 		    "couldn't establish interrupt%s%s\n",
   2559 		    intrstr ? " at " : "", intrstr ? intrstr : "");
   2560 #ifdef NETBSD_MSI_OR_MSIX
   2561 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2562 #endif
   2563 		return ENXIO;
   2564 	}
   2565 	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
   2566 	/*
   2567 	 * Try allocating a fast interrupt and the associated deferred
   2568 	 * processing contexts.
   2569 	 */
   2570 #ifndef IXGBE_LEGACY_TX
   2571 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2572 	    txr);
   2573 #endif
   2574 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2575 
   2576 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2577 	adapter->link_si =
   2578 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2579 	adapter->mod_si =
   2580 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2581 	adapter->msf_si =
   2582 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2583 
   2584 #ifdef IXGBE_FDIR
   2585 	adapter->fdir_si =
   2586 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2587 #endif
   2588 	if (que->que_si == NULL ||
   2589 	    adapter->link_si == NULL ||
   2590 	    adapter->mod_si == NULL ||
   2591 #ifdef IXGBE_FDIR
   2592 	    adapter->fdir_si == NULL ||
   2593 #endif
   2594 	    adapter->msf_si == NULL) {
   2595 		aprint_error_dev(dev,
   2596 		    "could not establish software interrupts\n");
   2597 		return ENXIO;
   2598 	}
   2599 
   2600 	/* For simplicity in the handlers */
   2601 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2602 
   2603 	return (0);
   2604 }
   2605 
   2606 
   2607 /*********************************************************************
   2608  *
   2609  *  Setup MSIX Interrupt resources and handlers
   2610  *
   2611  **********************************************************************/
   2612 static int
   2613 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2614 {
   2615 #if !defined(NETBSD_MSI_OR_MSIX)
   2616 	return 0;
   2617 #else
   2618 	device_t        dev = adapter->dev;
   2619 	struct 		ix_queue *que = adapter->queues;
   2620 	struct  	tx_ring *txr = adapter->tx_rings;
   2621 	pci_chipset_tag_t pc;
   2622 	char		intrbuf[PCI_INTRSTR_LEN];
   2623 	const char	*intrstr = NULL;
   2624 	int 		error, vector = 0;
   2625 	int		cpu_id = 0;
   2626 	kcpuset_t	*affinity;
   2627 
   2628 	pc = adapter->osdep.pc;
   2629 #ifdef	RSS
   2630 	cpuset_t cpu_mask;
   2631 	/*
   2632 	 * If we're doing RSS, the number of queues needs to
   2633 	 * match the number of RSS buckets that are configured.
   2634 	 *
   2635 	 * + If there are more queues than RSS buckets, we'll end
   2636 	 *   up with queues that get no traffic.
   2637 	 *
   2638 	 * + If there are more RSS buckets than queues, we'll end
   2639 	 *   up having multiple RSS buckets map to the same queue,
   2640 	 *   so there'll be some contention.
   2641 	 */
   2642 	if (adapter->num_queues != rss_getnumbuckets()) {
   2643 		device_printf(dev,
   2644 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
   2645 		    "; performance will be impacted.\n",
   2646 		    __func__,
   2647 		    adapter->num_queues,
   2648 		    rss_getnumbuckets());
   2649 	}
   2650 #endif
   2651 
   2652 	adapter->osdep.nintrs = adapter->num_queues + 1;
   2653 	if (pci_msix_alloc_exact(pa, &adapter->osdep.intrs,
   2654 	    adapter->osdep.nintrs) != 0) {
   2655 		aprint_error_dev(dev,
   2656 		    "failed to allocate MSI-X interrupt\n");
   2657 		return (ENXIO);
   2658 	}
   2659 
   2660 	kcpuset_create(&affinity, false);
   2661 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2662 		intrstr = pci_intr_string(pc, adapter->osdep.intrs[i], intrbuf,
   2663 		    sizeof(intrbuf));
   2664 #ifdef IXG_MPSAFE
   2665 		pci_intr_setattr(pc, &adapter->osdep.intrs[i], PCI_INTR_MPSAFE,
   2666 		    true);
   2667 #endif
   2668 		/* Set the handler function */
   2669 		que->res = adapter->osdep.ihs[i] = pci_intr_establish(pc,
   2670 		    adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que);
   2671 		if (que->res == NULL) {
   2672 			pci_intr_release(pc, adapter->osdep.intrs,
   2673 			    adapter->osdep.nintrs);
   2674 			aprint_error_dev(dev,
   2675 			    "Failed to register QUE handler\n");
   2676 			kcpuset_destroy(affinity);
   2677 			return ENXIO;
   2678 		}
   2679 		que->msix = vector;
   2680 		adapter->que_mask |= (u64)(1ULL << que->msix);
   2681 #ifdef	RSS
   2682 		/*
   2683 		 * The queue ID is used as the RSS layer bucket ID.
   2684 		 * We look up the queue ID -> RSS CPU ID and select
   2685 		 * that.
   2686 		 */
   2687 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
   2688 #else
   2689 		/*
   2690 		 * Bind the msix vector, and thus the
   2691 		 * rings to the corresponding cpu.
   2692 		 *
   2693 		 * This just happens to match the default RSS round-robin
   2694 		 * bucket -> queue -> CPU allocation.
   2695 		 */
   2696 		if (adapter->num_queues > 1)
   2697 			cpu_id = i;
   2698 #endif
   2699 		/* Round-robin affinity */
   2700 		kcpuset_zero(affinity);
   2701 		kcpuset_set(affinity, cpu_id % ncpu);
   2702 		error = interrupt_distribute(adapter->osdep.ihs[i], affinity,
   2703 		    NULL);
   2704 		aprint_normal_dev(dev, "for TX/RX, interrupting at %s",
   2705 		    intrstr);
   2706 		if (error == 0) {
   2707 #ifdef	RSS
   2708 			aprint_normal(", bound RSS bucket %d to CPU %d\n",
   2709 			    i, cpu_id);
   2710 #else
   2711 			aprint_normal(", bound queue %d to cpu %d\n",
   2712 			    i, cpu_id);
   2713 #endif
   2714 		} else
   2715 			aprint_normal("\n");
   2716 
   2717 #ifndef IXGBE_LEGACY_TX
   2718 		txr->txq_si = softint_establish(SOFTINT_NET,
   2719 		    ixgbe_deferred_mq_start, txr);
   2720 #endif
   2721 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2722 		    que);
   2723 		if (que->que_si == NULL) {
   2724 			aprint_error_dev(dev,
   2725 			    "could not establish software interrupt\n");
   2726 		}
   2727 	}
   2728 
   2729 	/* and Link */
   2730 	cpu_id++;
   2731 	intrstr = pci_intr_string(pc, adapter->osdep.intrs[vector], intrbuf,
   2732 	    sizeof(intrbuf));
   2733 #ifdef IXG_MPSAFE
   2734 	pci_intr_setattr(pc, &adapter->osdep.intrs[vector], PCI_INTR_MPSAFE,
   2735 	    true);
   2736 #endif
   2737 	/* Set the link handler function */
   2738 	adapter->osdep.ihs[vector] = pci_intr_establish(pc,
   2739 	    adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter);
   2740 	if (adapter->osdep.ihs[vector] == NULL) {
   2741 		adapter->res = NULL;
   2742 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2743 		kcpuset_destroy(affinity);
   2744 		return (ENXIO);
   2745 	}
   2746 	/* Round-robin affinity */
   2747 	kcpuset_zero(affinity);
   2748 	kcpuset_set(affinity, cpu_id % ncpu);
   2749 	error = interrupt_distribute(adapter->osdep.ihs[vector], affinity, NULL);
   2750 
   2751 	aprint_normal_dev(dev,
   2752 	    "for link, interrupting at %s", intrstr);
   2753 	if (error == 0)
   2754 		aprint_normal(", affinity to cpu %d\n", cpu_id);
   2755 	else
   2756 		aprint_normal("\n");
   2757 
   2758 	adapter->linkvec = vector;
   2759 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2760 	adapter->link_si =
   2761 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2762 	adapter->mod_si =
   2763 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2764 	adapter->msf_si =
   2765 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2766 #ifdef IXGBE_FDIR
   2767 	adapter->fdir_si =
   2768 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2769 #endif
   2770 
   2771 	kcpuset_destroy(affinity);
   2772 	return (0);
   2773 #endif
   2774 }
   2775 
   2776 /*
   2777  * Setup Either MSI/X or MSI
   2778  */
   2779 static int
   2780 ixgbe_setup_msix(struct adapter *adapter)
   2781 {
   2782 #if !defined(NETBSD_MSI_OR_MSIX)
   2783 	return 0;
   2784 #else
   2785 	device_t dev = adapter->dev;
   2786 	int want, queues, msgs;
   2787 
   2788 	/* Override by tuneable */
   2789 	if (ixgbe_enable_msix == 0)
   2790 		goto msi;
   2791 
   2792 	/* First try MSI/X */
   2793 	msgs = pci_msix_count(adapter->osdep.pc, adapter->osdep.tag);
   2794 	if (msgs < IXG_MSIX_NINTR)
   2795 		goto msi;
   2796 
   2797 	adapter->msix_mem = (void *)1; /* XXX */
   2798 
   2799 	/* Figure out a reasonable auto config value */
   2800 	queues = (ncpu > (msgs-1)) ? (msgs-1) : ncpu;
   2801 
   2802 	/* Override based on tuneable */
   2803 	if (ixgbe_num_queues != 0)
   2804 		queues = ixgbe_num_queues;
   2805 
   2806 #ifdef	RSS
   2807 	/* If we're doing RSS, clamp at the number of RSS buckets */
   2808 	if (queues > rss_getnumbuckets())
   2809 		queues = rss_getnumbuckets();
   2810 #endif
   2811 
   2812 	/* reflect correct sysctl value */
   2813 	ixgbe_num_queues = queues;
   2814 
   2815 	/*
   2816 	** Want one vector (RX/TX pair) per queue
   2817 	** plus an additional for Link.
   2818 	*/
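        	/* e.g. 8 queues need 9 MSI-X messages */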
   2819 	want = queues + 1;
   2820 	if (msgs >= want)
   2821 		msgs = want;
   2822 	else {
   2823                	aprint_error_dev(dev,
   2824 		    "MSIX Configuration Problem, "
   2825 		    "%d vectors but %d queues wanted!\n",
   2826 		    msgs, want);
   2827 		goto msi;
   2828 	}
   2829 	device_printf(dev,
   2830 	    "Using MSIX interrupts with %d vectors\n", msgs);
   2831 	adapter->num_queues = queues;
   2832 	return (msgs);
   2833 
   2834 	/*
   2835 	** If MSIX alloc failed or provided us with
   2836 	** less than needed, free and fall through to MSI
   2837 	*/
   2838 msi:
   2839        	msgs = pci_msi_count(adapter->osdep.pc, adapter->osdep.tag);
   2840 	adapter->msix_mem = NULL; /* XXX */
   2841        	msgs = 1;
   2842 	aprint_normal_dev(dev,"Using an MSI interrupt\n");
   2843 	return (msgs);
   2844 #endif
   2845 }
   2846 
   2847 
   2848 static int
   2849 ixgbe_allocate_pci_resources(struct adapter *adapter,
   2850     const struct pci_attach_args *pa)
   2851 {
   2852 	pcireg_t	memtype;
   2853 	device_t        dev = adapter->dev;
   2854 	bus_addr_t addr;
   2855 	int flags;
   2856 
   2857 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2858 	switch (memtype) {
   2859 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2860 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2861 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2862 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2863 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2864 			goto map_err;
   2865 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2866 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2867 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2868 		}
   2869 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2870 		     adapter->osdep.mem_size, flags,
   2871 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2872 map_err:
   2873 			adapter->osdep.mem_size = 0;
   2874 			aprint_error_dev(dev, "unable to map BAR0\n");
   2875 			return ENXIO;
   2876 		}
   2877 		break;
   2878 	default:
   2879 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2880 		return ENXIO;
   2881 	}
   2882 
   2883 	/* Legacy defaults */
   2884 	adapter->num_queues = 1;
   2885 	adapter->hw.back = &adapter->osdep;
   2886 
   2887 	/*
   2888 	** Now set up MSI or MSI-X; this should
   2889 	** return the number of supported
   2890 	** vectors (1 for MSI).
   2891 	*/
   2892 	adapter->msix = ixgbe_setup_msix(adapter);
   2893 	return (0);
   2894 }
   2895 
   2896 static void
   2897 ixgbe_free_pci_resources(struct adapter * adapter)
   2898 {
   2899 #if defined(NETBSD_MSI_OR_MSIX)
   2900 	struct 		ix_queue *que = adapter->queues;
   2901 #endif
   2902 	int		rid;
   2903 
   2904 #if defined(NETBSD_MSI_OR_MSIX)
   2905 	/*
   2906 	**  Release all msix queue resources:
   2907 	*/
   2908 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2909 		if (que->res != NULL)
   2910 			pci_intr_disestablish(adapter->osdep.pc,
   2911 			    adapter->osdep.ihs[i]);
   2912 	}
   2913 #endif
   2914 
   2915 	/* Clean the Legacy or Link interrupt last */
   2916 	if (adapter->linkvec) /* we are doing MSIX */
   2917 		rid = adapter->linkvec;
   2918 	else
   2919 		rid = 0;
   2920 
   2921 	if (adapter->osdep.ihs[rid] != NULL) {
   2922 		pci_intr_disestablish(adapter->osdep.pc,
   2923 		    adapter->osdep.ihs[rid]);
   2924 		adapter->osdep.ihs[rid] = NULL;
   2925 	}
   2926 
   2927 #if defined(NETBSD_MSI_OR_MSIX)
   2928 	pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs,
   2929 	    adapter->osdep.nintrs);
   2930 #endif
   2931 
   2932 	if (adapter->osdep.mem_size != 0) {
   2933 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2934 		    adapter->osdep.mem_bus_space_handle,
   2935 		    adapter->osdep.mem_size);
   2936 	}
   2937 
   2938 	return;
   2939 }
   2940 
   2941 /*********************************************************************
   2942  *
   2943  *  Setup networking device structure and register an interface.
   2944  *
   2945  **********************************************************************/
   2946 static int
   2947 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2948 {
   2949 	struct ethercom *ec = &adapter->osdep.ec;
   2950 	struct ixgbe_hw *hw = &adapter->hw;
   2951 	struct ifnet   *ifp;
   2952 
   2953 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2954 
   2955 	ifp = adapter->ifp = &ec->ec_if;
   2956 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2957 	ifp->if_baudrate = IF_Gbps(10);
   2958 	ifp->if_init = ixgbe_init;
   2959 	ifp->if_stop = ixgbe_ifstop;
   2960 	ifp->if_softc = adapter;
   2961 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2962 	ifp->if_ioctl = ixgbe_ioctl;
   2963 #ifndef IXGBE_LEGACY_TX
   2964 	ifp->if_transmit = ixgbe_mq_start;
   2965 	ifp->if_qflush = ixgbe_qflush;
   2966 #else
   2967 	ifp->if_start = ixgbe_start;
   2968 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2969 #if 0
   2970 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
   2971 #endif
   2972 	IFQ_SET_READY(&ifp->if_snd);
   2973 #endif
   2974 
   2975 	if_initialize(ifp);
   2976 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2977 	if_register(ifp);
   2978 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2979 
   2980 	adapter->max_frame_size =
   2981 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2982 
   2983 	/*
   2984 	 * Tell the upper layer(s) we support long frames.
   2985 	 */
   2986 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2987 
   2988 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2989 	ifp->if_capenable = 0;
   2990 
   2991 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2992 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2993 	ifp->if_capabilities |= IFCAP_LRO;
   2994 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2995 	    		    | ETHERCAP_VLAN_MTU;
   2996 	ec->ec_capenable = ec->ec_capabilities;
   2997 
   2998 	/*
   2999 	** Don't turn this on by default. If vlans are
   3000 	** created on another pseudo device (e.g. lagg),
   3001 	** then vlan events are not passed through, which
   3002 	** breaks operation; with HW FILTER off it works. If
   3003 	** you use vlans directly on the ixgbe driver you can
   3004 	** enable this and get full hardware tag filtering.
   3005 	*/
   3006 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   3007 
   3008 	/*
   3009 	 * Specify the media types supported by this adapter and register
   3010 	 * callbacks to update media and link information
   3011 	 */
   3012 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   3013 		     ixgbe_media_status);
   3014 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   3015 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   3016 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   3017 		ifmedia_add(&adapter->media,
   3018 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   3019 		ifmedia_add(&adapter->media,
   3020 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   3021 	}
   3022 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   3023 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   3024 
   3025 	return (0);
   3026 }
   3027 
   3028 static void
   3029 ixgbe_config_link(struct adapter *adapter)
   3030 {
   3031 	struct ixgbe_hw *hw = &adapter->hw;
   3032 	u32	autoneg, err = 0;
   3033 	bool	sfp, negotiate;
   3034 
   3035 	sfp = ixgbe_is_sfp(hw);
   3036 
   3037 	if (sfp) {
   3038 		void *ip;
   3039 
   3040 		if (hw->phy.multispeed_fiber) {
   3041 			hw->mac.ops.setup_sfp(hw);
   3042 			ixgbe_enable_tx_laser(hw);
   3043 			ip = adapter->msf_si;
   3044 		} else {
   3045 			ip = adapter->mod_si;
   3046 		}
   3047 
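        		/*
        		 * Presumably the kpreempt_disable/enable pair keeps us
        		 * on one CPU across softint_schedule(), which operates
        		 * on the calling CPU's softint state.
        		 */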
   3048 		kpreempt_disable();
   3049 		softint_schedule(ip);
   3050 		kpreempt_enable();
   3051 	} else {
   3052 		if (hw->mac.ops.check_link)
   3053 			err = ixgbe_check_link(hw, &adapter->link_speed,
   3054 			    &adapter->link_up, FALSE);
   3055 		if (err)
   3056 			goto out;
   3057 		autoneg = hw->phy.autoneg_advertised;
   3058 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   3059                 	err  = hw->mac.ops.get_link_capabilities(hw,
   3060 			    &autoneg, &negotiate);
   3061 		else
   3062 			negotiate = 0;
   3063 		if (err)
   3064 			goto out;
   3065 		if (hw->mac.ops.setup_link)
   3066                 	err = hw->mac.ops.setup_link(hw,
   3067 			    autoneg, adapter->link_up);
   3068 	}
   3069 out:
   3070 	return;
   3071 }
   3072 
   3073 /********************************************************************
   3074  * Manage DMA'able memory.
   3075  *******************************************************************/
   3076 
   3077 static int
   3078 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   3079 		struct ixgbe_dma_alloc *dma, const int mapflags)
   3080 {
   3081 	device_t dev = adapter->dev;
   3082 	int             r, rsegs;
   3083 
   3084 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3085 			       DBA_ALIGN, 0,	/* alignment, bounds */
   3086 			       size,	/* maxsize */
   3087 			       1,	/* nsegments */
   3088 			       size,	/* maxsegsize */
   3089 			       BUS_DMA_ALLOCNOW,	/* flags */
   3090 			       &dma->dma_tag);
   3091 	if (r != 0) {
   3092 		aprint_error_dev(dev,
   3093 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   3094 		goto fail_0;
   3095 	}
   3096 
   3097 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   3098 		size,
   3099 		dma->dma_tag->dt_alignment,
   3100 		dma->dma_tag->dt_boundary,
   3101 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   3102 	if (r != 0) {
   3103 		aprint_error_dev(dev,
   3104 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   3105 		goto fail_1;
   3106 	}
   3107 
   3108 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   3109 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   3110 	if (r != 0) {
   3111 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   3112 		    __func__, r);
   3113 		goto fail_2;
   3114 	}
   3115 
   3116 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   3117 	if (r != 0) {
   3118 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   3119 		    __func__, r);
   3120 		goto fail_3;
   3121 	}
   3122 
   3123 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   3124 			    size,
   3125 			    NULL,
   3126 			    mapflags | BUS_DMA_NOWAIT);
   3127 	if (r != 0) {
   3128 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3129 		    __func__, r);
   3130 		goto fail_4;
   3131 	}
   3132 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3133 	dma->dma_size = size;
   3134 	return 0;
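        	/* Error unwind: tear down in the reverse order of setup. */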
   3135 fail_4:
   3136 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3137 fail_3:
   3138 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3139 fail_2:
   3140 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3141 fail_1:
   3142 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3143 fail_0:
   3144 	return r;
   3145 }
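         /*
          * Typical usage (a sketch mirroring the callers below, e.g.
          * ixgbe_allocate_queues()):
          *
          *	struct ixgbe_dma_alloc dma;
          *	if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
          *		... use dma.dma_vaddr and dma.dma_paddr ...
          *		ixgbe_dma_free(adapter, &dma);
          *	}
          */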
   3146 
   3147 static void
   3148 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3149 {
    3150 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
    3151 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
    3152 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
         	/* Tear down in the reverse order of ixgbe_dma_malloc(),
         	 * else the map and the kernel virtual mapping leak. */
         	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
         	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr,
         	    dma->dma_size);
    3153 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
    3154 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3155 }
   3156 
   3157 
   3158 /*********************************************************************
   3159  *
   3160  *  Allocate memory for the transmit and receive rings, and then
   3161  *  the descriptors associated with each, called only once at attach.
   3162  *
   3163  **********************************************************************/
   3164 static int
   3165 ixgbe_allocate_queues(struct adapter *adapter)
   3166 {
   3167 	device_t	dev = adapter->dev;
   3168 	struct ix_queue	*que;
   3169 	struct tx_ring	*txr;
   3170 	struct rx_ring	*rxr;
   3171 	int rsize, tsize, error = IXGBE_SUCCESS;
   3172 	int txconf = 0, rxconf = 0;
   3173 
    3174 	/* First allocate the top level queue structs */
    3175 	if (!(adapter->queues =
    3176 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    3177 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    3178 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    3179 		error = ENOMEM;
    3180 		goto fail;
    3181 	}
   3182 
    3183 	/* Next allocate the TX ring struct memory */
   3184 	if (!(adapter->tx_rings =
   3185 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3186 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3187 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3188 		error = ENOMEM;
   3189 		goto tx_fail;
   3190 	}
   3191 
    3192 	/* Then allocate the RX ring struct memory */
   3193 	if (!(adapter->rx_rings =
   3194 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3195 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3196 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3197 		error = ENOMEM;
   3198 		goto rx_fail;
   3199 	}
   3200 
   3201 	/* For the ring itself */
   3202 	tsize = roundup2(adapter->num_tx_desc *
   3203 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
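         	/*
         	 * A worked example (assuming the usual 16-byte advanced
         	 * descriptor): 1024 descriptors * 16 bytes = 16384, already a
         	 * multiple of DBA_ALIGN, so roundup2() is a no-op in the
         	 * common case but protects odd ring sizes.
         	 */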
   3204 
   3205 	/*
   3206 	 * Now set up the TX queues, txconf is needed to handle the
   3207 	 * possibility that things fail midcourse and we need to
   3208 	 * undo memory gracefully
   3209 	 */
   3210 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3211 		/* Set up some basics */
   3212 		txr = &adapter->tx_rings[i];
   3213 		txr->adapter = adapter;
   3214 		txr->me = i;
   3215 		txr->num_desc = adapter->num_tx_desc;
   3216 
   3217 		/* Initialize the TX side lock */
   3218 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3219 		    device_xname(dev), txr->me);
   3220 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3221 
   3222 		if (ixgbe_dma_malloc(adapter, tsize,
   3223 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3224 			aprint_error_dev(dev,
   3225 			    "Unable to allocate TX Descriptor memory\n");
   3226 			error = ENOMEM;
   3227 			goto err_tx_desc;
   3228 		}
   3229 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3230 		bzero((void *)txr->tx_base, tsize);
   3231 
    3232 		/* Now allocate transmit buffers for the ring */
    3233 		if (ixgbe_allocate_transmit_buffers(txr)) {
    3234 			aprint_error_dev(dev,
    3235 			    "Critical Failure setting up transmit buffers\n");
    3236 			error = ENOMEM;
    3237 			goto err_tx_desc;
    3238 		}
   3239 #ifndef IXGBE_LEGACY_TX
   3240 		/* Allocate a buf ring */
   3241 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3242 		    M_WAITOK, &txr->tx_mtx);
   3243 		if (txr->br == NULL) {
   3244 			aprint_error_dev(dev,
   3245 			    "Critical Failure setting up buf ring\n");
   3246 			error = ENOMEM;
   3247 			goto err_tx_desc;
    3248 		}
   3249 #endif
   3250 	}
   3251 
   3252 	/*
   3253 	 * Next the RX queues...
   3254 	 */
   3255 	rsize = roundup2(adapter->num_rx_desc *
   3256 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3257 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3258 		rxr = &adapter->rx_rings[i];
   3259 		/* Set up some basics */
   3260 		rxr->adapter = adapter;
   3261 		rxr->me = i;
   3262 		rxr->num_desc = adapter->num_rx_desc;
   3263 
   3264 		/* Initialize the RX side lock */
   3265 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3266 		    device_xname(dev), rxr->me);
   3267 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3268 
   3269 		if (ixgbe_dma_malloc(adapter, rsize,
   3270 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3271 			aprint_error_dev(dev,
    3272 			    "Unable to allocate RX Descriptor memory\n");
   3273 			error = ENOMEM;
   3274 			goto err_rx_desc;
   3275 		}
   3276 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3277 		bzero((void *)rxr->rx_base, rsize);
   3278 
    3279 		/* Allocate receive buffers for the ring */
   3280 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3281 			aprint_error_dev(dev,
   3282 			    "Critical Failure setting up receive buffers\n");
   3283 			error = ENOMEM;
   3284 			goto err_rx_desc;
   3285 		}
   3286 	}
   3287 
   3288 	/*
   3289 	** Finally set up the queue holding structs
   3290 	*/
   3291 	for (int i = 0; i < adapter->num_queues; i++) {
   3292 		que = &adapter->queues[i];
   3293 		que->adapter = adapter;
   3294 		que->txr = &adapter->tx_rings[i];
   3295 		que->rxr = &adapter->rx_rings[i];
   3296 	}
   3297 
   3298 	return (0);
   3299 
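         /*
          * Error unwind: txconf/rxconf count how many rings were fully set
          * up above, so the loops below free descriptor DMA only for those
          * rings before releasing the three top-level arrays.
          */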
   3300 err_rx_desc:
   3301 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3302 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3303 err_tx_desc:
   3304 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3305 		ixgbe_dma_free(adapter, &txr->txdma);
   3306 	free(adapter->rx_rings, M_DEVBUF);
   3307 rx_fail:
   3308 	free(adapter->tx_rings, M_DEVBUF);
   3309 tx_fail:
   3310 	free(adapter->queues, M_DEVBUF);
   3311 fail:
   3312 	return (error);
   3313 }
   3314 
   3315 /*********************************************************************
   3316  *
   3317  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3318  *  the information needed to transmit a packet on the wire. This is
   3319  *  called only once at attach, setup is done every reset.
   3320  *
   3321  **********************************************************************/
   3322 static int
   3323 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3324 {
   3325 	struct adapter *adapter = txr->adapter;
   3326 	device_t dev = adapter->dev;
   3327 	struct ixgbe_tx_buf *txbuf;
   3328 	int error, i;
   3329 
   3330 	/*
   3331 	 * Setup DMA descriptor areas.
   3332 	 */
   3333 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3334 			       1, 0,		/* alignment, bounds */
   3335 			       IXGBE_TSO_SIZE,		/* maxsize */
   3336 			       adapter->num_segs,	/* nsegments */
   3337 			       PAGE_SIZE,		/* maxsegsize */
   3338 			       0,			/* flags */
   3339 			       &txr->txtag))) {
    3340 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3341 		goto fail;
   3342 	}
   3343 
   3344 	if (!(txr->tx_buffers =
   3345 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3346 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3347 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3348 		error = ENOMEM;
   3349 		goto fail;
   3350 	}
   3351 
    3352 	/* Create the descriptor buffer dma maps */
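         	/*
         	 * One map per descriptor slot: each map can later carry an
         	 * arbitrary mbuf chain (up to num_segs segments) and be
         	 * loaded and unloaded independently of its neighbours.
         	 */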
   3353 	txbuf = txr->tx_buffers;
   3354 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3355 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3356 		if (error != 0) {
   3357 			aprint_error_dev(dev,
   3358 			    "Unable to create TX DMA map (%d)\n", error);
   3359 			goto fail;
   3360 		}
   3361 	}
   3362 
   3363 	return 0;
   3364 fail:
    3365 	/* Free everything; this handles the case where we failed midway */
   3366 	ixgbe_free_transmit_structures(adapter);
   3367 	return (error);
   3368 }
   3369 
   3370 /*********************************************************************
   3371  *
   3372  *  Initialize a transmit ring.
   3373  *
   3374  **********************************************************************/
   3375 static void
   3376 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3377 {
   3378 	struct adapter *adapter = txr->adapter;
   3379 	struct ixgbe_tx_buf *txbuf;
   3380 	int i;
   3381 #ifdef DEV_NETMAP
   3382 	struct netmap_adapter *na = NA(adapter->ifp);
   3383 	struct netmap_slot *slot;
   3384 #endif /* DEV_NETMAP */
   3385 
   3386 	/* Clear the old ring contents */
   3387 	IXGBE_TX_LOCK(txr);
   3388 #ifdef DEV_NETMAP
   3389 	/*
   3390 	 * (under lock): if in netmap mode, do some consistency
   3391 	 * checks and set slot to entry 0 of the netmap ring.
   3392 	 */
   3393 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3394 #endif /* DEV_NETMAP */
   3395 	bzero((void *)txr->tx_base,
   3396 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3397 	/* Reset indices */
   3398 	txr->next_avail_desc = 0;
   3399 	txr->next_to_clean = 0;
   3400 
   3401 	/* Free any existing tx buffers. */
    3402 	txbuf = txr->tx_buffers;
   3403 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3404 		if (txbuf->m_head != NULL) {
   3405 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3406 			    0, txbuf->m_head->m_pkthdr.len,
   3407 			    BUS_DMASYNC_POSTWRITE);
   3408 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3409 			m_freem(txbuf->m_head);
   3410 			txbuf->m_head = NULL;
   3411 		}
   3412 #ifdef DEV_NETMAP
   3413 		/*
   3414 		 * In netmap mode, set the map for the packet buffer.
   3415 		 * NOTE: Some drivers (not this one) also need to set
   3416 		 * the physical buffer address in the NIC ring.
   3417 		 * Slots in the netmap ring (indexed by "si") are
   3418 		 * kring->nkr_hwofs positions "ahead" wrt the
   3419 		 * corresponding slot in the NIC ring. In some drivers
   3420 		 * (not here) nkr_hwofs can be negative. Function
   3421 		 * netmap_idx_n2k() handles wraparounds properly.
   3422 		 */
   3423 		if (slot) {
   3424 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3425 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
   3426 		}
   3427 #endif /* DEV_NETMAP */
   3428 		/* Clear the EOP descriptor pointer */
   3429 		txbuf->eop = NULL;
    3430 	}
   3431 
   3432 #ifdef IXGBE_FDIR
   3433 	/* Set the rate at which we sample packets */
   3434 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3435 		txr->atr_sample = atr_sample_rate;
   3436 #endif
   3437 
   3438 	/* Set number of descriptors available */
   3439 	txr->tx_avail = adapter->num_tx_desc;
   3440 
   3441 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3442 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3443 	IXGBE_TX_UNLOCK(txr);
   3444 }
   3445 
   3446 /*********************************************************************
   3447  *
   3448  *  Initialize all transmit rings.
   3449  *
   3450  **********************************************************************/
   3451 static int
   3452 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3453 {
   3454 	struct tx_ring *txr = adapter->tx_rings;
   3455 
   3456 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3457 		ixgbe_setup_transmit_ring(txr);
   3458 
   3459 	return (0);
   3460 }
   3461 
   3462 /*********************************************************************
   3463  *
   3464  *  Enable transmit unit.
   3465  *
   3466  **********************************************************************/
   3467 static void
   3468 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3469 {
   3470 	struct tx_ring	*txr = adapter->tx_rings;
   3471 	struct ixgbe_hw	*hw = &adapter->hw;
   3472 
   3473 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3474 
   3475 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3476 		u64	tdba = txr->txdma.dma_paddr;
   3477 		u32	txctrl;
   3478 
   3479 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3480 		       (tdba & 0x00000000ffffffffULL));
   3481 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3482 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3483 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3484 
   3485 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3486 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3487 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3488 
   3489 		/* Setup Transmit Descriptor Cmd Settings */
   3490 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3491 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3492 
   3493 		/* Set the processing limit */
   3494 		txr->process_limit = ixgbe_tx_process_limit;
   3495 
   3496 		/* Disable Head Writeback */
   3497 		switch (hw->mac.type) {
   3498 		case ixgbe_mac_82598EB:
   3499 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3500 			break;
   3501 		case ixgbe_mac_82599EB:
   3502 		case ixgbe_mac_X540:
   3503 		default:
   3504 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3505 			break;
   3506                 }
   3507 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3508 		switch (hw->mac.type) {
   3509 		case ixgbe_mac_82598EB:
   3510 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3511 			break;
   3512 		case ixgbe_mac_82599EB:
   3513 		case ixgbe_mac_X540:
   3514 		default:
   3515 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3516 			break;
   3517 		}
   3518 
   3519 	}
   3520 
   3521 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3522 		u32 dmatxctl, rttdcs;
   3523 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3524 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3525 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3526 		/* Disable arbiter to set MTQC */
   3527 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3528 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3529 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3530 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3531 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3532 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3533 	}
   3534 
   3535 	return;
   3536 }
   3537 
   3538 /*********************************************************************
   3539  *
   3540  *  Free all transmit rings.
   3541  *
   3542  **********************************************************************/
   3543 static void
   3544 ixgbe_free_transmit_structures(struct adapter *adapter)
   3545 {
   3546 	struct tx_ring *txr = adapter->tx_rings;
   3547 
   3548 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3549 		ixgbe_free_transmit_buffers(txr);
   3550 		ixgbe_dma_free(adapter, &txr->txdma);
   3551 		IXGBE_TX_LOCK_DESTROY(txr);
   3552 	}
   3553 	free(adapter->tx_rings, M_DEVBUF);
   3554 }
   3555 
   3556 /*********************************************************************
   3557  *
   3558  *  Free transmit ring related data structures.
   3559  *
   3560  **********************************************************************/
   3561 static void
   3562 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3563 {
   3564 	struct adapter *adapter = txr->adapter;
   3565 	struct ixgbe_tx_buf *tx_buffer;
   3566 	int             i;
   3567 
   3568 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
   3569 
   3570 	if (txr->tx_buffers == NULL)
   3571 		return;
   3572 
   3573 	tx_buffer = txr->tx_buffers;
   3574 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3575 		if (tx_buffer->m_head != NULL) {
   3576 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3577 			    0, tx_buffer->m_head->m_pkthdr.len,
   3578 			    BUS_DMASYNC_POSTWRITE);
   3579 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3580 			m_freem(tx_buffer->m_head);
   3581 			tx_buffer->m_head = NULL;
   3582 			if (tx_buffer->map != NULL) {
   3583 				ixgbe_dmamap_destroy(txr->txtag,
   3584 				    tx_buffer->map);
   3585 				tx_buffer->map = NULL;
   3586 			}
   3587 		} else if (tx_buffer->map != NULL) {
   3588 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3589 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3590 			tx_buffer->map = NULL;
   3591 		}
   3592 	}
   3593 #ifndef IXGBE_LEGACY_TX
   3594 	if (txr->br != NULL)
   3595 		buf_ring_free(txr->br, M_DEVBUF);
   3596 #endif
   3597 	if (txr->tx_buffers != NULL) {
   3598 		free(txr->tx_buffers, M_DEVBUF);
   3599 		txr->tx_buffers = NULL;
   3600 	}
   3601 	if (txr->txtag != NULL) {
   3602 		ixgbe_dma_tag_destroy(txr->txtag);
   3603 		txr->txtag = NULL;
   3604 	}
   3605 	return;
   3606 }
   3607 
   3608 /*********************************************************************
   3609  *
   3610  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3611  *
   3612  **********************************************************************/
   3613 
   3614 static int
   3615 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3616     u32 *cmd_type_len, u32 *olinfo_status)
   3617 {
   3618 	struct m_tag *mtag;
   3619 	struct adapter *adapter = txr->adapter;
   3620 	struct ethercom *ec = &adapter->osdep.ec;
   3621 	struct ixgbe_adv_tx_context_desc *TXD;
   3622 	struct ether_vlan_header *eh;
   3623 	struct ip ip;
   3624 	struct ip6_hdr ip6;
   3625 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3626 	int	ehdrlen, ip_hlen = 0;
   3627 	u16	etype;
   3628 	u8	ipproto __diagused = 0;
   3629 	int	offload = TRUE;
   3630 	int	ctxd = txr->next_avail_desc;
   3631 	u16	vtag = 0;
   3632 
   3633 	/* First check if TSO is to be used */
   3634 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3635 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3636 
   3637 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3638 		offload = FALSE;
   3639 
   3640 	/* Indicate the whole packet as payload when not doing TSO */
    3641 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3642 
   3643 	/* Now ready a context descriptor */
   3644 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3645 
   3646 	/*
   3647 	** In advanced descriptors the vlan tag must
   3648 	** be placed into the context descriptor. Hence
   3649 	** we need to make one even if not doing offloads.
   3650 	*/
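         	/*
         	** Field layout: vlan_macip_lens packs the IP header length
         	** in bits 8:0, the MAC header length in bits 15:9 and the
         	** VLAN tag in bits 31:16, hence the two shifts used below.
         	*/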
   3651 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3652 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3653 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3654 	} else if (offload == FALSE) /* ... no offload to do */
   3655 		return 0;
   3656 
   3657 	/*
   3658 	 * Determine where frame payload starts.
   3659 	 * Jump over vlan headers if already present,
   3660 	 * helpful for QinQ too.
   3661 	 */
   3662 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3663 	eh = mtod(mp, struct ether_vlan_header *);
   3664 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3665 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3666 		etype = ntohs(eh->evl_proto);
   3667 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3668 	} else {
   3669 		etype = ntohs(eh->evl_encap_proto);
   3670 		ehdrlen = ETHER_HDR_LEN;
   3671 	}
   3672 
   3673 	/* Set the ether header length */
   3674 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3675 
   3676 	switch (etype) {
   3677 	case ETHERTYPE_IP:
   3678 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3679 		ip_hlen = ip.ip_hl << 2;
   3680 		ipproto = ip.ip_p;
   3681 #if 0
   3682 		ip.ip_sum = 0;
   3683 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3684 #else
   3685 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3686 		    ip.ip_sum == 0);
   3687 #endif
   3688 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3689 		break;
   3690 	case ETHERTYPE_IPV6:
   3691 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3692 		ip_hlen = sizeof(ip6);
   3693 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3694 		ipproto = ip6.ip6_nxt;
   3695 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3696 		break;
   3697 	default:
   3698 		break;
   3699 	}
   3700 
   3701 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3702 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3703 
   3704 	vlan_macip_lens |= ip_hlen;
   3705 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3706 
   3707 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3708 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3709 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3710 		KASSERT(ipproto == IPPROTO_TCP);
   3711 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3712 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3713 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3714 		KASSERT(ipproto == IPPROTO_UDP);
   3715 	}
   3716 
   3717 	/* Now copy bits into descriptor */
   3718 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3719 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3720 	TXD->seqnum_seed = htole32(0);
   3721 	TXD->mss_l4len_idx = htole32(0);
   3722 
   3723 	/* We've consumed the first desc, adjust counters */
   3724 	if (++ctxd == txr->num_desc)
   3725 		ctxd = 0;
   3726 	txr->next_avail_desc = ctxd;
   3727 	--txr->tx_avail;
   3728 
    3729 	return 0;
   3730 }
   3731 
   3732 /**********************************************************************
   3733  *
   3734  *  Setup work for hardware segmentation offload (TSO) on
   3735  *  adapters using advanced tx descriptors
   3736  *
   3737  **********************************************************************/
   3738 static int
   3739 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3740     u32 *cmd_type_len, u32 *olinfo_status)
   3741 {
   3742 	struct m_tag *mtag;
   3743 	struct adapter *adapter = txr->adapter;
   3744 	struct ethercom *ec = &adapter->osdep.ec;
   3745 	struct ixgbe_adv_tx_context_desc *TXD;
   3746 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3747 	u32 mss_l4len_idx = 0, paylen;
   3748 	u16 vtag = 0, eh_type;
   3749 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3750 	struct ether_vlan_header *eh;
   3751 #ifdef INET6
   3752 	struct ip6_hdr *ip6;
   3753 #endif
   3754 #ifdef INET
   3755 	struct ip *ip;
   3756 #endif
   3757 	struct tcphdr *th;
    3758 
   3760 	/*
   3761 	 * Determine where frame payload starts.
   3762 	 * Jump over vlan headers if already present
   3763 	 */
   3764 	eh = mtod(mp, struct ether_vlan_header *);
   3765 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3766 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3767 		eh_type = eh->evl_proto;
   3768 	} else {
   3769 		ehdrlen = ETHER_HDR_LEN;
   3770 		eh_type = eh->evl_encap_proto;
   3771 	}
   3772 
   3773 	switch (ntohs(eh_type)) {
   3774 #ifdef INET6
   3775 	case ETHERTYPE_IPV6:
   3776 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3777 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3778 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3779 			return (ENXIO);
    3780 		ip_hlen = sizeof(struct ip6_hdr);
   3782 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3783 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3784 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3785 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3786 		break;
   3787 #endif
   3788 #ifdef INET
   3789 	case ETHERTYPE_IP:
   3790 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3791 		if (ip->ip_p != IPPROTO_TCP)
   3792 			return (ENXIO);
   3793 		ip->ip_sum = 0;
   3794 		ip_hlen = ip->ip_hl << 2;
   3795 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3796 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3797 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3798 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3799 		/* Tell transmit desc to also do IPv4 checksum. */
   3800 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3801 		break;
   3802 #endif
   3803 	default:
   3804 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3805 		    __func__, ntohs(eh_type));
   3806 		break;
   3807 	}
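         	/*
         	 * Note: the pseudo-header checksum seeded into th_sum above
         	 * deliberately omits the length field; for TSO the hardware
         	 * fills in the per-segment TCP length as it splits the payload.
         	 */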
   3808 
   3809 	ctxd = txr->next_avail_desc;
   3810 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3811 
   3812 	tcp_hlen = th->th_off << 2;
   3813 
   3814 	/* This is used in the transmit desc in encap */
   3815 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3816 
   3817 	/* VLAN MACLEN IPLEN */
   3818 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3819 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    3820 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3821 	}
   3822 
   3823 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3824 	vlan_macip_lens |= ip_hlen;
   3825 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3826 
   3827 	/* ADV DTYPE TUCMD */
   3828 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3829 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3830 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3831 
   3832 	/* MSS L4LEN IDX */
   3833 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3834 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3835 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3836 
   3837 	TXD->seqnum_seed = htole32(0);
   3838 
   3839 	if (++ctxd == txr->num_desc)
   3840 		ctxd = 0;
   3841 
   3842 	txr->tx_avail--;
   3843 	txr->next_avail_desc = ctxd;
   3844 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3845 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3846 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3847 	++txr->tso_tx.ev_count;
   3848 	return (0);
   3849 }
   3850 
   3851 #ifdef IXGBE_FDIR
   3852 /*
   3853 ** This routine parses packet headers so that Flow
   3854 ** Director can make a hashed filter table entry
   3855 ** allowing traffic flows to be identified and kept
    3856 ** on the same cpu.  Doing this for every packet would
    3857 ** be a performance hit, so we only sample at the
    3858 ** IXGBE_FDIR_RATE.
   3859 */
   3860 static void
   3861 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3862 {
   3863 	struct adapter			*adapter = txr->adapter;
   3864 	struct ix_queue			*que;
   3865 	struct ip			*ip;
   3866 	struct tcphdr			*th;
   3867 	struct udphdr			*uh;
   3868 	struct ether_vlan_header	*eh;
   3869 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3870 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3871 	int  				ehdrlen, ip_hlen;
   3872 	u16				etype;
   3873 
   3874 	eh = mtod(mp, struct ether_vlan_header *);
   3875 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3876 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3877 		etype = eh->evl_proto;
   3878 	} else {
   3879 		ehdrlen = ETHER_HDR_LEN;
   3880 		etype = eh->evl_encap_proto;
   3881 	}
   3882 
   3883 	/* Only handling IPv4 */
   3884 	if (etype != htons(ETHERTYPE_IP))
   3885 		return;
   3886 
   3887 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3888 	ip_hlen = ip->ip_hl << 2;
   3889 
   3890 	/* check if we're UDP or TCP */
   3891 	switch (ip->ip_p) {
   3892 	case IPPROTO_TCP:
   3893 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3894 		/* src and dst are inverted */
   3895 		common.port.dst ^= th->th_sport;
   3896 		common.port.src ^= th->th_dport;
   3897 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3898 		break;
   3899 	case IPPROTO_UDP:
   3900 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3901 		/* src and dst are inverted */
   3902 		common.port.dst ^= uh->uh_sport;
   3903 		common.port.src ^= uh->uh_dport;
   3904 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3905 		break;
   3906 	default:
   3907 		return;
   3908 	}
   3909 
   3910 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3911 	if (mp->m_pkthdr.ether_vtag)
   3912 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3913 	else
   3914 		common.flex_bytes ^= etype;
   3915 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3916 
   3917 	que = &adapter->queues[txr->me];
   3918 	/*
   3919 	** This assumes the Rx queue and Tx
   3920 	** queue are bound to the same CPU
   3921 	*/
   3922 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3923 	    input, common, que->msix);
   3924 }
   3925 #endif /* IXGBE_FDIR */
   3926 
   3927 /**********************************************************************
   3928  *
   3929  *  Examine each tx_buffer in the used queue. If the hardware is done
   3930  *  processing the packet then free associated resources. The
   3931  *  tx_buffer is put back on the free queue.
   3932  *
   3933  **********************************************************************/
   3934 static void
   3935 ixgbe_txeof(struct tx_ring *txr)
   3936 {
   3937 	struct adapter		*adapter = txr->adapter;
   3938 	struct ifnet		*ifp = adapter->ifp;
   3939 	u32			work, processed = 0;
   3940 	u16			limit = txr->process_limit;
   3941 	struct ixgbe_tx_buf	*buf;
   3942 	union ixgbe_adv_tx_desc *txd;
   3943 	struct timeval now, elapsed;
   3944 
   3945 	KASSERT(mutex_owned(&txr->tx_mtx));
   3946 
   3947 #ifdef DEV_NETMAP
   3948 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3949 		struct netmap_adapter *na = NA(ifp);
   3950 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3951 		txd = txr->tx_base;
   3952 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3953 		    BUS_DMASYNC_POSTREAD);
   3954 		/*
   3955 		 * In netmap mode, all the work is done in the context
   3956 		 * of the client thread. Interrupt handlers only wake up
   3957 		 * clients, which may be sleeping on individual rings
   3958 		 * or on a global resource for all rings.
   3959 		 * To implement tx interrupt mitigation, we wake up the client
   3960 		 * thread roughly every half ring, even if the NIC interrupts
   3961 		 * more frequently. This is implemented as follows:
   3962 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3963 		 *   the slot that should wake up the thread (nkr_num_slots
   3964 		 *   means the user thread should not be woken up);
   3965 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3966 		 *   or the slot has the DD bit set.
   3967 		 */
   3968 		if (!netmap_mitigate ||
   3969 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3970 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3971 			netmap_tx_irq(ifp, txr->me);
   3972 		}
   3973 		return;
   3974 	}
   3975 #endif /* DEV_NETMAP */
   3976 
   3977 	if (txr->tx_avail == txr->num_desc) {
   3978 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3979 		return;
   3980 	}
   3981 
   3982 	/* Get work starting point */
   3983 	work = txr->next_to_clean;
   3984 	buf = &txr->tx_buffers[work];
   3985 	txd = &txr->tx_base[work];
   3986 	work -= txr->num_desc; /* The distance to ring end */
    3987 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3988 	    BUS_DMASYNC_POSTREAD);
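         	/*
         	 * 'work' is now biased downward by num_desc (the unsigned
         	 * subtraction wraps); it reaches zero exactly at the ring
         	 * end, which the (!work) tests below catch before rebasing
         	 * buf and txd to the start of the ring.
         	 */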
   3989 	do {
    3990 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3991 		if (eop == NULL) /* No work */
   3992 			break;
   3993 
   3994 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3995 			break;	/* I/O not complete */
   3996 
   3997 		if (buf->m_head) {
   3998 			txr->bytes +=
   3999 			    buf->m_head->m_pkthdr.len;
   4000 			bus_dmamap_sync(txr->txtag->dt_dmat,
   4001 			    buf->map,
   4002 			    0, buf->m_head->m_pkthdr.len,
   4003 			    BUS_DMASYNC_POSTWRITE);
   4004 			ixgbe_dmamap_unload(txr->txtag,
   4005 			    buf->map);
   4006 			m_freem(buf->m_head);
   4007 			buf->m_head = NULL;
   4008 			/*
   4009 			 * NetBSD: Don't override buf->map with NULL here.
   4010 			 * It'll panic when a ring runs one lap around.
   4011 			 */
   4012 		}
   4013 		buf->eop = NULL;
   4014 		++txr->tx_avail;
   4015 
   4016 		/* We clean the range if multi segment */
   4017 		while (txd != eop) {
   4018 			++txd;
   4019 			++buf;
   4020 			++work;
   4021 			/* wrap the ring? */
   4022 			if (__predict_false(!work)) {
   4023 				work -= txr->num_desc;
   4024 				buf = txr->tx_buffers;
   4025 				txd = txr->tx_base;
   4026 			}
   4027 			if (buf->m_head) {
   4028 				txr->bytes +=
   4029 				    buf->m_head->m_pkthdr.len;
   4030 				bus_dmamap_sync(txr->txtag->dt_dmat,
   4031 				    buf->map,
   4032 				    0, buf->m_head->m_pkthdr.len,
   4033 				    BUS_DMASYNC_POSTWRITE);
   4034 				ixgbe_dmamap_unload(txr->txtag,
   4035 				    buf->map);
   4036 				m_freem(buf->m_head);
   4037 				buf->m_head = NULL;
   4038 				/*
   4039 				 * NetBSD: Don't override buf->map with NULL
   4040 				 * here. It'll panic when a ring runs one lap
   4041 				 * around.
   4042 				 */
   4043 			}
   4044 			++txr->tx_avail;
   4045 			buf->eop = NULL;
   4046 
   4047 		}
   4048 		++txr->packets;
   4049 		++processed;
   4050 		++ifp->if_opackets;
   4051 		getmicrotime(&txr->watchdog_time);
   4052 
   4053 		/* Try the next packet */
   4054 		++txd;
   4055 		++buf;
   4056 		++work;
   4057 		/* reset with a wrap */
   4058 		if (__predict_false(!work)) {
   4059 			work -= txr->num_desc;
   4060 			buf = txr->tx_buffers;
   4061 			txd = txr->tx_base;
   4062 		}
   4063 		prefetch(txd);
   4064 	} while (__predict_true(--limit));
   4065 
   4066 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   4067 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4068 
   4069 	work += txr->num_desc;
   4070 	txr->next_to_clean = work;
   4071 
   4072 	/*
    4073 	** Watchdog calculation: we know there's
    4074 	** work outstanding or the first return
    4075 	** would have been taken, so if nothing was
    4076 	** processed for too long we declare a hang.
   4077 	*/
   4078 	getmicrotime(&now);
   4079 	timersub(&now, &txr->watchdog_time, &elapsed);
   4080 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   4081 		txr->queue_status = IXGBE_QUEUE_HUNG;
   4082 
   4083 	if (txr->tx_avail == txr->num_desc)
   4084 		txr->queue_status = IXGBE_QUEUE_IDLE;
   4085 
   4086 	return;
   4087 }
   4088 
   4089 /*********************************************************************
   4090  *
   4091  *  Refresh mbuf buffers for RX descriptor rings
    4092  *   - now keeps its own state, so discards due to resource
    4093  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    4094  *     the routine just returns, keeping its placeholder, and can
    4095  *     simply be called again later to retry.
   4096  *
   4097  **********************************************************************/
   4098 static void
   4099 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   4100 {
   4101 	struct adapter		*adapter = rxr->adapter;
   4102 	struct ixgbe_rx_buf	*rxbuf;
   4103 	struct mbuf		*mp;
   4104 	int			i, j, error;
   4105 	bool			refreshed = false;
   4106 
   4107 	i = j = rxr->next_to_refresh;
   4108 	/* Control the loop with one beyond */
   4109 	if (++j == rxr->num_desc)
   4110 		j = 0;
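         	/*
         	 * j runs one slot ahead of i: the loop stops once the
         	 * look-ahead index reaches 'limit', so the slot at 'limit'
         	 * itself is never refreshed and i always names the last
         	 * descriptor actually handed back to the hardware.
         	 */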
   4111 
   4112 	while (j != limit) {
   4113 		rxbuf = &rxr->rx_buffers[i];
   4114 		if (rxbuf->buf == NULL) {
   4115 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4116 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   4117 			if (mp == NULL) {
   4118 				rxr->no_jmbuf.ev_count++;
   4119 				goto update;
   4120 			}
   4121 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4122 				m_adj(mp, ETHER_ALIGN);
   4123 		} else
   4124 			mp = rxbuf->buf;
   4125 
   4126 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4127 
   4128 		/* If we're dealing with an mbuf that was copied rather
   4129 		 * than replaced, there's no need to go through busdma.
   4130 		 */
   4131 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4132 			/* Get the memory mapping */
   4133 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4134 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4135 			if (error != 0) {
   4136 				printf("Refresh mbufs: payload dmamap load"
   4137 				    " failure - %d\n", error);
   4138 				m_free(mp);
   4139 				rxbuf->buf = NULL;
   4140 				goto update;
   4141 			}
   4142 			rxbuf->buf = mp;
   4143 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4144 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4145 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4146 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4147 		} else {
   4148 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4149 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4150 		}
   4151 
   4152 		refreshed = true;
   4153 		/* Next is precalculated */
   4154 		i = j;
   4155 		rxr->next_to_refresh = i;
   4156 		if (++j == rxr->num_desc)
   4157 			j = 0;
   4158 	}
   4159 update:
   4160 	if (refreshed) /* Update hardware tail index */
   4161 		IXGBE_WRITE_REG(&adapter->hw,
   4162 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4163 	return;
   4164 }
   4165 
   4166 /*********************************************************************
   4167  *
   4168  *  Allocate memory for rx_buffer structures. Since we use one
   4169  *  rx_buffer per received packet, the maximum number of rx_buffer's
   4170  *  that we'll need is equal to the number of receive descriptors
   4171  *  that we've allocated.
   4172  *
   4173  **********************************************************************/
   4174 static int
   4175 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4176 {
   4177 	struct	adapter 	*adapter = rxr->adapter;
   4178 	device_t 		dev = adapter->dev;
   4179 	struct ixgbe_rx_buf 	*rxbuf;
   4180 	int             	i, bsize, error;
   4181 
   4182 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4183 	if (!(rxr->rx_buffers =
   4184 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4185 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4186 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4187 		error = ENOMEM;
   4188 		goto fail;
   4189 	}
   4190 
   4191 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4192 				   1, 0,	/* alignment, bounds */
   4193 				   MJUM16BYTES,		/* maxsize */
   4194 				   1,			/* nsegments */
   4195 				   MJUM16BYTES,		/* maxsegsize */
   4196 				   0,			/* flags */
   4197 				   &rxr->ptag))) {
   4198 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4199 		goto fail;
   4200 	}
   4201 
    4202 	for (i = 0; i < rxr->num_desc; i++) {
   4203 		rxbuf = &rxr->rx_buffers[i];
   4204 		error = ixgbe_dmamap_create(rxr->ptag,
   4205 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4206 		if (error) {
   4207 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4208 			goto fail;
   4209 		}
   4210 	}
   4211 
   4212 	return (0);
   4213 
   4214 fail:
   4215 	/* Frees all, but can handle partial completion */
   4216 	ixgbe_free_receive_structures(adapter);
   4217 	return (error);
   4218 }
   4219 
   4220 /*
   4221 ** Used to detect a descriptor that has
   4222 ** been merged by Hardware RSC.
   4223 */
   4224 static inline u32
   4225 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4226 {
   4227 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4228 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4229 }
   4230 
   4231 /*********************************************************************
   4232  *
   4233  *  Initialize Hardware RSC (LRO) feature on 82599
   4234  *  for an RX ring, this is toggled by the LRO capability
   4235  *  even though it is transparent to the stack.
   4236  *
   4237  *  NOTE: since this HW feature only works with IPV4 and
   4238  *        our testing has shown soft LRO to be as effective
   4239  *        I have decided to disable this by default.
   4240  *
   4241  **********************************************************************/
   4242 static void
   4243 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4244 {
   4245 	struct	adapter 	*adapter = rxr->adapter;
   4246 	struct	ixgbe_hw	*hw = &adapter->hw;
   4247 	u32			rscctrl, rdrxctl;
   4248 
   4249 	/* If turning LRO/RSC off we need to disable it */
   4250 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4251 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    4252 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/* Write the cleared enable bit back, else it never takes */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    4253 		return;
   4254 	}
   4255 
   4256 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4257 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4258 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4259 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4260 #endif /* DEV_NETMAP */
   4261 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4262 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4263 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4264 
   4265 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4266 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4267 	/*
   4268 	** Limit the total number of descriptors that
   4269 	** can be combined, so it does not exceed 64K
   4270 	*/
   4271 	if (rxr->mbuf_sz == MCLBYTES)
   4272 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4273 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4274 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4275 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4276 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4277 	else  /* Using 16K cluster */
   4278 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
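         	/*
         	 * e.g. 16 * 2 KB clusters = 32 KB, 8 * 4 KB = 32 KB and
         	 * 4 * 9 KB = 36 KB; every pairing stays safely below the
         	 * 64 KB aggregation limit noted above.
         	 */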
   4279 
   4280 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4281 
   4282 	/* Enable TCP header recognition */
   4283 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4284 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4285 	    IXGBE_PSRTYPE_TCPHDR));
   4286 
   4287 	/* Disable RSC for ACK packets */
   4288 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4289 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4290 
   4291 	rxr->hw_rsc = TRUE;
   4292 }
   4293 
   4294 
   4295 static void
   4296 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4297 {
   4298 	struct ixgbe_rx_buf       *rxbuf;
   4299 	int i;
   4300 
   4301 	for (i = 0; i < rxr->num_desc; i++) {
   4302 		rxbuf = &rxr->rx_buffers[i];
   4303 		if (rxbuf->buf != NULL) {
   4304 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4305 			    0, rxbuf->buf->m_pkthdr.len,
   4306 			    BUS_DMASYNC_POSTREAD);
   4307 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4308 			rxbuf->buf->m_flags |= M_PKTHDR;
   4309 			m_freem(rxbuf->buf);
   4310 			rxbuf->buf = NULL;
   4311 			rxbuf->flags = 0;
   4312 		}
   4313 	}
   4314 }
   4315 
   4316 
   4317 /*********************************************************************
   4318  *
   4319  *  Initialize a receive ring and its buffers.
   4320  *
   4321  **********************************************************************/
   4322 static int
   4323 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4324 {
   4325 	struct	adapter 	*adapter;
   4326 	struct ixgbe_rx_buf	*rxbuf;
   4327 #ifdef LRO
   4328 	struct ifnet		*ifp;
   4329 	struct lro_ctrl		*lro = &rxr->lro;
   4330 #endif /* LRO */
   4331 	int			rsize, error = 0;
   4332 #ifdef DEV_NETMAP
   4333 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4334 	struct netmap_slot *slot;
   4335 #endif /* DEV_NETMAP */
   4336 
   4337 	adapter = rxr->adapter;
   4338 #ifdef LRO
   4339 	ifp = adapter->ifp;
   4340 #endif /* LRO */
   4341 
   4342 	/* Clear the ring contents */
   4343 	IXGBE_RX_LOCK(rxr);
   4344 #ifdef DEV_NETMAP
   4345 	/* same as in ixgbe_setup_transmit_ring() */
   4346 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4347 #endif /* DEV_NETMAP */
   4348 	rsize = roundup2(adapter->num_rx_desc *
   4349 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4350 	bzero((void *)rxr->rx_base, rsize);
   4351 	/* Cache the size */
   4352 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4353 
   4354 	/* Free current RX buffer structs and their mbufs */
   4355 	ixgbe_free_receive_ring(rxr);
   4356 
   4357 	IXGBE_RX_UNLOCK(rxr);
   4358 
   4359 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4360 	 * or size of jumbo mbufs may have changed.
   4361 	 */
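         	/*
         	 * 2 * num_rx_desc clusters: one full ring's worth plus the
         	 * same again as headroom, presumably so the refresh path can
         	 * allocate replacements while received buffers are still in
         	 * flight up the stack.
         	 */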
   4362 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4363 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4364 
   4365 	IXGBE_RX_LOCK(rxr);
   4366 
   4367 	/* Now replenish the mbufs */
   4368 	for (int j = 0; j != rxr->num_desc; ++j) {
   4369 		struct mbuf	*mp;
   4370 
   4371 		rxbuf = &rxr->rx_buffers[j];
   4372 #ifdef DEV_NETMAP
   4373 		/*
   4374 		 * In netmap mode, fill the map and set the buffer
   4375 		 * address in the NIC ring, considering the offset
   4376 		 * between the netmap and NIC rings (see comment in
   4377 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4378 		 * an mbuf, so end the block with a continue;
   4379 		 */
   4380 		if (slot) {
   4381 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4382 			uint64_t paddr;
   4383 			void *addr;
   4384 
   4385 			addr = PNMB(na, slot + sj, &paddr);
   4386 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   4387 			/* Update descriptor and the cached value */
   4388 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4389 			rxbuf->addr = htole64(paddr);
   4390 			continue;
   4391 		}
   4392 #endif /* DEV_NETMAP */
   4393 		rxbuf->flags = 0;
   4394 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4395 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4396 		if (rxbuf->buf == NULL) {
   4397 			error = ENOBUFS;
    4398 			goto fail;
   4399 		}
   4400 		mp = rxbuf->buf;
   4401 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4402 		/* Get the memory mapping */
   4403 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4404 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4405 		if (error != 0)
    4406 			goto fail;
   4407 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4408 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4409 		/* Update the descriptor and the cached value */
   4410 		rxr->rx_base[j].read.pkt_addr =
   4411 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4412 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4413 	}
    4414 
   4416 	/* Setup our descriptor indices */
   4417 	rxr->next_to_check = 0;
   4418 	rxr->next_to_refresh = 0;
   4419 	rxr->lro_enabled = FALSE;
   4420 	rxr->rx_copies.ev_count = 0;
   4421 	rxr->rx_bytes.ev_count = 0;
   4422 	rxr->vtag_strip = FALSE;
   4423 
   4424 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4425 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4426 
   4427 	/*
   4428 	** Now set up the LRO interface:
   4429 	*/
   4430 	if (ixgbe_rsc_enable)
   4431 		ixgbe_setup_hw_rsc(rxr);
   4432 #ifdef LRO
   4433 	else if (ifp->if_capenable & IFCAP_LRO) {
   4434 		device_t dev = adapter->dev;
   4435 		int err = tcp_lro_init(lro);
   4436 		if (err) {
   4437 			device_printf(dev, "LRO Initialization failed!\n");
   4438 			goto fail;
   4439 		}
   4440 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4441 		rxr->lro_enabled = TRUE;
   4442 		lro->ifp = adapter->ifp;
   4443 	}
   4444 #endif /* LRO */
   4445 
   4446 	IXGBE_RX_UNLOCK(rxr);
   4447 	return (0);
   4448 
   4449 fail:
   4450 	ixgbe_free_receive_ring(rxr);
   4451 	IXGBE_RX_UNLOCK(rxr);
   4452 	return (error);
   4453 }
   4454 
   4455 /*********************************************************************
   4456  *
   4457  *  Initialize all receive rings.
   4458  *
   4459  **********************************************************************/
   4460 static int
   4461 ixgbe_setup_receive_structures(struct adapter *adapter)
   4462 {
   4463 	struct rx_ring *rxr = adapter->rx_rings;
   4464 	int j;
   4465 
   4466 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4467 		if (ixgbe_setup_receive_ring(rxr))
   4468 			goto fail;
   4469 
   4470 	return (0);
   4471 fail:
   4472 	/*
    4473 	 * Free RX buffers allocated so far; we only handle
    4474 	 * the rings that completed, since the failing case has
    4475 	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
   4476 	 */
   4477 	for (int i = 0; i < j; ++i) {
   4478 		rxr = &adapter->rx_rings[i];
   4479 		ixgbe_free_receive_ring(rxr);
   4480 	}
   4481 
   4482 	return (ENOBUFS);
   4483 }
   4484 
   4485 static void
   4486 ixgbe_initialise_rss_mapping(struct adapter *adapter)
   4487 {
   4488 	struct ixgbe_hw	*hw = &adapter->hw;
   4489 	uint32_t reta;
   4490 	int i, j, queue_id;
   4491 	uint32_t rss_key[10];
   4492 	uint32_t mrqc;
   4493 #ifdef	RSS
   4494 	uint32_t rss_hash_config;
   4495 #endif
   4496 
   4497 	/* Setup RSS */
   4498 	reta = 0;
   4499 
   4500 #ifdef	RSS
   4501 	/* Fetch the configured RSS key */
   4502 	rss_getkey((uint8_t *) &rss_key);
   4503 #else
   4504 	/* set up random bits */
   4505 	cprng_fast(&rss_key, sizeof(rss_key));
   4506 #endif
   4507 
   4508 	/* Set up the redirection table */
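         	/*
         	 * 128 one-byte entries are packed four at a time into the
         	 * 32 RETA registers, entry (n+0) in bits 7:0, (n+1) in bits
         	 * 15:8, and so on.  For example, with four queues and no RSS
         	 * option the bytes cycle 0x00, 0x11, 0x22, 0x33 and the first
         	 * register is written as 0x33221100.
         	 */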
   4509 	for (i = 0, j = 0; i < 128; i++, j++) {
    4510 		if (j == adapter->num_queues)
         			j = 0;
   4511 #ifdef	RSS
   4512 		/*
   4513 		 * Fetch the RSS bucket id for the given indirection entry.
   4514 		 * Cap it at the number of configured buckets (which is
   4515 		 * num_queues.)
   4516 		 */
   4517 		queue_id = rss_get_indirection_to_bucket(i);
   4518 		queue_id = queue_id % adapter->num_queues;
   4519 #else
   4520 		queue_id = (j * 0x11);
   4521 #endif
   4522 		/*
   4523 		 * The low 8 bits are for hash value (n+0);
   4524 		 * The next 8 bits are for hash value (n+1), etc.
   4525 		 */
   4526 		reta = reta >> 8;
   4527 		reta = reta | ( ((uint32_t) queue_id) << 24);
   4528 		if ((i & 3) == 3) {
   4529 			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4530 			reta = 0;
   4531 		}
   4532 	}
   4533 
   4534 	/* Now fill our hash function seeds */
   4535 	for (i = 0; i < 10; i++)
   4536 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
   4537 
   4538 	/* Perform hash on these packet types */
   4539 #ifdef	RSS
   4540 	mrqc = IXGBE_MRQC_RSSEN;
   4541 	rss_hash_config = rss_gethashconfig();
   4542 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
   4543 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
   4544 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
   4545 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
   4546 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
   4547 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
   4548 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
   4549 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   4550 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
   4551 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
   4552 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
   4553 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
   4554 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
   4555 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   4556 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
   4557 		device_printf(adapter->dev,
   4558 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
   4559 		    "but not supported\n", __func__);
   4560 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
   4561 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   4562 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
   4563 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4564 #else
   4565 	/*
   4566 	 * Disable UDP - IP fragments aren't currently being handled
   4567 	 * and so we end up with a mix of 2-tuple and 4-tuple
   4568 	 * traffic.
   4569 	 */
   4570 	mrqc = IXGBE_MRQC_RSSEN
   4571 	     | IXGBE_MRQC_RSS_FIELD_IPV4
   4572 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4573 #if 0
   4574 	     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4575 #endif
   4576 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4577 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4578 	     | IXGBE_MRQC_RSS_FIELD_IPV6
   4579 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4580 #if 0
   4581 	     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4582 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
   4583 #endif
   4584 	;
   4585 #endif /* RSS */
   4586 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4587 }
   4588 
   4589 
   4590 /*********************************************************************
   4591  *
   4592  *  Setup receive registers and features.
   4593  *
   4594  **********************************************************************/
   4595 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4596 
   4597 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
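         /*
          * BSIZEPKT_ROUNDUP rounds a buffer size up to the next multiple of
          * 1 KB before the shift below converts it into SRRCTL's 1 KB units
          * (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10); e.g. a 2048-byte cluster
          * gives (2048 + 1023) >> 10 = 2.
          */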
   4598 
   4599 static void
   4600 ixgbe_initialize_receive_units(struct adapter *adapter)
   4601 {
   4602 	int i;
   4603 	struct	rx_ring	*rxr = adapter->rx_rings;
   4604 	struct ixgbe_hw	*hw = &adapter->hw;
   4605 	struct ifnet   *ifp = adapter->ifp;
   4606 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4607 	u32		hlreg;
   4608 
   4609 
   4610 	/*
   4611 	 * Make sure receives are disabled while
   4612 	 * setting up the descriptor ring
   4613 	 */
   4614 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4615 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4616 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4617 
    4618 	/* Accept broadcasts, discard pause frames, pass MAC ctrl frames */
   4619 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4620 	fctrl |= IXGBE_FCTRL_BAM;
   4621 	fctrl |= IXGBE_FCTRL_DPF;
   4622 	fctrl |= IXGBE_FCTRL_PMCF;
   4623 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4624 
   4625 	/* Set for Jumbo Frames? */
   4626 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4627 	if (ifp->if_mtu > ETHERMTU)
   4628 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4629 	else
   4630 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4631 #ifdef DEV_NETMAP
    4632 	/* crcstrip is conditional in netmap (in RDRXCTL too?) */
   4633 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4634 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4635 	else
   4636 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4637 #endif /* DEV_NETMAP */
   4638 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4639 
   4640 	bufsz = (adapter->rx_mbuf_sz +
   4641 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4642 
   4643 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4644 		u64 rdba = rxr->rxdma.dma_paddr;
   4645 
   4646 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4647 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4648 			       (rdba & 0x00000000ffffffffULL));
   4649 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4650 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4651 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4652 
   4653 		/* Set up the SRRCTL register */
   4654 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4655 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4656 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4657 		srrctl |= bufsz;
   4658 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4659 
   4660 		/*
   4661 		 * Set DROP_EN iff we have no flow control and >1 queue.
   4662 		 * Note that srrctl was cleared shortly before during reset,
   4663 		 * so we do not need to clear the bit, but do it just in case
   4664 		 * this code is moved elsewhere.
   4665 		 */
   4666 		if (adapter->num_queues > 1 &&
   4667 		    adapter->fc == ixgbe_fc_none) {
   4668 			srrctl |= IXGBE_SRRCTL_DROP_EN;
   4669 		} else {
   4670 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   4671 		}
   4672 
   4673 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4674 
   4675 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4676 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4677 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4678 
   4679 		/* Set the processing limit */
   4680 		rxr->process_limit = ixgbe_rx_process_limit;
   4681 	}
   4682 
   4683 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4684 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4685 			      IXGBE_PSRTYPE_UDPHDR |
   4686 			      IXGBE_PSRTYPE_IPV4HDR |
   4687 			      IXGBE_PSRTYPE_IPV6HDR;
   4688 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4689 	}
   4690 
   4691 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4692 
   4693 	ixgbe_initialise_rss_mapping(adapter);
   4694 
   4695 	if (adapter->num_queues > 1) {
   4696 		/* RSS and RX IPP Checksum are mutually exclusive */
   4697 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4698 	}
   4699 
   4700 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4701 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4702 
   4703 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4704 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4705 
   4706 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4707 
   4708 	return;
   4709 }
   4710 
   4711 /*********************************************************************
   4712  *
   4713  *  Free all receive rings.
   4714  *
   4715  **********************************************************************/
   4716 static void
   4717 ixgbe_free_receive_structures(struct adapter *adapter)
   4718 {
   4719 	struct rx_ring *rxr = adapter->rx_rings;
   4720 
   4721 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   4722 
   4723 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4724 #ifdef LRO
   4725 		struct lro_ctrl		*lro = &rxr->lro;
   4726 #endif /* LRO */
   4727 		ixgbe_free_receive_buffers(rxr);
   4728 #ifdef LRO
   4729 		/* Free LRO memory */
   4730 		tcp_lro_free(lro);
   4731 #endif /* LRO */
   4732 		/* Free the ring memory as well */
   4733 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4734 		IXGBE_RX_LOCK_DESTROY(rxr);
   4735 	}
   4736 
   4737 	free(adapter->rx_rings, M_DEVBUF);
   4738 }
   4739 
   4740 
   4741 /*********************************************************************
   4742  *
   4743  *  Free receive ring data structures
   4744  *
   4745  **********************************************************************/
   4746 static void
   4747 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4748 {
   4749 	struct adapter		*adapter = rxr->adapter;
   4750 	struct ixgbe_rx_buf	*rxbuf;
   4751 
   4752 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   4753 
   4754 	/* Cleanup any existing buffers */
   4755 	if (rxr->rx_buffers != NULL) {
   4756 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4757 			rxbuf = &rxr->rx_buffers[i];
   4758 			if (rxbuf->buf != NULL) {
   4759 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4760 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4761 				    BUS_DMASYNC_POSTREAD);
   4762 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4763 				rxbuf->buf->m_flags |= M_PKTHDR;
   4764 				m_freem(rxbuf->buf);
   4765 			}
   4766 			rxbuf->buf = NULL;
   4767 			if (rxbuf->pmap != NULL) {
   4768 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4769 				rxbuf->pmap = NULL;
   4770 			}
   4771 		}
   4772 		if (rxr->rx_buffers != NULL) {
   4773 			free(rxr->rx_buffers, M_DEVBUF);
   4774 			rxr->rx_buffers = NULL;
   4775 		}
   4776 	}
   4777 
   4778 	if (rxr->ptag != NULL) {
   4779 		ixgbe_dma_tag_destroy(rxr->ptag);
   4780 		rxr->ptag = NULL;
   4781 	}
   4782 
   4783 	return;
   4784 }
   4785 
   4786 static __inline void
   4787 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4788 {
   4789 	int s;
   4790 
   4791 #ifdef LRO
   4792 	struct adapter	*adapter = ifp->if_softc;
   4793 	struct ethercom *ec = &adapter->osdep.ec;
   4794 
    4795         /*
    4796          * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4797          * was computed by the hardware and which carry no VLAN tag in the
    4798          * ethernet header.  For IPv6 we do not yet support ext. headers.
    4799          */
   4800         if (rxr->lro_enabled &&
   4801             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   4802             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4803             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4804             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   4805             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4806             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   4807             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   4808             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4809                 /*
    4810                  * Send to the stack if:
    4811                  *  - LRO is not enabled, or
    4812                  *  - there are no LRO resources, or
    4813                  *  - the LRO enqueue fails.
    4814                  */
   4815                 if (rxr->lro.lro_cnt != 0)
   4816                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   4817                                 return;
   4818         }
   4819 #endif /* LRO */
   4820 
   4821 	IXGBE_RX_UNLOCK(rxr);
   4822 
   4823 	s = splnet();
   4824 	/* Pass this up to any BPF listeners. */
   4825 	bpf_mtap(ifp, m);
   4826 	if_input(ifp, m);
   4827 	splx(s);
   4828 
   4829 	IXGBE_RX_LOCK(rxr);
   4830 }
   4831 
   4832 static __inline void
   4833 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4834 {
   4835 	struct ixgbe_rx_buf	*rbuf;
   4836 
   4837 	rbuf = &rxr->rx_buffers[i];
    4838 
   4840 	/*
   4841 	** With advanced descriptors the writeback
    4842 	** clobbers the buffer addresses, so it's easier
   4843 	** to just free the existing mbufs and take
   4844 	** the normal refresh path to get new buffers
   4845 	** and mapping.
   4846 	*/
   4847 
    4848 	if (rbuf->fmp != NULL) { /* Partial chain ? */
    4849 		rbuf->fmp->m_flags |= M_PKTHDR;
    4850 		m_freem(rbuf->fmp);
    4851 		rbuf->fmp = NULL;
    4852 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    4853 	} else if (rbuf->buf != NULL) {
    4854 		m_free(rbuf->buf);
    4855 		rbuf->buf = NULL;
    4856 	}
   4857 
   4858 	rbuf->flags = 0;
   4859 
   4860 	return;
   4861 }
   4862 
   4863 
   4864 /*********************************************************************
   4865  *
   4866  *  This routine executes in interrupt context. It replenishes
   4867  *  the mbufs in the descriptor and sends data which has been
   4868  *  dma'ed into host memory to upper layer.
   4869  *
   4870  *  We loop at most count times if count is > 0, or until done if
   4871  *  count < 0.
   4872  *
   4873  *  Return TRUE for more work, FALSE for all clean.
   4874  *********************************************************************/
   4875 static bool
   4876 ixgbe_rxeof(struct ix_queue *que)
   4877 {
   4878 	struct adapter		*adapter = que->adapter;
   4879 	struct rx_ring		*rxr = que->rxr;
   4880 	struct ifnet		*ifp = adapter->ifp;
   4881 #ifdef LRO
   4882 	struct lro_ctrl		*lro = &rxr->lro;
   4883 	struct lro_entry	*queued;
   4884 #endif /* LRO */
   4885 	int			i, nextp, processed = 0;
   4886 	u32			staterr = 0;
   4887 	u16			count = rxr->process_limit;
   4888 	union ixgbe_adv_rx_desc	*cur;
   4889 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4890 #ifdef RSS
   4891 	u16			pkt_info;
   4892 #endif
   4893 
   4894 	IXGBE_RX_LOCK(rxr);
   4895 
   4896 #ifdef DEV_NETMAP
   4897 	/* Same as the txeof routine: wakeup clients on intr. */
   4898 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   4899 		IXGBE_RX_UNLOCK(rxr);
   4900 		return (FALSE);
   4901 	}
   4902 #endif /* DEV_NETMAP */
   4903 
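         	/*
         	 * Note: count is decremented once per descriptor examined;
         	 * the ring index i only advances at the next_desc label at
         	 * the bottom of the loop.
         	 */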
   4904 	for (i = rxr->next_to_check; count != 0;) {
   4905 		struct mbuf	*sendmp, *mp;
   4906 		u32		rsc, ptype;
   4907 		u16		len;
   4908 		u16		vtag = 0;
   4909 		bool		eop;
   4910 
   4911 		/* Sync the ring. */
   4912 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4913 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4914 
   4915 		cur = &rxr->rx_base[i];
   4916 		staterr = le32toh(cur->wb.upper.status_error);
   4917 #ifdef RSS
   4918 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   4919 #endif
   4920 
   4921 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4922 			break;
   4923 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4924 			break;
   4925 
   4926 		count--;
   4927 		sendmp = NULL;
   4928 		nbuf = NULL;
   4929 		rsc = 0;
   4930 		cur->wb.upper.status_error = 0;
   4931 		rbuf = &rxr->rx_buffers[i];
   4932 		mp = rbuf->buf;
   4933 
   4934 		len = le16toh(cur->wb.upper.length);
   4935 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4936 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4937 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4938 
   4939 		/* Make sure bad packets are discarded */
   4940 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   4941 			rxr->rx_discarded.ev_count++;
   4942 			ixgbe_rx_discard(rxr, i);
   4943 			goto next_desc;
   4944 		}
   4945 
    4946 		/*
    4947 		** On the 82599, which supports a hardware
    4948 		** LRO (called HW RSC), packets need not be
    4949 		** fragmented across sequential descriptors;
    4950 		** rather, the next descriptor is indicated
    4951 		** in bits of the current descriptor.  This
    4952 		** also means that we might process more than
    4953 		** one packet at a time, something that has
    4954 		** never been true before; it required
    4955 		** eliminating global chain pointers in favor
    4956 		** of what we are doing here.  -jfv
    4957 		*/
   4958 		if (!eop) {
   4959 			/*
   4960 			** Figure out the next descriptor
   4961 			** of this frame.
   4962 			*/
   4963 			if (rxr->hw_rsc == TRUE) {
   4964 				rsc = ixgbe_rsc_count(cur);
   4965 				rxr->rsc_num += (rsc - 1);
   4966 			}
   4967 			if (rsc) { /* Get hardware index */
   4968 				nextp = ((staterr &
   4969 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4970 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4971 			} else { /* Just sequential */
   4972 				nextp = i + 1;
   4973 				if (nextp == adapter->num_rx_desc)
   4974 					nextp = 0;
   4975 			}
   4976 			nbuf = &rxr->rx_buffers[nextp];
   4977 			prefetch(nbuf);
   4978 		}
   4979 		/*
   4980 		** Rather than using the fmp/lmp global pointers
   4981 		** we now keep the head of a packet chain in the
   4982 		** buffer struct and pass this along from one
   4983 		** descriptor to the next, until we get EOP.
   4984 		*/
   4985 		mp->m_len = len;
    4986 		/*
    4987 		** See if there is a stored head from a previous
    4988 		** descriptor; if so this is a continuation frame.
    4989 		*/
   4990 		sendmp = rbuf->fmp;
   4991 		if (sendmp != NULL) {  /* secondary frag */
   4992 			rbuf->buf = rbuf->fmp = NULL;
   4993 			mp->m_flags &= ~M_PKTHDR;
   4994 			sendmp->m_pkthdr.len += mp->m_len;
   4995 		} else {
   4996 			/*
   4997 			 * Optimize.  This might be a small packet,
   4998 			 * maybe just a TCP ACK.  Do a fast copy that
   4999 			 * is cache aligned into a new mbuf, and
   5000 			 * leave the old mbuf+cluster for re-use.
   5001 			 */
   5002 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   5003 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   5004 				if (sendmp != NULL) {
   5005 					sendmp->m_data +=
   5006 					    IXGBE_RX_COPY_ALIGN;
   5007 					ixgbe_bcopy(mp->m_data,
   5008 					    sendmp->m_data, len);
   5009 					sendmp->m_len = len;
   5010 					rxr->rx_copies.ev_count++;
   5011 					rbuf->flags |= IXGBE_RX_COPY;
   5012 				}
   5013 			}
   5014 			if (sendmp == NULL) {
   5015 				rbuf->buf = rbuf->fmp = NULL;
   5016 				sendmp = mp;
   5017 			}
   5018 
   5019 			/* first desc of a non-ps chain */
   5020 			sendmp->m_flags |= M_PKTHDR;
   5021 			sendmp->m_pkthdr.len = mp->m_len;
   5022 		}
   5023 		++processed;
   5024 
   5025 		/* Pass the head pointer on */
   5026 		if (eop == 0) {
   5027 			nbuf->fmp = sendmp;
   5028 			sendmp = NULL;
   5029 			mp->m_next = nbuf->buf;
   5030 		} else { /* Sending this frame */
   5031 			m_set_rcvif(sendmp, ifp);
   5032 			ifp->if_ipackets++;
   5033 			rxr->rx_packets.ev_count++;
   5034 			/* capture data for AIM */
   5035 			rxr->bytes += sendmp->m_pkthdr.len;
   5036 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   5037 			/* Process vlan info */
   5038 			if ((rxr->vtag_strip) &&
   5039 			    (staterr & IXGBE_RXD_STAT_VP))
   5040 				vtag = le16toh(cur->wb.upper.vlan);
   5041 			if (vtag) {
   5042 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   5043 				    printf("%s: could not apply VLAN "
   5044 					"tag", __func__));
   5045 			}
   5046 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   5047 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   5048 				   &adapter->stats);
   5049 			}
   5050 #if __FreeBSD_version >= 800000
   5051 #ifdef RSS
   5052 			sendmp->m_pkthdr.flowid =
   5053 			    le32toh(cur->wb.lower.hi_dword.rss);
   5054 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   5055 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   5056 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   5057 				break;
   5058 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   5059 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   5060 				break;
   5061 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   5062 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   5063 				break;
   5064 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   5065 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   5066 				break;
   5067 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   5068 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   5069 				break;
   5070 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   5071 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   5072 				break;
   5073 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   5074 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   5075 				break;
   5076 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   5077 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   5078 				break;
   5079 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   5080 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   5081 				break;
   5082 			default:
   5084 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5085 				break;
   5086 			}
   5087 #else /* RSS */
   5088 			sendmp->m_pkthdr.flowid = que->msix;
   5089 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5090 #endif /* RSS */
   5091 #endif /* FreeBSD_version */
   5092 		}
   5093 next_desc:
   5094 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   5095 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   5096 
   5097 		/* Advance our pointers to the next descriptor. */
   5098 		if (++i == rxr->num_desc)
   5099 			i = 0;
   5100 
   5101 		/* Now send to the stack or do LRO */
   5102 		if (sendmp != NULL) {
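         			/*
         			 * ixgbe_rx_input() drops the RX lock around
         			 * if_input(), so publish the ring index first
         			 * and reload it afterwards.
         			 */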
   5103 			rxr->next_to_check = i;
   5104 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   5105 			i = rxr->next_to_check;
   5106 		}
   5107 
    5108 		/* Refresh mbufs every eight descriptors */
   5109 		if (processed == 8) {
   5110 			ixgbe_refresh_mbufs(rxr, i);
   5111 			processed = 0;
   5112 		}
   5113 	}
   5114 
   5115 	/* Refresh any remaining buf structs */
   5116 	if (ixgbe_rx_unrefreshed(rxr))
   5117 		ixgbe_refresh_mbufs(rxr, i);
   5118 
   5119 	rxr->next_to_check = i;
   5120 
   5121 #ifdef LRO
   5122 	/*
   5123 	 * Flush any outstanding LRO work
   5124 	 */
   5125 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   5126 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   5127 		tcp_lro_flush(lro, queued);
   5128 	}
   5129 #endif /* LRO */
   5130 
   5131 	IXGBE_RX_UNLOCK(rxr);
   5132 
   5133 	/*
   5134 	** Still have cleaning to do?
   5135 	*/
   5136 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   5137 		return true;
   5138 	else
   5139 		return false;
   5140 }
   5141 
   5142 
   5143 /*********************************************************************
   5144  *
   5145  *  Verify that the hardware indicated that the checksum is valid.
   5146  *  Inform the stack about the status of checksum so that stack
   5147  *  doesn't spend time verifying the checksum.
   5148  *
   5149  *********************************************************************/
   5150 static void
   5151 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   5152     struct ixgbe_hw_stats *stats)
   5153 {
   5154 	u16	status = (u16) staterr;
   5155 	u8	errors = (u8) (staterr >> 24);
   5156 #if 0
   5157 	bool	sctp = FALSE;
   5158 
   5159 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   5160 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   5161 		sctp = TRUE;
   5162 #endif
   5163 
   5164 	if (status & IXGBE_RXD_STAT_IPCS) {
   5165 		stats->ipcs.ev_count++;
   5166 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   5167 			/* IP Checksum Good */
   5168 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   5169 
   5170 		} else {
   5171 			stats->ipcs_bad.ev_count++;
   5172 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   5173 		}
   5174 	}
   5175 	if (status & IXGBE_RXD_STAT_L4CS) {
   5176 		stats->l4cs.ev_count++;
   5177 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   5178 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   5179 			mp->m_pkthdr.csum_flags |= type;
   5180 		} else {
   5181 			stats->l4cs_bad.ev_count++;
   5182 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   5183 		}
   5184 	}
   5185 	return;
   5186 }
   5187 
   5188 
   5189 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   5190 /*
    5191 ** This routine is run via a vlan config EVENT; it
    5192 ** enables us to use the HW Filter table since we
    5193 ** can get the vlan id.  This just creates the entry
    5194 ** in the soft version of the VFTA; init will
    5195 ** repopulate the real table.
   5196 */
   5197 static void
   5198 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5199 {
   5200 	struct adapter	*adapter = ifp->if_softc;
   5201 	u16		index, bit;
   5202 
    5203 	if (ifp->if_softc != arg)	/* Not our event */
   5204 		return;
   5205 
   5206 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5207 		return;
   5208 
   5209 	IXGBE_CORE_LOCK(adapter);
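         	/*
         	 * The VFTA is an array of 128 32-bit words: bits 11:5 of
         	 * the vlan id select the word, bits 4:0 the bit within it.
         	 */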
   5210 	index = (vtag >> 5) & 0x7F;
   5211 	bit = vtag & 0x1F;
   5212 	adapter->shadow_vfta[index] |= (1 << bit);
   5213 	ixgbe_setup_vlan_hw_support(adapter);
   5214 	IXGBE_CORE_UNLOCK(adapter);
   5215 }
   5216 
   5217 /*
    5218 ** This routine is run via a vlan unconfig
    5219 ** EVENT; it removes our entry from the soft
    5220 ** VFTA.
   5221 */
   5222 static void
   5223 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5224 {
   5225 	struct adapter	*adapter = ifp->if_softc;
   5226 	u16		index, bit;
   5227 
    5228 	if (ifp->if_softc != arg)
   5229 		return;
   5230 
   5231 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5232 		return;
   5233 
   5234 	IXGBE_CORE_LOCK(adapter);
   5235 	index = (vtag >> 5) & 0x7F;
   5236 	bit = vtag & 0x1F;
   5237 	adapter->shadow_vfta[index] &= ~(1 << bit);
   5238 	/* Re-init to load the changes */
   5239 	ixgbe_setup_vlan_hw_support(adapter);
   5240 	IXGBE_CORE_UNLOCK(adapter);
   5241 }
   5242 #endif
   5243 
   5244 static void
   5245 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   5246 {
   5247 	struct ethercom *ec = &adapter->osdep.ec;
   5248 	struct ixgbe_hw *hw = &adapter->hw;
   5249 	struct rx_ring	*rxr;
   5250 	u32		ctrl;
    5251 
   5253 	/*
    5254 	** We get here through init_locked, meaning a
    5255 	** soft reset; this has already cleared the
    5256 	** VFTA and other state, so if no vlans have
    5257 	** been registered do nothing.
   5258 	*/
    5259 	if (!VLAN_ATTACHED(ec)) {
   5260 		return;
   5261 	}
   5262 
   5263 	/* Setup the queues for vlans */
   5264 	for (int i = 0; i < adapter->num_queues; i++) {
   5265 		rxr = &adapter->rx_rings[i];
   5266 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5267 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5268 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5269 			ctrl |= IXGBE_RXDCTL_VME;
   5270 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5271 		}
   5272 		rxr->vtag_strip = TRUE;
   5273 	}
   5274 
   5275 	if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
   5276 		return;
   5277 	/*
    5278 	** A soft reset zeroes out the VFTA, so
   5279 	** we need to repopulate it now.
   5280 	*/
   5281 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5282 		if (adapter->shadow_vfta[i] != 0)
   5283 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5284 			    adapter->shadow_vfta[i]);
   5285 
   5286 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5287 	/* Enable the Filter Table if enabled */
   5288 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5289 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5290 		ctrl |= IXGBE_VLNCTRL_VFE;
   5291 	}
   5292 	if (hw->mac.type == ixgbe_mac_82598EB)
   5293 		ctrl |= IXGBE_VLNCTRL_VME;
   5294 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5295 }
   5296 
   5297 static void
   5298 ixgbe_enable_intr(struct adapter *adapter)
   5299 {
   5300 	struct ixgbe_hw	*hw = &adapter->hw;
   5301 	struct ix_queue	*que = adapter->queues;
   5302 	u32		mask, fwsm;
   5303 
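         	/*
         	** Gather the non-queue causes first; the RX/TX queue
         	** interrupts are enabled individually further below.
         	*/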
   5304 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5305 	/* Enable Fan Failure detection */
   5306 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5307 		mask |= IXGBE_EIMS_GPI_SDP1;
   5308 
   5309 	switch (adapter->hw.mac.type) {
   5310 		case ixgbe_mac_82599EB:
   5311 			mask |= IXGBE_EIMS_ECC;
   5312 			mask |= IXGBE_EIMS_GPI_SDP0;
   5313 			mask |= IXGBE_EIMS_GPI_SDP1;
   5314 			mask |= IXGBE_EIMS_GPI_SDP2;
   5315 #ifdef IXGBE_FDIR
   5316 			mask |= IXGBE_EIMS_FLOW_DIR;
   5317 #endif
   5318 			break;
   5319 		case ixgbe_mac_X540:
   5320 			mask |= IXGBE_EIMS_ECC;
   5321 			/* Detect if Thermal Sensor is enabled */
   5322 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5323 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5324 				mask |= IXGBE_EIMS_TS;
   5325 #ifdef IXGBE_FDIR
   5326 			mask |= IXGBE_EIMS_FLOW_DIR;
   5327 #endif
   5328 		/* falls through */
   5329 		default:
   5330 			break;
   5331 	}
   5332 
   5333 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5334 
   5335 	/* With RSS we use auto clear */
   5336 	if (adapter->msix_mem) {
   5337 		mask = IXGBE_EIMS_ENABLE_MASK;
   5338 		/* Don't autoclear Link */
   5339 		mask &= ~IXGBE_EIMS_OTHER;
   5340 		mask &= ~IXGBE_EIMS_LSC;
   5341 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5342 	}
   5343 
   5344 	/*
   5345 	** Now enable all queues, this is done separately to
   5346 	** allow for handling the extended (beyond 32) MSIX
   5347 	** vectors that can be used by 82599
   5348 	*/
    5349 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5350 		ixgbe_enable_queue(adapter, que->msix);
   5351 
   5352 	IXGBE_WRITE_FLUSH(hw);
   5353 
   5354 	return;
   5355 }
   5356 
   5357 static void
   5358 ixgbe_disable_intr(struct adapter *adapter)
   5359 {
   5360 	if (adapter->msix_mem)
   5361 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5362 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5363 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5364 	} else {
   5365 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5366 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5367 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5368 	}
   5369 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5370 	return;
   5371 }
   5372 
   5373 u16
   5374 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5375 {
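         	/*
         	 * pci_conf_read(9) operates on 32-bit aligned registers,
         	 * so a 16-bit config read must fetch the containing dword
         	 * and extract the requested half.
         	 */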
   5376 	switch (reg % 4) {
   5377 	case 0:
   5378 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5379 		    __BITS(15, 0);
   5380 	case 2:
   5381 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5382 		    reg - 2), __BITS(31, 16));
   5383 	default:
    5384 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5385 		break;
   5386 	}
   5387 }
   5388 
   5389 void
   5390 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5391 {
   5392 	pcireg_t old;
   5393 
   5394 	switch (reg % 4) {
   5395 	case 0:
   5396 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5397 		    __BITS(31, 16);
   5398 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5399 		break;
   5400 	case 2:
   5401 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5402 		    __BITS(15, 0);
   5403 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5404 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5405 		break;
   5406 	default:
    5407 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5408 		break;
   5409 	}
   5410 
   5411 	return;
   5412 }
   5413 
   5414 /*
   5415 ** Get the width and transaction speed of
   5416 ** the slot this adapter is plugged into.
   5417 */
   5418 static void
   5419 ixgbe_get_slot_info(struct ixgbe_hw *hw)
   5420 {
   5421 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
   5422 	struct ixgbe_mac_info	*mac = &hw->mac;
   5423 	u16			link;
   5424 
   5425 	/* For most devices simply call the shared code routine */
   5426 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
   5427 		ixgbe_get_bus_info(hw);
   5428 		goto display;
   5429 	}
   5430 
   5431 	/*
   5432 	** For the Quad port adapter we need to parse back
   5433 	** up the PCI tree to find the speed of the expansion
   5434 	** slot into which this adapter is plugged. A bit more work.
   5435 	*/
   5436 	dev = device_parent(device_parent(dev));
   5437 #ifdef IXGBE_DEBUG
   5438 	device_printf(dev, "parent pcib = %x,%x,%x\n",
   5439 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5440 #endif
   5441 	dev = device_parent(device_parent(dev));
   5442 #ifdef IXGBE_DEBUG
   5443 	device_printf(dev, "slot pcib = %x,%x,%x\n",
   5444 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5445 #endif
   5446 	/* Now get the PCI Express Capabilities offset */
   5447 	/* ...and read the Link Status Register */
   5448 	link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
   5449 	switch (link & IXGBE_PCI_LINK_WIDTH) {
   5450 	case IXGBE_PCI_LINK_WIDTH_1:
   5451 		hw->bus.width = ixgbe_bus_width_pcie_x1;
   5452 		break;
   5453 	case IXGBE_PCI_LINK_WIDTH_2:
   5454 		hw->bus.width = ixgbe_bus_width_pcie_x2;
   5455 		break;
   5456 	case IXGBE_PCI_LINK_WIDTH_4:
   5457 		hw->bus.width = ixgbe_bus_width_pcie_x4;
   5458 		break;
   5459 	case IXGBE_PCI_LINK_WIDTH_8:
   5460 		hw->bus.width = ixgbe_bus_width_pcie_x8;
   5461 		break;
   5462 	default:
   5463 		hw->bus.width = ixgbe_bus_width_unknown;
   5464 		break;
   5465 	}
   5466 
   5467 	switch (link & IXGBE_PCI_LINK_SPEED) {
   5468 	case IXGBE_PCI_LINK_SPEED_2500:
   5469 		hw->bus.speed = ixgbe_bus_speed_2500;
   5470 		break;
   5471 	case IXGBE_PCI_LINK_SPEED_5000:
   5472 		hw->bus.speed = ixgbe_bus_speed_5000;
   5473 		break;
   5474 	case IXGBE_PCI_LINK_SPEED_8000:
   5475 		hw->bus.speed = ixgbe_bus_speed_8000;
   5476 		break;
   5477 	default:
   5478 		hw->bus.speed = ixgbe_bus_speed_unknown;
   5479 		break;
   5480 	}
   5481 
   5482 	mac->ops.set_lan_id(hw);
   5483 
   5484 display:
    5485 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
   5486 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
   5487 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
   5488 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
   5489 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
   5490 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
   5491 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
   5492 	    ("Unknown"));
   5493 
   5494 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5495 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
   5496 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
   5497 		device_printf(dev, "PCI-Express bandwidth available"
   5498 		    " for this card\n     is not sufficient for"
   5499 		    " optimal performance.\n");
   5500 		device_printf(dev, "For optimal performance a x8 "
   5501 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
    5502 	}
   5503 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5504 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
   5505 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
   5506 		device_printf(dev, "PCI-Express bandwidth available"
   5507 		    " for this card\n     is not sufficient for"
   5508 		    " optimal performance.\n");
   5509 		device_printf(dev, "For optimal performance a x8 "
   5510 		    "PCIE Gen3 slot is required.\n");
    5511 	}
   5512 
   5513 	return;
   5514 }
   5515 
   5516 
   5517 /*
   5518 ** Setup the correct IVAR register for a particular MSIX interrupt
   5519 **   (yes this is all very magic and confusing :)
   5520 **  - entry is the register array entry
   5521 **  - vector is the MSIX vector for this queue
   5522 **  - type is RX/TX/MISC
   5523 */
   5524 static void
   5525 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5526 {
   5527 	struct ixgbe_hw *hw = &adapter->hw;
   5528 	u32 ivar, index;
   5529 
   5530 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5531 
   5532 	switch (hw->mac.type) {
   5533 
   5534 	case ixgbe_mac_82598EB:
   5535 		if (type == -1)
   5536 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5537 		else
   5538 			entry += (type * 64);
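         		/*
         		 * Each IVAR register holds four 8-bit entries:
         		 * entry >> 2 picks the register, entry & 0x3 the
         		 * byte within it.
         		 */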
   5539 		index = (entry >> 2) & 0x1F;
   5540 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5541 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5542 		ivar |= (vector << (8 * (entry & 0x3)));
   5543 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5544 		break;
   5545 
   5546 	case ixgbe_mac_82599EB:
   5547 	case ixgbe_mac_X540:
   5548 		if (type == -1) { /* MISC IVAR */
   5549 			index = (entry & 1) * 8;
   5550 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5551 			ivar &= ~(0xFF << index);
   5552 			ivar |= (vector << index);
   5553 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5554 		} else {	/* RX/TX IVARS */
   5555 			index = (16 * (entry & 1)) + (8 * type);
   5556 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5557 			ivar &= ~(0xFF << index);
   5558 			ivar |= (vector << index);
   5559 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
    5560 		}
         		break;
    5561 
   5562 	default:
   5563 		break;
   5564 	}
   5565 }
   5566 
   5567 static void
   5568 ixgbe_configure_ivars(struct adapter *adapter)
   5569 {
   5570 	struct  ix_queue *que = adapter->queues;
   5571 	u32 newitr;
   5572 
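         	/*
         	 * The EITR interval field lives in bits 3:11, in 2us
         	 * units, so (4000000 / rate) is the pre-shifted field
         	 * value for "rate" interrupts per second.
         	 */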
   5573 	if (ixgbe_max_interrupt_rate > 0)
   5574 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5575 	else
   5576 		newitr = 0;
   5577 
    5578 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5579 		/* First the RX queue entry */
    5580 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5581 		/* ... and the TX */
    5582 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5583 		/* Set an Initial EITR value */
    5584 		IXGBE_WRITE_REG(&adapter->hw,
    5585 		    IXGBE_EITR(que->msix), newitr);
    5586 	}
   5587 
   5588 	/* For the Link interrupt */
    5589 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5590 }
   5591 
   5592 /*
   5593 ** ixgbe_sfp_probe - called in the local timer to
   5594 ** determine if a port had optics inserted.
   5595 */
    5596 static bool
         ixgbe_sfp_probe(struct adapter *adapter)
   5597 {
   5598 	struct ixgbe_hw	*hw = &adapter->hw;
   5599 	device_t	dev = adapter->dev;
   5600 	bool		result = FALSE;
   5601 
   5602 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5603 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5604 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5605 		if (ret)
   5606                         goto out;
   5607 		ret = hw->phy.ops.reset(hw);
   5608 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5609 			device_printf(dev, "Unsupported SFP+ module detected!\n");
   5610 			device_printf(dev, "Reload driver with supported module.\n");
   5611 			adapter->sfp_probe = FALSE;
   5612                         goto out;
   5613 		} else
    5614 			device_printf(dev, "SFP+ module detected!\n");
   5615 		/* We now have supported optics */
   5616 		adapter->sfp_probe = FALSE;
   5617 		/* Set the optics type so system reports correctly */
   5618 		ixgbe_setup_optics(adapter);
   5619 		result = TRUE;
   5620 	}
   5621 out:
   5622 	return (result);
   5623 }
   5624 
   5625 /*
   5626 ** Tasklet handler for MSIX Link interrupts
   5627 **  - do outside interrupt since it might sleep
   5628 */
   5629 static void
   5630 ixgbe_handle_link(void *context)
   5631 {
   5632 	struct adapter  *adapter = context;
   5633 
   5634 	if (ixgbe_check_link(&adapter->hw,
   5635 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
   5636 	    ixgbe_update_link_status(adapter);
   5637 }
   5638 
   5639 /*
   5640 ** Tasklet for handling SFP module interrupts
   5641 */
   5642 static void
   5643 ixgbe_handle_mod(void *context)
   5644 {
   5645 	struct adapter  *adapter = context;
   5646 	struct ixgbe_hw *hw = &adapter->hw;
   5647 	device_t	dev = adapter->dev;
   5648 	u32 err;
   5649 
   5650 	err = hw->phy.ops.identify_sfp(hw);
   5651 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5652 		device_printf(dev,
   5653 		    "Unsupported SFP+ module type was detected.\n");
   5654 		return;
   5655 	}
   5656 	err = hw->mac.ops.setup_sfp(hw);
   5657 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5658 		device_printf(dev,
   5659 		    "Setup failure - unsupported SFP+ module type.\n");
   5660 		return;
   5661 	}
   5662 	softint_schedule(adapter->msf_si);
   5663 	return;
   5664 }
   5665 
   5666 
   5667 /*
   5668 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5669 */
   5670 static void
   5671 ixgbe_handle_msf(void *context)
   5672 {
   5673 	struct adapter  *adapter = context;
   5674 	struct ixgbe_hw *hw = &adapter->hw;
   5675 	u32 autoneg;
   5676 	bool negotiate;
   5677 
   5678 	autoneg = hw->phy.autoneg_advertised;
   5679 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5680 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5681 	else
   5682 		negotiate = 0;
   5683 	if (hw->mac.ops.setup_link)
   5684 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5685 	return;
   5686 }
   5687 
   5688 #ifdef IXGBE_FDIR
   5689 /*
   5690 ** Tasklet for reinitializing the Flow Director filter table
   5691 */
   5692 static void
   5693 ixgbe_reinit_fdir(void *context)
   5694 {
   5695 	struct adapter  *adapter = context;
   5696 	struct ifnet   *ifp = adapter->ifp;
   5697 
   5698 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5699 		return;
   5700 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5701 	adapter->fdir_reinit = 0;
   5702 	/* re-enable flow director interrupts */
   5703 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5704 	/* Restart the interface */
   5705 	ifp->if_flags |= IFF_RUNNING;
   5706 	return;
   5707 }
   5708 #endif
   5709 
   5710 /**********************************************************************
   5711  *
   5712  *  Update the board statistics counters.
   5713  *
   5714  **********************************************************************/
   5715 static void
   5716 ixgbe_update_stats_counters(struct adapter *adapter)
   5717 {
   5718 	struct ifnet   *ifp = adapter->ifp;
   5719 	struct ixgbe_hw *hw = &adapter->hw;
   5720 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5721 	u64  total_missed_rx = 0;
   5722 	uint64_t crcerrs, rlec;
   5723 
   5724 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5725 	adapter->stats.crcerrs.ev_count += crcerrs;
   5726 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5727 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5728 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5729 
   5730 	/*
   5731 	** Note: these are for the 8 possible traffic classes,
    5732 	** Note: these are for the 8 possible traffic classes,
    5733 	**	 which are unused in the current implementation,
    5734 	**	 so only class 0 should contain real data.
   5735 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5736 		int j = i % adapter->num_queues;
   5737 		u32 mp;
   5738 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5739 		/* missed_rx tallies misses for the gprc workaround */
   5740 		missed_rx += mp;
   5741 		/* global total per queue */
    5742 		adapter->stats.mpc[j].ev_count += mp;
   5743 		/* Running comprehensive total for stats display */
   5744 		total_missed_rx += mp;
   5745 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5746 			adapter->stats.rnbc[j] +=
   5747 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5748 			adapter->stats.qbtc[j].ev_count +=
   5749 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5750 			adapter->stats.qbrc[j].ev_count +=
   5751 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5752 			adapter->stats.pxonrxc[j].ev_count +=
   5753 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5754 		} else {
   5755 			adapter->stats.pxonrxc[j].ev_count +=
   5756 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5757 		}
   5758 		adapter->stats.pxontxc[j].ev_count +=
   5759 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5760 		adapter->stats.pxofftxc[j].ev_count +=
   5761 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5762 		adapter->stats.pxoffrxc[j].ev_count +=
   5763 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5764 		adapter->stats.pxon2offc[j].ev_count +=
   5765 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5766 	}
   5767 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5768 		int j = i % adapter->num_queues;
   5769 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5770 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5771 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5772 	}
   5773 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5774 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5775 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5776 	adapter->stats.rlec.ev_count += rlec;
   5777 
   5778 	/* Hardware workaround, gprc counts missed packets */
   5779 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5780 
   5781 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5782 	adapter->stats.lxontxc.ev_count += lxon;
   5783 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5784 	adapter->stats.lxofftxc.ev_count += lxoff;
   5785 	total = lxon + lxoff;
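         	/*
         	 * XON/XOFF pause frames are included in the GPTC/GOTC
         	 * counts, so back them out below (each pause frame is a
         	 * minimum-sized ethernet frame).
         	 */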
   5786 
   5787 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5788 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5789 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5790 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5791 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5792 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5793 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5794 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5795 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5796 	} else {
   5797 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5798 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5799 		/* 82598 only has a counter in the high register */
   5800 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5801 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5802 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5803 	}
   5804 
   5805 	/*
   5806 	 * Workaround: mprc hardware is incorrectly counting
   5807 	 * broadcasts, so for now we subtract those.
   5808 	 */
   5809 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5810 	adapter->stats.bprc.ev_count += bprc;
   5811 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5812 
   5813 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5814 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5815 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5816 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5817 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5818 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5819 
   5820 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5821 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5822 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5823 
   5824 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5825 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5826 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5827 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5828 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5829 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5830 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5831 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5832 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5833 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5834 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5835 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5836 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5837 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5838 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5839 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5840 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5841 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5842 
   5843 	/* Only read FCOE on 82599 */
   5844 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5845 		adapter->stats.fcoerpdc.ev_count +=
   5846 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5847 		adapter->stats.fcoeprc.ev_count +=
   5848 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5849 		adapter->stats.fcoeptc.ev_count +=
   5850 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5851 		adapter->stats.fcoedwrc.ev_count +=
   5852 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5853 		adapter->stats.fcoedwtc.ev_count +=
   5854 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5855 	}
   5856 
   5857 	/* Fill out the OS statistics structure */
   5858 	/*
   5859 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5860 	 * adapter->stats counters. It's required to make ifconfig -z
    5861 	 * (SIOCZIFDATA) work.
   5862 	 */
   5863 	ifp->if_collisions = 0;
   5864 
   5865 	/* Rx Errors */
   5866 	ifp->if_iqdrops += total_missed_rx;
   5867 	ifp->if_ierrors += crcerrs + rlec;
   5868 }
   5869 
   5870 /** ixgbe_sysctl_tdh_handler - Handler function
   5871  *  Retrieves the TDH value from the hardware
   5872  */
   5873 static int
   5874 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5875 {
   5876 	struct sysctlnode node;
   5877 	uint32_t val;
   5878 	struct tx_ring *txr;
   5879 
   5880 	node = *rnode;
   5881 	txr = (struct tx_ring *)node.sysctl_data;
   5882 	if (txr == NULL)
   5883 		return 0;
   5884 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5885 	node.sysctl_data = &val;
   5886 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5887 }
   5888 
   5889 /** ixgbe_sysctl_tdt_handler - Handler function
   5890  *  Retrieves the TDT value from the hardware
   5891  */
   5892 static int
   5893 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5894 {
   5895 	struct sysctlnode node;
   5896 	uint32_t val;
   5897 	struct tx_ring *txr;
   5898 
   5899 	node = *rnode;
   5900 	txr = (struct tx_ring *)node.sysctl_data;
   5901 	if (txr == NULL)
   5902 		return 0;
   5903 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5904 	node.sysctl_data = &val;
   5905 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5906 }
   5907 
   5908 /** ixgbe_sysctl_rdh_handler - Handler function
   5909  *  Retrieves the RDH value from the hardware
   5910  */
   5911 static int
   5912 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5913 {
   5914 	struct sysctlnode node;
   5915 	uint32_t val;
   5916 	struct rx_ring *rxr;
   5917 
   5918 	node = *rnode;
   5919 	rxr = (struct rx_ring *)node.sysctl_data;
   5920 	if (rxr == NULL)
   5921 		return 0;
   5922 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5923 	node.sysctl_data = &val;
   5924 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5925 }
   5926 
   5927 /** ixgbe_sysctl_rdt_handler - Handler function
   5928  *  Retrieves the RDT value from the hardware
   5929  */
   5930 static int
   5931 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5932 {
   5933 	struct sysctlnode node;
   5934 	uint32_t val;
   5935 	struct rx_ring *rxr;
   5936 
   5937 	node = *rnode;
   5938 	rxr = (struct rx_ring *)node.sysctl_data;
   5939 	if (rxr == NULL)
   5940 		return 0;
   5941 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5942 	node.sysctl_data = &val;
   5943 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5944 }
   5945 
   5946 static int
   5947 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5948 {
   5949 	int error;
   5950 	struct sysctlnode node;
   5951 	struct ix_queue *que;
   5952 	uint32_t reg, usec, rate;
   5953 
   5954 	node = *rnode;
   5955 	que = (struct ix_queue *)node.sysctl_data;
   5956 	if (que == NULL)
   5957 		return 0;
   5958 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5959 	usec = ((reg & 0x0FF8) >> 3);
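         	/* Interval is in 2us units, so 500000 intr/s is the ceiling. */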
   5960 	if (usec > 0)
   5961 		rate = 500000 / usec;
   5962 	else
   5963 		rate = 0;
   5964 	node.sysctl_data = &rate;
   5965 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5966 	if (error)
   5967 		return error;
   5968 	reg &= ~0xfff; /* default, no limitation */
   5969 	ixgbe_max_interrupt_rate = 0;
   5970 	if (rate > 0 && rate < 500000) {
   5971 		if (rate < 1000)
   5972 			rate = 1000;
   5973 		ixgbe_max_interrupt_rate = rate;
   5974 		reg |= ((4000000/rate) & 0xff8 );
   5975 	}
   5976 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5977 	return 0;
   5978 }
   5979 
   5980 const struct sysctlnode *
   5981 ixgbe_sysctl_instance(struct adapter *adapter)
   5982 {
   5983 	const char *dvname;
   5984 	struct sysctllog **log;
   5985 	int rc;
   5986 	const struct sysctlnode *rnode;
   5987 
   5988 	log = &adapter->sysctllog;
   5989 	dvname = device_xname(adapter->dev);
   5990 
   5991 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5992 	    0, CTLTYPE_NODE, dvname,
   5993 	    SYSCTL_DESCR("ixgbe information and settings"),
   5994 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5995 		goto err;
   5996 
   5997 	return rnode;
   5998 err:
   5999 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   6000 	return NULL;
   6001 }
   6002 
   6003 /*
   6004  * Add sysctl variables, one per statistic, to the system.
   6005  */
   6006 static void
   6007 ixgbe_add_hw_stats(struct adapter *adapter)
   6008 {
   6009 	device_t dev = adapter->dev;
   6010 	const struct sysctlnode *rnode, *cnode;
   6011 	struct sysctllog **log = &adapter->sysctllog;
   6012 	struct tx_ring *txr = adapter->tx_rings;
   6013 	struct rx_ring *rxr = adapter->rx_rings;
   6014 	struct ixgbe_hw_stats *stats = &adapter->stats;
   6015 
   6016 	/* Driver Statistics */
   6017 #if 0
   6018 	/* These counters are not updated by the software */
   6019 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   6020 			CTLFLAG_RD, &adapter->dropped_pkts,
   6021 			"Driver dropped packets");
   6022 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   6023 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   6024 			"???");
   6025 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   6026 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   6027 			"???");
   6028 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   6029 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   6030 			"???");
   6031 #endif
   6032 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   6033 	    NULL, device_xname(dev), "Handled queue in softint");
   6034 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   6035 	    NULL, device_xname(dev), "Requeued in softint");
   6036 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   6037 	    NULL, device_xname(dev), "Interrupt handler more rx");
   6038 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   6039 	    NULL, device_xname(dev), "Interrupt handler more tx");
   6040 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   6041 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   6042 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   6043 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   6044 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   6045 	    NULL, device_xname(dev), "m_defrag() failed");
   6046 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   6047 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   6048 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   6049 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   6050 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   6051 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   6052 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   6053 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   6054 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   6055 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   6056 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   6057 	    NULL, device_xname(dev), "Watchdog timeouts");
   6058 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   6059 	    NULL, device_xname(dev), "TSO errors");
   6060 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   6061 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   6062 
   6063 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   6064 		snprintf(adapter->queues[i].evnamebuf,
   6065 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   6066 		    device_xname(dev), i);
   6067 		snprintf(adapter->queues[i].namebuf,
   6068 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   6069 
   6070 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   6071 			aprint_error_dev(dev, "could not create sysctl root\n");
   6072 			break;
   6073 		}
   6074 
   6075 		if (sysctl_createv(log, 0, &rnode, &rnode,
   6076 		    0, CTLTYPE_NODE,
   6077 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   6078 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   6079 			break;
   6080 
   6081 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6082 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   6083 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   6084 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   6085 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   6086 			break;
   6087 
   6088 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6089 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   6090 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   6091 			NULL, 0, &(adapter->queues[i].irqs),
   6092 		    0, CTL_CREATE, CTL_EOL) != 0)
   6093 			break;
   6094 
   6095 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6096 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6097 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   6098 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   6099 		    0, CTL_CREATE, CTL_EOL) != 0)
   6100 			break;
   6101 
   6102 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6103 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6104 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   6105 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   6106 		    0, CTL_CREATE, CTL_EOL) != 0)
   6107 			break;
   6108 
   6109 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   6110 		    NULL, device_xname(dev), "TSO");
   6111 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   6112 		    NULL, adapter->queues[i].evnamebuf,
   6113 		    "Queue No Descriptor Available");
   6114 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   6115 		    NULL, adapter->queues[i].evnamebuf,
   6116 		    "Queue Packets Transmitted");
   6117 
   6118 #ifdef LRO
   6119 		struct lro_ctrl *lro = &rxr->lro;
   6120 #endif /* LRO */
   6121 
   6122 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6123 		    CTLFLAG_READONLY,
   6124 		    CTLTYPE_INT,
   6125 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   6126 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   6127 		    CTL_CREATE, CTL_EOL) != 0)
   6128 			break;
   6129 
   6130 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6131 		    CTLFLAG_READONLY,
   6132 		    CTLTYPE_INT,
   6133 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   6134 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   6135 		    CTL_CREATE, CTL_EOL) != 0)
   6136 			break;
   6137 
   6138 		if (i < __arraycount(adapter->stats.mpc)) {
   6139 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   6140 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6141 			    "Missed Packet Count");
   6142 		}
   6143 		if (i < __arraycount(adapter->stats.pxontxc)) {
   6144 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   6145 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6146 			    "pxontxc");
   6147 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   6148 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6149 			    "pxonrxc");
   6150 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   6151 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6152 			    "pxofftxc");
   6153 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   6154 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6155 			    "pxoffrxc");
   6156 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   6157 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6158 			    "pxon2offc");
   6159 		}
   6160 		if (i < __arraycount(adapter->stats.qprc)) {
   6161 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   6162 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6163 			    "qprc");
   6164 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   6165 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6166 			    "qptc");
   6167 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   6168 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6169 			    "qbrc");
   6170 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   6171 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6172 			    "qbtc");
   6173 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   6174 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6175 			    "qprdc");
   6176 		}
   6177 
   6178 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   6179 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   6180 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   6181 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   6182 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   6183 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   6184 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   6185 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   6186 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   6187 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   6188 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   6189 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   6190 #ifdef LRO
   6191 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   6192 				CTLFLAG_RD, &lro->lro_queued, 0,
   6193 				"LRO Queued");
   6194 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   6195 				CTLFLAG_RD, &lro->lro_flushed, 0,
   6196 				"LRO Flushed");
   6197 #endif /* LRO */
   6198 	}
   6199 
    6200 	/* MAC stats get their own sub node */
    6201 
   6203 	snprintf(stats->namebuf,
   6204 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   6205 
   6206 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   6207 	    stats->namebuf, "rx csum offload - IP");
   6208 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   6209 	    stats->namebuf, "rx csum offload - L4");
   6210 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   6211 	    stats->namebuf, "rx csum offload - IP bad");
   6212 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   6213 	    stats->namebuf, "rx csum offload - L4 bad");
   6214 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   6215 	    stats->namebuf, "Interrupt conditions zero");
   6216 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   6217 	    stats->namebuf, "Legacy interrupts");
   6218 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   6219 	    stats->namebuf, "CRC Errors");
   6220 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   6221 	    stats->namebuf, "Illegal Byte Errors");
   6222 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   6223 	    stats->namebuf, "Byte Errors");
   6224 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   6225 	    stats->namebuf, "MAC Short Packets Discarded");
   6226 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   6227 	    stats->namebuf, "MAC Local Faults");
   6228 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   6229 	    stats->namebuf, "MAC Remote Faults");
   6230 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   6231 	    stats->namebuf, "Receive Length Errors");
   6232 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   6233 	    stats->namebuf, "Link XON Transmitted");
   6234 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   6235 	    stats->namebuf, "Link XON Received");
   6236 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   6237 	    stats->namebuf, "Link XOFF Transmitted");
   6238 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   6239 	    stats->namebuf, "Link XOFF Received");
   6240 
   6241 	/* Packet Reception Stats */
   6242 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   6243 	    stats->namebuf, "Total Octets Received");
   6244 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   6245 	    stats->namebuf, "Good Octets Received");
   6246 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   6247 	    stats->namebuf, "Total Packets Received");
   6248 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   6249 	    stats->namebuf, "Good Packets Received");
   6250 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   6251 	    stats->namebuf, "Multicast Packets Received");
   6252 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   6253 	    stats->namebuf, "Broadcast Packets Received");
   6254 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
   6255 	    stats->namebuf, "64 byte frames received");
   6256 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   6257 	    stats->namebuf, "65-127 byte frames received");
   6258 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   6259 	    stats->namebuf, "128-255 byte frames received");
   6260 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   6261 	    stats->namebuf, "256-511 byte frames received");
   6262 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   6263 	    stats->namebuf, "512-1023 byte frames received");
   6264 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
   6265 	    stats->namebuf, "1024-1522 byte frames received");
   6266 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   6267 	    stats->namebuf, "Receive Undersized");
   6268 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
   6269 	    stats->namebuf, "Fragmented Packets Received");
   6270 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   6271 	    stats->namebuf, "Oversized Packets Received");
   6272 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   6273 	    stats->namebuf, "Received Jabber");
   6274 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   6275 	    stats->namebuf, "Management Packets Received");
   6276 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   6277 	    stats->namebuf, "Checksum Errors");
   6278 
   6279 	/* Packet Transmission Stats */
   6280 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   6281 	    stats->namebuf, "Good Octets Transmitted");
   6282 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   6283 	    stats->namebuf, "Total Packets Transmitted");
   6284 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   6285 	    stats->namebuf, "Good Packets Transmitted");
   6286 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   6287 	    stats->namebuf, "Broadcast Packets Transmitted");
   6288 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   6289 	    stats->namebuf, "Multicast Packets Transmitted");
   6290 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   6291 	    stats->namebuf, "Management Packets Transmitted");
   6292 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
   6293 	    stats->namebuf, "64 byte frames transmitted");
   6294 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   6295 	    stats->namebuf, "65-127 byte frames transmitted");
   6296 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   6297 	    stats->namebuf, "128-255 byte frames transmitted");
   6298 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   6299 	    stats->namebuf, "256-511 byte frames transmitted");
   6300 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   6301 	    stats->namebuf, "512-1023 byte frames transmitted");
   6302 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   6303 	    stats->namebuf, "1024-1522 byte frames transmitted");
   6304 }
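
/*
** Note: each counter attached above is a standard evcnt(9) event
** counter, so the whole set can be inspected from userland with
** vmstat(1), e.g. "vmstat -e".
*/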
   6305 
   6306 /*
   6307 ** Set flow control using sysctl:
   6308 ** Flow control values:
   6309 ** 	0 - off
   6310 **	1 - rx pause
   6311 **	2 - tx pause
   6312 **	3 - full
   6313 */
   6314 static int
   6315 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   6316 {
   6317 	struct sysctlnode node;
   6318 	int error, last;
   6319 	struct adapter *adapter;
   6320 
   6321 	node = *rnode;
   6322 	adapter = (struct adapter *)node.sysctl_data;
   6323 	node.sysctl_data = &adapter->fc;
   6324 	last = adapter->fc;
   6325 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6326 	if (error != 0 || newp == NULL)
   6327 		return error;
   6328 
   6329 	/* Don't bother if it's not changed */
   6330 	if (adapter->fc == last)
   6331 		return (0);
   6332 
   6333 	switch (adapter->fc) {
   6334 	case ixgbe_fc_rx_pause:
   6335 	case ixgbe_fc_tx_pause:
   6336 	case ixgbe_fc_full:
   6337 		adapter->hw.fc.requested_mode = adapter->fc;
   6338 		if (adapter->num_queues > 1)
   6339 			ixgbe_disable_rx_drop(adapter);
   6340 		break;
   6341 	case ixgbe_fc_none:
   6342 		adapter->hw.fc.requested_mode = ixgbe_fc_none;
   6343 		if (adapter->num_queues > 1)
   6344 			ixgbe_enable_rx_drop(adapter);
   6345 		break;
   6346 	default:
   6347 		adapter->fc = last;
   6348 		return (EINVAL);
   6349 	}
   6350 	/* Don't autoneg if forcing a value */
   6351 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   6352 	ixgbe_fc_enable(&adapter->hw);
   6353 	return 0;
   6354 }
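
/*
** Example (illustrative sketch only, not part of the driver): a
** userland program could select a flow control mode through the
** sysctl node backed by the handler above, using sysctlbyname(3).
** The node name below is a hypothetical placeholder -- the real
** path is whatever the sysctl setup code registered for the device.
**
**	#include <stdio.h>
**	#include <stdlib.h>
**	#include <sys/param.h>
**	#include <sys/sysctl.h>
**
**	int
**	main(void)
**	{
**		int fc = 3;	// 3 == full (rx + tx pause)
**
**		// "hw.ixg0.flow_control" is an assumed node name.
**		if (sysctlbyname("hw.ixg0.flow_control", NULL, NULL,
**		    &fc, sizeof(fc)) == -1) {
**			perror("sysctlbyname");
**			return EXIT_FAILURE;
**		}
**		return EXIT_SUCCESS;
**	}
*/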
   6355 
   6356 
   6357 /*
   6358 ** Control link advertise speed:
   6359 **	1 - advertise only 1G
   6360 **	2 - advertise 100Mb
   6361 **	3 - advertise normal
   6362 */
   6363 static int
   6364 ixgbe_set_advertise(SYSCTLFN_ARGS)
   6365 {
   6366 	struct sysctlnode	node;
   6367 	int			t, error = 0;
   6368 	struct adapter		*adapter;
   6369 	device_t		dev;
   6370 	struct ixgbe_hw		*hw;
   6371 	ixgbe_link_speed	speed, last;
   6372 
   6373 	node = *rnode;
   6374 	adapter = (struct adapter *)node.sysctl_data;
   6375 	dev = adapter->dev;
   6376 	hw = &adapter->hw;
   6377 	last = adapter->advertise;
   6378 	t = adapter->advertise;
   6379 	node.sysctl_data = &t;
   6380 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6381 	if (error != 0 || newp == NULL)
   6382 		return error;
   6383 
   6384 	if (t == last) /* no change */
   6385 		return 0;
   6386 
   6387 	if (t == -1)
   6388 		return 0;
   6389 
   6390 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
   6391 	    (hw->phy.multispeed_fiber)))
   6392 		return (EINVAL);
   6393 
   6394 	if ((t == 2) && (hw->mac.type != ixgbe_mac_X540)) {
   6395 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
   6396 		return (EINVAL);
   6397 	}
   6398 
   6399 	if (t == 1)
   6400 		speed = IXGBE_LINK_SPEED_1GB_FULL;
   6401 	else if (t == 2)
   6402 		speed = IXGBE_LINK_SPEED_100_FULL;
   6403 	else if (t == 3)
   6404 		speed = IXGBE_LINK_SPEED_1GB_FULL |
   6405 		    IXGBE_LINK_SPEED_10GB_FULL;
   6406 	else {	/* bogus value */
   6407 		return (EINVAL);
   6408 	}
   6409 
   6410 	/* Commit the value only once it is known to be valid */
   6411 	adapter->advertise = t;
   6412 
   6413 	hw->mac.autotry_restart = TRUE;
   6414 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6415 
   6416 	return 0;
   6417 }
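
/*
** For example, writing 1 above restricts the advertisement to
** IXGBE_LINK_SPEED_1GB_FULL, 2 selects IXGBE_LINK_SPEED_100_FULL
** (valid on X540 copper only, per the check above), and 3 restores
** the normal 1G/10G advertisement; reading the node returns the
** currently configured value.
*/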
   6418 
   6419 /*
   6420 ** Thermal Shutdown Trigger
   6421 **   - cause a Thermal Overtemp IRQ
   6422 **   - this now requires firmware enabling
   6423 */
   6424 static int
   6425 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6426 {
   6427 	struct sysctlnode node;
   6428 	int		error, fire = 0;
   6429 	struct adapter	*adapter;
   6430 	struct ixgbe_hw *hw;
   6431 
   6432 	node = *rnode;
   6433 	adapter = (struct adapter *)node.sysctl_data;
   6434 	hw = &adapter->hw;
   6435 
   6436 	if (hw->mac.type != ixgbe_mac_X540)
   6437 		return (0);
   6438 
   6439 	node.sysctl_data = &fire;
   6440 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6441 	if ((error) || (newp == NULL))
   6442 		return (error);
   6443 
   6444 	if (fire) {
   6445 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6446 		reg |= IXGBE_EICR_TS;
   6447 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6448 	}
   6449 
   6450 	return (0);
   6451 }
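
/*
** The register write above goes through EICS (Extended Interrupt
** Cause Set), so setting IXGBE_EICR_TS latches the thermal sensor
** cause bit and raises the same overtemp interrupt the hardware
** itself would generate, letting the handler path be exercised
** without actually overheating the part.
*/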
   6452 
   6453 /*
   6454 ** Enable the hardware to drop packets when the buffer is
   6455 ** full.  This is useful with multiqueue, so that no single
   6456 ** full queue stalls the entire RX engine.  We only enable
   6457 ** this when multiqueue is in use AND flow control is
   6458 ** disabled.
   6459 */
   6460 static void
   6461 ixgbe_enable_rx_drop(struct adapter *adapter)
   6462 {
   6463 	struct ixgbe_hw *hw = &adapter->hw;
   6464 
   6465 	for (int i = 0; i < adapter->num_queues; i++) {
   6466 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6467 		srrctl |= IXGBE_SRRCTL_DROP_EN;
   6468 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6469 	}
   6470 }
   6471 
   6472 static void
   6473 ixgbe_disable_rx_drop(struct adapter *adapter)
   6474 {
   6475 	struct ixgbe_hw *hw = &adapter->hw;
   6476 
   6477 	for (int i = 0; i < adapter->num_queues; i++) {
   6478 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6479 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   6480 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6481 	}
   6482 }
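
/*
** SRRCTL_DROP_EN is per queue: when set, frames arriving for a queue
** that has no free receive descriptors are dropped (and counted in
** QPRDC, the "qprdc" event counter attached earlier) rather than
** backpressuring the shared packet buffer, so the remaining queues
** keep receiving.
*/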
   6483