/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Coyote Point Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
/*$NetBSD: ixgbe.c,v 1.34 2015/08/13 04:56:43 msaitoh Exp $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#include "ixgbe.h"
#include "vlan.h"

#include <sys/cprng.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static const char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t, cfdata_t, void *);
static void     ixgbe_attach(device_t, device_t, void *);
static int      ixgbe_detach(device_t, int);
#if 0
static int      ixgbe_shutdown(device_t);
#endif
#if IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else
static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	ixgbe_qflush(struct ifnet *);
static void	ixgbe_deferred_mq_start(void *);
#endif
static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
static void	ixgbe_ifstop(struct ifnet *, int);
static int	ixgbe_init(struct ifnet *);
static void	ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *,
		    const struct pci_attach_args *);
static void	ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *,
		    const struct pci_attach_args *);
static int      ixgbe_allocate_legacy(struct adapter *,
		    const struct pci_attach_args *);
static int	ixgbe_allocate_queues(struct adapter *);
static int	ixgbe_setup_msix(struct adapter *);
static void	ixgbe_free_pci_resources(struct adapter *);
static void	ixgbe_local_timer(void *);
static int	ixgbe_setup_interface(device_t, struct adapter *);
static void	ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_transmit_structures(struct adapter *);
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void	ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void	ixgbe_txeof(struct tx_ring *);
static bool	ixgbe_rxeof(struct ix_queue *);
static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
		    struct ixgbe_hw_stats *);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
		    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void	ixgbe_configure_ivars(struct adapter *);
static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void	ixgbe_setup_vlan_hw_support(struct adapter *);
#if 0
static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
#endif

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

static void	ixgbe_enable_rx_drop(struct adapter *);
static void	ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool	ixgbe_sfp_probe(struct adapter *);
static void	ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static int	ixgbe_legacy_irq(void *);

#if defined(NETBSD_MSI_OR_MSIX)
/* The MSI/X Interrupt handlers */
static int	ixgbe_msix_que(void *);
static int	ixgbe_msix_link(void *);
#endif

/* Software interrupts for deferred work */
static void	ixgbe_handle_que(void *);
static void	ixgbe_handle_link(void *);
static void	ixgbe_handle_msf(void *);
static void	ixgbe_handle_mod(void *);

const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);

#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
static void	ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

#if 0
devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
#endif

/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixgbe_enable_aim = TRUE;
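
/*
 * NOTE: the FreeBSD-style SYSCTL_INT() invocations below are compiled
 * out on NetBSD (the macro expands to nothing); of these tuneables
 * only enable_aim is re-exported, per instance, via sysctl_createv()
 * in ixgbe_sysctl_attach().
 */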
#define SYSCTL_INT(__x, __y)
SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);

/*
** Smart speed setting, default to on.
** This currently works only as a compile-time
** option because it is applied during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

#if defined(NETBSD_MSI_OR_MSIX)
/*
 * Number of queues; can be set to 0,
 * in which case it autoconfigures based
 * on the number of CPUs, with a maximum
 * of 8. It can also be overridden
 * manually here.
 */
static int ixgbe_num_queues = 1;
SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
#endif

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/*
** Setting this on allows the use of
** unsupported SFP+ modules; note that
** in doing so you are on your own :)
*/
static int allow_unsupported_sfp = true;
SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It also causes IP forwarding to
**  fail, and unlike LRO that cannot be
**  controlled by the stack. For all
**  these reasons it is best left off,
**  and not worth a tuneable interface;
**  enabling it requires a recompile.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab of ports for sanity checking */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h.
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return 1 on success, 0 on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
{
	const struct pci_attach_args *pa = aux;

	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
}

static ixgbe_vendor_info_t *
ixgbe_lookup(const struct pci_attach_args *pa)
{
	pcireg_t subid;
	ixgbe_vendor_info_t *ent;

	INIT_DEBUGOUT("ixgbe_probe: begin");

	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
		return NULL;

	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);

	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&

		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
		     ent->subvendor_id == 0) &&

		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
		     ent->subdevice_id == 0)) {
			++ixgbe_total_ports;
			return ent;
		}
	}
	return NULL;
}


static void
ixgbe_sysctl_attach(struct adapter *adapter)
{
	struct sysctllog **log;
	const struct sysctlnode *rnode, *cnode;
	device_t dev;

	dev = adapter->dev;
	log = &adapter->sysctllog;

	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
		aprint_error_dev(dev, "could not create sysctl root\n");
		return;
	}

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_queues", SYSCTL_DESCR("Number of queues"),
	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "fc", SYSCTL_DESCR("Flow Control"),
	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	/* XXX This is an *instance* sysctl controlling a *global* variable.
	 * XXX It's that way in the FreeBSD driver that this derives from.
	 */
	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "ts", SYSCTL_DESCR("Thermal Test"),
	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");
}
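
/*
 * Illustrative usage (the actual root node is created by
 * ixgbe_sysctl_instance(), so the exact path may differ):
 *
 *	sysctl hw.ixg0.num_queues	# read-only: number of queues
 *	sysctl -w hw.ixg0.fc=3		# request full (rx+tx) flow control
 *
 * The accepted value encodings are defined by the handler functions
 * (ixgbe_set_flowcntl(), ixgbe_set_advertise(), ixgbe_set_thermal_test()).
 */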

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static void
ixgbe_attach(device_t parent, device_t dev, void *aux)
{
	struct adapter *adapter;
	struct ixgbe_hw *hw;
	int             error = -1;
	u16		csum;
	u32		ctrl_ext;
	ixgbe_vendor_info_t *ent;
	const struct pci_attach_args *pa = aux;

	INIT_DEBUGOUT("ixgbe_attach: begin");

	/* Allocate, clear, and link in our adapter structure */
	adapter = device_private(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	adapter->osdep.pc = pa->pa_pc;
	adapter->osdep.tag = pa->pa_tag;
	adapter->osdep.dmat = pa->pa_dmat;
	adapter->osdep.attached = false;

	ent = ixgbe_lookup(pa);

	KASSERT(ent != NULL);

	aprint_normal(": %s, Version - %s\n",
	    ixgbe_strings[ent->index], ixgbe_driver_version);

	/* Core Lock Init */
	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));

	/* SYSCTL APIs */

	ixgbe_sysctl_attach(adapter);

	/* Set up the timer callout */
	callout_init(&adapter->timer, 0);

	/* Determine hardware revision */
	ixgbe_identify_hardware(adapter);

	/* Do base PCI setup - map BAR0 */
	if (ixgbe_allocate_pci_resources(adapter, pa)) {
		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_out;
	}

	/* Do descriptor calc and sanity checks */
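	/*
	 * Each advanced descriptor is 16 bytes wide, so with the usual
	 * DBA_ALIGN of 128 the modulus test below effectively requires
	 * the ring length to be a multiple of 8 descriptors.
	 */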
	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
		aprint_error_dev(dev, "TXD config issue, using default!\n");
		adapter->num_tx_desc = DEFAULT_TXD;
	} else
		adapter->num_tx_desc = ixgbe_txd;

	/*
	** With many RX rings it is easy to exceed the
	** system mbuf allocation. Tuning nmbclusters
	** can alleviate this.
	*/
	if (nmbclusters > 0) {
		int s;
		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
		if (s > nmbclusters) {
			aprint_error_dev(dev, "RX Descriptors exceed "
			    "system mbuf max, using default instead!\n");
			ixgbe_rxd = DEFAULT_RXD;
		}
	}

	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
		aprint_error_dev(dev, "RXD config issue, using default!\n");
		adapter->num_rx_desc = DEFAULT_RXD;
	} else
		adapter->num_rx_desc = ixgbe_rxd;

	/* Allocate our TX/RX Queues */
	if (ixgbe_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_out;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Initialize the shared code */
	hw->allow_unsupported_sfp = allow_unsupported_sfp;
	error = ixgbe_init_shared_code(hw);
	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		/*
		** No optics in this port, set up
		** so the timer routine will probe
		** for later insertion.
		*/
		adapter->sfp_probe = TRUE;
		error = 0;
	} else if ((error == IXGBE_ERR_SFP_NOT_SUPPORTED)
	    && (hw->allow_unsupported_sfp == false)) {
		aprint_error_dev(dev, "Unsupported SFP+ module detected!\n");
		error = EIO;
		goto err_late;
	} else if (error) {
		aprint_error_dev(dev, "Unable to initialize the shared code\n");
		error = EIO;
		goto err_late;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
		aprint_error_dev(dev, "The EEPROM Checksum Is Not Valid\n");
		error = EIO;
		goto err_late;
	}

	error = ixgbe_init_hw(hw);
	switch (error) {
	case IXGBE_ERR_EEPROM_VERSION:
		aprint_error_dev(dev, "This device is a pre-production adapter/"
		    "LOM.  Please be aware there may be issues associated "
		    "with your hardware.\n If you are experiencing problems "
		    "please contact your Intel or hardware representative "
		    "who provided you with this hardware.\n");
		break;
	case IXGBE_ERR_SFP_NOT_SUPPORTED:
		aprint_error_dev(dev, "Unsupported SFP+ Module\n");
		error = EIO;
		aprint_error_dev(dev, "Hardware Initialization Failure\n");
		goto err_late;
	case IXGBE_ERR_SFP_NOT_PRESENT:
		device_printf(dev, "No SFP+ Module found\n");
		/* falls thru */
	default:
		break;
	}

	/* Detect and set physical type */
	ixgbe_setup_optics(adapter);

	error = -1;
	if ((adapter->msix > 1) && (ixgbe_enable_msix))
		error = ixgbe_allocate_msix(adapter, pa);
	if (error != 0)
		error = ixgbe_allocate_legacy(adapter, pa);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (ixgbe_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Initialize statistics */
	ixgbe_update_stats_counters(adapter);

	/*
	** Check PCIE slot type/speed/width
	*/
	ixgbe_get_slot_info(hw);

	/* Set an initial default flow control value */
	adapter->fc = ixgbe_fc_full;

	/* let hardware know driver is loaded */
	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

	ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
	ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("ixgbe_attach: end");
	adapter->osdep.attached = true;
	return;
err_late:
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
err_out:
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	ixgbe_free_pci_resources(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);
	return;
}

static int
ixgbe_detach(device_t dev, int flags)
{
	struct adapter *adapter = device_private(dev);
	struct rx_ring *rxr = adapter->rx_rings;
	struct ixgbe_hw_stats *stats = &adapter->stats;
	struct ix_queue *que = adapter->queues;
	struct tx_ring *txr = adapter->tx_rings;
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");
	if (adapter->osdep.attached == false)
		return 0;

#if NVLAN > 0
	/* Make sure VLANs are not using the driver */
	if (!VLAN_ATTACHED(&adapter->osdep.ec))
		;	/* nothing to do: no VLANs */
	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
		vlan_ifdetach(adapter->ifp);
	else {
		aprint_error_dev(dev, "VLANs in use\n");
		return EBUSY;
	}
#endif

	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
#ifndef IXGBE_LEGACY_TX
		softint_disestablish(txr->txq_si);
#endif
		softint_disestablish(que->que_si);
	}

	/* Drain the Link queue */
	softint_disestablish(adapter->link_si);
	softint_disestablish(adapter->mod_si);
	softint_disestablish(adapter->msf_si);
#ifdef IXGBE_FDIR
	softint_disestablish(adapter->fdir_si);
#endif

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	ether_ifdetach(adapter->ifp);
	callout_halt(&adapter->timer, NULL);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
#if 0	/* XXX the NetBSD port is probably missing something here */
	bus_generic_detach(dev);
#endif
	if_detach(adapter->ifp);

	sysctl_teardown(&adapter->sysctllog);
	evcnt_detach(&adapter->handleq);
	evcnt_detach(&adapter->req);
	evcnt_detach(&adapter->morerx);
	evcnt_detach(&adapter->moretx);
	evcnt_detach(&adapter->txloops);
	evcnt_detach(&adapter->efbig_tx_dma_setup);
	evcnt_detach(&adapter->m_defrag_failed);
	evcnt_detach(&adapter->efbig2_tx_dma_setup);
	evcnt_detach(&adapter->einval_tx_dma_setup);
	evcnt_detach(&adapter->other_tx_dma_setup);
	evcnt_detach(&adapter->eagain_tx_dma_setup);
	evcnt_detach(&adapter->enomem_tx_dma_setup);
	evcnt_detach(&adapter->watchdog_events);
	evcnt_detach(&adapter->tso_err);
	evcnt_detach(&adapter->link_irq);

	txr = adapter->tx_rings;
	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		evcnt_detach(&txr->no_desc_avail);
		evcnt_detach(&txr->total_packets);
		evcnt_detach(&txr->tso_tx);

		if (i < __arraycount(adapter->stats.mpc)) {
			evcnt_detach(&adapter->stats.mpc[i]);
		}
		if (i < __arraycount(adapter->stats.pxontxc)) {
			evcnt_detach(&adapter->stats.pxontxc[i]);
			evcnt_detach(&adapter->stats.pxonrxc[i]);
			evcnt_detach(&adapter->stats.pxofftxc[i]);
			evcnt_detach(&adapter->stats.pxoffrxc[i]);
			evcnt_detach(&adapter->stats.pxon2offc[i]);
		}
		if (i < __arraycount(adapter->stats.qprc)) {
			evcnt_detach(&adapter->stats.qprc[i]);
			evcnt_detach(&adapter->stats.qptc[i]);
			evcnt_detach(&adapter->stats.qbrc[i]);
			evcnt_detach(&adapter->stats.qbtc[i]);
			evcnt_detach(&adapter->stats.qprdc[i]);
		}

		evcnt_detach(&rxr->rx_packets);
		evcnt_detach(&rxr->rx_bytes);
		evcnt_detach(&rxr->rx_copies);
		evcnt_detach(&rxr->no_jmbuf);
		evcnt_detach(&rxr->rx_discarded);
		evcnt_detach(&rxr->rx_irq);
	}
	evcnt_detach(&stats->ipcs);
	evcnt_detach(&stats->l4cs);
	evcnt_detach(&stats->ipcs_bad);
	evcnt_detach(&stats->l4cs_bad);
	evcnt_detach(&stats->intzero);
	evcnt_detach(&stats->legint);
	evcnt_detach(&stats->crcerrs);
	evcnt_detach(&stats->illerrc);
	evcnt_detach(&stats->errbc);
	evcnt_detach(&stats->mspdc);
	evcnt_detach(&stats->mlfc);
	evcnt_detach(&stats->mrfc);
	evcnt_detach(&stats->rlec);
	evcnt_detach(&stats->lxontxc);
	evcnt_detach(&stats->lxonrxc);
	evcnt_detach(&stats->lxofftxc);
	evcnt_detach(&stats->lxoffrxc);

	/* Packet Reception Stats */
	evcnt_detach(&stats->tor);
	evcnt_detach(&stats->gorc);
	evcnt_detach(&stats->tpr);
	evcnt_detach(&stats->gprc);
	evcnt_detach(&stats->mprc);
	evcnt_detach(&stats->bprc);
	evcnt_detach(&stats->prc64);
	evcnt_detach(&stats->prc127);
	evcnt_detach(&stats->prc255);
	evcnt_detach(&stats->prc511);
	evcnt_detach(&stats->prc1023);
	evcnt_detach(&stats->prc1522);
	evcnt_detach(&stats->ruc);
	evcnt_detach(&stats->rfc);
	evcnt_detach(&stats->roc);
	evcnt_detach(&stats->rjc);
	evcnt_detach(&stats->mngprc);
	evcnt_detach(&stats->xec);

	/* Packet Transmission Stats */
	evcnt_detach(&stats->gotc);
	evcnt_detach(&stats->tpt);
	evcnt_detach(&stats->gptc);
	evcnt_detach(&stats->bptc);
	evcnt_detach(&stats->mptc);
	evcnt_detach(&stats->mngptc);
	evcnt_detach(&stats->ptc64);
	evcnt_detach(&stats->ptc127);
	evcnt_detach(&stats->ptc255);
	evcnt_detach(&stats->ptc511);
	evcnt_detach(&stats->ptc1023);
	evcnt_detach(&stats->ptc1522);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *adapter = device_private(dev);
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);
	return (0);
}
#endif


#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	int rc;
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_POLL(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m_head);
		if (rc == EFBIG) {
			struct mbuf *mtmp;

			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
				m_head = mtmp;
				rc = ixgbe_xmit(txr, m_head);
				if (rc != 0)
					adapter->efbig2_tx_dma_setup.ev_count++;
			} else
				adapter->m_defrag_failed.ev_count++;
		}
		if (rc != 0) {
			m_freem(m_head);
			continue;
		}

		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, m_head);

		/* Set watchdog on */
		getmicrotime(&txr->watchdog_time);
		txr->queue_status = IXGBE_QUEUE_WORKING;
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_flags & IFF_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int 		i, err = 0;
#ifdef	RSS
	uint32_t bucket_id;
#endif

	/* Which queue to use */
	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket that the current CPU we're on is.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef	RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			/* XXX TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		} else {
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef	RSS
		}
#endif
	} else {
		i = curcpu % adapter->num_queues;
	}

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		softint_schedule(txr->txq_si);

	return (0);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             enqueued = 0, err = 0;

	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (enqueued > 0) {
		/* Set watchdog on */
		txr->queue_status = IXGBE_QUEUE_WORKING;
		getmicrotime(&txr->watchdog_time);
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}
/*
 * Called from a softint to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

static int
ixgbe_ifflags_cb(struct ethercom *ec)
{
	struct ifnet *ifp = &ec->ec_if;
	struct adapter *adapter = ifp->if_softc;
	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;

	IXGBE_CORE_LOCK(adapter);

	if (change != 0)
		adapter->if_flags = ifp->if_flags;

	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
		rc = ENETRESET;
	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
		ixgbe_set_promisc(adapter);

	/* Set up VLAN support and filter */
	ixgbe_setup_vlan_hw_support(adapter);

	IXGBE_CORE_UNLOCK(adapter);

	return rc;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, void *data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifcapreq *ifcr = data;
	struct ifreq	*ifr = data;
	int             error = 0;
	int l4csum_en;
	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;

	switch (command) {
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
		break;
	case SIOCSIFCAP:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
		break;
	case SIOCSIFMTU:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
		break;
	default:
		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
		break;
	}

	switch (command) {
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
	case SIOCGI2C:
	{
		struct ixgbe_i2c_req	i2c;
		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			error = EINVAL;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
		    i2c.dev_addr, i2c.data);
		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}
	case SIOCSIFCAP:
		/* Layer-4 Rx checksum offload has to be turned on and
		 * off as a unit.
		 */
		l4csum_en = ifcr->ifcr_capenable & l4csum;
		if (l4csum_en != l4csum && l4csum_en != 0)
			return EINVAL;
		/*FALLTHROUGH*/
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFFLAGS:
	case SIOCSIFMTU:
	default:
		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
			return error;
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			;
		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
			IXGBE_CORE_LOCK(adapter);
			ixgbe_init_locked(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
			/*
			 * Multicast list has changed; set the hardware filter
			 * accordingly.
			 */
			IXGBE_CORE_LOCK(adapter);
			ixgbe_disable_intr(adapter);
			ixgbe_set_multi(adapter);
			ixgbe_enable_intr(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		}
		return 0;
	}

	return error;
}
/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
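/* The maximum frame size field occupies the upper 16 bits of MHADD. */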
#define IXGBE_MHADD_MFS_SHIFT 16

static void
ixgbe_init_locked(struct adapter *adapter)
{
	struct ifnet   *ifp = adapter->ifp;
	device_t 	dev = adapter->dev;
	struct ixgbe_hw *hw = &adapter->hw;
	u32		k, txdctl, mhadd, gpie;
	u32		rxdctl, rxctrl;

	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */

	KASSERT(mutex_owned(&adapter->core_mtx));
	INIT_DEBUGOUT("ixgbe_init_locked: begin");
	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	callout_stop(&adapter->timer);

	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
	adapter->max_frame_size =
		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

	/* Get the latest mac address, User can use a LAA */
	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
	    IXGBE_ETH_LENGTH_OF_ADDRESS);
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
	hw->addr_ctrl.rar_used_count = 1;

	/* Prepare transmit descriptors and buffers */
	if (ixgbe_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		ixgbe_stop(adapter);
		return;
	}

	ixgbe_init_hw(hw);
	ixgbe_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	ixgbe_set_multi(adapter);

	/*
	** Determine the correct mbuf pool
	** for doing jumbo frames
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else if (adapter->max_frame_size <= 9216)
		adapter->rx_mbuf_sz = MJUM9BYTES;
	else
		adapter->rx_mbuf_sz = MJUM16BYTES;

	/* Prepare receive descriptors and buffers */
	if (ixgbe_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		ixgbe_stop(adapter);
		return;
	}

	/* Configure RX settings */
	ixgbe_initialize_receive_units(adapter);

	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

	/* Enable Fan Failure Interrupt */
	gpie |= IXGBE_SDP1_GPIEN;

	/* Add for Thermal detection */
	if (hw->mac.type == ixgbe_mac_82599EB)
		gpie |= IXGBE_SDP2_GPIEN;

	/* Thermal Failure Detection */
	if (hw->mac.type == ixgbe_mac_X540)
		gpie |= IXGBE_SDP0_GPIEN;

	if (adapter->msix > 1) {
		/* Enable Enhanced MSIX mode */
		gpie |= IXGBE_GPIE_MSIX_MODE;
		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
		    IXGBE_GPIE_OCD;
	}
	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

	/* Set MTU size */
	if (ifp->if_mtu > ETHERMTU) {
		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
		mhadd &= ~IXGBE_MHADD_MFS_MASK;
		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
	}

	/* Now enable all the queues */

	for (int i = 0; i < adapter->num_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		/* Set WTHRESH to 8, burst writeback */
		txdctl |= (8 << 16);
		/*
		 * When the internal queue falls below PTHRESH (32),
		 * start prefetching as long as there are at least
		 * HTHRESH (1) buffers ready. The values are taken
		 * from the Intel linux driver 3.8.21.
		 * Prefetching enables tx line rate even with 1 queue.
		 */
		txdctl |= (32 << 0) | (1 << 8);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}

	for (int i = 0; i < adapter->num_queues; i++) {
		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		if (hw->mac.type == ixgbe_mac_82598EB) {
			/*
			** PTHRESH = 21
			** HTHRESH = 4
			** WTHRESH = 8
			*/
			rxdctl &= ~0x3FFFFF;
			rxdctl |= 0x080420;
		}
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
		/* XXX I don't trust this loop, and I don't trust the
		 * XXX memory barrier.  What is this meant to do? --dyoung
		 */
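		/* This appears to poll (for up to 10ms) until the hardware
		 * latches the ENABLE bit, so that the RDT write below is
		 * only done once the queue is actually enabled.
		 */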
		for (k = 0; k < 10; k++) {
			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
			    IXGBE_RXDCTL_ENABLE)
				break;
			else
				msec_delay(1);
		}
		wmb();
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, we must preserve the buffers made
		 * available to userspace before the if_init()
		 * (this is true by default on the TX side, because
		 * init makes all buffers available to userspace).
		 *
		 * netmap_reset() and the device specific routines
		 * (e.g. ixgbe_setup_receive_rings()) map these
		 * buffers at the end of the NIC ring, so here we
		 * must set the RDT (tail) register to make sure
		 * they are not overwritten.
		 *
		 * In this driver the NIC ring starts at RDH = 0,
		 * RDT points to the last slot available for reception (?),
		 * so RDT = num_rx_desc - 1 means the whole ring is available.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(adapter->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);

			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
		} else
#endif /* DEV_NETMAP */
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
	}

	/* Enable Receive engine */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		rxctrl |= IXGBE_RXCTRL_DMBYPS;
	rxctrl |= IXGBE_RXCTRL_RXEN;
	ixgbe_enable_rx_dma(hw, rxctrl);

	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

	/* Set up MSI/X routing */
	if (ixgbe_enable_msix) {
		ixgbe_configure_ivars(adapter);
		/* Set up auto-mask */
		if (hw->mac.type == ixgbe_mac_82598EB)
			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
		else {
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
		}
	} else {  /* Simple settings for Legacy/MSI */
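		/* Route both the RX and TX of queue 0 to vector 0. */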
   1458                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1459                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1460 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1461 	}
   1462 
   1463 #ifdef IXGBE_FDIR
   1464 	/* Init Flow director */
   1465 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1466 		u32 hdrm = 32 << fdir_pballoc;
   1467 
   1468 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1469 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1470 	}
   1471 #endif
   1472 
   1473 	/*
   1474 	** Check on any SFP devices that
   1475 	** need to be kick-started
   1476 	*/
   1477 	if (hw->phy.type == ixgbe_phy_none) {
   1478 		int err = hw->phy.ops.identify(hw);
   1479 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1480                 	device_printf(dev,
   1481 			    "Unsupported SFP+ module type was detected.\n");
   1482 			return;
   1483         	}
   1484 	}
   1485 
   1486 	/* Set moderation on the Link interrupt */
   1487 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1488 
   1489 	/* Config/Enable Link */
   1490 	ixgbe_config_link(adapter);
   1491 
   1492 	/* Hardware Packet Buffer & Flow Control setup */
   1493 	{
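         		/*
         		** The DV() macros estimate a worst-case delay value
         		** in bit times for a given frame size, and BT2KB()
         		** rounds bits up to KB.  High water is then the Rx
         		** packet buffer size (RXPBSIZE, converted to KB)
         		** minus that headroom; low water comes from the
         		** LOW_DV() variants.
         		*/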
   1494 		u32 rxpb, frame, size, tmp;
   1495 
   1496 		frame = adapter->max_frame_size;
   1497 
   1498 		/* Calculate High Water */
   1499 		if (hw->mac.type == ixgbe_mac_X540)
   1500 			tmp = IXGBE_DV_X540(frame, frame);
   1501 		else
   1502 			tmp = IXGBE_DV(frame, frame);
   1503 		size = IXGBE_BT2KB(tmp);
   1504 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1505 		hw->fc.high_water[0] = rxpb - size;
   1506 
   1507 		/* Now calculate Low Water */
   1508 		if (hw->mac.type == ixgbe_mac_X540)
   1509 			tmp = IXGBE_LOW_DV_X540(frame);
   1510 		else
   1511 			tmp = IXGBE_LOW_DV(frame);
   1512 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1513 
   1514 		hw->fc.requested_mode = adapter->fc;
   1515 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1516 		hw->fc.send_xon = TRUE;
   1517 	}
   1518 	/* Initialize the FC settings */
   1519 	ixgbe_start_hw(hw);
   1520 
   1521 	/* Set up VLAN support and filter */
   1522 	ixgbe_setup_vlan_hw_support(adapter);
   1523 
   1524 	/* And now turn on interrupts */
   1525 	ixgbe_enable_intr(adapter);
   1526 
   1527 	/* Now inform the stack we're ready */
   1528 	ifp->if_flags |= IFF_RUNNING;
   1529 
   1530 	return;
   1531 }
   1532 
   1533 static int
   1534 ixgbe_init(struct ifnet *ifp)
   1535 {
   1536 	struct adapter *adapter = ifp->if_softc;
   1537 
   1538 	IXGBE_CORE_LOCK(adapter);
   1539 	ixgbe_init_locked(adapter);
   1540 	IXGBE_CORE_UNLOCK(adapter);
   1541 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1542 }
   1543 
   1544 
   1545 /*
   1546 **
   1547 ** MSIX Interrupt Handlers and Tasklets
   1548 **
   1549 */
   1550 
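         /*
         ** Enable one queue's MSI-X interrupt.  The 82598 has a single
         ** 32-bit EIMS register, so the vector bit is masked against
         ** EIMS_RTX_QUEUE; newer MACs spread 64 vector bits across the
         ** two EIMS_EX registers (low word in EIMS_EX(0), high word in
         ** EIMS_EX(1)).
         */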
   1551 static inline void
   1552 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1553 {
   1554 	struct ixgbe_hw *hw = &adapter->hw;
   1555 	u64	queue = (u64)(1ULL << vector);
   1556 	u32	mask;
   1557 
   1558 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1559                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1560                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1561 	} else {
   1562                 mask = (queue & 0xFFFFFFFF);
   1563                 if (mask)
   1564                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1565                 mask = (queue >> 32);
   1566                 if (mask)
   1567                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1568 	}
   1569 }
   1570 
   1571 __unused static inline void
   1572 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1573 {
   1574 	struct ixgbe_hw *hw = &adapter->hw;
   1575 	u64	queue = (u64)(1ULL << vector);
   1576 	u32	mask;
   1577 
   1578 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1579                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1580                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1581 	} else {
   1582                 mask = (queue & 0xFFFFFFFF);
   1583                 if (mask)
   1584                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1585                 mask = (queue >> 32);
   1586                 if (mask)
   1587                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1588 	}
   1589 }
   1590 
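         /*
         ** Deferred-work (softint) handler for a queue: drain received
         ** packets, reclaim completed transmit descriptors, restart any
         ** pending transmit work, then unmask the interrupt.  A non-NULL
         ** que->res means we own a dedicated MSI-X vector; otherwise the
         ** shared legacy/MSI interrupt is re-enabled instead.
         */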
   1591 static void
   1592 ixgbe_handle_que(void *context)
   1593 {
   1594 	struct ix_queue *que = context;
   1595 	struct adapter  *adapter = que->adapter;
   1596 	struct tx_ring  *txr = que->txr;
   1597 	struct ifnet    *ifp = adapter->ifp;
   1598 
   1599 	adapter->handleq.ev_count++;
   1600 
   1601 	if (ifp->if_flags & IFF_RUNNING) {
   1602 		ixgbe_rxeof(que);
   1603 		IXGBE_TX_LOCK(txr);
   1604 		ixgbe_txeof(txr);
   1605 #ifndef IXGBE_LEGACY_TX
   1606 		if (!drbr_empty(ifp, txr->br))
   1607 			ixgbe_mq_start_locked(ifp, txr);
   1608 #else
   1609 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1610 			ixgbe_start_locked(txr, ifp);
   1611 #endif
   1612 		IXGBE_TX_UNLOCK(txr);
   1613 	}
   1614 
   1615 	/* Reenable this interrupt */
   1616 	if (que->res != NULL)
   1617 		ixgbe_enable_queue(adapter, que->msix);
   1618 	else
   1619 		ixgbe_enable_intr(adapter);
   1620 	return;
   1621 }
   1622 
   1623 
   1624 /*********************************************************************
   1625  *
   1626  *  Legacy Interrupt Service routine
   1627  *
   1628  **********************************************************************/
   1629 
   1630 static int
   1631 ixgbe_legacy_irq(void *arg)
   1632 {
   1633 	struct ix_queue *que = arg;
   1634 	struct adapter	*adapter = que->adapter;
   1635 	struct ixgbe_hw	*hw = &adapter->hw;
   1636 	struct ifnet   *ifp = adapter->ifp;
   1637 	struct 		tx_ring *txr = adapter->tx_rings;
   1638 	bool		more = false;
   1639 	u32       	reg_eicr;
   1640 
   1641 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1642 
   1643 	adapter->stats.legint.ev_count++;
   1644 	++que->irqs;
   1645 	if (reg_eicr == 0) {
   1646 		adapter->stats.intzero.ev_count++;
   1647 		if ((ifp->if_flags & IFF_UP) != 0)
   1648 			ixgbe_enable_intr(adapter);
   1649 		return 0;
   1650 	}
   1651 
   1652 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1653 		more = ixgbe_rxeof(que);
   1654 
   1655 		IXGBE_TX_LOCK(txr);
   1656 		ixgbe_txeof(txr);
   1657 #ifdef IXGBE_LEGACY_TX
   1658 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1659 			ixgbe_start_locked(txr, ifp);
   1660 #else
   1661 		if (!drbr_empty(ifp, txr->br))
   1662 			ixgbe_mq_start_locked(ifp, txr);
   1663 #endif
   1664 		IXGBE_TX_UNLOCK(txr);
   1665 	}
   1666 
   1667 	/* Check for fan failure */
   1668 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1669 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1670                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1671 		    "REPLACE IMMEDIATELY!!\n");
   1672 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1673 	}
   1674 
   1675 	/* Link status change */
   1676 	if (reg_eicr & IXGBE_EICR_LSC)
   1677 		softint_schedule(adapter->link_si);
   1678 
   1679 	if (more)
   1680 #ifndef IXGBE_LEGACY_TX
   1681 		softint_schedule(txr->txq_si);
   1682 #else
   1683 		softint_schedule(que->que_si);
   1684 #endif
   1685 	else
   1686 		ixgbe_enable_intr(adapter);
   1687 	return 1;
   1688 }
   1689 
   1690 
   1691 #if defined(NETBSD_MSI_OR_MSIX)
   1692 /*********************************************************************
   1693  *
   1694  *  MSIX Queue Interrupt Service routine
   1695  *
   1696  **********************************************************************/
   1697 static int
   1698 ixgbe_msix_que(void *arg)
   1699 {
   1700 	struct ix_queue	*que = arg;
   1701 	struct adapter  *adapter = que->adapter;
   1702 	struct ifnet    *ifp = adapter->ifp;
   1703 	struct tx_ring	*txr = que->txr;
   1704 	struct rx_ring	*rxr = que->rxr;
   1705 	bool		more;
   1706 	u32		newitr = 0;
   1707 
   1708 	/* Protect against spurious interrupts */
   1709 	if ((ifp->if_flags & IFF_RUNNING) == 0)
   1710 		return 0;
   1711 
   1712 	ixgbe_disable_queue(adapter, que->msix);
   1713 	++que->irqs;
   1714 
   1715 	more = ixgbe_rxeof(que);
   1716 
   1717 	IXGBE_TX_LOCK(txr);
   1718 	ixgbe_txeof(txr);
   1719 #ifdef IXGBE_LEGACY_TX
   1720 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1721 		ixgbe_start_locked(txr, ifp);
   1722 #else
   1723 	if (!drbr_empty(ifp, txr->br))
   1724 		ixgbe_mq_start_locked(ifp, txr);
   1725 #endif
   1726 	IXGBE_TX_UNLOCK(txr);
   1727 
   1728 	/* Do AIM now? */
   1729 
   1730 	if (ixgbe_enable_aim == FALSE)
   1731 		goto no_calc;
   1732 	/*
   1733 	** Do Adaptive Interrupt Moderation:
   1734         **  - Write out last calculated setting
   1735 	**  - Calculate based on average size over
   1736 	**    the last interval.
   1737 	*/
   1738         if (que->eitr_setting)
   1739                 IXGBE_WRITE_REG(&adapter->hw,
   1740                     IXGBE_EITR(que->msix), que->eitr_setting);
   1741 
   1742         que->eitr_setting = 0;
   1743 
   1744         /* Idle, do nothing */
   1745         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1746                 goto no_calc;
   1747 
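         	/*
         	** Rough worked example (illustrative only): full-size
         	** 1500-byte frames give newitr = 1500 + 24 = 1524, which is
         	** above the 300..1200 mid range, so the final value is
         	** 1524 / 2 = 762.  Small 64-byte frames give
         	** (64 + 24) / 2 = 44, i.e. much less moderation for
         	** latency-sensitive traffic.
         	*/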
   1748 	if ((txr->bytes) && (txr->packets))
   1749                	newitr = txr->bytes/txr->packets;
   1750 	if ((rxr->bytes) && (rxr->packets))
   1751 		newitr = max(newitr,
   1752 		    (rxr->bytes / rxr->packets));
   1753 	newitr += 24; /* account for hardware frame, crc */
   1754 
   1755 	/* set an upper boundary */
   1756 	newitr = min(newitr, 3000);
   1757 
   1758 	/* Be nice to the mid range */
   1759 	if ((newitr > 300) && (newitr < 1200))
   1760 		newitr = (newitr / 3);
   1761 	else
   1762 		newitr = (newitr / 2);
   1763 
   1764         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1765                 newitr |= newitr << 16;
   1766         else
   1767                 newitr |= IXGBE_EITR_CNT_WDIS;
   1768 
   1769         /* save for next interrupt */
   1770         que->eitr_setting = newitr;
   1771 
   1772         /* Reset state */
   1773         txr->bytes = 0;
   1774         txr->packets = 0;
   1775         rxr->bytes = 0;
   1776         rxr->packets = 0;
   1777 
   1778 no_calc:
   1779 	if (more)
   1780 		softint_schedule(que->que_si);
   1781 	else
   1782 		ixgbe_enable_queue(adapter, que->msix);
   1783 	return 1;
   1784 }
   1785 
   1786 
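         /*
         ** MSIX "other causes" vector: link state changes, SFP module
         ** and multispeed-fiber events, Flow Director reinit, ECC and
         ** over-temp conditions.  Queue cause bits are masked out here
         ** so they stay owned by the per-queue handlers.
         */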
   1787 static int
   1788 ixgbe_msix_link(void *arg)
   1789 {
   1790 	struct adapter	*adapter = arg;
   1791 	struct ixgbe_hw *hw = &adapter->hw;
   1792 	u32		reg_eicr;
   1793 
   1794 	++adapter->link_irq.ev_count;
   1795 
   1796 	/* First get the cause */
   1797 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1798 	/* Be sure the queue bits are not cleared */
   1799 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
   1800 	/* Clear interrupt with write */
   1801 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1802 
   1803 	/* Link status change */
   1804 	if (reg_eicr & IXGBE_EICR_LSC)
   1805 		softint_schedule(adapter->link_si);
   1806 
   1807 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1808 #ifdef IXGBE_FDIR
   1809 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1810 			/* This is probably overkill :) */
   1811 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1812 				return 1;
   1813                 	/* Disable the interrupt */
   1814 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1815 			softint_schedule(adapter->fdir_si);
   1816 		} else
   1817 #endif
   1818 		if (reg_eicr & IXGBE_EICR_ECC) {
   1819                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1820 			    "Please Reboot!!\n");
   1821 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
    1822 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1825                 	/* Clear the interrupt */
   1826                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1827 			softint_schedule(adapter->msf_si);
   1828         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1829                 	/* Clear the interrupt */
   1830                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1831 			softint_schedule(adapter->mod_si);
   1832 		}
   1833         }
   1834 
   1835 	/* Check for fan failure */
   1836 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1837 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1838                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1839 		    "REPLACE IMMEDIATELY!!\n");
   1840 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1841 	}
   1842 
   1843 	/* Check for over temp condition */
   1844 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1845 	    (reg_eicr & IXGBE_EICR_TS)) {
   1846 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1847 		    "PHY IS SHUT DOWN!!\n");
   1848 		device_printf(adapter->dev, "System shutdown required\n");
   1849 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1850 	}
   1851 
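         	/* Re-arm only the "other causes" vector; queues re-arm themselves. */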
   1852 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1853 	return 1;
   1854 }
   1855 #endif
   1856 
   1857 /*********************************************************************
   1858  *
   1859  *  Media Ioctl callback
   1860  *
   1861  *  This routine is called whenever the user queries the status of
   1862  *  the interface using ifconfig.
   1863  *
   1864  **********************************************************************/
   1865 static void
   1866 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1867 {
   1868 	struct adapter *adapter = ifp->if_softc;
   1869 	struct ixgbe_hw *hw = &adapter->hw;
   1870 
   1871 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1872 	IXGBE_CORE_LOCK(adapter);
   1873 	ixgbe_update_link_status(adapter);
   1874 
   1875 	ifmr->ifm_status = IFM_AVALID;
   1876 	ifmr->ifm_active = IFM_ETHER;
   1877 
   1878 	if (!adapter->link_active) {
   1879 		IXGBE_CORE_UNLOCK(adapter);
   1880 		return;
   1881 	}
   1882 
   1883 	ifmr->ifm_status |= IFM_ACTIVE;
   1884 
   1885 	/*
    1886 	 * Not all NICs are 1000baseSX; the X540T, for example, is not.
    1887 	 * We must set the media properly based on the NIC model.
   1888 	 */
   1889 	switch (hw->device_id) {
   1890 	case IXGBE_DEV_ID_X540T:
   1891 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1892 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1893 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1894 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
   1895 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1896 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1897 		break;
   1898 	default:
   1899 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1900 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1901 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1902 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1903 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1904 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1905 		break;
   1906 	}
   1907 
   1908 	IXGBE_CORE_UNLOCK(adapter);
   1909 
   1910 	return;
   1911 }
   1912 
   1913 /*********************************************************************
   1914  *
   1915  *  Media Ioctl callback
   1916  *
   1917  *  This routine is called when the user changes speed/duplex using
    1918 	 *  media/mediaopt options with ifconfig.
   1919  *
   1920  **********************************************************************/
   1921 static int
   1922 ixgbe_media_change(struct ifnet * ifp)
   1923 {
   1924 	struct adapter *adapter = ifp->if_softc;
   1925 	struct ifmedia *ifm = &adapter->media;
   1926 
   1927 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1928 
   1929 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1930 		return (EINVAL);
   1931 
   1932         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1933 	case IFM_10G_T:
   1934 	case IFM_AUTO:
   1935 		adapter->hw.phy.autoneg_advertised =
   1936 		    IXGBE_LINK_SPEED_100_FULL |
   1937 		    IXGBE_LINK_SPEED_1GB_FULL |
   1938 		    IXGBE_LINK_SPEED_10GB_FULL;
   1939                 break;
   1940         default:
   1941                 device_printf(adapter->dev, "Only auto media type\n");
   1942 		return (EINVAL);
   1943         }
   1944 
   1945 	return (0);
   1946 }
   1947 
   1948 /*********************************************************************
   1949  *
   1950  *  This routine maps the mbufs to tx descriptors, allowing the
   1951  *  TX engine to transmit the packets.
   1952  *  	- return 0 on success, positive on failure
   1953  *
   1954  **********************************************************************/
   1955 
   1956 static int
   1957 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1958 {
   1959 	struct m_tag *mtag;
   1960 	struct adapter  *adapter = txr->adapter;
   1961 	struct ethercom *ec = &adapter->osdep.ec;
   1962 	u32		olinfo_status = 0, cmd_type_len;
   1963 	int             i, j, error;
   1964 	int		first;
   1965 	bus_dmamap_t	map;
   1966 	struct ixgbe_tx_buf *txbuf;
   1967 	union ixgbe_adv_tx_desc *txd = NULL;
   1968 
   1969 	/* Basic descriptor defines */
   1970         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1971 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1972 
   1973 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1974         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1975 
   1976         /*
   1977          * Important to capture the first descriptor
   1978          * used because it will contain the index of
   1979          * the one we tell the hardware to report back
   1980          */
   1981         first = txr->next_avail_desc;
   1982 	txbuf = &txr->tx_buffers[first];
   1983 	map = txbuf->map;
   1984 
   1985 	/*
   1986 	 * Map the packet for DMA.
   1987 	 */
   1988 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1989 	    m_head, BUS_DMA_NOWAIT);
   1990 
   1991 	if (__predict_false(error)) {
   1992 
   1993 		switch (error) {
   1994 		case EAGAIN:
   1995 			adapter->eagain_tx_dma_setup.ev_count++;
   1996 			return EAGAIN;
   1997 		case ENOMEM:
   1998 			adapter->enomem_tx_dma_setup.ev_count++;
   1999 			return EAGAIN;
   2000 		case EFBIG:
   2001 			/*
   2002 			 * XXX Try it again?
   2003 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   2004 			 */
   2005 			adapter->efbig_tx_dma_setup.ev_count++;
   2006 			return error;
   2007 		case EINVAL:
   2008 			adapter->einval_tx_dma_setup.ev_count++;
   2009 			return error;
   2010 		default:
   2011 			adapter->other_tx_dma_setup.ev_count++;
   2012 			return error;
   2013 		}
   2014 	}
   2015 
   2016 	/* Make certain there are enough descriptors */
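         	/*
         	 * The "- 2" keeps slack for the offload context descriptor
         	 * consumed below, plus one spare slot so the ring is never
         	 * driven completely full (our reading; the shared code does
         	 * not document the margin).
         	 */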
   2017 	if (map->dm_nsegs > txr->tx_avail - 2) {
   2018 		txr->no_desc_avail.ev_count++;
   2019 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   2020 		return EAGAIN;
   2021 	}
   2022 
   2023 	/*
   2024 	** Set up the appropriate offload context
   2025 	** this will consume the first descriptor
   2026 	*/
   2027 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   2028 	if (__predict_false(error)) {
   2029 		return (error);
   2030 	}
   2031 
   2032 #ifdef IXGBE_FDIR
   2033 	/* Do the flow director magic */
   2034 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2035 		++txr->atr_count;
   2036 		if (txr->atr_count >= atr_sample_rate) {
   2037 			ixgbe_atr(txr, m_head);
   2038 			txr->atr_count = 0;
   2039 		}
   2040 	}
   2041 #endif
   2042 
   2043 	i = txr->next_avail_desc;
   2044 	for (j = 0; j < map->dm_nsegs; j++) {
   2045 		bus_size_t seglen;
   2046 		bus_addr_t segaddr;
   2047 
   2048 		txbuf = &txr->tx_buffers[i];
   2049 		txd = &txr->tx_base[i];
   2050 		seglen = map->dm_segs[j].ds_len;
   2051 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2052 
   2053 		txd->read.buffer_addr = segaddr;
   2054 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2055 		    cmd_type_len | seglen);
   2056 		txd->read.olinfo_status = htole32(olinfo_status);
   2057 
   2058 		if (++i == txr->num_desc)
   2059 			i = 0;
   2060 	}
   2061 
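         	/*
         	 * Close out the frame: EOP marks the packet's final
         	 * descriptor, and RS asks the hardware to write back
         	 * completion status so ixgbe_txeof() can reclaim buffers.
         	 */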
   2062 	txd->read.cmd_type_len |=
   2063 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2064 	txr->tx_avail -= map->dm_nsegs;
   2065 	txr->next_avail_desc = i;
   2066 
   2067 	txbuf->m_head = m_head;
   2068 	/*
   2069 	** Here we swap the map so the last descriptor,
   2070 	** which gets the completion interrupt has the
   2071 	** real map, and the first descriptor gets the
   2072 	** unused map from this descriptor.
   2073 	*/
   2074 	txr->tx_buffers[first].map = txbuf->map;
   2075 	txbuf->map = map;
   2076 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2077 	    BUS_DMASYNC_PREWRITE);
   2078 
   2079         /* Set the EOP descriptor that will be marked done */
   2080         txbuf = &txr->tx_buffers[first];
   2081 	txbuf->eop = txd;
   2082 
   2083         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2084 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2085 	/*
   2086 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2087 	 * hardware that this frame is available to transmit.
   2088 	 */
   2089 	++txr->total_packets.ev_count;
   2090 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2091 
   2092 	return 0;
   2093 }
   2094 
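         /*
         ** Program promiscuous modes.  In FCTRL, UPE enables unicast
         ** promiscuous receive and MPE multicast promiscuous receive;
         ** UPE is cleared up front, MPE is cleared when the multicast
         ** list fits the filter, then both are re-set as IFF_PROMISC
         ** and IFF_ALLMULTI dictate.
         */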
   2095 static void
   2096 ixgbe_set_promisc(struct adapter *adapter)
   2097 {
   2098 	struct ether_multi *enm;
   2099 	struct ether_multistep step;
   2100 	u_int32_t       reg_rctl;
   2101 	struct ethercom *ec = &adapter->osdep.ec;
   2102 	struct ifnet   *ifp = adapter->ifp;
   2103 	int		mcnt = 0;
   2104 
   2105 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2106 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2107 	if (ifp->if_flags & IFF_ALLMULTI)
   2108 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2109 	else {
   2110 		ETHER_FIRST_MULTI(step, ec, enm);
   2111 		while (enm != NULL) {
   2112 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2113 				break;
   2114 			mcnt++;
   2115 			ETHER_NEXT_MULTI(step, enm);
   2116 		}
   2117 	}
   2118 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2119 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2120 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2121 
   2122 	if (ifp->if_flags & IFF_PROMISC) {
   2123 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2124 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2125 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2126 		reg_rctl |= IXGBE_FCTRL_MPE;
   2127 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2128 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2129 	}
   2130 	return;
   2131 }
   2132 
   2133 
   2134 /*********************************************************************
   2135  *  Multicast Update
   2136  *
   2137  *  This routine is called whenever multicast address list is updated.
   2138  *
   2139  **********************************************************************/
   2140 #define IXGBE_RAR_ENTRIES 16
   2141 
   2142 static void
   2143 ixgbe_set_multi(struct adapter *adapter)
   2144 {
   2145 	struct ether_multi *enm;
   2146 	struct ether_multistep step;
   2147 	u32	fctrl;
   2148 	u8	*mta;
   2149 	u8	*update_ptr;
   2150 	int	mcnt = 0;
   2151 	struct ethercom *ec = &adapter->osdep.ec;
   2152 	struct ifnet   *ifp = adapter->ifp;
   2153 
   2154 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2155 
   2156 	mta = adapter->mta;
   2157 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2158 	    MAX_NUM_MULTICAST_ADDRESSES);
   2159 
   2160 	ifp->if_flags &= ~IFF_ALLMULTI;
   2161 	ETHER_FIRST_MULTI(step, ec, enm);
   2162 	while (enm != NULL) {
   2163 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2164 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2165 			ETHER_ADDR_LEN) != 0)) {
   2166 			ifp->if_flags |= IFF_ALLMULTI;
   2167 			break;
   2168 		}
   2169 		bcopy(enm->enm_addrlo,
   2170 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2171 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2172 		mcnt++;
   2173 		ETHER_NEXT_MULTI(step, enm);
   2174 	}
   2175 
   2176 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2177 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2178 	if (ifp->if_flags & IFF_PROMISC)
   2179 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2180 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2181 		fctrl |= IXGBE_FCTRL_MPE;
   2182 	}
   2183 
   2184 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2185 
   2186 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2187 		update_ptr = mta;
   2188 		ixgbe_update_mc_addr_list(&adapter->hw,
   2189 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2190 	}
   2191 
   2192 	return;
   2193 }
   2194 
   2195 /*
   2196  * This is an iterator function now needed by the multicast
   2197  * shared code. It simply feeds the shared code routine the
   2198  * addresses in the array of ixgbe_set_multi() one by one.
   2199  */
   2200 static u8 *
   2201 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2202 {
   2203 	u8 *addr = *update_ptr;
   2204 	u8 *newptr;
   2205 	*vmdq = 0;
   2206 
   2207 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2208 	*update_ptr = newptr;
   2209 	return addr;
   2210 }
   2211 
   2212 
   2213 /*********************************************************************
   2214  *  Timer routine
   2215  *
    2216  *  This routine checks for link status, updates statistics,
   2217  *  and runs the watchdog check.
   2218  *
   2219  **********************************************************************/
   2220 
   2221 static void
   2222 ixgbe_local_timer1(void *arg)
   2223 {
   2224 	struct adapter	*adapter = arg;
   2225 	device_t	dev = adapter->dev;
   2226 	struct ix_queue *que = adapter->queues;
   2227 	struct tx_ring	*txr = adapter->tx_rings;
   2228 	int		hung = 0, paused = 0;
   2229 
   2230 	KASSERT(mutex_owned(&adapter->core_mtx));
   2231 
   2232 	/* Check for pluggable optics */
   2233 	if (adapter->sfp_probe)
   2234 		if (!ixgbe_sfp_probe(adapter))
   2235 			goto out; /* Nothing to do */
   2236 
   2237 	ixgbe_update_link_status(adapter);
   2238 	ixgbe_update_stats_counters(adapter);
   2239 
   2240 	/*
   2241 	 * If the interface has been paused
   2242 	 * then don't do the watchdog check
   2243 	 */
   2244 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2245 		paused = 1;
   2246 
   2247 	/*
   2248 	** Check the TX queues status
   2249 	**      - watchdog only if all queues show hung
   2250 	*/
   2251 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2252 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2253 		    (paused == 0))
   2254 			++hung;
   2255 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2256 #ifndef IXGBE_LEGACY_TX
   2257 			softint_schedule(txr->txq_si);
   2258 #else
   2259 			softint_schedule(que->que_si);
   2260 #endif
   2261 	}
    2262 	/* Only truly watchdog if all queues show hung */
   2263 	if (hung == adapter->num_queues)
   2264 		goto watchdog;
   2265 
   2266 out:
   2267 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2268 	return;
   2269 
   2270 watchdog:
   2271 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
    2272 	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
    2273 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
    2274 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2275 	device_printf(dev, "TX(%d) desc avail = %d, "
    2276 	    "Next TX to Clean = %d\n",
   2277 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2278 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2279 	adapter->watchdog_events.ev_count++;
   2280 	ixgbe_init_locked(adapter);
   2281 }
   2282 
   2283 static void
   2284 ixgbe_local_timer(void *arg)
   2285 {
   2286 	struct adapter *adapter = arg;
   2287 
   2288 	IXGBE_CORE_LOCK(adapter);
   2289 	ixgbe_local_timer1(adapter);
   2290 	IXGBE_CORE_UNLOCK(adapter);
   2291 }
   2292 
   2293 /*
    2294 ** Note: this routine updates the OS on the link state;
   2295 **	the real check of the hardware only happens with
   2296 **	a link interrupt.
   2297 */
   2298 static void
   2299 ixgbe_update_link_status(struct adapter *adapter)
   2300 {
   2301 	struct ifnet	*ifp = adapter->ifp;
   2302 	device_t dev = adapter->dev;
   2303 
   2304 
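         	/* link_speed holds IXGBE_LINK_SPEED_* flags; 128 (0x80) is 10Gb full duplex. */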
    2305 	if (adapter->link_up) {
   2306 		if (adapter->link_active == FALSE) {
   2307 			if (bootverbose)
   2308 				device_printf(dev,"Link is up %d Gbps %s \n",
   2309 				    ((adapter->link_speed == 128)? 10:1),
   2310 				    "Full Duplex");
   2311 			adapter->link_active = TRUE;
   2312 			/* Update any Flow Control changes */
   2313 			ixgbe_fc_enable(&adapter->hw);
   2314 			if_link_state_change(ifp, LINK_STATE_UP);
   2315 		}
   2316 	} else { /* Link down */
   2317 		if (adapter->link_active == TRUE) {
   2318 			if (bootverbose)
   2319 				device_printf(dev,"Link is Down\n");
   2320 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2321 			adapter->link_active = FALSE;
   2322 		}
   2323 	}
   2324 
   2325 	return;
   2326 }
   2327 
   2328 
   2329 static void
   2330 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2331 {
   2332 	struct adapter *adapter = ifp->if_softc;
   2333 
   2334 	IXGBE_CORE_LOCK(adapter);
   2335 	ixgbe_stop(adapter);
   2336 	IXGBE_CORE_UNLOCK(adapter);
   2337 }
   2338 
   2339 /*********************************************************************
   2340  *
   2341  *  This routine disables all traffic on the adapter by issuing a
   2342  *  global reset on the MAC and deallocates TX/RX buffers.
   2343  *
   2344  **********************************************************************/
   2345 
   2346 static void
   2347 ixgbe_stop(void *arg)
   2348 {
   2349 	struct ifnet   *ifp;
   2350 	struct adapter *adapter = arg;
   2351 	struct ixgbe_hw *hw = &adapter->hw;
   2352 	ifp = adapter->ifp;
   2353 
   2354 	KASSERT(mutex_owned(&adapter->core_mtx));
   2355 
   2356 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2357 	ixgbe_disable_intr(adapter);
   2358 	callout_stop(&adapter->timer);
   2359 
   2360 	/* Let the stack know...*/
   2361 	ifp->if_flags &= ~IFF_RUNNING;
   2362 
   2363 	ixgbe_reset_hw(hw);
   2364 	hw->adapter_stopped = FALSE;
   2365 	ixgbe_stop_adapter(hw);
   2366 	if (hw->mac.type == ixgbe_mac_82599EB)
   2367 		ixgbe_stop_mac_link_on_d3_82599(hw);
   2368 	/* Turn off the laser - noop with no optics */
   2369 	ixgbe_disable_tx_laser(hw);
   2370 
   2371 	/* Update the stack */
   2372 	adapter->link_up = FALSE;
   2373 	ixgbe_update_link_status(adapter);
   2374 
   2375 	/* reprogram the RAR[0] in case user changed it. */
   2376 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2377 
   2378 	return;
   2379 }
   2380 
   2381 
   2382 /*********************************************************************
   2383  *
   2384  *  Determine hardware revision.
   2385  *
   2386  **********************************************************************/
   2387 static void
   2388 ixgbe_identify_hardware(struct adapter *adapter)
   2389 {
   2390 	pcitag_t tag;
   2391 	pci_chipset_tag_t pc;
   2392 	pcireg_t subid, id;
   2393 	struct ixgbe_hw *hw = &adapter->hw;
   2394 
   2395 	pc = adapter->osdep.pc;
   2396 	tag = adapter->osdep.tag;
   2397 
   2398 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2399 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2400 
   2401 	/* Save off the information about this board */
   2402 	hw->vendor_id = PCI_VENDOR(id);
   2403 	hw->device_id = PCI_PRODUCT(id);
   2404 	hw->revision_id =
   2405 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2406 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2407 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2408 
   2409 	/* We need this here to set the num_segs below */
   2410 	ixgbe_set_mac_type(hw);
   2411 
   2412 	/* Pick up the 82599 and VF settings */
   2413 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2414 		hw->phy.smart_speed = ixgbe_smart_speed;
   2415 		adapter->num_segs = IXGBE_82599_SCATTER;
   2416 	} else
   2417 		adapter->num_segs = IXGBE_82598_SCATTER;
   2418 
   2419 	return;
   2420 }
   2421 
   2422 /*********************************************************************
   2423  *
   2424  *  Determine optic type
   2425  *
   2426  **********************************************************************/
   2427 static void
   2428 ixgbe_setup_optics(struct adapter *adapter)
   2429 {
   2430 	struct ixgbe_hw *hw = &adapter->hw;
   2431 	int		layer;
   2432 
   2433 	layer = ixgbe_get_supported_physical_layer(hw);
   2434 
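         	/* The tests below are ordered; the first matching layer wins. */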
   2435 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2436 		adapter->optics = IFM_10G_T;
   2437 		return;
   2438 	}
   2439 
   2440 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2441 		adapter->optics = IFM_1000_T;
   2442 		return;
   2443 	}
   2444 
   2445 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2446 		adapter->optics = IFM_1000_SX;
   2447 		return;
   2448 	}
   2449 
   2450 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2451 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2452 		adapter->optics = IFM_10G_LR;
   2453 		return;
   2454 	}
   2455 
   2456 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2457 		adapter->optics = IFM_10G_SR;
   2458 		return;
   2459 	}
   2460 
   2461 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2462 		adapter->optics = IFM_10G_TWINAX;
   2463 		return;
   2464 	}
   2465 
   2466 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2467 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2468 		adapter->optics = IFM_10G_CX4;
   2469 		return;
   2470 	}
   2471 
   2472 	/* If we get here just set the default */
   2473 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2474 	return;
   2475 }
   2476 
   2477 /*********************************************************************
   2478  *
   2479  *  Setup the Legacy or MSI Interrupt handler
   2480  *
   2481  **********************************************************************/
   2482 static int
   2483 ixgbe_allocate_legacy(struct adapter *adapter,
   2484     const struct pci_attach_args *pa)
   2485 {
   2486 	device_t	dev = adapter->dev;
   2487 	struct		ix_queue *que = adapter->queues;
   2488 #ifndef IXGBE_LEGACY_TX
   2489 	struct tx_ring		*txr = adapter->tx_rings;
   2490 #endif
   2491 #ifndef NETBSD_MSI_OR_MSIX
   2492 	pci_intr_handle_t	ih;
   2493 #else
   2494 	int		counts[PCI_INTR_TYPE_SIZE];
   2495 	pci_intr_type_t intr_type, max_type;
   2496 #endif
   2497 	char intrbuf[PCI_INTRSTR_LEN];
   2498 	const char	*intrstr = NULL;
   2499 
   2500 #ifndef NETBSD_MSI_OR_MSIX
   2501 	/* We allocate a single interrupt resource */
   2502  	if (pci_intr_map(pa, &ih) != 0) {
   2503 		aprint_error_dev(dev, "unable to map interrupt\n");
   2504 		return ENXIO;
   2505 	} else {
   2506 		intrstr = pci_intr_string(adapter->osdep.pc, ih, intrbuf,
   2507 		    sizeof(intrbuf));
   2508 	}
   2509 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc, ih,
   2510 	    IPL_NET, ixgbe_legacy_irq, que);
   2511 #else
   2512 	/* Allocation settings */
   2513 	max_type = PCI_INTR_TYPE_MSI;
   2514 	counts[PCI_INTR_TYPE_MSIX] = 0;
   2515 	counts[PCI_INTR_TYPE_MSI] = 1;
   2516 	counts[PCI_INTR_TYPE_INTX] = 1;
   2517 
   2518 alloc_retry:
   2519 	if (pci_intr_alloc(pa, &adapter->osdep.intrs, counts, max_type) != 0) {
   2520 		aprint_error_dev(dev, "couldn't alloc interrupt\n");
   2521 		return ENXIO;
   2522 	}
   2523 	adapter->osdep.nintrs = 1;
   2524 	intrstr = pci_intr_string(adapter->osdep.pc, adapter->osdep.intrs[0],
   2525 	    intrbuf, sizeof(intrbuf));
   2526 	adapter->osdep.ihs[0] = pci_intr_establish(adapter->osdep.pc,
   2527 	    adapter->osdep.intrs[0], IPL_NET, ixgbe_legacy_irq, que);
   2528 	if (adapter->osdep.ihs[0] == NULL) {
   2529 		intr_type = pci_intr_type(adapter->osdep.intrs[0]);
   2530 		aprint_error_dev(dev,"unable to establish %s\n",
   2531 		    (intr_type == PCI_INTR_TYPE_MSI) ? "MSI" : "INTx");
   2532 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2533 		switch (intr_type) {
   2534 		case PCI_INTR_TYPE_MSI:
   2535 			/* The next try is for INTx: Disable MSI */
   2536 			max_type = PCI_INTR_TYPE_INTX;
   2537 			counts[PCI_INTR_TYPE_INTX] = 1;
   2538 			goto alloc_retry;
   2539 		case PCI_INTR_TYPE_INTX:
   2540 		default:
   2541 			/* See below */
   2542 			break;
   2543 		}
   2544 	}
   2545 #endif
   2546 	if (adapter->osdep.ihs[0] == NULL) {
   2547 		aprint_error_dev(dev,
   2548 		    "couldn't establish interrupt%s%s\n",
   2549 		    intrstr ? " at " : "", intrstr ? intrstr : "");
   2550 #ifdef NETBSD_MSI_OR_MSIX
   2551 		pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs, 1);
   2552 #endif
   2553 		return ENXIO;
   2554 	}
   2555 	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
   2556 	/*
   2557 	 * Try allocating a fast interrupt and the associated deferred
   2558 	 * processing contexts.
   2559 	 */
   2560 #ifndef IXGBE_LEGACY_TX
   2561 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2562 	    txr);
   2563 #endif
   2564 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2565 
   2566 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2567 	adapter->link_si =
   2568 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2569 	adapter->mod_si =
   2570 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2571 	adapter->msf_si =
   2572 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2573 
   2574 #ifdef IXGBE_FDIR
   2575 	adapter->fdir_si =
   2576 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2577 #endif
   2578 	if (que->que_si == NULL ||
   2579 	    adapter->link_si == NULL ||
   2580 	    adapter->mod_si == NULL ||
   2581 #ifdef IXGBE_FDIR
   2582 	    adapter->fdir_si == NULL ||
   2583 #endif
   2584 	    adapter->msf_si == NULL) {
   2585 		aprint_error_dev(dev,
   2586 		    "could not establish software interrupts\n");
   2587 		return ENXIO;
   2588 	}
   2589 
   2590 	/* For simplicity in the handlers */
   2591 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2592 
   2593 	return (0);
   2594 }
   2595 
   2596 
   2597 /*********************************************************************
   2598  *
   2599  *  Setup MSIX Interrupt resources and handlers
   2600  *
   2601  **********************************************************************/
   2602 static int
   2603 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2604 {
   2605 #if !defined(NETBSD_MSI_OR_MSIX)
   2606 	return 0;
   2607 #else
   2608 	device_t        dev = adapter->dev;
   2609 	struct 		ix_queue *que = adapter->queues;
   2610 	struct  	tx_ring *txr = adapter->tx_rings;
   2611 	pci_chipset_tag_t pc;
   2612 	char		intrbuf[PCI_INTRSTR_LEN];
   2613 	const char	*intrstr = NULL;
   2614 	int 		error, vector = 0;
   2615 	int		cpu_id = 0;
   2616 	kcpuset_t	*affinity;
   2617 
   2618 	pc = adapter->osdep.pc;
   2619 #ifdef	RSS
   2620 	cpuset_t cpu_mask;
   2621 	/*
   2622 	 * If we're doing RSS, the number of queues needs to
   2623 	 * match the number of RSS buckets that are configured.
   2624 	 *
   2625 	 * + If there's more queues than RSS buckets, we'll end
   2626 	 *   up with queues that get no traffic.
   2627 	 *
   2628 	 * + If there's more RSS buckets than queues, we'll end
   2629 	 *   up having multiple RSS buckets map to the same queue,
   2630 	 *   so there'll be some contention.
   2631 	 */
   2632 	if (adapter->num_queues != rss_getnumbuckets()) {
   2633 		device_printf(dev,
   2634 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
   2635 		    "; performance will be impacted.\n",
   2636 		    __func__,
   2637 		    adapter->num_queues,
   2638 		    rss_getnumbuckets());
   2639 	}
   2640 #endif
   2641 
   2642 	adapter->osdep.nintrs = adapter->num_queues + 1;
   2643 	if (pci_msix_alloc_exact(pa, &adapter->osdep.intrs,
   2644 	    adapter->osdep.nintrs) != 0) {
   2645 		aprint_error_dev(dev,
   2646 		    "failed to allocate MSI-X interrupt\n");
   2647 		return (ENXIO);
   2648 	}
   2649 
   2650 	kcpuset_create(&affinity, false);
   2651 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2652 		intrstr = pci_intr_string(pc, adapter->osdep.intrs[i], intrbuf,
   2653 		    sizeof(intrbuf));
   2654 #ifdef IXG_MPSAFE
    2655 		pci_intr_setattr(pc, &adapter->osdep.intrs[i], PCI_INTR_MPSAFE,
   2656 		    true);
   2657 #endif
   2658 		/* Set the handler function */
   2659 		que->res = adapter->osdep.ihs[i] = pci_intr_establish(pc,
   2660 		    adapter->osdep.intrs[i], IPL_NET, ixgbe_msix_que, que);
   2661 		if (que->res == NULL) {
   2662 			pci_intr_release(pc, adapter->osdep.intrs,
   2663 			    adapter->osdep.nintrs);
   2664 			aprint_error_dev(dev,
   2665 			    "Failed to register QUE handler\n");
   2666 			kcpuset_destroy(affinity);
   2667 			return ENXIO;
   2668 		}
   2669 		que->msix = vector;
    2670         	adapter->que_mask |= (u64)(1ULL << que->msix);
    2671 #ifdef	RSS
         		/*
    2672 		 * The queue ID is used as the RSS layer bucket ID.
   2673 		 * We look up the queue ID -> RSS CPU ID and select
   2674 		 * that.
   2675 		 */
   2676 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
   2677 #else
   2678 		/*
   2679 		 * Bind the msix vector, and thus the
   2680 		 * ring to the corresponding cpu.
   2681 		 *
   2682 		 * This just happens to match the default RSS round-robin
   2683 		 * bucket -> queue -> CPU allocation.
   2684 		 */
   2685 		if (adapter->num_queues > 1)
   2686 			cpu_id = i;
   2687 #endif
   2688 		/* Round-robin affinity */
   2689 		kcpuset_zero(affinity);
   2690 		kcpuset_set(affinity, cpu_id % ncpu);
   2691 		error = pci_intr_distribute(adapter->osdep.ihs[i], affinity,
   2692 		    NULL);
   2693 		aprint_normal_dev(dev, "for TX/RX, interrupting at %s",
   2694 		    intrstr);
   2695 		if (error == 0) {
   2696 #ifdef	RSS
    2697 			aprint_normal(", bound RSS bucket %d to CPU %d\n",
   2698 			    i, cpu_id);
   2699 #else
   2700 			aprint_normal(", bound queue %d to cpu %d\n",
   2701 			    i, cpu_id);
   2702 #endif
   2703 		} else
   2704 			aprint_normal("\n");
   2705 
   2706 #ifndef IXGBE_LEGACY_TX
   2707 		txr->txq_si = softint_establish(SOFTINT_NET,
   2708 		    ixgbe_deferred_mq_start, txr);
   2709 #endif
   2710 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2711 		    que);
   2712 		if (que->que_si == NULL) {
   2713 			aprint_error_dev(dev,
   2714 			    "could not establish software interrupt\n");
   2715 		}
   2716 	}
   2717 
   2718 	/* and Link */
   2719 	cpu_id++;
   2720 	intrstr = pci_intr_string(pc, adapter->osdep.intrs[vector], intrbuf,
   2721 	    sizeof(intrbuf));
   2722 #ifdef IXG_MPSAFE
   2723 	pci_intr_setattr(pc, &adapter->osdep.intrs[vector], PCI_INTR_MPSAFE,
   2724 	    true);
   2725 #endif
   2726 	/* Set the link handler function */
   2727 	adapter->osdep.ihs[vector] = pci_intr_establish(pc,
   2728 	    adapter->osdep.intrs[vector], IPL_NET, ixgbe_msix_link, adapter);
   2729 	if (adapter->osdep.ihs[vector] == NULL) {
   2730 		adapter->res = NULL;
   2731 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2732 		kcpuset_destroy(affinity);
   2733 		return (ENXIO);
   2734 	}
   2735 	/* Round-robin affinity */
   2736 	kcpuset_zero(affinity);
   2737 	kcpuset_set(affinity, cpu_id % ncpu);
    2738 	error = pci_intr_distribute(adapter->osdep.ihs[vector], affinity, NULL);
   2739 
   2740 	aprint_normal_dev(dev,
   2741 	    "for link, interrupting at %s", intrstr);
   2742 	if (error == 0)
   2743 		aprint_normal(", affinity to cpu %d\n", cpu_id);
   2744 	else
   2745 		aprint_normal("\n");
   2746 
   2747 	adapter->linkvec = vector;
   2748 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2749 	adapter->link_si =
   2750 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2751 	adapter->mod_si =
   2752 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2753 	adapter->msf_si =
   2754 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2755 #ifdef IXGBE_FDIR
   2756 	adapter->fdir_si =
   2757 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2758 #endif
   2759 
   2760 	kcpuset_destroy(affinity);
   2761 	return (0);
   2762 #endif
   2763 }
   2764 
   2765 /*
   2766  * Setup Either MSI/X or MSI
   2767  */
   2768 static int
   2769 ixgbe_setup_msix(struct adapter *adapter)
   2770 {
   2771 #if !defined(NETBSD_MSI_OR_MSIX)
   2772 	return 0;
   2773 #else
   2774 	device_t dev = adapter->dev;
   2775 	int want, queues, msgs;
   2776 
   2777 	/* Override by tuneable */
   2778 	if (ixgbe_enable_msix == 0)
   2779 		goto msi;
   2780 
   2781 	/* First try MSI/X */
   2782 	msgs = pci_msix_count(adapter->osdep.pc, adapter->osdep.tag);
   2783 	if (msgs < IXG_MSIX_NINTR)
   2784 		goto msi;
   2785 
   2786 	adapter->msix_mem = (void *)1; /* XXX */
   2787 
   2788 	/* Figure out a reasonable auto config value */
   2789 	queues = (ncpu > (msgs-1)) ? (msgs-1) : ncpu;
   2790 
   2791 	/* Override based on tuneable */
   2792 	if (ixgbe_num_queues != 0)
   2793 		queues = ixgbe_num_queues;
   2794 
   2795 #ifdef	RSS
   2796 	/* If we're doing RSS, clamp at the number of RSS buckets */
   2797 	if (queues > rss_getnumbuckets())
   2798 		queues = rss_getnumbuckets();
   2799 #endif
   2800 
   2801 	/* reflect correct sysctl value */
   2802 	ixgbe_num_queues = queues;
   2803 
   2804 	/*
   2805 	** Want one vector (RX/TX pair) per queue
   2806 	** plus an additional for Link.
   2807 	*/
   2808 	want = queues + 1;
   2809 	if (msgs >= want)
   2810 		msgs = want;
   2811 	else {
   2812                	aprint_error_dev(dev,
   2813 		    "MSIX Configuration Problem, "
   2814 		    "%d vectors but %d queues wanted!\n",
   2815 		    msgs, want);
   2816 		goto msi;
   2817 	}
   2818 	device_printf(dev,
   2819 	    "Using MSIX interrupts with %d vectors\n", msgs);
   2820 	adapter->num_queues = queues;
   2821 	return (msgs);
   2822 
   2823 	/*
   2824 	** If MSIX alloc failed or provided us with
   2825 	** less than needed, free and fall through to MSI
   2826 	*/
   2827 msi:
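         	/*
         	** Fall back to a single MSI (or ultimately INTx) vector; the
         	** count is only probed here, the actual interrupt allocation
         	** happens later in ixgbe_allocate_legacy().
         	*/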
   2828        	msgs = pci_msi_count(adapter->osdep.pc, adapter->osdep.tag);
   2829 	adapter->msix_mem = NULL; /* XXX */
   2830        	msgs = 1;
   2831 	aprint_normal_dev(dev,"Using an MSI interrupt\n");
   2832 	return (msgs);
   2833 #endif
   2834 }
   2835 
   2836 
   2837 static int
   2838 ixgbe_allocate_pci_resources(struct adapter *adapter,
   2839     const struct pci_attach_args *pa)
   2840 {
   2841 	pcireg_t	memtype;
   2842 	device_t        dev = adapter->dev;
   2843 	bus_addr_t addr;
   2844 	int flags;
   2845 
   2846 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2847 	switch (memtype) {
   2848 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2849 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2850 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2851 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2852 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2853 			goto map_err;
   2854 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2855 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2856 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2857 		}
   2858 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2859 		     adapter->osdep.mem_size, flags,
   2860 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2861 map_err:
   2862 			adapter->osdep.mem_size = 0;
   2863 			aprint_error_dev(dev, "unable to map BAR0\n");
   2864 			return ENXIO;
   2865 		}
   2866 		break;
   2867 	default:
   2868 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2869 		return ENXIO;
   2870 	}
   2871 
   2872 	/* Legacy defaults */
   2873 	adapter->num_queues = 1;
   2874 	adapter->hw.back = &adapter->osdep;
   2875 
   2876 	/*
   2877 	** Now setup MSI or MSI/X, should
   2878 	** return us the number of supported
   2879 	** vectors. (Will be 1 for MSI)
   2880 	*/
   2881 	adapter->msix = ixgbe_setup_msix(adapter);
   2882 	return (0);
   2883 }
   2884 
   2885 static void
   2886 ixgbe_free_pci_resources(struct adapter * adapter)
   2887 {
   2888 #if defined(NETBSD_MSI_OR_MSIX)
   2889 	struct 		ix_queue *que = adapter->queues;
   2890 #endif
   2891 	int		rid;
   2892 
   2893 #if defined(NETBSD_MSI_OR_MSIX)
   2894 	/*
   2895 	**  Release all msix queue resources:
   2896 	*/
   2897 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2898 		if (que->res != NULL)
   2899 			pci_intr_disestablish(adapter->osdep.pc,
   2900 			    adapter->osdep.ihs[i]);
   2901 	}
   2902 #endif
   2903 
   2904 	/* Clean the Legacy or Link interrupt last */
   2905 	if (adapter->linkvec) /* we are doing MSIX */
   2906 		rid = adapter->linkvec;
   2907 	else
   2908 		rid = 0;
   2909 
   2910 	if (adapter->osdep.ihs[rid] != NULL) {
   2911 		pci_intr_disestablish(adapter->osdep.pc,
   2912 		    adapter->osdep.ihs[rid]);
   2913 		adapter->osdep.ihs[rid] = NULL;
   2914 	}
   2915 
   2916 #if defined(NETBSD_MSI_OR_MSIX)
   2917 	pci_intr_release(adapter->osdep.pc, adapter->osdep.intrs,
   2918 	    adapter->osdep.nintrs);
   2919 #endif
   2920 
   2921 	if (adapter->osdep.mem_size != 0) {
   2922 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2923 		    adapter->osdep.mem_bus_space_handle,
   2924 		    adapter->osdep.mem_size);
   2925 	}
   2926 
   2927 	return;
   2928 }
   2929 
   2930 /*********************************************************************
   2931  *
   2932  *  Setup networking device structure and register an interface.
   2933  *
   2934  **********************************************************************/
   2935 static int
   2936 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2937 {
   2938 	struct ethercom *ec = &adapter->osdep.ec;
   2939 	struct ixgbe_hw *hw = &adapter->hw;
   2940 	struct ifnet   *ifp;
   2941 
   2942 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2943 
   2944 	ifp = adapter->ifp = &ec->ec_if;
   2945 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2946 	ifp->if_baudrate = IF_Gbps(10);
   2947 	ifp->if_init = ixgbe_init;
   2948 	ifp->if_stop = ixgbe_ifstop;
   2949 	ifp->if_softc = adapter;
   2950 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2951 	ifp->if_ioctl = ixgbe_ioctl;
   2952 #ifndef IXGBE_LEGACY_TX
   2953 	ifp->if_transmit = ixgbe_mq_start;
   2954 	ifp->if_qflush = ixgbe_qflush;
   2955 #else
   2956 	ifp->if_start = ixgbe_start;
   2957 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2958 #if 0
   2959 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
   2960 #endif
   2961 	IFQ_SET_READY(&ifp->if_snd);
   2962 #endif
   2963 
   2964 	if_attach(ifp);
   2965 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2966 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2967 
   2968 	adapter->max_frame_size =
   2969 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2970 
   2971 	/*
   2972 	 * Tell the upper layer(s) we support long frames.
   2973 	 */
   2974 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2975 
   2976 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2977 	ifp->if_capenable = 0;
   2978 
   2979 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2980 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2981 	ifp->if_capabilities |= IFCAP_LRO;
   2982 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2983 	    		    | ETHERCAP_VLAN_MTU;
   2984 	ec->ec_capenable = ec->ec_capabilities;
   2985 
   2986 	/*
    2987 	** Don't turn this on by default: if vlans are
    2988 	** created on another pseudo device (e.g. lagg),
    2989 	** vlan events are not passed through, breaking
    2990 	** operation, but with HW FILTER off it works.  If
    2991 	** you use vlans directly on the ixgbe driver you can
    2992 	** enable this and get full hardware tag filtering.
   2993 	*/
   2994 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2995 
   2996 	/*
   2997 	 * Specify the media types supported by this adapter and register
   2998 	 * callbacks to update media and link information
   2999 	 */
   3000 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   3001 		     ixgbe_media_status);
   3002 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   3003 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   3004 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   3005 		ifmedia_add(&adapter->media,
   3006 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   3007 		ifmedia_add(&adapter->media,
   3008 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   3009 	}
   3010 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   3011 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   3012 
   3013 	return (0);
   3014 }
   3015 
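         /*
         ** Bring the link up: SFP parts are handed off to the mod/msf
         ** softints (which run module identification and setup), while
         ** other media negotiate directly through the shared-code
         ** check_link/setup_link hooks.
         */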
   3016 static void
   3017 ixgbe_config_link(struct adapter *adapter)
   3018 {
   3019 	struct ixgbe_hw *hw = &adapter->hw;
   3020 	u32	autoneg, err = 0;
   3021 	bool	sfp, negotiate;
   3022 
   3023 	sfp = ixgbe_is_sfp(hw);
   3024 
   3025 	if (sfp) {
   3026 		void *ip;
   3027 
   3028 		if (hw->phy.multispeed_fiber) {
   3029 			hw->mac.ops.setup_sfp(hw);
   3030 			ixgbe_enable_tx_laser(hw);
   3031 			ip = adapter->msf_si;
   3032 		} else {
   3033 			ip = adapter->mod_si;
   3034 		}
   3035 
   3036 		kpreempt_disable();
   3037 		softint_schedule(ip);
   3038 		kpreempt_enable();
   3039 	} else {
   3040 		if (hw->mac.ops.check_link)
   3041 			err = ixgbe_check_link(hw, &adapter->link_speed,
   3042 			    &adapter->link_up, FALSE);
   3043 		if (err)
   3044 			goto out;
   3045 		autoneg = hw->phy.autoneg_advertised;
   3046 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   3047                 	err  = hw->mac.ops.get_link_capabilities(hw,
   3048 			    &autoneg, &negotiate);
   3049 		else
   3050 			negotiate = 0;
   3051 		if (err)
   3052 			goto out;
   3053 		if (hw->mac.ops.setup_link)
   3054                 	err = hw->mac.ops.setup_link(hw,
   3055 			    autoneg, adapter->link_up);
   3056 	}
   3057 out:
   3058 	return;
   3059 }
   3060 
   3061 /********************************************************************
   3062  * Manage DMA'able memory.
   3063  *******************************************************************/
   3064 
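         /*
         ** ixgbe_dma_malloc() walks the usual bus_dma(9) sequence: create
         ** a tag, allocate raw segments, map them into KVA, create a DMA
         ** map, and load it.  The fail_* labels unwind those steps in
         ** reverse order.
         */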
   3065 static int
   3066 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   3067 		struct ixgbe_dma_alloc *dma, const int mapflags)
   3068 {
   3069 	device_t dev = adapter->dev;
   3070 	int             r, rsegs;
   3071 
   3072 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3073 			       DBA_ALIGN, 0,	/* alignment, bounds */
   3074 			       size,	/* maxsize */
   3075 			       1,	/* nsegments */
   3076 			       size,	/* maxsegsize */
   3077 			       BUS_DMA_ALLOCNOW,	/* flags */
   3078 			       &dma->dma_tag);
   3079 	if (r != 0) {
   3080 		aprint_error_dev(dev,
   3081 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   3082 		goto fail_0;
   3083 	}
   3084 
   3085 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   3086 		size,
   3087 		dma->dma_tag->dt_alignment,
   3088 		dma->dma_tag->dt_boundary,
   3089 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   3090 	if (r != 0) {
   3091 		aprint_error_dev(dev,
   3092 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   3093 		goto fail_1;
   3094 	}
   3095 
   3096 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   3097 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   3098 	if (r != 0) {
   3099 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   3100 		    __func__, r);
   3101 		goto fail_2;
   3102 	}
   3103 
   3104 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   3105 	if (r != 0) {
    3106 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   3107 		    __func__, r);
   3108 		goto fail_3;
   3109 	}
   3110 
   3111 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   3112 			    size,
   3113 			    NULL,
   3114 			    mapflags | BUS_DMA_NOWAIT);
   3115 	if (r != 0) {
   3116 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3117 		    __func__, r);
   3118 		goto fail_4;
   3119 	}
   3120 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3121 	dma->dma_size = size;
   3122 	return 0;
   3123 fail_4:
   3124 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3125 fail_3:
   3126 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3127 fail_2:
   3128 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3129 fail_1:
   3130 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3131 fail_0:
   3132 	return r;
   3133 }
   3134 
   3135 static void
   3136 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3137 {
   3138 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3139 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3140 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3141 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3142 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3143 }
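
        /*
         * Usage sketch: callers pair the two routines above, e.g. the
         * queue setup below does
         *
         *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
         *	    BUS_DMA_NOWAIT))
         *		... bail out ...
         *
         * and undoes it on teardown with
         *
         *	ixgbe_dma_free(adapter, &txr->txdma);
         */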
   3144 
   3145 
   3146 /*********************************************************************
   3147  *
   3148  *  Allocate memory for the transmit and receive rings, and then
   3149  *  for the descriptors associated with each; called only once at attach.
   3150  *
   3151  **********************************************************************/
   3152 static int
   3153 ixgbe_allocate_queues(struct adapter *adapter)
   3154 {
   3155 	device_t	dev = adapter->dev;
   3156 	struct ix_queue	*que;
   3157 	struct tx_ring	*txr;
   3158 	struct rx_ring	*rxr;
   3159 	int rsize, tsize, error = IXGBE_SUCCESS;
   3160 	int txconf = 0, rxconf = 0;
   3161 
   3162 	/* First allocate the top level queue structs */
   3163 	if (!(adapter->queues =
   3164 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   3165 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3166 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
   3167 		error = ENOMEM;
   3168 		goto fail;
   3169 	}
   3170 
   3171 	/* Next allocate the TX ring struct memory */
   3172 	if (!(adapter->tx_rings =
   3173 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3174 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3175 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3176 		error = ENOMEM;
   3177 		goto tx_fail;
   3178 	}
   3179 
   3180 	/* Then allocate the RX ring struct memory */
   3181 	if (!(adapter->rx_rings =
   3182 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3183 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3184 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3185 		error = ENOMEM;
   3186 		goto rx_fail;
   3187 	}
   3188 
   3189 	/* For the ring itself */
   3190 	tsize = roundup2(adapter->num_tx_desc *
   3191 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
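        	/*
        	 * Ring base addresses must be DBA_ALIGN aligned;
        	 * ixgbe_dma_malloc() creates its tag with that alignment,
        	 * and the roundup above keeps the allocation a whole
        	 * multiple of it.
        	 */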
   3192 
   3193 	/*
   3194 	 * Now set up the TX queues; txconf is needed to handle the
   3195 	 * possibility that things fail midcourse, in which case we
   3196 	 * can undo the allocations gracefully.
   3197 	 */
   3198 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3199 		/* Set up some basics */
   3200 		txr = &adapter->tx_rings[i];
   3201 		txr->adapter = adapter;
   3202 		txr->me = i;
   3203 		txr->num_desc = adapter->num_tx_desc;
   3204 
   3205 		/* Initialize the TX side lock */
   3206 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3207 		    device_xname(dev), txr->me);
   3208 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3209 
   3210 		if (ixgbe_dma_malloc(adapter, tsize,
   3211 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3212 			aprint_error_dev(dev,
   3213 			    "Unable to allocate TX Descriptor memory\n");
   3214 			error = ENOMEM;
   3215 			goto err_tx_desc;
   3216 		}
   3217 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3218 		bzero((void *)txr->tx_base, tsize);
   3219 
   3220 		/* Now allocate transmit buffers for the ring */
   3221 		if (ixgbe_allocate_transmit_buffers(txr)) {
   3222 			aprint_error_dev(dev,
   3223 			    "Critical Failure setting up transmit buffers\n");
   3224 			error = ENOMEM;
   3225 			goto err_tx_desc;
   3226 		}
   3227 #ifndef IXGBE_LEGACY_TX
   3228 		/* Allocate a buf ring */
   3229 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3230 		    M_WAITOK, &txr->tx_mtx);
   3231 		if (txr->br == NULL) {
   3232 			aprint_error_dev(dev,
   3233 			    "Critical Failure setting up buf ring\n");
   3234 			error = ENOMEM;
   3235 			goto err_tx_desc;
   3236 		}
   3237 #endif
   3238 	}
   3239 
   3240 	/*
   3241 	 * Next the RX queues...
   3242 	 */
   3243 	rsize = roundup2(adapter->num_rx_desc *
   3244 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3245 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3246 		rxr = &adapter->rx_rings[i];
   3247 		/* Set up some basics */
   3248 		rxr->adapter = adapter;
   3249 		rxr->me = i;
   3250 		rxr->num_desc = adapter->num_rx_desc;
   3251 
   3252 		/* Initialize the RX side lock */
   3253 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3254 		    device_xname(dev), rxr->me);
   3255 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3256 
   3257 		if (ixgbe_dma_malloc(adapter, rsize,
   3258 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3259 			aprint_error_dev(dev,
   3260 			    "Unable to allocate RX Descriptor memory\n");
   3261 			error = ENOMEM;
   3262 			goto err_rx_desc;
   3263 		}
   3264 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3265 		bzero((void *)rxr->rx_base, rsize);
   3266 
   3267 		/* Allocate receive buffers for the ring */
   3268 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3269 			aprint_error_dev(dev,
   3270 			    "Critical Failure setting up receive buffers\n");
   3271 			error = ENOMEM;
   3272 			goto err_rx_desc;
   3273 		}
   3274 	}
   3275 
   3276 	/*
   3277 	** Finally set up the queue holding structs
   3278 	*/
   3279 	for (int i = 0; i < adapter->num_queues; i++) {
   3280 		que = &adapter->queues[i];
   3281 		que->adapter = adapter;
   3282 		que->txr = &adapter->tx_rings[i];
   3283 		que->rxr = &adapter->rx_rings[i];
   3284 	}
   3285 
   3286 	return (0);
   3287 
   3288 err_rx_desc:
   3289 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3290 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3291 err_tx_desc:
   3292 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3293 		ixgbe_dma_free(adapter, &txr->txdma);
   3294 	free(adapter->rx_rings, M_DEVBUF);
   3295 rx_fail:
   3296 	free(adapter->tx_rings, M_DEVBUF);
   3297 tx_fail:
   3298 	free(adapter->queues, M_DEVBUF);
   3299 fail:
   3300 	return (error);
   3301 }
   3302 
   3303 /*********************************************************************
   3304  *
   3305  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3306  *  the information needed to transmit a packet on the wire. This is
   3307  *  called only once at attach; setup is done on every reset.
   3308  *
   3309  **********************************************************************/
   3310 static int
   3311 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3312 {
   3313 	struct adapter *adapter = txr->adapter;
   3314 	device_t dev = adapter->dev;
   3315 	struct ixgbe_tx_buf *txbuf;
   3316 	int error, i;
   3317 
   3318 	/*
   3319 	 * Setup DMA descriptor areas.
   3320 	 */
   3321 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3322 			       1, 0,		/* alignment, bounds */
   3323 			       IXGBE_TSO_SIZE,		/* maxsize */
   3324 			       adapter->num_segs,	/* nsegments */
   3325 			       PAGE_SIZE,		/* maxsegsize */
   3326 			       0,			/* flags */
   3327 			       &txr->txtag))) {
   3328 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3329 		goto fail;
   3330 	}
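
        	/*
        	 * The tag above bounds a single send: up to IXGBE_TSO_SIZE
        	 * bytes in at most adapter->num_segs segments of PAGE_SIZE
        	 * each, i.e. the largest footprint one (TSO) packet may
        	 * occupy in the ring.
        	 */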
   3331 
   3332 	if (!(txr->tx_buffers =
   3333 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3334 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3335 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3336 		error = ENOMEM;
   3337 		goto fail;
   3338 	}
   3339 
   3340 	/* Create the descriptor buffer DMA maps */
   3341 	txbuf = txr->tx_buffers;
   3342 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3343 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3344 		if (error != 0) {
   3345 			aprint_error_dev(dev,
   3346 			    "Unable to create TX DMA map (%d)\n", error);
   3347 			goto fail;
   3348 		}
   3349 	}
   3350 
   3351 	return 0;
   3352 fail:
   3353 	/* Free everything; this handles the case where we failed partway */
   3354 	ixgbe_free_transmit_structures(adapter);
   3355 	return (error);
   3356 }
   3357 
   3358 /*********************************************************************
   3359  *
   3360  *  Initialize a transmit ring.
   3361  *
   3362  **********************************************************************/
   3363 static void
   3364 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3365 {
   3366 	struct adapter *adapter = txr->adapter;
   3367 	struct ixgbe_tx_buf *txbuf;
   3368 	int i;
   3369 #ifdef DEV_NETMAP
   3370 	struct netmap_adapter *na = NA(adapter->ifp);
   3371 	struct netmap_slot *slot;
   3372 #endif /* DEV_NETMAP */
   3373 
   3374 	/* Clear the old ring contents */
   3375 	IXGBE_TX_LOCK(txr);
   3376 #ifdef DEV_NETMAP
   3377 	/*
   3378 	 * (under lock): if in netmap mode, do some consistency
   3379 	 * checks and set slot to entry 0 of the netmap ring.
   3380 	 */
   3381 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3382 #endif /* DEV_NETMAP */
   3383 	bzero((void *)txr->tx_base,
   3384 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3385 	/* Reset indices */
   3386 	txr->next_avail_desc = 0;
   3387 	txr->next_to_clean = 0;
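
        	/*
        	 * next_avail_desc is where the encap path writes the next
        	 * descriptor; next_to_clean is where ixgbe_txeof() resumes.
        	 * With tx_avail == num_desc the ring is empty.
        	 */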
   3388 
   3389 	/* Free any existing tx buffers. */
   3390 	txbuf = txr->tx_buffers;
   3391 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3392 		if (txbuf->m_head != NULL) {
   3393 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3394 			    0, txbuf->m_head->m_pkthdr.len,
   3395 			    BUS_DMASYNC_POSTWRITE);
   3396 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3397 			m_freem(txbuf->m_head);
   3398 			txbuf->m_head = NULL;
   3399 		}
   3400 #ifdef DEV_NETMAP
   3401 		/*
   3402 		 * In netmap mode, set the map for the packet buffer.
   3403 		 * NOTE: Some drivers (not this one) also need to set
   3404 		 * the physical buffer address in the NIC ring.
   3405 		 * Slots in the netmap ring (indexed by "si") are
   3406 		 * kring->nkr_hwofs positions "ahead" wrt the
   3407 		 * corresponding slot in the NIC ring. In some drivers
   3408 		 * (not here) nkr_hwofs can be negative. Function
   3409 		 * netmap_idx_n2k() handles wraparounds properly.
   3410 		 */
   3411 		if (slot) {
   3412 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3413 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
   3414 		}
   3415 #endif /* DEV_NETMAP */
   3416 		/* Clear the EOP descriptor pointer */
   3417 		txbuf->eop = NULL;
   3418 	}
   3419 
   3420 #ifdef IXGBE_FDIR
   3421 	/* Set the rate at which we sample packets */
   3422 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3423 		txr->atr_sample = atr_sample_rate;
   3424 #endif
   3425 
   3426 	/* Set number of descriptors available */
   3427 	txr->tx_avail = adapter->num_tx_desc;
   3428 
   3429 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3430 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3431 	IXGBE_TX_UNLOCK(txr);
   3432 }
   3433 
   3434 /*********************************************************************
   3435  *
   3436  *  Initialize all transmit rings.
   3437  *
   3438  **********************************************************************/
   3439 static int
   3440 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3441 {
   3442 	struct tx_ring *txr = adapter->tx_rings;
   3443 
   3444 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3445 		ixgbe_setup_transmit_ring(txr);
   3446 
   3447 	return (0);
   3448 }
   3449 
   3450 /*********************************************************************
   3451  *
   3452  *  Enable transmit unit.
   3453  *
   3454  **********************************************************************/
   3455 static void
   3456 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3457 {
   3458 	struct tx_ring	*txr = adapter->tx_rings;
   3459 	struct ixgbe_hw	*hw = &adapter->hw;
   3460 
   3461 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3462 
   3463 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3464 		u64	tdba = txr->txdma.dma_paddr;
   3465 		u32	txctrl;
   3466 
   3467 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3468 		       (tdba & 0x00000000ffffffffULL));
   3469 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3470 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3471 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3472 
   3473 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3474 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3475 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3476 
   3477 		/* Setup Transmit Descriptor Cmd Settings */
   3478 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3479 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3480 
   3481 		/* Set the processing limit */
   3482 		txr->process_limit = ixgbe_tx_process_limit;
   3483 
   3484 		/* Disable Head Writeback */
   3485 		switch (hw->mac.type) {
   3486 		case ixgbe_mac_82598EB:
   3487 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3488 			break;
   3489 		case ixgbe_mac_82599EB:
   3490 		case ixgbe_mac_X540:
   3491 		default:
   3492 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3493 			break;
   3494 		}
   3495 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3496 		switch (hw->mac.type) {
   3497 		case ixgbe_mac_82598EB:
   3498 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3499 			break;
   3500 		case ixgbe_mac_82599EB:
   3501 		case ixgbe_mac_X540:
   3502 		default:
   3503 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3504 			break;
   3505 		}
   3506 
   3507 	}
   3508 
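        	/*
        	 * On 82599/X540-class MACs the transmit path is additionally
        	 * gated by DMATXCTL.TE, and MTQC (here 64 queues, one packet
        	 * buffer) may only be written while the DCB arbiter is
        	 * disabled -- hence the RTTDCS.ARBDIS dance below.
        	 */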
   3509 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3510 		u32 dmatxctl, rttdcs;
   3511 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3512 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3513 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3514 		/* Disable arbiter to set MTQC */
   3515 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3516 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3517 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3518 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3519 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3520 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3521 	}
   3522 
   3523 	return;
   3524 }
   3525 
   3526 /*********************************************************************
   3527  *
   3528  *  Free all transmit rings.
   3529  *
   3530  **********************************************************************/
   3531 static void
   3532 ixgbe_free_transmit_structures(struct adapter *adapter)
   3533 {
   3534 	struct tx_ring *txr = adapter->tx_rings;
   3535 
   3536 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3537 		ixgbe_free_transmit_buffers(txr);
   3538 		ixgbe_dma_free(adapter, &txr->txdma);
   3539 		IXGBE_TX_LOCK_DESTROY(txr);
   3540 	}
   3541 	free(adapter->tx_rings, M_DEVBUF);
   3542 }
   3543 
   3544 /*********************************************************************
   3545  *
   3546  *  Free transmit ring related data structures.
   3547  *
   3548  **********************************************************************/
   3549 static void
   3550 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3551 {
   3552 	struct adapter *adapter = txr->adapter;
   3553 	struct ixgbe_tx_buf *tx_buffer;
   3554 	int             i;
   3555 
   3556 	INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
   3557 
   3558 	if (txr->tx_buffers == NULL)
   3559 		return;
   3560 
   3561 	tx_buffer = txr->tx_buffers;
   3562 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3563 		if (tx_buffer->m_head != NULL) {
   3564 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3565 			    0, tx_buffer->m_head->m_pkthdr.len,
   3566 			    BUS_DMASYNC_POSTWRITE);
   3567 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3568 			m_freem(tx_buffer->m_head);
   3569 			tx_buffer->m_head = NULL;
   3570 			if (tx_buffer->map != NULL) {
   3571 				ixgbe_dmamap_destroy(txr->txtag,
   3572 				    tx_buffer->map);
   3573 				tx_buffer->map = NULL;
   3574 			}
   3575 		} else if (tx_buffer->map != NULL) {
   3576 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3577 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3578 			tx_buffer->map = NULL;
   3579 		}
   3580 	}
   3581 #ifndef IXGBE_LEGACY_TX
   3582 	if (txr->br != NULL)
   3583 		buf_ring_free(txr->br, M_DEVBUF);
   3584 #endif
   3585 	if (txr->tx_buffers != NULL) {
   3586 		free(txr->tx_buffers, M_DEVBUF);
   3587 		txr->tx_buffers = NULL;
   3588 	}
   3589 	if (txr->txtag != NULL) {
   3590 		ixgbe_dma_tag_destroy(txr->txtag);
   3591 		txr->txtag = NULL;
   3592 	}
   3593 	return;
   3594 }
   3595 
   3596 /*********************************************************************
   3597  *
   3598  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3599  *
   3600  **********************************************************************/
   3601 
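        /*
         * Advanced-mode TX uses two descriptor flavors: context
         * descriptors, which latch per-packet offload parameters (VLAN
         * tag, header lengths, L4 type) into the ring, and the data
         * descriptors that follow them.  This function builds the
         * context descriptor for checksum/VLAN offload; TSO is handed
         * off to ixgbe_tso_setup().
         */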
   3602 static int
   3603 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3604     u32 *cmd_type_len, u32 *olinfo_status)
   3605 {
   3606 	struct m_tag *mtag;
   3607 	struct adapter *adapter = txr->adapter;
   3608 	struct ethercom *ec = &adapter->osdep.ec;
   3609 	struct ixgbe_adv_tx_context_desc *TXD;
   3610 	struct ether_vlan_header *eh;
   3611 	struct ip ip;
   3612 	struct ip6_hdr ip6;
   3613 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3614 	int	ehdrlen, ip_hlen = 0;
   3615 	u16	etype;
   3616 	u8	ipproto __diagused = 0;
   3617 	int	offload = TRUE;
   3618 	int	ctxd = txr->next_avail_desc;
   3619 	u16	vtag = 0;
   3620 
   3621 	/* First check if TSO is to be used */
   3622 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3623 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3624 
   3625 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3626 		offload = FALSE;
   3627 
   3628 	/* Indicate the whole packet as payload when not doing TSO */
   3629 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3630 
   3631 	/* Now ready a context descriptor */
   3632 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3633 
   3634 	/*
   3635 	** In advanced descriptors the vlan tag must
   3636 	** be placed into the context descriptor. Hence
   3637 	** we need to make one even if not doing offloads.
   3638 	*/
   3639 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3640 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3641 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3642 	} else if (offload == FALSE) /* ... no offload to do */
   3643 		return 0;
   3644 
   3645 	/*
   3646 	 * Determine where frame payload starts.
   3647 	 * Jump over vlan headers if already present,
   3648 	 * helpful for QinQ too.
   3649 	 */
   3650 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3651 	eh = mtod(mp, struct ether_vlan_header *);
   3652 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3653 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3654 		etype = ntohs(eh->evl_proto);
   3655 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3656 	} else {
   3657 		etype = ntohs(eh->evl_encap_proto);
   3658 		ehdrlen = ETHER_HDR_LEN;
   3659 	}
   3660 
   3661 	/* Set the ether header length */
   3662 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3663 
   3664 	switch (etype) {
   3665 	case ETHERTYPE_IP:
   3666 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3667 		ip_hlen = ip.ip_hl << 2;
   3668 		ipproto = ip.ip_p;
   3669 #if 0
   3670 		ip.ip_sum = 0;
   3671 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3672 #else
   3673 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3674 		    ip.ip_sum == 0);
   3675 #endif
   3676 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3677 		break;
   3678 	case ETHERTYPE_IPV6:
   3679 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3680 		ip_hlen = sizeof(ip6);
   3681 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3682 		ipproto = ip6.ip6_nxt;
   3683 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3684 		break;
   3685 	default:
   3686 		break;
   3687 	}
   3688 
   3689 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3690 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3691 
   3692 	vlan_macip_lens |= ip_hlen;
   3693 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3694 
   3695 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3696 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3697 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3698 		KASSERT(ipproto == IPPROTO_TCP);
   3699 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3700 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3701 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3702 		KASSERT(ipproto == IPPROTO_UDP);
   3703 	}
   3704 
   3705 	/* Now copy bits into descriptor */
   3706 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3707 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3708 	TXD->seqnum_seed = htole32(0);
   3709 	TXD->mss_l4len_idx = htole32(0);
   3710 
   3711 	/* We've consumed the first desc, adjust counters */
   3712 	if (++ctxd == txr->num_desc)
   3713 		ctxd = 0;
   3714 	txr->next_avail_desc = ctxd;
   3715 	--txr->tx_avail;
   3716 
   3717 	return 0;
   3718 }
   3719 
   3720 /**********************************************************************
   3721  *
   3722  *  Setup work for hardware segmentation offload (TSO) on
   3723  *  adapters using advanced tx descriptors
   3724  *
   3725  **********************************************************************/
   3726 static int
   3727 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3728     u32 *cmd_type_len, u32 *olinfo_status)
   3729 {
   3730 	struct m_tag *mtag;
   3731 	struct adapter *adapter = txr->adapter;
   3732 	struct ethercom *ec = &adapter->osdep.ec;
   3733 	struct ixgbe_adv_tx_context_desc *TXD;
   3734 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3735 	u32 mss_l4len_idx = 0, paylen;
   3736 	u16 vtag = 0, eh_type;
   3737 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3738 	struct ether_vlan_header *eh;
   3739 #ifdef INET6
   3740 	struct ip6_hdr *ip6;
   3741 #endif
   3742 #ifdef INET
   3743 	struct ip *ip;
   3744 #endif
   3745 	struct tcphdr *th;
   3746 
   3747 
   3748 	/*
   3749 	 * Determine where frame payload starts.
   3750 	 * Jump over vlan headers if already present
   3751 	 */
   3752 	eh = mtod(mp, struct ether_vlan_header *);
   3753 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3754 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3755 		eh_type = eh->evl_proto;
   3756 	} else {
   3757 		ehdrlen = ETHER_HDR_LEN;
   3758 		eh_type = eh->evl_encap_proto;
   3759 	}
   3760 
   3761 	switch (ntohs(eh_type)) {
   3762 #ifdef INET6
   3763 	case ETHERTYPE_IPV6:
   3764 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3765 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3766 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3767 			return (ENXIO);
   3768 		ip_hlen = sizeof(struct ip6_hdr);
   3769 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3770 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3771 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3772 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3773 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3774 		break;
   3775 #endif
   3776 #ifdef INET
   3777 	case ETHERTYPE_IP:
   3778 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3779 		if (ip->ip_p != IPPROTO_TCP)
   3780 			return (ENXIO);
   3781 		ip->ip_sum = 0;
   3782 		ip_hlen = ip->ip_hl << 2;
   3783 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3784 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3785 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3786 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3787 		/* Tell transmit desc to also do IPv4 checksum. */
   3788 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3789 		break;
   3790 #endif
   3791 	default:
   3792 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3793 		    __func__, ntohs(eh_type));
   3794 		break;
   3795 	}
   3796 
   3797 	ctxd = txr->next_avail_desc;
   3798 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3799 
   3800 	tcp_hlen = th->th_off << 2;
   3801 
   3802 	/* This is used in the transmit desc in encap */
   3803 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3804 
   3805 	/* VLAN MACLEN IPLEN */
   3806 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3807 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3808 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3809 	}
   3810 
   3811 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3812 	vlan_macip_lens |= ip_hlen;
   3813 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3814 
   3815 	/* ADV DTYPE TUCMD */
   3816 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3817 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3818 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3819 
   3820 	/* MSS L4LEN IDX */
   3821 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3822 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3823 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3824 
   3825 	TXD->seqnum_seed = htole32(0);
   3826 
   3827 	if (++ctxd == txr->num_desc)
   3828 		ctxd = 0;
   3829 
   3830 	txr->tx_avail--;
   3831 	txr->next_avail_desc = ctxd;
   3832 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3833 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3834 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3835 	++txr->tso_tx.ev_count;
   3836 	return (0);
   3837 }
   3838 
   3839 #ifdef IXGBE_FDIR
   3840 /*
   3841 ** This routine parses packet headers so that Flow
   3842 ** Director can make a hashed filter table entry
   3843 ** allowing traffic flows to be identified and kept
   3844 ** on the same cpu.  Doing this for every packet
   3845 ** would be a performance hit, so we only sample
   3846 ** one in every IXGBE_FDIR_RATE packets.
   3847 */
   3848 static void
   3849 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3850 {
   3851 	struct adapter			*adapter = txr->adapter;
   3852 	struct ix_queue			*que;
   3853 	struct ip			*ip;
   3854 	struct tcphdr			*th;
   3855 	struct udphdr			*uh;
   3856 	struct ether_vlan_header	*eh;
   3857 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3858 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3859 	int  				ehdrlen, ip_hlen;
   3860 	u16				etype;
   3861 
   3862 	eh = mtod(mp, struct ether_vlan_header *);
   3863 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3864 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3865 		etype = eh->evl_proto;
   3866 	} else {
   3867 		ehdrlen = ETHER_HDR_LEN;
   3868 		etype = eh->evl_encap_proto;
   3869 	}
   3870 
   3871 	/* Only handling IPv4 */
   3872 	if (etype != htons(ETHERTYPE_IP))
   3873 		return;
   3874 
   3875 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3876 	ip_hlen = ip->ip_hl << 2;
   3877 
   3878 	/* check if we're UDP or TCP */
   3879 	switch (ip->ip_p) {
   3880 	case IPPROTO_TCP:
   3881 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3882 		/* src and dst are inverted */
   3883 		common.port.dst ^= th->th_sport;
   3884 		common.port.src ^= th->th_dport;
   3885 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3886 		break;
   3887 	case IPPROTO_UDP:
   3888 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3889 		/* src and dst are inverted */
   3890 		common.port.dst ^= uh->uh_sport;
   3891 		common.port.src ^= uh->uh_dport;
   3892 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3893 		break;
   3894 	default:
   3895 		return;
   3896 	}
   3897 
   3898 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3899 	if (mp->m_pkthdr.ether_vtag)
   3900 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3901 	else
   3902 		common.flex_bytes ^= etype;
   3903 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3904 
   3905 	que = &adapter->queues[txr->me];
   3906 	/*
   3907 	** This assumes the Rx queue and Tx
   3908 	** queue are bound to the same CPU
   3909 	*/
   3910 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3911 	    input, common, que->msix);
   3912 }
   3913 #endif /* IXGBE_FDIR */
   3914 
   3915 /**********************************************************************
   3916  *
   3917  *  Examine each tx_buffer in the used queue. If the hardware is done
   3918  *  processing the packet then free associated resources. The
   3919  *  tx_buffer is put back on the free queue.
   3920  *
   3921  **********************************************************************/
   3922 static void
   3923 ixgbe_txeof(struct tx_ring *txr)
   3924 {
   3925 	struct adapter		*adapter = txr->adapter;
   3926 	struct ifnet		*ifp = adapter->ifp;
   3927 	u32			work, processed = 0;
   3928 	u16			limit = txr->process_limit;
   3929 	struct ixgbe_tx_buf	*buf;
   3930 	union ixgbe_adv_tx_desc *txd;
   3931 	struct timeval now, elapsed;
   3932 
   3933 	KASSERT(mutex_owned(&txr->tx_mtx));
   3934 
   3935 #ifdef DEV_NETMAP
   3936 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3937 		struct netmap_adapter *na = NA(ifp);
   3938 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3939 		txd = txr->tx_base;
   3940 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3941 		    BUS_DMASYNC_POSTREAD);
   3942 		/*
   3943 		 * In netmap mode, all the work is done in the context
   3944 		 * of the client thread. Interrupt handlers only wake up
   3945 		 * clients, which may be sleeping on individual rings
   3946 		 * or on a global resource for all rings.
   3947 		 * To implement tx interrupt mitigation, we wake up the client
   3948 		 * thread roughly every half ring, even if the NIC interrupts
   3949 		 * more frequently. This is implemented as follows:
   3950 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3951 		 *   the slot that should wake up the thread (nkr_num_slots
   3952 		 *   means the user thread should not be woken up);
   3953 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3954 		 *   or the slot has the DD bit set.
   3955 		 */
   3956 		if (!netmap_mitigate ||
   3957 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3958 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3959 			netmap_tx_irq(ifp, txr->me);
   3960 		}
   3961 		return;
   3962 	}
   3963 #endif /* DEV_NETMAP */
   3964 
   3965 	if (txr->tx_avail == txr->num_desc) {
   3966 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3967 		return;
   3968 	}
   3969 
   3970 	/* Get work starting point */
   3971 	work = txr->next_to_clean;
   3972 	buf = &txr->tx_buffers[work];
   3973 	txd = &txr->tx_base[work];
   3974 	work -= txr->num_desc; /* The distance to ring end */
   3975 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3976 	    BUS_DMASYNC_POSTREAD);
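        	/*
        	 * 'work' now counts up from (next_to_clean - num_desc) and
        	 * reaches zero exactly when the scan index passes the end of
        	 * the ring, so the wrap tests below reduce to a cheap !work.
        	 */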
   3977 	do {
   3978 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3979 		if (eop == NULL) /* No work */
   3980 			break;
   3981 
   3982 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3983 			break;	/* I/O not complete */
   3984 
   3985 		if (buf->m_head) {
   3986 			txr->bytes +=
   3987 			    buf->m_head->m_pkthdr.len;
   3988 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3989 			    buf->map,
   3990 			    0, buf->m_head->m_pkthdr.len,
   3991 			    BUS_DMASYNC_POSTWRITE);
   3992 			ixgbe_dmamap_unload(txr->txtag,
   3993 			    buf->map);
   3994 			m_freem(buf->m_head);
   3995 			buf->m_head = NULL;
   3996 			/*
   3997 			 * NetBSD: Don't override buf->map with NULL here.
   3998 			 * It'll panic when a ring runs one lap around.
   3999 			 */
   4000 		}
   4001 		buf->eop = NULL;
   4002 		++txr->tx_avail;
   4003 
   4004 		/* We clean the range if multi segment */
   4005 		while (txd != eop) {
   4006 			++txd;
   4007 			++buf;
   4008 			++work;
   4009 			/* wrap the ring? */
   4010 			if (__predict_false(!work)) {
   4011 				work -= txr->num_desc;
   4012 				buf = txr->tx_buffers;
   4013 				txd = txr->tx_base;
   4014 			}
   4015 			if (buf->m_head) {
   4016 				txr->bytes +=
   4017 				    buf->m_head->m_pkthdr.len;
   4018 				bus_dmamap_sync(txr->txtag->dt_dmat,
   4019 				    buf->map,
   4020 				    0, buf->m_head->m_pkthdr.len,
   4021 				    BUS_DMASYNC_POSTWRITE);
   4022 				ixgbe_dmamap_unload(txr->txtag,
   4023 				    buf->map);
   4024 				m_freem(buf->m_head);
   4025 				buf->m_head = NULL;
   4026 				/*
   4027 				 * NetBSD: Don't override buf->map with NULL
   4028 				 * here. It'll panic when a ring runs one lap
   4029 				 * around.
   4030 				 */
   4031 			}
   4032 			++txr->tx_avail;
   4033 			buf->eop = NULL;
   4035 		}
   4036 		++txr->packets;
   4037 		++processed;
   4038 		++ifp->if_opackets;
   4039 		getmicrotime(&txr->watchdog_time);
   4040 
   4041 		/* Try the next packet */
   4042 		++txd;
   4043 		++buf;
   4044 		++work;
   4045 		/* reset with a wrap */
   4046 		if (__predict_false(!work)) {
   4047 			work -= txr->num_desc;
   4048 			buf = txr->tx_buffers;
   4049 			txd = txr->tx_base;
   4050 		}
   4051 		prefetch(txd);
   4052 	} while (__predict_true(--limit));
   4053 
   4054 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   4055 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4056 
   4057 	work += txr->num_desc;
   4058 	txr->next_to_clean = work;
   4059 
   4060 	/*
   4061 	** Watchdog calculation: we know there's work
   4062 	** outstanding, or the early return above would
   4063 	** have been taken, so nothing processed for too
   4064 	** long indicates a hang.
   4065 	*/
   4066 	getmicrotime(&now);
   4067 	timersub(&now, &txr->watchdog_time, &elapsed);
   4068 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   4069 		txr->queue_status = IXGBE_QUEUE_HUNG;
   4070 
   4071 	if (txr->tx_avail == txr->num_desc)
   4072 		txr->queue_status = IXGBE_QUEUE_IDLE;
   4073 
   4074 	return;
   4075 }
   4076 
   4077 /*********************************************************************
   4078  *
   4079  *  Refresh mbuf buffers for RX descriptor rings
   4080  *   - keeps its own state, so discards due to resource
   4081  *     exhaustion are unnecessary: if an mbuf cannot be obtained
   4082  *     it just returns, keeping its placeholder, and can simply
   4083  *     be called again later to retry.
   4084  *
   4085  **********************************************************************/
   4086 static void
   4087 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   4088 {
   4089 	struct adapter		*adapter = rxr->adapter;
   4090 	struct ixgbe_rx_buf	*rxbuf;
   4091 	struct mbuf		*mp;
   4092 	int			i, j, error;
   4093 	bool			refreshed = false;
   4094 
   4095 	i = j = rxr->next_to_refresh;
   4096 	/* Control the loop with one beyond */
   4097 	if (++j == rxr->num_desc)
   4098 		j = 0;
   4099 
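        	/*
        	 * 'i' is the slot being refreshed and 'j' runs one ahead;
        	 * the loop stops when the lookahead hits 'limit', so the
        	 * hardware tail (RDT) is never advanced onto a slot that
        	 * has not been refreshed.
        	 */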
   4100 	while (j != limit) {
   4101 		rxbuf = &rxr->rx_buffers[i];
   4102 		if (rxbuf->buf == NULL) {
   4103 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4104 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   4105 			if (mp == NULL) {
   4106 				rxr->no_jmbuf.ev_count++;
   4107 				goto update;
   4108 			}
   4109 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4110 				m_adj(mp, ETHER_ALIGN);
   4111 		} else
   4112 			mp = rxbuf->buf;
   4113 
   4114 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4115 		/* If we're dealing with an mbuf that was copied rather
   4116 		 * than replaced, there's no need to go through busdma.
   4117 		 */
   4118 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4119 			/* Get the memory mapping */
   4120 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4121 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4122 			if (error != 0) {
   4123 				printf("Refresh mbufs: payload dmamap load"
   4124 				    " failure - %d\n", error);
   4125 				m_free(mp);
   4126 				rxbuf->buf = NULL;
   4127 				goto update;
   4128 			}
   4129 			rxbuf->buf = mp;
   4130 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4131 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4132 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4133 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4134 		} else {
   4135 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4136 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4137 		}
   4138 
   4139 		refreshed = true;
   4140 		/* Next is precalculated */
   4141 		i = j;
   4142 		rxr->next_to_refresh = i;
   4143 		if (++j == rxr->num_desc)
   4144 			j = 0;
   4145 	}
   4146 update:
   4147 	if (refreshed) /* Update hardware tail index */
   4148 		IXGBE_WRITE_REG(&adapter->hw,
   4149 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4150 	return;
   4151 }
   4152 
   4153 /*********************************************************************
   4154  *
   4155  *  Allocate memory for rx_buffer structures. Since we use one
   4156  *  rx_buffer per received packet, the maximum number of rx_buffers
   4157  *  that we'll need is equal to the number of receive descriptors
   4158  *  that we've allocated.
   4159  *
   4160  **********************************************************************/
   4161 static int
   4162 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4163 {
   4164 	struct	adapter 	*adapter = rxr->adapter;
   4165 	device_t 		dev = adapter->dev;
   4166 	struct ixgbe_rx_buf 	*rxbuf;
   4167 	int             	i, bsize, error;
   4168 
   4169 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4170 	if (!(rxr->rx_buffers =
   4171 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4172 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4173 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4174 		error = ENOMEM;
   4175 		goto fail;
   4176 	}
   4177 
   4178 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4179 				   1, 0,	/* alignment, bounds */
   4180 				   MJUM16BYTES,		/* maxsize */
   4181 				   1,			/* nsegments */
   4182 				   MJUM16BYTES,		/* maxsegsize */
   4183 				   0,			/* flags */
   4184 				   &rxr->ptag))) {
   4185 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4186 		goto fail;
   4187 	}
   4188 
   4189 	for (i = 0; i < rxr->num_desc; i++) {
   4190 		rxbuf = &rxr->rx_buffers[i];
   4191 		error = ixgbe_dmamap_create(rxr->ptag,
   4192 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4193 		if (error) {
   4194 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4195 			goto fail;
   4196 		}
   4197 	}
   4198 
   4199 	return (0);
   4200 
   4201 fail:
   4202 	/* Frees all, but can handle partial completion */
   4203 	ixgbe_free_receive_structures(adapter);
   4204 	return (error);
   4205 }
   4206 
   4207 /*
   4208 ** Used to detect a descriptor that has
   4209 ** been merged by Hardware RSC.
   4210 */
   4211 static inline u32
   4212 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4213 {
   4214 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4215 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4216 }
   4217 
   4218 /*********************************************************************
   4219  *
   4220  *  Initialize the Hardware RSC (LRO) feature on 82599
   4221  *  for an RX ring; this is toggled by the LRO capability
   4222  *  even though it is transparent to the stack.
   4223  *
   4224  *  NOTE: since this HW feature only works with IPv4 and
   4225  *        our testing has shown soft LRO to be as effective,
   4226  *        it is disabled by default.
   4227  *
   4228  **********************************************************************/
   4229 static void
   4230 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4231 {
   4232 	struct	adapter 	*adapter = rxr->adapter;
   4233 	struct	ixgbe_hw	*hw = &adapter->hw;
   4234 	u32			rscctrl, rdrxctl;
   4235 
   4236 	/* If turning LRO/RSC off we need to disable it */
   4237 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4238 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4239 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
        		/* Write the cleared bit back, or RSC stays enabled */
        		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4240 		return;
   4241 	}
   4242 
   4243 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4244 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4245 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4246 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4247 #endif /* DEV_NETMAP */
   4248 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4249 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4250 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4251 
   4252 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4253 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4254 	/*
   4255 	** Limit the total number of descriptors that
   4256 	** can be combined, so it does not exceed 64K
   4257 	*/
   4258 	if (rxr->mbuf_sz == MCLBYTES)
   4259 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4260 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4261 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4262 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4263 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4264 	else  /* Using 16K cluster */
   4265 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4266 
   4267 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4268 
   4269 	/* Enable TCP header recognition */
   4270 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4271 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4272 	    IXGBE_PSRTYPE_TCPHDR));
   4273 
   4274 	/* Disable RSC for ACK packets */
   4275 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4276 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4277 
   4278 	rxr->hw_rsc = TRUE;
   4279 }
   4280 
   4281 
   4282 static void
   4283 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4284 {
   4285 	struct ixgbe_rx_buf       *rxbuf;
   4286 	int i;
   4287 
   4288 	for (i = 0; i < rxr->num_desc; i++) {
   4289 		rxbuf = &rxr->rx_buffers[i];
   4290 		if (rxbuf->buf != NULL) {
   4291 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4292 			    0, rxbuf->buf->m_pkthdr.len,
   4293 			    BUS_DMASYNC_POSTREAD);
   4294 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4295 			rxbuf->buf->m_flags |= M_PKTHDR;
   4296 			m_freem(rxbuf->buf);
   4297 			rxbuf->buf = NULL;
   4298 			rxbuf->flags = 0;
   4299 		}
   4300 	}
   4301 }
   4302 
   4303 
   4304 /*********************************************************************
   4305  *
   4306  *  Initialize a receive ring and its buffers.
   4307  *
   4308  **********************************************************************/
   4309 static int
   4310 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4311 {
   4312 	struct	adapter 	*adapter;
   4313 	struct ixgbe_rx_buf	*rxbuf;
   4314 #ifdef LRO
   4315 	struct ifnet		*ifp;
   4316 	struct lro_ctrl		*lro = &rxr->lro;
   4317 #endif /* LRO */
   4318 	int			rsize, error = 0;
   4319 #ifdef DEV_NETMAP
   4320 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4321 	struct netmap_slot *slot;
   4322 #endif /* DEV_NETMAP */
   4323 
   4324 	adapter = rxr->adapter;
   4325 #ifdef LRO
   4326 	ifp = adapter->ifp;
   4327 #endif /* LRO */
   4328 
   4329 	/* Clear the ring contents */
   4330 	IXGBE_RX_LOCK(rxr);
   4331 #ifdef DEV_NETMAP
   4332 	/* same as in ixgbe_setup_transmit_ring() */
   4333 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4334 #endif /* DEV_NETMAP */
   4335 	rsize = roundup2(adapter->num_rx_desc *
   4336 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4337 	bzero((void *)rxr->rx_base, rsize);
   4338 	/* Cache the size */
   4339 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4340 
   4341 	/* Free current RX buffer structs and their mbufs */
   4342 	ixgbe_free_receive_ring(rxr);
   4343 
   4344 	IXGBE_RX_UNLOCK(rxr);
   4345 
   4346 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4347 	 * or size of jumbo mbufs may have changed.
   4348 	 */
   4349 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4350 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
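        	/*
        	 * The pool is sized at twice the ring size, which (one
        	 * assumes) leaves headroom for clusters still loaned out to
        	 * the stack while the ring is being replenished.
        	 */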
   4351 
   4352 	IXGBE_RX_LOCK(rxr);
   4353 
   4354 	/* Now replenish the mbufs */
   4355 	for (int j = 0; j != rxr->num_desc; ++j) {
   4356 		struct mbuf	*mp;
   4357 
   4358 		rxbuf = &rxr->rx_buffers[j];
   4359 #ifdef DEV_NETMAP
   4360 		/*
   4361 		 * In netmap mode, fill the map and set the buffer
   4362 		 * address in the NIC ring, considering the offset
   4363 		 * between the netmap and NIC rings (see comment in
   4364 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4365 		 * an mbuf, so end the block with a continue;
   4366 		 */
   4367 		if (slot) {
   4368 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4369 			uint64_t paddr;
   4370 			void *addr;
   4371 
   4372 			addr = PNMB(na, slot + sj, &paddr);
   4373 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   4374 			/* Update descriptor and the cached value */
   4375 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4376 			rxbuf->addr = htole64(paddr);
   4377 			continue;
   4378 		}
   4379 #endif /* DEV_NETMAP */
   4380 		rxbuf->flags = 0;
   4381 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4382 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4383 		if (rxbuf->buf == NULL) {
   4384 			error = ENOBUFS;
   4385 			goto fail;
   4386 		}
   4387 		mp = rxbuf->buf;
   4388 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4389 		/* Get the memory mapping */
   4390 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4391 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4392 		if (error != 0)
   4393 			goto fail;
   4394 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4395 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4396 		/* Update the descriptor and the cached value */
   4397 		rxr->rx_base[j].read.pkt_addr =
   4398 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4399 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4400 	}
   4401 
   4403 	/* Setup our descriptor indices */
   4404 	rxr->next_to_check = 0;
   4405 	rxr->next_to_refresh = 0;
   4406 	rxr->lro_enabled = FALSE;
   4407 	rxr->rx_copies.ev_count = 0;
   4408 	rxr->rx_bytes.ev_count = 0;
   4409 	rxr->vtag_strip = FALSE;
   4410 
   4411 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4412 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4413 
   4414 	/*
   4415 	** Now set up the LRO interface:
   4416 	*/
   4417 	if (ixgbe_rsc_enable)
   4418 		ixgbe_setup_hw_rsc(rxr);
   4419 #ifdef LRO
   4420 	else if (ifp->if_capenable & IFCAP_LRO) {
   4421 		device_t dev = adapter->dev;
   4422 		int err = tcp_lro_init(lro);
   4423 		if (err) {
   4424 			device_printf(dev, "LRO Initialization failed!\n");
   4425 			goto fail;
   4426 		}
   4427 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4428 		rxr->lro_enabled = TRUE;
   4429 		lro->ifp = adapter->ifp;
   4430 	}
   4431 #endif /* LRO */
   4432 
   4433 	IXGBE_RX_UNLOCK(rxr);
   4434 	return (0);
   4435 
   4436 fail:
   4437 	ixgbe_free_receive_ring(rxr);
   4438 	IXGBE_RX_UNLOCK(rxr);
   4439 	return (error);
   4440 }
   4441 
   4442 /*********************************************************************
   4443  *
   4444  *  Initialize all receive rings.
   4445  *
   4446  **********************************************************************/
   4447 static int
   4448 ixgbe_setup_receive_structures(struct adapter *adapter)
   4449 {
   4450 	struct rx_ring *rxr = adapter->rx_rings;
   4451 	int j;
   4452 
   4453 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4454 		if (ixgbe_setup_receive_ring(rxr))
   4455 			goto fail;
   4456 
   4457 	return (0);
   4458 fail:
   4459 	/*
   4460 	 * Free the RX buffers allocated so far; we only handle
   4461 	 * the rings that completed, since the failing case has
   4462 	 * cleaned up after itself. 'j' failed, so it's the terminus.
   4463 	 */
   4464 	for (int i = 0; i < j; ++i) {
   4465 		rxr = &adapter->rx_rings[i];
   4466 		ixgbe_free_receive_ring(rxr);
   4467 	}
   4468 
   4469 	return (ENOBUFS);
   4470 }
   4471 
   4472 static void
   4473 ixgbe_initialise_rss_mapping(struct adapter *adapter)
   4474 {
   4475 	struct ixgbe_hw	*hw = &adapter->hw;
   4476 	uint32_t reta;
   4477 	int i, j, queue_id;
   4478 	uint32_t rss_key[10];
   4479 	uint32_t mrqc;
   4480 #ifdef	RSS
   4481 	uint32_t rss_hash_config;
   4482 #endif
   4483 
   4484 	/* Setup RSS */
   4485 	reta = 0;
   4486 
   4487 #ifdef	RSS
   4488 	/* Fetch the configured RSS key */
   4489 	rss_getkey((uint8_t *) &rss_key);
   4490 #else
   4491 	/* set up random bits */
   4492 	cprng_fast(&rss_key, sizeof(rss_key));
   4493 #endif
   4494 
   4495 	/* Set up the redirection table */
   4496 	for (i = 0, j = 0; i < 128; i++, j++) {
   4497 		if (j == adapter->num_queues)
        			j = 0;
   4498 #ifdef	RSS
   4499 		/*
   4500 		 * Fetch the RSS bucket id for the given indirection entry.
   4501 		 * Cap it at the number of configured buckets (which is
   4502 		 * num_queues.)
   4503 		 */
   4504 		queue_id = rss_get_indirection_to_bucket(i);
   4505 		queue_id = queue_id % adapter->num_queues;
   4506 #else
   4507 		queue_id = (j * 0x11);
   4508 #endif
   4509 		/*
   4510 		 * The low 8 bits are for hash value (n+0);
   4511 		 * The next 8 bits are for hash value (n+1), etc.
   4512 		 */
   4513 		reta = reta >> 8;
   4514 		reta = reta | (((uint32_t) queue_id) << 24);
   4515 		if ((i & 3) == 3) {
   4516 			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4517 			reta = 0;
   4518 		}
   4519 	}
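
        	/*
        	 * The redirection table holds 128 one-byte entries packed
        	 * four to a 32-bit RETA register, hence the write on every
        	 * fourth iteration above.
        	 */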
   4520 
   4521 	/* Now fill our hash function seeds */
   4522 	for (i = 0; i < 10; i++)
   4523 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
   4524 
   4525 	/* Perform hash on these packet types */
   4526 #ifdef	RSS
   4527 	mrqc = IXGBE_MRQC_RSSEN;
   4528 	rss_hash_config = rss_gethashconfig();
   4529 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
   4530 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
   4531 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
   4532 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
   4533 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
   4534 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
   4535 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
   4536 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   4537 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
   4538 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
   4539 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
   4540 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
   4541 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
   4542 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   4543 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
   4544 		device_printf(adapter->dev,
   4545 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
   4546 		    "but not supported\n", __func__);
   4547 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
   4548 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   4549 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
   4550 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4551 #else
   4552 	/*
   4553 	 * Disable UDP - IP fragments aren't currently being handled
   4554 	 * and so we end up with a mix of 2-tuple and 4-tuple
   4555 	 * traffic.
   4556 	 */
   4557 	mrqc = IXGBE_MRQC_RSSEN
   4558 	     | IXGBE_MRQC_RSS_FIELD_IPV4
   4559 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4560 #if 0
   4561 	     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4562 #endif
   4563 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4564 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4565 	     | IXGBE_MRQC_RSS_FIELD_IPV6
   4566 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4567 #if 0
   4568 	     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4569 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
   4570 #endif
   4571 	;
   4572 #endif /* RSS */
   4573 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4574 }
   4575 
   4576 
   4577 /*********************************************************************
   4578  *
   4579  *  Setup receive registers and features.
   4580  *
   4581  **********************************************************************/
   4582 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4583 
   4584 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
   4585 
   4586 static void
   4587 ixgbe_initialize_receive_units(struct adapter *adapter)
   4588 {
   4589 	int i;
   4590 	struct	rx_ring	*rxr = adapter->rx_rings;
   4591 	struct ixgbe_hw	*hw = &adapter->hw;
   4592 	struct ifnet   *ifp = adapter->ifp;
   4593 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4594 	u32		hlreg;
   4595 
   4597 	/*
   4598 	 * Make sure receives are disabled while
   4599 	 * setting up the descriptor ring
   4600 	 */
   4601 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4602 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4603 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4604 
   4605 	/* Enable broadcasts */
   4606 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4607 	fctrl |= IXGBE_FCTRL_BAM;
   4608 	fctrl |= IXGBE_FCTRL_DPF;
   4609 	fctrl |= IXGBE_FCTRL_PMCF;
   4610 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4611 
   4612 	/* Set for Jumbo Frames? */
   4613 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4614 	if (ifp->if_mtu > ETHERMTU)
   4615 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4616 	else
   4617 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4618 #ifdef DEV_NETMAP
   4619 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4620 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4621 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4622 	else
   4623 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4624 #endif /* DEV_NETMAP */
   4625 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4626 
   4627 	bufsz = (adapter->rx_mbuf_sz +
   4628 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
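        	/*
        	 * SRRCTL.BSIZEPKT is expressed in 1 KB units; the roundup
        	 * keeps a partial kilobyte from being truncated away.
        	 */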
   4629 
   4630 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4631 		u64 rdba = rxr->rxdma.dma_paddr;
   4632 
   4633 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4634 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4635 			       (rdba & 0x00000000ffffffffULL));
   4636 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4637 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4638 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4639 
   4640 		/* Set up the SRRCTL register */
   4641 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4642 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4643 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4644 		srrctl |= bufsz;
   4645 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4646 
   4647 		/*
   4648 		 * Set DROP_EN iff we have no flow control and >1 queue.
   4649 		 * Note that srrctl was cleared shortly before during reset,
   4650 		 * so we do not need to clear the bit, but do it just in case
   4651 		 * this code is moved elsewhere.
   4652 		 */
   4653 		if (adapter->num_queues > 1 &&
   4654 		    adapter->fc == ixgbe_fc_none) {
   4655 			srrctl |= IXGBE_SRRCTL_DROP_EN;
   4656 		} else {
   4657 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   4658 		}
   4659 
   4660 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4661 
   4662 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4663 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4664 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4665 
   4666 		/* Set the processing limit */
   4667 		rxr->process_limit = ixgbe_rx_process_limit;
   4668 	}
   4669 
   4670 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4671 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4672 			      IXGBE_PSRTYPE_UDPHDR |
   4673 			      IXGBE_PSRTYPE_IPV4HDR |
   4674 			      IXGBE_PSRTYPE_IPV6HDR;
   4675 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4676 	}
   4677 
   4678 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4679 
   4680 	ixgbe_initialise_rss_mapping(adapter);
   4681 
   4682 	if (adapter->num_queues > 1) {
   4683 		/* RSS and RX IPP Checksum are mutually exclusive */
   4684 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4685 	}
   4686 
   4687 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4688 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4689 
   4690 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4691 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4692 
   4693 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4694 
   4695 	return;
   4696 }
   4697 
   4698 /*********************************************************************
   4699  *
   4700  *  Free all receive rings.
   4701  *
   4702  **********************************************************************/
   4703 static void
   4704 ixgbe_free_receive_structures(struct adapter *adapter)
   4705 {
   4706 	struct rx_ring *rxr = adapter->rx_rings;
   4707 
   4708 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   4709 
   4710 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4711 #ifdef LRO
   4712 		struct lro_ctrl		*lro = &rxr->lro;
   4713 #endif /* LRO */
   4714 		ixgbe_free_receive_buffers(rxr);
   4715 #ifdef LRO
   4716 		/* Free LRO memory */
   4717 		tcp_lro_free(lro);
   4718 #endif /* LRO */
   4719 		/* Free the ring memory as well */
   4720 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4721 		IXGBE_RX_LOCK_DESTROY(rxr);
   4722 	}
   4723 
   4724 	free(adapter->rx_rings, M_DEVBUF);
   4725 }
   4726 
   4727 
   4728 /*********************************************************************
   4729  *
   4730  *  Free receive ring data structures
   4731  *
   4732  **********************************************************************/
   4733 static void
   4734 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4735 {
   4736 	struct adapter		*adapter = rxr->adapter;
   4737 	struct ixgbe_rx_buf	*rxbuf;
   4738 
   4739 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   4740 
   4741 	/* Cleanup any existing buffers */
   4742 	if (rxr->rx_buffers != NULL) {
   4743 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4744 			rxbuf = &rxr->rx_buffers[i];
   4745 			if (rxbuf->buf != NULL) {
   4746 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4747 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4748 				    BUS_DMASYNC_POSTREAD);
   4749 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4750 				rxbuf->buf->m_flags |= M_PKTHDR;
   4751 				m_freem(rxbuf->buf);
   4752 			}
   4753 			rxbuf->buf = NULL;
   4754 			if (rxbuf->pmap != NULL) {
   4755 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4756 				rxbuf->pmap = NULL;
   4757 			}
   4758 		}
   4759 		if (rxr->rx_buffers != NULL) {
   4760 			free(rxr->rx_buffers, M_DEVBUF);
   4761 			rxr->rx_buffers = NULL;
   4762 		}
   4763 	}
   4764 
   4765 	if (rxr->ptag != NULL) {
   4766 		ixgbe_dma_tag_destroy(rxr->ptag);
   4767 		rxr->ptag = NULL;
   4768 	}
   4769 
   4770 	return;
   4771 }
   4772 
   4773 static __inline void
   4774 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4775 {
   4776 	int s;
   4777 
   4778 #ifdef LRO
   4779 	struct adapter	*adapter = ifp->if_softc;
   4780 	struct ethercom *ec = &adapter->osdep.ec;
   4781 
   4782         /*
    4783          * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4784          * was computed by the hardware, and which carry no VLAN tag in the
    4785          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   4786          */
   4787         if (rxr->lro_enabled &&
   4788             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   4789             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4790             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4791             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   4792             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4793             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   4794             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   4795             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4796                 /*
    4797                  * Send to the stack if:
    4798                  *  - LRO not enabled, or
    4799                  *  - no LRO resources, or
    4800                  *  - lro enqueue fails
    4801                  */
   4802                 if (rxr->lro.lro_cnt != 0)
   4803                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   4804                                 return;
   4805         }
   4806 #endif /* LRO */
   4807 
   4808 	IXGBE_RX_UNLOCK(rxr);
   4809 
   4810 	s = splnet();
   4811 	/* Pass this up to any BPF listeners. */
   4812 	bpf_mtap(ifp, m);
   4813 	(*ifp->if_input)(ifp, m);
   4814 	splx(s);
   4815 
   4816 	IXGBE_RX_LOCK(rxr);
   4817 }
   4818 
   4819 static __inline void
   4820 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4821 {
   4822 	struct ixgbe_rx_buf	*rbuf;
   4823 
   4824 	rbuf = &rxr->rx_buffers[i];
   4825 
    4826 	/*
    4827 	** With advanced descriptors the writeback
    4828 	** clobbers the buffer addresses, so it's easier
    4829 	** to just free the existing mbufs and take
    4830 	** the normal refresh path to get new buffers
    4831 	** and mappings.
    4832 	*/
    4833 	if (rbuf->fmp != NULL) {	/* Partial chain? */
    4834 		rbuf->fmp->m_flags |= M_PKTHDR;
    4835 		m_freem(rbuf->fmp);
    4836 		rbuf->fmp = NULL;
    4837 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    4838 	} else if (rbuf->buf != NULL) {
    4839 		m_free(rbuf->buf);
    4840 		rbuf->buf = NULL;
    4841 	}
   4842 
   4843 	rbuf->flags = 0;
   4844 
   4845 	return;
   4846 }
   4847 
   4848 
   4849 /*********************************************************************
   4850  *
   4851  *  This routine executes in interrupt context. It replenishes
   4852  *  the mbufs in the descriptor and sends data which has been
   4853  *  dma'ed into host memory to upper layer.
   4854  *
   4855  *  We loop at most count times if count is > 0, or until done if
   4856  *  count < 0.
   4857  *
   4858  *  Return TRUE for more work, FALSE for all clean.
   4859  *********************************************************************/
   4860 static bool
   4861 ixgbe_rxeof(struct ix_queue *que)
   4862 {
   4863 	struct adapter		*adapter = que->adapter;
   4864 	struct rx_ring		*rxr = que->rxr;
   4865 	struct ifnet		*ifp = adapter->ifp;
   4866 #ifdef LRO
   4867 	struct lro_ctrl		*lro = &rxr->lro;
   4868 	struct lro_entry	*queued;
   4869 #endif /* LRO */
   4870 	int			i, nextp, processed = 0;
   4871 	u32			staterr = 0;
   4872 	u16			count = rxr->process_limit;
   4873 	union ixgbe_adv_rx_desc	*cur;
   4874 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4875 #ifdef RSS
   4876 	u16			pkt_info;
   4877 #endif
   4878 
   4879 	IXGBE_RX_LOCK(rxr);
   4880 
   4881 #ifdef DEV_NETMAP
   4882 	/* Same as the txeof routine: wakeup clients on intr. */
   4883 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   4884 		IXGBE_RX_UNLOCK(rxr);
   4885 		return (FALSE);
   4886 	}
   4887 #endif /* DEV_NETMAP */
   4888 
   4889 	for (i = rxr->next_to_check; count != 0;) {
   4890 		struct mbuf	*sendmp, *mp;
   4891 		u32		rsc, ptype;
   4892 		u16		len;
   4893 		u16		vtag = 0;
   4894 		bool		eop;
   4895 
   4896 		/* Sync the ring. */
   4897 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4898 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4899 
   4900 		cur = &rxr->rx_base[i];
   4901 		staterr = le32toh(cur->wb.upper.status_error);
   4902 #ifdef RSS
   4903 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   4904 #endif
   4905 
   4906 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4907 			break;
   4908 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4909 			break;
   4910 
   4911 		count--;
   4912 		sendmp = NULL;
   4913 		nbuf = NULL;
   4914 		rsc = 0;
   4915 		cur->wb.upper.status_error = 0;
   4916 		rbuf = &rxr->rx_buffers[i];
   4917 		mp = rbuf->buf;
   4918 
   4919 		len = le16toh(cur->wb.upper.length);
   4920 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4921 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4922 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4923 
   4924 		/* Make sure bad packets are discarded */
   4925 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   4926 			rxr->rx_discarded.ev_count++;
   4927 			ixgbe_rx_discard(rxr, i);
   4928 			goto next_desc;
   4929 		}
   4930 
   4931 		/*
    4932 		** On 82599, which supports a hardware
    4933 		** LRO (called HW RSC), packets need not
    4934 		** be fragmented across sequential
    4935 		** descriptors; rather, the next
    4936 		** descriptor of a frame is indicated in
    4937 		** bits of the current one.  This also
    4938 		** means we might process more than one
    4939 		** packet at a time, which required
    4940 		** eliminating the global chain pointers
    4941 		** in favor of what we are doing here.  -jfv
   4942 		*/
   4943 		if (!eop) {
   4944 			/*
   4945 			** Figure out the next descriptor
   4946 			** of this frame.
   4947 			*/
   4948 			if (rxr->hw_rsc == TRUE) {
   4949 				rsc = ixgbe_rsc_count(cur);
   4950 				rxr->rsc_num += (rsc - 1);
   4951 			}
   4952 			if (rsc) { /* Get hardware index */
   4953 				nextp = ((staterr &
   4954 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4955 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4956 			} else { /* Just sequential */
   4957 				nextp = i + 1;
   4958 				if (nextp == adapter->num_rx_desc)
   4959 					nextp = 0;
   4960 			}
   4961 			nbuf = &rxr->rx_buffers[nextp];
   4962 			prefetch(nbuf);
   4963 		}
   4964 		/*
   4965 		** Rather than using the fmp/lmp global pointers
   4966 		** we now keep the head of a packet chain in the
   4967 		** buffer struct and pass this along from one
   4968 		** descriptor to the next, until we get EOP.
   4969 		*/
   4970 		mp->m_len = len;
   4971 		/*
   4972 		** See if there is a stored head
   4973 		** that determines what we are
   4974 		*/
   4975 		sendmp = rbuf->fmp;
   4976 
   4977 		if (sendmp != NULL) {  /* secondary frag */
   4978 			rbuf->buf = rbuf->fmp = NULL;
   4979 			mp->m_flags &= ~M_PKTHDR;
   4980 			sendmp->m_pkthdr.len += mp->m_len;
   4981 		} else {
   4982 			/*
   4983 			 * Optimize.  This might be a small packet,
   4984 			 * maybe just a TCP ACK.  Do a fast copy that
   4985 			 * is cache aligned into a new mbuf, and
   4986 			 * leave the old mbuf+cluster for re-use.
   4987 			 */
   4988 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4989 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4990 				if (sendmp != NULL) {
   4991 					sendmp->m_data +=
   4992 					    IXGBE_RX_COPY_ALIGN;
   4993 					ixgbe_bcopy(mp->m_data,
   4994 					    sendmp->m_data, len);
   4995 					sendmp->m_len = len;
   4996 					rxr->rx_copies.ev_count++;
   4997 					rbuf->flags |= IXGBE_RX_COPY;
   4998 				}
   4999 			}
   5000 			if (sendmp == NULL) {
   5001 				rbuf->buf = rbuf->fmp = NULL;
   5002 				sendmp = mp;
   5003 			}
   5004 
   5005 			/* first desc of a non-ps chain */
   5006 			sendmp->m_flags |= M_PKTHDR;
   5007 			sendmp->m_pkthdr.len = mp->m_len;
   5008 		}
   5009 		++processed;
   5010 		/* Pass the head pointer on */
   5011 		if (eop == 0) {
   5012 			nbuf->fmp = sendmp;
   5013 			sendmp = NULL;
   5014 			mp->m_next = nbuf->buf;
   5015 		} else { /* Sending this frame */
   5016 			sendmp->m_pkthdr.rcvif = ifp;
   5017 			ifp->if_ipackets++;
   5018 			rxr->rx_packets.ev_count++;
   5019 			/* capture data for AIM */
   5020 			rxr->bytes += sendmp->m_pkthdr.len;
   5021 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   5022 			/* Process vlan info */
   5023 			if ((rxr->vtag_strip) &&
   5024 			    (staterr & IXGBE_RXD_STAT_VP))
   5025 				vtag = le16toh(cur->wb.upper.vlan);
   5026 			if (vtag) {
   5027 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   5028 				    printf("%s: could not apply VLAN "
   5029 					"tag", __func__));
   5030 			}
   5031 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   5032 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   5033 				   &adapter->stats);
   5034 			}
   5035 #if __FreeBSD_version >= 800000
   5036 #ifdef RSS
   5037 			sendmp->m_pkthdr.flowid =
   5038 			    le32toh(cur->wb.lower.hi_dword.rss);
   5039 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   5040 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   5041 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   5042 				break;
   5043 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   5044 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   5045 				break;
   5046 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   5047 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   5048 				break;
   5049 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   5050 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   5051 				break;
   5052 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   5053 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   5054 				break;
   5055 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   5056 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   5057 				break;
   5058 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   5059 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   5060 				break;
   5061 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   5062 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   5063 				break;
   5064 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   5065 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   5066 				break;
   5067 			default:
   5068 				/* XXX fallthrough */
   5069 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5070 				break;
   5071 			}
   5072 #else /* RSS */
   5073 			sendmp->m_pkthdr.flowid = que->msix;
   5074 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5075 #endif /* RSS */
   5076 #endif /* FreeBSD_version */
   5077 		}
   5078 next_desc:
   5079 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   5080 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   5081 
   5082 		/* Advance our pointers to the next descriptor. */
   5083 		if (++i == rxr->num_desc)
   5084 			i = 0;
   5085 
   5086 		/* Now send to the stack or do LRO */
   5087 		if (sendmp != NULL) {
   5088 			rxr->next_to_check = i;
   5089 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   5090 			i = rxr->next_to_check;
   5091 		}
   5092 
    5093 		/* Every 8 descriptors we refresh the mbufs */
   5094 		if (processed == 8) {
   5095 			ixgbe_refresh_mbufs(rxr, i);
   5096 			processed = 0;
   5097 		}
   5098 	}
   5099 
   5100 	/* Refresh any remaining buf structs */
   5101 	if (ixgbe_rx_unrefreshed(rxr))
   5102 		ixgbe_refresh_mbufs(rxr, i);
   5103 
   5104 	rxr->next_to_check = i;
   5105 
   5106 #ifdef LRO
   5107 	/*
   5108 	 * Flush any outstanding LRO work
   5109 	 */
   5110 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   5111 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   5112 		tcp_lro_flush(lro, queued);
   5113 	}
   5114 #endif /* LRO */
   5115 
   5116 	IXGBE_RX_UNLOCK(rxr);
   5117 
   5118 	/*
   5119 	** Still have cleaning to do?
   5120 	*/
   5121 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   5122 		return true;
   5123 	else
   5124 		return false;
   5125 }
   5126 
   5127 
   5128 /*********************************************************************
   5129  *
   5130  *  Verify that the hardware indicated that the checksum is valid.
   5131  *  Inform the stack about the status of checksum so that stack
   5132  *  doesn't spend time verifying the checksum.
   5133  *
   5134  *********************************************************************/
   5135 static void
   5136 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   5137     struct ixgbe_hw_stats *stats)
   5138 {
   5139 	u16	status = (u16) staterr;
   5140 	u8	errors = (u8) (staterr >> 24);
   5141 #if 0
   5142 	bool	sctp = FALSE;
   5143 
   5144 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   5145 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   5146 		sctp = TRUE;
   5147 #endif
   5148 
   5149 	if (status & IXGBE_RXD_STAT_IPCS) {
   5150 		stats->ipcs.ev_count++;
   5151 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   5152 			/* IP Checksum Good */
   5153 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   5154 
   5155 		} else {
   5156 			stats->ipcs_bad.ev_count++;
   5157 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   5158 		}
   5159 	}
   5160 	if (status & IXGBE_RXD_STAT_L4CS) {
   5161 		stats->l4cs.ev_count++;
   5162 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   5163 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   5164 			mp->m_pkthdr.csum_flags |= type;
   5165 		} else {
   5166 			stats->l4cs_bad.ev_count++;
   5167 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   5168 		}
   5169 	}
   5170 	return;
   5171 }
   5172 
   5173 
   5174 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   5175 /*
    5176 ** This routine is run via a vlan config EVENT;
   5177 ** it enables us to use the HW Filter table since
   5178 ** we can get the vlan id. This just creates the
   5179 ** entry in the soft version of the VFTA, init will
   5180 ** repopulate the real table.
   5181 */
   5182 static void
   5183 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5184 {
   5185 	struct adapter	*adapter = ifp->if_softc;
   5186 	u16		index, bit;
   5187 
   5188 	if (ifp->if_softc !=  arg)   /* Not our event */
   5189 		return;
   5190 
   5191 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5192 		return;
   5193 
   5194 	IXGBE_CORE_LOCK(adapter);
   5195 	index = (vtag >> 5) & 0x7F;
   5196 	bit = vtag & 0x1F;
   5197 	adapter->shadow_vfta[index] |= (1 << bit);
   5198 	ixgbe_setup_vlan_hw_support(adapter);
   5199 	IXGBE_CORE_UNLOCK(adapter);
   5200 }
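
/*
 * Worked example of the shadow VFTA indexing above: the 4096 possible
 * VLAN IDs map onto 128 32-bit words, so vtag = 100 yields
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, i.e.
 * shadow_vfta[3] |= (1 << 4).  ixgbe_setup_vlan_hw_support() later
 * copies every nonzero word into the hardware VFTA registers.
 */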
   5201 
   5202 /*
    5203 ** This routine is run via a vlan
    5204 ** unconfig EVENT; it removes our entry
   5205 ** in the soft vfta.
   5206 */
   5207 static void
   5208 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5209 {
   5210 	struct adapter	*adapter = ifp->if_softc;
   5211 	u16		index, bit;
   5212 
   5213 	if (ifp->if_softc !=  arg)
   5214 		return;
   5215 
   5216 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5217 		return;
   5218 
   5219 	IXGBE_CORE_LOCK(adapter);
   5220 	index = (vtag >> 5) & 0x7F;
   5221 	bit = vtag & 0x1F;
   5222 	adapter->shadow_vfta[index] &= ~(1 << bit);
   5223 	/* Re-init to load the changes */
   5224 	ixgbe_setup_vlan_hw_support(adapter);
   5225 	IXGBE_CORE_UNLOCK(adapter);
   5226 }
   5227 #endif
   5228 
   5229 static void
   5230 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   5231 {
   5232 	struct ethercom *ec = &adapter->osdep.ec;
   5233 	struct ixgbe_hw *hw = &adapter->hw;
   5234 	struct rx_ring	*rxr;
   5235 	u32		ctrl;
   5236 
   5237 	/*
    5238 	** We get here through init_locked, meaning
    5239 	** a soft reset; that has already cleared
    5240 	** the VFTA and other state, so if no
    5241 	** vlans have been registered, do nothing.
   5242 	*/
   5243 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   5244 		return;
   5245 	}
   5246 
   5247 	/* Setup the queues for vlans */
   5248 	for (int i = 0; i < adapter->num_queues; i++) {
   5249 		rxr = &adapter->rx_rings[i];
   5250 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5251 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5252 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5253 			ctrl |= IXGBE_RXDCTL_VME;
   5254 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5255 		}
   5256 		rxr->vtag_strip = TRUE;
   5257 	}
   5258 
   5259 	if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
   5260 		return;
   5261 
   5262 	/*
    5263 	** A soft reset zeros out the VFTA, so
   5264 	** we need to repopulate it now.
   5265 	*/
   5266 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5267 		if (adapter->shadow_vfta[i] != 0)
   5268 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5269 			    adapter->shadow_vfta[i]);
   5270 
   5271 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5272 	/* Enable the Filter Table if enabled */
   5273 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5274 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5275 		ctrl |= IXGBE_VLNCTRL_VFE;
   5276 	}
   5277 	if (hw->mac.type == ixgbe_mac_82598EB)
   5278 		ctrl |= IXGBE_VLNCTRL_VME;
   5279 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5280 }
   5281 
   5282 static void
   5283 ixgbe_enable_intr(struct adapter *adapter)
   5284 {
   5285 	struct ixgbe_hw	*hw = &adapter->hw;
   5286 	struct ix_queue	*que = adapter->queues;
   5287 	u32		mask, fwsm;
   5288 
   5289 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5290 	/* Enable Fan Failure detection */
   5291 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5292 		mask |= IXGBE_EIMS_GPI_SDP1;
   5293 
   5294 	switch (adapter->hw.mac.type) {
   5295 		case ixgbe_mac_82599EB:
   5296 			mask |= IXGBE_EIMS_ECC;
   5297 			mask |= IXGBE_EIMS_GPI_SDP0;
   5298 			mask |= IXGBE_EIMS_GPI_SDP1;
   5299 			mask |= IXGBE_EIMS_GPI_SDP2;
   5300 #ifdef IXGBE_FDIR
   5301 			mask |= IXGBE_EIMS_FLOW_DIR;
   5302 #endif
   5303 			break;
   5304 		case ixgbe_mac_X540:
   5305 			mask |= IXGBE_EIMS_ECC;
   5306 			/* Detect if Thermal Sensor is enabled */
   5307 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5308 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5309 				mask |= IXGBE_EIMS_TS;
   5310 #ifdef IXGBE_FDIR
   5311 			mask |= IXGBE_EIMS_FLOW_DIR;
   5312 #endif
   5313 		/* falls through */
   5314 		default:
   5315 			break;
   5316 	}
   5317 
   5318 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5319 
   5320 	/* With RSS we use auto clear */
   5321 	if (adapter->msix_mem) {
   5322 		mask = IXGBE_EIMS_ENABLE_MASK;
   5323 		/* Don't autoclear Link */
   5324 		mask &= ~IXGBE_EIMS_OTHER;
   5325 		mask &= ~IXGBE_EIMS_LSC;
   5326 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5327 	}
   5328 
   5329 	/*
   5330 	** Now enable all queues, this is done separately to
   5331 	** allow for handling the extended (beyond 32) MSIX
   5332 	** vectors that can be used by 82599
   5333 	*/
    5334 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5335 		ixgbe_enable_queue(adapter, que->msix);
   5336 
   5337 	IXGBE_WRITE_FLUSH(hw);
   5338 
   5339 	return;
   5340 }
   5341 
   5342 static void
   5343 ixgbe_disable_intr(struct adapter *adapter)
   5344 {
   5345 	if (adapter->msix_mem)
   5346 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5347 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5348 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5349 	} else {
   5350 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5351 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5352 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5353 	}
   5354 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5355 	return;
   5356 }
   5357 
   5358 u16
   5359 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5360 {
   5361 	switch (reg % 4) {
   5362 	case 0:
   5363 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5364 		    __BITS(15, 0);
   5365 	case 2:
   5366 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5367 		    reg - 2), __BITS(31, 16));
   5368 	default:
    5369 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5370 		break;
   5371 	}
   5372 }
   5373 
   5374 void
   5375 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5376 {
   5377 	pcireg_t old;
   5378 
   5379 	switch (reg % 4) {
   5380 	case 0:
   5381 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5382 		    __BITS(31, 16);
   5383 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5384 		break;
   5385 	case 2:
   5386 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5387 		    __BITS(15, 0);
   5388 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5389 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5390 		break;
   5391 	default:
    5392 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5393 		break;
   5394 	}
   5395 
   5396 	return;
   5397 }
   5398 
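
/*
 * A minimal usage sketch for the two helpers above (the offset is
 * hypothetical): the PCI backend only performs aligned 32-bit config
 * accesses, so 16-bit reads and writes are synthesized from the
 * containing dword.  Reading a word at offset 0xA2 (reg % 4 == 2)
 * fetches the dword at 0xA0 and returns bits 31:16; writing merges the
 * new value into that high half while preserving bits 15:0:
 *
 *	u16 v = ixgbe_read_pci_cfg(hw, 0xA2);
 *	ixgbe_write_pci_cfg(hw, 0xA2, v | 0x0001);
 */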
   5399 /*
   5400 ** Get the width and transaction speed of
   5401 ** the slot this adapter is plugged into.
   5402 */
   5403 static void
   5404 ixgbe_get_slot_info(struct ixgbe_hw *hw)
   5405 {
   5406 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
   5407 	struct ixgbe_mac_info	*mac = &hw->mac;
   5408 	u16			link;
   5409 
   5410 	/* For most devices simply call the shared code routine */
   5411 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
   5412 		ixgbe_get_bus_info(hw);
   5413 		goto display;
   5414 	}
   5415 
   5416 	/*
   5417 	** For the Quad port adapter we need to parse back
   5418 	** up the PCI tree to find the speed of the expansion
   5419 	** slot into which this adapter is plugged. A bit more work.
   5420 	*/
   5421 	dev = device_parent(device_parent(dev));
   5422 #ifdef IXGBE_DEBUG
   5423 	device_printf(dev, "parent pcib = %x,%x,%x\n",
   5424 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5425 #endif
   5426 	dev = device_parent(device_parent(dev));
   5427 #ifdef IXGBE_DEBUG
   5428 	device_printf(dev, "slot pcib = %x,%x,%x\n",
   5429 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5430 #endif
   5431 	/* Now get the PCI Express Capabilities offset */
   5432 	/* ...and read the Link Status Register */
   5433 	link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
   5434 	switch (link & IXGBE_PCI_LINK_WIDTH) {
   5435 	case IXGBE_PCI_LINK_WIDTH_1:
   5436 		hw->bus.width = ixgbe_bus_width_pcie_x1;
   5437 		break;
   5438 	case IXGBE_PCI_LINK_WIDTH_2:
   5439 		hw->bus.width = ixgbe_bus_width_pcie_x2;
   5440 		break;
   5441 	case IXGBE_PCI_LINK_WIDTH_4:
   5442 		hw->bus.width = ixgbe_bus_width_pcie_x4;
   5443 		break;
   5444 	case IXGBE_PCI_LINK_WIDTH_8:
   5445 		hw->bus.width = ixgbe_bus_width_pcie_x8;
   5446 		break;
   5447 	default:
   5448 		hw->bus.width = ixgbe_bus_width_unknown;
   5449 		break;
   5450 	}
   5451 
   5452 	switch (link & IXGBE_PCI_LINK_SPEED) {
   5453 	case IXGBE_PCI_LINK_SPEED_2500:
   5454 		hw->bus.speed = ixgbe_bus_speed_2500;
   5455 		break;
   5456 	case IXGBE_PCI_LINK_SPEED_5000:
   5457 		hw->bus.speed = ixgbe_bus_speed_5000;
   5458 		break;
   5459 	case IXGBE_PCI_LINK_SPEED_8000:
   5460 		hw->bus.speed = ixgbe_bus_speed_8000;
   5461 		break;
   5462 	default:
   5463 		hw->bus.speed = ixgbe_bus_speed_unknown;
   5464 		break;
   5465 	}
   5466 
   5467 	mac->ops.set_lan_id(hw);
   5468 
   5469 display:
    5470 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
   5471 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
   5472 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
   5473 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
   5474 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
   5475 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
   5476 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
   5477 	    ("Unknown"));
   5478 
   5479 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5480 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
   5481 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
   5482 		device_printf(dev, "PCI-Express bandwidth available"
   5483 		    " for this card\n     is not sufficient for"
   5484 		    " optimal performance.\n");
   5485 		device_printf(dev, "For optimal performance a x8 "
   5486 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
   5487         }
   5488 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5489 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
   5490 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
   5491 		device_printf(dev, "PCI-Express bandwidth available"
   5492 		    " for this card\n     is not sufficient for"
   5493 		    " optimal performance.\n");
   5494 		device_printf(dev, "For optimal performance a x8 "
   5495 		    "PCIE Gen3 slot is required.\n");
   5496         }
   5497 
   5498 	return;
   5499 }
   5500 
   5501 
   5502 /*
   5503 ** Setup the correct IVAR register for a particular MSIX interrupt
   5504 **   (yes this is all very magic and confusing :)
   5505 **  - entry is the register array entry
   5506 **  - vector is the MSIX vector for this queue
   5507 **  - type is RX/TX/MISC
   5508 */
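
/*
 * Worked example for the 82599/X540 layout below: each IVAR register
 * holds four 8-bit entries, two (RX, TX) per queue and two queues per
 * register.  For queue entry 3, TX (type 1) and vector 5:
 *	index = (16 * (3 & 1)) + (8 * 1) = 24
 * so vector 5 (with IXGBE_IVAR_ALLOC_VAL or'ed in) lands in bits 31:24
 * of IVAR(3 >> 1) = IVAR(1).  On 82598 the table is linear instead:
 * entry += type * 64 selects the RX (0-63) or TX (64-127) half.
 */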
   5509 static void
   5510 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5511 {
   5512 	struct ixgbe_hw *hw = &adapter->hw;
   5513 	u32 ivar, index;
   5514 
   5515 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5516 
   5517 	switch (hw->mac.type) {
   5518 
   5519 	case ixgbe_mac_82598EB:
   5520 		if (type == -1)
   5521 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5522 		else
   5523 			entry += (type * 64);
   5524 		index = (entry >> 2) & 0x1F;
   5525 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5526 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5527 		ivar |= (vector << (8 * (entry & 0x3)));
   5528 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5529 		break;
   5530 
   5531 	case ixgbe_mac_82599EB:
   5532 	case ixgbe_mac_X540:
   5533 		if (type == -1) { /* MISC IVAR */
   5534 			index = (entry & 1) * 8;
   5535 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5536 			ivar &= ~(0xFF << index);
   5537 			ivar |= (vector << index);
   5538 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5539 		} else {	/* RX/TX IVARS */
   5540 			index = (16 * (entry & 1)) + (8 * type);
   5541 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5542 			ivar &= ~(0xFF << index);
   5543 			ivar |= (vector << index);
   5544 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
    5545 		}
    5546 		break;
    5547 	default:
   5548 		break;
   5549 	}
   5550 }
   5551 
   5552 static void
   5553 ixgbe_configure_ivars(struct adapter *adapter)
   5554 {
   5555 	struct  ix_queue *que = adapter->queues;
   5556 	u32 newitr;
   5557 
   5558 	if (ixgbe_max_interrupt_rate > 0)
   5559 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5560 	else
   5561 		newitr = 0;
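
	/*
	 * Worked example, assuming the FreeBSD-derived default of
	 * ixgbe_max_interrupt_rate = 4000000 / 128 = 31250: newitr =
	 * (4000000 / 31250) & 0x0FF8 = 128, i.e. an EITR interval of
	 * 128 >> 3 = 16 us, which the interrupt_rate sysctl handler
	 * later in this file reports back as 500000 / 16 = 31250
	 * interrupts/s.
	 */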
   5562 
    5563 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5564 		/* First the RX queue entry */
    5565 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5566 		/* ... and the TX */
    5567 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5568 		/* Set an Initial EITR value */
    5569 		IXGBE_WRITE_REG(&adapter->hw,
    5570 		    IXGBE_EITR(que->msix), newitr);
    5571 	}
    5572 
    5573 	/* For the Link interrupt */
    5574 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
    5575 }
   5576 
   5577 /*
    5578 ** ixgbe_sfp_probe - called from the local timer to
    5579 ** determine if a port has had optics inserted.
   5580 */
   5581 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5582 {
   5583 	struct ixgbe_hw	*hw = &adapter->hw;
   5584 	device_t	dev = adapter->dev;
   5585 	bool		result = FALSE;
   5586 
   5587 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5588 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5589 		s32 ret = hw->phy.ops.identify_sfp(hw);
    5590 		if (ret)
    5591 			goto out;
    5592 		ret = hw->phy.ops.reset(hw);
    5593 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5594 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5595 			device_printf(dev, "Reload driver with supported module.\n");
    5596 			adapter->sfp_probe = FALSE;
    5597 			goto out;
    5598 		} else
    5599 			device_printf(dev, "SFP+ module detected!\n");
   5600 		/* We now have supported optics */
   5601 		adapter->sfp_probe = FALSE;
   5602 		/* Set the optics type so system reports correctly */
   5603 		ixgbe_setup_optics(adapter);
   5604 		result = TRUE;
   5605 	}
   5606 out:
   5607 	return (result);
   5608 }
   5609 
   5610 /*
   5611 ** Tasklet handler for MSIX Link interrupts
    5612 **  - done outside the interrupt context since it might sleep
   5613 */
   5614 static void
   5615 ixgbe_handle_link(void *context)
   5616 {
   5617 	struct adapter  *adapter = context;
   5618 
   5619 	if (ixgbe_check_link(&adapter->hw,
   5620 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
   5621 	    ixgbe_update_link_status(adapter);
   5622 }
   5623 
   5624 /*
   5625 ** Tasklet for handling SFP module interrupts
   5626 */
   5627 static void
   5628 ixgbe_handle_mod(void *context)
   5629 {
   5630 	struct adapter  *adapter = context;
   5631 	struct ixgbe_hw *hw = &adapter->hw;
   5632 	device_t	dev = adapter->dev;
   5633 	u32 err;
   5634 
   5635 	err = hw->phy.ops.identify_sfp(hw);
   5636 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5637 		device_printf(dev,
   5638 		    "Unsupported SFP+ module type was detected.\n");
   5639 		return;
   5640 	}
   5641 	err = hw->mac.ops.setup_sfp(hw);
   5642 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5643 		device_printf(dev,
   5644 		    "Setup failure - unsupported SFP+ module type.\n");
   5645 		return;
   5646 	}
   5647 	softint_schedule(adapter->msf_si);
   5648 	return;
   5649 }
   5650 
   5651 
   5652 /*
   5653 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5654 */
   5655 static void
   5656 ixgbe_handle_msf(void *context)
   5657 {
   5658 	struct adapter  *adapter = context;
   5659 	struct ixgbe_hw *hw = &adapter->hw;
   5660 	u32 autoneg;
   5661 	bool negotiate;
   5662 
   5663 	autoneg = hw->phy.autoneg_advertised;
   5664 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5665 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5666 	else
   5667 		negotiate = 0;
   5668 	if (hw->mac.ops.setup_link)
   5669 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5670 	return;
   5671 }
   5672 
   5673 #ifdef IXGBE_FDIR
   5674 /*
   5675 ** Tasklet for reinitializing the Flow Director filter table
   5676 */
   5677 static void
   5678 ixgbe_reinit_fdir(void *context)
   5679 {
   5680 	struct adapter  *adapter = context;
   5681 	struct ifnet   *ifp = adapter->ifp;
   5682 
   5683 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5684 		return;
   5685 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5686 	adapter->fdir_reinit = 0;
   5687 	/* re-enable flow director interrupts */
   5688 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5689 	/* Restart the interface */
   5690 	ifp->if_flags |= IFF_RUNNING;
   5691 	return;
   5692 }
   5693 #endif
   5694 
   5695 /**********************************************************************
   5696  *
   5697  *  Update the board statistics counters.
   5698  *
   5699  **********************************************************************/
   5700 static void
   5701 ixgbe_update_stats_counters(struct adapter *adapter)
   5702 {
   5703 	struct ifnet   *ifp = adapter->ifp;
   5704 	struct ixgbe_hw *hw = &adapter->hw;
   5705 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5706 	u64  total_missed_rx = 0;
   5707 	uint64_t crcerrs, rlec;
   5708 
   5709 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5710 	adapter->stats.crcerrs.ev_count += crcerrs;
   5711 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5712 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5713 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5714 
   5715 	/*
   5716 	** Note: these are for the 8 possible traffic classes,
    5717 	**	 which are unused in the current implementation,
    5718 	**	 so only class 0 should read real data.
   5719 	*/
   5720 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5721 		int j = i % adapter->num_queues;
   5722 		u32 mp;
   5723 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5724 		/* missed_rx tallies misses for the gprc workaround */
   5725 		missed_rx += mp;
   5726 		/* global total per queue */
    5727 		adapter->stats.mpc[j].ev_count += mp;
   5728 		/* Running comprehensive total for stats display */
   5729 		total_missed_rx += mp;
   5730 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5731 			adapter->stats.rnbc[j] +=
   5732 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5733 			adapter->stats.qbtc[j].ev_count +=
   5734 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5735 			adapter->stats.qbrc[j].ev_count +=
   5736 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5737 			adapter->stats.pxonrxc[j].ev_count +=
   5738 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5739 		} else {
   5740 			adapter->stats.pxonrxc[j].ev_count +=
   5741 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5742 		}
   5743 		adapter->stats.pxontxc[j].ev_count +=
   5744 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5745 		adapter->stats.pxofftxc[j].ev_count +=
   5746 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5747 		adapter->stats.pxoffrxc[j].ev_count +=
   5748 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5749 		adapter->stats.pxon2offc[j].ev_count +=
   5750 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5751 	}
   5752 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5753 		int j = i % adapter->num_queues;
   5754 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5755 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5756 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5757 	}
   5758 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5759 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5760 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5761 	adapter->stats.rlec.ev_count += rlec;
   5762 
   5763 	/* Hardware workaround, gprc counts missed packets */
   5764 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5765 
   5766 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5767 	adapter->stats.lxontxc.ev_count += lxon;
   5768 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5769 	adapter->stats.lxofftxc.ev_count += lxoff;
   5770 	total = lxon + lxoff;
   5771 
   5772 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5773 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5774 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5775 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5776 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5777 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5778 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5779 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5780 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5781 	} else {
   5782 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5783 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5784 		/* 82598 only has a counter in the high register */
   5785 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5786 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5787 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5788 	}
   5789 
   5790 	/*
   5791 	 * Workaround: mprc hardware is incorrectly counting
   5792 	 * broadcasts, so for now we subtract those.
   5793 	 */
   5794 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5795 	adapter->stats.bprc.ev_count += bprc;
   5796 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5797 
   5798 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5799 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5800 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5801 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5802 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5803 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5804 
   5805 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5806 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5807 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5808 
   5809 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5810 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5811 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5812 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5813 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5814 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5815 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5816 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5817 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5818 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5819 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5820 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5821 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5822 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5823 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5824 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5825 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5826 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5827 
   5828 	/* Only read FCOE on 82599 */
   5829 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5830 		adapter->stats.fcoerpdc.ev_count +=
   5831 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5832 		adapter->stats.fcoeprc.ev_count +=
   5833 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5834 		adapter->stats.fcoeptc.ev_count +=
   5835 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5836 		adapter->stats.fcoedwrc.ev_count +=
   5837 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5838 		adapter->stats.fcoedwtc.ev_count +=
   5839 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5840 	}
   5841 
   5842 	/* Fill out the OS statistics structure */
   5843 	/*
   5844 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5845 	 * adapter->stats counters. It's required to make ifconfig -z
   5846 	 * (SOICZIFDATA) work.
    5847 	 * (SIOCZIFDATA) work.
   5848 	ifp->if_collisions = 0;
   5849 
   5850 	/* Rx Errors */
   5851 	ifp->if_iqdrops += total_missed_rx;
   5852 	ifp->if_ierrors += crcerrs + rlec;
   5853 }
   5854 
   5855 /** ixgbe_sysctl_tdh_handler - Handler function
   5856  *  Retrieves the TDH value from the hardware
   5857  */
   5858 static int
   5859 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5860 {
   5861 	struct sysctlnode node;
   5862 	uint32_t val;
   5863 	struct tx_ring *txr;
   5864 
   5865 	node = *rnode;
   5866 	txr = (struct tx_ring *)node.sysctl_data;
   5867 	if (txr == NULL)
   5868 		return 0;
   5869 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5870 	node.sysctl_data = &val;
   5871 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5872 }
   5873 
   5874 /** ixgbe_sysctl_tdt_handler - Handler function
   5875  *  Retrieves the TDT value from the hardware
   5876  */
   5877 static int
   5878 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5879 {
   5880 	struct sysctlnode node;
   5881 	uint32_t val;
   5882 	struct tx_ring *txr;
   5883 
   5884 	node = *rnode;
   5885 	txr = (struct tx_ring *)node.sysctl_data;
   5886 	if (txr == NULL)
   5887 		return 0;
   5888 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5889 	node.sysctl_data = &val;
   5890 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5891 }
   5892 
   5893 /** ixgbe_sysctl_rdh_handler - Handler function
   5894  *  Retrieves the RDH value from the hardware
   5895  */
   5896 static int
   5897 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5898 {
   5899 	struct sysctlnode node;
   5900 	uint32_t val;
   5901 	struct rx_ring *rxr;
   5902 
   5903 	node = *rnode;
   5904 	rxr = (struct rx_ring *)node.sysctl_data;
   5905 	if (rxr == NULL)
   5906 		return 0;
   5907 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5908 	node.sysctl_data = &val;
   5909 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5910 }
   5911 
   5912 /** ixgbe_sysctl_rdt_handler - Handler function
   5913  *  Retrieves the RDT value from the hardware
   5914  */
   5915 static int
   5916 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5917 {
   5918 	struct sysctlnode node;
   5919 	uint32_t val;
   5920 	struct rx_ring *rxr;
   5921 
   5922 	node = *rnode;
   5923 	rxr = (struct rx_ring *)node.sysctl_data;
   5924 	if (rxr == NULL)
   5925 		return 0;
   5926 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5927 	node.sysctl_data = &val;
   5928 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5929 }
   5930 
   5931 static int
   5932 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5933 {
   5934 	int error;
   5935 	struct sysctlnode node;
   5936 	struct ix_queue *que;
   5937 	uint32_t reg, usec, rate;
   5938 
   5939 	node = *rnode;
   5940 	que = (struct ix_queue *)node.sysctl_data;
   5941 	if (que == NULL)
   5942 		return 0;
   5943 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5944 	usec = ((reg & 0x0FF8) >> 3);
   5945 	if (usec > 0)
   5946 		rate = 500000 / usec;
   5947 	else
   5948 		rate = 0;
   5949 	node.sysctl_data = &rate;
   5950 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5951 	if (error)
   5952 		return error;
   5953 	reg &= ~0xfff; /* default, no limitation */
   5954 	ixgbe_max_interrupt_rate = 0;
   5955 	if (rate > 0 && rate < 500000) {
   5956 		if (rate < 1000)
   5957 			rate = 1000;
   5958 		ixgbe_max_interrupt_rate = rate;
    5959 		reg |= ((4000000 / rate) & 0xff8);
   5960 	}
   5961 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5962 	return 0;
   5963 }
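
/*
 * Usage sketch (device name hypothetical): the nodes registered in
 * ixgbe_add_hw_stats() below hang off hw.<device>, so on a system
 * where the adapter attached as ixg0 the per-queue moderation rate
 * can be read and capped from userland with:
 *
 *	sysctl hw.ixg0.queue0.interrupt_rate
 *	sysctl -w hw.ixg0.queue0.interrupt_rate=8000
 *
 * A write updates that queue's EITR register immediately and also sets
 * ixgbe_max_interrupt_rate for queues configured afterwards.
 */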
   5964 
   5965 const struct sysctlnode *
   5966 ixgbe_sysctl_instance(struct adapter *adapter)
   5967 {
   5968 	const char *dvname;
   5969 	struct sysctllog **log;
   5970 	int rc;
   5971 	const struct sysctlnode *rnode;
   5972 
   5973 	log = &adapter->sysctllog;
   5974 	dvname = device_xname(adapter->dev);
   5975 
   5976 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5977 	    0, CTLTYPE_NODE, dvname,
   5978 	    SYSCTL_DESCR("ixgbe information and settings"),
   5979 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5980 		goto err;
   5981 
   5982 	return rnode;
   5983 err:
   5984 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5985 	return NULL;
   5986 }
   5987 
   5988 /*
   5989  * Add sysctl variables, one per statistic, to the system.
   5990  */
   5991 static void
   5992 ixgbe_add_hw_stats(struct adapter *adapter)
   5993 {
   5994 	device_t dev = adapter->dev;
   5995 	const struct sysctlnode *rnode, *cnode;
   5996 	struct sysctllog **log = &adapter->sysctllog;
   5997 	struct tx_ring *txr = adapter->tx_rings;
   5998 	struct rx_ring *rxr = adapter->rx_rings;
   5999 	struct ixgbe_hw_stats *stats = &adapter->stats;
   6000 
   6001 	/* Driver Statistics */
   6002 #if 0
   6003 	/* These counters are not updated by the software */
   6004 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   6005 			CTLFLAG_RD, &adapter->dropped_pkts,
   6006 			"Driver dropped packets");
   6007 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   6008 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   6009 			"???");
   6010 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   6011 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   6012 			"???");
   6013 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   6014 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   6015 			"???");
   6016 #endif
   6017 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   6018 	    NULL, device_xname(dev), "Handled queue in softint");
   6019 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   6020 	    NULL, device_xname(dev), "Requeued in softint");
   6021 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   6022 	    NULL, device_xname(dev), "Interrupt handler more rx");
   6023 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   6024 	    NULL, device_xname(dev), "Interrupt handler more tx");
   6025 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   6026 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   6027 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   6028 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   6029 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   6030 	    NULL, device_xname(dev), "m_defrag() failed");
   6031 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   6032 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   6033 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   6034 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   6035 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   6036 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   6037 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   6038 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   6039 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   6040 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   6041 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   6042 	    NULL, device_xname(dev), "Watchdog timeouts");
   6043 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   6044 	    NULL, device_xname(dev), "TSO errors");
   6045 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   6046 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
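
	/*
	 * Note: the driver counters attached above, like the per-queue
	 * and MAC counters attached below, are plain evcnt(9) entries,
	 * so they can be listed from userland with "vmstat -e".
	 */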
   6047 
   6048 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   6049 		snprintf(adapter->queues[i].evnamebuf,
   6050 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   6051 		    device_xname(dev), i);
   6052 		snprintf(adapter->queues[i].namebuf,
   6053 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   6054 
   6055 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   6056 			aprint_error_dev(dev, "could not create sysctl root\n");
   6057 			break;
   6058 		}
   6059 
   6060 		if (sysctl_createv(log, 0, &rnode, &rnode,
   6061 		    0, CTLTYPE_NODE,
   6062 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   6063 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   6064 			break;
   6065 
   6066 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6067 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   6068 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   6069 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   6070 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   6071 			break;
   6072 
   6073 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6074 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   6075 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   6076 			NULL, 0, &(adapter->queues[i].irqs),
   6077 		    0, CTL_CREATE, CTL_EOL) != 0)
   6078 			break;
   6079 
   6080 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6081 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6082 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   6083 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   6084 		    0, CTL_CREATE, CTL_EOL) != 0)
   6085 			break;
   6086 
   6087 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6088 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6089 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   6090 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   6091 		    0, CTL_CREATE, CTL_EOL) != 0)
   6092 			break;
   6093 
   6094 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   6095 		    NULL, device_xname(dev), "TSO");
   6096 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   6097 		    NULL, adapter->queues[i].evnamebuf,
   6098 		    "Queue No Descriptor Available");
   6099 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   6100 		    NULL, adapter->queues[i].evnamebuf,
   6101 		    "Queue Packets Transmitted");
   6102 
   6103 #ifdef LRO
   6104 		struct lro_ctrl *lro = &rxr->lro;
   6105 #endif /* LRO */
   6106 
   6107 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6108 		    CTLFLAG_READONLY,
   6109 		    CTLTYPE_INT,
   6110 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   6111 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   6112 		    CTL_CREATE, CTL_EOL) != 0)
   6113 			break;
   6114 
   6115 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6116 		    CTLFLAG_READONLY,
   6117 		    CTLTYPE_INT,
   6118 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   6119 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   6120 		    CTL_CREATE, CTL_EOL) != 0)
   6121 			break;
   6122 
   6123 		if (i < __arraycount(adapter->stats.mpc)) {
   6124 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   6125 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6126 			    "Missed Packet Count");
   6127 		}
   6128 		if (i < __arraycount(adapter->stats.pxontxc)) {
   6129 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   6130 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6131 			    "pxontxc");
   6132 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   6133 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6134 			    "pxonrxc");
   6135 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   6136 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6137 			    "pxofftxc");
   6138 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   6139 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6140 			    "pxoffrxc");
   6141 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   6142 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6143 			    "pxon2offc");
   6144 		}
   6145 		if (i < __arraycount(adapter->stats.qprc)) {
   6146 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   6147 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6148 			    "qprc");
   6149 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   6150 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6151 			    "qptc");
   6152 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   6153 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6154 			    "qbrc");
   6155 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   6156 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6157 			    "qbtc");
   6158 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   6159 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6160 			    "qprdc");
   6161 		}
   6162 
   6163 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   6164 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   6165 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   6166 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   6167 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   6168 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   6169 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   6170 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   6171 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   6172 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   6173 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   6174 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   6175 #ifdef LRO
   6176 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   6177 				CTLFLAG_RD, &lro->lro_queued, 0,
   6178 				"LRO Queued");
   6179 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   6180 				CTLFLAG_RD, &lro->lro_flushed, 0,
   6181 				"LRO Flushed");
   6182 #endif /* LRO */
   6183 	}
   6184 
    6185 	/* MAC stats get their own sub node */
   6186 
   6187 
   6188 	snprintf(stats->namebuf,
   6189 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   6190 
   6191 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   6192 	    stats->namebuf, "rx csum offload - IP");
   6193 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   6194 	    stats->namebuf, "rx csum offload - L4");
   6195 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   6196 	    stats->namebuf, "rx csum offload - IP bad");
   6197 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   6198 	    stats->namebuf, "rx csum offload - L4 bad");
   6199 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   6200 	    stats->namebuf, "Interrupt conditions zero");
   6201 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   6202 	    stats->namebuf, "Legacy interrupts");
   6203 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   6204 	    stats->namebuf, "CRC Errors");
   6205 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   6206 	    stats->namebuf, "Illegal Byte Errors");
   6207 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   6208 	    stats->namebuf, "Byte Errors");
   6209 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   6210 	    stats->namebuf, "MAC Short Packets Discarded");
   6211 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   6212 	    stats->namebuf, "MAC Local Faults");
   6213 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   6214 	    stats->namebuf, "MAC Remote Faults");
   6215 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   6216 	    stats->namebuf, "Receive Length Errors");
   6217 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   6218 	    stats->namebuf, "Link XON Transmitted");
   6219 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   6220 	    stats->namebuf, "Link XON Received");
   6221 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   6222 	    stats->namebuf, "Link XOFF Transmitted");
   6223 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   6224 	    stats->namebuf, "Link XOFF Received");
   6225 
   6226 	/* Packet Reception Stats */
   6227 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   6228 	    stats->namebuf, "Total Octets Received");
   6229 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   6230 	    stats->namebuf, "Good Octets Received");
   6231 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   6232 	    stats->namebuf, "Total Packets Received");
   6233 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   6234 	    stats->namebuf, "Good Packets Received");
   6235 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   6236 	    stats->namebuf, "Multicast Packets Received");
   6237 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   6238 	    stats->namebuf, "Broadcast Packets Received");
   6239 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames received");
   6241 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   6242 	    stats->namebuf, "65-127 byte frames received");
   6243 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   6244 	    stats->namebuf, "128-255 byte frames received");
   6245 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   6246 	    stats->namebuf, "256-511 byte frames received");
   6247 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   6248 	    stats->namebuf, "512-1023 byte frames received");
   6249 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames received");
   6251 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   6252 	    stats->namebuf, "Receive Undersized");
   6253 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Fragmented Packets Received");
   6255 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   6256 	    stats->namebuf, "Oversized Packets Received");
   6257 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   6258 	    stats->namebuf, "Received Jabber");
   6259 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   6260 	    stats->namebuf, "Management Packets Received");
   6261 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   6262 	    stats->namebuf, "Checksum Errors");
   6263 
   6264 	/* Packet Transmission Stats */
   6265 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   6266 	    stats->namebuf, "Good Octets Transmitted");
   6267 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   6268 	    stats->namebuf, "Total Packets Transmitted");
   6269 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   6270 	    stats->namebuf, "Good Packets Transmitted");
   6271 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   6272 	    stats->namebuf, "Broadcast Packets Transmitted");
   6273 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   6274 	    stats->namebuf, "Multicast Packets Transmitted");
   6275 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   6276 	    stats->namebuf, "Management Packets Transmitted");
   6277 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames transmitted");
   6279 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   6280 	    stats->namebuf, "65-127 byte frames transmitted");
   6281 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   6282 	    stats->namebuf, "128-255 byte frames transmitted");
   6283 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   6284 	    stats->namebuf, "256-511 byte frames transmitted");
   6285 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   6286 	    stats->namebuf, "512-1023 byte frames transmitted");
   6287 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   6288 	    stats->namebuf, "1024-1522 byte frames transmitted");
   6289 }
   6290 
/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
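/*
** Example (the node path below is an assumption; the actual name
** depends on how this handler is attached during sysctl setup):
**	sysctl -w hw.ixg0.flow_control=3
*/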
   6299 static int
   6300 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   6301 {
   6302 	struct sysctlnode node;
   6303 	int error, last;
   6304 	struct adapter *adapter;
   6305 
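	/*
	 * Let sysctl_lookup() copy the new value straight into
	 * adapter->fc; "last" remembers the old value so we can
	 * detect no-ops and back out of invalid requests.
	 */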
   6306 	node = *rnode;
   6307 	adapter = (struct adapter *)node.sysctl_data;
   6308 	node.sysctl_data = &adapter->fc;
   6309 	last = adapter->fc;
   6310 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6311 	if (error != 0 || newp == NULL)
   6312 		return error;
   6313 
   6314 	/* Don't bother if it's not changed */
   6315 	if (adapter->fc == last)
   6316 		return (0);
   6317 
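	/*
	 * Pause frames and per-queue packet drop work against each
	 * other: drop only when flow control is fully disabled.
	 */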
	switch (adapter->fc) {
	case ixgbe_fc_rx_pause:
	case ixgbe_fc_tx_pause:
	case ixgbe_fc_full:
		adapter->hw.fc.requested_mode = adapter->fc;
		if (adapter->num_queues > 1)
			ixgbe_disable_rx_drop(adapter);
		break;
	case ixgbe_fc_none:
		adapter->hw.fc.requested_mode = ixgbe_fc_none;
		if (adapter->num_queues > 1)
			ixgbe_enable_rx_drop(adapter);
		break;
	default:
		adapter->fc = last;
		return (EINVAL);
	}
   6335 	/* Don't autoneg if forcing a value */
   6336 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   6337 	ixgbe_fc_enable(&adapter->hw);
   6338 	return 0;
   6339 }
   6340 
/*
** Control link advertise speed:
**	1 - advertise only 1G
**	2 - advertise 100Mb
**	3 - advertise normal (1G and 10G)
** A value of -1 leaves the current setting unchanged.
*/
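/*
** Example (node name is a guess; it depends on the sysctl setup):
**	sysctl -w hw.ixg0.advertise_speed=2
** forces 100Mb, which is accepted on X540 hardware only.
*/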
   6348 static int
   6349 ixgbe_set_advertise(SYSCTLFN_ARGS)
   6350 {
   6351 	struct sysctlnode	node;
   6352 	int			t, error = 0;
   6353 	struct adapter		*adapter;
   6354 	device_t		dev;
   6355 	struct ixgbe_hw		*hw;
   6356 	ixgbe_link_speed	speed, last;
   6357 
   6358 	node = *rnode;
   6359 	adapter = (struct adapter *)node.sysctl_data;
   6360 	dev = adapter->dev;
   6361 	hw = &adapter->hw;
   6362 	last = adapter->advertise;
   6363 	t = adapter->advertise;
   6364 	node.sysctl_data = &t;
   6365 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6366 	if (error != 0 || newp == NULL)
   6367 		return error;
   6368 
	if (t == last) /* no change */
		return (0);

	if (t == -1)
		return 0;

	adapter->advertise = t;
   6376 
	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
	    (hw->phy.multispeed_fiber))) {
		adapter->advertise = last;
		return (EINVAL);
	}

	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
		adapter->advertise = last;
		return (EINVAL);
	}
   6385 
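	/* Map the administrative value onto the hardware speed mask. */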
	if (adapter->advertise == 1)
		speed = IXGBE_LINK_SPEED_1GB_FULL;
	else if (adapter->advertise == 2)
		speed = IXGBE_LINK_SPEED_100_FULL;
	else if (adapter->advertise == 3)
		speed = IXGBE_LINK_SPEED_1GB_FULL |
		    IXGBE_LINK_SPEED_10GB_FULL;
	else { /* bogus value */
		adapter->advertise = last;
		return (EINVAL);
	}
   6397 
   6398 	hw->mac.autotry_restart = TRUE;
   6399 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6400 
   6401 	return 0;
   6402 }
   6403 
   6404 /*
   6405 ** Thermal Shutdown Trigger
   6406 **   - cause a Thermal Overtemp IRQ
   6407 **   - this now requires firmware enabling
   6408 */
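/*
** Example (node name is an assumption): writing any nonzero value
** through the node fires one test interrupt on X540 hardware:
**	sysctl -w hw.ixg0.ts=1
*/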
   6409 static int
   6410 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6411 {
   6412 	struct sysctlnode node;
   6413 	int		error, fire = 0;
   6414 	struct adapter	*adapter;
   6415 	struct ixgbe_hw *hw;
   6416 
   6417 	node = *rnode;
   6418 	adapter = (struct adapter *)node.sysctl_data;
   6419 	hw = &adapter->hw;
   6420 
   6421 	if (hw->mac.type != ixgbe_mac_X540)
   6422 		return (0);
   6423 
   6424 	node.sysctl_data = &fire;
   6425 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6426 	if ((error) || (newp == NULL))
   6427 		return (error);
   6428 
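	/*
	 * Writing the thermal sensor bit to the interrupt cause set
	 * register raises the same interrupt a real overtemp would.
	 */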
   6429 	if (fire) {
   6430 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6431 		reg |= IXGBE_EICR_TS;
   6432 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6433 	}
   6434 
   6435 	return (0);
   6436 }
   6437 
/*
** Enable the hardware to drop packets when a buffer is full.
** This is useful with multiqueue, so that no single full queue
** stalls the entire RX engine. We only enable this when
** multiqueue is in use AND flow control is disabled.
*/
   6445 static void
   6446 ixgbe_enable_rx_drop(struct adapter *adapter)
   6447 {
	struct ixgbe_hw *hw = &adapter->hw;
   6449 
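	/*
	 * DROP_EN lives in each queue's split receive control
	 * register (SRRCTL), one register per queue.
	 */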
   6450 	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl |= IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6454 	}
   6455 }
   6456 
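/*
** Inverse of ixgbe_enable_rx_drop(): clear the per-queue drop
** bit so enabled flow control can backpressure the RX engine.
*/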
static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
   6468