ixgbe.c revision 1.33
/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Coyote Point Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 279805 2015-03-09 10:29:15Z araujo $*/
/*$NetBSD: ixgbe.c,v 1.33 2015/08/05 04:08:44 msaitoh Exp $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#include "ixgbe.h"
#include "vlan.h"

#include <sys/cprng.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
char ixgbe_driver_version[] = "2.5.15";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static const char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t, cfdata_t, void *);
static void     ixgbe_attach(device_t, device_t, void *);
static int      ixgbe_detach(device_t, int);
#if 0
static int      ixgbe_shutdown(device_t);
#endif
#ifdef IXGBE_LEGACY_TX
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#else
static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	ixgbe_qflush(struct ifnet *);
static void	ixgbe_deferred_mq_start(void *);
#endif
static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
static void	ixgbe_ifstop(struct ifnet *, int);
static int	ixgbe_init(struct ifnet *);
static void	ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *,
		    const struct pci_attach_args *);
static void	ixgbe_get_slot_info(struct ixgbe_hw *);
static int      ixgbe_allocate_msix(struct adapter *,
		    const struct pci_attach_args *);
static int      ixgbe_allocate_legacy(struct adapter *,
		    const struct pci_attach_args *);
static int	ixgbe_allocate_queues(struct adapter *);
static int	ixgbe_setup_msix(struct adapter *);
static void	ixgbe_free_pci_resources(struct adapter *);
static void	ixgbe_local_timer(void *);
static int	ixgbe_setup_interface(device_t, struct adapter *);
static void	ixgbe_config_link(struct adapter *);

static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_transmit_structures(struct adapter *);
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
static void	ixgbe_setup_hw_rsc(struct rx_ring *);

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void	ixgbe_txeof(struct tx_ring *);
static bool	ixgbe_rxeof(struct ix_queue *);
static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
		    struct ixgbe_hw_stats *);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
		    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void	ixgbe_configure_ivars(struct adapter *);
static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

static void	ixgbe_setup_vlan_hw_support(struct adapter *);
#if 0
static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
#endif

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

static void	ixgbe_enable_rx_drop(struct adapter *);
static void	ixgbe_disable_rx_drop(struct adapter *);

/* Support for pluggable optic modules */
static bool	ixgbe_sfp_probe(struct adapter *);
static void	ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static int	ixgbe_legacy_irq(void *);

#if defined(NETBSD_MSI_OR_MSIX)
/* The MSI/X Interrupt handlers */
static void	ixgbe_msix_que(void *);
static void	ixgbe_msix_link(void *);
#endif

/* Software interrupts for deferred work */
static void	ixgbe_handle_que(void *);
static void	ixgbe_handle_link(void *);
static void	ixgbe_handle_msf(void *);
static void	ixgbe_handle_mod(void *);

const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);

#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
static void	ixgbe_reinit_fdir(void *, int);
#endif

/* Missing shared code prototype */
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

#if 0
devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
#endif

/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixgbe_enable_aim = TRUE;
#define SYSCTL_INT(__x, __y)
SYSCTL_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
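
/*
 * Illustrative usage only: the tunable above is a stub on NetBSD
 * (the SYSCTL_INT macro expands to nothing); the writable per-device
 * "enable_aim" node created in ixgbe_sysctl_attach() below is the
 * real knob. Assuming the instance node for the first device shows
 * up as hw.ixg0, AIM could be toggled at runtime with:
 *
 *	# sysctl -w hw.ixg0.enable_aim=0	(fixed interrupt rate)
 *	# sysctl -w hw.ixg0.enable_aim=1	(rate follows traffic)
 */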

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
SYSCTL_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
SYSCTL_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);

/*
** Smart speed setting, default to on.
** This only works as a compile-time option
** right now, as it is applied during attach;
** set this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;
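
/*
 * A minimal sketch of the compile-time change described above; there
 * is no runtime knob for this, only the initializer:
 *
 *	static int ixgbe_smart_speed = ixgbe_smart_speed_off;
 */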

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
SYSCTL_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

#if defined(NETBSD_MSI_OR_MSIX)
/*
 * Number of queues; can be set to 0,
 * in which case it autoconfigures based
 * on the number of CPUs, with a max of 8.
 * This can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
SYSCTL_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
#endif

/*
** Number of TX descriptors per ring,
** set higher than RX as this seems to be
** the better-performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
SYSCTL_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
SYSCTL_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/*
** Defining this on will allow the use
** of unsupported SFP+ modules; note that
** if you do so, you are on your own :)
*/
static int allow_unsupported_sfp = true;
SYSCTL_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It will also cause IP forwarding to
**  fail, and that can't be controlled by
**  the stack the way LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; this would need to be compiled
**  in to enable.
*/
static bool ixgbe_rsc_enable = FALSE;

/* Keep a running tab on them for sanity checks */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool; this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif
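
/*
 * Worked example, tying the knobs above to their use in
 * ixgbe_init_locked() below: the default fdir_pballoc = 1 selects a
 * 128K filter pool, and the header memory handed to the shared code
 * is derived from it as
 *
 *	u32 hdrm = 32 << fdir_pballoc;	(64 with the default)
 *	hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
 */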

#ifdef DEV_NETMAP
/*
 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
 * be a reference on how to implement netmap support in a driver.
 * Additional comments are in ixgbe_netmap.h .
 *
 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
 * that extend the standard driver.
 */
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  ixgbe_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return 1 on success, 0 on failure
 *********************************************************************/

static int
ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
{
	const struct pci_attach_args *pa = aux;

	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
}

static ixgbe_vendor_info_t *
ixgbe_lookup(const struct pci_attach_args *pa)
{
	pcireg_t subid;
	ixgbe_vendor_info_t *ent;

	INIT_DEBUGOUT("ixgbe_probe: begin");

	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
		return NULL;

	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);

	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&

		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
		     ent->subvendor_id == 0) &&

		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
		     ent->subdevice_id == 0)) {
			++ixgbe_total_ports;
			return ent;
		}
	}
	return NULL;
}


static void
ixgbe_sysctl_attach(struct adapter *adapter)
{
	struct sysctllog **log;
	const struct sysctlnode *rnode, *cnode;
	device_t dev;

	dev = adapter->dev;
	log = &adapter->sysctllog;

	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
		aprint_error_dev(dev, "could not create sysctl root\n");
		return;
	}

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READONLY, CTLTYPE_INT,
	    "num_queues", SYSCTL_DESCR("Number of queues"),
	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "fc", SYSCTL_DESCR("Flow Control"),
	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	/* XXX This is an *instance* sysctl controlling a *global* variable.
	 * XXX It's that way in the FreeBSD driver that this derives from.
	 */
	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");

	if (sysctl_createv(log, 0, &rnode, &cnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT,
	    "ts", SYSCTL_DESCR("Thermal Test"),
	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
		aprint_error_dev(dev, "could not create sysctl\n");
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static void
ixgbe_attach(device_t parent, device_t dev, void *aux)
{
	struct adapter *adapter;
	struct ixgbe_hw *hw;
	int             error = 0;
	u16		csum;
	u32		ctrl_ext;
	ixgbe_vendor_info_t *ent;
	const struct pci_attach_args *pa = aux;

	INIT_DEBUGOUT("ixgbe_attach: begin");

	/* Allocate, clear, and link in our adapter structure */
	adapter = device_private(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	adapter->osdep.pc = pa->pa_pc;
	adapter->osdep.tag = pa->pa_tag;
	adapter->osdep.dmat = pa->pa_dmat;
	adapter->osdep.attached = false;

	ent = ixgbe_lookup(pa);

	KASSERT(ent != NULL);

	aprint_normal(": %s, Version - %s\n",
	    ixgbe_strings[ent->index], ixgbe_driver_version);

	/* Core Lock Init */
	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));

	/* SYSCTL APIs */

	ixgbe_sysctl_attach(adapter);

	/* Set up the timer callout */
	callout_init(&adapter->timer, 0);

	/* Determine hardware revision */
	ixgbe_identify_hardware(adapter);

	/* Do base PCI setup - map BAR0 */
	if (ixgbe_allocate_pci_resources(adapter, pa)) {
		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_out;
	}

	/* Do descriptor calc and sanity checks */
	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
		aprint_error_dev(dev, "TXD config issue, using default!\n");
		adapter->num_tx_desc = DEFAULT_TXD;
	} else
		adapter->num_tx_desc = ixgbe_txd;

	/*
	** With many RX rings it is easy to exceed the
	** system mbuf allocation. Tuning nmbclusters
	** can alleviate this.
	*/
	if (nmbclusters > 0) {
		int s;
		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
		if (s > nmbclusters) {
			aprint_error_dev(dev, "RX Descriptors exceed "
			    "system mbuf max, using default instead!\n");
			ixgbe_rxd = DEFAULT_RXD;
		}
	}
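
	/*
	 * Worked example with illustrative figures: two ports, each
	 * running 8 queues of 4096 RX descriptors, would want
	 * 4096 * 8 * 2 = 65536 clusters, so on a system tuned below
	 * that the check above quietly falls back to DEFAULT_RXD.
	 */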

	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
		aprint_error_dev(dev, "RXD config issue, using default!\n");
		adapter->num_rx_desc = DEFAULT_RXD;
	} else
		adapter->num_rx_desc = ixgbe_rxd;

	/* Allocate our TX/RX Queues */
	if (ixgbe_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_out;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Initialize the shared code */
	hw->allow_unsupported_sfp = allow_unsupported_sfp;
	error = ixgbe_init_shared_code(hw);
	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		/*
		** No optics in this port, set up
		** so the timer routine will probe
		** for later insertion.
		*/
		adapter->sfp_probe = TRUE;
		error = 0;
	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
		error = EIO;
		goto err_late;
	} else if (error) {
		aprint_error_dev(dev,"Unable to initialize the shared code\n");
		error = EIO;
		goto err_late;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
		error = EIO;
		goto err_late;
	}

	error = ixgbe_init_hw(hw);
	switch (error) {
	case IXGBE_ERR_EEPROM_VERSION:
		aprint_error_dev(dev, "This device is a pre-production adapter/"
		    "LOM.  Please be aware there may be issues associated "
		    "with your hardware.\n If you are experiencing problems "
		    "please contact your Intel or hardware representative "
		    "who provided you with this hardware.\n");
		break;
	case IXGBE_ERR_SFP_NOT_SUPPORTED:
		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
		error = EIO;
		aprint_error_dev(dev,"Hardware Initialization Failure\n");
		goto err_late;
	case IXGBE_ERR_SFP_NOT_PRESENT:
		device_printf(dev,"No SFP+ Module found\n");
		/* falls thru */
	default:
		break;
	}

	/* Detect and set physical type */
	ixgbe_setup_optics(adapter);

	if ((adapter->msix > 1) && (ixgbe_enable_msix))
		error = ixgbe_allocate_msix(adapter, pa);
	else
		error = ixgbe_allocate_legacy(adapter, pa);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (ixgbe_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Initialize statistics */
	ixgbe_update_stats_counters(adapter);

	/*
	** Check PCIE slot type/speed/width
	*/
	ixgbe_get_slot_info(hw);

	/* Set an initial default flow control value */
	adapter->fc =  ixgbe_fc_full;

	/* let hardware know driver is loaded */
	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

	ixgbe_add_hw_stats(adapter);

#ifdef DEV_NETMAP
	ixgbe_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("ixgbe_attach: end");
	adapter->osdep.attached = true;
	return;
err_late:
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
err_out:
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	ixgbe_free_pci_resources(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);
	return;

}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
ixgbe_detach(device_t dev, int flags)
{
	struct adapter *adapter = device_private(dev);
	struct rx_ring *rxr = adapter->rx_rings;
	struct ixgbe_hw_stats *stats = &adapter->stats;
	struct ix_queue *que = adapter->queues;
	struct tx_ring *txr = adapter->tx_rings;
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");
	if (adapter->osdep.attached == false)
		return 0;

#if NVLAN > 0
	/* Make sure VLANs are not using driver */
	if (!VLAN_ATTACHED(&adapter->osdep.ec))
		;	/* nothing to do: no VLANs */
	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
		vlan_ifdetach(adapter->ifp);
	else {
		aprint_error_dev(dev, "VLANs in use\n");
		return EBUSY;
	}
#endif

	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
#ifndef IXGBE_LEGACY_TX
		softint_disestablish(txr->txq_si);
#endif
		softint_disestablish(que->que_si);
	}

	/* Drain the Link queue */
	softint_disestablish(adapter->link_si);
	softint_disestablish(adapter->mod_si);
	softint_disestablish(adapter->msf_si);
#ifdef IXGBE_FDIR
	softint_disestablish(adapter->fdir_si);
#endif

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	ether_ifdetach(adapter->ifp);
	callout_halt(&adapter->timer, NULL);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
#if 0	/* XXX the NetBSD port is probably missing something here */
	bus_generic_detach(dev);
#endif
	if_detach(adapter->ifp);

	sysctl_teardown(&adapter->sysctllog);
	evcnt_detach(&adapter->handleq);
	evcnt_detach(&adapter->req);
	evcnt_detach(&adapter->morerx);
	evcnt_detach(&adapter->moretx);
	evcnt_detach(&adapter->txloops);
	evcnt_detach(&adapter->efbig_tx_dma_setup);
	evcnt_detach(&adapter->m_defrag_failed);
	evcnt_detach(&adapter->efbig2_tx_dma_setup);
	evcnt_detach(&adapter->einval_tx_dma_setup);
	evcnt_detach(&adapter->other_tx_dma_setup);
	evcnt_detach(&adapter->eagain_tx_dma_setup);
	evcnt_detach(&adapter->enomem_tx_dma_setup);
	evcnt_detach(&adapter->watchdog_events);
	evcnt_detach(&adapter->tso_err);
	evcnt_detach(&adapter->link_irq);

	txr = adapter->tx_rings;
	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		evcnt_detach(&txr->no_desc_avail);
		evcnt_detach(&txr->total_packets);
		evcnt_detach(&txr->tso_tx);

		if (i < __arraycount(adapter->stats.mpc)) {
			evcnt_detach(&adapter->stats.mpc[i]);
		}
		if (i < __arraycount(adapter->stats.pxontxc)) {
			evcnt_detach(&adapter->stats.pxontxc[i]);
			evcnt_detach(&adapter->stats.pxonrxc[i]);
			evcnt_detach(&adapter->stats.pxofftxc[i]);
			evcnt_detach(&adapter->stats.pxoffrxc[i]);
			evcnt_detach(&adapter->stats.pxon2offc[i]);
		}
		if (i < __arraycount(adapter->stats.qprc)) {
			evcnt_detach(&adapter->stats.qprc[i]);
			evcnt_detach(&adapter->stats.qptc[i]);
			evcnt_detach(&adapter->stats.qbrc[i]);
			evcnt_detach(&adapter->stats.qbtc[i]);
			evcnt_detach(&adapter->stats.qprdc[i]);
		}

		evcnt_detach(&rxr->rx_packets);
		evcnt_detach(&rxr->rx_bytes);
		evcnt_detach(&rxr->rx_copies);
		evcnt_detach(&rxr->no_jmbuf);
		evcnt_detach(&rxr->rx_discarded);
		evcnt_detach(&rxr->rx_irq);
	}
	evcnt_detach(&stats->ipcs);
	evcnt_detach(&stats->l4cs);
	evcnt_detach(&stats->ipcs_bad);
	evcnt_detach(&stats->l4cs_bad);
	evcnt_detach(&stats->intzero);
	evcnt_detach(&stats->legint);
	evcnt_detach(&stats->crcerrs);
	evcnt_detach(&stats->illerrc);
	evcnt_detach(&stats->errbc);
	evcnt_detach(&stats->mspdc);
	evcnt_detach(&stats->mlfc);
	evcnt_detach(&stats->mrfc);
	evcnt_detach(&stats->rlec);
	evcnt_detach(&stats->lxontxc);
	evcnt_detach(&stats->lxonrxc);
	evcnt_detach(&stats->lxofftxc);
	evcnt_detach(&stats->lxoffrxc);

	/* Packet Reception Stats */
	evcnt_detach(&stats->tor);
	evcnt_detach(&stats->gorc);
	evcnt_detach(&stats->tpr);
	evcnt_detach(&stats->gprc);
	evcnt_detach(&stats->mprc);
	evcnt_detach(&stats->bprc);
	evcnt_detach(&stats->prc64);
	evcnt_detach(&stats->prc127);
	evcnt_detach(&stats->prc255);
	evcnt_detach(&stats->prc511);
	evcnt_detach(&stats->prc1023);
	evcnt_detach(&stats->prc1522);
	evcnt_detach(&stats->ruc);
	evcnt_detach(&stats->rfc);
	evcnt_detach(&stats->roc);
	evcnt_detach(&stats->rjc);
	evcnt_detach(&stats->mngprc);
	evcnt_detach(&stats->xec);

	/* Packet Transmission Stats */
	evcnt_detach(&stats->gotc);
	evcnt_detach(&stats->tpt);
	evcnt_detach(&stats->gptc);
	evcnt_detach(&stats->bptc);
	evcnt_detach(&stats->mptc);
	evcnt_detach(&stats->mngptc);
	evcnt_detach(&stats->ptc64);
	evcnt_detach(&stats->ptc127);
	evcnt_detach(&stats->ptc255);
	evcnt_detach(&stats->ptc511);
	evcnt_detach(&stats->ptc1023);
	evcnt_detach(&stats->ptc1522);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *adapter = device_private(dev);
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);
	return (0);
}
#endif

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	int rc;
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_POLL(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m_head);
		if (rc == EFBIG) {
			struct mbuf *mtmp;

			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
				m_head = mtmp;
				rc = ixgbe_xmit(txr, m_head);
				if (rc != 0)
					adapter->efbig2_tx_dma_setup.ev_count++;
			} else
				adapter->m_defrag_failed.ev_count++;
		}
		if (rc != 0) {
			m_freem(m_head);
			continue;
		}

		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, m_head);

		/* Set watchdog on */
		getmicrotime(&txr->watchdog_time);
		txr->queue_status = IXGBE_QUEUE_WORKING;

	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
static void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_flags & IFF_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int 		i, err = 0;
#ifdef	RSS
	uint32_t bucket_id;
#endif

	/* Which queue to use */
	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef	RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			/* XXX TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		} else {
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef	RSS
		}
#endif
	} else {
		i = curcpu % adapter->num_queues;
	}

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		softint_schedule(txr->txq_si);

	return (0);
}
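
/*
 * Illustrative arithmetic only: with adapter->num_queues = 8, a flow
 * carrying m->m_pkthdr.flowid = 77 always maps to TX ring 77 % 8 = 5,
 * so the packets of any one flow stay ordered on a single ring.
 */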

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             enqueued = 0, err = 0;

	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
		/* Send a copy of the frame to the BPF listener */
		bpf_mtap(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (enqueued > 0) {
		/* Set watchdog on */
		txr->queue_status = IXGBE_QUEUE_WORKING;
		getmicrotime(&txr->watchdog_time);
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
ixgbe_deferred_mq_start(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

static int
ixgbe_ifflags_cb(struct ethercom *ec)
{
	struct ifnet *ifp = &ec->ec_if;
	struct adapter *adapter = ifp->if_softc;
	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;

	IXGBE_CORE_LOCK(adapter);

	if (change != 0)
		adapter->if_flags = ifp->if_flags;

	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
		rc = ENETRESET;
	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
		ixgbe_set_promisc(adapter);

	/* Set up VLAN support and filter */
	ixgbe_setup_vlan_hw_support(adapter);

	IXGBE_CORE_UNLOCK(adapter);

	return rc;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  ixgbe_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifcapreq *ifcr = data;
	struct ifreq	*ifr = data;
	int             error = 0;
	int l4csum_en;
	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;

	switch (command) {
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
		break;
	case SIOCSIFCAP:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
		break;
	case SIOCSIFMTU:
		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
		break;
	default:
		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
		break;
	}

	switch (command) {
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
	case SIOCGI2C:
	{
		struct ixgbe_i2c_req	i2c;
		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			error = EINVAL;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
		    i2c.dev_addr, i2c.data);
		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}
	case SIOCSIFCAP:
		/* Layer-4 Rx checksum offload has to be turned on and
		 * off as a unit.
		 */
		l4csum_en = ifcr->ifcr_capenable & l4csum;
		if (l4csum_en != l4csum && l4csum_en != 0)
			return EINVAL;
		/*FALLTHROUGH*/
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFFLAGS:
	case SIOCSIFMTU:
	default:
		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
			return error;
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			;
		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
			IXGBE_CORE_LOCK(adapter);
			ixgbe_init_locked(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
			/*
			 * Multicast list has changed; set the hardware filter
			 * accordingly.
			 */
			IXGBE_CORE_LOCK(adapter);
			ixgbe_disable_intr(adapter);
			ixgbe_set_multi(adapter);
			ixgbe_enable_intr(adapter);
			IXGBE_CORE_UNLOCK(adapter);
		}
		return 0;
	}

	return error;
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
#define IXGBE_MHADD_MFS_SHIFT 16
   1270 
   1271 static void
   1272 ixgbe_init_locked(struct adapter *adapter)
   1273 {
   1274 	struct ifnet   *ifp = adapter->ifp;
   1275 	device_t 	dev = adapter->dev;
   1276 	struct ixgbe_hw *hw = &adapter->hw;
   1277 	u32		k, txdctl, mhadd, gpie;
   1278 	u32		rxdctl, rxctrl;
   1279 
   1280 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1281 
   1282 	KASSERT(mutex_owned(&adapter->core_mtx));
   1283 	INIT_DEBUGOUT("ixgbe_init_locked: begin");
   1284 	hw->adapter_stopped = FALSE;
   1285 	ixgbe_stop_adapter(hw);
   1286         callout_stop(&adapter->timer);
   1287 
   1288 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1289 	adapter->max_frame_size =
   1290 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1291 
   1292         /* reprogram the RAR[0] in case user changed it. */
   1293         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1294 
   1295 	/* Get the latest mac address, User can use a LAA */
   1296 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1297 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1298 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1299 	hw->addr_ctrl.rar_used_count = 1;
   1300 
   1301 	/* Prepare transmit descriptors and buffers */
   1302 	if (ixgbe_setup_transmit_structures(adapter)) {
   1303 		device_printf(dev,"Could not setup transmit structures\n");
   1304 		ixgbe_stop(adapter);
   1305 		return;
   1306 	}
   1307 
   1308 	ixgbe_init_hw(hw);
   1309 	ixgbe_initialize_transmit_units(adapter);
   1310 
   1311 	/* Setup Multicast table */
   1312 	ixgbe_set_multi(adapter);
   1313 
   1314 	/*
   1315 	** Determine the correct mbuf pool
   1316 	** for doing jumbo frames
   1317 	*/
   1318 	if (adapter->max_frame_size <= 2048)
   1319 		adapter->rx_mbuf_sz = MCLBYTES;
   1320 	else if (adapter->max_frame_size <= 4096)
   1321 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1322 	else if (adapter->max_frame_size <= 9216)
   1323 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1324 	else
   1325 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1326 
   1327 	/* Prepare receive descriptors and buffers */
   1328 	if (ixgbe_setup_receive_structures(adapter)) {
   1329 		device_printf(dev,"Could not setup receive structures\n");
   1330 		ixgbe_stop(adapter);
   1331 		return;
   1332 	}
   1333 
   1334 	/* Configure RX settings */
   1335 	ixgbe_initialize_receive_units(adapter);
   1336 
   1337 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1338 
   1339 	/* Enable Fan Failure Interrupt */
   1340 	gpie |= IXGBE_SDP1_GPIEN;
   1341 
   1342 	/* Add for Thermal detection */
   1343 	if (hw->mac.type == ixgbe_mac_82599EB)
   1344 		gpie |= IXGBE_SDP2_GPIEN;
   1345 
   1346 	/* Thermal Failure Detection */
   1347 	if (hw->mac.type == ixgbe_mac_X540)
   1348 		gpie |= IXGBE_SDP0_GPIEN;
   1349 
   1350 	if (adapter->msix > 1) {
   1351 		/* Enable Enhanced MSIX mode */
   1352 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1353 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1354 		    IXGBE_GPIE_OCD;
   1355 	}
   1356 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1357 
   1358 	/* Set MTU size */
   1359 	if (ifp->if_mtu > ETHERMTU) {
   1360 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1361 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1362 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1363 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1364 	}
   1365 
   1366 	/* Now enable all the queues */
   1367 
   1368 	for (int i = 0; i < adapter->num_queues; i++) {
   1369 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1370 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1371 		/* Set WTHRESH to 8, burst writeback */
   1372 		txdctl |= (8 << 16);
   1373 		/*
   1374 		 * When the internal queue falls below PTHRESH (32),
   1375 		 * start prefetching as long as there are at least
   1376 		 * HTHRESH (1) buffers ready. The values are taken
   1377 		 * from the Intel linux driver 3.8.21.
   1378 		 * Prefetching enables tx line rate even with 1 queue.
   1379 		 */
   1380 		txdctl |= (32 << 0) | (1 << 8);
   1381 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1382 	}
   1383 
   1384 	for (int i = 0; i < adapter->num_queues; i++) {
   1385 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1386 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1387 			/*
   1388 			** PTHRESH = 21
   1389 			** HTHRESH = 4
   1390 			** WTHRESH = 8
   1391 			*/
   1392 			rxdctl &= ~0x3FFFFF;
   1393 			rxdctl |= 0x080420;
   1394 		}
   1395 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1396 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1397 		/* XXX I don't trust this loop, and I don't trust the
   1398 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1399 		 */
   1400 		for (k = 0; k < 10; k++) {
   1401 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1402 			    IXGBE_RXDCTL_ENABLE)
   1403 				break;
   1404 			else
   1405 				msec_delay(1);
   1406 		}
   1407 		wmb();
   1408 #ifdef DEV_NETMAP
   1409 		/*
   1410 		 * In netmap mode, we must preserve the buffers made
   1411 		 * available to userspace before the if_init()
   1412 		 * (this is true by default on the TX side, because
   1413 		 * init makes all buffers available to userspace).
   1414 		 *
   1415 		 * netmap_reset() and the device specific routines
   1416 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1417 		 * buffers at the end of the NIC ring, so here we
   1418 		 * must set the RDT (tail) register to make sure
   1419 		 * they are not overwritten.
   1420 		 *
   1421 		 * In this driver the NIC ring starts at RDH = 0,
   1422 		 * RDT points to the last slot available for reception (?),
   1423 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1424 		 */
   1425 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1426 			struct netmap_adapter *na = NA(adapter->ifp);
   1427 			struct netmap_kring *kring = &na->rx_rings[i];
   1428 			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
   1429 
   1430 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1431 		} else
   1432 #endif /* DEV_NETMAP */
   1433 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1434 	}
   1435 
   1436 	/* Enable Receive engine */
   1437 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1438 	if (hw->mac.type == ixgbe_mac_82598EB)
   1439 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1440 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1441 	ixgbe_enable_rx_dma(hw, rxctrl);
   1442 
   1443 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1444 
   1445 	/* Set up MSI/X routing */
   1446 	if (ixgbe_enable_msix)  {
   1447 		ixgbe_configure_ivars(adapter);
   1448 		/* Set up auto-mask */
   1449 		if (hw->mac.type == ixgbe_mac_82598EB)
   1450 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1451 		else {
   1452 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1453 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1454 		}
   1455 	} else {  /* Simple settings for Legacy/MSI */
   1456                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1457                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1458 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1459 	}
   1460 
   1461 #ifdef IXGBE_FDIR
   1462 	/* Init Flow director */
   1463 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1464 		u32 hdrm = 32 << fdir_pballoc;
   1465 
   1466 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1467 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1468 	}
   1469 #endif
   1470 
   1471 	/*
   1472 	** Check on any SFP devices that
   1473 	** need to be kick-started
   1474 	*/
   1475 	if (hw->phy.type == ixgbe_phy_none) {
   1476 		int err = hw->phy.ops.identify(hw);
   1477 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1478                 	device_printf(dev,
   1479 			    "Unsupported SFP+ module type was detected.\n");
   1480 			return;
   1481         	}
   1482 	}
   1483 
   1484 	/* Set moderation on the Link interrupt */
   1485 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1486 
   1487 	/* Config/Enable Link */
   1488 	ixgbe_config_link(adapter);
   1489 
   1490 	/* Hardware Packet Buffer & Flow Control setup */
   1491 	{
   1492 		u32 rxpb, frame, size, tmp;
   1493 
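         		/*
         		 * The DV macros estimate a worst-case delay value in
         		 * bits for the given frame size; IXGBE_BT2KB converts
         		 * that to the KB units the watermark fields use.
         		 */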
   1494 		frame = adapter->max_frame_size;
   1495 
   1496 		/* Calculate High Water */
   1497 		if (hw->mac.type == ixgbe_mac_X540)
   1498 			tmp = IXGBE_DV_X540(frame, frame);
   1499 		else
   1500 			tmp = IXGBE_DV(frame, frame);
   1501 		size = IXGBE_BT2KB(tmp);
   1502 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1503 		hw->fc.high_water[0] = rxpb - size;
   1504 
   1505 		/* Now calculate Low Water */
   1506 		if (hw->mac.type == ixgbe_mac_X540)
   1507 			tmp = IXGBE_LOW_DV_X540(frame);
   1508 		else
   1509 			tmp = IXGBE_LOW_DV(frame);
   1510 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1511 
   1512 		hw->fc.requested_mode = adapter->fc;
   1513 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1514 		hw->fc.send_xon = TRUE;
   1515 	}
   1516 	/* Initialize the FC settings */
   1517 	ixgbe_start_hw(hw);
   1518 
   1519 	/* Set up VLAN support and filter */
   1520 	ixgbe_setup_vlan_hw_support(adapter);
   1521 
   1522 	/* And now turn on interrupts */
   1523 	ixgbe_enable_intr(adapter);
   1524 
   1525 	/* Now inform the stack we're ready */
   1526 	ifp->if_flags |= IFF_RUNNING;
   1527 
   1528 	return;
   1529 }
   1530 
   1531 static int
   1532 ixgbe_init(struct ifnet *ifp)
   1533 {
   1534 	struct adapter *adapter = ifp->if_softc;
   1535 
   1536 	IXGBE_CORE_LOCK(adapter);
   1537 	ixgbe_init_locked(adapter);
   1538 	IXGBE_CORE_UNLOCK(adapter);
   1539 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1540 }
   1541 
   1542 
   1543 /*
   1544 **
   1545 ** MSIX Interrupt Handlers and Tasklets
   1546 **
   1547 */
   1548 
   1549 static inline void
   1550 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1551 {
   1552 	struct ixgbe_hw *hw = &adapter->hw;
   1553 	u64	queue = (u64)(1ULL << vector);
   1554 	u32	mask;
   1555 
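         	/*
         	 * The 82598 has a single 32-bit EIMS register; later MACs
         	 * spread the 64 possible queue bits across two EIMS_EX
         	 * registers, hence the split below.
         	 */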
   1556 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1557                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1558                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1559 	} else {
   1560                 mask = (queue & 0xFFFFFFFF);
   1561                 if (mask)
   1562                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1563                 mask = (queue >> 32);
   1564                 if (mask)
   1565                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1566 	}
   1567 }
   1568 
   1569 __unused static inline void
   1570 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1571 {
   1572 	struct ixgbe_hw *hw = &adapter->hw;
   1573 	u64	queue = (u64)(1ULL << vector);
   1574 	u32	mask;
   1575 
   1576 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1577                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1578                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1579 	} else {
   1580                 mask = (queue & 0xFFFFFFFF);
   1581                 if (mask)
   1582                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1583                 mask = (queue >> 32);
   1584                 if (mask)
   1585                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1586 	}
   1587 }
   1588 
   1589 static void
   1590 ixgbe_handle_que(void *context)
   1591 {
   1592 	struct ix_queue *que = context;
   1593 	struct adapter  *adapter = que->adapter;
   1594 	struct tx_ring  *txr = que->txr;
   1595 	struct ifnet    *ifp = adapter->ifp;
   1596 
   1597 	adapter->handleq.ev_count++;
   1598 
   1599 	if (ifp->if_flags & IFF_RUNNING) {
   1600 		ixgbe_rxeof(que);
   1601 		IXGBE_TX_LOCK(txr);
   1602 		ixgbe_txeof(txr);
   1603 #ifndef IXGBE_LEGACY_TX
   1604 		if (!drbr_empty(ifp, txr->br))
   1605 			ixgbe_mq_start_locked(ifp, txr);
   1606 #else
   1607 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1608 			ixgbe_start_locked(txr, ifp);
   1609 #endif
   1610 		IXGBE_TX_UNLOCK(txr);
   1611 	}
   1612 
    1613 	/* Reenable: per-queue if MSIX owns a vector, else the shared interrupt */
   1614 	if (que->res != NULL)
   1615 		ixgbe_enable_queue(adapter, que->msix);
   1616 	else
   1617 		ixgbe_enable_intr(adapter);
   1618 	return;
   1619 }
   1620 
   1621 
   1622 /*********************************************************************
   1623  *
   1624  *  Legacy Interrupt Service routine
   1625  *
   1626  **********************************************************************/
   1627 
   1628 static int
   1629 ixgbe_legacy_irq(void *arg)
   1630 {
   1631 	struct ix_queue *que = arg;
   1632 	struct adapter	*adapter = que->adapter;
   1633 	struct ixgbe_hw	*hw = &adapter->hw;
   1634 	struct ifnet   *ifp = adapter->ifp;
    1635 	struct tx_ring	*txr = adapter->tx_rings;
   1636 	bool		more = false;
   1637 	u32       	reg_eicr;
   1638 
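         	/* Reading EICR returns and acknowledges the pending causes */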
   1639 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1640 
   1641 	adapter->stats.legint.ev_count++;
   1642 	++que->irqs;
   1643 	if (reg_eicr == 0) {
   1644 		adapter->stats.intzero.ev_count++;
   1645 		if ((ifp->if_flags & IFF_UP) != 0)
   1646 			ixgbe_enable_intr(adapter);
   1647 		return 0;
   1648 	}
   1649 
   1650 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1651 		more = ixgbe_rxeof(que);
   1652 
   1653 		IXGBE_TX_LOCK(txr);
   1654 		ixgbe_txeof(txr);
   1655 #ifdef IXGBE_LEGACY_TX
   1656 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1657 			ixgbe_start_locked(txr, ifp);
   1658 #else
   1659 		if (!drbr_empty(ifp, txr->br))
   1660 			ixgbe_mq_start_locked(ifp, txr);
   1661 #endif
   1662 		IXGBE_TX_UNLOCK(txr);
   1663 	}
   1664 
   1665 	/* Check for fan failure */
   1666 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1667 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1668                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1669 		    "REPLACE IMMEDIATELY!!\n");
   1670 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1671 	}
   1672 
   1673 	/* Link status change */
   1674 	if (reg_eicr & IXGBE_EICR_LSC)
   1675 		softint_schedule(adapter->link_si);
   1676 
    1677 	if (more)
    1678 		softint_schedule(que->que_si);	/* services both RX and TX */
   1683 	else
   1684 		ixgbe_enable_intr(adapter);
   1685 	return 1;
   1686 }
   1687 
   1688 
   1689 #if defined(NETBSD_MSI_OR_MSIX)
   1690 /*********************************************************************
   1691  *
   1692  *  MSIX Queue Interrupt Service routine
   1693  *
   1694  **********************************************************************/
   1695 void
   1696 ixgbe_msix_que(void *arg)
   1697 {
   1698 	struct ix_queue	*que = arg;
   1699 	struct adapter  *adapter = que->adapter;
   1700 	struct ifnet    *ifp = adapter->ifp;
   1701 	struct tx_ring	*txr = que->txr;
   1702 	struct rx_ring	*rxr = que->rxr;
   1703 	bool		more;
   1704 	u32		newitr = 0;
   1705 
   1706 	/* Protect against spurious interrupts */
   1707 	if ((ifp->if_flags & IFF_RUNNING) == 0)
   1708 		return;
   1709 
   1710 	ixgbe_disable_queue(adapter, que->msix);
   1711 	++que->irqs;
   1712 
   1713 	more = ixgbe_rxeof(que);
   1714 
   1715 	IXGBE_TX_LOCK(txr);
   1716 	ixgbe_txeof(txr);
    1717 #ifdef IXGBE_LEGACY_TX
    1718 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
         		ixgbe_start_locked(txr, ifp);
    1719 #else
    1720 	if (!drbr_empty(ifp, txr->br))
    1721 		ixgbe_mq_start_locked(ifp, txr);
    1722 #endif
   1723 	IXGBE_TX_UNLOCK(txr);
   1724 
   1725 	/* Do AIM now? */
   1726 
   1727 	if (ixgbe_enable_aim == FALSE)
   1728 		goto no_calc;
   1729 	/*
   1730 	** Do Adaptive Interrupt Moderation:
   1731         **  - Write out last calculated setting
   1732 	**  - Calculate based on average size over
   1733 	**    the last interval.
   1734 	*/
   1735         if (que->eitr_setting)
   1736                 IXGBE_WRITE_REG(&adapter->hw,
   1737                     IXGBE_EITR(que->msix), que->eitr_setting);
   1738 
   1739         que->eitr_setting = 0;
   1740 
   1741         /* Idle, do nothing */
   1742         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1743                 goto no_calc;
   1744 
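         	/*
         	 * Heuristic: the average packet size over the last interval
         	 * drives the next EITR value; e.g. 64000 bytes in 50 packets
         	 * gives newitr = 1280 before the scaling below.
         	 */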
   1745 	if ((txr->bytes) && (txr->packets))
   1746                	newitr = txr->bytes/txr->packets;
   1747 	if ((rxr->bytes) && (rxr->packets))
   1748 		newitr = max(newitr,
   1749 		    (rxr->bytes / rxr->packets));
   1750 	newitr += 24; /* account for hardware frame, crc */
   1751 
   1752 	/* set an upper boundary */
   1753 	newitr = min(newitr, 3000);
   1754 
   1755 	/* Be nice to the mid range */
   1756 	if ((newitr > 300) && (newitr < 1200))
   1757 		newitr = (newitr / 3);
   1758 	else
   1759 		newitr = (newitr / 2);
   1760 
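         	/*
         	 * 82598 mirrors the interval into the upper half of EITR;
         	 * newer MACs instead set CNT_WDIS so this write does not
         	 * reset the internal interrupt counter.
         	 */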
   1761         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1762                 newitr |= newitr << 16;
   1763         else
   1764                 newitr |= IXGBE_EITR_CNT_WDIS;
   1765 
   1766         /* save for next interrupt */
   1767         que->eitr_setting = newitr;
   1768 
   1769         /* Reset state */
   1770         txr->bytes = 0;
   1771         txr->packets = 0;
   1772         rxr->bytes = 0;
   1773         rxr->packets = 0;
   1774 
   1775 no_calc:
   1776 	if (more)
   1777 		softint_schedule(que->que_si);
   1778 	else
   1779 		ixgbe_enable_queue(adapter, que->msix);
   1780 	return;
   1781 }
   1782 
   1783 
   1784 static void
   1785 ixgbe_msix_link(void *arg)
   1786 {
   1787 	struct adapter	*adapter = arg;
   1788 	struct ixgbe_hw *hw = &adapter->hw;
   1789 	u32		reg_eicr;
   1790 
   1791 	++adapter->link_irq.ev_count;
   1792 
   1793 	/* First get the cause */
   1794 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1795 	/* Be sure the queue bits are not cleared */
   1796 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
   1797 	/* Clear interrupt with write */
   1798 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1799 
   1800 	/* Link status change */
   1801 	if (reg_eicr & IXGBE_EICR_LSC)
   1802 		softint_schedule(adapter->link_si);
   1803 
   1804 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1805 #ifdef IXGBE_FDIR
   1806 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1807 			/* This is probably overkill :) */
   1808 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1809 				return;
   1810                 	/* Disable the interrupt */
   1811 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1812 			softint_schedule(adapter->fdir_si);
   1813 		} else
   1814 #endif
   1815 		if (reg_eicr & IXGBE_EICR_ECC) {
   1816                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1817 			    "Please Reboot!!\n");
   1818 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
    1819 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1822                 	/* Clear the interrupt */
   1823                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1824 			softint_schedule(adapter->msf_si);
   1825         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1826                 	/* Clear the interrupt */
   1827                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1828 			softint_schedule(adapter->mod_si);
   1829 		}
   1830         }
   1831 
   1832 	/* Check for fan failure */
   1833 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1834 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1835                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1836 		    "REPLACE IMMEDIATELY!!\n");
   1837 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1838 	}
   1839 
   1840 	/* Check for over temp condition */
   1841 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1842 	    (reg_eicr & IXGBE_EICR_TS)) {
   1843 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1844 		    "PHY IS SHUT DOWN!!\n");
   1845 		device_printf(adapter->dev, "System shutdown required\n");
   1846 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1847 	}
   1848 
   1849 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1850 	return;
   1851 }
   1852 #endif
   1853 
   1854 /*********************************************************************
   1855  *
   1856  *  Media Ioctl callback
   1857  *
   1858  *  This routine is called whenever the user queries the status of
   1859  *  the interface using ifconfig.
   1860  *
   1861  **********************************************************************/
   1862 static void
   1863 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1864 {
   1865 	struct adapter *adapter = ifp->if_softc;
   1866 	struct ixgbe_hw *hw = &adapter->hw;
   1867 
   1868 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1869 	IXGBE_CORE_LOCK(adapter);
   1870 	ixgbe_update_link_status(adapter);
   1871 
   1872 	ifmr->ifm_status = IFM_AVALID;
   1873 	ifmr->ifm_active = IFM_ETHER;
   1874 
   1875 	if (!adapter->link_active) {
   1876 		IXGBE_CORE_UNLOCK(adapter);
   1877 		return;
   1878 	}
   1879 
   1880 	ifmr->ifm_status |= IFM_ACTIVE;
   1881 
    1882 	/*
    1883 	 * Not all NICs are 1000baseSX; the X540T, for example, is not.
    1884 	 * Set the media properly based on the NIC model.
    1885 	 */
   1886 	switch (hw->device_id) {
   1887 	case IXGBE_DEV_ID_X540T:
   1888 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1889 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1890 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1891 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
   1892 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1893 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1894 		break;
   1895 	default:
   1896 		if (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL)
   1897 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1898 		else if (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
   1899 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1900 		else if (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
   1901 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1902 		break;
   1903 	}
   1904 
   1905 	IXGBE_CORE_UNLOCK(adapter);
   1906 
   1907 	return;
   1908 }
   1909 
   1910 /*********************************************************************
   1911  *
   1912  *  Media Ioctl callback
   1913  *
   1914  *  This routine is called when the user changes speed/duplex using
    1915  *  the media/mediaopt options with ifconfig.
   1916  *
   1917  **********************************************************************/
   1918 static int
   1919 ixgbe_media_change(struct ifnet * ifp)
   1920 {
   1921 	struct adapter *adapter = ifp->if_softc;
   1922 	struct ifmedia *ifm = &adapter->media;
   1923 
   1924 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1925 
   1926 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1927 		return (EINVAL);
   1928 
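         	/*
         	 * Only autonegotiation is supported; both IFM_10G_T and
         	 * IFM_AUTO advertise the full 100M/1G/10G set.
         	 */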
   1929         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1930 	case IFM_10G_T:
   1931 	case IFM_AUTO:
   1932 		adapter->hw.phy.autoneg_advertised =
   1933 		    IXGBE_LINK_SPEED_100_FULL |
   1934 		    IXGBE_LINK_SPEED_1GB_FULL |
   1935 		    IXGBE_LINK_SPEED_10GB_FULL;
   1936                 break;
   1937         default:
   1938                 device_printf(adapter->dev, "Only auto media type\n");
   1939 		return (EINVAL);
   1940         }
   1941 
   1942 	return (0);
   1943 }
   1944 
   1945 /*********************************************************************
   1946  *
   1947  *  This routine maps the mbufs to tx descriptors, allowing the
   1948  *  TX engine to transmit the packets.
   1949  *  	- return 0 on success, positive on failure
   1950  *
   1951  **********************************************************************/
   1952 
   1953 static int
   1954 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1955 {
   1956 	struct m_tag *mtag;
   1957 	struct adapter  *adapter = txr->adapter;
   1958 	struct ethercom *ec = &adapter->osdep.ec;
   1959 	u32		olinfo_status = 0, cmd_type_len;
   1960 	int             i, j, error;
   1961 	int		first;
   1962 	bus_dmamap_t	map;
   1963 	struct ixgbe_tx_buf *txbuf;
   1964 	union ixgbe_adv_tx_desc *txd = NULL;
   1965 
   1966 	/* Basic descriptor defines */
   1967         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1968 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1969 
   1970 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1971         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1972 
   1973         /*
   1974          * Important to capture the first descriptor
   1975          * used because it will contain the index of
   1976          * the one we tell the hardware to report back
   1977          */
   1978         first = txr->next_avail_desc;
   1979 	txbuf = &txr->tx_buffers[first];
   1980 	map = txbuf->map;
   1981 
   1982 	/*
   1983 	 * Map the packet for DMA.
   1984 	 */
   1985 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1986 	    m_head, BUS_DMA_NOWAIT);
   1987 
   1988 	if (__predict_false(error)) {
   1989 
   1990 		switch (error) {
   1991 		case EAGAIN:
   1992 			adapter->eagain_tx_dma_setup.ev_count++;
   1993 			return EAGAIN;
   1994 		case ENOMEM:
   1995 			adapter->enomem_tx_dma_setup.ev_count++;
   1996 			return EAGAIN;
   1997 		case EFBIG:
   1998 			/*
   1999 			 * XXX Try it again?
   2000 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   2001 			 */
   2002 			adapter->efbig_tx_dma_setup.ev_count++;
   2003 			return error;
   2004 		case EINVAL:
   2005 			adapter->einval_tx_dma_setup.ev_count++;
   2006 			return error;
   2007 		default:
   2008 			adapter->other_tx_dma_setup.ev_count++;
   2009 			return error;
   2010 		}
   2011 	}
   2012 
   2013 	/* Make certain there are enough descriptors */
   2014 	if (map->dm_nsegs > txr->tx_avail - 2) {
   2015 		txr->no_desc_avail.ev_count++;
   2016 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   2017 		return EAGAIN;
   2018 	}
   2019 
   2020 	/*
   2021 	** Set up the appropriate offload context
   2022 	** this will consume the first descriptor
   2023 	*/
   2024 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   2025 	if (__predict_false(error)) {
   2026 		return (error);
   2027 	}
   2028 
   2029 #ifdef IXGBE_FDIR
   2030 	/* Do the flow director magic */
   2031 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2032 		++txr->atr_count;
   2033 		if (txr->atr_count >= atr_sample_rate) {
   2034 			ixgbe_atr(txr, m_head);
   2035 			txr->atr_count = 0;
   2036 		}
   2037 	}
   2038 #endif
   2039 
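         	/* Write one advanced TX descriptor per DMA segment */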
   2040 	i = txr->next_avail_desc;
   2041 	for (j = 0; j < map->dm_nsegs; j++) {
   2042 		bus_size_t seglen;
   2043 		bus_addr_t segaddr;
   2044 
   2045 		txbuf = &txr->tx_buffers[i];
   2046 		txd = &txr->tx_base[i];
   2047 		seglen = map->dm_segs[j].ds_len;
   2048 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2049 
   2050 		txd->read.buffer_addr = segaddr;
   2051 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2052 		    cmd_type_len | seglen);
   2053 		txd->read.olinfo_status = htole32(olinfo_status);
   2054 
   2055 		if (++i == txr->num_desc)
   2056 			i = 0;
   2057 	}
   2058 
   2059 	txd->read.cmd_type_len |=
   2060 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2061 	txr->tx_avail -= map->dm_nsegs;
   2062 	txr->next_avail_desc = i;
   2063 
   2064 	txbuf->m_head = m_head;
   2065 	/*
   2066 	** Here we swap the map so the last descriptor,
   2067 	** which gets the completion interrupt has the
   2068 	** real map, and the first descriptor gets the
   2069 	** unused map from this descriptor.
   2070 	*/
   2071 	txr->tx_buffers[first].map = txbuf->map;
   2072 	txbuf->map = map;
   2073 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2074 	    BUS_DMASYNC_PREWRITE);
   2075 
   2076         /* Set the EOP descriptor that will be marked done */
   2077         txbuf = &txr->tx_buffers[first];
   2078 	txbuf->eop = txd;
   2079 
   2080         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2081 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2082 	/*
   2083 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2084 	 * hardware that this frame is available to transmit.
   2085 	 */
   2086 	++txr->total_packets.ev_count;
   2087 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2088 
   2089 	return 0;
   2090 }
   2091 
   2092 static void
   2093 ixgbe_set_promisc(struct adapter *adapter)
   2094 {
   2095 	struct ether_multi *enm;
   2096 	struct ether_multistep step;
   2097 	u_int32_t       reg_rctl;
   2098 	struct ethercom *ec = &adapter->osdep.ec;
   2099 	struct ifnet   *ifp = adapter->ifp;
   2100 	int		mcnt = 0;
   2101 
   2102 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2103 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2104 	if (ifp->if_flags & IFF_ALLMULTI)
   2105 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2106 	else {
   2107 		ETHER_FIRST_MULTI(step, ec, enm);
   2108 		while (enm != NULL) {
   2109 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2110 				break;
   2111 			mcnt++;
   2112 			ETHER_NEXT_MULTI(step, enm);
   2113 		}
   2114 	}
   2115 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2116 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2117 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2118 
   2119 	if (ifp->if_flags & IFF_PROMISC) {
   2120 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2121 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2122 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2123 		reg_rctl |= IXGBE_FCTRL_MPE;
   2124 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2125 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2126 	}
   2127 	return;
   2128 }
   2129 
   2130 
   2131 /*********************************************************************
   2132  *  Multicast Update
   2133  *
   2134  *  This routine is called whenever multicast address list is updated.
   2135  *
   2136  **********************************************************************/
   2137 #define IXGBE_RAR_ENTRIES 16
   2138 
   2139 static void
   2140 ixgbe_set_multi(struct adapter *adapter)
   2141 {
   2142 	struct ether_multi *enm;
   2143 	struct ether_multistep step;
   2144 	u32	fctrl;
   2145 	u8	*mta;
   2146 	u8	*update_ptr;
   2147 	int	mcnt = 0;
   2148 	struct ethercom *ec = &adapter->osdep.ec;
   2149 	struct ifnet   *ifp = adapter->ifp;
   2150 
   2151 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2152 
   2153 	mta = adapter->mta;
   2154 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2155 	    MAX_NUM_MULTICAST_ADDRESSES);
   2156 
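         	/*
         	 * mta is a flat array of IXGBE_ETH_LENGTH_OF_ADDRESS-byte
         	 * entries; ixgbe_mc_array_itr() hands them to the shared
         	 * code one at a time.
         	 */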
   2157 	ifp->if_flags &= ~IFF_ALLMULTI;
   2158 	ETHER_FIRST_MULTI(step, ec, enm);
   2159 	while (enm != NULL) {
   2160 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2161 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2162 			ETHER_ADDR_LEN) != 0)) {
   2163 			ifp->if_flags |= IFF_ALLMULTI;
   2164 			break;
   2165 		}
   2166 		bcopy(enm->enm_addrlo,
   2167 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2168 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2169 		mcnt++;
   2170 		ETHER_NEXT_MULTI(step, enm);
   2171 	}
   2172 
   2173 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2174 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2175 	if (ifp->if_flags & IFF_PROMISC)
   2176 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2177 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2178 		fctrl |= IXGBE_FCTRL_MPE;
   2179 	}
   2180 
   2181 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2182 
   2183 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2184 		update_ptr = mta;
   2185 		ixgbe_update_mc_addr_list(&adapter->hw,
   2186 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2187 	}
   2188 
   2189 	return;
   2190 }
   2191 
   2192 /*
   2193  * This is an iterator function now needed by the multicast
   2194  * shared code. It simply feeds the shared code routine the
   2195  * addresses in the array of ixgbe_set_multi() one by one.
   2196  */
   2197 static u8 *
   2198 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2199 {
   2200 	u8 *addr = *update_ptr;
   2201 	u8 *newptr;
   2202 	*vmdq = 0;
   2203 
   2204 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2205 	*update_ptr = newptr;
   2206 	return addr;
   2207 }
   2208 
   2209 
   2210 /*********************************************************************
   2211  *  Timer routine
   2212  *
    2213  *  This routine checks for link status, updates statistics,
   2214  *  and runs the watchdog check.
   2215  *
   2216  **********************************************************************/
   2217 
   2218 static void
   2219 ixgbe_local_timer1(void *arg)
   2220 {
   2221 	struct adapter	*adapter = arg;
   2222 	device_t	dev = adapter->dev;
   2223 	struct ix_queue *que = adapter->queues;
   2224 	struct tx_ring	*txr = adapter->tx_rings;
   2225 	int		hung = 0, paused = 0;
   2226 
   2227 	KASSERT(mutex_owned(&adapter->core_mtx));
   2228 
   2229 	/* Check for pluggable optics */
   2230 	if (adapter->sfp_probe)
   2231 		if (!ixgbe_sfp_probe(adapter))
   2232 			goto out; /* Nothing to do */
   2233 
   2234 	ixgbe_update_link_status(adapter);
   2235 	ixgbe_update_stats_counters(adapter);
   2236 
   2237 	/*
   2238 	 * If the interface has been paused
   2239 	 * then don't do the watchdog check
   2240 	 */
   2241 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2242 		paused = 1;
   2243 
   2244 	/*
   2245 	** Check the TX queues status
   2246 	**      - watchdog only if all queues show hung
   2247 	*/
   2248 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2249 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2250 		    (paused == 0))
   2251 			++hung;
   2252 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2253 #ifndef IXGBE_LEGACY_TX
   2254 			softint_schedule(txr->txq_si);
   2255 #else
   2256 			softint_schedule(que->que_si);
   2257 #endif
   2258 	}
    2259 	/* Only truly fire the watchdog if all queues show hung */
   2260 	if (hung == adapter->num_queues)
   2261 		goto watchdog;
   2262 
   2263 out:
   2264 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2265 	return;
   2266 
   2267 watchdog:
   2268 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
    2269 	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
    2270 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
    2271 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2272 	device_printf(dev, "TX(%d) desc avail = %d, "
    2273 	    "Next TX to Clean = %d\n",
   2274 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2275 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2276 	adapter->watchdog_events.ev_count++;
   2277 	ixgbe_init_locked(adapter);
   2278 }
   2279 
   2280 static void
   2281 ixgbe_local_timer(void *arg)
   2282 {
   2283 	struct adapter *adapter = arg;
   2284 
   2285 	IXGBE_CORE_LOCK(adapter);
   2286 	ixgbe_local_timer1(adapter);
   2287 	IXGBE_CORE_UNLOCK(adapter);
   2288 }
   2289 
    2290 /*
    2291 ** Note: this routine only updates the OS on the link state;
    2292 **	the real check of the hardware happens with a link
    2293 **	interrupt.
    2294 */
   2295 static void
   2296 ixgbe_update_link_status(struct adapter *adapter)
   2297 {
   2298 	struct ifnet	*ifp = adapter->ifp;
   2299 	device_t dev = adapter->dev;
   2300 
    2302 	if (adapter->link_up) {
   2303 		if (adapter->link_active == FALSE) {
   2304 			if (bootverbose)
    2305 				device_printf(dev, "Link is up %d Gbps %s\n",
   2306 				    ((adapter->link_speed == 128)? 10:1),
   2307 				    "Full Duplex");
   2308 			adapter->link_active = TRUE;
   2309 			/* Update any Flow Control changes */
   2310 			ixgbe_fc_enable(&adapter->hw);
   2311 			if_link_state_change(ifp, LINK_STATE_UP);
   2312 		}
   2313 	} else { /* Link down */
   2314 		if (adapter->link_active == TRUE) {
   2315 			if (bootverbose)
   2316 				device_printf(dev,"Link is Down\n");
   2317 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2318 			adapter->link_active = FALSE;
   2319 		}
   2320 	}
   2321 
   2322 	return;
   2323 }
   2324 
   2325 
   2326 static void
   2327 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2328 {
   2329 	struct adapter *adapter = ifp->if_softc;
   2330 
   2331 	IXGBE_CORE_LOCK(adapter);
   2332 	ixgbe_stop(adapter);
   2333 	IXGBE_CORE_UNLOCK(adapter);
   2334 }
   2335 
   2336 /*********************************************************************
   2337  *
   2338  *  This routine disables all traffic on the adapter by issuing a
   2339  *  global reset on the MAC and deallocates TX/RX buffers.
   2340  *
   2341  **********************************************************************/
   2342 
   2343 static void
   2344 ixgbe_stop(void *arg)
   2345 {
   2346 	struct ifnet   *ifp;
   2347 	struct adapter *adapter = arg;
   2348 	struct ixgbe_hw *hw = &adapter->hw;
   2349 	ifp = adapter->ifp;
   2350 
   2351 	KASSERT(mutex_owned(&adapter->core_mtx));
   2352 
   2353 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2354 	ixgbe_disable_intr(adapter);
   2355 	callout_stop(&adapter->timer);
   2356 
   2357 	/* Let the stack know...*/
   2358 	ifp->if_flags &= ~IFF_RUNNING;
   2359 
   2360 	ixgbe_reset_hw(hw);
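         	/* reset_hw leaves adapter_stopped set; clear it so the
         	 * explicit stop below takes effect */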
   2361 	hw->adapter_stopped = FALSE;
   2362 	ixgbe_stop_adapter(hw);
   2363 	if (hw->mac.type == ixgbe_mac_82599EB)
   2364 		ixgbe_stop_mac_link_on_d3_82599(hw);
   2365 	/* Turn off the laser - noop with no optics */
   2366 	ixgbe_disable_tx_laser(hw);
   2367 
   2368 	/* Update the stack */
   2369 	adapter->link_up = FALSE;
   2370 	ixgbe_update_link_status(adapter);
   2371 
   2372 	/* reprogram the RAR[0] in case user changed it. */
   2373 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2374 
   2375 	return;
   2376 }
   2377 
   2378 
   2379 /*********************************************************************
   2380  *
   2381  *  Determine hardware revision.
   2382  *
   2383  **********************************************************************/
   2384 static void
   2385 ixgbe_identify_hardware(struct adapter *adapter)
   2386 {
   2387 	pcitag_t tag;
   2388 	pci_chipset_tag_t pc;
   2389 	pcireg_t subid, id;
   2390 	struct ixgbe_hw *hw = &adapter->hw;
   2391 
   2392 	pc = adapter->osdep.pc;
   2393 	tag = adapter->osdep.tag;
   2394 
   2395 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2396 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2397 
   2398 	/* Save off the information about this board */
   2399 	hw->vendor_id = PCI_VENDOR(id);
   2400 	hw->device_id = PCI_PRODUCT(id);
   2401 	hw->revision_id =
   2402 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2403 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2404 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2405 
   2406 	/* We need this here to set the num_segs below */
   2407 	ixgbe_set_mac_type(hw);
   2408 
   2409 	/* Pick up the 82599 and VF settings */
   2410 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2411 		hw->phy.smart_speed = ixgbe_smart_speed;
   2412 		adapter->num_segs = IXGBE_82599_SCATTER;
   2413 	} else
   2414 		adapter->num_segs = IXGBE_82598_SCATTER;
   2415 
   2416 	return;
   2417 }
   2418 
   2419 /*********************************************************************
   2420  *
   2421  *  Determine optic type
   2422  *
   2423  **********************************************************************/
   2424 static void
   2425 ixgbe_setup_optics(struct adapter *adapter)
   2426 {
   2427 	struct ixgbe_hw *hw = &adapter->hw;
   2428 	int		layer;
   2429 
   2430 	layer = ixgbe_get_supported_physical_layer(hw);
   2431 
   2432 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2433 		adapter->optics = IFM_10G_T;
   2434 		return;
   2435 	}
   2436 
   2437 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2438 		adapter->optics = IFM_1000_T;
   2439 		return;
   2440 	}
   2441 
   2442 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2443 		adapter->optics = IFM_1000_SX;
   2444 		return;
   2445 	}
   2446 
   2447 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2448 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2449 		adapter->optics = IFM_10G_LR;
   2450 		return;
   2451 	}
   2452 
   2453 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2454 		adapter->optics = IFM_10G_SR;
   2455 		return;
   2456 	}
   2457 
   2458 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2459 		adapter->optics = IFM_10G_TWINAX;
   2460 		return;
   2461 	}
   2462 
   2463 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2464 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2465 		adapter->optics = IFM_10G_CX4;
   2466 		return;
   2467 	}
   2468 
   2469 	/* If we get here just set the default */
   2470 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2471 	return;
   2472 }
   2473 
   2474 /*********************************************************************
   2475  *
   2476  *  Setup the Legacy or MSI Interrupt handler
   2477  *
   2478  **********************************************************************/
   2479 static int
   2480 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2481 {
   2482 	device_t	dev = adapter->dev;
   2483 	struct		ix_queue *que = adapter->queues;
   2484 #ifndef IXGBE_LEGACY_TX
   2485 	struct tx_ring		*txr = adapter->tx_rings;
   2486 #endif
   2487 	char intrbuf[PCI_INTRSTR_LEN];
   2488 #if 0
   2489 	int		rid = 0;
   2490 
   2491 	/* MSI RID at 1 */
   2492 	if (adapter->msix == 1)
   2493 		rid = 1;
   2494 #endif
   2495 
   2496 	/* We allocate a single interrupt resource */
   2497  	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2498 		aprint_error_dev(dev, "unable to map interrupt\n");
   2499 		return ENXIO;
   2500 	} else {
   2501 		aprint_normal_dev(dev, "interrupting at %s\n",
   2502 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2503 			intrbuf, sizeof(intrbuf)));
   2504 	}
   2505 
   2506 	/*
   2507 	 * Try allocating a fast interrupt and the associated deferred
   2508 	 * processing contexts.
   2509 	 */
   2510 #ifndef IXGBE_LEGACY_TX
   2511 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2512 	    txr);
   2513 #endif
   2514 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2515 
   2516 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2517 	adapter->link_si =
   2518 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2519 	adapter->mod_si =
   2520 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2521 	adapter->msf_si =
   2522 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2523 
   2524 #ifdef IXGBE_FDIR
   2525 	adapter->fdir_si =
   2526 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2527 #endif
   2528 	if (que->que_si == NULL ||
   2529 	    adapter->link_si == NULL ||
   2530 	    adapter->mod_si == NULL ||
   2531 #ifdef IXGBE_FDIR
   2532 	    adapter->fdir_si == NULL ||
   2533 #endif
   2534 	    adapter->msf_si == NULL) {
   2535 		aprint_error_dev(dev,
   2536 		    "could not establish software interrupts\n");
   2537 		return ENXIO;
   2538 	}
   2539 
   2540 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2541 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2542 	if (adapter->osdep.intr == NULL) {
   2543 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2544 		softint_disestablish(que->que_si);
   2545 		softint_disestablish(adapter->link_si);
   2546 		softint_disestablish(adapter->mod_si);
   2547 		softint_disestablish(adapter->msf_si);
   2548 #ifdef IXGBE_FDIR
   2549 		softint_disestablish(adapter->fdir_si);
   2550 #endif
   2551 		return ENXIO;
   2552 	}
   2553 	/* For simplicity in the handlers */
   2554 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2555 
   2556 	return (0);
   2557 }
   2558 
   2559 
   2560 /*********************************************************************
   2561  *
   2562  *  Setup MSIX Interrupt resources and handlers
   2563  *
   2564  **********************************************************************/
   2565 static int
   2566 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2567 {
   2568 #if !defined(NETBSD_MSI_OR_MSIX)
   2569 	return 0;
   2570 #else
   2571 	device_t        dev = adapter->dev;
   2572 	struct 		ix_queue *que = adapter->queues;
   2573 	struct  	tx_ring *txr = adapter->tx_rings;
   2574 	int 		error, rid, vector = 0;
   2575 	int		cpu_id = 0;
   2576 #ifdef	RSS
   2577 	cpuset_t cpu_mask;
   2578 #endif
   2579 
   2580 #ifdef	RSS
   2581 	/*
   2582 	 * If we're doing RSS, the number of queues needs to
   2583 	 * match the number of RSS buckets that are configured.
   2584 	 *
   2585 	 * + If there's more queues than RSS buckets, we'll end
   2586 	 *   up with queues that get no traffic.
   2587 	 *
   2588 	 * + If there's more RSS buckets than queues, we'll end
   2589 	 *   up having multiple RSS buckets map to the same queue,
   2590 	 *   so there'll be some contention.
   2591 	 */
   2592 	if (adapter->num_queues != rss_getnumbuckets()) {
   2593 		device_printf(dev,
   2594 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
   2595 		    "; performance will be impacted.\n",
   2596 		    __func__,
   2597 		    adapter->num_queues,
   2598 		    rss_getnumbuckets());
   2599 	}
   2600 #endif
   2601 
   2602 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2603 		rid = vector + 1;
   2604 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
   2605 		    RF_SHAREABLE | RF_ACTIVE);
   2606 		if (que->res == NULL) {
   2607 			aprint_error_dev(dev,"Unable to allocate"
   2608 		    	    " bus resource: que interrupt [%d]\n", vector);
   2609 			return (ENXIO);
   2610 		}
   2611 		/* Set the handler function */
   2612 		error = bus_setup_intr(dev, que->res,
   2613 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2614 		    ixgbe_msix_que, que, &que->tag);
   2615 		if (error) {
   2616 			que->res = NULL;
   2617 			aprint_error_dev(dev,
   2618 			    "Failed to register QUE handler\n");
   2619 			return error;
   2620 		}
   2621 #if __FreeBSD_version >= 800504
   2622 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
   2623 #endif
   2624 		que->msix = vector;
    2625 		adapter->que_mask |= (u64)(1ULL << que->msix);
   2626 #ifdef	RSS
    2627 		/*
         		 * The queue ID is used as the RSS layer bucket ID.
   2628 		 * We look up the queue ID -> RSS CPU ID and select
   2629 		 * that.
   2630 		 */
   2631 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
   2632 #else
   2633 		/*
   2634 		 * Bind the msix vector, and thus the
   2635 		 * ring to the corresponding cpu.
   2636 		 *
   2637 		 * This just happens to match the default RSS round-robin
   2638 		 * bucket -> queue -> CPU allocation.
   2639 		 */
   2640 		if (adapter->num_queues > 1)
   2641 			cpu_id = i;
   2642 #endif
   2643 		if (adapter->num_queues > 1)
   2644 			bus_bind_intr(dev, que->res, cpu_id);
   2645 
   2646 #ifdef	RSS
   2647 		device_printf(dev,
   2648 		    "Bound RSS bucket %d to CPU %d\n",
   2649 		    i, cpu_id);
   2650 #else
   2651 		device_printf(dev,
   2652 		    "Bound queue %d to cpu %d\n",
   2653 		    i, cpu_id);
   2654 #endif
   2655 
   2656 #ifndef IXGBE_LEGACY_TX
   2657 		txr->txq_si = softint_establish(SOFTINT_NET,
   2658 		    ixgbe_deferred_mq_start, txr);
   2659 #endif
   2660 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2661 		    que);
   2662 		if (que->que_si == NULL) {
   2663 			aprint_error_dev(dev,
   2664 			    "could not establish software interrupt\n");
   2665 		}
   2666 	}
   2667 
   2668 	/* and Link */
   2669 	rid = vector + 1;
   2670 	adapter->res = bus_alloc_resource_any(dev,
   2671     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
   2672 	if (!adapter->res) {
   2673 		aprint_error_dev(dev,"Unable to allocate bus resource: "
   2674 		    "Link interrupt [%d]\n", rid);
   2675 		return (ENXIO);
   2676 	}
   2677 	/* Set the link handler function */
   2678 	error = bus_setup_intr(dev, adapter->res,
   2679 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2680 	    ixgbe_msix_link, adapter, &adapter->tag);
   2681 	if (error) {
   2682 		adapter->res = NULL;
   2683 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2684 		return (error);
   2685 	}
   2686 #if __FreeBSD_version >= 800504
   2687 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
   2688 #endif
   2689 	adapter->linkvec = vector;
   2690 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2691 	adapter->link_si =
   2692 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2693 	adapter->mod_si =
   2694 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2695 	adapter->msf_si =
   2696 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2697 #ifdef IXGBE_FDIR
   2698 	adapter->fdir_si =
   2699 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2700 #endif
   2701 
   2702 	return (0);
   2703 #endif
   2704 }
   2705 
   2706 /*
   2707  * Setup Either MSI/X or MSI
   2708  */
   2709 static int
   2710 ixgbe_setup_msix(struct adapter *adapter)
   2711 {
   2712 #if !defined(NETBSD_MSI_OR_MSIX)
   2713 	return 0;
   2714 #else
   2715 	device_t dev = adapter->dev;
   2716 	int rid, want, queues, msgs;
   2717 
   2718 	/* Override by tuneable */
   2719 	if (ixgbe_enable_msix == 0)
   2720 		goto msi;
   2721 
   2722 	/* First try MSI/X */
   2723 	msgs = pci_msix_count(dev);
   2724 	if (msgs == 0)
   2725 		goto msi;
   2726 	rid = PCI_BAR(MSIX_82598_BAR);
   2727 	adapter->msix_mem = bus_alloc_resource_any(dev,
   2728 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2729        	if (adapter->msix_mem == NULL) {
   2730 		rid += 4;	/* 82599 maps in higher BAR */
   2731 		adapter->msix_mem = bus_alloc_resource_any(dev,
   2732 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2733 	}
   2734        	if (adapter->msix_mem == NULL) {
   2735 		/* May not be enabled */
   2736 		device_printf(adapter->dev,
    2737 		    "Unable to map MSIX table\n");
   2738 		goto msi;
   2739 	}
   2740 
   2741 	/* Figure out a reasonable auto config value */
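         	/* (msgs - 1: one vector is reserved for the link interrupt) */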
   2742 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
   2743 
   2744 	/* Override based on tuneable */
   2745 	if (ixgbe_num_queues != 0)
   2746 		queues = ixgbe_num_queues;
   2747 
   2748 #ifdef	RSS
   2749 	/* If we're doing RSS, clamp at the number of RSS buckets */
   2750 	if (queues > rss_getnumbuckets())
   2751 		queues = rss_getnumbuckets();
   2752 #endif
   2753 
   2754 	/* reflect correct sysctl value */
   2755 	ixgbe_num_queues = queues;
   2756 
   2757 	/*
   2758 	** Want one vector (RX/TX pair) per queue
   2759 	** plus an additional for Link.
   2760 	*/
   2761 	want = queues + 1;
   2762 	if (msgs >= want)
   2763 		msgs = want;
   2764 	else {
   2765                	device_printf(adapter->dev,
   2766 		    "MSIX Configuration Problem, "
    2767 		    "only %d vectors available but %d wanted!\n",
   2768 		    msgs, want);
   2769 		goto msi;
   2770 	}
   2771 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
   2772                	device_printf(adapter->dev,
   2773 		    "Using MSIX interrupts with %d vectors\n", msgs);
   2774 		adapter->num_queues = queues;
   2775 		return (msgs);
   2776 	}
   2777 	/*
   2778 	** If MSIX alloc failed or provided us with
   2779 	** less than needed, free and fall through to MSI
   2780 	*/
   2781 	pci_release_msi(dev);
   2782 
   2783 msi:
   2784        	msgs = pci_msi_count(dev);
   2785        	if (adapter->msix_mem != NULL) {
   2786 		bus_release_resource(dev, SYS_RES_MEMORY,
   2787 		    rid, adapter->msix_mem);
   2788 		adapter->msix_mem = NULL;
   2789 	}
   2790        	msgs = 1;
   2791        	if (pci_alloc_msi(dev, &msgs) == 0) {
   2792                	device_printf(adapter->dev,"Using an MSI interrupt\n");
   2793 		return (msgs);
   2794 	}
   2795 	device_printf(adapter->dev,"Using a Legacy interrupt\n");
   2796 	return (0);
   2797 #endif
   2798 }
   2799 
   2800 
   2801 static int
   2802 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2803 {
   2804 	pcireg_t	memtype;
   2805 	device_t        dev = adapter->dev;
   2806 	bus_addr_t addr;
   2807 	int flags;
   2808 
   2809 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2810 	switch (memtype) {
   2811 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2812 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2813 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2814 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2815 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2816 			goto map_err;
   2817 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2818 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2819 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2820 		}
   2821 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2822 		     adapter->osdep.mem_size, flags,
   2823 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2824 map_err:
   2825 			adapter->osdep.mem_size = 0;
   2826 			aprint_error_dev(dev, "unable to map BAR0\n");
   2827 			return ENXIO;
   2828 		}
   2829 		break;
   2830 	default:
   2831 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2832 		return ENXIO;
   2833 	}
   2834 
   2835 	/* Legacy defaults */
   2836 	adapter->num_queues = 1;
   2837 	adapter->hw.back = &adapter->osdep;
   2838 
   2839 	/*
   2840 	** Now setup MSI or MSI/X, should
   2841 	** return us the number of supported
   2842 	** vectors. (Will be 1 for MSI)
   2843 	*/
   2844 	adapter->msix = ixgbe_setup_msix(adapter);
   2845 	return (0);
   2846 }
   2847 
   2848 static void
   2849 ixgbe_free_pci_resources(struct adapter * adapter)
   2850 {
   2851 #if defined(NETBSD_MSI_OR_MSIX)
   2852 	struct 		ix_queue *que = adapter->queues;
   2853 	device_t	dev = adapter->dev;
   2854 #endif
   2855 	int		rid;
   2856 
   2857 #if defined(NETBSD_MSI_OR_MSIX)
   2858 	int		 memrid;
   2859 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2860 		memrid = PCI_BAR(MSIX_82598_BAR);
   2861 	else
   2862 		memrid = PCI_BAR(MSIX_82599_BAR);
   2863 
   2864 	/*
   2865 	** There is a slight possibility of a failure mode
   2866 	** in attach that will result in entering this function
   2867 	** before interrupt resources have been initialized, and
    2868 	** in that case we do not want to execute the loops below.
    2869 	** We can detect this reliably by the state of the adapter's
    2870 	** res pointer.
   2871 	*/
   2872 	if (adapter->res == NULL)
   2873 		goto mem;
   2874 
   2875 	/*
   2876 	**  Release all msix queue resources:
   2877 	*/
   2878 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2879 		rid = que->msix + 1;
   2880 		if (que->tag != NULL) {
   2881 			bus_teardown_intr(dev, que->res, que->tag);
   2882 			que->tag = NULL;
   2883 		}
   2884 		if (que->res != NULL)
   2885 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2886 	}
   2887 #endif
   2888 
   2889 	/* Clean the Legacy or Link interrupt last */
   2890 	if (adapter->linkvec) /* we are doing MSIX */
   2891 		rid = adapter->linkvec + 1;
   2892 	else
    2893 		rid = (adapter->msix != 0) ? 1 : 0;
   2894 
   2895 	if (adapter->osdep.intr != NULL)
   2896 		pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2897 	adapter->osdep.intr = NULL;
   2898 
   2899 #if defined(NETBSD_MSI_OR_MSIX)
   2900 mem:
   2901 	if (adapter->msix)
   2902 		pci_release_msi(dev);
   2903 
   2904 	if (adapter->msix_mem != NULL)
   2905 		bus_release_resource(dev, SYS_RES_MEMORY,
   2906 		    memrid, adapter->msix_mem);
   2907 #endif
   2908 
   2909 	if (adapter->osdep.mem_size != 0) {
   2910 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2911 		    adapter->osdep.mem_bus_space_handle,
   2912 		    adapter->osdep.mem_size);
   2913 	}
   2914 
   2915 	return;
   2916 }
   2917 
   2918 /*********************************************************************
   2919  *
   2920  *  Setup networking device structure and register an interface.
   2921  *
   2922  **********************************************************************/
   2923 static int
   2924 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2925 {
   2926 	struct ethercom *ec = &adapter->osdep.ec;
   2927 	struct ixgbe_hw *hw = &adapter->hw;
   2928 	struct ifnet   *ifp;
   2929 
   2930 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2931 
   2932 	ifp = adapter->ifp = &ec->ec_if;
   2933 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2934 	ifp->if_baudrate = IF_Gbps(10);
   2935 	ifp->if_init = ixgbe_init;
   2936 	ifp->if_stop = ixgbe_ifstop;
   2937 	ifp->if_softc = adapter;
   2938 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2939 	ifp->if_ioctl = ixgbe_ioctl;
   2940 #ifndef IXGBE_LEGACY_TX
   2941 	ifp->if_transmit = ixgbe_mq_start;
   2942 	ifp->if_qflush = ixgbe_qflush;
   2943 #else
   2944 	ifp->if_start = ixgbe_start;
   2945 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2946 #if 0
   2947 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
   2948 #endif
   2949 	IFQ_SET_READY(&ifp->if_snd);
   2950 #endif
   2951 
   2952 	if_attach(ifp);
   2953 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2954 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2955 
   2956 	adapter->max_frame_size =
   2957 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2958 
   2959 	/*
   2960 	 * Tell the upper layer(s) we support long frames.
   2961 	 */
   2962 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2963 
   2964 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2965 	ifp->if_capenable = 0;
   2966 
   2967 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2968 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2969 	ifp->if_capabilities |= IFCAP_LRO;
   2970 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2971 	    		    | ETHERCAP_VLAN_MTU;
   2972 	ec->ec_capenable = ec->ec_capabilities;
   2973 
   2974 	/*
    2975 	** Don't turn this on by default: if VLANs are
    2976 	** created on another pseudo device (e.g. lagg),
    2977 	** VLAN events are not passed through, breaking
    2978 	** operation, though with HW FILTER off it works.
    2979 	** If using VLANs directly on the ixgbe driver you
    2980 	** can enable this and get full hardware tag filtering.
    2981 	*/
   2982 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2983 
   2984 	/*
   2985 	 * Specify the media types supported by this adapter and register
   2986 	 * callbacks to update media and link information
   2987 	 */
   2988 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2989 		     ixgbe_media_status);
   2990 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2991 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2992 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2993 		ifmedia_add(&adapter->media,
   2994 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2995 		ifmedia_add(&adapter->media,
   2996 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2997 	}
   2998 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   2999 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   3000 
   3001 	return (0);
   3002 }
   3003 
   3004 static void
   3005 ixgbe_config_link(struct adapter *adapter)
   3006 {
   3007 	struct ixgbe_hw *hw = &adapter->hw;
   3008 	u32	autoneg, err = 0;
   3009 	bool	sfp, negotiate;
   3010 
   3011 	sfp = ixgbe_is_sfp(hw);
   3012 
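         	/*
         	 * SFP link bring-up is deferred to the module/MSF softints;
         	 * everything else negotiates and sets up the link inline.
         	 */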
   3013 	if (sfp) {
   3014 		void *ip;
   3015 
   3016 		if (hw->phy.multispeed_fiber) {
   3017 			hw->mac.ops.setup_sfp(hw);
   3018 			ixgbe_enable_tx_laser(hw);
   3019 			ip = adapter->msf_si;
   3020 		} else {
   3021 			ip = adapter->mod_si;
   3022 		}
   3023 
   3024 		kpreempt_disable();
   3025 		softint_schedule(ip);
   3026 		kpreempt_enable();
   3027 	} else {
   3028 		if (hw->mac.ops.check_link)
   3029 			err = ixgbe_check_link(hw, &adapter->link_speed,
   3030 			    &adapter->link_up, FALSE);
   3031 		if (err)
   3032 			goto out;
   3033 		autoneg = hw->phy.autoneg_advertised;
   3034 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   3035                 	err  = hw->mac.ops.get_link_capabilities(hw,
   3036 			    &autoneg, &negotiate);
   3037 		else
   3038 			negotiate = 0;
   3039 		if (err)
   3040 			goto out;
   3041 		if (hw->mac.ops.setup_link)
   3042                 	err = hw->mac.ops.setup_link(hw,
   3043 			    autoneg, adapter->link_up);
   3044 	}
   3045 out:
   3046 	return;
   3047 }
   3048 
   3049 /********************************************************************
   3050  * Manage DMA'able memory.
   3051  *******************************************************************/
   3052 
   3053 static int
   3054 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   3055 		struct ixgbe_dma_alloc *dma, const int mapflags)
   3056 {
   3057 	device_t dev = adapter->dev;
   3058 	int             r, rsegs;
   3059 
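         	/*
         	 * Usual bus_dma sequence: create a tag, allocate and map the
         	 * memory, create a map and load it; unwind in reverse order
         	 * through the fail_* labels on error.
         	 */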
   3060 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3061 			       DBA_ALIGN, 0,	/* alignment, bounds */
   3062 			       size,	/* maxsize */
   3063 			       1,	/* nsegments */
   3064 			       size,	/* maxsegsize */
   3065 			       BUS_DMA_ALLOCNOW,	/* flags */
   3066 			       &dma->dma_tag);
   3067 	if (r != 0) {
   3068 		aprint_error_dev(dev,
   3069 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   3070 		goto fail_0;
   3071 	}
   3072 
   3073 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   3074 		size,
   3075 		dma->dma_tag->dt_alignment,
   3076 		dma->dma_tag->dt_boundary,
   3077 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   3078 	if (r != 0) {
   3079 		aprint_error_dev(dev,
   3080 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   3081 		goto fail_1;
   3082 	}
   3083 
   3084 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   3085 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   3086 	if (r != 0) {
   3087 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   3088 		    __func__, r);
   3089 		goto fail_2;
   3090 	}
   3091 
   3092 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   3093 	if (r != 0) {
    3094 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   3095 		    __func__, r);
   3096 		goto fail_3;
   3097 	}
   3098 
   3099 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   3100 			    size,
   3101 			    NULL,
   3102 			    mapflags | BUS_DMA_NOWAIT);
   3103 	if (r != 0) {
   3104 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3105 		    __func__, r);
   3106 		goto fail_4;
   3107 	}
   3108 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3109 	dma->dma_size = size;
   3110 	return 0;
   3111 fail_4:
   3112 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3113 fail_3:
   3114 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3115 fail_2:
   3116 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3117 fail_1:
   3118 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3119 fail_0:
   3120 	return r;
   3121 }
   3122 
   3123 static void
   3124 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3125 {
   3126 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3127 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3128 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3129 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3130 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3131 }
   3132 
   3133 
   3134 /*********************************************************************
   3135  *
   3136  *  Allocate memory for the transmit and receive rings, and then
   3137  *  the descriptors associated with each, called only once at attach.
   3138  *
   3139  **********************************************************************/
   3140 static int
   3141 ixgbe_allocate_queues(struct adapter *adapter)
   3142 {
   3143 	device_t	dev = adapter->dev;
   3144 	struct ix_queue	*que;
   3145 	struct tx_ring	*txr;
   3146 	struct rx_ring	*rxr;
   3147 	int rsize, tsize, error = IXGBE_SUCCESS;
   3148 	int txconf = 0, rxconf = 0;
   3149 
    3150 	/* First allocate the top level queue structs */
    3151 	if (!(adapter->queues =
    3152 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    3153 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    3154 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    3155 		error = ENOMEM;
    3156 		goto fail;
    3157 	}
   3158 
    3159 	/* Then allocate the TX ring struct memory */
   3160 	if (!(adapter->tx_rings =
   3161 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3162 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3163 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3164 		error = ENOMEM;
   3165 		goto tx_fail;
   3166 	}
   3167 
   3168 	/* Next allocate the RX */
   3169 	if (!(adapter->rx_rings =
   3170 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3171 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3172 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3173 		error = ENOMEM;
   3174 		goto rx_fail;
   3175 	}
   3176 
    3177 	/* Size of the TX descriptor ring itself */
   3178 	tsize = roundup2(adapter->num_tx_desc *
   3179 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   3180 
   3181 	/*
   3182 	 * Now set up the TX queues, txconf is needed to handle the
   3183 	 * possibility that things fail midcourse and we need to
   3184 	 * undo memory gracefully
   3185 	 */
   3186 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3187 		/* Set up some basics */
   3188 		txr = &adapter->tx_rings[i];
   3189 		txr->adapter = adapter;
   3190 		txr->me = i;
   3191 		txr->num_desc = adapter->num_tx_desc;
   3192 
   3193 		/* Initialize the TX side lock */
   3194 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3195 		    device_xname(dev), txr->me);
   3196 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3197 
   3198 		if (ixgbe_dma_malloc(adapter, tsize,
   3199 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3200 			aprint_error_dev(dev,
   3201 			    "Unable to allocate TX Descriptor memory\n");
   3202 			error = ENOMEM;
   3203 			goto err_tx_desc;
   3204 		}
   3205 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3206 		bzero((void *)txr->tx_base, tsize);
   3207 
    3208 		/* Now allocate transmit buffers for the ring */
    3209 		if (ixgbe_allocate_transmit_buffers(txr)) {
    3210 			aprint_error_dev(dev,
    3211 			    "Critical Failure setting up transmit buffers\n");
    3212 			error = ENOMEM;
    3213 			goto err_tx_desc;
    3214 		}
   3215 #ifndef IXGBE_LEGACY_TX
   3216 		/* Allocate a buf ring */
   3217 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3218 		    M_WAITOK, &txr->tx_mtx);
   3219 		if (txr->br == NULL) {
   3220 			aprint_error_dev(dev,
   3221 			    "Critical Failure setting up buf ring\n");
   3222 			error = ENOMEM;
   3223 			goto err_tx_desc;
    3224 		}
   3225 #endif
   3226 	}
   3227 
   3228 	/*
   3229 	 * Next the RX queues...
   3230 	 */
   3231 	rsize = roundup2(adapter->num_rx_desc *
   3232 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3233 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3234 		rxr = &adapter->rx_rings[i];
   3235 		/* Set up some basics */
   3236 		rxr->adapter = adapter;
   3237 		rxr->me = i;
   3238 		rxr->num_desc = adapter->num_rx_desc;
   3239 
   3240 		/* Initialize the RX side lock */
   3241 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3242 		    device_xname(dev), rxr->me);
   3243 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3244 
   3245 		if (ixgbe_dma_malloc(adapter, rsize,
   3246 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3247 			aprint_error_dev(dev,
   3248 			    "Unable to allocate RxDescriptor memory\n");
   3249 			error = ENOMEM;
   3250 			goto err_rx_desc;
   3251 		}
   3252 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3253 		bzero((void *)rxr->rx_base, rsize);
   3254 
    3255 		/* Allocate receive buffers for the ring */
   3256 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3257 			aprint_error_dev(dev,
   3258 			    "Critical Failure setting up receive buffers\n");
   3259 			error = ENOMEM;
   3260 			goto err_rx_desc;
   3261 		}
   3262 	}
   3263 
   3264 	/*
   3265 	** Finally set up the queue holding structs
   3266 	*/
   3267 	for (int i = 0; i < adapter->num_queues; i++) {
   3268 		que = &adapter->queues[i];
   3269 		que->adapter = adapter;
   3270 		que->txr = &adapter->tx_rings[i];
   3271 		que->rxr = &adapter->rx_rings[i];
   3272 	}
   3273 
   3274 	return (0);
   3275 
   3276 err_rx_desc:
   3277 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3278 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3279 err_tx_desc:
   3280 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3281 		ixgbe_dma_free(adapter, &txr->txdma);
   3282 	free(adapter->rx_rings, M_DEVBUF);
   3283 rx_fail:
   3284 	free(adapter->tx_rings, M_DEVBUF);
   3285 tx_fail:
   3286 	free(adapter->queues, M_DEVBUF);
   3287 fail:
   3288 	return (error);
   3289 }
   3290 
   3291 /*********************************************************************
   3292  *
   3293  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3294  *  the information needed to transmit a packet on the wire. This is
   3295  *  called only once at attach, setup is done every reset.
   3296  *
   3297  **********************************************************************/
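         /*
          * The TX DMA tag created here allows up to adapter->num_segs
          * segments of at most PAGE_SIZE each, with the total mapping
          * capped at IXGBE_TSO_SIZE, so a single map can cover a full TSO
          * burst scattered across many mbuf clusters.
          */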
   3298 static int
   3299 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3300 {
   3301 	struct adapter *adapter = txr->adapter;
   3302 	device_t dev = adapter->dev;
   3303 	struct ixgbe_tx_buf *txbuf;
   3304 	int error, i;
   3305 
   3306 	/*
   3307 	 * Setup DMA descriptor areas.
   3308 	 */
   3309 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3310 			       1, 0,		/* alignment, bounds */
   3311 			       IXGBE_TSO_SIZE,		/* maxsize */
   3312 			       adapter->num_segs,	/* nsegments */
   3313 			       PAGE_SIZE,		/* maxsegsize */
   3314 			       0,			/* flags */
   3315 			       &txr->txtag))) {
    3316 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3317 		goto fail;
   3318 	}
   3319 
   3320 	if (!(txr->tx_buffers =
   3321 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3322 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3323 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3324 		error = ENOMEM;
   3325 		goto fail;
   3326 	}
   3327 
   3328         /* Create the descriptor buffer dma maps */
   3329 	txbuf = txr->tx_buffers;
   3330 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3331 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3332 		if (error != 0) {
   3333 			aprint_error_dev(dev,
   3334 			    "Unable to create TX DMA map (%d)\n", error);
   3335 			goto fail;
   3336 		}
   3337 	}
   3338 
   3339 	return 0;
   3340 fail:
    3341 	/* Free everything; this handles the case where we failed partway */
   3342 	ixgbe_free_transmit_structures(adapter);
   3343 	return (error);
   3344 }
   3345 
   3346 /*********************************************************************
   3347  *
   3348  *  Initialize a transmit ring.
   3349  *
   3350  **********************************************************************/
   3351 static void
   3352 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3353 {
   3354 	struct adapter *adapter = txr->adapter;
   3355 	struct ixgbe_tx_buf *txbuf;
   3356 	int i;
   3357 #ifdef DEV_NETMAP
   3358 	struct netmap_adapter *na = NA(adapter->ifp);
   3359 	struct netmap_slot *slot;
   3360 #endif /* DEV_NETMAP */
   3361 
   3362 	/* Clear the old ring contents */
   3363 	IXGBE_TX_LOCK(txr);
   3364 #ifdef DEV_NETMAP
   3365 	/*
   3366 	 * (under lock): if in netmap mode, do some consistency
   3367 	 * checks and set slot to entry 0 of the netmap ring.
   3368 	 */
   3369 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3370 #endif /* DEV_NETMAP */
   3371 	bzero((void *)txr->tx_base,
   3372 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3373 	/* Reset indices */
   3374 	txr->next_avail_desc = 0;
   3375 	txr->next_to_clean = 0;
   3376 
   3377 	/* Free any existing tx buffers. */
    3378 	txbuf = txr->tx_buffers;
   3379 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3380 		if (txbuf->m_head != NULL) {
   3381 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3382 			    0, txbuf->m_head->m_pkthdr.len,
   3383 			    BUS_DMASYNC_POSTWRITE);
   3384 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3385 			m_freem(txbuf->m_head);
   3386 			txbuf->m_head = NULL;
   3387 		}
   3388 #ifdef DEV_NETMAP
   3389 		/*
   3390 		 * In netmap mode, set the map for the packet buffer.
   3391 		 * NOTE: Some drivers (not this one) also need to set
   3392 		 * the physical buffer address in the NIC ring.
   3393 		 * Slots in the netmap ring (indexed by "si") are
   3394 		 * kring->nkr_hwofs positions "ahead" wrt the
   3395 		 * corresponding slot in the NIC ring. In some drivers
   3396 		 * (not here) nkr_hwofs can be negative. Function
   3397 		 * netmap_idx_n2k() handles wraparounds properly.
   3398 		 */
   3399 		if (slot) {
   3400 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3401 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
   3402 		}
   3403 #endif /* DEV_NETMAP */
   3404 		/* Clear the EOP descriptor pointer */
   3405 		txbuf->eop = NULL;
    3406 	}
   3407 
   3408 #ifdef IXGBE_FDIR
   3409 	/* Set the rate at which we sample packets */
   3410 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3411 		txr->atr_sample = atr_sample_rate;
   3412 #endif
   3413 
   3414 	/* Set number of descriptors available */
   3415 	txr->tx_avail = adapter->num_tx_desc;
   3416 
   3417 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3418 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3419 	IXGBE_TX_UNLOCK(txr);
   3420 }
   3421 
   3422 /*********************************************************************
   3423  *
   3424  *  Initialize all transmit rings.
   3425  *
   3426  **********************************************************************/
   3427 static int
   3428 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3429 {
   3430 	struct tx_ring *txr = adapter->tx_rings;
   3431 
   3432 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3433 		ixgbe_setup_transmit_ring(txr);
   3434 
   3435 	return (0);
   3436 }
   3437 
   3438 /*********************************************************************
   3439  *
   3440  *  Enable transmit unit.
   3441  *
   3442  **********************************************************************/
   3443 static void
   3444 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3445 {
   3446 	struct tx_ring	*txr = adapter->tx_rings;
   3447 	struct ixgbe_hw	*hw = &adapter->hw;
   3448 
   3449 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3450 
   3451 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3452 		u64	tdba = txr->txdma.dma_paddr;
   3453 		u32	txctrl;
   3454 
   3455 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3456 		       (tdba & 0x00000000ffffffffULL));
   3457 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3458 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3459 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3460 
   3461 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3462 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3463 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3464 
   3465 		/* Setup Transmit Descriptor Cmd Settings */
   3466 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3467 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3468 
   3469 		/* Set the processing limit */
   3470 		txr->process_limit = ixgbe_tx_process_limit;
   3471 
    3472 		/* Disable relaxed ordering of descriptor write-back */
   3473 		switch (hw->mac.type) {
   3474 		case ixgbe_mac_82598EB:
   3475 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3476 			break;
   3477 		case ixgbe_mac_82599EB:
   3478 		case ixgbe_mac_X540:
   3479 		default:
   3480 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3481 			break;
    3482 		}
   3483 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3484 		switch (hw->mac.type) {
   3485 		case ixgbe_mac_82598EB:
   3486 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3487 			break;
   3488 		case ixgbe_mac_82599EB:
   3489 		case ixgbe_mac_X540:
   3490 		default:
   3491 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3492 			break;
   3493 		}
   3494 
   3495 	}
   3496 
   3497 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3498 		u32 dmatxctl, rttdcs;
   3499 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3500 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3501 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3502 		/* Disable arbiter to set MTQC */
   3503 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3504 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3505 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3506 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3507 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3508 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3509 	}
   3510 
   3511 	return;
   3512 }
   3513 
   3514 /*********************************************************************
   3515  *
   3516  *  Free all transmit rings.
   3517  *
   3518  **********************************************************************/
   3519 static void
   3520 ixgbe_free_transmit_structures(struct adapter *adapter)
   3521 {
   3522 	struct tx_ring *txr = adapter->tx_rings;
   3523 
   3524 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3525 		ixgbe_free_transmit_buffers(txr);
   3526 		ixgbe_dma_free(adapter, &txr->txdma);
   3527 		IXGBE_TX_LOCK_DESTROY(txr);
   3528 	}
   3529 	free(adapter->tx_rings, M_DEVBUF);
   3530 }
   3531 
   3532 /*********************************************************************
   3533  *
   3534  *  Free transmit ring related data structures.
   3535  *
   3536  **********************************************************************/
   3537 static void
   3538 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3539 {
   3540 	struct adapter *adapter = txr->adapter;
   3541 	struct ixgbe_tx_buf *tx_buffer;
   3542 	int             i;
   3543 
   3544 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
   3545 
   3546 	if (txr->tx_buffers == NULL)
   3547 		return;
   3548 
   3549 	tx_buffer = txr->tx_buffers;
   3550 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3551 		if (tx_buffer->m_head != NULL) {
   3552 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3553 			    0, tx_buffer->m_head->m_pkthdr.len,
   3554 			    BUS_DMASYNC_POSTWRITE);
   3555 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3556 			m_freem(tx_buffer->m_head);
   3557 			tx_buffer->m_head = NULL;
   3558 			if (tx_buffer->map != NULL) {
   3559 				ixgbe_dmamap_destroy(txr->txtag,
   3560 				    tx_buffer->map);
   3561 				tx_buffer->map = NULL;
   3562 			}
   3563 		} else if (tx_buffer->map != NULL) {
   3564 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3565 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3566 			tx_buffer->map = NULL;
   3567 		}
   3568 	}
   3569 #ifndef IXGBE_LEGACY_TX
   3570 	if (txr->br != NULL)
   3571 		buf_ring_free(txr->br, M_DEVBUF);
   3572 #endif
   3573 	if (txr->tx_buffers != NULL) {
   3574 		free(txr->tx_buffers, M_DEVBUF);
   3575 		txr->tx_buffers = NULL;
   3576 	}
   3577 	if (txr->txtag != NULL) {
   3578 		ixgbe_dma_tag_destroy(txr->txtag);
   3579 		txr->txtag = NULL;
   3580 	}
   3581 	return;
   3582 }
   3583 
   3584 /*********************************************************************
   3585  *
   3586  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3587  *
   3588  **********************************************************************/
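         /*
          * Layout of the context descriptor's vlan_macip_lens word, as
          * implied by the shift macros used below (82599/X540 layout):
          *   bits  8:0   IPLEN  - IP header length
          *   bits 15:9   MACLEN - L2 header length (MACLEN_SHIFT == 9)
          *   bits 31:16  VLAN   - 802.1q tag (VLAN_SHIFT == 16)
          */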
   3589 
   3590 static int
   3591 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3592     u32 *cmd_type_len, u32 *olinfo_status)
   3593 {
   3594 	struct m_tag *mtag;
   3595 	struct adapter *adapter = txr->adapter;
   3596 	struct ethercom *ec = &adapter->osdep.ec;
   3597 	struct ixgbe_adv_tx_context_desc *TXD;
   3598 	struct ether_vlan_header *eh;
   3599 	struct ip ip;
   3600 	struct ip6_hdr ip6;
   3601 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3602 	int	ehdrlen, ip_hlen = 0;
   3603 	u16	etype;
   3604 	u8	ipproto __diagused = 0;
   3605 	int	offload = TRUE;
   3606 	int	ctxd = txr->next_avail_desc;
   3607 	u16	vtag = 0;
   3608 
   3609 	/* First check if TSO is to be used */
   3610 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3611 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3612 
   3613 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3614 		offload = FALSE;
   3615 
   3616 	/* Indicate the whole packet as payload when not doing TSO */
    3617 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3618 
   3619 	/* Now ready a context descriptor */
   3620 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3621 
   3622 	/*
   3623 	** In advanced descriptors the vlan tag must
   3624 	** be placed into the context descriptor. Hence
   3625 	** we need to make one even if not doing offloads.
   3626 	*/
   3627 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3628 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3629 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3630 	} else if (offload == FALSE) /* ... no offload to do */
   3631 		return 0;
   3632 
   3633 	/*
   3634 	 * Determine where frame payload starts.
   3635 	 * Jump over vlan headers if already present,
   3636 	 * helpful for QinQ too.
   3637 	 */
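         	/* ehdrlen is ETHER_HDR_LEN (14), or 18 with an 802.1q tag. */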
   3638 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3639 	eh = mtod(mp, struct ether_vlan_header *);
   3640 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3641 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3642 		etype = ntohs(eh->evl_proto);
   3643 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3644 	} else {
   3645 		etype = ntohs(eh->evl_encap_proto);
   3646 		ehdrlen = ETHER_HDR_LEN;
   3647 	}
   3648 
   3649 	/* Set the ether header length */
   3650 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3651 
   3652 	switch (etype) {
   3653 	case ETHERTYPE_IP:
   3654 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3655 		ip_hlen = ip.ip_hl << 2;
   3656 		ipproto = ip.ip_p;
   3657 #if 0
   3658 		ip.ip_sum = 0;
   3659 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3660 #else
   3661 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3662 		    ip.ip_sum == 0);
   3663 #endif
   3664 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3665 		break;
   3666 	case ETHERTYPE_IPV6:
   3667 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3668 		ip_hlen = sizeof(ip6);
   3669 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3670 		ipproto = ip6.ip6_nxt;
   3671 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3672 		break;
   3673 	default:
   3674 		break;
   3675 	}
   3676 
   3677 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3678 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3679 
   3680 	vlan_macip_lens |= ip_hlen;
   3681 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3682 
   3683 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3684 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3685 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3686 		KASSERT(ipproto == IPPROTO_TCP);
   3687 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3688 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3689 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3690 		KASSERT(ipproto == IPPROTO_UDP);
   3691 	}
   3692 
   3693 	/* Now copy bits into descriptor */
   3694 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3695 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3696 	TXD->seqnum_seed = htole32(0);
   3697 	TXD->mss_l4len_idx = htole32(0);
   3698 
   3699 	/* We've consumed the first desc, adjust counters */
   3700 	if (++ctxd == txr->num_desc)
   3701 		ctxd = 0;
   3702 	txr->next_avail_desc = ctxd;
   3703 	--txr->tx_avail;
   3704 
    3705 	return 0;
   3706 }
   3707 
   3708 /**********************************************************************
   3709  *
   3710  *  Setup work for hardware segmentation offload (TSO) on
   3711  *  adapters using advanced tx descriptors
   3712  *
   3713  **********************************************************************/
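         /*
          * Note: for TSO the TCP checksum must be seeded with the
          * pseudo-header sum computed over the addresses and protocol
          * only (no length term); the hardware folds in the per-segment
          * payload length as it splits the payload.  That is why the
          * in_cksum_phdr()/in6_cksum_phdr() calls below contribute no
          * length.
          */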
   3714 static int
   3715 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3716     u32 *cmd_type_len, u32 *olinfo_status)
   3717 {
   3718 	struct m_tag *mtag;
   3719 	struct adapter *adapter = txr->adapter;
   3720 	struct ethercom *ec = &adapter->osdep.ec;
   3721 	struct ixgbe_adv_tx_context_desc *TXD;
   3722 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3723 	u32 mss_l4len_idx = 0, paylen;
   3724 	u16 vtag = 0, eh_type;
   3725 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3726 	struct ether_vlan_header *eh;
   3727 #ifdef INET6
   3728 	struct ip6_hdr *ip6;
   3729 #endif
   3730 #ifdef INET
   3731 	struct ip *ip;
   3732 #endif
   3733 	struct tcphdr *th;
   3734 
   3735 
   3736 	/*
   3737 	 * Determine where frame payload starts.
   3738 	 * Jump over vlan headers if already present
   3739 	 */
   3740 	eh = mtod(mp, struct ether_vlan_header *);
   3741 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3742 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3743 		eh_type = eh->evl_proto;
   3744 	} else {
   3745 		ehdrlen = ETHER_HDR_LEN;
   3746 		eh_type = eh->evl_encap_proto;
   3747 	}
   3748 
   3749 	switch (ntohs(eh_type)) {
   3750 #ifdef INET6
   3751 	case ETHERTYPE_IPV6:
   3752 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3753 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3754 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3755 			return (ENXIO);
   3756 		ip_hlen = sizeof(struct ip6_hdr);
   3757 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3758 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3759 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3760 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3761 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3762 		break;
   3763 #endif
   3764 #ifdef INET
   3765 	case ETHERTYPE_IP:
   3766 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3767 		if (ip->ip_p != IPPROTO_TCP)
   3768 			return (ENXIO);
   3769 		ip->ip_sum = 0;
   3770 		ip_hlen = ip->ip_hl << 2;
   3771 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3772 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3773 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3774 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3775 		/* Tell transmit desc to also do IPv4 checksum. */
   3776 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3777 		break;
   3778 #endif
   3779 	default:
   3780 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3781 		    __func__, ntohs(eh_type));
   3782 		break;
   3783 	}
   3784 
   3785 	ctxd = txr->next_avail_desc;
   3786 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3787 
   3788 	tcp_hlen = th->th_off << 2;
   3789 
   3790 	/* This is used in the transmit desc in encap */
   3791 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3792 
   3793 	/* VLAN MACLEN IPLEN */
   3794 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3795 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    3796 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3797 	}
   3798 
   3799 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3800 	vlan_macip_lens |= ip_hlen;
   3801 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3802 
   3803 	/* ADV DTYPE TUCMD */
   3804 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3805 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3806 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3807 
   3808 	/* MSS L4LEN IDX */
   3809 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3810 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3811 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3812 
   3813 	TXD->seqnum_seed = htole32(0);
   3814 
   3815 	if (++ctxd == txr->num_desc)
   3816 		ctxd = 0;
   3817 
   3818 	txr->tx_avail--;
   3819 	txr->next_avail_desc = ctxd;
   3820 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3821 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3822 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3823 	++txr->tso_tx.ev_count;
   3824 	return (0);
   3825 }
   3826 
   3827 #ifdef IXGBE_FDIR
    3828 /*
    3829 ** This routine parses packet headers so that Flow
    3830 ** Director can make a hashed filter table entry,
    3831 ** allowing traffic flows to be identified and kept
    3832 ** on the same cpu.  Parsing every packet would be a
    3833 ** performance hit, so we only sample one packet in
    3834 ** every IXGBE_FDIR_RATE.
    3835 */
   3836 static void
   3837 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3838 {
   3839 	struct adapter			*adapter = txr->adapter;
   3840 	struct ix_queue			*que;
   3841 	struct ip			*ip;
   3842 	struct tcphdr			*th;
   3843 	struct udphdr			*uh;
   3844 	struct ether_vlan_header	*eh;
   3845 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3846 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3847 	int  				ehdrlen, ip_hlen;
   3848 	u16				etype;
   3849 
   3850 	eh = mtod(mp, struct ether_vlan_header *);
   3851 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3852 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3853 		etype = eh->evl_proto;
   3854 	} else {
   3855 		ehdrlen = ETHER_HDR_LEN;
   3856 		etype = eh->evl_encap_proto;
   3857 	}
   3858 
   3859 	/* Only handling IPv4 */
   3860 	if (etype != htons(ETHERTYPE_IP))
   3861 		return;
   3862 
   3863 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3864 	ip_hlen = ip->ip_hl << 2;
   3865 
   3866 	/* check if we're UDP or TCP */
   3867 	switch (ip->ip_p) {
   3868 	case IPPROTO_TCP:
   3869 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3870 		/* src and dst are inverted */
   3871 		common.port.dst ^= th->th_sport;
   3872 		common.port.src ^= th->th_dport;
   3873 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3874 		break;
   3875 	case IPPROTO_UDP:
   3876 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3877 		/* src and dst are inverted */
   3878 		common.port.dst ^= uh->uh_sport;
   3879 		common.port.src ^= uh->uh_dport;
   3880 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3881 		break;
   3882 	default:
   3883 		return;
   3884 	}
   3885 
   3886 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3887 	if (mp->m_pkthdr.ether_vtag)
   3888 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3889 	else
   3890 		common.flex_bytes ^= etype;
   3891 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3892 
   3893 	que = &adapter->queues[txr->me];
   3894 	/*
   3895 	** This assumes the Rx queue and Tx
   3896 	** queue are bound to the same CPU
   3897 	*/
   3898 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3899 	    input, common, que->msix);
   3900 }
   3901 #endif /* IXGBE_FDIR */
   3902 
   3903 /**********************************************************************
   3904  *
   3905  *  Examine each tx_buffer in the used queue. If the hardware is done
   3906  *  processing the packet then free associated resources. The
   3907  *  tx_buffer is put back on the free queue.
   3908  *
   3909  **********************************************************************/
   3910 static void
   3911 ixgbe_txeof(struct tx_ring *txr)
   3912 {
   3913 	struct adapter		*adapter = txr->adapter;
   3914 	struct ifnet		*ifp = adapter->ifp;
   3915 	u32			work, processed = 0;
   3916 	u16			limit = txr->process_limit;
   3917 	struct ixgbe_tx_buf	*buf;
   3918 	union ixgbe_adv_tx_desc *txd;
   3919 	struct timeval now, elapsed;
   3920 
   3921 	KASSERT(mutex_owned(&txr->tx_mtx));
   3922 
   3923 #ifdef DEV_NETMAP
   3924 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3925 		struct netmap_adapter *na = NA(ifp);
   3926 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3927 		txd = txr->tx_base;
   3928 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3929 		    BUS_DMASYNC_POSTREAD);
   3930 		/*
   3931 		 * In netmap mode, all the work is done in the context
   3932 		 * of the client thread. Interrupt handlers only wake up
   3933 		 * clients, which may be sleeping on individual rings
   3934 		 * or on a global resource for all rings.
   3935 		 * To implement tx interrupt mitigation, we wake up the client
   3936 		 * thread roughly every half ring, even if the NIC interrupts
   3937 		 * more frequently. This is implemented as follows:
   3938 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3939 		 *   the slot that should wake up the thread (nkr_num_slots
   3940 		 *   means the user thread should not be woken up);
   3941 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3942 		 *   or the slot has the DD bit set.
   3943 		 */
   3944 		if (!netmap_mitigate ||
   3945 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3946 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3947 			netmap_tx_irq(ifp, txr->me);
   3948 		}
   3949 		return;
   3950 	}
   3951 #endif /* DEV_NETMAP */
   3952 
   3953 	if (txr->tx_avail == txr->num_desc) {
   3954 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3955 		return;
   3956 	}
   3957 
   3958 	/* Get work starting point */
   3959 	work = txr->next_to_clean;
   3960 	buf = &txr->tx_buffers[work];
   3961 	txd = &txr->tx_base[work];
   3962 	work -= txr->num_desc; /* The distance to ring end */
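         	/*
         	 * "work" now runs from -(distance to ring end) up toward
         	 * zero, so the wrap tests below can simply check for !work
         	 * instead of comparing against num_desc on every step.
         	 */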
    3963 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3964 	    BUS_DMASYNC_POSTREAD);
   3965 	do {
    3966 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3967 		if (eop == NULL) /* No work */
   3968 			break;
   3969 
   3970 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3971 			break;	/* I/O not complete */
   3972 
   3973 		if (buf->m_head) {
   3974 			txr->bytes +=
   3975 			    buf->m_head->m_pkthdr.len;
   3976 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3977 			    buf->map,
   3978 			    0, buf->m_head->m_pkthdr.len,
   3979 			    BUS_DMASYNC_POSTWRITE);
   3980 			ixgbe_dmamap_unload(txr->txtag,
   3981 			    buf->map);
   3982 			m_freem(buf->m_head);
   3983 			buf->m_head = NULL;
   3984 			/*
   3985 			 * NetBSD: Don't override buf->map with NULL here.
   3986 			 * It'll panic when a ring runs one lap around.
   3987 			 */
   3988 		}
   3989 		buf->eop = NULL;
   3990 		++txr->tx_avail;
   3991 
   3992 		/* We clean the range if multi segment */
   3993 		while (txd != eop) {
   3994 			++txd;
   3995 			++buf;
   3996 			++work;
   3997 			/* wrap the ring? */
   3998 			if (__predict_false(!work)) {
   3999 				work -= txr->num_desc;
   4000 				buf = txr->tx_buffers;
   4001 				txd = txr->tx_base;
   4002 			}
   4003 			if (buf->m_head) {
   4004 				txr->bytes +=
   4005 				    buf->m_head->m_pkthdr.len;
   4006 				bus_dmamap_sync(txr->txtag->dt_dmat,
   4007 				    buf->map,
   4008 				    0, buf->m_head->m_pkthdr.len,
   4009 				    BUS_DMASYNC_POSTWRITE);
   4010 				ixgbe_dmamap_unload(txr->txtag,
   4011 				    buf->map);
   4012 				m_freem(buf->m_head);
   4013 				buf->m_head = NULL;
   4014 				/*
   4015 				 * NetBSD: Don't override buf->map with NULL
   4016 				 * here. It'll panic when a ring runs one lap
   4017 				 * around.
   4018 				 */
   4019 			}
   4020 			++txr->tx_avail;
   4021 			buf->eop = NULL;
   4022 
   4023 		}
   4024 		++txr->packets;
   4025 		++processed;
   4026 		++ifp->if_opackets;
   4027 		getmicrotime(&txr->watchdog_time);
   4028 
   4029 		/* Try the next packet */
   4030 		++txd;
   4031 		++buf;
   4032 		++work;
   4033 		/* reset with a wrap */
   4034 		if (__predict_false(!work)) {
   4035 			work -= txr->num_desc;
   4036 			buf = txr->tx_buffers;
   4037 			txd = txr->tx_base;
   4038 		}
   4039 		prefetch(txd);
   4040 	} while (__predict_true(--limit));
   4041 
   4042 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   4043 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4044 
   4045 	work += txr->num_desc;
   4046 	txr->next_to_clean = work;
   4047 
    4048 	/*
    4049 	** Watchdog calculation: we know there is work
    4050 	** outstanding or the first return above would
    4051 	** have been taken, so nothing processed for too
    4052 	** long indicates a hang.
    4053 	*/
   4054 	getmicrotime(&now);
   4055 	timersub(&now, &txr->watchdog_time, &elapsed);
   4056 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   4057 		txr->queue_status = IXGBE_QUEUE_HUNG;
   4058 
   4059 	if (txr->tx_avail == txr->num_desc)
   4060 		txr->queue_status = IXGBE_QUEUE_IDLE;
   4061 
   4062 	return;
   4063 }
   4064 
   4065 /*********************************************************************
   4066  *
    4067  *  Refresh mbuf buffers for RX descriptor rings.
    4068  *   - The routine keeps its own state, so discards due to resource
    4069  *     exhaustion are unnecessary: if an mbuf cannot be obtained it
    4070  *     just returns, keeping its placeholder, and can simply be
    4071  *     called again later to retry.
   4072  *
   4073  **********************************************************************/
   4074 static void
   4075 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   4076 {
   4077 	struct adapter		*adapter = rxr->adapter;
   4078 	struct ixgbe_rx_buf	*rxbuf;
   4079 	struct mbuf		*mp;
   4080 	int			i, j, error;
   4081 	bool			refreshed = false;
   4082 
   4083 	i = j = rxr->next_to_refresh;
   4084 	/* Control the loop with one beyond */
   4085 	if (++j == rxr->num_desc)
   4086 		j = 0;
   4087 
   4088 	while (j != limit) {
   4089 		rxbuf = &rxr->rx_buffers[i];
   4090 		if (rxbuf->buf == NULL) {
   4091 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4092 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   4093 			if (mp == NULL) {
   4094 				rxr->no_jmbuf.ev_count++;
   4095 				goto update;
   4096 			}
   4097 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4098 				m_adj(mp, ETHER_ALIGN);
   4099 		} else
   4100 			mp = rxbuf->buf;
   4101 
   4102 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4103 		/* If we're dealing with an mbuf that was copied rather
   4104 		 * than replaced, there's no need to go through busdma.
   4105 		 */
   4106 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4107 			/* Get the memory mapping */
   4108 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4109 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4110 			if (error != 0) {
   4111 				printf("Refresh mbufs: payload dmamap load"
   4112 				    " failure - %d\n", error);
   4113 				m_free(mp);
   4114 				rxbuf->buf = NULL;
   4115 				goto update;
   4116 			}
   4117 			rxbuf->buf = mp;
   4118 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4119 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4120 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4121 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4122 		} else {
   4123 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4124 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4125 		}
   4126 
   4127 		refreshed = true;
   4128 		/* Next is precalculated */
   4129 		i = j;
   4130 		rxr->next_to_refresh = i;
   4131 		if (++j == rxr->num_desc)
   4132 			j = 0;
   4133 	}
   4134 update:
   4135 	if (refreshed) /* Update hardware tail index */
   4136 		IXGBE_WRITE_REG(&adapter->hw,
   4137 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4138 	return;
   4139 }
   4140 
   4141 /*********************************************************************
   4142  *
   4143  *  Allocate memory for rx_buffer structures. Since we use one
   4144  *  rx_buffer per received packet, the maximum number of rx_buffer's
   4145  *  that we'll need is equal to the number of receive descriptors
   4146  *  that we've allocated.
   4147  *
   4148  **********************************************************************/
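         /*
          * Each RX descriptor gets one contiguous mapping (nsegments == 1,
          * up to MJUM16BYTES), which matches the one-buffer advanced
          * descriptor format (IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF) programmed
          * in ixgbe_initialize_receive_units().
          */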
   4149 static int
   4150 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4151 {
   4152 	struct	adapter 	*adapter = rxr->adapter;
   4153 	device_t 		dev = adapter->dev;
   4154 	struct ixgbe_rx_buf 	*rxbuf;
   4155 	int             	i, bsize, error;
   4156 
   4157 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4158 	if (!(rxr->rx_buffers =
   4159 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4160 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4161 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4162 		error = ENOMEM;
   4163 		goto fail;
   4164 	}
   4165 
   4166 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4167 				   1, 0,	/* alignment, bounds */
   4168 				   MJUM16BYTES,		/* maxsize */
   4169 				   1,			/* nsegments */
   4170 				   MJUM16BYTES,		/* maxsegsize */
   4171 				   0,			/* flags */
   4172 				   &rxr->ptag))) {
   4173 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4174 		goto fail;
   4175 	}
   4176 
    4177 	for (i = 0; i < rxr->num_desc; i++) {
   4178 		rxbuf = &rxr->rx_buffers[i];
   4179 		error = ixgbe_dmamap_create(rxr->ptag,
   4180 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4181 		if (error) {
   4182 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4183 			goto fail;
   4184 		}
   4185 	}
   4186 
   4187 	return (0);
   4188 
   4189 fail:
   4190 	/* Frees all, but can handle partial completion */
   4191 	ixgbe_free_receive_structures(adapter);
   4192 	return (error);
   4193 }
   4194 
   4195 /*
   4196 ** Used to detect a descriptor that has
   4197 ** been merged by Hardware RSC.
   4198 */
   4199 static inline u32
   4200 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4201 {
   4202 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4203 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4204 }
   4205 
   4206 /*********************************************************************
   4207  *
    4208  *  Initialize the Hardware RSC (LRO) feature on 82599
    4209  *  for an RX ring; this is toggled by the LRO capability
    4210  *  even though it is transparent to the stack.
    4211  *
    4212  *  NOTE: since this HW feature only works with IPv4 and
    4213  *        our testing has shown soft LRO to be as effective,
    4214  *        I have decided to disable it by default.
   4215  *
   4216  **********************************************************************/
   4217 static void
   4218 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4219 {
   4220 	struct	adapter 	*adapter = rxr->adapter;
   4221 	struct	ixgbe_hw	*hw = &adapter->hw;
   4222 	u32			rscctrl, rdrxctl;
   4223 
   4224 	/* If turning LRO/RSC off we need to disable it */
   4225 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4226 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4227 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
    4228 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		/*
         		 * Write the cleared enable bit back to the hardware;
         		 * without this the read-modify above has no effect.
         		 */
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    4229 		return;
   4230 
   4231 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4232 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4233 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4234 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4235 #endif /* DEV_NETMAP */
   4236 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4237 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4238 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4239 
   4240 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4241 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4242 	/*
   4243 	** Limit the total number of descriptors that
   4244 	** can be combined, so it does not exceed 64K
   4245 	*/
   4246 	if (rxr->mbuf_sz == MCLBYTES)
   4247 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4248 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4249 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4250 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4251 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4252 	else  /* Using 16K cluster */
   4253 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4254 
   4255 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4256 
   4257 	/* Enable TCP header recognition */
   4258 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4259 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4260 	    IXGBE_PSRTYPE_TCPHDR));
   4261 
   4262 	/* Disable RSC for ACK packets */
   4263 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4264 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4265 
   4266 	rxr->hw_rsc = TRUE;
   4267 }
   4268 
   4269 
   4270 static void
   4271 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4272 {
   4273 	struct ixgbe_rx_buf       *rxbuf;
   4274 	int i;
   4275 
   4276 	for (i = 0; i < rxr->num_desc; i++) {
   4277 		rxbuf = &rxr->rx_buffers[i];
   4278 		if (rxbuf->buf != NULL) {
   4279 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4280 			    0, rxbuf->buf->m_pkthdr.len,
   4281 			    BUS_DMASYNC_POSTREAD);
   4282 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4283 			rxbuf->buf->m_flags |= M_PKTHDR;
   4284 			m_freem(rxbuf->buf);
   4285 			rxbuf->buf = NULL;
   4286 			rxbuf->flags = 0;
   4287 		}
   4288 	}
   4289 }
   4290 
   4291 
   4292 /*********************************************************************
   4293  *
   4294  *  Initialize a receive ring and its buffers.
   4295  *
   4296  **********************************************************************/
   4297 static int
   4298 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4299 {
   4300 	struct	adapter 	*adapter;
   4301 	struct ixgbe_rx_buf	*rxbuf;
   4302 #ifdef LRO
   4303 	struct ifnet		*ifp;
   4304 	struct lro_ctrl		*lro = &rxr->lro;
   4305 #endif /* LRO */
   4306 	int			rsize, error = 0;
   4307 #ifdef DEV_NETMAP
   4308 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4309 	struct netmap_slot *slot;
   4310 #endif /* DEV_NETMAP */
   4311 
   4312 	adapter = rxr->adapter;
   4313 #ifdef LRO
   4314 	ifp = adapter->ifp;
   4315 #endif /* LRO */
   4316 
   4317 	/* Clear the ring contents */
   4318 	IXGBE_RX_LOCK(rxr);
   4319 #ifdef DEV_NETMAP
   4320 	/* same as in ixgbe_setup_transmit_ring() */
   4321 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4322 #endif /* DEV_NETMAP */
   4323 	rsize = roundup2(adapter->num_rx_desc *
   4324 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4325 	bzero((void *)rxr->rx_base, rsize);
   4326 	/* Cache the size */
   4327 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4328 
   4329 	/* Free current RX buffer structs and their mbufs */
   4330 	ixgbe_free_receive_ring(rxr);
   4331 
   4332 	IXGBE_RX_UNLOCK(rxr);
   4333 
   4334 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4335 	 * or size of jumbo mbufs may have changed.
   4336 	 */
   4337 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4338 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4339 
   4340 	IXGBE_RX_LOCK(rxr);
   4341 
   4342 	/* Now replenish the mbufs */
   4343 	for (int j = 0; j != rxr->num_desc; ++j) {
   4344 		struct mbuf	*mp;
   4345 
   4346 		rxbuf = &rxr->rx_buffers[j];
   4347 #ifdef DEV_NETMAP
   4348 		/*
   4349 		 * In netmap mode, fill the map and set the buffer
   4350 		 * address in the NIC ring, considering the offset
   4351 		 * between the netmap and NIC rings (see comment in
   4352 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4353 		 * an mbuf, so end the block with a continue;
   4354 		 */
   4355 		if (slot) {
   4356 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4357 			uint64_t paddr;
   4358 			void *addr;
   4359 
   4360 			addr = PNMB(na, slot + sj, &paddr);
   4361 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
   4362 			/* Update descriptor and the cached value */
   4363 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4364 			rxbuf->addr = htole64(paddr);
   4365 			continue;
   4366 		}
   4367 #endif /* DEV_NETMAP */
   4368 		rxbuf->flags = 0;
   4369 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4370 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4371 		if (rxbuf->buf == NULL) {
   4372 			error = ENOBUFS;
    4373 			goto fail;
   4374 		}
   4375 		mp = rxbuf->buf;
   4376 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4377 		/* Get the memory mapping */
   4378 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4379 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4380 		if (error != 0)
    4381 			goto fail;
   4382 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4383 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4384 		/* Update the descriptor and the cached value */
   4385 		rxr->rx_base[j].read.pkt_addr =
   4386 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4387 		rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4388 	}
   4389 
   4390 
   4391 	/* Setup our descriptor indices */
   4392 	rxr->next_to_check = 0;
   4393 	rxr->next_to_refresh = 0;
   4394 	rxr->lro_enabled = FALSE;
   4395 	rxr->rx_copies.ev_count = 0;
   4396 	rxr->rx_bytes.ev_count = 0;
   4397 	rxr->vtag_strip = FALSE;
   4398 
   4399 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4400 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4401 
   4402 	/*
   4403 	** Now set up the LRO interface:
   4404 	*/
   4405 	if (ixgbe_rsc_enable)
   4406 		ixgbe_setup_hw_rsc(rxr);
   4407 #ifdef LRO
   4408 	else if (ifp->if_capenable & IFCAP_LRO) {
   4409 		device_t dev = adapter->dev;
   4410 		int err = tcp_lro_init(lro);
   4411 		if (err) {
   4412 			device_printf(dev, "LRO Initialization failed!\n");
   4413 			goto fail;
   4414 		}
   4415 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4416 		rxr->lro_enabled = TRUE;
   4417 		lro->ifp = adapter->ifp;
   4418 	}
   4419 #endif /* LRO */
   4420 
   4421 	IXGBE_RX_UNLOCK(rxr);
   4422 	return (0);
   4423 
   4424 fail:
   4425 	ixgbe_free_receive_ring(rxr);
   4426 	IXGBE_RX_UNLOCK(rxr);
   4427 	return (error);
   4428 }
   4429 
   4430 /*********************************************************************
   4431  *
   4432  *  Initialize all receive rings.
   4433  *
   4434  **********************************************************************/
   4435 static int
   4436 ixgbe_setup_receive_structures(struct adapter *adapter)
   4437 {
   4438 	struct rx_ring *rxr = adapter->rx_rings;
   4439 	int j;
   4440 
   4441 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4442 		if (ixgbe_setup_receive_ring(rxr))
   4443 			goto fail;
   4444 
   4445 	return (0);
   4446 fail:
   4447 	/*
   4448 	 * Free RX buffers allocated so far, we will only handle
   4449 	 * the rings that completed, the failing case will have
    4450 	 * cleaned up for itself.  'j' failed, so it's the terminus.
   4451 	 */
   4452 	for (int i = 0; i < j; ++i) {
   4453 		rxr = &adapter->rx_rings[i];
   4454 		ixgbe_free_receive_ring(rxr);
   4455 	}
   4456 
   4457 	return (ENOBUFS);
   4458 }
   4459 
   4460 static void
   4461 ixgbe_initialise_rss_mapping(struct adapter *adapter)
   4462 {
   4463 	struct ixgbe_hw	*hw = &adapter->hw;
   4464 	uint32_t reta;
   4465 	int i, j, queue_id;
   4466 	uint32_t rss_key[10];
   4467 	uint32_t mrqc;
   4468 #ifdef	RSS
   4469 	uint32_t rss_hash_config;
   4470 #endif
   4471 
   4472 	/* Setup RSS */
   4473 	reta = 0;
   4474 
   4475 #ifdef	RSS
   4476 	/* Fetch the configured RSS key */
   4477 	rss_getkey((uint8_t *) &rss_key);
   4478 #else
   4479 	/* set up random bits */
   4480 	cprng_fast(&rss_key, sizeof(rss_key));
   4481 #endif
   4482 
   4483 	/* Set up the redirection table */
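         	/*
         	 * 128 entries, packed four 8-bit entries per 32-bit RETA
         	 * register, so a register is flushed every fourth pass.  In
         	 * the non-RSS case with four queues the entries cycle through
         	 * 0x00, 0x11, 0x22 and 0x33.
         	 */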
   4484 	for (i = 0, j = 0; i < 128; i++, j++) {
   4485 		if (j == adapter->num_queues) j = 0;
   4486 #ifdef	RSS
   4487 		/*
   4488 		 * Fetch the RSS bucket id for the given indirection entry.
   4489 		 * Cap it at the number of configured buckets (which is
   4490 		 * num_queues.)
   4491 		 */
   4492 		queue_id = rss_get_indirection_to_bucket(i);
   4493 		queue_id = queue_id % adapter->num_queues;
   4494 #else
   4495 		queue_id = (j * 0x11);
   4496 #endif
   4497 		/*
   4498 		 * The low 8 bits are for hash value (n+0);
   4499 		 * The next 8 bits are for hash value (n+1), etc.
   4500 		 */
   4501 		reta = reta >> 8;
   4502 		reta = reta | ( ((uint32_t) queue_id) << 24);
   4503 		if ((i & 3) == 3) {
   4504 			IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4505 			reta = 0;
   4506 		}
   4507 	}
   4508 
   4509 	/* Now fill our hash function seeds */
   4510 	for (i = 0; i < 10; i++)
   4511 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
   4512 
   4513 	/* Perform hash on these packet types */
   4514 #ifdef	RSS
   4515 	mrqc = IXGBE_MRQC_RSSEN;
   4516 	rss_hash_config = rss_gethashconfig();
   4517 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
   4518 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
   4519 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
   4520 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
   4521 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
   4522 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
   4523 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
   4524 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
   4525 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
   4526 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
   4527 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
   4528 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
   4529 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
   4530 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
   4531 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
   4532 		device_printf(adapter->dev,
   4533 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
   4534 		    "but not supported\n", __func__);
   4535 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
   4536 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
   4537 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
   4538 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4539 #else
   4540 	/*
   4541 	 * Disable UDP - IP fragments aren't currently being handled
   4542 	 * and so we end up with a mix of 2-tuple and 4-tuple
   4543 	 * traffic.
   4544 	 */
   4545 	mrqc = IXGBE_MRQC_RSSEN
   4546 	     | IXGBE_MRQC_RSS_FIELD_IPV4
   4547 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4548 #if 0
   4549 	     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4550 #endif
   4551 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4552 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4553 	     | IXGBE_MRQC_RSS_FIELD_IPV6
   4554 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4555 #if 0
   4556 	     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4557 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
   4558 #endif
   4559 	;
   4560 #endif /* RSS */
   4561 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4562 }
   4563 
   4564 
   4565 /*********************************************************************
   4566  *
   4567  *  Setup receive registers and features.
   4568  *
   4569  **********************************************************************/
   4570 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4571 
   4572 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
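         /*
          * SRRCTL.BSIZEPKT is expressed in 1 KB units (the shift is 10 in
          * the shared ixgbe headers), so BSIZEPKT_ROUNDUP rounds the
          * receive buffer size up to a whole kilobyte before it is shifted
          * into the register field below.
          */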
   4573 
   4574 static void
   4575 ixgbe_initialize_receive_units(struct adapter *adapter)
   4576 {
   4577 	int i;
   4578 	struct	rx_ring	*rxr = adapter->rx_rings;
   4579 	struct ixgbe_hw	*hw = &adapter->hw;
   4580 	struct ifnet   *ifp = adapter->ifp;
   4581 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4582 	u32		hlreg;
   4583 
   4584 
   4585 	/*
   4586 	 * Make sure receives are disabled while
   4587 	 * setting up the descriptor ring
   4588 	 */
   4589 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4590 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4591 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4592 
   4593 	/* Enable broadcasts */
   4594 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4595 	fctrl |= IXGBE_FCTRL_BAM;
   4596 	fctrl |= IXGBE_FCTRL_DPF;
   4597 	fctrl |= IXGBE_FCTRL_PMCF;
   4598 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4599 
   4600 	/* Set for Jumbo Frames? */
   4601 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4602 	if (ifp->if_mtu > ETHERMTU)
   4603 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4604 	else
   4605 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4606 #ifdef DEV_NETMAP
   4607 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4608 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4609 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4610 	else
   4611 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4612 #endif /* DEV_NETMAP */
   4613 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4614 
   4615 	bufsz = (adapter->rx_mbuf_sz +
   4616 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4617 
   4618 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4619 		u64 rdba = rxr->rxdma.dma_paddr;
   4620 
   4621 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4622 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4623 			       (rdba & 0x00000000ffffffffULL));
   4624 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4625 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4626 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4627 
   4628 		/* Set up the SRRCTL register */
   4629 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4630 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4631 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4632 		srrctl |= bufsz;
   4633 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4634 
   4635 		/*
   4636 		 * Set DROP_EN iff we have no flow control and >1 queue.
   4637 		 * Note that srrctl was cleared shortly before during reset,
   4638 		 * so we do not need to clear the bit, but do it just in case
   4639 		 * this code is moved elsewhere.
   4640 		 */
   4641 		if (adapter->num_queues > 1 &&
   4642 		    adapter->fc == ixgbe_fc_none) {
   4643 			srrctl |= IXGBE_SRRCTL_DROP_EN;
   4644 		} else {
   4645 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   4646 		}
   4647 
   4648 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4649 
   4650 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4651 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4652 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4653 
   4654 		/* Set the processing limit */
   4655 		rxr->process_limit = ixgbe_rx_process_limit;
   4656 	}
   4657 
   4658 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4659 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4660 			      IXGBE_PSRTYPE_UDPHDR |
   4661 			      IXGBE_PSRTYPE_IPV4HDR |
   4662 			      IXGBE_PSRTYPE_IPV6HDR;
   4663 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4664 	}
   4665 
   4666 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4667 
   4668 	ixgbe_initialise_rss_mapping(adapter);
   4669 
   4670 	if (adapter->num_queues > 1) {
   4671 		/* RSS and RX IPP Checksum are mutually exclusive */
   4672 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4673 	}
   4674 
   4675 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4676 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4677 
   4678 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4679 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4680 
   4681 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4682 
   4683 	return;
   4684 }
   4685 
   4686 /*********************************************************************
   4687  *
   4688  *  Free all receive rings.
   4689  *
   4690  **********************************************************************/
   4691 static void
   4692 ixgbe_free_receive_structures(struct adapter *adapter)
   4693 {
   4694 	struct rx_ring *rxr = adapter->rx_rings;
   4695 
   4696 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
   4697 
   4698 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4699 #ifdef LRO
   4700 		struct lro_ctrl		*lro = &rxr->lro;
   4701 #endif /* LRO */
   4702 		ixgbe_free_receive_buffers(rxr);
   4703 #ifdef LRO
   4704 		/* Free LRO memory */
   4705 		tcp_lro_free(lro);
   4706 #endif /* LRO */
   4707 		/* Free the ring memory as well */
   4708 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4709 		IXGBE_RX_LOCK_DESTROY(rxr);
   4710 	}
   4711 
   4712 	free(adapter->rx_rings, M_DEVBUF);
   4713 }
   4714 
   4715 
   4716 /*********************************************************************
   4717  *
   4718  *  Free receive ring data structures
   4719  *
   4720  **********************************************************************/
   4721 static void
   4722 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4723 {
   4724 	struct adapter		*adapter = rxr->adapter;
   4725 	struct ixgbe_rx_buf	*rxbuf;
   4726 
   4727 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
   4728 
   4729 	/* Cleanup any existing buffers */
   4730 	if (rxr->rx_buffers != NULL) {
   4731 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4732 			rxbuf = &rxr->rx_buffers[i];
   4733 			if (rxbuf->buf != NULL) {
   4734 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4735 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4736 				    BUS_DMASYNC_POSTREAD);
   4737 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4738 				rxbuf->buf->m_flags |= M_PKTHDR;
   4739 				m_freem(rxbuf->buf);
   4740 			}
   4741 			rxbuf->buf = NULL;
   4742 			if (rxbuf->pmap != NULL) {
   4743 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4744 				rxbuf->pmap = NULL;
   4745 			}
   4746 		}
    4747 		free(rxr->rx_buffers, M_DEVBUF);
    4748 		rxr->rx_buffers = NULL;
   4751 	}
   4752 
   4753 	if (rxr->ptag != NULL) {
   4754 		ixgbe_dma_tag_destroy(rxr->ptag);
   4755 		rxr->ptag = NULL;
   4756 	}
   4757 
   4758 	return;
   4759 }
   4760 
   4761 static __inline void
   4762 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4763 {
   4764 	int s;
   4765 
   4766 #ifdef LRO
   4767 	struct adapter	*adapter = ifp->if_softc;
   4768 	struct ethercom *ec = &adapter->osdep.ec;
   4769 
    4770 	/*
    4771 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4772 	 * was verified by hardware, with no VLAN tag in the Ethernet
    4773 	 * header.  For IPv6 we do not yet support extension headers.
    4774 	 */
    4775 	if (rxr->lro_enabled &&
    4776 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    4777 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    4778 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4779 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    4780 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4781 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    4782 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    4783 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4784 		/*
    4785 		 * Send to the stack if:
    4786 		 *  - LRO not enabled, or
    4787 		 *  - no LRO resources, or
    4788 		 *  - lro enqueue fails
    4789 		 */
    4790 		if (rxr->lro.lro_cnt != 0)
    4791 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    4792 				return;
    4793 	}
   4794 #endif /* LRO */
   4795 
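         	/*
         	 * Note: the ring lock is dropped while the packet is handed
         	 * to BPF and the stack; if_input() may re-enter the driver,
         	 * and holding the RX lock across it could recurse or stall.
         	 */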
   4796 	IXGBE_RX_UNLOCK(rxr);
   4797 
   4798 	s = splnet();
   4799 	/* Pass this up to any BPF listeners. */
   4800 	bpf_mtap(ifp, m);
   4801 	(*ifp->if_input)(ifp, m);
   4802 	splx(s);
   4803 
   4804 	IXGBE_RX_LOCK(rxr);
   4805 }
   4806 
   4807 static __inline void
   4808 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4809 {
   4810 	struct ixgbe_rx_buf	*rbuf;
   4811 
   4812 	rbuf = &rxr->rx_buffers[i];
   4813 
    4814 	/*
    4815 	** With advanced descriptors the writeback
    4816 	** clobbers the buffer addresses, so it's easier
    4817 	** to just free any existing mbufs and take
    4818 	** the normal refresh path to get new buffers
    4819 	** and mapping.
    4820 	*/
    4821 	if (rbuf->fmp != NULL) {	/* Partial chain ? */
    4822 		rbuf->fmp->m_flags |= M_PKTHDR;
    4823 		m_freem(rbuf->fmp);
    4824 		rbuf->fmp = NULL;
    4825 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
    4826 	} else if (rbuf->buf) {
    4827 		m_free(rbuf->buf);
    4828 		rbuf->buf = NULL;
    4829 	}
   4830 
   4831 	rbuf->flags = 0;
   4832 
   4833 	return;
   4834 }
   4835 
   4836 
   4837 /*********************************************************************
   4838  *
   4839  *  This routine executes in interrupt context. It replenishes
   4840  *  the mbufs in the descriptor and sends data which has been
   4841  *  dma'ed into host memory to upper layer.
   4842  *
   4843  *  We loop at most count times if count is > 0, or until done if
   4844  *  count < 0.
   4845  *
   4846  *  Return TRUE for more work, FALSE for all clean.
   4847  *********************************************************************/
   4848 static bool
   4849 ixgbe_rxeof(struct ix_queue *que)
   4850 {
   4851 	struct adapter		*adapter = que->adapter;
   4852 	struct rx_ring		*rxr = que->rxr;
   4853 	struct ifnet		*ifp = adapter->ifp;
   4854 #ifdef LRO
   4855 	struct lro_ctrl		*lro = &rxr->lro;
   4856 	struct lro_entry	*queued;
   4857 #endif /* LRO */
   4858 	int			i, nextp, processed = 0;
   4859 	u32			staterr = 0;
   4860 	u16			count = rxr->process_limit;
   4861 	union ixgbe_adv_rx_desc	*cur;
   4862 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4863 #ifdef RSS
   4864 	u16			pkt_info;
   4865 #endif
   4866 
   4867 	IXGBE_RX_LOCK(rxr);
   4868 
   4869 #ifdef DEV_NETMAP
   4870 	/* Same as the txeof routine: wakeup clients on intr. */
   4871 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
   4872 		IXGBE_RX_UNLOCK(rxr);
   4873 		return (FALSE);
   4874 	}
   4875 #endif /* DEV_NETMAP */
   4876 
   4877 	for (i = rxr->next_to_check; count != 0;) {
   4878 		struct mbuf	*sendmp, *mp;
   4879 		u32		rsc, ptype;
   4880 		u16		len;
   4881 		u16		vtag = 0;
   4882 		bool		eop;
   4883 
   4884 		/* Sync the ring. */
   4885 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4886 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4887 
   4888 		cur = &rxr->rx_base[i];
   4889 		staterr = le32toh(cur->wb.upper.status_error);
   4890 #ifdef RSS
   4891 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
   4892 #endif
   4893 
   4894 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4895 			break;
   4896 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4897 			break;
   4898 
   4899 		count--;
   4900 		sendmp = NULL;
   4901 		nbuf = NULL;
   4902 		rsc = 0;
   4903 		cur->wb.upper.status_error = 0;
   4904 		rbuf = &rxr->rx_buffers[i];
   4905 		mp = rbuf->buf;
   4906 
   4907 		len = le16toh(cur->wb.upper.length);
   4908 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4909 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4910 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4911 
   4912 		/* Make sure bad packets are discarded */
   4913 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
   4914 			rxr->rx_discarded.ev_count++;
   4915 			ixgbe_rx_discard(rxr, i);
   4916 			goto next_desc;
   4917 		}
   4918 
    4919 		/*
    4920 		** On the 82599, which supports hardware
    4921 		** LRO (called HW RSC), packets need not
    4922 		** be fragmented across sequential
    4923 		** descriptors; instead the next descriptor
    4924 		** is indicated in bits of the current one.
    4925 		** This also means we might process more
    4926 		** than one packet at a time, something
    4927 		** never true before; it required replacing
    4928 		** the global chain pointers with what we
    4929 		** are doing here.  -jfv
    4930 		*/
   4931 		if (!eop) {
   4932 			/*
   4933 			** Figure out the next descriptor
   4934 			** of this frame.
   4935 			*/
   4936 			if (rxr->hw_rsc == TRUE) {
   4937 				rsc = ixgbe_rsc_count(cur);
   4938 				rxr->rsc_num += (rsc - 1);
   4939 			}
   4940 			if (rsc) { /* Get hardware index */
   4941 				nextp = ((staterr &
   4942 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4943 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4944 			} else { /* Just sequential */
   4945 				nextp = i + 1;
   4946 				if (nextp == adapter->num_rx_desc)
   4947 					nextp = 0;
   4948 			}
   4949 			nbuf = &rxr->rx_buffers[nextp];
   4950 			prefetch(nbuf);
   4951 		}
   4952 		/*
   4953 		** Rather than using the fmp/lmp global pointers
   4954 		** we now keep the head of a packet chain in the
   4955 		** buffer struct and pass this along from one
   4956 		** descriptor to the next, until we get EOP.
   4957 		*/
   4958 		mp->m_len = len;
    4959 		/*
    4960 		** See if a previous descriptor stored a
    4961 		** packet head that we should continue.
    4962 		*/
   4963 		sendmp = rbuf->fmp;
   4964 
   4965 		if (sendmp != NULL) {  /* secondary frag */
   4966 			rbuf->buf = rbuf->fmp = NULL;
   4967 			mp->m_flags &= ~M_PKTHDR;
   4968 			sendmp->m_pkthdr.len += mp->m_len;
   4969 		} else {
   4970 			/*
   4971 			 * Optimize.  This might be a small packet,
   4972 			 * maybe just a TCP ACK.  Do a fast copy that
   4973 			 * is cache aligned into a new mbuf, and
   4974 			 * leave the old mbuf+cluster for re-use.
   4975 			 */
   4976 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4977 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4978 				if (sendmp != NULL) {
   4979 					sendmp->m_data +=
   4980 					    IXGBE_RX_COPY_ALIGN;
   4981 					ixgbe_bcopy(mp->m_data,
   4982 					    sendmp->m_data, len);
   4983 					sendmp->m_len = len;
   4984 					rxr->rx_copies.ev_count++;
   4985 					rbuf->flags |= IXGBE_RX_COPY;
   4986 				}
   4987 			}
   4988 			if (sendmp == NULL) {
   4989 				rbuf->buf = rbuf->fmp = NULL;
   4990 				sendmp = mp;
   4991 			}
   4992 
   4993 			/* first desc of a non-ps chain */
   4994 			sendmp->m_flags |= M_PKTHDR;
   4995 			sendmp->m_pkthdr.len = mp->m_len;
   4996 		}
   4997 		++processed;
   4998 		/* Pass the head pointer on */
   4999 		if (eop == 0) {
   5000 			nbuf->fmp = sendmp;
   5001 			sendmp = NULL;
   5002 			mp->m_next = nbuf->buf;
   5003 		} else { /* Sending this frame */
   5004 			sendmp->m_pkthdr.rcvif = ifp;
   5005 			ifp->if_ipackets++;
   5006 			rxr->rx_packets.ev_count++;
   5007 			/* capture data for AIM */
   5008 			rxr->bytes += sendmp->m_pkthdr.len;
   5009 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   5010 			/* Process vlan info */
   5011 			if ((rxr->vtag_strip) &&
   5012 			    (staterr & IXGBE_RXD_STAT_VP))
   5013 				vtag = le16toh(cur->wb.upper.vlan);
   5014 			if (vtag) {
   5015 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   5016 				    printf("%s: could not apply VLAN "
   5017 					"tag", __func__));
   5018 			}
   5019 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   5020 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   5021 				   &adapter->stats);
   5022 			}
   5023 #if __FreeBSD_version >= 800000
   5024 #ifdef RSS
   5025 			sendmp->m_pkthdr.flowid =
   5026 			    le32toh(cur->wb.lower.hi_dword.rss);
   5027 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
   5028 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
   5029 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
   5030 				break;
   5031 			case IXGBE_RXDADV_RSSTYPE_IPV4:
   5032 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
   5033 				break;
   5034 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
   5035 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
   5036 				break;
   5037 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
   5038 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
   5039 				break;
   5040 			case IXGBE_RXDADV_RSSTYPE_IPV6:
   5041 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
   5042 				break;
   5043 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
   5044 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
   5045 				break;
   5046 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
   5047 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
   5048 				break;
   5049 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
   5050 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
   5051 				break;
   5052 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
   5053 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
   5054 				break;
   5055 			default:
   5056 				/* XXX fallthrough */
   5057 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5058 				break;
   5059 			}
   5060 #else /* RSS */
   5061 			sendmp->m_pkthdr.flowid = que->msix;
   5062 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
   5063 #endif /* RSS */
   5064 #endif /* FreeBSD_version */
   5065 		}
   5066 next_desc:
   5067 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   5068 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   5069 
   5070 		/* Advance our pointers to the next descriptor. */
   5071 		if (++i == rxr->num_desc)
   5072 			i = 0;
   5073 
   5074 		/* Now send to the stack or do LRO */
   5075 		if (sendmp != NULL) {
   5076 			rxr->next_to_check = i;
   5077 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   5078 			i = rxr->next_to_check;
   5079 		}
   5080 
    5081 		/* Refresh mbufs every 8 processed descriptors */
   5082 		if (processed == 8) {
   5083 			ixgbe_refresh_mbufs(rxr, i);
   5084 			processed = 0;
   5085 		}
   5086 	}
   5087 
   5088 	/* Refresh any remaining buf structs */
   5089 	if (ixgbe_rx_unrefreshed(rxr))
   5090 		ixgbe_refresh_mbufs(rxr, i);
   5091 
   5092 	rxr->next_to_check = i;
   5093 
   5094 #ifdef LRO
   5095 	/*
   5096 	 * Flush any outstanding LRO work
   5097 	 */
   5098 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   5099 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   5100 		tcp_lro_flush(lro, queued);
   5101 	}
   5102 #endif /* LRO */
   5103 
   5104 	IXGBE_RX_UNLOCK(rxr);
   5105 
   5106 	/*
   5107 	** Still have cleaning to do?
   5108 	*/
   5109 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
   5110 		return true;
   5111 	else
   5112 		return false;
   5113 }
   5114 
   5115 
   5116 /*********************************************************************
   5117  *
   5118  *  Verify that the hardware indicated that the checksum is valid.
   5119  *  Inform the stack about the status of checksum so that stack
   5120  *  doesn't spend time verifying the checksum.
   5121  *
   5122  *********************************************************************/
   5123 static void
   5124 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   5125     struct ixgbe_hw_stats *stats)
   5126 {
   5127 	u16	status = (u16) staterr;
   5128 	u8	errors = (u8) (staterr >> 24);
   5129 #if 0
   5130 	bool	sctp = FALSE;
   5131 
   5132 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   5133 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   5134 		sctp = TRUE;
   5135 #endif
   5136 
   5137 	if (status & IXGBE_RXD_STAT_IPCS) {
   5138 		stats->ipcs.ev_count++;
   5139 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   5140 			/* IP Checksum Good */
   5141 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   5142 
   5143 		} else {
   5144 			stats->ipcs_bad.ev_count++;
   5145 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   5146 		}
   5147 	}
   5148 	if (status & IXGBE_RXD_STAT_L4CS) {
   5149 		stats->l4cs.ev_count++;
   5150 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   5151 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   5152 			mp->m_pkthdr.csum_flags |= type;
   5153 		} else {
   5154 			stats->l4cs_bad.ev_count++;
   5155 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   5156 		}
   5157 	}
   5158 	return;
   5159 }
   5160 
   5161 
   5162 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
    5163 /*
    5164 ** This routine is run via a vlan config EVENT;
    5165 ** it enables us to use the HW filter table since
    5166 ** we can get the vlan id.  It just creates the
    5167 ** entry in the soft version of the VFTA; init
    5168 ** will repopulate the real table.
    5169 */
   5170 static void
   5171 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5172 {
   5173 	struct adapter	*adapter = ifp->if_softc;
   5174 	u16		index, bit;
   5175 
   5176 	if (ifp->if_softc !=  arg)   /* Not our event */
   5177 		return;
   5178 
   5179 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5180 		return;
   5181 
   5182 	IXGBE_CORE_LOCK(adapter);
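         	/*
         	 * The VFTA is 128 32-bit words covering all 4096 VLAN IDs:
         	 * bits 11:5 of the tag select the word and bits 4:0 the bit
         	 * within it, e.g. vtag 100 -> index 3, bit 4.
         	 */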
   5183 	index = (vtag >> 5) & 0x7F;
   5184 	bit = vtag & 0x1F;
   5185 	adapter->shadow_vfta[index] |= (1 << bit);
   5186 	ixgbe_setup_vlan_hw_support(adapter);
   5187 	IXGBE_CORE_UNLOCK(adapter);
   5188 }
   5189 
    5190 /*
    5191 ** This routine is run via a vlan
    5192 ** unconfig EVENT; remove our entry
    5193 ** from the soft VFTA.
    5194 */
   5195 static void
   5196 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   5197 {
   5198 	struct adapter	*adapter = ifp->if_softc;
   5199 	u16		index, bit;
   5200 
   5201 	if (ifp->if_softc !=  arg)
   5202 		return;
   5203 
   5204 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   5205 		return;
   5206 
   5207 	IXGBE_CORE_LOCK(adapter);
   5208 	index = (vtag >> 5) & 0x7F;
   5209 	bit = vtag & 0x1F;
   5210 	adapter->shadow_vfta[index] &= ~(1 << bit);
   5211 	/* Re-init to load the changes */
   5212 	ixgbe_setup_vlan_hw_support(adapter);
   5213 	IXGBE_CORE_UNLOCK(adapter);
   5214 }
   5215 #endif
   5216 
   5217 static void
   5218 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   5219 {
   5220 	struct ethercom *ec = &adapter->osdep.ec;
   5221 	struct ixgbe_hw *hw = &adapter->hw;
   5222 	struct rx_ring	*rxr;
   5223 	u32		ctrl;
   5224 
    5225 	/*
    5226 	** We get here via init_locked, meaning a soft
    5227 	** reset, which has already cleared the VFTA and
    5228 	** other state; if no VLANs have been registered,
    5229 	** there is nothing to do.
    5230 	*/
    5231 	if (!VLAN_ATTACHED(ec)) {
   5232 		return;
   5233 	}
   5234 
   5235 	/* Setup the queues for vlans */
   5236 	for (int i = 0; i < adapter->num_queues; i++) {
   5237 		rxr = &adapter->rx_rings[i];
    5238 		/* On 82599 the VLAN enable is per-queue in RXDCTL */
   5239 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5240 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5241 			ctrl |= IXGBE_RXDCTL_VME;
   5242 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5243 		}
   5244 		rxr->vtag_strip = TRUE;
   5245 	}
   5246 
   5247 	if ((ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) == 0)
   5248 		return;
   5249 
    5250 	/*
    5251 	** A soft reset zeroes out the VFTA, so
    5252 	** we need to repopulate it now.
    5253 	*/
   5254 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5255 		if (adapter->shadow_vfta[i] != 0)
   5256 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5257 			    adapter->shadow_vfta[i]);
   5258 
   5259 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5260 	/* Enable the Filter Table if enabled */
   5261 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5262 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5263 		ctrl |= IXGBE_VLNCTRL_VFE;
   5264 	}
   5265 	if (hw->mac.type == ixgbe_mac_82598EB)
   5266 		ctrl |= IXGBE_VLNCTRL_VME;
   5267 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5268 }
   5269 
   5270 static void
   5271 ixgbe_enable_intr(struct adapter *adapter)
   5272 {
   5273 	struct ixgbe_hw	*hw = &adapter->hw;
   5274 	struct ix_queue	*que = adapter->queues;
   5275 	u32		mask, fwsm;
   5276 
   5277 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5278 	/* Enable Fan Failure detection */
   5279 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5280 		mask |= IXGBE_EIMS_GPI_SDP1;
   5281 
   5282 	switch (adapter->hw.mac.type) {
   5283 		case ixgbe_mac_82599EB:
   5284 			mask |= IXGBE_EIMS_ECC;
   5285 			mask |= IXGBE_EIMS_GPI_SDP0;
   5286 			mask |= IXGBE_EIMS_GPI_SDP1;
   5287 			mask |= IXGBE_EIMS_GPI_SDP2;
   5288 #ifdef IXGBE_FDIR
   5289 			mask |= IXGBE_EIMS_FLOW_DIR;
   5290 #endif
   5291 			break;
   5292 		case ixgbe_mac_X540:
   5293 			mask |= IXGBE_EIMS_ECC;
   5294 			/* Detect if Thermal Sensor is enabled */
   5295 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5296 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5297 				mask |= IXGBE_EIMS_TS;
   5298 #ifdef IXGBE_FDIR
   5299 			mask |= IXGBE_EIMS_FLOW_DIR;
   5300 #endif
   5301 		/* falls through */
   5302 		default:
   5303 			break;
   5304 	}
   5305 
   5306 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5307 
   5308 	/* With RSS we use auto clear */
   5309 	if (adapter->msix_mem) {
   5310 		mask = IXGBE_EIMS_ENABLE_MASK;
   5311 		/* Don't autoclear Link */
   5312 		mask &= ~IXGBE_EIMS_OTHER;
   5313 		mask &= ~IXGBE_EIMS_LSC;
   5314 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5315 	}
   5316 
    5317 	/*
    5318 	** Now enable all queues; this is done separately to
    5319 	** allow for handling the extended (beyond 32) MSI-X
    5320 	** vectors that can be used by the 82599.
    5321 	*/
    5322 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5323 		ixgbe_enable_queue(adapter, que->msix);
   5324 
   5325 	IXGBE_WRITE_FLUSH(hw);
   5326 
   5327 	return;
   5328 }
   5329 
   5330 static void
   5331 ixgbe_disable_intr(struct adapter *adapter)
   5332 {
   5333 	if (adapter->msix_mem)
   5334 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
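         	/*
         	 * The 82598 keeps every cause in the single EIMS/EIMC pair;
         	 * later MACs move the queue causes into EIMS_EX/EIMC_EX to
         	 * support more than 32 MSI-X vectors, so below we mask the
         	 * non-queue causes via the upper half of EIMC and all queue
         	 * vectors via EIMC_EX.
         	 */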
   5335 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5336 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5337 	} else {
   5338 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5339 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5340 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5341 	}
   5342 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5343 	return;
   5344 }
   5345 
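         /*
         ** NetBSD's pci_conf_read()/pci_conf_write() operate on aligned
         ** 32-bit dwords, so the two helpers below emulate the 16-bit
         ** config-space accesses the shared code expects by extracting,
         ** or read-modify-writing, the relevant half of the containing
         ** dword.
         */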
   5346 u16
   5347 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5348 {
   5349 	switch (reg % 4) {
   5350 	case 0:
   5351 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5352 		    __BITS(15, 0);
   5353 	case 2:
   5354 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5355 		    reg - 2), __BITS(31, 16));
   5356 	default:
    5357 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5358 		break;
   5359 	}
   5360 }
   5361 
   5362 void
   5363 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5364 {
   5365 	pcireg_t old;
   5366 
   5367 	switch (reg % 4) {
   5368 	case 0:
   5369 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5370 		    __BITS(31, 16);
   5371 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5372 		break;
   5373 	case 2:
   5374 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5375 		    __BITS(15, 0);
   5376 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5377 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5378 		break;
   5379 	default:
    5380 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5381 		break;
   5382 	}
   5383 
   5384 	return;
   5385 }
   5386 
   5387 /*
   5388 ** Get the width and transaction speed of
   5389 ** the slot this adapter is plugged into.
   5390 */
   5391 static void
   5392 ixgbe_get_slot_info(struct ixgbe_hw *hw)
   5393 {
   5394 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
   5395 	struct ixgbe_mac_info	*mac = &hw->mac;
   5396 	u16			link;
   5397 
   5398 	/* For most devices simply call the shared code routine */
   5399 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
   5400 		ixgbe_get_bus_info(hw);
   5401 		goto display;
   5402 	}
   5403 
    5404 	/*
    5405 	** For the Quad port adapter we need to walk back
    5406 	** up the PCI tree to find the speed of the expansion
    5407 	** slot into which this adapter is plugged.  A bit more work.
    5408 	*/
   5409 	dev = device_parent(device_parent(dev));
   5410 #ifdef IXGBE_DEBUG
   5411 	device_printf(dev, "parent pcib = %x,%x,%x\n",
   5412 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5413 #endif
   5414 	dev = device_parent(device_parent(dev));
   5415 #ifdef IXGBE_DEBUG
   5416 	device_printf(dev, "slot pcib = %x,%x,%x\n",
   5417 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
   5418 #endif
   5419 	/* Now get the PCI Express Capabilities offset */
   5420 	/* ...and read the Link Status Register */
   5421 	link = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS);
   5422 	switch (link & IXGBE_PCI_LINK_WIDTH) {
   5423 	case IXGBE_PCI_LINK_WIDTH_1:
   5424 		hw->bus.width = ixgbe_bus_width_pcie_x1;
   5425 		break;
   5426 	case IXGBE_PCI_LINK_WIDTH_2:
   5427 		hw->bus.width = ixgbe_bus_width_pcie_x2;
   5428 		break;
   5429 	case IXGBE_PCI_LINK_WIDTH_4:
   5430 		hw->bus.width = ixgbe_bus_width_pcie_x4;
   5431 		break;
   5432 	case IXGBE_PCI_LINK_WIDTH_8:
   5433 		hw->bus.width = ixgbe_bus_width_pcie_x8;
   5434 		break;
   5435 	default:
   5436 		hw->bus.width = ixgbe_bus_width_unknown;
   5437 		break;
   5438 	}
   5439 
   5440 	switch (link & IXGBE_PCI_LINK_SPEED) {
   5441 	case IXGBE_PCI_LINK_SPEED_2500:
   5442 		hw->bus.speed = ixgbe_bus_speed_2500;
   5443 		break;
   5444 	case IXGBE_PCI_LINK_SPEED_5000:
   5445 		hw->bus.speed = ixgbe_bus_speed_5000;
   5446 		break;
   5447 	case IXGBE_PCI_LINK_SPEED_8000:
   5448 		hw->bus.speed = ixgbe_bus_speed_8000;
   5449 		break;
   5450 	default:
   5451 		hw->bus.speed = ixgbe_bus_speed_unknown;
   5452 		break;
   5453 	}
   5454 
   5455 	mac->ops.set_lan_id(hw);
   5456 
   5457 display:
   5458 	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
   5459 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
   5460 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
   5461 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
   5462 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
   5463 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
   5464 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
   5465 	    ("Unknown"));
   5466 
   5467 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5468 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
   5469 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
   5470 		device_printf(dev, "PCI-Express bandwidth available"
   5471 		    " for this card\n     is not sufficient for"
   5472 		    " optimal performance.\n");
   5473 		device_printf(dev, "For optimal performance a x8 "
   5474 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
   5475         }
   5476 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
   5477 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
   5478 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
   5479 		device_printf(dev, "PCI-Express bandwidth available"
   5480 		    " for this card\n     is not sufficient for"
   5481 		    " optimal performance.\n");
   5482 		device_printf(dev, "For optimal performance a x8 "
   5483 		    "PCIE Gen3 slot is required.\n");
   5484         }
   5485 
   5486 	return;
   5487 }
   5488 
   5489 
   5490 /*
   5491 ** Setup the correct IVAR register for a particular MSIX interrupt
   5492 **   (yes this is all very magic and confusing :)
   5493 **  - entry is the register array entry
   5494 **  - vector is the MSIX vector for this queue
   5495 **  - type is RX/TX/MISC
   5496 */
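         /*
         ** Layout sketch: on the 82598 each 32-bit IVAR holds four 8-bit
         ** entries, with TX causes banked 64 above the RX ones (hence
         ** entry += type * 64).  On 82599/X540 each IVAR covers two
         ** queues with the RX and TX bytes for each queue adjacent,
         ** which is what the 16 * (entry & 1) + 8 * type bit offset
         ** below selects.
         */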
   5497 static void
   5498 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5499 {
   5500 	struct ixgbe_hw *hw = &adapter->hw;
   5501 	u32 ivar, index;
   5502 
   5503 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5504 
   5505 	switch (hw->mac.type) {
   5506 
   5507 	case ixgbe_mac_82598EB:
   5508 		if (type == -1)
   5509 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5510 		else
   5511 			entry += (type * 64);
   5512 		index = (entry >> 2) & 0x1F;
   5513 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5514 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5515 		ivar |= (vector << (8 * (entry & 0x3)));
   5516 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5517 		break;
   5518 
   5519 	case ixgbe_mac_82599EB:
   5520 	case ixgbe_mac_X540:
   5521 		if (type == -1) { /* MISC IVAR */
   5522 			index = (entry & 1) * 8;
   5523 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5524 			ivar &= ~(0xFF << index);
   5525 			ivar |= (vector << index);
   5526 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5527 		} else {	/* RX/TX IVARS */
   5528 			index = (16 * (entry & 1)) + (8 * type);
   5529 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5530 			ivar &= ~(0xFF << index);
   5531 			ivar |= (vector << index);
   5532 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
   5533 		}
   5534 
   5535 	default:
   5536 		break;
   5537 	}
   5538 }
   5539 
   5540 static void
   5541 ixgbe_configure_ivars(struct adapter *adapter)
   5542 {
   5543 	struct  ix_queue *que = adapter->queues;
   5544 	u32 newitr;
   5545 
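         	/*
         	 * EITR keeps its interval in bits 11:3, so 4000000/rate is
         	 * already pre-shifted; e.g. a cap of 8000 ints/s gives
         	 * (4000000 / 8000) & 0x0FF8 = 0x1F0, an interval field of
         	 * 62, which reads back as roughly 500000 / 62 ~ 8064
         	 * interrupts per second.
         	 */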
   5546 	if (ixgbe_max_interrupt_rate > 0)
   5547 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5548 	else
   5549 		newitr = 0;
   5550 
    5551 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5552 		/* First the RX queue entry */
    5553 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5554 		/* ... and the TX */
    5555 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5556 		/* Set an Initial EITR value */
    5557 		IXGBE_WRITE_REG(&adapter->hw,
    5558 		    IXGBE_EITR(que->msix), newitr);
    5559 	}
    5560 
    5561 	/* For the Link interrupt */
    5562 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5563 }
   5564 
   5565 /*
    5566 ** ixgbe_sfp_probe - called from the local timer to
    5567 ** determine whether a port has had optics inserted.
   5568 */
   5569 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5570 {
   5571 	struct ixgbe_hw	*hw = &adapter->hw;
   5572 	device_t	dev = adapter->dev;
   5573 	bool		result = FALSE;
   5574 
   5575 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5576 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
    5577 		s32 ret = hw->phy.ops.identify_sfp(hw);
    5578 		if (ret)
    5579 			goto out;
    5580 		ret = hw->phy.ops.reset(hw);
    5581 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5582 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5583 			device_printf(dev, "Reload driver with supported module.\n");
    5584 			adapter->sfp_probe = FALSE;
    5585 			goto out;
    5586 		} else
    5587 			device_printf(dev, "SFP+ module detected!\n");
   5588 		/* We now have supported optics */
   5589 		adapter->sfp_probe = FALSE;
   5590 		/* Set the optics type so system reports correctly */
   5591 		ixgbe_setup_optics(adapter);
   5592 		result = TRUE;
   5593 	}
   5594 out:
   5595 	return (result);
   5596 }
   5597 
    5598 /*
    5599 ** Tasklet handler for MSI-X Link interrupts
    5600 **  - run outside the interrupt handler since it might sleep
    5601 */
   5602 static void
   5603 ixgbe_handle_link(void *context)
   5604 {
   5605 	struct adapter  *adapter = context;
   5606 
   5607 	if (ixgbe_check_link(&adapter->hw,
   5608 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
   5609 	    ixgbe_update_link_status(adapter);
   5610 }
   5611 
   5612 /*
   5613 ** Tasklet for handling SFP module interrupts
   5614 */
   5615 static void
   5616 ixgbe_handle_mod(void *context)
   5617 {
   5618 	struct adapter  *adapter = context;
   5619 	struct ixgbe_hw *hw = &adapter->hw;
   5620 	device_t	dev = adapter->dev;
   5621 	u32 err;
   5622 
   5623 	err = hw->phy.ops.identify_sfp(hw);
   5624 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5625 		device_printf(dev,
   5626 		    "Unsupported SFP+ module type was detected.\n");
   5627 		return;
   5628 	}
   5629 	err = hw->mac.ops.setup_sfp(hw);
   5630 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5631 		device_printf(dev,
   5632 		    "Setup failure - unsupported SFP+ module type.\n");
   5633 		return;
   5634 	}
   5635 	softint_schedule(adapter->msf_si);
   5636 	return;
   5637 }
   5638 
   5639 
   5640 /*
   5641 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5642 */
   5643 static void
   5644 ixgbe_handle_msf(void *context)
   5645 {
   5646 	struct adapter  *adapter = context;
   5647 	struct ixgbe_hw *hw = &adapter->hw;
   5648 	u32 autoneg;
   5649 	bool negotiate;
   5650 
   5651 	autoneg = hw->phy.autoneg_advertised;
   5652 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5653 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5654 	else
   5655 		negotiate = 0;
   5656 	if (hw->mac.ops.setup_link)
   5657 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5658 	return;
   5659 }
   5660 
   5661 #ifdef IXGBE_FDIR
   5662 /*
   5663 ** Tasklet for reinitializing the Flow Director filter table
   5664 */
   5665 static void
   5666 ixgbe_reinit_fdir(void *context)
   5667 {
   5668 	struct adapter  *adapter = context;
   5669 	struct ifnet   *ifp = adapter->ifp;
   5670 
   5671 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5672 		return;
   5673 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5674 	adapter->fdir_reinit = 0;
   5675 	/* re-enable flow director interrupts */
   5676 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5677 	/* Restart the interface */
   5678 	ifp->if_flags |= IFF_RUNNING;
   5679 	return;
   5680 }
   5681 #endif
   5682 
   5683 /**********************************************************************
   5684  *
   5685  *  Update the board statistics counters.
   5686  *
   5687  **********************************************************************/
   5688 static void
   5689 ixgbe_update_stats_counters(struct adapter *adapter)
   5690 {
   5691 	struct ifnet   *ifp = adapter->ifp;
   5692 	struct ixgbe_hw *hw = &adapter->hw;
   5693 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5694 	u64  total_missed_rx = 0;
   5695 	uint64_t crcerrs, rlec;
   5696 
   5697 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5698 	adapter->stats.crcerrs.ev_count += crcerrs;
   5699 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5700 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5701 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5702 
    5703 	/*
    5704 	** Note: these are for the 8 possible traffic classes,
    5705 	**	 which the current implementation does not use,
    5706 	**	 so only class 0 should report real data.
    5707 	*/
   5708 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5709 		int j = i % adapter->num_queues;
   5710 		u32 mp;
   5711 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5712 		/* missed_rx tallies misses for the gprc workaround */
   5713 		missed_rx += mp;
   5714 		/* global total per queue */
    5715 		adapter->stats.mpc[j].ev_count += mp;
   5716 		/* Running comprehensive total for stats display */
   5717 		total_missed_rx += mp;
   5718 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5719 			adapter->stats.rnbc[j] +=
   5720 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5721 			adapter->stats.qbtc[j].ev_count +=
   5722 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5723 			adapter->stats.qbrc[j].ev_count +=
   5724 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5725 			adapter->stats.pxonrxc[j].ev_count +=
   5726 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5727 		} else {
   5728 			adapter->stats.pxonrxc[j].ev_count +=
   5729 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5730 		}
   5731 		adapter->stats.pxontxc[j].ev_count +=
   5732 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5733 		adapter->stats.pxofftxc[j].ev_count +=
   5734 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5735 		adapter->stats.pxoffrxc[j].ev_count +=
   5736 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5737 		adapter->stats.pxon2offc[j].ev_count +=
   5738 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5739 	}
   5740 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5741 		int j = i % adapter->num_queues;
   5742 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5743 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5744 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5745 	}
   5746 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5747 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5748 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5749 	adapter->stats.rlec.ev_count += rlec;
   5750 
   5751 	/* Hardware workaround, gprc counts missed packets */
   5752 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5753 
   5754 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5755 	adapter->stats.lxontxc.ev_count += lxon;
   5756 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5757 	adapter->stats.lxofftxc.ev_count += lxoff;
   5758 	total = lxon + lxoff;
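         	/*
         	 * total is used below to back the pause frames out of the
         	 * good transmit octet/packet counts, since the hardware
         	 * apparently counts each XON/XOFF frame as a minimum-length
         	 * (64 byte) frame.
         	 */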
   5759 
   5760 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5761 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5762 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5763 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5764 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5765 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5766 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5767 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5768 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5769 	} else {
   5770 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5771 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5772 		/* 82598 only has a counter in the high register */
   5773 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5774 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5775 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5776 	}
   5777 
   5778 	/*
   5779 	 * Workaround: mprc hardware is incorrectly counting
   5780 	 * broadcasts, so for now we subtract those.
   5781 	 */
   5782 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5783 	adapter->stats.bprc.ev_count += bprc;
    5784 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) -
         	    ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5785 
   5786 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5787 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5788 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5789 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5790 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5791 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5792 
   5793 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5794 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5795 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5796 
   5797 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5798 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5799 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5800 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5801 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5802 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5803 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5804 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5805 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5806 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5807 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5808 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5809 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5810 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5811 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5812 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5813 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5814 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5815 
   5816 	/* Only read FCOE on 82599 */
   5817 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5818 		adapter->stats.fcoerpdc.ev_count +=
   5819 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5820 		adapter->stats.fcoeprc.ev_count +=
   5821 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5822 		adapter->stats.fcoeptc.ev_count +=
   5823 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5824 		adapter->stats.fcoedwrc.ev_count +=
   5825 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5826 		adapter->stats.fcoedwtc.ev_count +=
   5827 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5828 	}
   5829 
   5830 	/* Fill out the OS statistics structure */
   5831 	/*
   5832 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5833 	 * adapter->stats counters. It's required to make ifconfig -z
   5834 	 * (SOICZIFDATA) work.
   5835 	 */
   5836 	ifp->if_collisions = 0;
   5837 
   5838 	/* Rx Errors */
   5839 	ifp->if_iqdrops += total_missed_rx;
   5840 	ifp->if_ierrors += crcerrs + rlec;
   5841 }
   5842 
   5843 /** ixgbe_sysctl_tdh_handler - Handler function
   5844  *  Retrieves the TDH value from the hardware
   5845  */
   5846 static int
   5847 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5848 {
   5849 	struct sysctlnode node;
   5850 	uint32_t val;
   5851 	struct tx_ring *txr;
   5852 
   5853 	node = *rnode;
   5854 	txr = (struct tx_ring *)node.sysctl_data;
   5855 	if (txr == NULL)
   5856 		return 0;
   5857 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5858 	node.sysctl_data = &val;
   5859 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5860 }
   5861 
   5862 /** ixgbe_sysctl_tdt_handler - Handler function
   5863  *  Retrieves the TDT value from the hardware
   5864  */
   5865 static int
   5866 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5867 {
   5868 	struct sysctlnode node;
   5869 	uint32_t val;
   5870 	struct tx_ring *txr;
   5871 
   5872 	node = *rnode;
   5873 	txr = (struct tx_ring *)node.sysctl_data;
   5874 	if (txr == NULL)
   5875 		return 0;
   5876 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5877 	node.sysctl_data = &val;
   5878 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5879 }
   5880 
   5881 /** ixgbe_sysctl_rdh_handler - Handler function
   5882  *  Retrieves the RDH value from the hardware
   5883  */
   5884 static int
   5885 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5886 {
   5887 	struct sysctlnode node;
   5888 	uint32_t val;
   5889 	struct rx_ring *rxr;
   5890 
   5891 	node = *rnode;
   5892 	rxr = (struct rx_ring *)node.sysctl_data;
   5893 	if (rxr == NULL)
   5894 		return 0;
   5895 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5896 	node.sysctl_data = &val;
   5897 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5898 }
   5899 
   5900 /** ixgbe_sysctl_rdt_handler - Handler function
   5901  *  Retrieves the RDT value from the hardware
   5902  */
   5903 static int
   5904 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5905 {
   5906 	struct sysctlnode node;
   5907 	uint32_t val;
   5908 	struct rx_ring *rxr;
   5909 
   5910 	node = *rnode;
   5911 	rxr = (struct rx_ring *)node.sysctl_data;
   5912 	if (rxr == NULL)
   5913 		return 0;
   5914 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5915 	node.sysctl_data = &val;
   5916 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5917 }
   5918 
   5919 static int
   5920 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5921 {
   5922 	int error;
   5923 	struct sysctlnode node;
   5924 	struct ix_queue *que;
   5925 	uint32_t reg, usec, rate;
   5926 
   5927 	node = *rnode;
   5928 	que = (struct ix_queue *)node.sysctl_data;
   5929 	if (que == NULL)
   5930 		return 0;
   5931 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5932 	usec = ((reg & 0x0FF8) >> 3);
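         	/*
         	 * Inverse of the EITR math in ixgbe_configure_ivars():
         	 * usec holds the raw interval field (bits 11:3 shifted
         	 * down) and 500000 / usec recovers the approximate
         	 * interrupt rate cap.
         	 */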
   5933 	if (usec > 0)
   5934 		rate = 500000 / usec;
   5935 	else
   5936 		rate = 0;
   5937 	node.sysctl_data = &rate;
   5938 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5939 	if (error)
   5940 		return error;
   5941 	reg &= ~0xfff; /* default, no limitation */
   5942 	ixgbe_max_interrupt_rate = 0;
   5943 	if (rate > 0 && rate < 500000) {
   5944 		if (rate < 1000)
   5945 			rate = 1000;
   5946 		ixgbe_max_interrupt_rate = rate;
    5947 		reg |= ((4000000 / rate) & 0xff8);
   5948 	}
   5949 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5950 	return 0;
   5951 }
   5952 
   5953 const struct sysctlnode *
   5954 ixgbe_sysctl_instance(struct adapter *adapter)
   5955 {
   5956 	const char *dvname;
   5957 	struct sysctllog **log;
   5958 	int rc;
   5959 	const struct sysctlnode *rnode;
   5960 
   5961 	log = &adapter->sysctllog;
   5962 	dvname = device_xname(adapter->dev);
   5963 
   5964 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5965 	    0, CTLTYPE_NODE, dvname,
   5966 	    SYSCTL_DESCR("ixgbe information and settings"),
   5967 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5968 		goto err;
   5969 
   5970 	return rnode;
   5971 err:
   5972 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5973 	return NULL;
   5974 }
   5975 
   5976 /*
   5977  * Add sysctl variables, one per statistic, to the system.
   5978  */
   5979 static void
   5980 ixgbe_add_hw_stats(struct adapter *adapter)
   5981 {
   5982 	device_t dev = adapter->dev;
   5983 	const struct sysctlnode *rnode, *cnode;
   5984 	struct sysctllog **log = &adapter->sysctllog;
   5985 	struct tx_ring *txr = adapter->tx_rings;
   5986 	struct rx_ring *rxr = adapter->rx_rings;
   5987 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5988 
   5989 	/* Driver Statistics */
   5990 #if 0
   5991 	/* These counters are not updated by the software */
   5992 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5993 			CTLFLAG_RD, &adapter->dropped_pkts,
   5994 			"Driver dropped packets");
   5995 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   5996 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   5997 			"???");
   5998 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   5999 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   6000 			"???");
   6001 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   6002 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   6003 			"???");
   6004 #endif
   6005 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   6006 	    NULL, device_xname(dev), "Handled queue in softint");
   6007 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   6008 	    NULL, device_xname(dev), "Requeued in softint");
   6009 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   6010 	    NULL, device_xname(dev), "Interrupt handler more rx");
   6011 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   6012 	    NULL, device_xname(dev), "Interrupt handler more tx");
   6013 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   6014 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   6015 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   6016 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   6017 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   6018 	    NULL, device_xname(dev), "m_defrag() failed");
   6019 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   6020 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   6021 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   6022 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   6023 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   6024 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   6025 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   6026 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   6027 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   6028 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   6029 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   6030 	    NULL, device_xname(dev), "Watchdog timeouts");
   6031 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   6032 	    NULL, device_xname(dev), "TSO errors");
   6033 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   6034 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   6035 
   6036 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   6037 		snprintf(adapter->queues[i].evnamebuf,
   6038 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   6039 		    device_xname(dev), i);
   6040 		snprintf(adapter->queues[i].namebuf,
   6041 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   6042 
   6043 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   6044 			aprint_error_dev(dev, "could not create sysctl root\n");
   6045 			break;
   6046 		}
   6047 
   6048 		if (sysctl_createv(log, 0, &rnode, &rnode,
   6049 		    0, CTLTYPE_NODE,
   6050 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   6051 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   6052 			break;
   6053 
   6054 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6055 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   6056 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   6057 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   6058 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   6059 			break;
   6060 
   6061 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6062 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   6063 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   6064 			NULL, 0, &(adapter->queues[i].irqs),
   6065 		    0, CTL_CREATE, CTL_EOL) != 0)
   6066 			break;
   6067 
   6068 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6069 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6070 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   6071 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   6072 		    0, CTL_CREATE, CTL_EOL) != 0)
   6073 			break;
   6074 
   6075 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6076 		    CTLFLAG_READONLY, CTLTYPE_INT,
   6077 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   6078 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   6079 		    0, CTL_CREATE, CTL_EOL) != 0)
   6080 			break;
   6081 
    6082 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
    6083 		    NULL, adapter->queues[i].evnamebuf, "TSO");
   6084 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   6085 		    NULL, adapter->queues[i].evnamebuf,
   6086 		    "Queue No Descriptor Available");
   6087 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   6088 		    NULL, adapter->queues[i].evnamebuf,
   6089 		    "Queue Packets Transmitted");
   6090 
   6091 #ifdef LRO
   6092 		struct lro_ctrl *lro = &rxr->lro;
   6093 #endif /* LRO */
   6094 
   6095 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6096 		    CTLFLAG_READONLY,
   6097 		    CTLTYPE_INT,
   6098 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   6099 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   6100 		    CTL_CREATE, CTL_EOL) != 0)
   6101 			break;
   6102 
   6103 		if (sysctl_createv(log, 0, &rnode, &cnode,
   6104 		    CTLFLAG_READONLY,
   6105 		    CTLTYPE_INT,
   6106 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   6107 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   6108 		    CTL_CREATE, CTL_EOL) != 0)
   6109 			break;
   6110 
   6111 		if (i < __arraycount(adapter->stats.mpc)) {
   6112 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   6113 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6114 			    "Missed Packet Count");
   6115 		}
   6116 		if (i < __arraycount(adapter->stats.pxontxc)) {
   6117 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   6118 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6119 			    "pxontxc");
   6120 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   6121 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6122 			    "pxonrxc");
   6123 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   6124 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6125 			    "pxofftxc");
   6126 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   6127 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6128 			    "pxoffrxc");
   6129 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   6130 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6131 			    "pxon2offc");
   6132 		}
   6133 		if (i < __arraycount(adapter->stats.qprc)) {
   6134 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   6135 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6136 			    "qprc");
   6137 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   6138 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6139 			    "qptc");
   6140 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   6141 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6142 			    "qbrc");
   6143 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   6144 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6145 			    "qbtc");
   6146 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   6147 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   6148 			    "qprdc");
   6149 		}
   6150 
   6151 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   6152 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   6153 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   6154 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   6155 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   6156 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   6157 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   6158 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   6159 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   6160 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   6161 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   6162 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   6163 #ifdef LRO
   6164 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   6165 				CTLFLAG_RD, &lro->lro_queued, 0,
   6166 				"LRO Queued");
   6167 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   6168 				CTLFLAG_RD, &lro->lro_flushed, 0,
   6169 				"LRO Flushed");
   6170 #endif /* LRO */
   6171 	}
   6172 
    6173 	/* MAC stats get their own sub node */
    6174 
	snprintf(stats->namebuf,
	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));

	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP");
	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4");
	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - IP bad");
	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "rx csum offload - L4 bad");
	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Interrupt conditions zero");
	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Legacy interrupts");
	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "CRC Errors");
	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Illegal Byte Errors");
	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Byte Errors");
	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Short Packets Discarded");
	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Local Faults");
	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "MAC Remote Faults");
	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Length Errors");
	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Transmitted");
	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XON Received");
	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Transmitted");
	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Link XOFF Received");

	/* Packet Reception Stats */
	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Octets Received");
	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Received");
	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Received");
	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Received");
	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Received");
	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Received");
	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames received");
	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames received");
	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames received");
	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames received");
	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames received");
	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames received");
	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Receive Undersized");
	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Fragmented Packets Received");
	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Oversized Packets Received");
	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Received Jabber");
	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Received");
	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Checksum Errors");

	/* Packet Transmission Stats */
	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Octets Transmitted");
	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Total Packets Transmitted");
	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Good Packets Transmitted");
	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Broadcast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Multicast Packets Transmitted");
	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "Management Packets Transmitted");
	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "64 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "65-127 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "128-255 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "256-511 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "512-1023 byte frames transmitted");
	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
	    stats->namebuf, "1024-1522 byte frames transmitted");
}

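/*
** The statistics attached above are ordinary NetBSD event counters,
** grouped under the evnamebuf/namebuf names built earlier, so they
** can be inspected from userland with vmstat(1), for example:
**
**	vmstat -e | grep ixg
**
** (The "ixg" pattern assumes the usual device naming; substitute the
** actual device_xname of the attached interface.)
*/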
/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
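/*
** A hedged usage sketch: assuming the per-device sysctl tree exposes
** this handler under a leaf like the one below (the exact node name
** depends on the attach-time sysctl setup), full flow control could
** be requested from userland with:
**
**	sysctl -w hw.ixg0.flow_control=3
*/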
static int
ixgbe_set_flowcntl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, last;
	struct adapter *adapter;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	node.sysctl_data = &adapter->fc;
	last = adapter->fc;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	/* Don't bother if it's not changed */
	if (adapter->fc == last)
		return (0);

	switch (adapter->fc) {
		case ixgbe_fc_rx_pause:
		case ixgbe_fc_tx_pause:
		case ixgbe_fc_full:
			adapter->hw.fc.requested_mode = adapter->fc;
			if (adapter->num_queues > 1)
				ixgbe_disable_rx_drop(adapter);
			break;
		case ixgbe_fc_none:
			adapter->hw.fc.requested_mode = ixgbe_fc_none;
			if (adapter->num_queues > 1)
				ixgbe_enable_rx_drop(adapter);
			break;
		default:
			adapter->fc = last;
			return (EINVAL);
	}
	/* Don't autoneg if forcing a value */
	adapter->hw.fc.disable_fc_autoneg = TRUE;
	ixgbe_fc_enable(&adapter->hw);
	return 0;
}

/*
** Control link advertise speed:
**	1 - advertise only 1G
**	2 - advertise 100Mb
**	3 - advertise normal (1G + 10G)
*/
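/*
** A hedged usage sketch with an illustrative node name (the real
** leaf name comes from the attach-time sysctl setup); restricting
** advertisement to 1G would look like:
**
**	sysctl -w hw.ixg0.advertise_speed=1
*/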
static int
ixgbe_set_advertise(SYSCTLFN_ARGS)
{
	struct sysctlnode	node;
	int			t, error = 0;
	struct adapter		*adapter;
	device_t		dev;
	struct ixgbe_hw		*hw;
	ixgbe_link_speed	speed, last;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	dev = adapter->dev;
	hw = &adapter->hw;
	last = adapter->advertise;
	t = adapter->advertise;
	node.sysctl_data = &t;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	if (t == -1)
		return 0;

	/*
	 * sysctl_lookup() wrote the new value into the local 't';
	 * compare that (not the still-unchanged adapter->advertise)
	 * against the previous setting.
	 */
	if (t == last) /* no change */
		return (0);

	adapter->advertise = t;

	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
	    (hw->phy.multispeed_fiber))) {
		adapter->advertise = last;
		return (EINVAL);
	}

	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
		adapter->advertise = last;
		return (EINVAL);
	}

	if (adapter->advertise == 1)
		speed = IXGBE_LINK_SPEED_1GB_FULL;
	else if (adapter->advertise == 2)
		speed = IXGBE_LINK_SPEED_100_FULL;
	else if (adapter->advertise == 3)
		speed = IXGBE_LINK_SPEED_1GB_FULL |
			IXGBE_LINK_SPEED_10GB_FULL;
	else {	/* bogus value */
		adapter->advertise = last;
		return (EINVAL);
	}

	hw->mac.autotry_restart = TRUE;
	hw->mac.ops.setup_link(hw, speed, TRUE);

	return 0;
}

/*
** Thermal Shutdown Trigger
**   - cause a Thermal Overtemp IRQ
**   - this now requires firmware enabling
*/
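/*
** Writing a non-zero value through this handler sets IXGBE_EICR_TS
** in the EICS register, software-triggering the overtemp interrupt
** on X540 hardware. A hedged usage sketch with an illustrative
** node name:
**
**	sysctl -w hw.ixg0.ts=1
*/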
static int
ixgbe_set_thermal_test(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int		error, fire = 0;
	struct adapter	*adapter;
	struct ixgbe_hw *hw;

	node = *rnode;
	adapter = (struct adapter *)node.sysctl_data;
	hw = &adapter->hw;

	if (hw->mac.type != ixgbe_mac_X540)
		return (0);

	node.sysctl_data = &fire;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if ((error) || (newp == NULL))
		return (error);

	if (fire) {
		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
		reg |= IXGBE_EICR_TS;
		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
	}

	return (0);
}

/*
** Enable the hardware to drop packets when the buffer is
** full.  This is useful when in multiqueue mode, so that no
** single full queue stalls the entire RX engine.  We enable
** this only when multiqueue is in use AND flow control is
** disabled.
*/
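/*
** These two helpers are toggled from ixgbe_set_flowcntl() above:
** drop is enabled when flow control is switched off on a multiqueue
** adapter, and disabled again whenever a pause mode is requested.
*/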
static void
ixgbe_enable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl |= IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}

static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}