      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
     62 /*$NetBSD: ixgbe.c,v 1.31 2015/08/03 05:43:01 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 #include "vlan.h"
     69 
     70 /*********************************************************************
     71  *  Set this to one to display debug statistics
     72  *********************************************************************/
     73 int             ixgbe_display_debug_stats = 0;
     74 
     75 /*********************************************************************
     76  *  Driver version
     77  *********************************************************************/
     78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
     79 
     80 /*********************************************************************
     81  *  PCI Device ID Table
     82  *
      83  *  Used by probe to select the devices to attach to
     84  *  Last field stores an index into ixgbe_strings
     85  *  Last entry must be all 0s
     86  *
     87  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     88  *********************************************************************/
     89 
     90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     91 {
     92 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     93 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    115 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    116 	/* required last entry */
    117 	{0, 0, 0, 0, 0}
    118 };
    119 
    120 /*********************************************************************
    121  *  Table of branding strings
    122  *********************************************************************/
    123 
    124 static const char    *ixgbe_strings[] = {
    125 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
    126 };
    127 
    128 /*********************************************************************
    129  *  Function prototypes
    130  *********************************************************************/
    131 static int      ixgbe_probe(device_t, cfdata_t, void *);
    132 static void     ixgbe_attach(device_t, device_t, void *);
    133 static int      ixgbe_detach(device_t, int);
    134 #if 0
    135 static int      ixgbe_shutdown(device_t);
    136 #endif
    137 #if IXGBE_LEGACY_TX
    138 static void     ixgbe_start(struct ifnet *);
    139 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    140 #else
    141 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    142 static int	ixgbe_mq_start_locked(struct ifnet *,
    143                     struct tx_ring *, struct mbuf *);
    144 static void	ixgbe_qflush(struct ifnet *);
    145 static void	ixgbe_deferred_mq_start(void *);
    146 #endif
    147 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    148 static void	ixgbe_ifstop(struct ifnet *, int);
    149 static int	ixgbe_init(struct ifnet *);
    150 static void	ixgbe_init_locked(struct adapter *);
    151 static void     ixgbe_stop(void *);
    152 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    153 static int      ixgbe_media_change(struct ifnet *);
    154 static void     ixgbe_identify_hardware(struct adapter *);
    155 static int      ixgbe_allocate_pci_resources(struct adapter *,
    156 		    const struct pci_attach_args *);
    157 static int      ixgbe_allocate_msix(struct adapter *,
    158 		    const struct pci_attach_args *);
    159 static int      ixgbe_allocate_legacy(struct adapter *,
    160 		    const struct pci_attach_args *);
    161 static int	ixgbe_allocate_queues(struct adapter *);
    162 static int	ixgbe_setup_msix(struct adapter *);
    163 static void	ixgbe_free_pci_resources(struct adapter *);
    164 static void	ixgbe_local_timer(void *);
    165 static int	ixgbe_setup_interface(device_t, struct adapter *);
    166 static void	ixgbe_config_link(struct adapter *);
    167 
    168 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    169 static int	ixgbe_setup_transmit_structures(struct adapter *);
    170 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    171 static void     ixgbe_initialize_transmit_units(struct adapter *);
    172 static void     ixgbe_free_transmit_structures(struct adapter *);
    173 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    174 
    175 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    176 static int      ixgbe_setup_receive_structures(struct adapter *);
    177 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    178 static void     ixgbe_initialize_receive_units(struct adapter *);
    179 static void     ixgbe_free_receive_structures(struct adapter *);
    180 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    181 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    182 
    183 static void     ixgbe_enable_intr(struct adapter *);
    184 static void     ixgbe_disable_intr(struct adapter *);
    185 static void     ixgbe_update_stats_counters(struct adapter *);
    186 static bool	ixgbe_txeof(struct tx_ring *);
    187 static bool	ixgbe_rxeof(struct ix_queue *);
    188 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    189 		    struct ixgbe_hw_stats *);
    190 static void     ixgbe_set_promisc(struct adapter *);
    191 static void     ixgbe_set_multi(struct adapter *);
    192 static void     ixgbe_update_link_status(struct adapter *);
    193 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    194 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    195 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    196 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    197 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    198 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    199 		    struct ixgbe_dma_alloc *, int);
    200 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    201 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    202 		    struct mbuf *, u32 *, u32 *);
    203 static int	ixgbe_tso_setup(struct tx_ring *,
    204 		    struct mbuf *, u32 *, u32 *);
    205 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    206 static void	ixgbe_configure_ivars(struct adapter *);
    207 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    208 
    209 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    210 #if 0
    211 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    212 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    213 #endif
    214 
    215 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    216 
    217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    219 		    struct mbuf *, u32);
    220 
    221 static void	ixgbe_enable_rx_drop(struct adapter *);
    222 static void	ixgbe_disable_rx_drop(struct adapter *);
    223 
    224 /* Support for pluggable optic modules */
    225 static bool	ixgbe_sfp_probe(struct adapter *);
    226 static void	ixgbe_setup_optics(struct adapter *);
    227 
     228 /* Legacy (single vector) interrupt handler */
    229 static int	ixgbe_legacy_irq(void *);
    230 
    231 #if defined(NETBSD_MSI_OR_MSIX)
    232 /* The MSI/X Interrupt handlers */
    233 static void	ixgbe_msix_que(void *);
    234 static void	ixgbe_msix_link(void *);
    235 #endif
    236 
    237 /* Software interrupts for deferred work */
    238 static void	ixgbe_handle_que(void *);
    239 static void	ixgbe_handle_link(void *);
    240 static void	ixgbe_handle_msf(void *);
    241 static void	ixgbe_handle_mod(void *);
    242 
    243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    245 
    246 #ifdef IXGBE_FDIR
    247 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    248 static void	ixgbe_reinit_fdir(void *, int);
    249 #endif
    250 
    251 /*********************************************************************
    252  *  FreeBSD Device Interface Entry Points
    253  *********************************************************************/
    254 
    255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    256     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    257     DVF_DETACH_SHUTDOWN);
    258 
    259 #if 0
    260 devclass_t ixgbe_devclass;
    261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    262 
    263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    265 #endif
    266 
    267 /*
    268 ** TUNEABLE PARAMETERS:
    269 */
    270 
     271 /*
     272 ** AIM: Adaptive Interrupt Moderation.
     273 ** When enabled, the interrupt rate is
     274 ** varied over time based on the traffic
     275 ** seen on each interrupt vector.
     276 */
    277 static int ixgbe_enable_aim = TRUE;
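         /*
         ** NetBSD has no FreeBSD-style loader tunables, so TUNABLE_INT
         ** is defined away below and these declarations become no-ops.
         */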
    278 #define TUNABLE_INT(__x, __y)
    279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
    280 
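         /*
         ** Upper bound in interrupts/second.  Assuming IXGBE_LOW_LATENCY
         ** is 128, this works out to 31250; the EITR interval programmed
         ** into the hardware is derived as (4000000 / rate).
         */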
    281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
    283 
    284 /* How many packets rxeof tries to clean at a time */
    285 static int ixgbe_rx_process_limit = 256;
    286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    287 
    288 /* How many packets txeof tries to clean at a time */
    289 static int ixgbe_tx_process_limit = 256;
    290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    291 
     292 /*
     293 ** Smart speed setting, default to on.
     294 ** This only works as a compile-time option
     295 ** right now, as it is set during attach;
     296 ** set this to 'ixgbe_smart_speed_off' to
     297 ** disable.
     298 */
    299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    300 
    301 /*
    302  * MSIX should be the default for best performance,
    303  * but this allows it to be forced off for testing.
    304  */
    305 static int ixgbe_enable_msix = 1;
    306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    307 
    308 #if defined(NETBSD_MSI_OR_MSIX)
     309 /*
     310  * Number of queues. When set to 0, it is
     311  * autoconfigured based on the number of
     312  * CPUs, with a maximum of 8. It can be
     313  * overridden manually here.
     314  */
    315 static int ixgbe_num_queues = 0;
    316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    317 #endif
    318 
     319 /*
     320 ** Number of TX descriptors per ring,
     321 ** set higher than RX as this seems to
     322 ** be the better-performing choice.
     323 */
    324 static int ixgbe_txd = PERFORM_TXD;
    325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
    326 
    327 /* Number of RX descriptors per ring */
    328 static int ixgbe_rxd = PERFORM_RXD;
    329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    330 
     331 /*
     332 ** HW RSC control:
     333 **  this feature only works with
     334 **  IPv4, and only on 82599 and later.
     335 **  It also breaks IP forwarding, and
     336 **  unlike LRO that cannot be controlled
     337 **  by the stack. For all these reasons
     338 **  I've deemed it best to leave this
     339 **  off and not bother with a tuneable
     340 **  interface; enabling it requires
     341 **  recompiling with this set to TRUE.
     342 */
    343 static bool ixgbe_rsc_enable = FALSE;
    344 
     345 /* Keep a running count of ports as a sanity check */
    346 static int ixgbe_total_ports;
    347 
    348 #ifdef IXGBE_FDIR
     349 /*
     350 ** For Flow Director: this is the
     351 ** sampling interval for TX packets
     352 ** entering the filter pool; at 20,
     353 ** every 20th packet is probed.
     354 **
     355 ** This feature can be disabled by
     356 ** setting this to 0.
     357 */
    358 static int atr_sample_rate = 20;
     359 /*
     360 ** Flow Director actually 'steals'
     361 ** part of the packet buffer as its
     362 ** filter pool; this variable controls
     363 ** how much it uses:
     364 **  0 = 64K, 1 = 128K, 2 = 256K
     365 */
    366 static int fdir_pballoc = 1;
    367 #endif
    368 
    369 #ifdef DEV_NETMAP
    370 /*
    371  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    372  * be a reference on how to implement netmap support in a driver.
    373  * Additional comments are in ixgbe_netmap.h .
    374  *
    375  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    376  * that extend the standard driver.
    377  */
    378 #include <dev/netmap/ixgbe_netmap.h>
    379 #endif /* DEV_NETMAP */
    380 
    381 /*********************************************************************
    382  *  Device identification routine
    383  *
    384  *  ixgbe_probe determines if the driver should be loaded on
    385  *  adapter based on PCI vendor/device id of the adapter.
    386  *
    387  *  return 1 on success, 0 on failure
    388  *********************************************************************/
    389 
    390 static int
    391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    392 {
    393 	const struct pci_attach_args *pa = aux;
    394 
    395 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    396 }
    397 
    398 static ixgbe_vendor_info_t *
    399 ixgbe_lookup(const struct pci_attach_args *pa)
    400 {
    401 	pcireg_t subid;
    402 	ixgbe_vendor_info_t *ent;
    403 
    404 	INIT_DEBUGOUT("ixgbe_probe: begin");
    405 
    406 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    407 		return NULL;
    408 
    409 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    410 
    411 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    412 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    413 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    414 
    415 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    416 		     ent->subvendor_id == 0) &&
    417 
    418 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    419 		     ent->subdevice_id == 0)) {
    420 			++ixgbe_total_ports;
    421 			return ent;
    422 		}
    423 	}
    424 	return NULL;
    425 }
    426 
    427 
    428 static void
    429 ixgbe_sysctl_attach(struct adapter *adapter)
    430 {
    431 	struct sysctllog **log;
    432 	const struct sysctlnode *rnode, *cnode;
    433 	device_t dev;
    434 
    435 	dev = adapter->dev;
    436 	log = &adapter->sysctllog;
    437 
    438 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    439 		aprint_error_dev(dev, "could not create sysctl root\n");
    440 		return;
    441 	}
    442 
    443 	if (sysctl_createv(log, 0, &rnode, &cnode,
    444 	    CTLFLAG_READONLY, CTLTYPE_INT,
    445 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    446 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    447 		aprint_error_dev(dev, "could not create sysctl\n");
    448 
    449 	if (sysctl_createv(log, 0, &rnode, &cnode,
    450 	    CTLFLAG_READONLY, CTLTYPE_INT,
    451 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    452 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    453 		aprint_error_dev(dev, "could not create sysctl\n");
    454 
    455 	if (sysctl_createv(log, 0, &rnode, &cnode,
    456 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    457 	    "fc", SYSCTL_DESCR("Flow Control"),
    458 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    459 		aprint_error_dev(dev, "could not create sysctl\n");
    460 
    461 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    462 	 * XXX It's that way in the FreeBSD driver that this derives from.
    463 	 */
    464 	if (sysctl_createv(log, 0, &rnode, &cnode,
    465 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    466 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    467 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    468 		aprint_error_dev(dev, "could not create sysctl\n");
    469 
    470 	if (sysctl_createv(log, 0, &rnode, &cnode,
    471 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    472 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    473 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    474 		aprint_error_dev(dev, "could not create sysctl\n");
    475 
    476 	if (sysctl_createv(log, 0, &rnode, &cnode,
    477 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    478 	    "ts", SYSCTL_DESCR("Thermal Test"),
    479 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    480 		aprint_error_dev(dev, "could not create sysctl\n");
    481 }
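         /*
          * Usage sketch (assuming the instance node created by
          * ixgbe_sysctl_instance() is named after the device):
          *
          *	sysctl -w hw.ixg0.fc=3
          *
          * would request full flow control via ixgbe_set_flowcntl().
          */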
    482 
    483 /*********************************************************************
    484  *  Device initialization routine
    485  *
    486  *  The attach entry point is called when the driver is being loaded.
    487  *  This routine identifies the type of hardware, allocates all resources
    488  *  and initializes the hardware.
    489  *
    490  *  return 0 on success, positive on failure
    491  *********************************************************************/
    492 
    493 static void
    494 ixgbe_attach(device_t parent, device_t dev, void *aux)
    495 {
    496 	struct adapter *adapter;
    497 	struct ixgbe_hw *hw;
    498 	int             error = 0;
    499 	u16		csum;
    500 	u32		ctrl_ext;
    501 	ixgbe_vendor_info_t *ent;
    502 	const struct pci_attach_args *pa = aux;
    503 
    504 	INIT_DEBUGOUT("ixgbe_attach: begin");
    505 
    506 	/* Allocate, clear, and link in our adapter structure */
    507 	adapter = device_private(dev);
    508 	adapter->dev = adapter->osdep.dev = dev;
    509 	hw = &adapter->hw;
    510 	adapter->osdep.pc = pa->pa_pc;
    511 	adapter->osdep.tag = pa->pa_tag;
    512 	adapter->osdep.dmat = pa->pa_dmat;
    513 
    514 	ent = ixgbe_lookup(pa);
    515 
    516 	KASSERT(ent != NULL);
    517 
    518 	aprint_normal(": %s, Version - %s\n",
    519 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    520 
    521 	/* Core Lock Init*/
    522 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    523 
    524 	/* SYSCTL APIs */
    525 
    526 	ixgbe_sysctl_attach(adapter);
    527 
    528 	/* Set up the timer callout */
    529 	callout_init(&adapter->timer, 0);
    530 
    531 	/* Determine hardware revision */
    532 	ixgbe_identify_hardware(adapter);
    533 
    534 	/* Do base PCI setup - map BAR0 */
    535 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    536 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    537 		error = ENXIO;
    538 		goto err_out;
    539 	}
    540 
    541 	/* Do descriptor calc and sanity checks */
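         	/*
         	 * The ring length in bytes must be a multiple of DBA_ALIGN
         	 * (128); with 16-byte advanced descriptors that means the
         	 * descriptor count must be a multiple of 8.
         	 */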
    542 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    543 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    544 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    545 		adapter->num_tx_desc = DEFAULT_TXD;
    546 	} else
    547 		adapter->num_tx_desc = ixgbe_txd;
    548 
    549 	/*
    550 	** With many RX rings it is easy to exceed the
    551 	** system mbuf allocation. Tuning nmbclusters
    552 	** can alleviate this.
    553 	*/
     554 	if (nmbclusters > 0) {
    555 		int s;
    556 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    557 		if (s > nmbclusters) {
    558 			aprint_error_dev(dev, "RX Descriptors exceed "
    559 			    "system mbuf max, using default instead!\n");
    560 			ixgbe_rxd = DEFAULT_RXD;
    561 		}
    562 	}
    563 
    564 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
     565 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
    566 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    567 		adapter->num_rx_desc = DEFAULT_RXD;
    568 	} else
    569 		adapter->num_rx_desc = ixgbe_rxd;
    570 
    571 	/* Allocate our TX/RX Queues */
    572 	if (ixgbe_allocate_queues(adapter)) {
    573 		error = ENOMEM;
    574 		goto err_out;
    575 	}
    576 
    577 	/* Allocate multicast array memory. */
    578 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    579 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    580 	if (adapter->mta == NULL) {
    581 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    582 		error = ENOMEM;
    583 		goto err_late;
    584 	}
    585 
    586 	/* Initialize the shared code */
    587 	error = ixgbe_init_shared_code(hw);
    588 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    589 		/*
    590 		** No optics in this port, set up
    591 		** so the timer routine will probe
    592 		** for later insertion.
    593 		*/
    594 		adapter->sfp_probe = TRUE;
    595 		error = 0;
    596 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    597 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    598 		error = EIO;
    599 		goto err_late;
    600 	} else if (error) {
    601 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    602 		error = EIO;
    603 		goto err_late;
    604 	}
    605 
    606 	/* Make sure we have a good EEPROM before we read from it */
    607 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    608 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    609 		error = EIO;
    610 		goto err_late;
    611 	}
    612 
    613 	error = ixgbe_init_hw(hw);
    614 	switch (error) {
    615 	case IXGBE_ERR_EEPROM_VERSION:
    616 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    617 		    "LOM.  Please be aware there may be issues associated "
    618 		    "with your hardware.\n If you are experiencing problems "
    619 		    "please contact your Intel or hardware representative "
    620 		    "who provided you with this hardware.\n");
    621 		break;
    622 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    623 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    624 		error = EIO;
    625 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    626 		goto err_late;
    627 	case IXGBE_ERR_SFP_NOT_PRESENT:
    628 		device_printf(dev,"No SFP+ Module found\n");
    629 		/* falls thru */
    630 	default:
    631 		break;
    632 	}
    633 
    634 	/* Detect and set physical type */
    635 	ixgbe_setup_optics(adapter);
    636 
    637 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    638 		error = ixgbe_allocate_msix(adapter, pa);
    639 	else
    640 		error = ixgbe_allocate_legacy(adapter, pa);
    641 	if (error)
    642 		goto err_late;
    643 
    644 	/* Setup OS specific network interface */
    645 	if (ixgbe_setup_interface(dev, adapter) != 0)
    646 		goto err_late;
    647 
    648 	/* Initialize statistics */
    649 	ixgbe_update_stats_counters(adapter);
    650 
    651         /* Print PCIE bus type/speed/width info */
    652 	ixgbe_get_bus_info(hw);
    653 	aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
    654 	    ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
    655 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
    656 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
    657 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
    658 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
    659 	    ("Unknown"));
    660 
    661 	if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
    662 	    (hw->bus.speed == ixgbe_bus_speed_2500)) {
    663 		aprint_error_dev(dev, "PCI-Express bandwidth available"
    664 		    " for this card\n     is not sufficient for"
    665 		    " optimal performance.\n");
    666 		aprint_error_dev(dev, "For optimal performance a x8 "
    667 		    "PCIE, or x4 PCIE 2 slot is required.\n");
    668         }
    669 
    670 	/* Set an initial default flow control value */
    671 	adapter->fc =  ixgbe_fc_full;
    672 
    673 	/* let hardware know driver is loaded */
    674 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    675 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    676 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    677 
    678 	ixgbe_add_hw_stats(adapter);
    679 
    680 #ifdef DEV_NETMAP
    681 	ixgbe_netmap_attach(adapter);
    682 #endif /* DEV_NETMAP */
    683 	INIT_DEBUGOUT("ixgbe_attach: end");
    684 	return;
    685 err_late:
    686 	ixgbe_free_transmit_structures(adapter);
    687 	ixgbe_free_receive_structures(adapter);
    688 err_out:
    689 	if (adapter->ifp != NULL)
    690 		if_free(adapter->ifp);
    691 	ixgbe_free_pci_resources(adapter);
    692 	if (adapter->mta != NULL)
    693 		free(adapter->mta, M_DEVBUF);
    694 	return;
    695 
    696 }
    697 
    698 /*********************************************************************
    699  *  Device removal routine
    700  *
    701  *  The detach entry point is called when the driver is being removed.
    702  *  This routine stops the adapter and deallocates all the resources
    703  *  that were allocated for driver operation.
    704  *
    705  *  return 0 on success, positive on failure
    706  *********************************************************************/
    707 
    708 static int
    709 ixgbe_detach(device_t dev, int flags)
    710 {
    711 	struct adapter *adapter = device_private(dev);
    712 	struct rx_ring *rxr = adapter->rx_rings;
    713 	struct ixgbe_hw_stats *stats = &adapter->stats;
    714 	struct ix_queue *que = adapter->queues;
    715 	struct tx_ring *txr = adapter->tx_rings;
    716 	u32	ctrl_ext;
    717 
    718 	INIT_DEBUGOUT("ixgbe_detach: begin");
    719 
    720 #if NVLAN > 0
    721 	/* Make sure VLANs are not using driver */
    722 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    723 		;	/* nothing to do: no VLANs */
    724 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    725 		vlan_ifdetach(adapter->ifp);
    726 	else {
    727 		aprint_error_dev(dev, "VLANs in use\n");
    728 		return EBUSY;
    729 	}
    730 #endif
    731 
    732 	IXGBE_CORE_LOCK(adapter);
    733 	ixgbe_stop(adapter);
    734 	IXGBE_CORE_UNLOCK(adapter);
    735 
    736 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    737 #ifndef IXGBE_LEGACY_TX
    738 		softint_disestablish(txr->txq_si);
    739 #endif
    740 		softint_disestablish(que->que_si);
    741 	}
    742 
    743 	/* Drain the Link queue */
    744 	softint_disestablish(adapter->link_si);
    745 	softint_disestablish(adapter->mod_si);
    746 	softint_disestablish(adapter->msf_si);
    747 #ifdef IXGBE_FDIR
    748 	softint_disestablish(adapter->fdir_si);
    749 #endif
    750 
    751 	/* let hardware know driver is unloading */
    752 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    753 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    754 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    755 
    756 	ether_ifdetach(adapter->ifp);
    757 	callout_halt(&adapter->timer, NULL);
    758 #ifdef DEV_NETMAP
    759 	netmap_detach(adapter->ifp);
    760 #endif /* DEV_NETMAP */
    761 	ixgbe_free_pci_resources(adapter);
    762 #if 0	/* XXX the NetBSD port is probably missing something here */
    763 	bus_generic_detach(dev);
    764 #endif
    765 	if_detach(adapter->ifp);
    766 
    767 	sysctl_teardown(&adapter->sysctllog);
    768 	evcnt_detach(&adapter->handleq);
    769 	evcnt_detach(&adapter->req);
    770 	evcnt_detach(&adapter->morerx);
    771 	evcnt_detach(&adapter->moretx);
    772 	evcnt_detach(&adapter->txloops);
    773 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    774 	evcnt_detach(&adapter->m_defrag_failed);
    775 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    776 	evcnt_detach(&adapter->einval_tx_dma_setup);
    777 	evcnt_detach(&adapter->other_tx_dma_setup);
    778 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    779 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    780 	evcnt_detach(&adapter->watchdog_events);
    781 	evcnt_detach(&adapter->tso_err);
    782 	evcnt_detach(&adapter->link_irq);
    783 
    784 	txr = adapter->tx_rings;
    785 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    786 		evcnt_detach(&txr->no_desc_avail);
    787 		evcnt_detach(&txr->total_packets);
    788 		evcnt_detach(&txr->tso_tx);
    789 
    790 		if (i < __arraycount(adapter->stats.mpc)) {
    791 			evcnt_detach(&adapter->stats.mpc[i]);
    792 		}
    793 		if (i < __arraycount(adapter->stats.pxontxc)) {
    794 			evcnt_detach(&adapter->stats.pxontxc[i]);
    795 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    796 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    797 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    798 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    799 		}
    800 		if (i < __arraycount(adapter->stats.qprc)) {
    801 			evcnt_detach(&adapter->stats.qprc[i]);
    802 			evcnt_detach(&adapter->stats.qptc[i]);
    803 			evcnt_detach(&adapter->stats.qbrc[i]);
    804 			evcnt_detach(&adapter->stats.qbtc[i]);
    805 			evcnt_detach(&adapter->stats.qprdc[i]);
    806 		}
    807 
    808 		evcnt_detach(&rxr->rx_packets);
    809 		evcnt_detach(&rxr->rx_bytes);
    810 		evcnt_detach(&rxr->rx_copies);
    811 		evcnt_detach(&rxr->no_jmbuf);
    812 		evcnt_detach(&rxr->rx_discarded);
    813 		evcnt_detach(&rxr->rx_irq);
    814 	}
    815 	evcnt_detach(&stats->ipcs);
    816 	evcnt_detach(&stats->l4cs);
    817 	evcnt_detach(&stats->ipcs_bad);
    818 	evcnt_detach(&stats->l4cs_bad);
    819 	evcnt_detach(&stats->intzero);
    820 	evcnt_detach(&stats->legint);
    821 	evcnt_detach(&stats->crcerrs);
    822 	evcnt_detach(&stats->illerrc);
    823 	evcnt_detach(&stats->errbc);
    824 	evcnt_detach(&stats->mspdc);
    825 	evcnt_detach(&stats->mlfc);
    826 	evcnt_detach(&stats->mrfc);
    827 	evcnt_detach(&stats->rlec);
    828 	evcnt_detach(&stats->lxontxc);
    829 	evcnt_detach(&stats->lxonrxc);
    830 	evcnt_detach(&stats->lxofftxc);
    831 	evcnt_detach(&stats->lxoffrxc);
    832 
    833 	/* Packet Reception Stats */
    834 	evcnt_detach(&stats->tor);
    835 	evcnt_detach(&stats->gorc);
    836 	evcnt_detach(&stats->tpr);
    837 	evcnt_detach(&stats->gprc);
    838 	evcnt_detach(&stats->mprc);
    839 	evcnt_detach(&stats->bprc);
    840 	evcnt_detach(&stats->prc64);
    841 	evcnt_detach(&stats->prc127);
    842 	evcnt_detach(&stats->prc255);
    843 	evcnt_detach(&stats->prc511);
    844 	evcnt_detach(&stats->prc1023);
    845 	evcnt_detach(&stats->prc1522);
    846 	evcnt_detach(&stats->ruc);
    847 	evcnt_detach(&stats->rfc);
    848 	evcnt_detach(&stats->roc);
    849 	evcnt_detach(&stats->rjc);
    850 	evcnt_detach(&stats->mngprc);
    851 	evcnt_detach(&stats->xec);
    852 
    853 	/* Packet Transmission Stats */
    854 	evcnt_detach(&stats->gotc);
    855 	evcnt_detach(&stats->tpt);
    856 	evcnt_detach(&stats->gptc);
    857 	evcnt_detach(&stats->bptc);
    858 	evcnt_detach(&stats->mptc);
    859 	evcnt_detach(&stats->mngptc);
    860 	evcnt_detach(&stats->ptc64);
    861 	evcnt_detach(&stats->ptc127);
    862 	evcnt_detach(&stats->ptc255);
    863 	evcnt_detach(&stats->ptc511);
    864 	evcnt_detach(&stats->ptc1023);
    865 	evcnt_detach(&stats->ptc1522);
    866 
    867 	ixgbe_free_transmit_structures(adapter);
    868 	ixgbe_free_receive_structures(adapter);
    869 	free(adapter->mta, M_DEVBUF);
    870 
    871 	IXGBE_CORE_LOCK_DESTROY(adapter);
    872 	return (0);
    873 }
    874 
    875 /*********************************************************************
    876  *
    877  *  Shutdown entry point
    878  *
    879  **********************************************************************/
    880 
    881 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
    882 static int
    883 ixgbe_shutdown(device_t dev)
    884 {
    885 	struct adapter *adapter = device_private(dev);
    886 	IXGBE_CORE_LOCK(adapter);
    887 	ixgbe_stop(adapter);
    888 	IXGBE_CORE_UNLOCK(adapter);
    889 	return (0);
    890 }
    891 #endif
    892 
    893 
    894 #ifdef IXGBE_LEGACY_TX
    895 /*********************************************************************
    896  *  Transmit entry point
    897  *
    898  *  ixgbe_start is called by the stack to initiate a transmit.
    899  *  The driver will remain in this routine as long as there are
    900  *  packets to transmit and transmit resources are available.
    901  *  In case resources are not available stack is notified and
    902  *  the packet is requeued.
    903  **********************************************************************/
    904 
    905 static void
    906 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    907 {
    908 	int rc;
    909 	struct mbuf    *m_head;
    910 	struct adapter *adapter = txr->adapter;
    911 
    912 	IXGBE_TX_LOCK_ASSERT(txr);
    913 
    914 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    915 		return;
    916 	if (!adapter->link_active)
    917 		return;
    918 
    919 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    920 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    921 			break;
    922 
    923 		IFQ_POLL(&ifp->if_snd, m_head);
    924 		if (m_head == NULL)
    925 			break;
    926 
    927 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    928 			break;
    929 		}
    930 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
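         		/* EFBIG: frame has too many DMA segments; defrag once */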
    931 		if (rc == EFBIG) {
    932 			struct mbuf *mtmp;
    933 
    934 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    935 				m_head = mtmp;
    936 				rc = ixgbe_xmit(txr, m_head);
    937 				if (rc != 0)
    938 					adapter->efbig2_tx_dma_setup.ev_count++;
    939 			} else
    940 				adapter->m_defrag_failed.ev_count++;
    941 		}
    942 		if (rc != 0) {
    943 			m_freem(m_head);
    944 			continue;
    945 		}
    946 
    947 		/* Send a copy of the frame to the BPF listener */
    948 		bpf_mtap(ifp, m_head);
    949 
    950 		/* Set watchdog on */
    951 		getmicrotime(&txr->watchdog_time);
    952 		txr->queue_status = IXGBE_QUEUE_WORKING;
    953 
    954 	}
    955 	return;
    956 }
    957 
    958 /*
     959  * Legacy TX start - called by the stack; this
     960  * always uses the first tx ring and should
     961  * not be used with multiqueue tx enabled.
    962  */
    963 static void
    964 ixgbe_start(struct ifnet *ifp)
    965 {
    966 	struct adapter *adapter = ifp->if_softc;
    967 	struct tx_ring	*txr = adapter->tx_rings;
    968 
    969 	if (ifp->if_flags & IFF_RUNNING) {
    970 		IXGBE_TX_LOCK(txr);
    971 		ixgbe_start_locked(txr, ifp);
    972 		IXGBE_TX_UNLOCK(txr);
    973 	}
    974 	return;
    975 }
    976 
    977 #else /* ! IXGBE_LEGACY_TX */
    978 
    979 /*
    980 ** Multiqueue Transmit driver
    981 **
    982 */
    983 static int
    984 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    985 {
    986 	struct adapter	*adapter = ifp->if_softc;
    987 	struct ix_queue	*que;
    988 	struct tx_ring	*txr;
    989 	int 		i = 0, err = 0;
    990 
    991 	/* Which queue to use */
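         	/*
         	 * Prefer the stack-supplied flow ID so a given flow stays
         	 * on one ring; otherwise spread by the current CPU index.
         	 */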
    992 	if ((m->m_flags & M_FLOWID) != 0)
    993 		i = m->m_pkthdr.flowid % adapter->num_queues;
    994 	else
    995 		i = cpu_index(curcpu()) % adapter->num_queues;
    996 
    997 	txr = &adapter->tx_rings[i];
    998 	que = &adapter->queues[i];
    999 
   1000 	if (IXGBE_TX_TRYLOCK(txr)) {
   1001 		err = ixgbe_mq_start_locked(ifp, txr, m);
   1002 		IXGBE_TX_UNLOCK(txr);
   1003 	} else {
   1004 		err = drbr_enqueue(ifp, txr->br, m);
   1005 		softint_schedule(txr->txq_si);
   1006 	}
   1007 
   1008 	return (err);
   1009 }
   1010 
   1011 static int
   1012 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
   1013 {
   1014 	struct adapter  *adapter = txr->adapter;
   1015         struct mbuf     *next;
   1016         int             enqueued, err = 0;
   1017 
   1018 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1019 	    adapter->link_active == 0) {
   1020 		if (m != NULL)
   1021 			err = drbr_enqueue(ifp, txr->br, m);
   1022 		return (err);
   1023 	}
   1024 
   1025 	enqueued = 0;
   1026 	if (m != NULL) {
   1027 		err = drbr_enqueue(ifp, txr->br, m);
   1028 		if (err) {
   1029 			return (err);
   1030 		}
   1031 	}
   1032 
   1033 	/* Process the queue */
   1034 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1035 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1036 			if (next == NULL) {
   1037 				drbr_advance(ifp, txr->br);
   1038 			} else {
   1039 				drbr_putback(ifp, txr->br, next);
   1040 			}
   1041 			break;
   1042 		}
   1043 		drbr_advance(ifp, txr->br);
   1044 		enqueued++;
   1045 		/* Send a copy of the frame to the BPF listener */
   1046 		bpf_mtap(ifp, next);
   1047 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1048 			break;
   1049 		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
   1050 			ixgbe_txeof(txr);
   1051 	}
   1052 
   1053 	if (enqueued > 0) {
   1054 		/* Set watchdog on */
   1055 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1056 		getmicrotime(&txr->watchdog_time);
   1057 	}
   1058 
   1059 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1060 		ixgbe_txeof(txr);
   1061 
   1062 	return (err);
   1063 }
   1064 
   1065 /*
    1066  * Called from a softint to drain queued transmit packets.
   1067  */
   1068 static void
   1069 ixgbe_deferred_mq_start(void *arg)
   1070 {
   1071 	struct tx_ring *txr = arg;
   1072 	struct adapter *adapter = txr->adapter;
   1073 	struct ifnet *ifp = adapter->ifp;
   1074 
   1075 	IXGBE_TX_LOCK(txr);
   1076 	if (!drbr_empty(ifp, txr->br))
   1077 		ixgbe_mq_start_locked(ifp, txr, NULL);
   1078 	IXGBE_TX_UNLOCK(txr);
   1079 }
   1080 
   1081 /*
   1082 ** Flush all ring buffers
   1083 */
   1084 static void
   1085 ixgbe_qflush(struct ifnet *ifp)
   1086 {
   1087 	struct adapter	*adapter = ifp->if_softc;
   1088 	struct tx_ring	*txr = adapter->tx_rings;
   1089 	struct mbuf	*m;
   1090 
   1091 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1092 		IXGBE_TX_LOCK(txr);
   1093 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1094 			m_freem(m);
   1095 		IXGBE_TX_UNLOCK(txr);
   1096 	}
   1097 	if_qflush(ifp);
   1098 }
   1099 #endif /* IXGBE_LEGACY_TX */
   1100 
   1101 static int
   1102 ixgbe_ifflags_cb(struct ethercom *ec)
   1103 {
   1104 	struct ifnet *ifp = &ec->ec_if;
   1105 	struct adapter *adapter = ifp->if_softc;
   1106 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1107 
   1108 	IXGBE_CORE_LOCK(adapter);
   1109 
   1110 	if (change != 0)
   1111 		adapter->if_flags = ifp->if_flags;
   1112 
   1113 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1114 		rc = ENETRESET;
   1115 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1116 		ixgbe_set_promisc(adapter);
   1117 
   1118 	/* Set up VLAN support and filter */
   1119 	ixgbe_setup_vlan_hw_support(adapter);
   1120 
   1121 	IXGBE_CORE_UNLOCK(adapter);
   1122 
   1123 	return rc;
   1124 }
   1125 
   1126 /*********************************************************************
   1127  *  Ioctl entry point
   1128  *
   1129  *  ixgbe_ioctl is called when the user wants to configure the
   1130  *  interface.
   1131  *
   1132  *  return 0 on success, positive on failure
   1133  **********************************************************************/
   1134 
   1135 static int
   1136 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1137 {
   1138 	struct adapter	*adapter = ifp->if_softc;
   1139 	struct ixgbe_hw *hw = &adapter->hw;
   1140 	struct ifcapreq *ifcr = data;
   1141 	struct ifreq	*ifr = data;
   1142 	int             error = 0;
   1143 	int l4csum_en;
   1144 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1145 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1146 
   1147 	switch (command) {
   1148 	case SIOCSIFFLAGS:
   1149 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1150 		break;
   1151 	case SIOCADDMULTI:
   1152 	case SIOCDELMULTI:
   1153 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1154 		break;
   1155 	case SIOCSIFMEDIA:
   1156 	case SIOCGIFMEDIA:
   1157 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1158 		break;
   1159 	case SIOCSIFCAP:
   1160 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1161 		break;
   1162 	case SIOCSIFMTU:
   1163 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1164 		break;
   1165 	default:
   1166 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1167 		break;
   1168 	}
   1169 
   1170 	switch (command) {
   1171 	case SIOCSIFMEDIA:
   1172 	case SIOCGIFMEDIA:
   1173 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1174 	case SIOCGI2C:
   1175 	{
   1176 		struct ixgbe_i2c_req	i2c;
   1177 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1178 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1179 		if (error)
   1180 			break;
    1181 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
   1182 			error = EINVAL;
   1183 			break;
   1184 		}
   1185 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
   1186 		    i2c.dev_addr, i2c.data);
   1187 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1188 		break;
   1189 	}
   1190 	case SIOCSIFCAP:
   1191 		/* Layer-4 Rx checksum offload has to be turned on and
   1192 		 * off as a unit.
   1193 		 */
   1194 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1195 		if (l4csum_en != l4csum && l4csum_en != 0)
   1196 			return EINVAL;
   1197 		/*FALLTHROUGH*/
   1198 	case SIOCADDMULTI:
   1199 	case SIOCDELMULTI:
   1200 	case SIOCSIFFLAGS:
   1201 	case SIOCSIFMTU:
   1202 	default:
   1203 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1204 			return error;
   1205 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1206 			;
   1207 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1208 			IXGBE_CORE_LOCK(adapter);
   1209 			ixgbe_init_locked(adapter);
   1210 			IXGBE_CORE_UNLOCK(adapter);
   1211 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1212 			/*
   1213 			 * Multicast list has changed; set the hardware filter
   1214 			 * accordingly.
   1215 			 */
   1216 			IXGBE_CORE_LOCK(adapter);
   1217 			ixgbe_disable_intr(adapter);
   1218 			ixgbe_set_multi(adapter);
   1219 			ixgbe_enable_intr(adapter);
   1220 			IXGBE_CORE_UNLOCK(adapter);
   1221 		}
   1222 		return 0;
   1223 	}
   1224 
   1225 	return error;
   1226 }
   1227 
   1228 /*********************************************************************
   1229  *  Init entry point
   1230  *
    1231  *  This routine is used in two ways. It is used by the stack as
    1232  *  the init entry point in the network interface structure. It is
    1233  *  also used by the driver as a hw/sw initialization routine to get to a
   1234  *  consistent state.
   1235  *
   1236  *  return 0 on success, positive on failure
   1237  **********************************************************************/
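         /* The maximum frame size field occupies the upper 16 bits of MHADD */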
   1238 #define IXGBE_MHADD_MFS_SHIFT 16
   1239 
   1240 static void
   1241 ixgbe_init_locked(struct adapter *adapter)
   1242 {
   1243 	struct ifnet   *ifp = adapter->ifp;
   1244 	device_t 	dev = adapter->dev;
   1245 	struct ixgbe_hw *hw = &adapter->hw;
   1246 	u32		k, txdctl, mhadd, gpie;
   1247 	u32		rxdctl, rxctrl;
   1248 
   1249 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1250 
   1251 	KASSERT(mutex_owned(&adapter->core_mtx));
   1252 	INIT_DEBUGOUT("ixgbe_init: begin");
   1253 	hw->adapter_stopped = FALSE;
   1254 	ixgbe_stop_adapter(hw);
   1255         callout_stop(&adapter->timer);
   1256 
   1257 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1258 	adapter->max_frame_size =
   1259 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1260 
   1261         /* reprogram the RAR[0] in case user changed it. */
   1262         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1263 
   1264 	/* Get the latest mac address, User can use a LAA */
   1265 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1266 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1267 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1268 	hw->addr_ctrl.rar_used_count = 1;
   1269 
   1270 	/* Prepare transmit descriptors and buffers */
   1271 	if (ixgbe_setup_transmit_structures(adapter)) {
   1272 		device_printf(dev,"Could not setup transmit structures\n");
   1273 		ixgbe_stop(adapter);
   1274 		return;
   1275 	}
   1276 
   1277 	ixgbe_init_hw(hw);
   1278 	ixgbe_initialize_transmit_units(adapter);
   1279 
   1280 	/* Setup Multicast table */
   1281 	ixgbe_set_multi(adapter);
   1282 
   1283 	/*
   1284 	** Determine the correct mbuf pool
   1285 	** for doing jumbo frames
   1286 	*/
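         	/* MCLBYTES is 2KB; the jumbo pools are PAGE_SIZE, 9KB and 16KB */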
   1287 	if (adapter->max_frame_size <= 2048)
   1288 		adapter->rx_mbuf_sz = MCLBYTES;
   1289 	else if (adapter->max_frame_size <= 4096)
   1290 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1291 	else if (adapter->max_frame_size <= 9216)
   1292 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1293 	else
   1294 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1295 
   1296 	/* Prepare receive descriptors and buffers */
   1297 	if (ixgbe_setup_receive_structures(adapter)) {
   1298 		device_printf(dev,"Could not setup receive structures\n");
   1299 		ixgbe_stop(adapter);
   1300 		return;
   1301 	}
   1302 
   1303 	/* Configure RX settings */
   1304 	ixgbe_initialize_receive_units(adapter);
   1305 
   1306 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1307 
   1308 	/* Enable Fan Failure Interrupt */
   1309 	gpie |= IXGBE_SDP1_GPIEN;
   1310 
   1311 	/* Add for Thermal detection */
   1312 	if (hw->mac.type == ixgbe_mac_82599EB)
   1313 		gpie |= IXGBE_SDP2_GPIEN;
   1314 
   1315 	/* Thermal Failure Detection */
   1316 	if (hw->mac.type == ixgbe_mac_X540)
   1317 		gpie |= IXGBE_SDP0_GPIEN;
   1318 
   1319 	if (adapter->msix > 1) {
   1320 		/* Enable Enhanced MSIX mode */
   1321 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1322 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1323 		    IXGBE_GPIE_OCD;
   1324 	}
   1325 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1326 
   1327 	/* Set MTU size */
   1328 	if (ifp->if_mtu > ETHERMTU) {
   1329 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1330 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1331 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1332 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1333 	}
   1334 
   1335 	/* Now enable all the queues */
   1336 
   1337 	for (int i = 0; i < adapter->num_queues; i++) {
   1338 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1339 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1340 		/* Set WTHRESH to 8, burst writeback */
   1341 		txdctl |= (8 << 16);
   1342 		/*
   1343 		 * When the internal queue falls below PTHRESH (32),
   1344 		 * start prefetching as long as there are at least
   1345 		 * HTHRESH (1) buffers ready. The values are taken
   1346 		 * from the Intel linux driver 3.8.21.
   1347 		 * Prefetching enables tx line rate even with 1 queue.
   1348 		 */
   1349 		txdctl |= (32 << 0) | (1 << 8);
   1350 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1351 	}
   1352 
   1353 	for (int i = 0; i < adapter->num_queues; i++) {
   1354 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1355 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1356 			/*
   1357 			** PTHRESH = 21
   1358 			** HTHRESH = 4
   1359 			** WTHRESH = 8
   1360 			*/
   1361 			rxdctl &= ~0x3FFFFF;
   1362 			rxdctl |= 0x080420;
   1363 		}
   1364 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1365 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1366 		/* XXX I don't trust this loop, and I don't trust the
   1367 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1368 		 */
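         		/*
         		 * It appears to poll for up to 10ms until the hardware
         		 * reflects RXDCTL.ENABLE, so that the queue is live
         		 * before its tail pointer is written below.
         		 */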
   1369 		for (k = 0; k < 10; k++) {
   1370 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1371 			    IXGBE_RXDCTL_ENABLE)
   1372 				break;
   1373 			else
   1374 				msec_delay(1);
   1375 		}
   1376 		wmb();
   1377 #ifdef DEV_NETMAP
   1378 		/*
   1379 		 * In netmap mode, we must preserve the buffers made
   1380 		 * available to userspace before the if_init()
   1381 		 * (this is true by default on the TX side, because
   1382 		 * init makes all buffers available to userspace).
   1383 		 *
   1384 		 * netmap_reset() and the device specific routines
   1385 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1386 		 * buffers at the end of the NIC ring, so here we
   1387 		 * must set the RDT (tail) register to make sure
   1388 		 * they are not overwritten.
   1389 		 *
   1390 		 * In this driver the NIC ring starts at RDH = 0,
   1391 		 * RDT points to the last slot available for reception (?),
   1392 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1393 		 */
   1394 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1395 			struct netmap_adapter *na = NA(adapter->ifp);
   1396 			struct netmap_kring *kring = &na->rx_rings[i];
   1397 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
   1398 
   1399 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1400 		} else
   1401 #endif /* DEV_NETMAP */
   1402 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1403 	}
   1404 
   1405 	/* Set up VLAN support and filter */
   1406 	ixgbe_setup_vlan_hw_support(adapter);
   1407 
   1408 	/* Enable Receive engine */
   1409 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1410 	if (hw->mac.type == ixgbe_mac_82598EB)
   1411 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1412 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1413 	ixgbe_enable_rx_dma(hw, rxctrl);
   1414 
   1415 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1416 
   1417 	/* Set up MSI/X routing */
   1418 	if (ixgbe_enable_msix)  {
   1419 		ixgbe_configure_ivars(adapter);
   1420 		/* Set up auto-mask */
   1421 		if (hw->mac.type == ixgbe_mac_82598EB)
   1422 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1423 		else {
   1424 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1425 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1426 		}
   1427 	} else {  /* Simple settings for Legacy/MSI */
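                 /* Map both RX queue 0 and TX queue 0 onto vector 0 */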
   1428                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1429                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1430 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1431 	}
   1432 
   1433 #ifdef IXGBE_FDIR
   1434 	/* Init Flow director */
   1435 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1436 		u32 hdrm = 32 << fdir_pballoc;
   1437 
   1438 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1439 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1440 	}
   1441 #endif
   1442 
   1443 	/*
   1444 	** Check on any SFP devices that
   1445 	** need to be kick-started
   1446 	*/
   1447 	if (hw->phy.type == ixgbe_phy_none) {
   1448 		int err = hw->phy.ops.identify(hw);
   1449 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1450                 	device_printf(dev,
   1451 			    "Unsupported SFP+ module type was detected.\n");
   1452 			return;
   1453         	}
   1454 	}
   1455 
   1456 	/* Set moderation on the Link interrupt */
   1457 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1458 
   1459 	/* Config/Enable Link */
   1460 	ixgbe_config_link(adapter);
   1461 
   1462 	/* Hardware Packet Buffer & Flow Control setup */
   1463 	{
   1464 		u32 rxpb, frame, size, tmp;
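         		/*
         		 * Water marks are in KB: the IXGBE_DV* macros yield a
         		 * delay value in bits, IXGBE_BT2KB converts bits to KB,
         		 * and the RXPBSIZE byte count is shifted down to KB.
         		 */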
   1465 
   1466 		frame = adapter->max_frame_size;
   1467 
   1468 		/* Calculate High Water */
   1469 		if (hw->mac.type == ixgbe_mac_X540)
   1470 			tmp = IXGBE_DV_X540(frame, frame);
   1471 		else
   1472 			tmp = IXGBE_DV(frame, frame);
   1473 		size = IXGBE_BT2KB(tmp);
   1474 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1475 		hw->fc.high_water[0] = rxpb - size;
   1476 
   1477 		/* Now calculate Low Water */
   1478 		if (hw->mac.type == ixgbe_mac_X540)
   1479 			tmp = IXGBE_LOW_DV_X540(frame);
   1480 		else
   1481 			tmp = IXGBE_LOW_DV(frame);
   1482 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
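         		/*
         		 * Purely illustrative numbers: if the delay value
         		 * converts to ~40KB and the packet buffer register
         		 * reports 160KB, then high_water = 160 - 40 = 120KB;
         		 * low_water comes from the smaller IXGBE_LOW_DV()
         		 * figure the same way.
         		 */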
   1483 
   1484 		hw->fc.requested_mode = adapter->fc;
   1485 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1486 		hw->fc.send_xon = TRUE;
   1487 	}
   1488 	/* Initialize the FC settings */
   1489 	ixgbe_start_hw(hw);
   1490 
   1491 	/* And now turn on interrupts */
   1492 	ixgbe_enable_intr(adapter);
   1493 
   1494 	/* Now inform the stack we're ready */
   1495 	ifp->if_flags |= IFF_RUNNING;
   1496 
   1497 	return;
   1498 }
   1499 
   1500 static int
   1501 ixgbe_init(struct ifnet *ifp)
   1502 {
   1503 	struct adapter *adapter = ifp->if_softc;
   1504 
   1505 	IXGBE_CORE_LOCK(adapter);
   1506 	ixgbe_init_locked(adapter);
   1507 	IXGBE_CORE_UNLOCK(adapter);
   1508 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1509 }
   1510 
   1511 
   1512 /*
   1513 **
   1514 ** MSIX Interrupt Handlers and Tasklets
   1515 **
   1516 */
   1517 
   1518 static inline void
   1519 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1520 {
   1521 	struct ixgbe_hw *hw = &adapter->hw;
   1522 	u64	queue = (u64)(1ULL << vector);
   1523 	u32	mask;
   1524 
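         	/*
         	 * The 64-bit queue bit is split across two 32-bit EIMS_EX
         	 * registers on MACs newer than 82598: vectors 0-31 land in
         	 * EIMS_EX(0) and vectors 32-63 in EIMS_EX(1), so e.g.
         	 * vector 40 sets bit 8 of EIMS_EX(1).
         	 */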
   1525 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1526                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1527                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1528 	} else {
   1529                 mask = (queue & 0xFFFFFFFF);
   1530                 if (mask)
   1531                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1532                 mask = (queue >> 32);
   1533                 if (mask)
   1534                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1535 	}
   1536 }
   1537 
   1538 __unused static inline void
   1539 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1540 {
   1541 	struct ixgbe_hw *hw = &adapter->hw;
   1542 	u64	queue = (u64)(1ULL << vector);
   1543 	u32	mask;
   1544 
   1545 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1546                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1547                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1548 	} else {
   1549                 mask = (queue & 0xFFFFFFFF);
   1550                 if (mask)
   1551                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1552                 mask = (queue >> 32);
   1553                 if (mask)
   1554                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1555 	}
   1556 }
   1557 
   1558 static inline void
   1559 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
   1560 {
   1561 	u32 mask;
   1562 
   1563 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   1564 		mask = (IXGBE_EIMS_RTX_QUEUE & queues);
   1565 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
   1566 	} else {
   1567 		mask = (queues & 0xFFFFFFFF);
   1568 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
   1569 		mask = (queues >> 32);
   1570 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
   1571 	}
   1572 }
   1573 
   1574 
   1575 static void
   1576 ixgbe_handle_que(void *context)
   1577 {
   1578 	struct ix_queue *que = context;
   1579 	struct adapter  *adapter = que->adapter;
   1580 	struct tx_ring  *txr = que->txr;
   1581 	struct ifnet    *ifp = adapter->ifp;
   1582 	bool		more;
   1583 
   1584 	adapter->handleq.ev_count++;
   1585 
   1586 	if (ifp->if_flags & IFF_RUNNING) {
   1587 		more = ixgbe_rxeof(que);
   1588 		IXGBE_TX_LOCK(txr);
   1589 		ixgbe_txeof(txr);
   1590 #ifndef IXGBE_LEGACY_TX
   1591 		if (!drbr_empty(ifp, txr->br))
   1592 			ixgbe_mq_start_locked(ifp, txr, NULL);
   1593 #else
   1594 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1595 			ixgbe_start_locked(txr, ifp);
   1596 #endif
   1597 		IXGBE_TX_UNLOCK(txr);
   1598 		if (more) {
   1599 			adapter->req.ev_count++;
   1600 			softint_schedule(que->que_si);
   1601 			return;
   1602 		}
   1603 	}
   1604 
   1605 	/* Reenable this interrupt */
   1606 	ixgbe_enable_queue(adapter, que->msix);
   1607 	return;
   1608 }
   1609 
   1610 
   1611 /*********************************************************************
   1612  *
   1613  *  Legacy Interrupt Service routine
   1614  *
   1615  **********************************************************************/
   1616 
   1617 static int
   1618 ixgbe_legacy_irq(void *arg)
   1619 {
   1620 	struct ix_queue *que = arg;
   1621 	struct adapter	*adapter = que->adapter;
   1622 	struct ifnet   *ifp = adapter->ifp;
   1623 	struct ixgbe_hw	*hw = &adapter->hw;
   1624 	struct 		tx_ring *txr = adapter->tx_rings;
   1625 	bool		more_tx = false, more_rx = false;
   1626 	u32       	reg_eicr, loop = MAX_LOOP;
   1627 
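	/*
	 * As a NetBSD interrupt handler this returns nonzero when
	 * the interrupt was ours and 0 when it should be passed on
	 * to other handlers sharing the line.
	 */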
   1628 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1629 
   1630 	adapter->stats.legint.ev_count++;
   1631 	++que->irqs;
   1632 	if (reg_eicr == 0) {
   1633 		adapter->stats.intzero.ev_count++;
   1634 		if ((ifp->if_flags & IFF_UP) != 0)
   1635 			ixgbe_enable_intr(adapter);
   1636 		return 0;
   1637 	}
   1638 
   1639 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1640 		more_rx = ixgbe_rxeof(que);
   1641 
   1642 		IXGBE_TX_LOCK(txr);
   1643 		do {
   1644 			adapter->txloops.ev_count++;
   1645 			more_tx = ixgbe_txeof(txr);
   1646 		} while (loop-- && more_tx);
   1647 		IXGBE_TX_UNLOCK(txr);
   1648 	}
   1649 
   1650 	if (more_rx || more_tx) {
   1651 		if (more_rx)
   1652 			adapter->morerx.ev_count++;
   1653 		if (more_tx)
   1654 			adapter->moretx.ev_count++;
   1655 		softint_schedule(que->que_si);
   1656 	}
   1657 
   1658 	/* Check for fan failure */
   1659 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1660 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1661                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1662 		    "REPLACE IMMEDIATELY!!\n");
   1663 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1664 	}
   1665 
   1666 	/* Link status change */
   1667 	if (reg_eicr & IXGBE_EICR_LSC)
   1668 		softint_schedule(adapter->link_si);
   1669 
   1670 	ixgbe_enable_intr(adapter);
   1671 	return 1;
   1672 }
   1673 
   1674 
   1675 #if defined(NETBSD_MSI_OR_MSIX)
   1676 /*********************************************************************
   1677  *
   1678  *  MSIX Queue Interrupt Service routine
   1679  *
   1680  **********************************************************************/
   1681 void
   1682 ixgbe_msix_que(void *arg)
   1683 {
   1684 	struct ix_queue	*que = arg;
   1685 	struct adapter  *adapter = que->adapter;
   1686 	struct tx_ring	*txr = que->txr;
   1687 	struct rx_ring	*rxr = que->rxr;
   1688 	bool		more_tx, more_rx;
   1689 	u32		newitr = 0;
   1690 
   1691 	ixgbe_disable_queue(adapter, que->msix);
   1692 	++que->irqs;
   1693 
   1694 	more_rx = ixgbe_rxeof(que);
   1695 
   1696 	IXGBE_TX_LOCK(txr);
   1697 	more_tx = ixgbe_txeof(txr);
   1698 	/*
   1699 	** Make certain that if the stack
   1700 	** has anything queued the task gets
   1701 	** scheduled to handle it.
   1702 	*/
   1703 #ifdef IXGBE_LEGACY_TX
   1704 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1705 #else
   1706 	if (!drbr_empty(adapter->ifp, txr->br))
   1707 #endif
   1708 		more_tx = 1;
   1709 	IXGBE_TX_UNLOCK(txr);
   1710 
   1711 	/* Do AIM now? */
   1712 
   1713 	if (ixgbe_enable_aim == FALSE)
   1714 		goto no_calc;
   1715 	/*
   1716 	** Do Adaptive Interrupt Moderation:
   1717         **  - Write out last calculated setting
   1718 	**  - Calculate based on average size over
   1719 	**    the last interval.
   1720 	*/
   1721         if (que->eitr_setting)
   1722                 IXGBE_WRITE_REG(&adapter->hw,
   1723                     IXGBE_EITR(que->msix), que->eitr_setting);
   1724 
   1725         que->eitr_setting = 0;
   1726 
   1727         /* Idle, do nothing */
   1728         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1729                 goto no_calc;
   1730 
   1731 	if ((txr->bytes) && (txr->packets))
   1732                	newitr = txr->bytes/txr->packets;
   1733 	if ((rxr->bytes) && (rxr->packets))
   1734 		newitr = max(newitr,
   1735 		    (rxr->bytes / rxr->packets));
   1736 	newitr += 24; /* account for hardware frame, crc */
   1737 
   1738 	/* set an upper boundary */
   1739 	newitr = min(newitr, 3000);
   1740 
   1741 	/* Be nice to the mid range */
   1742 	if ((newitr > 300) && (newitr < 1200))
   1743 		newitr = (newitr / 3);
   1744 	else
   1745 		newitr = (newitr / 2);
   1746 
   1747         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1748                 newitr |= newitr << 16;
   1749         else
   1750                 newitr |= IXGBE_EITR_CNT_WDIS;
   1751 
   1752         /* save for next interrupt */
   1753         que->eitr_setting = newitr;
   1754 
   1755         /* Reset state */
   1756         txr->bytes = 0;
   1757         txr->packets = 0;
   1758         rxr->bytes = 0;
   1759         rxr->packets = 0;
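        /*
         * Worked example with hypothetical traffic: 65536 bytes
         * in 44 packets gives newitr = 1489; +24 = 1513; under
         * the 3000 cap; above the 300-1200 mid range, so halved
         * to 756 and written to EITR on the next interrupt.
         */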
   1760 
   1761 no_calc:
   1762 	if (more_tx || more_rx)
   1763 		softint_schedule(que->que_si);
   1764 	else /* Reenable this interrupt */
   1765 		ixgbe_enable_queue(adapter, que->msix);
   1766 	return;
   1767 }
   1768 
   1769 
   1770 static void
   1771 ixgbe_msix_link(void *arg)
   1772 {
   1773 	struct adapter	*adapter = arg;
   1774 	struct ixgbe_hw *hw = &adapter->hw;
   1775 	u32		reg_eicr;
   1776 
   1777 	++adapter->link_irq.ev_count;
   1778 
   1779 	/* First get the cause */
   1780 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1781 	/* Clear interrupt with write */
   1782 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1783 
   1784 	/* Link status change */
   1785 	if (reg_eicr & IXGBE_EICR_LSC)
   1786 		softint_schedule(adapter->link_si);
   1787 
   1788 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1789 #ifdef IXGBE_FDIR
   1790 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1791 			/* This is probably overkill :) */
   1792 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1793 				return;
   1794                 	/* Disable the interrupt */
   1795 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1796 			softint_schedule(adapter->fdir_si);
   1797 		} else
   1798 #endif
   1799 		if (reg_eicr & IXGBE_EICR_ECC) {
   1800                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1801 			    "Please Reboot!!\n");
   1802 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
    1803 		} else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1806                 	/* Clear the interrupt */
   1807                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1808 			softint_schedule(adapter->msf_si);
   1809         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1810                 	/* Clear the interrupt */
   1811                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1812 			softint_schedule(adapter->mod_si);
   1813 		}
   1814         }
   1815 
   1816 	/* Check for fan failure */
   1817 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1818 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1819                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1820 		    "REPLACE IMMEDIATELY!!\n");
   1821 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1822 	}
   1823 
   1824 	/* Check for over temp condition */
   1825 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1826 	    (reg_eicr & IXGBE_EICR_TS)) {
   1827 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1828 		    "PHY IS SHUT DOWN!!\n");
   1829 		device_printf(adapter->dev, "System shutdown required\n");
   1830 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1831 	}
   1832 
   1833 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1834 	return;
   1835 }
   1836 #endif
   1837 
   1838 /*********************************************************************
   1839  *
   1840  *  Media Ioctl callback
   1841  *
   1842  *  This routine is called whenever the user queries the status of
   1843  *  the interface using ifconfig.
   1844  *
   1845  **********************************************************************/
   1846 static void
   1847 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1848 {
   1849 	struct adapter *adapter = ifp->if_softc;
   1850 
   1851 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1852 	IXGBE_CORE_LOCK(adapter);
   1853 	ixgbe_update_link_status(adapter);
   1854 
   1855 	ifmr->ifm_status = IFM_AVALID;
   1856 	ifmr->ifm_active = IFM_ETHER;
   1857 
   1858 	if (!adapter->link_active) {
   1859 		IXGBE_CORE_UNLOCK(adapter);
   1860 		return;
   1861 	}
   1862 
   1863 	ifmr->ifm_status |= IFM_ACTIVE;
   1864 
   1865 	switch (adapter->link_speed) {
   1866 		case IXGBE_LINK_SPEED_100_FULL:
   1867 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1868 			break;
   1869 		case IXGBE_LINK_SPEED_1GB_FULL:
   1870 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1871 			break;
   1872 		case IXGBE_LINK_SPEED_10GB_FULL:
   1873 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1874 			break;
   1875 	}
   1876 
   1877 	IXGBE_CORE_UNLOCK(adapter);
   1878 
   1879 	return;
   1880 }
   1881 
   1882 /*********************************************************************
   1883  *
   1884  *  Media Ioctl callback
   1885  *
   1886  *  This routine is called when the user changes speed/duplex using
    1887  *  the media/mediaopt options of ifconfig.
   1888  *
   1889  **********************************************************************/
   1890 static int
   1891 ixgbe_media_change(struct ifnet * ifp)
   1892 {
   1893 	struct adapter *adapter = ifp->if_softc;
   1894 	struct ifmedia *ifm = &adapter->media;
   1895 
   1896 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1897 
   1898 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1899 		return (EINVAL);
   1900 
   1901         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1902         case IFM_AUTO:
   1903                 adapter->hw.phy.autoneg_advertised =
   1904 		    IXGBE_LINK_SPEED_100_FULL |
   1905 		    IXGBE_LINK_SPEED_1GB_FULL |
   1906 		    IXGBE_LINK_SPEED_10GB_FULL;
   1907                 break;
   1908         default:
   1909                 device_printf(adapter->dev, "Only auto media type\n");
   1910 		return (EINVAL);
   1911         }
   1912 
   1913 	return (0);
   1914 }
   1915 
   1916 /*********************************************************************
   1917  *
   1918  *  This routine maps the mbufs to tx descriptors, allowing the
   1919  *  TX engine to transmit the packets.
   1920  *  	- return 0 on success, positive on failure
   1921  *
   1922  **********************************************************************/
   1923 
   1924 static int
   1925 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1926 {
   1927 	struct m_tag *mtag;
   1928 	struct adapter  *adapter = txr->adapter;
   1929 	struct ethercom *ec = &adapter->osdep.ec;
   1930 	u32		olinfo_status = 0, cmd_type_len;
   1931 	int             i, j, error;
   1932 	int		first;
   1933 	bus_dmamap_t	map;
   1934 	struct ixgbe_tx_buf *txbuf;
   1935 	union ixgbe_adv_tx_desc *txd = NULL;
   1936 
   1937 	/* Basic descriptor defines */
   1938         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1939 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1940 
   1941 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1942         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1943 
   1944         /*
   1945          * Important to capture the first descriptor
   1946          * used because it will contain the index of
   1947          * the one we tell the hardware to report back
   1948          */
   1949         first = txr->next_avail_desc;
   1950 	txbuf = &txr->tx_buffers[first];
   1951 	map = txbuf->map;
   1952 
   1953 	/*
   1954 	 * Map the packet for DMA.
   1955 	 */
   1956 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1957 	    m_head, BUS_DMA_NOWAIT);
   1958 
   1959 	if (__predict_false(error)) {
   1960 
   1961 		switch (error) {
   1962 		case EAGAIN:
   1963 			adapter->eagain_tx_dma_setup.ev_count++;
   1964 			return EAGAIN;
   1965 		case ENOMEM:
   1966 			adapter->enomem_tx_dma_setup.ev_count++;
   1967 			return EAGAIN;
   1968 		case EFBIG:
   1969 			/*
   1970 			 * XXX Try it again?
   1971 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   1972 			 */
   1973 			adapter->efbig_tx_dma_setup.ev_count++;
   1974 			return error;
   1975 		case EINVAL:
   1976 			adapter->einval_tx_dma_setup.ev_count++;
   1977 			return error;
   1978 		default:
   1979 			adapter->other_tx_dma_setup.ev_count++;
   1980 			return error;
   1981 		}
   1982 	}
   1983 
   1984 	/* Make certain there are enough descriptors */
   1985 	if (map->dm_nsegs > txr->tx_avail - 2) {
   1986 		txr->no_desc_avail.ev_count++;
   1987 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   1988 		return EAGAIN;
   1989 	}
   1990 
   1991 	/*
   1992 	** Set up the appropriate offload context
   1993 	** this will consume the first descriptor
   1994 	*/
   1995 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   1996 	if (__predict_false(error)) {
   1997 		return (error);
   1998 	}
   1999 
   2000 #ifdef IXGBE_FDIR
   2001 	/* Do the flow director magic */
   2002 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2003 		++txr->atr_count;
   2004 		if (txr->atr_count >= atr_sample_rate) {
   2005 			ixgbe_atr(txr, m_head);
   2006 			txr->atr_count = 0;
   2007 		}
   2008 	}
   2009 #endif
   2010 
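	/*
	 * One advanced data descriptor is filled per DMA segment;
	 * EOP and RS are OR'ed into the last one below so the
	 * hardware reports a single completion for the frame.
	 */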
   2011 	i = txr->next_avail_desc;
   2012 	for (j = 0; j < map->dm_nsegs; j++) {
   2013 		bus_size_t seglen;
   2014 		bus_addr_t segaddr;
   2015 
   2016 		txbuf = &txr->tx_buffers[i];
   2017 		txd = &txr->tx_base[i];
   2018 		seglen = map->dm_segs[j].ds_len;
   2019 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2020 
   2021 		txd->read.buffer_addr = segaddr;
   2022 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2023 		    cmd_type_len | seglen);
   2024 		txd->read.olinfo_status = htole32(olinfo_status);
   2025 
   2026 		if (++i == txr->num_desc)
   2027 			i = 0;
   2028 	}
   2029 
   2030 	txd->read.cmd_type_len |=
   2031 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2032 	txr->tx_avail -= map->dm_nsegs;
   2033 	txr->next_avail_desc = i;
   2034 
   2035 	txbuf->m_head = m_head;
   2036 	/*
    2037 	** Here we swap the maps so that the last descriptor,
    2038 	** which gets the completion interrupt, has the real
    2039 	** map, and the first descriptor gets the unused map
    2040 	** from this descriptor.
   2041 	*/
   2042 	txr->tx_buffers[first].map = txbuf->map;
   2043 	txbuf->map = map;
   2044 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2045 	    BUS_DMASYNC_PREWRITE);
   2046 
   2047         /* Set the EOP descriptor that will be marked done */
   2048         txbuf = &txr->tx_buffers[first];
   2049 	txbuf->eop = txd;
   2050 
   2051         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2052 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2053 	/*
    2054 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
   2055 	 * hardware that this frame is available to transmit.
   2056 	 */
   2057 	++txr->total_packets.ev_count;
   2058 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2059 
   2060 	return 0;
   2061 }
   2062 
   2063 static void
   2064 ixgbe_set_promisc(struct adapter *adapter)
   2065 {
   2066 	struct ether_multi *enm;
   2067 	struct ether_multistep step;
   2068 	u_int32_t       reg_rctl;
   2069 	struct ethercom *ec = &adapter->osdep.ec;
   2070 	struct ifnet   *ifp = adapter->ifp;
   2071 	int		mcnt = 0;
   2072 
   2073 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2074 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2075 	if (ifp->if_flags & IFF_ALLMULTI)
   2076 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2077 	else {
   2078 		ETHER_FIRST_MULTI(step, ec, enm);
   2079 		while (enm != NULL) {
   2080 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2081 				break;
   2082 			mcnt++;
   2083 			ETHER_NEXT_MULTI(step, enm);
   2084 		}
   2085 	}
   2086 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2087 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2088 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2089 
   2090 	if (ifp->if_flags & IFF_PROMISC) {
   2091 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2092 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2093 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2094 		reg_rctl |= IXGBE_FCTRL_MPE;
   2095 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2096 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2097 	}
   2098 	return;
   2099 }
   2100 
   2101 
   2102 /*********************************************************************
   2103  *  Multicast Update
   2104  *
   2105  *  This routine is called whenever multicast address list is updated.
   2106  *
   2107  **********************************************************************/
   2108 #define IXGBE_RAR_ENTRIES 16
   2109 
   2110 static void
   2111 ixgbe_set_multi(struct adapter *adapter)
   2112 {
   2113 	struct ether_multi *enm;
   2114 	struct ether_multistep step;
   2115 	u32	fctrl;
   2116 	u8	*mta;
   2117 	u8	*update_ptr;
   2118 	int	mcnt = 0;
   2119 	struct ethercom *ec = &adapter->osdep.ec;
   2120 	struct ifnet   *ifp = adapter->ifp;
   2121 
   2122 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2123 
   2124 	mta = adapter->mta;
   2125 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2126 	    MAX_NUM_MULTICAST_ADDRESSES);
   2127 
   2128 	ifp->if_flags &= ~IFF_ALLMULTI;
   2129 	ETHER_FIRST_MULTI(step, ec, enm);
   2130 	while (enm != NULL) {
   2131 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2132 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2133 			ETHER_ADDR_LEN) != 0)) {
   2134 			ifp->if_flags |= IFF_ALLMULTI;
   2135 			break;
   2136 		}
   2137 		bcopy(enm->enm_addrlo,
   2138 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2139 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2140 		mcnt++;
   2141 		ETHER_NEXT_MULTI(step, enm);
   2142 	}
   2143 
   2144 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2145 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2146 	if (ifp->if_flags & IFF_PROMISC)
   2147 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2148 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2149 		fctrl |= IXGBE_FCTRL_MPE;
   2150 	}
   2151 
   2152 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2153 
   2154 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2155 		update_ptr = mta;
   2156 		ixgbe_update_mc_addr_list(&adapter->hw,
   2157 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2158 	}
   2159 
   2160 	return;
   2161 }
   2162 
   2163 /*
    2164  * This is an iterator function needed by the multicast
    2165  * shared code. It feeds the shared code routine the
    2166  * addresses collected in ixgbe_set_multi()'s array one by one.
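 * A sketch of the calling pattern: the shared code invokes
 *	next = ixgbe_mc_array_itr(hw, &update_ptr, &vmdq);
 * mcnt times, each call returning the next 6-byte address
 * and advancing update_ptr past it.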
   2167  */
   2168 static u8 *
   2169 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2170 {
   2171 	u8 *addr = *update_ptr;
   2172 	u8 *newptr;
   2173 	*vmdq = 0;
   2174 
   2175 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2176 	*update_ptr = newptr;
   2177 	return addr;
   2178 }
   2179 
   2180 
   2181 /*********************************************************************
   2182  *  Timer routine
   2183  *
    2184  *  This routine checks the link status, updates statistics,
   2185  *  and runs the watchdog check.
   2186  *
   2187  **********************************************************************/
   2188 
   2189 static void
   2190 ixgbe_local_timer1(void *arg)
   2191 {
   2192 	struct adapter	*adapter = arg;
   2193 	device_t	dev = adapter->dev;
   2194 	struct ix_queue *que = adapter->queues;
   2195 	struct tx_ring	*txr = adapter->tx_rings;
   2196 	int		hung = 0, paused = 0;
   2197 
   2198 	KASSERT(mutex_owned(&adapter->core_mtx));
   2199 
   2200 	/* Check for pluggable optics */
   2201 	if (adapter->sfp_probe)
   2202 		if (!ixgbe_sfp_probe(adapter))
   2203 			goto out; /* Nothing to do */
   2204 
   2205 	ixgbe_update_link_status(adapter);
   2206 	ixgbe_update_stats_counters(adapter);
   2207 
   2208 	/*
   2209 	 * If the interface has been paused
   2210 	 * then don't do the watchdog check
   2211 	 */
   2212 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2213 		paused = 1;
   2214 
   2215 	/*
   2216 	** Check the TX queues status
   2217 	**      - watchdog only if all queues show hung
   2218 	*/
   2219 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2220 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2221 		    (paused == 0))
   2222 			++hung;
   2223 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2224 			softint_schedule(que->que_si);
   2225 	}
    2226 	/* Only truly watchdog if all queues show hung */
   2227 	if (hung == adapter->num_queues)
   2228 		goto watchdog;
   2229 
   2230 out:
   2231 	ixgbe_rearm_queues(adapter, adapter->que_mask);
   2232 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2233 	return;
   2234 
   2235 watchdog:
   2236 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
   2237 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2238 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2239 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2240 	device_printf(dev, "TX(%d) desc avail = %d, "
    2241 	    "Next TX to Clean = %d\n",
   2242 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2243 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2244 	adapter->watchdog_events.ev_count++;
   2245 	ixgbe_init_locked(adapter);
   2246 }
   2247 
   2248 static void
   2249 ixgbe_local_timer(void *arg)
   2250 {
   2251 	struct adapter *adapter = arg;
   2252 
   2253 	IXGBE_CORE_LOCK(adapter);
   2254 	ixgbe_local_timer1(adapter);
   2255 	IXGBE_CORE_UNLOCK(adapter);
   2256 }
   2257 
   2258 /*
   2259 ** Note: this routine updates the OS on the link state
   2260 **	the real check of the hardware only happens with
   2261 **	a link interrupt.
   2262 */
   2263 static void
   2264 ixgbe_update_link_status(struct adapter *adapter)
   2265 {
   2266 	struct ifnet	*ifp = adapter->ifp;
   2267 	device_t dev = adapter->dev;
    2268 
    2270 	if (adapter->link_up) {
   2271 		if (adapter->link_active == FALSE) {
   2272 			if (bootverbose)
    2273 				device_printf(dev, "Link is up %d Gbps %s\n",
    2274 				    ((adapter->link_speed == 128) ? 10 : 1),
   2275 				    "Full Duplex");
   2276 			adapter->link_active = TRUE;
   2277 			/* Update any Flow Control changes */
   2278 			ixgbe_fc_enable(&adapter->hw);
   2279 			if_link_state_change(ifp, LINK_STATE_UP);
   2280 		}
   2281 	} else { /* Link down */
   2282 		if (adapter->link_active == TRUE) {
   2283 			if (bootverbose)
   2284 				device_printf(dev,"Link is Down\n");
   2285 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2286 			adapter->link_active = FALSE;
   2287 		}
   2288 	}
   2289 
   2290 	return;
   2291 }
   2292 
   2293 
   2294 static void
   2295 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2296 {
   2297 	struct adapter *adapter = ifp->if_softc;
   2298 
   2299 	IXGBE_CORE_LOCK(adapter);
   2300 	ixgbe_stop(adapter);
   2301 	IXGBE_CORE_UNLOCK(adapter);
   2302 }
   2303 
   2304 /*********************************************************************
   2305  *
   2306  *  This routine disables all traffic on the adapter by issuing a
   2307  *  global reset on the MAC and deallocates TX/RX buffers.
   2308  *
   2309  **********************************************************************/
   2310 
   2311 static void
   2312 ixgbe_stop(void *arg)
   2313 {
   2314 	struct ifnet   *ifp;
   2315 	struct adapter *adapter = arg;
   2316 	struct ixgbe_hw *hw = &adapter->hw;
   2317 	ifp = adapter->ifp;
   2318 
   2319 	KASSERT(mutex_owned(&adapter->core_mtx));
   2320 
   2321 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2322 	ixgbe_disable_intr(adapter);
   2323 	callout_stop(&adapter->timer);
   2324 
   2325 	/* Let the stack know...*/
   2326 	ifp->if_flags &= ~IFF_RUNNING;
   2327 
   2328 	ixgbe_reset_hw(hw);
   2329 	hw->adapter_stopped = FALSE;
   2330 	ixgbe_stop_adapter(hw);
   2331 	/* Turn off the laser */
   2332 	if (hw->phy.multispeed_fiber)
   2333 		ixgbe_disable_tx_laser(hw);
   2334 
   2335 	/* reprogram the RAR[0] in case user changed it. */
   2336 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2337 
   2338 	return;
   2339 }
   2340 
   2341 
   2342 /*********************************************************************
   2343  *
   2344  *  Determine hardware revision.
   2345  *
   2346  **********************************************************************/
   2347 static void
   2348 ixgbe_identify_hardware(struct adapter *adapter)
   2349 {
   2350 	pcitag_t tag;
   2351 	pci_chipset_tag_t pc;
   2352 	pcireg_t subid, id;
   2353 	struct ixgbe_hw *hw = &adapter->hw;
   2354 
   2355 	pc = adapter->osdep.pc;
   2356 	tag = adapter->osdep.tag;
   2357 
   2358 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2359 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2360 
   2361 	/* Save off the information about this board */
   2362 	hw->vendor_id = PCI_VENDOR(id);
   2363 	hw->device_id = PCI_PRODUCT(id);
   2364 	hw->revision_id =
   2365 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2366 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2367 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2368 
   2369 	/* We need this here to set the num_segs below */
   2370 	ixgbe_set_mac_type(hw);
   2371 
   2372 	/* Pick up the 82599 and VF settings */
   2373 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2374 		hw->phy.smart_speed = ixgbe_smart_speed;
   2375 		adapter->num_segs = IXGBE_82599_SCATTER;
   2376 	} else
   2377 		adapter->num_segs = IXGBE_82598_SCATTER;
   2378 
   2379 	return;
   2380 }
   2381 
   2382 /*********************************************************************
   2383  *
   2384  *  Determine optic type
   2385  *
   2386  **********************************************************************/
   2387 static void
   2388 ixgbe_setup_optics(struct adapter *adapter)
   2389 {
   2390 	struct ixgbe_hw *hw = &adapter->hw;
   2391 	int		layer;
   2392 
   2393 	layer = ixgbe_get_supported_physical_layer(hw);
   2394 
   2395 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2396 		adapter->optics = IFM_10G_T;
   2397 		return;
   2398 	}
   2399 
   2400 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2401 		adapter->optics = IFM_1000_T;
   2402 		return;
   2403 	}
   2404 
   2405 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2406 		adapter->optics = IFM_1000_SX;
   2407 		return;
   2408 	}
   2409 
   2410 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2411 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2412 		adapter->optics = IFM_10G_LR;
   2413 		return;
   2414 	}
   2415 
   2416 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2417 		adapter->optics = IFM_10G_SR;
   2418 		return;
   2419 	}
   2420 
   2421 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2422 		adapter->optics = IFM_10G_TWINAX;
   2423 		return;
   2424 	}
   2425 
   2426 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2427 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2428 		adapter->optics = IFM_10G_CX4;
   2429 		return;
   2430 	}
   2431 
   2432 	/* If we get here just set the default */
   2433 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2434 	return;
   2435 }
   2436 
   2437 /*********************************************************************
   2438  *
   2439  *  Setup the Legacy or MSI Interrupt handler
   2440  *
   2441  **********************************************************************/
   2442 static int
   2443 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2444 {
   2445 	device_t	dev = adapter->dev;
   2446 	struct		ix_queue *que = adapter->queues;
   2447 #ifndef IXGBE_LEGACY_TX
   2448 	struct tx_ring		*txr = adapter->tx_rings;
   2449 #endif
   2450 	char intrbuf[PCI_INTRSTR_LEN];
   2451 #if 0
   2452 	int		rid = 0;
   2453 
   2454 	/* MSI RID at 1 */
   2455 	if (adapter->msix == 1)
   2456 		rid = 1;
   2457 #endif
   2458 
   2459 	/* We allocate a single interrupt resource */
    2460 	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2461 		aprint_error_dev(dev, "unable to map interrupt\n");
   2462 		return ENXIO;
   2463 	} else {
   2464 		aprint_normal_dev(dev, "interrupting at %s\n",
   2465 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2466 			intrbuf, sizeof(intrbuf)));
   2467 	}
   2468 
   2469 	/*
   2470 	 * Try allocating a fast interrupt and the associated deferred
   2471 	 * processing contexts.
   2472 	 */
   2473 #ifndef IXGBE_LEGACY_TX
   2474 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2475 	    txr);
   2476 #endif
   2477 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2478 
   2479 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2480 	adapter->link_si =
   2481 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2482 	adapter->mod_si =
   2483 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2484 	adapter->msf_si =
   2485 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2486 
   2487 #ifdef IXGBE_FDIR
   2488 	adapter->fdir_si =
   2489 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2490 #endif
   2491 	if (que->que_si == NULL ||
   2492 	    adapter->link_si == NULL ||
   2493 	    adapter->mod_si == NULL ||
   2494 #ifdef IXGBE_FDIR
   2495 	    adapter->fdir_si == NULL ||
   2496 #endif
   2497 	    adapter->msf_si == NULL) {
   2498 		aprint_error_dev(dev,
   2499 		    "could not establish software interrupts\n");
   2500 		return ENXIO;
   2501 	}
   2502 
   2503 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2504 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2505 	if (adapter->osdep.intr == NULL) {
   2506 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2507 		softint_disestablish(que->que_si);
   2508 		softint_disestablish(adapter->link_si);
   2509 		softint_disestablish(adapter->mod_si);
   2510 		softint_disestablish(adapter->msf_si);
   2511 #ifdef IXGBE_FDIR
   2512 		softint_disestablish(adapter->fdir_si);
   2513 #endif
   2514 		return ENXIO;
   2515 	}
   2516 	/* For simplicity in the handlers */
   2517 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2518 
   2519 	return (0);
   2520 }
   2521 
   2522 
   2523 /*********************************************************************
   2524  *
   2525  *  Setup MSIX Interrupt resources and handlers
   2526  *
   2527  **********************************************************************/
   2528 static int
   2529 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2530 {
   2531 #if !defined(NETBSD_MSI_OR_MSIX)
   2532 	return 0;
   2533 #else
   2534 	device_t        dev = adapter->dev;
   2535 	struct 		ix_queue *que = adapter->queues;
   2536 	struct  	tx_ring *txr = adapter->tx_rings;
   2537 	int 		error, rid, vector = 0;
   2538 
   2539 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2540 		rid = vector + 1;
   2541 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
   2542 		    RF_SHAREABLE | RF_ACTIVE);
   2543 		if (que->res == NULL) {
   2544 			aprint_error_dev(dev,"Unable to allocate"
   2545 		    	    " bus resource: que interrupt [%d]\n", vector);
   2546 			return (ENXIO);
   2547 		}
   2548 		/* Set the handler function */
   2549 		error = bus_setup_intr(dev, que->res,
   2550 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2551 		    ixgbe_msix_que, que, &que->tag);
   2552 		if (error) {
   2553 			que->res = NULL;
   2554 			aprint_error_dev(dev,
   2555 			    "Failed to register QUE handler\n");
   2556 			return error;
   2557 		}
   2558 #if __FreeBSD_version >= 800504
   2559 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
   2560 #endif
   2561 		que->msix = vector;
   2562         	adapter->que_mask |= (u64)(1 << que->msix);
   2563 		/*
   2564 		** Bind the msix vector, and thus the
   2565 		** ring to the corresponding cpu.
   2566 		*/
   2567 		if (adapter->num_queues > 1)
   2568 			bus_bind_intr(dev, que->res, i);
   2569 
   2570 #ifndef IXGBE_LEGACY_TX
   2571 		txr->txq_si = softint_establish(SOFTINT_NET,
   2572 		    ixgbe_deferred_mq_start, txr);
   2573 #endif
   2574 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2575 		    que);
   2576 		if (que->que_si == NULL) {
   2577 			aprint_error_dev(dev,
   2578 			    "could not establish software interrupt\n");
   2579 		}
   2580 	}
   2581 
   2582 	/* and Link */
   2583 	rid = vector + 1;
   2584 	adapter->res = bus_alloc_resource_any(dev,
   2585     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
   2586 	if (!adapter->res) {
   2587 		aprint_error_dev(dev,"Unable to allocate bus resource: "
   2588 		    "Link interrupt [%d]\n", rid);
   2589 		return (ENXIO);
   2590 	}
   2591 	/* Set the link handler function */
   2592 	error = bus_setup_intr(dev, adapter->res,
   2593 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2594 	    ixgbe_msix_link, adapter, &adapter->tag);
   2595 	if (error) {
   2596 		adapter->res = NULL;
   2597 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2598 		return (error);
   2599 	}
   2600 #if __FreeBSD_version >= 800504
   2601 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
   2602 #endif
   2603 	adapter->linkvec = vector;
   2604 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2605 	adapter->link_si =
   2606 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2607 	adapter->mod_si =
   2608 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2609 	adapter->msf_si =
   2610 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2611 #ifdef IXGBE_FDIR
   2612 	adapter->fdir_si =
   2613 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2614 #endif
   2615 
   2616 	return (0);
   2617 #endif
   2618 }
   2619 
   2620 /*
   2621  * Setup Either MSI/X or MSI
   2622  */
   2623 static int
   2624 ixgbe_setup_msix(struct adapter *adapter)
   2625 {
   2626 #if !defined(NETBSD_MSI_OR_MSIX)
   2627 	return 0;
   2628 #else
   2629 	device_t dev = adapter->dev;
   2630 	int rid, want, queues, msgs;
   2631 
   2632 	/* Override by tuneable */
   2633 	if (ixgbe_enable_msix == 0)
   2634 		goto msi;
   2635 
   2636 	/* First try MSI/X */
   2637 	rid = PCI_BAR(MSIX_82598_BAR);
   2638 	adapter->msix_mem = bus_alloc_resource_any(dev,
   2639 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2640        	if (!adapter->msix_mem) {
   2641 		rid += 4;	/* 82599 maps in higher BAR */
   2642 		adapter->msix_mem = bus_alloc_resource_any(dev,
   2643 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2644 	}
   2645        	if (!adapter->msix_mem) {
   2646 		/* May not be enabled */
   2647 		device_printf(adapter->dev,
    2648 		    "Unable to map MSIX table\n");
   2649 		goto msi;
   2650 	}
   2651 
   2652 	msgs = pci_msix_count(dev);
   2653 	if (msgs == 0) { /* system has msix disabled */
   2654 		bus_release_resource(dev, SYS_RES_MEMORY,
   2655 		    rid, adapter->msix_mem);
   2656 		adapter->msix_mem = NULL;
   2657 		goto msi;
   2658 	}
   2659 
   2660 	/* Figure out a reasonable auto config value */
   2661 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
   2662 
   2663 	if (ixgbe_num_queues != 0)
   2664 		queues = ixgbe_num_queues;
   2665 	/* Set max queues to 8 when autoconfiguring */
   2666 	else if ((ixgbe_num_queues == 0) && (queues > 8))
   2667 		queues = 8;
   2668 
   2669 	/*
   2670 	** Want one vector (RX/TX pair) per queue
   2671 	** plus an additional for Link.
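	** E.g. 8 queues want 8 + 1 = 9 vectors; if the hardware
	** offers fewer, we return 0 below and attach falls back
	** to the Legacy/MSI path.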
   2672 	*/
   2673 	want = queues + 1;
   2674 	if (msgs >= want)
   2675 		msgs = want;
   2676 	else {
   2677                	device_printf(adapter->dev,
   2678 		    "MSIX Configuration Problem, "
   2679 		    "%d vectors but %d queues wanted!\n",
   2680 		    msgs, want);
   2681 		return (0); /* Will go to Legacy setup */
   2682 	}
   2683 	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
   2684                	device_printf(adapter->dev,
   2685 		    "Using MSIX interrupts with %d vectors\n", msgs);
   2686 		adapter->num_queues = queues;
   2687 		return (msgs);
   2688 	}
   2689 msi:
   2690        	msgs = pci_msi_count(dev);
   2691        	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
   2692                	device_printf(adapter->dev,"Using an MSI interrupt\n");
   2693 	else
   2694                	device_printf(adapter->dev,"Using a Legacy interrupt\n");
   2695 	return (msgs);
   2696 #endif
   2697 }
   2698 
   2699 
   2700 static int
   2701 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2702 {
   2703 	pcireg_t	memtype;
   2704 	device_t        dev = adapter->dev;
   2705 	bus_addr_t addr;
   2706 	int flags;
   2707 
   2708 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2709 	switch (memtype) {
   2710 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2711 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2712 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2713 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2714 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2715 			goto map_err;
   2716 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2717 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2718 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2719 		}
   2720 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2721 		     adapter->osdep.mem_size, flags,
   2722 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2723 map_err:
   2724 			adapter->osdep.mem_size = 0;
   2725 			aprint_error_dev(dev, "unable to map BAR0\n");
   2726 			return ENXIO;
   2727 		}
   2728 		break;
   2729 	default:
   2730 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2731 		return ENXIO;
   2732 	}
   2733 
   2734 	/* Legacy defaults */
   2735 	adapter->num_queues = 1;
   2736 	adapter->hw.back = &adapter->osdep;
   2737 
   2738 	/*
   2739 	** Now setup MSI or MSI/X, should
   2740 	** return us the number of supported
   2741 	** vectors. (Will be 1 for MSI)
   2742 	*/
   2743 	adapter->msix = ixgbe_setup_msix(adapter);
   2744 	return (0);
   2745 }
   2746 
   2747 static void
   2748 ixgbe_free_pci_resources(struct adapter * adapter)
   2749 {
   2750 #if defined(NETBSD_MSI_OR_MSIX)
   2751 	struct 		ix_queue *que = adapter->queues;
   2752 	device_t	dev = adapter->dev;
   2753 #endif
   2754 	int		rid;
   2755 
   2756 #if defined(NETBSD_MSI_OR_MSIX)
   2757 	int		 memrid;
   2758 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2759 		memrid = PCI_BAR(MSIX_82598_BAR);
   2760 	else
   2761 		memrid = PCI_BAR(MSIX_82599_BAR);
   2762 
   2763 	/*
   2764 	** There is a slight possibility of a failure mode
   2765 	** in attach that will result in entering this function
   2766 	** before interrupt resources have been initialized, and
    2767 	** in that case we do not want to execute the loops below.
   2768 	** We can detect this reliably by the state of the adapter
   2769 	** res pointer.
   2770 	*/
   2771 	if (adapter->res == NULL)
   2772 		goto mem;
   2773 
   2774 	/*
   2775 	**  Release all msix queue resources:
   2776 	*/
   2777 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2778 		rid = que->msix + 1;
   2779 		if (que->tag != NULL) {
   2780 			bus_teardown_intr(dev, que->res, que->tag);
   2781 			que->tag = NULL;
   2782 		}
   2783 		if (que->res != NULL)
   2784 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2785 	}
   2786 #endif
   2787 
   2788 	/* Clean the Legacy or Link interrupt last */
   2789 	if (adapter->linkvec) /* we are doing MSIX */
   2790 		rid = adapter->linkvec + 1;
   2791 	else
    2792 		rid = (adapter->msix != 0) ? 1 : 0;
   2793 
   2794 	pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2795 	adapter->osdep.intr = NULL;
   2796 
   2797 #if defined(NETBSD_MSI_OR_MSIX)
   2798 mem:
   2799 	if (adapter->msix)
   2800 		pci_release_msi(dev);
   2801 
   2802 	if (adapter->msix_mem != NULL)
   2803 		bus_release_resource(dev, SYS_RES_MEMORY,
   2804 		    memrid, adapter->msix_mem);
   2805 #endif
   2806 
   2807 	if (adapter->osdep.mem_size != 0) {
   2808 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2809 		    adapter->osdep.mem_bus_space_handle,
   2810 		    adapter->osdep.mem_size);
   2811 	}
   2812 
   2813 	return;
   2814 }
   2815 
   2816 /*********************************************************************
   2817  *
   2818  *  Setup networking device structure and register an interface.
   2819  *
   2820  **********************************************************************/
   2821 static int
   2822 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2823 {
   2824 	struct ethercom *ec = &adapter->osdep.ec;
   2825 	struct ixgbe_hw *hw = &adapter->hw;
   2826 	struct ifnet   *ifp;
   2827 
   2828 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2829 
   2830 	ifp = adapter->ifp = &ec->ec_if;
   2831 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2832 	ifp->if_baudrate = IF_Gbps(10);
   2833 	ifp->if_init = ixgbe_init;
   2834 	ifp->if_stop = ixgbe_ifstop;
   2835 	ifp->if_softc = adapter;
   2836 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2837 	ifp->if_ioctl = ixgbe_ioctl;
   2838 #ifndef IXGBE_LEGACY_TX
   2839 	ifp->if_transmit = ixgbe_mq_start;
   2840 	ifp->if_qflush = ixgbe_qflush;
   2841 #else
   2842 	ifp->if_start = ixgbe_start;
   2843 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2844 #endif
   2845 
   2846 	if_attach(ifp);
   2847 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2848 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2849 
   2850 	adapter->max_frame_size =
   2851 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2852 
   2853 	/*
   2854 	 * Tell the upper layer(s) we support long frames.
   2855 	 */
   2856 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2857 
   2858 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2859 	ifp->if_capenable = 0;
   2860 
   2861 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2862 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2863 	ifp->if_capabilities |= IFCAP_LRO;
   2864 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2865 	    		    | ETHERCAP_VLAN_MTU;
   2866 	ec->ec_capenable = ec->ec_capabilities;
   2867 
   2868 	/*
    2869 	** Don't turn this on by default: if vlans are
    2870 	** created on another pseudo device (e.g. lagg)
    2871 	** then vlan events are not passed through, breaking
    2872 	** operation, though with HW FILTER off it works. If
    2873 	** you use vlans directly on the ixgbe driver you can
    2874 	** enable this and get full hardware tag filtering.
   2875 	*/
   2876 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2877 
   2878 	/*
   2879 	 * Specify the media types supported by this adapter and register
   2880 	 * callbacks to update media and link information
   2881 	 */
   2882 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2883 		     ixgbe_media_status);
   2884 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2885 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2886 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2887 		ifmedia_add(&adapter->media,
   2888 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2889 		ifmedia_add(&adapter->media,
   2890 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2891 	}
   2892 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   2893 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   2894 
   2895 	return (0);
   2896 }
   2897 
   2898 static void
   2899 ixgbe_config_link(struct adapter *adapter)
   2900 {
   2901 	struct ixgbe_hw *hw = &adapter->hw;
   2902 	u32	autoneg, err = 0;
   2903 	bool	sfp, negotiate;
   2904 
   2905 	sfp = ixgbe_is_sfp(hw);
   2906 
   2907 	if (sfp) {
   2908 		void *ip;
   2909 
   2910 		if (hw->phy.multispeed_fiber) {
   2911 			hw->mac.ops.setup_sfp(hw);
   2912 			ixgbe_enable_tx_laser(hw);
   2913 			ip = adapter->msf_si;
   2914 		} else {
   2915 			ip = adapter->mod_si;
   2916 		}
   2917 
   2918 		kpreempt_disable();
   2919 		softint_schedule(ip);
   2920 		kpreempt_enable();
   2921 	} else {
   2922 		if (hw->mac.ops.check_link)
   2923 			err = ixgbe_check_link(hw, &adapter->link_speed,
   2924 			    &adapter->link_up, FALSE);
   2925 		if (err)
   2926 			goto out;
   2927 		autoneg = hw->phy.autoneg_advertised;
   2928 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   2929                 	err  = hw->mac.ops.get_link_capabilities(hw,
   2930 			    &autoneg, &negotiate);
   2931 		else
   2932 			negotiate = 0;
   2933 		if (err)
   2934 			goto out;
   2935 		if (hw->mac.ops.setup_link)
   2936                 	err = hw->mac.ops.setup_link(hw,
   2937 			    autoneg, adapter->link_up);
   2938 	}
   2939 out:
   2940 	return;
   2941 }
   2942 
   2943 /********************************************************************
   2944  * Manage DMA'able memory.
   2945  *******************************************************************/
   2946 
   2947 static int
   2948 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2949 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2950 {
   2951 	device_t dev = adapter->dev;
   2952 	int             r, rsegs;
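	/*
	 * The usual NetBSD bus_dma sequence follows: create a tag,
	 * allocate DMA-safe memory, map it into kernel VA, create
	 * a map, and load it; the fail_* labels unwind these steps
	 * in reverse order on error.
	 */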
   2953 
   2954 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2955 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2956 			       size,	/* maxsize */
   2957 			       1,	/* nsegments */
   2958 			       size,	/* maxsegsize */
   2959 			       BUS_DMA_ALLOCNOW,	/* flags */
   2960 			       &dma->dma_tag);
   2961 	if (r != 0) {
   2962 		aprint_error_dev(dev,
   2963 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2964 		goto fail_0;
   2965 	}
   2966 
   2967 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2968 		size,
   2969 		dma->dma_tag->dt_alignment,
   2970 		dma->dma_tag->dt_boundary,
   2971 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2972 	if (r != 0) {
   2973 		aprint_error_dev(dev,
   2974 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2975 		goto fail_1;
   2976 	}
   2977 
   2978 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2979 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2980 	if (r != 0) {
   2981 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2982 		    __func__, r);
   2983 		goto fail_2;
   2984 	}
   2985 
   2986 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2987 	if (r != 0) {
    2988 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2989 		    __func__, r);
   2990 		goto fail_3;
   2991 	}
   2992 
   2993 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2994 			    size,
   2995 			    NULL,
   2996 			    mapflags | BUS_DMA_NOWAIT);
   2997 	if (r != 0) {
   2998 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2999 		    __func__, r);
   3000 		goto fail_4;
   3001 	}
   3002 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3003 	dma->dma_size = size;
   3004 	return 0;
   3005 fail_4:
   3006 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3007 fail_3:
   3008 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3009 fail_2:
   3010 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3011 fail_1:
   3012 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3013 fail_0:
   3014 	return r;
   3015 }
   3016 
   3017 static void
   3018 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3019 {
   3020 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3021 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3022 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3023 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3024 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3025 }
   3026 
   3027 
   3028 /*********************************************************************
   3029  *
   3030  *  Allocate memory for the transmit and receive rings, and then
   3031  *  the descriptors associated with each, called only once at attach.
   3032  *
   3033  **********************************************************************/
   3034 static int
   3035 ixgbe_allocate_queues(struct adapter *adapter)
   3036 {
   3037 	device_t	dev = adapter->dev;
   3038 	struct ix_queue	*que;
   3039 	struct tx_ring	*txr;
   3040 	struct rx_ring	*rxr;
   3041 	int rsize, tsize, error = IXGBE_SUCCESS;
   3042 	int txconf = 0, rxconf = 0;
   3043 
   3044         /* First allocate the top level queue structs */
   3045         if (!(adapter->queues =
   3046             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   3047             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3048                 aprint_error_dev(dev, "Unable to allocate queue memory\n");
   3049                 error = ENOMEM;
   3050                 goto fail;
   3051         }
   3052 
   3053 	/* First allocate the TX ring struct memory */
   3054 	if (!(adapter->tx_rings =
   3055 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3056 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3057 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3058 		error = ENOMEM;
   3059 		goto tx_fail;
   3060 	}
   3061 
   3062 	/* Next allocate the RX */
   3063 	if (!(adapter->rx_rings =
   3064 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3065 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3066 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3067 		error = ENOMEM;
   3068 		goto rx_fail;
   3069 	}
   3070 
   3071 	/* For the ring itself */
   3072 	tsize = roundup2(adapter->num_tx_desc *
   3073 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   3074 
   3075 	/*
   3076 	 * Now set up the TX queues, txconf is needed to handle the
   3077 	 * possibility that things fail midcourse and we need to
   3078 	 * undo memory gracefully
   3079 	 */
   3080 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3081 		/* Set up some basics */
   3082 		txr = &adapter->tx_rings[i];
   3083 		txr->adapter = adapter;
   3084 		txr->me = i;
   3085 		txr->num_desc = adapter->num_tx_desc;
   3086 
   3087 		/* Initialize the TX side lock */
   3088 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3089 		    device_xname(dev), txr->me);
   3090 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3091 
   3092 		if (ixgbe_dma_malloc(adapter, tsize,
   3093 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3094 			aprint_error_dev(dev,
   3095 			    "Unable to allocate TX Descriptor memory\n");
   3096 			error = ENOMEM;
   3097 			goto err_tx_desc;
   3098 		}
   3099 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3100 		bzero((void *)txr->tx_base, tsize);
   3101 
   3102         	/* Now allocate transmit buffers for the ring */
   3103         	if (ixgbe_allocate_transmit_buffers(txr)) {
   3104 			aprint_error_dev(dev,
   3105 			    "Critical Failure setting up transmit buffers\n");
   3106 			error = ENOMEM;
   3107 			goto err_tx_desc;
   3108         	}
   3109 #ifndef IXGBE_LEGACY_TX
   3110 		/* Allocate a buf ring */
   3111 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3112 		    M_WAITOK, &txr->tx_mtx);
   3113 		if (txr->br == NULL) {
   3114 			aprint_error_dev(dev,
   3115 			    "Critical Failure setting up buf ring\n");
   3116 			error = ENOMEM;
   3117 			goto err_tx_desc;
    3118 		}
   3119 #endif
   3120 	}
   3121 
   3122 	/*
   3123 	 * Next the RX queues...
   3124 	 */
   3125 	rsize = roundup2(adapter->num_rx_desc *
   3126 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3127 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3128 		rxr = &adapter->rx_rings[i];
   3129 		/* Set up some basics */
   3130 		rxr->adapter = adapter;
   3131 		rxr->me = i;
   3132 		rxr->num_desc = adapter->num_rx_desc;
   3133 
   3134 		/* Initialize the RX side lock */
   3135 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3136 		    device_xname(dev), rxr->me);
   3137 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3138 
   3139 		if (ixgbe_dma_malloc(adapter, rsize,
   3140 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3141 			aprint_error_dev(dev,
    3142 			    "Unable to allocate RX Descriptor memory\n");
   3143 			error = ENOMEM;
   3144 			goto err_rx_desc;
   3145 		}
   3146 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3147 		bzero((void *)rxr->rx_base, rsize);
   3148 
    3149 		/* Allocate receive buffers for the ring */
   3150 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3151 			aprint_error_dev(dev,
   3152 			    "Critical Failure setting up receive buffers\n");
   3153 			error = ENOMEM;
   3154 			goto err_rx_desc;
   3155 		}
   3156 	}
   3157 
   3158 	/*
   3159 	** Finally set up the queue holding structs
   3160 	*/
   3161 	for (int i = 0; i < adapter->num_queues; i++) {
   3162 		que = &adapter->queues[i];
   3163 		que->adapter = adapter;
   3164 		que->txr = &adapter->tx_rings[i];
   3165 		que->rxr = &adapter->rx_rings[i];
   3166 	}
   3167 
   3168 	return (0);
   3169 
   3170 err_rx_desc:
   3171 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3172 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3173 err_tx_desc:
   3174 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3175 		ixgbe_dma_free(adapter, &txr->txdma);
   3176 	free(adapter->rx_rings, M_DEVBUF);
   3177 rx_fail:
   3178 	free(adapter->tx_rings, M_DEVBUF);
   3179 tx_fail:
   3180 	free(adapter->queues, M_DEVBUF);
   3181 fail:
   3182 	return (error);
   3183 }
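
/*
 * Editor's note: a minimal, self-contained sketch (not driver code) of the
 * unwind idiom used above.  The txconf/rxconf counters record how many ring
 * setups completed, so the error path frees exactly that many and no more.
 * All demo_* names are hypothetical.
 */
#if 0
struct demo_ring { void *mem; };

static int demo_setup(struct demo_ring *);
static void demo_teardown(struct demo_ring *);

static int
demo_alloc(struct demo_ring *rings, int n)
{
	int conf = 0;

	for (int i = 0; i < n; i++, conf++)
		if (demo_setup(&rings[i]) != 0)
			goto err;
	return 0;
err:
	/* Unwind only the rings whose setup completed. */
	for (struct demo_ring *r = rings; conf > 0; r++, conf--)
		demo_teardown(r);
	return ENOMEM;
}
#endif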
   3184 
   3185 /*********************************************************************
   3186  *
   3187  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3188  *  the information needed to transmit a packet on the wire. This is
    3189  *  called only once at attach; setup is done on every reset.
   3190  *
   3191  **********************************************************************/
   3192 static int
   3193 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3194 {
   3195 	struct adapter *adapter = txr->adapter;
   3196 	device_t dev = adapter->dev;
   3197 	struct ixgbe_tx_buf *txbuf;
   3198 	int error, i;
   3199 
   3200 	/*
   3201 	 * Setup DMA descriptor areas.
   3202 	 */
   3203 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3204 			       1, 0,		/* alignment, bounds */
   3205 			       IXGBE_TSO_SIZE,		/* maxsize */
   3206 			       adapter->num_segs,	/* nsegments */
   3207 			       PAGE_SIZE,		/* maxsegsize */
   3208 			       0,			/* flags */
   3209 			       &txr->txtag))) {
    3210 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3211 		goto fail;
   3212 	}
   3213 
   3214 	if (!(txr->tx_buffers =
   3215 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3216 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3217 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3218 		error = ENOMEM;
   3219 		goto fail;
   3220 	}
   3221 
    3222 	/* Create the descriptor buffer dma maps */
   3223 	txbuf = txr->tx_buffers;
   3224 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3225 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3226 		if (error != 0) {
   3227 			aprint_error_dev(dev,
   3228 			    "Unable to create TX DMA map (%d)\n", error);
   3229 			goto fail;
   3230 		}
   3231 	}
   3232 
   3233 	return 0;
   3234 fail:
    3235 	/* We free all; this handles the case where we fail in the middle */
   3236 	ixgbe_free_transmit_structures(adapter);
   3237 	return (error);
   3238 }
   3239 
   3240 /*********************************************************************
   3241  *
   3242  *  Initialize a transmit ring.
   3243  *
   3244  **********************************************************************/
   3245 static void
   3246 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3247 {
   3248 	struct adapter *adapter = txr->adapter;
   3249 	struct ixgbe_tx_buf *txbuf;
   3250 	int i;
   3251 #ifdef DEV_NETMAP
   3252 	struct netmap_adapter *na = NA(adapter->ifp);
   3253 	struct netmap_slot *slot;
   3254 #endif /* DEV_NETMAP */
   3255 
   3256 	/* Clear the old ring contents */
   3257 	IXGBE_TX_LOCK(txr);
   3258 #ifdef DEV_NETMAP
   3259 	/*
   3260 	 * (under lock): if in netmap mode, do some consistency
   3261 	 * checks and set slot to entry 0 of the netmap ring.
   3262 	 */
   3263 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3264 #endif /* DEV_NETMAP */
   3265 	bzero((void *)txr->tx_base,
   3266 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3267 	/* Reset indices */
   3268 	txr->next_avail_desc = 0;
   3269 	txr->next_to_clean = 0;
   3270 
   3271 	/* Free any existing tx buffers. */
    3272 	txbuf = txr->tx_buffers;
   3273 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3274 		if (txbuf->m_head != NULL) {
   3275 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3276 			    0, txbuf->m_head->m_pkthdr.len,
   3277 			    BUS_DMASYNC_POSTWRITE);
   3278 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3279 			m_freem(txbuf->m_head);
   3280 			txbuf->m_head = NULL;
   3281 		}
   3282 #ifdef DEV_NETMAP
   3283 		/*
   3284 		 * In netmap mode, set the map for the packet buffer.
   3285 		 * NOTE: Some drivers (not this one) also need to set
   3286 		 * the physical buffer address in the NIC ring.
   3287 		 * Slots in the netmap ring (indexed by "si") are
   3288 		 * kring->nkr_hwofs positions "ahead" wrt the
   3289 		 * corresponding slot in the NIC ring. In some drivers
   3290 		 * (not here) nkr_hwofs can be negative. Function
   3291 		 * netmap_idx_n2k() handles wraparounds properly.
   3292 		 */
   3293 		if (slot) {
   3294 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3295 			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
   3296 		}
   3297 #endif /* DEV_NETMAP */
   3298 		/* Clear the EOP descriptor pointer */
   3299 		txbuf->eop = NULL;
    3300 	}
   3301 
   3302 #ifdef IXGBE_FDIR
   3303 	/* Set the rate at which we sample packets */
   3304 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3305 		txr->atr_sample = atr_sample_rate;
   3306 #endif
   3307 
   3308 	/* Set number of descriptors available */
   3309 	txr->tx_avail = adapter->num_tx_desc;
   3310 
   3311 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3312 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3313 	IXGBE_TX_UNLOCK(txr);
   3314 }
   3315 
   3316 /*********************************************************************
   3317  *
   3318  *  Initialize all transmit rings.
   3319  *
   3320  **********************************************************************/
   3321 static int
   3322 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3323 {
   3324 	struct tx_ring *txr = adapter->tx_rings;
   3325 
   3326 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3327 		ixgbe_setup_transmit_ring(txr);
   3328 
   3329 	return (0);
   3330 }
   3331 
   3332 /*********************************************************************
   3333  *
   3334  *  Enable transmit unit.
   3335  *
   3336  **********************************************************************/
   3337 static void
   3338 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3339 {
   3340 	struct tx_ring	*txr = adapter->tx_rings;
   3341 	struct ixgbe_hw	*hw = &adapter->hw;
   3342 
   3343 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3344 
   3345 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3346 		u64	tdba = txr->txdma.dma_paddr;
   3347 		u32	txctrl;
   3348 
   3349 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3350 		       (tdba & 0x00000000ffffffffULL));
   3351 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3352 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3353 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3354 
   3355 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3356 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3357 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3358 
   3359 		/* Setup Transmit Descriptor Cmd Settings */
   3360 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3361 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3362 
   3363 		/* Set the processing limit */
   3364 		txr->process_limit = ixgbe_tx_process_limit;
   3365 
   3366 		/* Disable Head Writeback */
   3367 		switch (hw->mac.type) {
   3368 		case ixgbe_mac_82598EB:
   3369 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3370 			break;
   3371 		case ixgbe_mac_82599EB:
   3372 		case ixgbe_mac_X540:
   3373 		default:
   3374 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3375 			break;
    3376 		}
   3377 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3378 		switch (hw->mac.type) {
   3379 		case ixgbe_mac_82598EB:
   3380 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3381 			break;
   3382 		case ixgbe_mac_82599EB:
   3383 		case ixgbe_mac_X540:
   3384 		default:
   3385 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3386 			break;
   3387 		}
   3388 
   3389 	}
   3390 
   3391 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3392 		u32 dmatxctl, rttdcs;
   3393 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3394 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3395 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3396 		/* Disable arbiter to set MTQC */
   3397 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3398 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3399 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3400 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3401 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3402 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3403 	}
   3404 
   3405 	return;
   3406 }
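
/*
 * Editor's note: an illustrative, standalone sketch of how the TDBAL/TDBAH
 * writes above split one 64-bit DMA address across a pair of 32-bit
 * registers.  split_dma_addr() is a hypothetical helper, not a driver or
 * hardware API.
 */
#if 0
#include <stdint.h>

static void
split_dma_addr(uint64_t paddr, uint32_t *bal, uint32_t *bah)
{
	*bal = (uint32_t)(paddr & 0x00000000ffffffffULL);	/* -> TDBAL */
	*bah = (uint32_t)(paddr >> 32);				/* -> TDBAH */
}
#endif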
   3407 
   3408 /*********************************************************************
   3409  *
   3410  *  Free all transmit rings.
   3411  *
   3412  **********************************************************************/
   3413 static void
   3414 ixgbe_free_transmit_structures(struct adapter *adapter)
   3415 {
   3416 	struct tx_ring *txr = adapter->tx_rings;
   3417 
   3418 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3419 		ixgbe_free_transmit_buffers(txr);
   3420 		ixgbe_dma_free(adapter, &txr->txdma);
   3421 		IXGBE_TX_LOCK_DESTROY(txr);
   3422 	}
   3423 	free(adapter->tx_rings, M_DEVBUF);
   3424 }
   3425 
   3426 /*********************************************************************
   3427  *
   3428  *  Free transmit ring related data structures.
   3429  *
   3430  **********************************************************************/
   3431 static void
   3432 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3433 {
   3434 	struct adapter *adapter = txr->adapter;
   3435 	struct ixgbe_tx_buf *tx_buffer;
   3436 	int             i;
   3437 
   3438 	INIT_DEBUGOUT("free_transmit_ring: begin");
   3439 
   3440 	if (txr->tx_buffers == NULL)
   3441 		return;
   3442 
   3443 	tx_buffer = txr->tx_buffers;
   3444 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3445 		if (tx_buffer->m_head != NULL) {
   3446 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3447 			    0, tx_buffer->m_head->m_pkthdr.len,
   3448 			    BUS_DMASYNC_POSTWRITE);
   3449 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3450 			m_freem(tx_buffer->m_head);
   3451 			tx_buffer->m_head = NULL;
   3452 			if (tx_buffer->map != NULL) {
   3453 				ixgbe_dmamap_destroy(txr->txtag,
   3454 				    tx_buffer->map);
   3455 				tx_buffer->map = NULL;
   3456 			}
   3457 		} else if (tx_buffer->map != NULL) {
   3458 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3459 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3460 			tx_buffer->map = NULL;
   3461 		}
   3462 	}
   3463 #ifndef IXGBE_LEGACY_TX
   3464 	if (txr->br != NULL)
   3465 		buf_ring_free(txr->br, M_DEVBUF);
   3466 #endif
   3467 	if (txr->tx_buffers != NULL) {
   3468 		free(txr->tx_buffers, M_DEVBUF);
   3469 		txr->tx_buffers = NULL;
   3470 	}
   3471 	if (txr->txtag != NULL) {
   3472 		ixgbe_dma_tag_destroy(txr->txtag);
   3473 		txr->txtag = NULL;
   3474 	}
   3475 	return;
   3476 }
   3477 
   3478 /*********************************************************************
   3479  *
   3480  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3481  *
   3482  **********************************************************************/
   3483 
   3484 static int
   3485 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3486     u32 *cmd_type_len, u32 *olinfo_status)
   3487 {
   3488 	struct m_tag *mtag;
   3489 	struct adapter *adapter = txr->adapter;
   3490 	struct ethercom *ec = &adapter->osdep.ec;
   3491 	struct ixgbe_adv_tx_context_desc *TXD;
   3492 	struct ether_vlan_header *eh;
   3493 	struct ip ip;
   3494 	struct ip6_hdr ip6;
   3495 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3496 	int	ehdrlen, ip_hlen = 0;
   3497 	u16	etype;
   3498 	u8	ipproto __diagused = 0;
   3499 	int	offload = TRUE;
   3500 	int	ctxd = txr->next_avail_desc;
   3501 	u16	vtag = 0;
   3502 
   3503 	/* First check if TSO is to be used */
   3504 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3505 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3506 
   3507 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3508 		offload = FALSE;
   3509 
   3510 	/* Indicate the whole packet as payload when not doing TSO */
    3511 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3512 
   3513 	/* Now ready a context descriptor */
   3514 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3515 
   3516 	/*
   3517 	** In advanced descriptors the vlan tag must
   3518 	** be placed into the context descriptor. Hence
   3519 	** we need to make one even if not doing offloads.
   3520 	*/
   3521 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3522 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3523 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3524 	} else if (offload == FALSE) /* ... no offload to do */
   3525 		return 0;
   3526 
   3527 	/*
   3528 	 * Determine where frame payload starts.
   3529 	 * Jump over vlan headers if already present,
   3530 	 * helpful for QinQ too.
   3531 	 */
   3532 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3533 	eh = mtod(mp, struct ether_vlan_header *);
   3534 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3535 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3536 		etype = ntohs(eh->evl_proto);
   3537 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3538 	} else {
   3539 		etype = ntohs(eh->evl_encap_proto);
   3540 		ehdrlen = ETHER_HDR_LEN;
   3541 	}
   3542 
   3543 	/* Set the ether header length */
   3544 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3545 
   3546 	switch (etype) {
   3547 	case ETHERTYPE_IP:
   3548 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3549 		ip_hlen = ip.ip_hl << 2;
   3550 		ipproto = ip.ip_p;
   3551 #if 0
   3552 		ip.ip_sum = 0;
   3553 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3554 #else
   3555 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3556 		    ip.ip_sum == 0);
   3557 #endif
   3558 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3559 		break;
   3560 	case ETHERTYPE_IPV6:
   3561 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3562 		ip_hlen = sizeof(ip6);
   3563 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3564 		ipproto = ip6.ip6_nxt;
   3565 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3566 		break;
   3567 	default:
   3568 		break;
   3569 	}
   3570 
   3571 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3572 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3573 
   3574 	vlan_macip_lens |= ip_hlen;
   3575 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3576 
   3577 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3578 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3579 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3580 		KASSERT(ipproto == IPPROTO_TCP);
   3581 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3582 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3583 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3584 		KASSERT(ipproto == IPPROTO_UDP);
   3585 	}
   3586 
   3587 	/* Now copy bits into descriptor */
   3588 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3589 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3590 	TXD->seqnum_seed = htole32(0);
   3591 	TXD->mss_l4len_idx = htole32(0);
   3592 
   3593 	/* We've consumed the first desc, adjust counters */
   3594 	if (++ctxd == txr->num_desc)
   3595 		ctxd = 0;
   3596 	txr->next_avail_desc = ctxd;
   3597 	--txr->tx_avail;
   3598 
    3599 	return 0;
   3600 }
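
/*
 * Editor's note: a standalone sketch of the vlan_macip_lens packing done
 * above.  The literal shifts mirror IXGBE_ADVTXD_VLAN_SHIFT (16) and
 * IXGBE_ADVTXD_MACLEN_SHIFT (9) from the hardware header; the IP header
 * length sits in the low bits.  pack_vlan_macip_lens() is a hypothetical
 * helper, not a driver function.
 */
#if 0
#include <stdint.h>

static uint32_t
pack_vlan_macip_lens(uint16_t vtag, uint32_t ehdrlen, uint32_t ip_hlen)
{
	return ((uint32_t)vtag << 16)	/* VLAN tag, upper word */
	    | (ehdrlen << 9)		/* MAC (L2) header length */
	    | ip_hlen;			/* IP (L3) header length */
}
#endif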
   3601 
   3602 /**********************************************************************
   3603  *
   3604  *  Setup work for hardware segmentation offload (TSO) on
   3605  *  adapters using advanced tx descriptors
   3606  *
   3607  **********************************************************************/
   3608 static int
   3609 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3610     u32 *cmd_type_len, u32 *olinfo_status)
   3611 {
   3612 	struct m_tag *mtag;
   3613 	struct adapter *adapter = txr->adapter;
   3614 	struct ethercom *ec = &adapter->osdep.ec;
   3615 	struct ixgbe_adv_tx_context_desc *TXD;
   3616 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3617 	u32 mss_l4len_idx = 0, paylen;
   3618 	u16 vtag = 0, eh_type;
   3619 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3620 	struct ether_vlan_header *eh;
   3621 #ifdef INET6
   3622 	struct ip6_hdr *ip6;
   3623 #endif
   3624 #ifdef INET
   3625 	struct ip *ip;
   3626 #endif
   3627 	struct tcphdr *th;
   3628 
   3629 
   3630 	/*
   3631 	 * Determine where frame payload starts.
   3632 	 * Jump over vlan headers if already present
   3633 	 */
   3634 	eh = mtod(mp, struct ether_vlan_header *);
   3635 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3636 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3637 		eh_type = eh->evl_proto;
   3638 	} else {
   3639 		ehdrlen = ETHER_HDR_LEN;
   3640 		eh_type = eh->evl_encap_proto;
   3641 	}
   3642 
   3643 	switch (ntohs(eh_type)) {
   3644 #ifdef INET6
   3645 	case ETHERTYPE_IPV6:
   3646 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3647 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3648 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3649 			return (ENXIO);
   3650 		ip_hlen = sizeof(struct ip6_hdr);
    3651 		/* ip6 already points at the IPv6 header */
   3652 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3653 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3654 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3655 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3656 		break;
   3657 #endif
   3658 #ifdef INET
   3659 	case ETHERTYPE_IP:
   3660 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3661 		if (ip->ip_p != IPPROTO_TCP)
   3662 			return (ENXIO);
   3663 		ip->ip_sum = 0;
   3664 		ip_hlen = ip->ip_hl << 2;
   3665 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3666 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3667 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3668 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3669 		/* Tell transmit desc to also do IPv4 checksum. */
   3670 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3671 		break;
   3672 #endif
   3673 	default:
   3674 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3675 		    __func__, ntohs(eh_type));
   3676 		break;
   3677 	}
   3678 
   3679 	ctxd = txr->next_avail_desc;
   3680 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3681 
   3682 	tcp_hlen = th->th_off << 2;
   3683 
   3684 	/* This is used in the transmit desc in encap */
   3685 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3686 
   3687 	/* VLAN MACLEN IPLEN */
   3688 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3689 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    3690 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3691 	}
   3692 
   3693 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3694 	vlan_macip_lens |= ip_hlen;
   3695 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3696 
   3697 	/* ADV DTYPE TUCMD */
   3698 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3699 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3700 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3701 
   3702 	/* MSS L4LEN IDX */
   3703 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3704 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3705 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3706 
   3707 	TXD->seqnum_seed = htole32(0);
   3708 
   3709 	if (++ctxd == txr->num_desc)
   3710 		ctxd = 0;
   3711 
   3712 	txr->tx_avail--;
   3713 	txr->next_avail_desc = ctxd;
   3714 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3715 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3716 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3717 	++txr->tso_tx.ev_count;
   3718 	return (0);
   3719 }
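
/*
 * Editor's note: a standalone sketch of the two TSO calculations above --
 * the payload length carried in olinfo_status, and the MSS/L4LEN packing.
 * The shifts mirror IXGBE_ADVTXD_MSS_SHIFT (16) and
 * IXGBE_ADVTXD_L4LEN_SHIFT (8); both helper names are hypothetical.
 */
#if 0
#include <stdint.h>

static uint32_t
tso_paylen(uint32_t pktlen, uint32_t ehdrlen, uint32_t ip_hlen,
    uint32_t tcp_hlen)
{
	/* Only the bytes after the L2+L3+L4 headers are segmented. */
	return pktlen - ehdrlen - ip_hlen - tcp_hlen;
}

static uint32_t
pack_mss_l4len(uint32_t mss, uint32_t tcp_hlen)
{
	return (mss << 16) | (tcp_hlen << 8);
}
#endif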
   3720 
   3721 #ifdef IXGBE_FDIR
   3722 /*
   3723 ** This routine parses packet headers so that Flow
   3724 ** Director can make a hashed filter table entry
   3725 ** allowing traffic flows to be identified and kept
    3726 ** on the same cpu.  Doing this for every
    3727 ** packet would be a performance hit, so we
    3728 ** only do it on one in IXGBE_FDIR_RATE packets.
   3729 */
   3730 static void
   3731 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3732 {
   3733 	struct adapter			*adapter = txr->adapter;
   3734 	struct ix_queue			*que;
   3735 	struct ip			*ip;
   3736 	struct tcphdr			*th;
   3737 	struct udphdr			*uh;
   3738 	struct ether_vlan_header	*eh;
   3739 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3740 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3741 	int  				ehdrlen, ip_hlen;
   3742 	u16				etype;
   3743 
   3744 	eh = mtod(mp, struct ether_vlan_header *);
   3745 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3746 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3747 		etype = eh->evl_proto;
   3748 	} else {
   3749 		ehdrlen = ETHER_HDR_LEN;
   3750 		etype = eh->evl_encap_proto;
   3751 	}
   3752 
   3753 	/* Only handling IPv4 */
   3754 	if (etype != htons(ETHERTYPE_IP))
   3755 		return;
   3756 
   3757 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3758 	ip_hlen = ip->ip_hl << 2;
   3759 
   3760 	/* check if we're UDP or TCP */
   3761 	switch (ip->ip_p) {
   3762 	case IPPROTO_TCP:
   3763 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3764 		/* src and dst are inverted */
   3765 		common.port.dst ^= th->th_sport;
   3766 		common.port.src ^= th->th_dport;
   3767 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3768 		break;
   3769 	case IPPROTO_UDP:
   3770 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3771 		/* src and dst are inverted */
   3772 		common.port.dst ^= uh->uh_sport;
   3773 		common.port.src ^= uh->uh_dport;
   3774 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3775 		break;
   3776 	default:
   3777 		return;
   3778 	}
   3779 
   3780 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3781 	if (mp->m_pkthdr.ether_vtag)
   3782 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3783 	else
   3784 		common.flex_bytes ^= etype;
   3785 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3786 
   3787 	que = &adapter->queues[txr->me];
   3788 	/*
   3789 	** This assumes the Rx queue and Tx
   3790 	** queue are bound to the same CPU
   3791 	*/
   3792 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3793 	    input, common, que->msix);
   3794 }
   3795 #endif /* IXGBE_FDIR */
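
/*
 * Editor's note: a standalone sketch of the tuple fold in ixgbe_atr().
 * The port fields are deliberately crossed -- the TX source port lands in
 * the "dst" slot -- so the signature built while transmitting matches the
 * receive direction of the same connection.  The struct below is a
 * simplification of the real ixgbe_atr_hash_dword union.
 */
#if 0
#include <stdint.h>

struct atr_common { uint16_t src; uint16_t dst; uint32_t ip; };

static void
fold_tx_flow(struct atr_common *c, uint32_t saddr, uint32_t daddr,
    uint16_t sport, uint16_t dport)
{
	c->dst ^= sport;	/* crossed on purpose */
	c->src ^= dport;
	c->ip ^= saddr ^ daddr;	/* symmetric in src/dst */
}
#endif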
   3796 
   3797 /**********************************************************************
   3798  *
   3799  *  Examine each tx_buffer in the used queue. If the hardware is done
   3800  *  processing the packet then free associated resources. The
   3801  *  tx_buffer is put back on the free queue.
   3802  *
   3803  **********************************************************************/
   3804 static bool
   3805 ixgbe_txeof(struct tx_ring *txr)
   3806 {
   3807 	struct adapter		*adapter = txr->adapter;
   3808 	struct ifnet		*ifp = adapter->ifp;
   3809 	u32			work, processed = 0;
   3810 	u16			limit = txr->process_limit;
   3811 	struct ixgbe_tx_buf	*buf;
   3812 	union ixgbe_adv_tx_desc *txd;
   3813 	struct timeval now, elapsed;
   3814 
   3815 	KASSERT(mutex_owned(&txr->tx_mtx));
   3816 
   3817 #ifdef DEV_NETMAP
   3818 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3819 		struct netmap_adapter *na = NA(ifp);
   3820 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3821 		txd = txr->tx_base;
   3822 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3823 		    BUS_DMASYNC_POSTREAD);
   3824 		/*
   3825 		 * In netmap mode, all the work is done in the context
   3826 		 * of the client thread. Interrupt handlers only wake up
   3827 		 * clients, which may be sleeping on individual rings
   3828 		 * or on a global resource for all rings.
   3829 		 * To implement tx interrupt mitigation, we wake up the client
   3830 		 * thread roughly every half ring, even if the NIC interrupts
   3831 		 * more frequently. This is implemented as follows:
   3832 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3833 		 *   the slot that should wake up the thread (nkr_num_slots
   3834 		 *   means the user thread should not be woken up);
   3835 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3836 		 *   or the slot has the DD bit set.
   3837 		 *
   3838 		 * When the driver has separate locks, we need to
   3839 		 * release and re-acquire txlock to avoid deadlocks.
   3840 		 * XXX see if we can find a better way.
   3841 		 */
   3842 		if (!netmap_mitigate ||
   3843 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3844 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3845 			netmap_tx_irq(ifp, txr->me |
   3846 			    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
   3847 		}
   3848 		return FALSE;
   3849 	}
   3850 #endif /* DEV_NETMAP */
   3851 
   3852 	if (txr->tx_avail == txr->num_desc) {
   3853 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3854 		return false;
   3855 	}
   3856 
   3857 	/* Get work starting point */
   3858 	work = txr->next_to_clean;
   3859 	buf = &txr->tx_buffers[work];
   3860 	txd = &txr->tx_base[work];
    3861 	work -= txr->num_desc; /* The (negative) distance to the ring end */
    3862 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3863 	    BUS_DMASYNC_POSTREAD);
   3864 	do {
    3865 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3866 		if (eop == NULL) /* No work */
   3867 			break;
   3868 
   3869 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3870 			break;	/* I/O not complete */
   3871 
   3872 		if (buf->m_head) {
   3873 			txr->bytes +=
   3874 			    buf->m_head->m_pkthdr.len;
   3875 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3876 			    buf->map,
   3877 			    0, buf->m_head->m_pkthdr.len,
   3878 			    BUS_DMASYNC_POSTWRITE);
   3879 			ixgbe_dmamap_unload(txr->txtag,
   3880 			    buf->map);
   3881 			m_freem(buf->m_head);
   3882 			buf->m_head = NULL;
   3883 			/*
    3884 			 * NetBSD: Don't overwrite buf->map with NULL here.
   3885 			 * It'll panic when a ring runs one lap around.
   3886 			 */
   3887 		}
   3888 		buf->eop = NULL;
   3889 		++txr->tx_avail;
   3890 
   3891 		/* We clean the range if multi segment */
   3892 		while (txd != eop) {
   3893 			++txd;
   3894 			++buf;
   3895 			++work;
   3896 			/* wrap the ring? */
   3897 			if (__predict_false(!work)) {
   3898 				work -= txr->num_desc;
   3899 				buf = txr->tx_buffers;
   3900 				txd = txr->tx_base;
   3901 			}
   3902 			if (buf->m_head) {
   3903 				txr->bytes +=
   3904 				    buf->m_head->m_pkthdr.len;
   3905 				bus_dmamap_sync(txr->txtag->dt_dmat,
   3906 				    buf->map,
   3907 				    0, buf->m_head->m_pkthdr.len,
   3908 				    BUS_DMASYNC_POSTWRITE);
   3909 				ixgbe_dmamap_unload(txr->txtag,
   3910 				    buf->map);
   3911 				m_freem(buf->m_head);
   3912 				buf->m_head = NULL;
   3913 				/*
    3914 				 * NetBSD: Don't overwrite buf->map with NULL
   3915 				 * here. It'll panic when a ring runs one lap
   3916 				 * around.
   3917 				 */
   3918 			}
   3919 			++txr->tx_avail;
   3920 			buf->eop = NULL;
   3921 
   3922 		}
   3923 		++txr->packets;
   3924 		++processed;
   3925 		++ifp->if_opackets;
   3926 		getmicrotime(&txr->watchdog_time);
   3927 
   3928 		/* Try the next packet */
   3929 		++txd;
   3930 		++buf;
   3931 		++work;
   3932 		/* reset with a wrap */
   3933 		if (__predict_false(!work)) {
   3934 			work -= txr->num_desc;
   3935 			buf = txr->tx_buffers;
   3936 			txd = txr->tx_base;
   3937 		}
   3938 		prefetch(txd);
   3939 	} while (__predict_true(--limit));
   3940 
   3941 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3942 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3943 
   3944 	work += txr->num_desc;
   3945 	txr->next_to_clean = work;
   3946 
   3947 	/*
    3948 	** Watchdog calculation: we know there's
    3949 	** work outstanding or the first return
    3950 	** would have been taken, so nothing processed
    3951 	** for too long indicates a hang.
   3952 	*/
   3953 	getmicrotime(&now);
   3954 	timersub(&now, &txr->watchdog_time, &elapsed);
   3955 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   3956 		txr->queue_status = IXGBE_QUEUE_HUNG;
   3957 
   3958 	if (txr->tx_avail == txr->num_desc) {
   3959 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3960 		return false;
   3961 	}
   3962 
   3963 	return true;
   3964 }
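
/*
 * Editor's note: a standalone sketch of the index arithmetic ixgbe_txeof()
 * uses.  "work" holds the current slot minus num_desc -- a negative
 * distance to the ring end -- so the wrap test is a cheap compare with
 * zero.  The driver does this with unsigned wraparound; a signed int shows
 * the idea more plainly.
 */
#if 0
static int
walk_ring(int start, int num_desc, int steps)
{
	int work = start - num_desc;	/* negative distance to the end */

	while (steps-- > 0) {
		++work;
		if (work == 0)		/* stepped past the last slot */
			work -= num_desc;
	}
	return work + num_desc;		/* back to a 0..num_desc-1 index */
}
#endif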
   3965 
   3966 /*********************************************************************
   3967  *
   3968  *  Refresh mbuf buffers for RX descriptor rings
    3969  *   - now keeps its own state so discards due to resource
    3970  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    3971  *     it just returns, keeping its placeholder, so it can simply
    3972  *     be called again later to retry.
   3973  *
   3974  **********************************************************************/
   3975 static void
   3976 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   3977 {
   3978 	struct adapter		*adapter = rxr->adapter;
   3979 	struct ixgbe_rx_buf	*rxbuf;
   3980 	struct mbuf		*mp;
   3981 	int			i, j, error;
   3982 	bool			refreshed = false;
   3983 
   3984 	i = j = rxr->next_to_refresh;
   3985 	/* Control the loop with one beyond */
   3986 	if (++j == rxr->num_desc)
   3987 		j = 0;
   3988 
   3989 	while (j != limit) {
   3990 		rxbuf = &rxr->rx_buffers[i];
   3991 		if (rxbuf->buf == NULL) {
   3992 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   3993 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   3994 			if (mp == NULL) {
   3995 				rxr->no_jmbuf.ev_count++;
   3996 				goto update;
   3997 			}
   3998 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   3999 				m_adj(mp, ETHER_ALIGN);
   4000 		} else
   4001 			mp = rxbuf->buf;
   4002 
   4003 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4004 		/* If we're dealing with an mbuf that was copied rather
   4005 		 * than replaced, there's no need to go through busdma.
   4006 		 */
   4007 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4008 			/* Get the memory mapping */
   4009 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4010 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4011 			if (error != 0) {
   4012 				printf("Refresh mbufs: payload dmamap load"
   4013 				    " failure - %d\n", error);
   4014 				m_free(mp);
   4015 				rxbuf->buf = NULL;
   4016 				goto update;
   4017 			}
   4018 			rxbuf->buf = mp;
   4019 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4020 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4021 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4022 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4023 		} else {
   4024 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4025 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4026 		}
   4027 
   4028 		refreshed = true;
   4029 		/* Next is precalculated */
   4030 		i = j;
   4031 		rxr->next_to_refresh = i;
   4032 		if (++j == rxr->num_desc)
   4033 			j = 0;
   4034 	}
   4035 update:
   4036 	if (refreshed) /* Update hardware tail index */
   4037 		IXGBE_WRITE_REG(&adapter->hw,
   4038 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4039 	return;
   4040 }
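
/*
 * Editor's note: a standalone sketch of the two-index loop in
 * ixgbe_refresh_mbufs().  "j" always leads "i" by one slot (with wrap), so
 * next_to_refresh only ever advances past slots that were actually
 * refreshed, and the loop stops one short of "limit".  refresh_to() is a
 * hypothetical distillation.
 */
#if 0
static int
refresh_to(int next_to_refresh, int num_desc, int limit)
{
	int i, j;

	i = j = next_to_refresh;
	if (++j == num_desc)	/* j leads i by one */
		j = 0;
	while (j != limit) {
		/* ... refresh slot i here; bail out on failure ... */
		i = j;		/* commit: slot i is now valid */
		if (++j == num_desc)
			j = 0;
	}
	return i;		/* the new next_to_refresh */
}
#endif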
   4041 
   4042 /*********************************************************************
   4043  *
   4044  *  Allocate memory for rx_buffer structures. Since we use one
   4045  *  rx_buffer per received packet, the maximum number of rx_buffer's
   4046  *  that we'll need is equal to the number of receive descriptors
   4047  *  that we've allocated.
   4048  *
   4049  **********************************************************************/
   4050 static int
   4051 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4052 {
   4053 	struct	adapter 	*adapter = rxr->adapter;
   4054 	device_t 		dev = adapter->dev;
   4055 	struct ixgbe_rx_buf 	*rxbuf;
   4056 	int             	i, bsize, error;
   4057 
   4058 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4059 	if (!(rxr->rx_buffers =
   4060 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4061 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4062 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4063 		error = ENOMEM;
   4064 		goto fail;
   4065 	}
   4066 
   4067 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4068 				   1, 0,	/* alignment, bounds */
   4069 				   MJUM16BYTES,		/* maxsize */
   4070 				   1,			/* nsegments */
   4071 				   MJUM16BYTES,		/* maxsegsize */
   4072 				   0,			/* flags */
   4073 				   &rxr->ptag))) {
   4074 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4075 		goto fail;
   4076 	}
   4077 
    4078 	for (i = 0; i < rxr->num_desc; i++) {
   4079 		rxbuf = &rxr->rx_buffers[i];
   4080 		error = ixgbe_dmamap_create(rxr->ptag,
   4081 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4082 		if (error) {
   4083 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4084 			goto fail;
   4085 		}
   4086 	}
   4087 
   4088 	return (0);
   4089 
   4090 fail:
   4091 	/* Frees all, but can handle partial completion */
   4092 	ixgbe_free_receive_structures(adapter);
   4093 	return (error);
   4094 }
   4095 
   4096 /*
   4097 ** Used to detect a descriptor that has
   4098 ** been merged by Hardware RSC.
   4099 */
   4100 static inline u32
   4101 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4102 {
   4103 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4104 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4105 }
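
/*
 * Editor's note: ixgbe_rsc_count() is one instance of a general pattern --
 * descriptor words are little-endian, so every field read is le32toh, then
 * mask, then shift.  A generic sketch (desc_field() is hypothetical; the
 * real mask/shift pairs live in the hardware header):
 */
#if 0
#include <sys/endian.h>		/* le32toh() on NetBSD */
#include <stdint.h>

static inline uint32_t
desc_field(uint32_t le_word, uint32_t mask, unsigned int shift)
{
	return (le32toh(le_word) & mask) >> shift;
}
#endif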
   4106 
   4107 /*********************************************************************
   4108  *
   4109  *  Initialize Hardware RSC (LRO) feature on 82599
   4110  *  for an RX ring, this is toggled by the LRO capability
   4111  *  even though it is transparent to the stack.
   4112  *
   4113  *  NOTE: since this HW feature only works with IPV4 and
   4114  *        our testing has shown soft LRO to be as effective
   4115  *        I have decided to disable this by default.
   4116  *
   4117  **********************************************************************/
   4118 static void
   4119 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4120 {
   4121 	struct	adapter 	*adapter = rxr->adapter;
   4122 	struct	ixgbe_hw	*hw = &adapter->hw;
   4123 	u32			rscctrl, rdrxctl;
   4124 
   4125 	/* If turning LRO/RSC off we need to disable it */
   4126 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
    4127 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    4128 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl & ~IXGBE_RSCCTL_RSCEN);
    4129 		return;
   4130 	}
   4131 
   4132 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4133 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4134 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4135 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4136 #endif /* DEV_NETMAP */
   4137 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4138 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4139 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4140 
   4141 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4142 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4143 	/*
   4144 	** Limit the total number of descriptors that
   4145 	** can be combined, so it does not exceed 64K
   4146 	*/
   4147 	if (rxr->mbuf_sz == MCLBYTES)
   4148 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4149 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4150 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4151 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4152 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4153 	else  /* Using 16K cluster */
   4154 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4155 
   4156 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4157 
   4158 	/* Enable TCP header recognition */
   4159 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4160 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4161 	    IXGBE_PSRTYPE_TCPHDR));
   4162 
   4163 	/* Disable RSC for ACK packets */
   4164 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4165 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4166 
   4167 	rxr->hw_rsc = TRUE;
   4168 }
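
/*
 * Editor's note: a standalone sketch of the MAXDESC selection above.  The
 * aggregate RSC buffer (cluster size times descriptor count) must stay
 * under the hardware's 64KB limit, so larger clusters get a smaller
 * descriptor cap.  The constants are the usual NetBSD cluster sizes;
 * rsc_maxdesc() is a hypothetical helper.
 */
#if 0
static int
rsc_maxdesc(unsigned int bufsz)
{
	if (bufsz <= 2048)	/* MCLBYTES */
		return 16;	/* 16 x 2KB  = 32KB */
	if (bufsz <= 4096)	/* MJUMPAGESIZE (4KB pages) */
		return 8;	/* 8 x 4KB   = 32KB */
	if (bufsz <= 9216)	/* MJUM9BYTES */
		return 4;	/* 4 x 9KB   = 36KB */
	return 1;		/* one 16KB cluster */
}
#endif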
   4169 
   4170 
   4171 static void
   4172 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4173 {
   4174 	struct ixgbe_rx_buf       *rxbuf;
   4175 	int i;
   4176 
   4177 	for (i = 0; i < rxr->num_desc; i++) {
   4178 		rxbuf = &rxr->rx_buffers[i];
   4179 		if (rxbuf->buf != NULL) {
   4180 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4181 			    0, rxbuf->buf->m_pkthdr.len,
   4182 			    BUS_DMASYNC_POSTREAD);
   4183 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4184 			rxbuf->buf->m_flags |= M_PKTHDR;
   4185 			m_freem(rxbuf->buf);
   4186 			rxbuf->buf = NULL;
   4187 		}
   4188 	}
   4189 }
   4190 
   4191 
   4192 /*********************************************************************
   4193  *
   4194  *  Initialize a receive ring and its buffers.
   4195  *
   4196  **********************************************************************/
   4197 static int
   4198 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4199 {
   4200 	struct	adapter 	*adapter;
   4201 	struct ixgbe_rx_buf	*rxbuf;
   4202 #ifdef LRO
   4203 	struct ifnet		*ifp;
   4204 	struct lro_ctrl		*lro = &rxr->lro;
   4205 #endif /* LRO */
   4206 	int			rsize, error = 0;
   4207 #ifdef DEV_NETMAP
   4208 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4209 	struct netmap_slot *slot;
   4210 #endif /* DEV_NETMAP */
   4211 
   4212 	adapter = rxr->adapter;
   4213 #ifdef LRO
   4214 	ifp = adapter->ifp;
   4215 #endif /* LRO */
   4216 
   4217 	/* Clear the ring contents */
   4218 	IXGBE_RX_LOCK(rxr);
   4219 #ifdef DEV_NETMAP
   4220 	/* same as in ixgbe_setup_transmit_ring() */
   4221 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4222 #endif /* DEV_NETMAP */
   4223 	rsize = roundup2(adapter->num_rx_desc *
   4224 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4225 	bzero((void *)rxr->rx_base, rsize);
   4226 	/* Cache the size */
   4227 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4228 
   4229 	/* Free current RX buffer structs and their mbufs */
   4230 	ixgbe_free_receive_ring(rxr);
   4231 
   4232 	IXGBE_RX_UNLOCK(rxr);
   4233 
   4234 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4235 	 * or size of jumbo mbufs may have changed.
   4236 	 */
   4237 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4238 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4239 
   4240 	IXGBE_RX_LOCK(rxr);
   4241 
   4242 	/* Now replenish the mbufs */
   4243 	for (int j = 0; j != rxr->num_desc; ++j) {
   4244 		struct mbuf	*mp;
   4245 
   4246 		rxbuf = &rxr->rx_buffers[j];
   4247 #ifdef DEV_NETMAP
   4248 		/*
   4249 		 * In netmap mode, fill the map and set the buffer
   4250 		 * address in the NIC ring, considering the offset
   4251 		 * between the netmap and NIC rings (see comment in
   4252 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4253 		 * an mbuf, so end the block with a continue;
   4254 		 */
   4255 		if (slot) {
   4256 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4257 			uint64_t paddr;
   4258 			void *addr;
   4259 
   4260 			addr = PNMB(slot + sj, &paddr);
   4261 			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
   4262 			/* Update descriptor */
   4263 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4264 			continue;
   4265 		}
   4266 #endif /* DEV_NETMAP */
   4267 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4268 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4269 		if (rxbuf->buf == NULL) {
   4270 			error = ENOBUFS;
    4271 			goto fail;
   4272 		}
   4273 		mp = rxbuf->buf;
   4274 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4275 		/* Get the memory mapping */
   4276 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4277 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4278 		if (error != 0)
    4279 			goto fail;
   4280 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4281 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4282 		/* Update descriptor */
   4283 		rxr->rx_base[j].read.pkt_addr =
   4284 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4285 	}
   4286 
   4287 
   4288 	/* Setup our descriptor indices */
   4289 	rxr->next_to_check = 0;
   4290 	rxr->next_to_refresh = 0;
   4291 	rxr->lro_enabled = FALSE;
   4292 	rxr->rx_copies.ev_count = 0;
   4293 	rxr->rx_bytes.ev_count = 0;
   4294 	rxr->discard = FALSE;
   4295 	rxr->vtag_strip = FALSE;
   4296 
   4297 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4298 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4299 
   4300 	/*
   4301 	** Now set up the LRO interface:
   4302 	*/
   4303 	if (ixgbe_rsc_enable)
   4304 		ixgbe_setup_hw_rsc(rxr);
   4305 #ifdef LRO
   4306 	else if (ifp->if_capenable & IFCAP_LRO) {
   4307 		device_t dev = adapter->dev;
   4308 		int err = tcp_lro_init(lro);
   4309 		if (err) {
   4310 			device_printf(dev, "LRO Initialization failed!\n");
   4311 			goto fail;
   4312 		}
   4313 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4314 		rxr->lro_enabled = TRUE;
   4315 		lro->ifp = adapter->ifp;
   4316 	}
   4317 #endif /* LRO */
   4318 
   4319 	IXGBE_RX_UNLOCK(rxr);
   4320 	return (0);
   4321 
   4322 fail:
   4323 	ixgbe_free_receive_ring(rxr);
   4324 	IXGBE_RX_UNLOCK(rxr);
   4325 	return (error);
   4326 }
   4327 
   4328 /*********************************************************************
   4329  *
   4330  *  Initialize all receive rings.
   4331  *
   4332  **********************************************************************/
   4333 static int
   4334 ixgbe_setup_receive_structures(struct adapter *adapter)
   4335 {
   4336 	struct rx_ring *rxr = adapter->rx_rings;
   4337 	int j;
   4338 
   4339 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4340 		if (ixgbe_setup_receive_ring(rxr))
   4341 			goto fail;
   4342 
   4343 	return (0);
   4344 fail:
   4345 	/*
    4346 	 * Free RX buffers allocated so far; we only handle
    4347 	 * the rings that completed, since the failing case will have
    4348 	 * cleaned up after itself. 'j' failed, so it's the terminus.
   4349 	 */
   4350 	for (int i = 0; i < j; ++i) {
   4351 		rxr = &adapter->rx_rings[i];
   4352 		ixgbe_free_receive_ring(rxr);
   4353 	}
   4354 
   4355 	return (ENOBUFS);
   4356 }
   4357 
   4358 /*********************************************************************
   4359  *
   4360  *  Setup receive registers and features.
   4361  *
   4362  **********************************************************************/
   4363 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4364 
   4365 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
   4366 
   4367 static void
   4368 ixgbe_initialize_receive_units(struct adapter *adapter)
   4369 {
   4370 	int i;
   4371 	struct	rx_ring	*rxr = adapter->rx_rings;
   4372 	struct ixgbe_hw	*hw = &adapter->hw;
   4373 	struct ifnet   *ifp = adapter->ifp;
   4374 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4375 	u32		reta, mrqc = 0, hlreg, r[10];
   4376 
   4377 
   4378 	/*
   4379 	 * Make sure receives are disabled while
   4380 	 * setting up the descriptor ring
   4381 	 */
   4382 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4383 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4384 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4385 
   4386 	/* Enable broadcasts */
   4387 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4388 	fctrl |= IXGBE_FCTRL_BAM;
   4389 	fctrl |= IXGBE_FCTRL_DPF;
   4390 	fctrl |= IXGBE_FCTRL_PMCF;
   4391 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4392 
   4393 	/* Set for Jumbo Frames? */
   4394 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4395 	if (ifp->if_mtu > ETHERMTU)
   4396 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4397 	else
   4398 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4399 #ifdef DEV_NETMAP
    4400 	/* crcstrip is conditional in netmap (in RDRXCTL too?) */
   4401 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4402 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4403 	else
   4404 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4405 #endif /* DEV_NETMAP */
   4406 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4407 
   4408 	bufsz = (adapter->rx_mbuf_sz +
   4409 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4410 
   4411 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4412 		u64 rdba = rxr->rxdma.dma_paddr;
   4413 
   4414 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4415 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4416 			       (rdba & 0x00000000ffffffffULL));
   4417 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4418 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4419 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4420 
   4421 		/* Set up the SRRCTL register */
   4422 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4423 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4424 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4425 		srrctl |= bufsz;
   4426 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4427 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4428 
   4429 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4430 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4431 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4432 
   4433 		/* Set the processing limit */
   4434 		rxr->process_limit = ixgbe_rx_process_limit;
   4435 	}
   4436 
   4437 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4438 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4439 			      IXGBE_PSRTYPE_UDPHDR |
   4440 			      IXGBE_PSRTYPE_IPV4HDR |
   4441 			      IXGBE_PSRTYPE_IPV6HDR;
   4442 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4443 	}
   4444 
   4445 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4446 
   4447 	/* Setup RSS */
   4448 	if (adapter->num_queues > 1) {
   4449 		int j;
   4450 		reta = 0;
   4451 
   4452 		/* set up random bits */
   4453 		cprng_fast(&r, sizeof(r));
   4454 
   4455 		/* Set up the redirection table */
   4456 		for (i = 0, j = 0; i < 128; i++, j++) {
   4457 			if (j == adapter->num_queues) j = 0;
   4458 			reta = (reta << 8) | (j * 0x11);
   4459 			if ((i & 3) == 3)
   4460 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4461 		}
   4462 
   4463 		/* Now fill our hash function seeds */
   4464 		for (i = 0; i < 10; i++)
   4465 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
   4466 
   4467 		/* Perform hash on these packet types */
   4468 		mrqc = IXGBE_MRQC_RSSEN
   4469 		     | IXGBE_MRQC_RSS_FIELD_IPV4
   4470 		     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4471 		     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4472 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4473 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4474 		     | IXGBE_MRQC_RSS_FIELD_IPV6
   4475 		     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4476 		     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4477 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4478 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4479 
   4480 		/* RSS and RX IPP Checksum are mutually exclusive */
   4481 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4482 	}
   4483 
   4484 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4485 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4486 
   4487 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4488 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4489 
   4490 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4491 
   4492 	return;
   4493 }
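
/*
 * Editor's note: a standalone sketch of the redirection-table fill above.
 * The RETA holds 128 one-byte entries packed four to a 32-bit register, so
 * entries are shifted into an accumulator that is flushed on every fourth
 * index.  (The driver scales the queue index by 0x11; plain "j" is used
 * here for clarity.)
 */
#if 0
#include <stdint.h>

static void
fill_reta(uint32_t reta_reg[32], int num_queues)
{
	uint32_t reta = 0;

	for (int i = 0, j = 0; i < 128; i++, j++) {
		if (j == num_queues)
			j = 0;
		reta = (reta << 8) | (uint8_t)j;
		if ((i & 3) == 3)	/* four entries accumulated */
			reta_reg[i >> 2] = reta;
	}
}
#endif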
   4494 
   4495 /*********************************************************************
   4496  *
   4497  *  Free all receive rings.
   4498  *
   4499  **********************************************************************/
   4500 static void
   4501 ixgbe_free_receive_structures(struct adapter *adapter)
   4502 {
   4503 	struct rx_ring *rxr = adapter->rx_rings;
   4504 
   4505 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4506 #ifdef LRO
   4507 		struct lro_ctrl		*lro = &rxr->lro;
   4508 #endif /* LRO */
   4509 		ixgbe_free_receive_buffers(rxr);
   4510 #ifdef LRO
   4511 		/* Free LRO memory */
   4512 		tcp_lro_free(lro);
   4513 #endif /* LRO */
   4514 		/* Free the ring memory as well */
   4515 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4516 		IXGBE_RX_LOCK_DESTROY(rxr);
   4517 	}
   4518 
   4519 	free(adapter->rx_rings, M_DEVBUF);
   4520 }
   4521 
   4522 
   4523 /*********************************************************************
   4524  *
   4525  *  Free receive ring data structures
   4526  *
   4527  **********************************************************************/
   4528 static void
   4529 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4530 {
   4531 	struct adapter		*adapter = rxr->adapter;
   4532 	struct ixgbe_rx_buf	*rxbuf;
   4533 
   4534 	INIT_DEBUGOUT("free_receive_structures: begin");
   4535 
   4536 	/* Cleanup any existing buffers */
   4537 	if (rxr->rx_buffers != NULL) {
   4538 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4539 			rxbuf = &rxr->rx_buffers[i];
   4540 			if (rxbuf->buf != NULL) {
   4541 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4542 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4543 				    BUS_DMASYNC_POSTREAD);
   4544 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4545 				rxbuf->buf->m_flags |= M_PKTHDR;
   4546 				m_freem(rxbuf->buf);
   4547 			}
   4548 			rxbuf->buf = NULL;
   4549 			if (rxbuf->pmap != NULL) {
   4550 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4551 				rxbuf->pmap = NULL;
   4552 			}
   4553 		}
   4554 		if (rxr->rx_buffers != NULL) {
   4555 			free(rxr->rx_buffers, M_DEVBUF);
   4556 			rxr->rx_buffers = NULL;
   4557 		}
   4558 	}
   4559 
   4560 	if (rxr->ptag != NULL) {
   4561 		ixgbe_dma_tag_destroy(rxr->ptag);
   4562 		rxr->ptag = NULL;
   4563 	}
   4564 
   4565 	return;
   4566 }
   4567 
   4568 static __inline void
   4569 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4570 {
   4571 	int s;
   4572 
   4573 #ifdef LRO
   4574 	struct adapter	*adapter = ifp->if_softc;
   4575 	struct ethercom *ec = &adapter->osdep.ec;
   4576 
    4577 	/*
    4578 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
    4579 	 * has been computed by hardware, with no VLAN tag in the ethernet
    4580 	 * header.  For IPv6 we do not yet support extension headers.
    4581 	 */
    4582 	if (rxr->lro_enabled &&
    4583 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    4584 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    4585 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4586 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    4587 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4588 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    4589 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    4590 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4591 		/*
    4592 		 * Hand the packet to LRO; if the enqueue succeeds the
    4593 		 * mbuf has been consumed.  Otherwise (no LRO resources,
    4594 		 * or the enqueue fails) fall through and send it up
    4595 		 * the stack directly.
    4596 		 */
    4597 		if (rxr->lro.lro_cnt != 0)
    4598 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    4599 				return;
    4600 	}
   4601 #endif /* LRO */
   4602 
   4603 	IXGBE_RX_UNLOCK(rxr);
   4604 
   4605 	s = splnet();
   4606 	/* Pass this up to any BPF listeners. */
   4607 	bpf_mtap(ifp, m);
   4608 	(*ifp->if_input)(ifp, m);
   4609 	splx(s);
   4610 
   4611 	IXGBE_RX_LOCK(rxr);
   4612 }
   4613 
   4614 static __inline void
   4615 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4616 {
   4617 	struct ixgbe_rx_buf	*rbuf;
   4618 
   4619 	rbuf = &rxr->rx_buffers[i];
   4620 
    4621 	if (rbuf->fmp != NULL) {	/* Partial chain? */
    4622 		rbuf->fmp->m_flags |= M_PKTHDR;
    4623 		m_freem(rbuf->fmp);
    4624 		rbuf->fmp = NULL;
   4625 	}
   4626 
   4627 	/*
   4628 	** With advanced descriptors the writeback
    4629 	** clobbers the buffer addresses, so it's easier
    4630 	** to just free the existing mbufs and take
    4631 	** the normal refresh path to get new buffers
    4632 	** and mappings.
   4633 	*/
   4634 	if (rbuf->buf) {
   4635 		m_free(rbuf->buf);
   4636 		rbuf->buf = NULL;
   4637 	}
   4638 
   4639 	return;
   4640 }
   4641 
   4642 
   4643 /*********************************************************************
   4644  *
   4645  *  This routine executes in interrupt context. It replenishes
    4646  *  the mbufs in the descriptor ring and sends data which has been
    4647  *  dma'ed into host memory to the upper layer.
   4648  *
   4649  *  We loop at most count times if count is > 0, or until done if
   4650  *  count < 0.
   4651  *
   4652  *  Return TRUE for more work, FALSE for all clean.
   4653  *********************************************************************/
   4654 static bool
   4655 ixgbe_rxeof(struct ix_queue *que)
   4656 {
   4657 	struct adapter		*adapter = que->adapter;
   4658 	struct rx_ring		*rxr = que->rxr;
   4659 	struct ifnet		*ifp = adapter->ifp;
   4660 #ifdef LRO
   4661 	struct lro_ctrl		*lro = &rxr->lro;
   4662 	struct lro_entry	*queued;
   4663 #endif /* LRO */
   4664 	int			i, nextp, processed = 0;
   4665 	u32			staterr = 0;
   4666 	u16			count = rxr->process_limit;
   4667 	union ixgbe_adv_rx_desc	*cur;
   4668 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4669 
   4670 	IXGBE_RX_LOCK(rxr);
   4671 
   4672 #ifdef DEV_NETMAP
   4673 	/* Same as the txeof routine: wakeup clients on intr. */
   4674 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
   4675 		return (FALSE);
   4676 #endif /* DEV_NETMAP */
   4677 	for (i = rxr->next_to_check; count != 0;) {
   4678 		struct mbuf	*sendmp, *mp;
   4679 		u32		rsc, ptype;
   4680 		u16		len;
   4681 		u16		vtag = 0;
   4682 		bool		eop;
   4683 
   4684 		/* Sync the ring. */
   4685 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4686 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4687 
   4688 		cur = &rxr->rx_base[i];
   4689 		staterr = le32toh(cur->wb.upper.status_error);
   4690 
   4691 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4692 			break;
   4693 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4694 			break;
   4695 
   4696 		count--;
   4697 		sendmp = NULL;
   4698 		nbuf = NULL;
   4699 		rsc = 0;
   4700 		cur->wb.upper.status_error = 0;
   4701 		rbuf = &rxr->rx_buffers[i];
   4702 		mp = rbuf->buf;
   4703 
   4704 		len = le16toh(cur->wb.upper.length);
   4705 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4706 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4707 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4708 
   4709 		/* Make sure bad packets are discarded */
   4710 		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
   4711 		    (rxr->discard)) {
   4712 			rxr->rx_discarded.ev_count++;
   4713 			if (eop)
   4714 				rxr->discard = FALSE;
   4715 			else
   4716 				rxr->discard = TRUE;
   4717 			ixgbe_rx_discard(rxr, i);
   4718 			goto next_desc;
   4719 		}
   4720 
    4721 		/*
    4722 		** On 82599, which supports a hardware LRO
    4723 		** (called HW RSC), packets need not be
    4724 		** fragmented across sequential descriptors;
    4725 		** rather, the next descriptor of a frame is
    4726 		** indicated in bits of the current one. This
    4727 		** also means we might process more than one
    4728 		** packet at a time, something that has never
    4729 		** been true before; it required eliminating
    4730 		** global chain pointers in favor of what we
    4731 		** are doing here.  -jfv
    4732 		*/
   4733 		if (!eop) {
   4734 			/*
   4735 			** Figure out the next descriptor
   4736 			** of this frame.
   4737 			*/
   4738 			if (rxr->hw_rsc == TRUE) {
   4739 				rsc = ixgbe_rsc_count(cur);
   4740 				rxr->rsc_num += (rsc - 1);
   4741 			}
   4742 			if (rsc) { /* Get hardware index */
   4743 				nextp = ((staterr &
   4744 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4745 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4746 			} else { /* Just sequential */
   4747 				nextp = i + 1;
   4748 				if (nextp == adapter->num_rx_desc)
   4749 					nextp = 0;
   4750 			}
   4751 			nbuf = &rxr->rx_buffers[nextp];
   4752 			prefetch(nbuf);
   4753 		}
   4754 		/*
   4755 		** Rather than using the fmp/lmp global pointers
   4756 		** we now keep the head of a packet chain in the
   4757 		** buffer struct and pass this along from one
   4758 		** descriptor to the next, until we get EOP.
   4759 		*/
   4760 		mp->m_len = len;
   4761 		/*
   4762 		** See if there is a stored head
   4763 		** that determines what we are
   4764 		*/
   4765 		sendmp = rbuf->fmp;
   4766 
   4767 		if (sendmp != NULL) {  /* secondary frag */
   4768 			rbuf->buf = rbuf->fmp = NULL;
   4769 			mp->m_flags &= ~M_PKTHDR;
   4770 			sendmp->m_pkthdr.len += mp->m_len;
   4771 		} else {
   4772 			/*
   4773 			 * Optimize.  This might be a small packet,
   4774 			 * maybe just a TCP ACK.  Do a fast copy that
   4775 			 * is cache aligned into a new mbuf, and
   4776 			 * leave the old mbuf+cluster for re-use.
   4777 			 */
   4778 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4779 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4780 				if (sendmp != NULL) {
   4781 					sendmp->m_data +=
   4782 					    IXGBE_RX_COPY_ALIGN;
   4783 					ixgbe_bcopy(mp->m_data,
   4784 					    sendmp->m_data, len);
   4785 					sendmp->m_len = len;
   4786 					rxr->rx_copies.ev_count++;
   4787 					rbuf->flags |= IXGBE_RX_COPY;
   4788 				}
   4789 			}
   4790 			if (sendmp == NULL) {
   4791 				rbuf->buf = rbuf->fmp = NULL;
   4792 				sendmp = mp;
   4793 			}
   4794 
   4795 			/* first desc of a non-ps chain */
   4796 			sendmp->m_flags |= M_PKTHDR;
   4797 			sendmp->m_pkthdr.len = mp->m_len;
   4798 		}
   4799 		++processed;
   4800 		/* Pass the head pointer on */
   4801 		if (eop == 0) {
   4802 			nbuf->fmp = sendmp;
   4803 			sendmp = NULL;
   4804 			mp->m_next = nbuf->buf;
   4805 		} else { /* Sending this frame */
   4806 			sendmp->m_pkthdr.rcvif = ifp;
   4807 			ifp->if_ipackets++;
   4808 			rxr->rx_packets.ev_count++;
   4809 			/* capture data for AIM */
   4810 			rxr->bytes += sendmp->m_pkthdr.len;
   4811 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   4812 			/* Process vlan info */
   4813 			if ((rxr->vtag_strip) &&
   4814 			    (staterr & IXGBE_RXD_STAT_VP))
   4815 				vtag = le16toh(cur->wb.upper.vlan);
   4816 			if (vtag) {
   4817 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   4818 				    printf("%s: could not apply VLAN "
   4819 					"tag", __func__));
   4820 			}
   4821 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   4822 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   4823 				   &adapter->stats);
   4824 			}
   4825 #if __FreeBSD_version >= 800000
   4826 			sendmp->m_pkthdr.flowid = que->msix;
   4827 			sendmp->m_flags |= M_FLOWID;
   4828 #endif
   4829 		}
   4830 next_desc:
   4831 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4832 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4833 
   4834 		/* Advance our pointers to the next descriptor. */
   4835 		if (++i == rxr->num_desc)
   4836 			i = 0;
   4837 
   4838 		/* Now send to the stack or do LRO */
   4839 		if (sendmp != NULL) {
   4840 			rxr->next_to_check = i;
   4841 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   4842 			i = rxr->next_to_check;
   4843 		}
   4844 
    4845 		/* Refresh the mbufs every 8 descriptors */
   4846 		if (processed == 8) {
   4847 			ixgbe_refresh_mbufs(rxr, i);
   4848 			processed = 0;
   4849 		}
   4850 	}
   4851 
   4852 	/* Refresh any remaining buf structs */
   4853 	if (ixgbe_rx_unrefreshed(rxr))
   4854 		ixgbe_refresh_mbufs(rxr, i);
   4855 
   4856 	rxr->next_to_check = i;
   4857 
   4858 #ifdef LRO
   4859 	/*
   4860 	 * Flush any outstanding LRO work
   4861 	 */
   4862 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   4863 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   4864 		tcp_lro_flush(lro, queued);
   4865 	}
   4866 #endif /* LRO */
   4867 
   4868 	IXGBE_RX_UNLOCK(rxr);
   4869 
    4870 	/*
    4871 	** Do we still have cleaning to do?
    4872 	** Schedule another interrupt if so.
    4873 	*/
   4874 	if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
   4875 		ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
   4876 		return true;
   4877 	}
   4878 
   4879 	return false;
   4880 }
   4881 
   4882 
   4883 /*********************************************************************
   4884  *
   4885  *  Verify that the hardware indicated that the checksum is valid.
    4886  *  Inform the stack about the status of the checksum so that the
    4887  *  stack doesn't spend time verifying it.
   4888  *
   4889  *********************************************************************/
   4890 static void
   4891 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   4892     struct ixgbe_hw_stats *stats)
   4893 {
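         	/*
         	** The low 16 bits of staterr carry the descriptor status
         	** bits (IPCS/L4CS: the hardware checked the checksum) and
         	** the top byte carries the error bits (IPE/TCPE: the
         	** checksum was bad), matching the extractions below.
         	*/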
   4894 	u16	status = (u16) staterr;
   4895 	u8	errors = (u8) (staterr >> 24);
   4896 #if 0
   4897 	bool	sctp = FALSE;
   4898 
   4899 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4900 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   4901 		sctp = TRUE;
   4902 #endif
   4903 
   4904 	if (status & IXGBE_RXD_STAT_IPCS) {
   4905 		stats->ipcs.ev_count++;
   4906 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   4907 			/* IP Checksum Good */
   4908 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   4909 
   4910 		} else {
   4911 			stats->ipcs_bad.ev_count++;
   4912 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   4913 		}
   4914 	}
   4915 	if (status & IXGBE_RXD_STAT_L4CS) {
   4916 		stats->l4cs.ev_count++;
   4917 		u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   4918 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   4919 			mp->m_pkthdr.csum_flags |= type;
   4920 		} else {
   4921 			stats->l4cs_bad.ev_count++;
   4922 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   4923 		}
   4924 	}
   4925 	return;
   4926 }
   4927 
   4928 
   4929 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   4930 /*
    4931 ** This routine is run via a vlan config EVENT;
    4932 ** it enables us to use the HW Filter table since
    4933 ** we can get the vlan id. This just creates the
    4934 ** entry in the soft version of the VFTA; init will
    4935 ** repopulate the real table.
   4936 */
   4937 static void
   4938 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4939 {
   4940 	struct adapter	*adapter = ifp->if_softc;
   4941 	u16		index, bit;
   4942 
    4943 	if (ifp->if_softc != arg)   /* Not our event */
   4944 		return;
   4945 
   4946 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4947 		return;
   4948 
   4949 	IXGBE_CORE_LOCK(adapter);
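         	/*
         	** The VFTA is an array of 128 32-bit words: bits 11:5 of
         	** the vlan id select the word, bits 4:0 the bit within it.
         	*/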
   4950 	index = (vtag >> 5) & 0x7F;
   4951 	bit = vtag & 0x1F;
   4952 	adapter->shadow_vfta[index] |= (1 << bit);
   4953 	ixgbe_init_locked(adapter);
   4954 	IXGBE_CORE_UNLOCK(adapter);
   4955 }
   4956 
   4957 /*
    4958 ** This routine is run via a vlan
    4959 ** unconfig EVENT; remove our entry
    4960 ** from the soft VFTA.
   4961 */
   4962 static void
   4963 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4964 {
   4965 	struct adapter	*adapter = ifp->if_softc;
   4966 	u16		index, bit;
   4967 
    4968 	if (ifp->if_softc != arg)
   4969 		return;
   4970 
   4971 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4972 		return;
   4973 
   4974 	IXGBE_CORE_LOCK(adapter);
   4975 	index = (vtag >> 5) & 0x7F;
   4976 	bit = vtag & 0x1F;
   4977 	adapter->shadow_vfta[index] &= ~(1 << bit);
   4978 	/* Re-init to load the changes */
   4979 	ixgbe_init_locked(adapter);
   4980 	IXGBE_CORE_UNLOCK(adapter);
   4981 }
   4982 #endif
   4983 
   4984 static void
   4985 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   4986 {
   4987 	struct ethercom *ec = &adapter->osdep.ec;
   4988 	struct ixgbe_hw *hw = &adapter->hw;
   4989 	struct rx_ring	*rxr;
   4990 	u32		ctrl;
   4991 
   4992 	/*
    4993 	** We get here via init_locked, meaning
    4994 	** a soft reset; this has already cleared
    4995 	** the VFTA and other state, so if no
    4996 	** vlans have been registered, do nothing.
   4997 	*/
   4998 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   4999 		return;
   5000 	}
   5001 
   5002 	/*
    5003 	** A soft reset zeroes out the VFTA, so
   5004 	** we need to repopulate it now.
   5005 	*/
   5006 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5007 		if (adapter->shadow_vfta[i] != 0)
   5008 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5009 			    adapter->shadow_vfta[i]);
   5010 
   5011 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
    5012 	/* Enable the VLAN filter table if the capability is enabled */
   5013 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5014 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5015 		ctrl |= IXGBE_VLNCTRL_VFE;
   5016 	}
   5017 	if (hw->mac.type == ixgbe_mac_82598EB)
   5018 		ctrl |= IXGBE_VLNCTRL_VME;
   5019 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5020 
   5021 	/* Setup the queues for vlans */
   5022 	for (int i = 0; i < adapter->num_queues; i++) {
   5023 		rxr = &adapter->rx_rings[i];
    5024 		/* On 82599 the VLAN enable is per-queue in RXDCTL */
   5025 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5026 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5027 			ctrl |= IXGBE_RXDCTL_VME;
   5028 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5029 		}
   5030 		rxr->vtag_strip = TRUE;
   5031 	}
   5032 }
   5033 
   5034 static void
   5035 ixgbe_enable_intr(struct adapter *adapter)
   5036 {
   5037 	struct ixgbe_hw	*hw = &adapter->hw;
   5038 	struct ix_queue	*que = adapter->queues;
   5039 	u32		mask, fwsm;
   5040 
   5041 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5042 	/* Enable Fan Failure detection */
   5043 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5044 		mask |= IXGBE_EIMS_GPI_SDP1;
   5045 
   5046 	switch (adapter->hw.mac.type) {
   5047 		case ixgbe_mac_82599EB:
   5048 			mask |= IXGBE_EIMS_ECC;
   5049 			mask |= IXGBE_EIMS_GPI_SDP0;
   5050 			mask |= IXGBE_EIMS_GPI_SDP1;
   5051 			mask |= IXGBE_EIMS_GPI_SDP2;
   5052 #ifdef IXGBE_FDIR
   5053 			mask |= IXGBE_EIMS_FLOW_DIR;
   5054 #endif
   5055 			break;
   5056 		case ixgbe_mac_X540:
   5057 			mask |= IXGBE_EIMS_ECC;
   5058 			/* Detect if Thermal Sensor is enabled */
   5059 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5060 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5061 				mask |= IXGBE_EIMS_TS;
   5062 #ifdef IXGBE_FDIR
   5063 			mask |= IXGBE_EIMS_FLOW_DIR;
   5064 #endif
   5065 		/* falls through */
   5066 		default:
   5067 			break;
   5068 	}
   5069 
   5070 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5071 
   5072 	/* With RSS we use auto clear */
   5073 	if (adapter->msix_mem) {
   5074 		mask = IXGBE_EIMS_ENABLE_MASK;
   5075 		/* Don't autoclear Link */
   5076 		mask &= ~IXGBE_EIMS_OTHER;
   5077 		mask &= ~IXGBE_EIMS_LSC;
   5078 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5079 	}
   5080 
   5081 	/*
    5082 	** Now enable all queues; this is done separately to
   5083 	** allow for handling the extended (beyond 32) MSIX
   5084 	** vectors that can be used by 82599
   5085 	*/
    5086 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5087 		ixgbe_enable_queue(adapter, que->msix);
   5088 
   5089 	IXGBE_WRITE_FLUSH(hw);
   5090 
   5091 	return;
   5092 }
   5093 
   5094 static void
   5095 ixgbe_disable_intr(struct adapter *adapter)
   5096 {
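         	/*
         	** Mask everything: the newer MACs split the interrupt
         	** mask across EIMC and the extended EIMC_EX registers,
         	** so both banks must be cleared.
         	*/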
   5097 	if (adapter->msix_mem)
   5098 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5099 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5100 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5101 	} else {
   5102 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5103 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5104 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5105 	}
   5106 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5107 	return;
   5108 }
   5109 
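         /*
         ** The shared code performs 16-bit PCI config space accesses,
         ** while NetBSD's pci_conf_read()/pci_conf_write() operate on
         ** whole 32-bit dwords; emulate the halfword access with a
         ** read-modify-write of the containing dword.
         */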
   5110 u16
   5111 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5112 {
   5113 	switch (reg % 4) {
   5114 	case 0:
   5115 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5116 		    __BITS(15, 0);
   5117 	case 2:
   5118 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5119 		    reg - 2), __BITS(31, 16));
   5120 	default:
    5121 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5122 		break;
   5123 	}
   5124 }
   5125 
   5126 void
   5127 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5128 {
   5129 	pcireg_t old;
   5130 
   5131 	switch (reg % 4) {
   5132 	case 0:
   5133 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5134 		    __BITS(31, 16);
   5135 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5136 		break;
   5137 	case 2:
   5138 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5139 		    __BITS(15, 0);
   5140 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5141 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5142 		break;
   5143 	default:
    5144 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5145 		break;
   5146 	}
   5147 
   5148 	return;
   5149 }
   5150 
   5151 /*
   5152 ** Setup the correct IVAR register for a particular MSIX interrupt
   5153 **   (yes this is all very magic and confusing :)
   5154 **  - entry is the register array entry
   5155 **  - vector is the MSIX vector for this queue
   5156 **  - type is RX/TX/MISC
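         **
         ** For example, on 82598 an RX queue n maps to entry n and a TX
         ** queue n to entry n + 64; each 32-bit IVAR register holds four
         ** 8-bit entries, so entry/4 selects the register and entry%4 the
         ** byte lane within it.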
   5157 */
   5158 static void
   5159 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5160 {
   5161 	struct ixgbe_hw *hw = &adapter->hw;
   5162 	u32 ivar, index;
   5163 
   5164 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5165 
   5166 	switch (hw->mac.type) {
   5167 
   5168 	case ixgbe_mac_82598EB:
   5169 		if (type == -1)
   5170 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5171 		else
   5172 			entry += (type * 64);
   5173 		index = (entry >> 2) & 0x1F;
   5174 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5175 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5176 		ivar |= (vector << (8 * (entry & 0x3)));
   5177 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5178 		break;
   5179 
   5180 	case ixgbe_mac_82599EB:
   5181 	case ixgbe_mac_X540:
   5182 		if (type == -1) { /* MISC IVAR */
   5183 			index = (entry & 1) * 8;
   5184 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5185 			ivar &= ~(0xFF << index);
   5186 			ivar |= (vector << index);
   5187 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5188 		} else {	/* RX/TX IVARS */
   5189 			index = (16 * (entry & 1)) + (8 * type);
   5190 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5191 			ivar &= ~(0xFF << index);
   5192 			ivar |= (vector << index);
   5193 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
    5194 		}
    5195 		break;
    5196 	default:
   5197 		break;
   5198 	}
   5199 }
   5200 
   5201 static void
   5202 ixgbe_configure_ivars(struct adapter *adapter)
   5203 {
   5204 	struct  ix_queue *que = adapter->queues;
   5205 	u32 newitr;
   5206 
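         	/*
         	** The EITR throttle field (bits 3..11) counts in units of
         	** roughly 2 usec, so interrupts/sec ~= 1e6 / (2 * count);
         	** 4000000/rate masked with 0x0FF8 yields that encoding.
         	*/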
   5207 	if (ixgbe_max_interrupt_rate > 0)
   5208 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5209 	else
   5210 		newitr = 0;
   5211 
    5212 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5213 		/* First the RX queue entry */
    5214 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5215 		/* ... and the TX */
    5216 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5217 		/* Set an Initial EITR value */
    5218 		IXGBE_WRITE_REG(&adapter->hw,
    5219 		    IXGBE_EITR(que->msix), newitr);
    5220 	}
   5221 
   5222 	/* For the Link interrupt */
    5223 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5224 }
   5225 
   5226 /*
   5227 ** ixgbe_sfp_probe - called in the local timer to
    5228 ** determine if a port has had optics inserted.
   5229 */
    5230 static bool
         ixgbe_sfp_probe(struct adapter *adapter)
   5231 {
   5232 	struct ixgbe_hw	*hw = &adapter->hw;
   5233 	device_t	dev = adapter->dev;
   5234 	bool		result = FALSE;
   5235 
   5236 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5237 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5238 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5239 		if (ret)
    5240 			goto out;
    5241 		ret = hw->phy.ops.reset(hw);
    5242 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5243 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5244 			device_printf(dev, "Reload driver with supported module.\n");
    5245 			adapter->sfp_probe = FALSE;
    5246 			goto out;
    5247 		} else
    5248 			device_printf(dev, "SFP+ module detected!\n");
   5249 		/* We now have supported optics */
   5250 		adapter->sfp_probe = FALSE;
    5251 		/* Set the optics type so the system reports correctly */
   5252 		ixgbe_setup_optics(adapter);
   5253 		result = TRUE;
   5254 	}
   5255 out:
   5256 	return (result);
   5257 }
   5258 
   5259 /*
   5260 ** Tasklet handler for MSIX Link interrupts
    5261 **  - done outside of interrupt context since it might sleep
   5262 */
   5263 static void
   5264 ixgbe_handle_link(void *context)
   5265 {
   5266 	struct adapter  *adapter = context;
   5267 
   5268 	if (ixgbe_check_link(&adapter->hw,
   5269 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
    5270 		ixgbe_update_link_status(adapter);
   5271 }
   5272 
   5273 /*
   5274 ** Tasklet for handling SFP module interrupts
   5275 */
   5276 static void
   5277 ixgbe_handle_mod(void *context)
   5278 {
   5279 	struct adapter  *adapter = context;
   5280 	struct ixgbe_hw *hw = &adapter->hw;
   5281 	device_t	dev = adapter->dev;
   5282 	u32 err;
   5283 
   5284 	err = hw->phy.ops.identify_sfp(hw);
   5285 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5286 		device_printf(dev,
   5287 		    "Unsupported SFP+ module type was detected.\n");
   5288 		return;
   5289 	}
   5290 	err = hw->mac.ops.setup_sfp(hw);
   5291 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5292 		device_printf(dev,
   5293 		    "Setup failure - unsupported SFP+ module type.\n");
   5294 		return;
   5295 	}
   5296 	softint_schedule(adapter->msf_si);
   5297 	return;
   5298 }
   5299 
   5300 
   5301 /*
   5302 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5303 */
   5304 static void
   5305 ixgbe_handle_msf(void *context)
   5306 {
   5307 	struct adapter  *adapter = context;
   5308 	struct ixgbe_hw *hw = &adapter->hw;
   5309 	u32 autoneg;
   5310 	bool negotiate;
   5311 
   5312 	autoneg = hw->phy.autoneg_advertised;
   5313 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5314 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5315 	else
   5316 		negotiate = 0;
   5317 	if (hw->mac.ops.setup_link)
   5318 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5319 	return;
   5320 }
   5321 
   5322 #ifdef IXGBE_FDIR
   5323 /*
   5324 ** Tasklet for reinitializing the Flow Director filter table
   5325 */
   5326 static void
   5327 ixgbe_reinit_fdir(void *context)
   5328 {
   5329 	struct adapter  *adapter = context;
   5330 	struct ifnet   *ifp = adapter->ifp;
   5331 
   5332 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5333 		return;
   5334 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5335 	adapter->fdir_reinit = 0;
   5336 	/* re-enable flow director interrupts */
   5337 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5338 	/* Restart the interface */
   5339 	ifp->if_flags |= IFF_RUNNING;
   5340 	return;
   5341 }
   5342 #endif
   5343 
   5344 /**********************************************************************
   5345  *
   5346  *  Update the board statistics counters.
   5347  *
   5348  **********************************************************************/
   5349 static void
   5350 ixgbe_update_stats_counters(struct adapter *adapter)
   5351 {
   5352 	struct ifnet   *ifp = adapter->ifp;
   5353 	struct ixgbe_hw *hw = &adapter->hw;
   5354 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5355 	u64  total_missed_rx = 0;
   5356 	uint64_t crcerrs, rlec;
   5357 
   5358 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5359 	adapter->stats.crcerrs.ev_count += crcerrs;
   5360 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5361 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5362 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5363 
   5364 	/*
   5365 	** Note: these are for the 8 possible traffic classes,
    5366 	**	 which in the current implementation are unused,
    5367 	**	 therefore only class 0 should read real data.
   5368 	*/
   5369 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5370 		int j = i % adapter->num_queues;
   5371 		u32 mp;
   5372 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5373 		/* missed_rx tallies misses for the gprc workaround */
   5374 		missed_rx += mp;
   5375 		/* global total per queue */
    5376 		adapter->stats.mpc[j].ev_count += mp;
   5377 		/* Running comprehensive total for stats display */
   5378 		total_missed_rx += mp;
   5379 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5380 			adapter->stats.rnbc[j] +=
   5381 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5382 			adapter->stats.qbtc[j].ev_count +=
   5383 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5384 			adapter->stats.qbrc[j].ev_count +=
   5385 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5386 			adapter->stats.pxonrxc[j].ev_count +=
   5387 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5388 		} else {
   5389 			adapter->stats.pxonrxc[j].ev_count +=
   5390 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5391 		}
   5392 		adapter->stats.pxontxc[j].ev_count +=
   5393 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5394 		adapter->stats.pxofftxc[j].ev_count +=
   5395 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5396 		adapter->stats.pxoffrxc[j].ev_count +=
   5397 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5398 		adapter->stats.pxon2offc[j].ev_count +=
   5399 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5400 	}
   5401 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5402 		int j = i % adapter->num_queues;
   5403 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5404 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5405 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5406 	}
   5407 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5408 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5409 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5410 	adapter->stats.rlec.ev_count += rlec;
   5411 
   5412 	/* Hardware workaround, gprc counts missed packets */
   5413 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5414 
   5415 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5416 	adapter->stats.lxontxc.ev_count += lxon;
   5417 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5418 	adapter->stats.lxofftxc.ev_count += lxoff;
   5419 	total = lxon + lxoff;
   5420 
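         	/*
         	** XON/XOFF pause frames are included in the good packet
         	** and octet counters, so "total" (at minimum frame size)
         	** is subtracted below to count only real traffic.
         	*/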
   5421 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5422 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5423 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5424 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5425 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5426 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5427 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5428 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5429 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5430 	} else {
   5431 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5432 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5433 		/* 82598 only has a counter in the high register */
   5434 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5435 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5436 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5437 	}
   5438 
   5439 	/*
   5440 	 * Workaround: mprc hardware is incorrectly counting
   5441 	 * broadcasts, so for now we subtract those.
   5442 	 */
   5443 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5444 	adapter->stats.bprc.ev_count += bprc;
   5445 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5446 
   5447 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5448 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5449 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5450 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5451 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5452 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5453 
   5454 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5455 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5456 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5457 
   5458 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5459 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5460 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5461 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5462 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5463 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5464 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5465 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5466 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5467 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5468 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5469 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5470 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5471 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5472 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5473 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5474 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5475 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5476 
   5477 	/* Only read FCOE on 82599 */
   5478 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5479 		adapter->stats.fcoerpdc.ev_count +=
   5480 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5481 		adapter->stats.fcoeprc.ev_count +=
   5482 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5483 		adapter->stats.fcoeptc.ev_count +=
   5484 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5485 		adapter->stats.fcoedwrc.ev_count +=
   5486 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5487 		adapter->stats.fcoedwtc.ev_count +=
   5488 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5489 	}
   5490 
   5491 	/* Fill out the OS statistics structure */
   5492 	/*
   5493 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5494 	 * adapter->stats counters. It's required to make ifconfig -z
    5495 	 * (SIOCZIFDATA) work.
   5496 	 */
   5497 	ifp->if_collisions = 0;
   5498 
   5499 	/* Rx Errors */
   5500 	ifp->if_iqdrops += total_missed_rx;
   5501 	ifp->if_ierrors += crcerrs + rlec;
   5502 }
   5503 
   5504 /** ixgbe_sysctl_tdh_handler - Handler function
   5505  *  Retrieves the TDH value from the hardware
   5506  */
   5507 static int
   5508 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5509 {
   5510 	struct sysctlnode node;
   5511 	uint32_t val;
   5512 	struct tx_ring *txr;
   5513 
   5514 	node = *rnode;
   5515 	txr = (struct tx_ring *)node.sysctl_data;
   5516 	if (txr == NULL)
   5517 		return 0;
   5518 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5519 	node.sysctl_data = &val;
   5520 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5521 }
   5522 
   5523 /** ixgbe_sysctl_tdt_handler - Handler function
   5524  *  Retrieves the TDT value from the hardware
   5525  */
   5526 static int
   5527 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5528 {
   5529 	struct sysctlnode node;
   5530 	uint32_t val;
   5531 	struct tx_ring *txr;
   5532 
   5533 	node = *rnode;
   5534 	txr = (struct tx_ring *)node.sysctl_data;
   5535 	if (txr == NULL)
   5536 		return 0;
   5537 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5538 	node.sysctl_data = &val;
   5539 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5540 }
   5541 
   5542 /** ixgbe_sysctl_rdh_handler - Handler function
   5543  *  Retrieves the RDH value from the hardware
   5544  */
   5545 static int
   5546 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5547 {
   5548 	struct sysctlnode node;
   5549 	uint32_t val;
   5550 	struct rx_ring *rxr;
   5551 
   5552 	node = *rnode;
   5553 	rxr = (struct rx_ring *)node.sysctl_data;
   5554 	if (rxr == NULL)
   5555 		return 0;
   5556 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5557 	node.sysctl_data = &val;
   5558 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5559 }
   5560 
   5561 /** ixgbe_sysctl_rdt_handler - Handler function
   5562  *  Retrieves the RDT value from the hardware
   5563  */
   5564 static int
   5565 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5566 {
   5567 	struct sysctlnode node;
   5568 	uint32_t val;
   5569 	struct rx_ring *rxr;
   5570 
   5571 	node = *rnode;
   5572 	rxr = (struct rx_ring *)node.sysctl_data;
   5573 	if (rxr == NULL)
   5574 		return 0;
   5575 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5576 	node.sysctl_data = &val;
   5577 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5578 }
   5579 
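         /*
         ** Per-queue interrupt rate sysctl: convert the EITR throttle
         ** field (bits 3..11, ~2 usec units) to interrupts/sec for
         ** display, and on a write encode the requested rate back,
         ** clamping it to the representable 1000..500000 range.
         */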
   5580 static int
   5581 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5582 {
   5583 	int error;
   5584 	struct sysctlnode node;
   5585 	struct ix_queue *que;
   5586 	uint32_t reg, usec, rate;
   5587 
   5588 	node = *rnode;
   5589 	que = (struct ix_queue *)node.sysctl_data;
   5590 	if (que == NULL)
   5591 		return 0;
   5592 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5593 	usec = ((reg & 0x0FF8) >> 3);
   5594 	if (usec > 0)
   5595 		rate = 500000 / usec;
   5596 	else
   5597 		rate = 0;
   5598 	node.sysctl_data = &rate;
   5599 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5600 	if (error)
   5601 		return error;
   5602 	reg &= ~0xfff; /* default, no limitation */
   5603 	ixgbe_max_interrupt_rate = 0;
   5604 	if (rate > 0 && rate < 500000) {
   5605 		if (rate < 1000)
   5606 			rate = 1000;
   5607 		ixgbe_max_interrupt_rate = rate;
    5608 		reg |= ((4000000 / rate) & 0xff8);
   5609 	}
   5610 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5611 	return 0;
   5612 }
   5613 
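         /*
         ** Create the sysctl root for this device instance; the nodes
         ** appear under hw.<xname> (e.g. hw.ixg0 for the first port).
         */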
   5614 const struct sysctlnode *
   5615 ixgbe_sysctl_instance(struct adapter *adapter)
   5616 {
   5617 	const char *dvname;
   5618 	struct sysctllog **log;
   5619 	int rc;
   5620 	const struct sysctlnode *rnode;
   5621 
   5622 	log = &adapter->sysctllog;
   5623 	dvname = device_xname(adapter->dev);
   5624 
   5625 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5626 	    0, CTLTYPE_NODE, dvname,
   5627 	    SYSCTL_DESCR("ixgbe information and settings"),
   5628 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5629 		goto err;
   5630 
   5631 	return rnode;
   5632 err:
   5633 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5634 	return NULL;
   5635 }
   5636 
   5637 /*
   5638  * Add sysctl variables, one per statistic, to the system.
   5639  */
   5640 static void
   5641 ixgbe_add_hw_stats(struct adapter *adapter)
   5642 {
   5643 	device_t dev = adapter->dev;
   5644 	const struct sysctlnode *rnode, *cnode;
   5645 	struct sysctllog **log = &adapter->sysctllog;
   5646 	struct tx_ring *txr = adapter->tx_rings;
   5647 	struct rx_ring *rxr = adapter->rx_rings;
   5648 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5649 
   5650 	/* Driver Statistics */
   5651 #if 0
   5652 	/* These counters are not updated by the software */
   5653 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5654 			CTLFLAG_RD, &adapter->dropped_pkts,
   5655 			"Driver dropped packets");
   5656 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   5657 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   5658 			"???");
   5659 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   5660 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   5661 			"???");
   5662 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   5663 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   5664 			"???");
   5665 #endif
   5666 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   5667 	    NULL, device_xname(dev), "Handled queue in softint");
   5668 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   5669 	    NULL, device_xname(dev), "Requeued in softint");
   5670 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   5671 	    NULL, device_xname(dev), "Interrupt handler more rx");
   5672 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   5673 	    NULL, device_xname(dev), "Interrupt handler more tx");
   5674 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   5675 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   5676 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   5677 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   5678 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   5679 	    NULL, device_xname(dev), "m_defrag() failed");
   5680 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   5681 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   5682 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   5683 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   5684 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   5685 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   5686 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   5687 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   5688 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   5689 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   5690 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   5691 	    NULL, device_xname(dev), "Watchdog timeouts");
   5692 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   5693 	    NULL, device_xname(dev), "TSO errors");
   5694 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   5695 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   5696 
   5697 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   5698 		snprintf(adapter->queues[i].evnamebuf,
   5699 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   5700 		    device_xname(dev), i);
   5701 		snprintf(adapter->queues[i].namebuf,
   5702 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   5703 
   5704 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   5705 			aprint_error_dev(dev, "could not create sysctl root\n");
   5706 			break;
   5707 		}
   5708 
   5709 		if (sysctl_createv(log, 0, &rnode, &rnode,
   5710 		    0, CTLTYPE_NODE,
   5711 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   5712 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   5713 			break;
   5714 
   5715 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5716 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   5717 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   5718 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   5719 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   5720 			break;
   5721 
   5722 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5723 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   5724 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
    5725 		    NULL, 0, &(adapter->queues[i].irqs),
   5726 		    0, CTL_CREATE, CTL_EOL) != 0)
   5727 			break;
   5728 
   5729 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5730 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5731 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   5732 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   5733 		    0, CTL_CREATE, CTL_EOL) != 0)
   5734 			break;
   5735 
   5736 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5737 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5738 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   5739 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   5740 		    0, CTL_CREATE, CTL_EOL) != 0)
   5741 			break;
   5742 
   5743 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   5744 		    NULL, device_xname(dev), "TSO");
   5745 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   5746 		    NULL, adapter->queues[i].evnamebuf,
   5747 		    "Queue No Descriptor Available");
   5748 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   5749 		    NULL, adapter->queues[i].evnamebuf,
   5750 		    "Queue Packets Transmitted");
   5751 
   5752 #ifdef LRO
   5753 		struct lro_ctrl *lro = &rxr->lro;
   5754 #endif /* LRO */
   5755 
   5756 		if (sysctl_createv(log, 0, &rnode, &cnode,
    5757 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5759 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   5760 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   5761 		    CTL_CREATE, CTL_EOL) != 0)
   5762 			break;
   5763 
   5764 		if (sysctl_createv(log, 0, &rnode, &cnode,
    5765 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5767 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   5768 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   5769 		    CTL_CREATE, CTL_EOL) != 0)
   5770 			break;
   5771 
   5772 		if (i < __arraycount(adapter->stats.mpc)) {
   5773 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   5774 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5775 			    "Missed Packet Count");
   5776 		}
   5777 		if (i < __arraycount(adapter->stats.pxontxc)) {
   5778 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   5779 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5780 			    "pxontxc");
   5781 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   5782 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5783 			    "pxonrxc");
   5784 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   5785 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5786 			    "pxofftxc");
   5787 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   5788 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5789 			    "pxoffrxc");
   5790 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   5791 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5792 			    "pxon2offc");
   5793 		}
   5794 		if (i < __arraycount(adapter->stats.qprc)) {
   5795 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   5796 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5797 			    "qprc");
   5798 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   5799 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5800 			    "qptc");
   5801 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   5802 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5803 			    "qbrc");
   5804 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   5805 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5806 			    "qbtc");
   5807 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   5808 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5809 			    "qprdc");
   5810 		}
   5811 
   5812 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   5813 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   5814 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   5815 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   5816 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   5817 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   5818 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   5819 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   5820 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   5821 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   5822 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   5823 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   5824 #ifdef LRO
   5825 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   5826 				CTLFLAG_RD, &lro->lro_queued, 0,
   5827 				"LRO Queued");
   5828 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   5829 				CTLFLAG_RD, &lro->lro_flushed, 0,
   5830 				"LRO Flushed");
   5831 #endif /* LRO */
   5832 	}
   5833 
    5834 	/* MAC stats get their own sub node */
   5835 
   5836 
   5837 	snprintf(stats->namebuf,
   5838 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   5839 
   5840 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   5841 	    stats->namebuf, "rx csum offload - IP");
   5842 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   5843 	    stats->namebuf, "rx csum offload - L4");
   5844 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   5845 	    stats->namebuf, "rx csum offload - IP bad");
   5846 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   5847 	    stats->namebuf, "rx csum offload - L4 bad");
   5848 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   5849 	    stats->namebuf, "Interrupt conditions zero");
   5850 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   5851 	    stats->namebuf, "Legacy interrupts");
   5852 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   5853 	    stats->namebuf, "CRC Errors");
   5854 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   5855 	    stats->namebuf, "Illegal Byte Errors");
   5856 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   5857 	    stats->namebuf, "Byte Errors");
   5858 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   5859 	    stats->namebuf, "MAC Short Packets Discarded");
   5860 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   5861 	    stats->namebuf, "MAC Local Faults");
   5862 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   5863 	    stats->namebuf, "MAC Remote Faults");
   5864 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   5865 	    stats->namebuf, "Receive Length Errors");
   5866 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   5867 	    stats->namebuf, "Link XON Transmitted");
   5868 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   5869 	    stats->namebuf, "Link XON Received");
   5870 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   5871 	    stats->namebuf, "Link XOFF Transmitted");
   5872 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   5873 	    stats->namebuf, "Link XOFF Received");
   5874 
   5875 	/* Packet Reception Stats */
   5876 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   5877 	    stats->namebuf, "Total Octets Received");
   5878 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   5879 	    stats->namebuf, "Good Octets Received");
   5880 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   5881 	    stats->namebuf, "Total Packets Received");
   5882 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   5883 	    stats->namebuf, "Good Packets Received");
   5884 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   5885 	    stats->namebuf, "Multicast Packets Received");
   5886 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   5887 	    stats->namebuf, "Broadcast Packets Received");
   5888 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
    5889 	    stats->namebuf, "64 byte frames received");
   5890 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   5891 	    stats->namebuf, "65-127 byte frames received");
   5892 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   5893 	    stats->namebuf, "128-255 byte frames received");
   5894 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   5895 	    stats->namebuf, "256-511 byte frames received");
   5896 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   5897 	    stats->namebuf, "512-1023 byte frames received");
   5898 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
    5899 	    stats->namebuf, "1024-1522 byte frames received");
   5900 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   5901 	    stats->namebuf, "Receive Undersized");
   5902 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
    5903 	    stats->namebuf, "Fragmented Packets Received");
   5904 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   5905 	    stats->namebuf, "Oversized Packets Received");
   5906 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   5907 	    stats->namebuf, "Received Jabber");
   5908 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   5909 	    stats->namebuf, "Management Packets Received");
   5910 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   5911 	    stats->namebuf, "Checksum Errors");
   5912 
   5913 	/* Packet Transmission Stats */
   5914 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   5915 	    stats->namebuf, "Good Octets Transmitted");
   5916 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   5917 	    stats->namebuf, "Total Packets Transmitted");
   5918 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   5919 	    stats->namebuf, "Good Packets Transmitted");
   5920 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   5921 	    stats->namebuf, "Broadcast Packets Transmitted");
   5922 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   5923 	    stats->namebuf, "Multicast Packets Transmitted");
   5924 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   5925 	    stats->namebuf, "Management Packets Transmitted");
   5926 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
    5927 	    stats->namebuf, "64 byte frames transmitted");
   5928 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   5929 	    stats->namebuf, "65-127 byte frames transmitted");
   5930 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   5931 	    stats->namebuf, "128-255 byte frames transmitted");
   5932 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   5933 	    stats->namebuf, "256-511 byte frames transmitted");
   5934 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   5935 	    stats->namebuf, "512-1023 byte frames transmitted");
   5936 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   5937 	    stats->namebuf, "1024-1522 byte frames transmitted");
   5938 }
   5939 
   5940 /*
   5941 ** Set flow control using sysctl:
   5942 ** Flow control values:
   5943 ** 	0 - off
   5944 **	1 - rx pause
   5945 **	2 - tx pause
   5946 **	3 - full
   5947 */
   5948 static int
   5949 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   5950 {
   5951 	struct sysctlnode node;
   5952 	int error, last;
   5953 	struct adapter *adapter;
   5954 
   5955 	node = *rnode;
   5956 	adapter = (struct adapter *)node.sysctl_data;
   5957 	node.sysctl_data = &adapter->fc;
   5958 	last = adapter->fc;
   5959 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5960 	if (error != 0 || newp == NULL)
   5961 		return error;
   5962 
   5963 	/* Don't bother if it's not changed */
   5964 	if (adapter->fc == last)
   5965 		return (0);
   5966 
   5967 	switch (adapter->fc) {
   5968 		case ixgbe_fc_rx_pause:
   5969 		case ixgbe_fc_tx_pause:
   5970 		case ixgbe_fc_full:
   5971 			adapter->hw.fc.requested_mode = adapter->fc;
   5972 			if (adapter->num_queues > 1)
   5973 				ixgbe_disable_rx_drop(adapter);
   5974 			break;
   5975 		case ixgbe_fc_none:
   5976 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
   5977 			if (adapter->num_queues > 1)
   5978 				ixgbe_enable_rx_drop(adapter);
   5979 			break;
   5980 		default:
   5981 			adapter->fc = last;
   5982 			return (EINVAL);
   5983 	}
   5984 	/* Don't autoneg if forcing a value */
   5985 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   5986 	ixgbe_fc_enable(&adapter->hw);
   5987 	return 0;
   5988 }
   5989 
   5990 /*
   5991 ** Control link advertise speed:
   5992 **	1 - advertise only 1G
   5993 **	2 - advertise 100Mb
    5994 **	3 - advertise normal (1G + 10G)
   5995 */
   5996 static int
   5997 ixgbe_set_advertise(SYSCTLFN_ARGS)
   5998 {
   5999 	struct sysctlnode	node;
   6000 	int			t, error = 0;
   6001 	struct adapter		*adapter;
   6002 	device_t		dev;
   6003 	struct ixgbe_hw		*hw;
   6004 	ixgbe_link_speed	speed, last;
   6005 
   6006 	node = *rnode;
   6007 	adapter = (struct adapter *)node.sysctl_data;
   6008 	dev = adapter->dev;
   6009 	hw = &adapter->hw;
   6010 	last = adapter->advertise;
   6011 	t = adapter->advertise;
   6012 	node.sysctl_data = &t;
   6013 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6014 	if (error != 0 || newp == NULL)
   6015 		return error;
   6016 
    6017 	if (t == last) /* no change */
    6018 		return (0);
   6019 
   6020 	if (t == -1)
   6021 		return 0;
   6022 
   6023 	adapter->advertise = t;
   6024 
   6025 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
    6026 	    (hw->phy.multispeed_fiber)))
   6027 		return (EINVAL);
   6028 
   6029 	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
   6030 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
   6031 		return (EINVAL);
   6032 	}
   6033 
    6034 	if (adapter->advertise == 1)
    6035 		speed = IXGBE_LINK_SPEED_1GB_FULL;
    6036 	else if (adapter->advertise == 2)
    6037 		speed = IXGBE_LINK_SPEED_100_FULL;
    6038 	else if (adapter->advertise == 3)
    6039 		speed = IXGBE_LINK_SPEED_1GB_FULL |
    6040 		    IXGBE_LINK_SPEED_10GB_FULL;
    6041 	else {	/* bogus value */
    6042 		adapter->advertise = last;
    6043 		return (EINVAL);
    6044 	}
   6045 
   6046 	hw->mac.autotry_restart = TRUE;
   6047 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6048 
   6049 	return 0;
   6050 }
   6051 
   6052 /*
   6053 ** Thermal Shutdown Trigger
   6054 **   - cause a Thermal Overtemp IRQ
   6055 */
   6056 static int
   6057 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6058 {
   6059 	struct sysctlnode node;
   6060 	int		error, fire = 0;
   6061 	struct adapter	*adapter;
   6062 	struct ixgbe_hw *hw;
   6063 
   6064 	node = *rnode;
   6065 	adapter = (struct adapter *)node.sysctl_data;
   6066 	hw = &adapter->hw;
   6067 
   6068 	if (hw->mac.type != ixgbe_mac_X540)
   6069 		return (0);
   6070 
   6071 	node.sysctl_data = &fire;
   6072 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6073 	if ((error) || (newp == NULL))
   6074 		return (error);
   6075 
   6076 	if (fire) {
   6077 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6078 		reg |= IXGBE_EICR_TS;
   6079 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6080 	}
   6081 
   6082 	return (0);
   6083 }
   6084 
   6085 /*
   6086 ** Enable the hardware to drop packets when the buffer is
    6087 ** full. This is useful with multiqueue, so that no single
    6088 ** queue being full stalls the entire RX engine. We only
    6089 ** enable this when multiqueue is in use AND flow control
    6090 ** is disabled.
   6091 */
   6092 static void
   6093 ixgbe_enable_rx_drop(struct adapter *adapter)
   6094 {
    6095 	struct ixgbe_hw *hw = &adapter->hw;
    6096 
    6097 	for (int i = 0; i < adapter->num_queues; i++) {
    6098 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
    6099 		srrctl |= IXGBE_SRRCTL_DROP_EN;
    6100 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
    6101 	}
    6102 }
   6103 
   6104 static void
   6105 ixgbe_disable_rx_drop(struct adapter *adapter)
   6106 {
    6107 	struct ixgbe_hw *hw = &adapter->hw;
    6108 
    6109 	for (int i = 0; i < adapter->num_queues; i++) {
    6110 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
    6111 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
    6112 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
    6113 	}
    6114 }
   6115