      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
     62 /*$NetBSD: ixgbe.c,v 1.14.4.2 2015/06/06 14:40:12 skrll Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 #include "vlan.h"
     69 
     70 /*********************************************************************
     71  *  Set this to one to display debug statistics
     72  *********************************************************************/
     73 int             ixgbe_display_debug_stats = 0;
     74 
     75 /*********************************************************************
     76  *  Driver version
     77  *********************************************************************/
     78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
     79 
     80 /*********************************************************************
     81  *  PCI Device ID Table
     82  *
     83  *  Used by probe to select devices to load on
     84  *  Last field stores an index into ixgbe_strings
     85  *  Last entry must be all 0s
     86  *
     87  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     88  *********************************************************************/
     89 
     90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     91 {
     92 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     93 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    115 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    116 	/* required last entry */
    117 	{0, 0, 0, 0, 0}
    118 };
    119 
    120 /*********************************************************************
    121  *  Table of branding strings
    122  *********************************************************************/
    123 
    124 static const char    *ixgbe_strings[] = {
    125 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
    126 };
    127 
    128 /*********************************************************************
    129  *  Function prototypes
    130  *********************************************************************/
    131 static int      ixgbe_probe(device_t, cfdata_t, void *);
    132 static void     ixgbe_attach(device_t, device_t, void *);
    133 static int      ixgbe_detach(device_t, int);
    134 #if 0
    135 static int      ixgbe_shutdown(device_t);
    136 #endif
    137 #if IXGBE_LEGACY_TX
    138 static void     ixgbe_start(struct ifnet *);
    139 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    140 #else
    141 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    142 static int	ixgbe_mq_start_locked(struct ifnet *,
    143                     struct tx_ring *, struct mbuf *);
    144 static void	ixgbe_qflush(struct ifnet *);
    145 static void	ixgbe_deferred_mq_start(void *);
    146 #endif
    147 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    148 static void	ixgbe_ifstop(struct ifnet *, int);
    149 static int	ixgbe_init(struct ifnet *);
    150 static void	ixgbe_init_locked(struct adapter *);
    151 static void     ixgbe_stop(void *);
    152 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    153 static int      ixgbe_media_change(struct ifnet *);
    154 static void     ixgbe_identify_hardware(struct adapter *);
    155 static int      ixgbe_allocate_pci_resources(struct adapter *,
    156 		    const struct pci_attach_args *);
    157 static int      ixgbe_allocate_msix(struct adapter *,
    158 		    const struct pci_attach_args *);
    159 static int      ixgbe_allocate_legacy(struct adapter *,
    160 		    const struct pci_attach_args *);
    161 static int	ixgbe_allocate_queues(struct adapter *);
    162 static int	ixgbe_setup_msix(struct adapter *);
    163 static void	ixgbe_free_pci_resources(struct adapter *);
    164 static void	ixgbe_local_timer(void *);
    165 static int	ixgbe_setup_interface(device_t, struct adapter *);
    166 static void	ixgbe_config_link(struct adapter *);
    167 
    168 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    169 static int	ixgbe_setup_transmit_structures(struct adapter *);
    170 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    171 static void     ixgbe_initialize_transmit_units(struct adapter *);
    172 static void     ixgbe_free_transmit_structures(struct adapter *);
    173 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    174 
    175 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    176 static int      ixgbe_setup_receive_structures(struct adapter *);
    177 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    178 static void     ixgbe_initialize_receive_units(struct adapter *);
    179 static void     ixgbe_free_receive_structures(struct adapter *);
    180 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    181 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    182 
    183 static void     ixgbe_enable_intr(struct adapter *);
    184 static void     ixgbe_disable_intr(struct adapter *);
    185 static void     ixgbe_update_stats_counters(struct adapter *);
    186 static bool	ixgbe_txeof(struct tx_ring *);
    187 static bool	ixgbe_rxeof(struct ix_queue *);
    188 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    189 		    struct ixgbe_hw_stats *);
    190 static void     ixgbe_set_promisc(struct adapter *);
    191 static void     ixgbe_set_multi(struct adapter *);
    192 static void     ixgbe_update_link_status(struct adapter *);
    193 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    194 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    195 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    196 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    197 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    198 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    199 		    struct ixgbe_dma_alloc *, int);
    200 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    201 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    202 		    struct mbuf *, u32 *, u32 *);
    203 static int	ixgbe_tso_setup(struct tx_ring *,
    204 		    struct mbuf *, u32 *, u32 *);
    205 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    206 static void	ixgbe_configure_ivars(struct adapter *);
    207 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    208 
    209 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    210 #if 0
    211 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    212 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    213 #endif
    214 
    215 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    216 
    217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    219 		    struct mbuf *, u32);
    220 
    221 static void	ixgbe_enable_rx_drop(struct adapter *);
    222 static void	ixgbe_disable_rx_drop(struct adapter *);
    223 
    224 /* Support for pluggable optic modules */
    225 static bool	ixgbe_sfp_probe(struct adapter *);
    226 static void	ixgbe_setup_optics(struct adapter *);
    227 
     228 /* Legacy (single vector) interrupt handler */
    229 static int	ixgbe_legacy_irq(void *);
    230 
    231 #if defined(NETBSD_MSI_OR_MSIX)
    232 /* The MSI/X Interrupt handlers */
    233 static void	ixgbe_msix_que(void *);
    234 static void	ixgbe_msix_link(void *);
    235 #endif
    236 
    237 /* Software interrupts for deferred work */
    238 static void	ixgbe_handle_que(void *);
    239 static void	ixgbe_handle_link(void *);
    240 static void	ixgbe_handle_msf(void *);
    241 static void	ixgbe_handle_mod(void *);
    242 
    243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    245 
    246 #ifdef IXGBE_FDIR
    247 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    248 static void	ixgbe_reinit_fdir(void *, int);
    249 #endif
    250 
    251 /*********************************************************************
    252  *  FreeBSD Device Interface Entry Points
    253  *********************************************************************/
    254 
    255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    256     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    257     DVF_DETACH_SHUTDOWN);
    258 
    259 #if 0
    260 devclass_t ixgbe_devclass;
    261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    262 
    263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    265 #endif
    266 
    267 /*
    268 ** TUNEABLE PARAMETERS:
    269 */
    270 
    271 /*
    272 ** AIM: Adaptive Interrupt Moderation
    273 ** which means that the interrupt rate
    274 ** is varied over time based on the
    275 ** traffic for that interrupt vector
    276 */
    277 static int ixgbe_enable_aim = TRUE;
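        /* TUNABLE_INT() is a FreeBSD loader-tunable hook; it is defined
         * away below since NetBSD has no equivalent. */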
    278 #define TUNABLE_INT(__x, __y)
    279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
    280 
    281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
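        /*
        ** Note: elsewhere in the driver this rate is converted back to
        ** an EITR interval of roughly 4000000 / rate, so the default
        ** above corresponds to IXGBE_LOW_LATENCY.
        */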
    283 
    284 /* How many packets rxeof tries to clean at a time */
    285 static int ixgbe_rx_process_limit = 256;
    286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    287 
    288 /* How many packets txeof tries to clean at a time */
    289 static int ixgbe_tx_process_limit = 256;
    290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    291 
    292 /*
    293 ** Smart speed setting, default to on
    294 ** this only works as a compile option
    295 ** right now as it's set during attach; set
    296 ** this to 'ixgbe_smart_speed_off' to
    297 ** disable.
    298 */
    299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    300 
    301 /*
    302  * MSIX should be the default for best performance,
    303  * but this allows it to be forced off for testing.
    304  */
    305 static int ixgbe_enable_msix = 1;
    306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    307 
    308 #if defined(NETBSD_MSI_OR_MSIX)
    309 /*
    310  * Number of Queues, can be set to 0,
    311  * it then autoconfigures based on the
    312  * number of cpus with a max of 8. This
    313  * can be overridden manually here.
    314  */
    315 static int ixgbe_num_queues = 0;
    316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    317 #endif
    318 
    319 /*
    320 ** Number of TX descriptors per ring,
    321 ** setting higher than RX as this seems
    322 ** the better performing choice.
    323 */
    324 static int ixgbe_txd = PERFORM_TXD;
    325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
    326 
    327 /* Number of RX descriptors per ring */
    328 static int ixgbe_rxd = PERFORM_RXD;
    329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    330 
    331 /*
    332 ** HW RSC control:
    333 **  this feature only works with
    334 **  IPv4, and only on 82599 and later.
    335 **  Also, it causes IP forwarding to
    336 **  fail, and unlike LRO it cannot be
    337 **  controlled by the stack. For all
    338 **  these reasons it is best left off,
    339 **  with no tuneable interface; enabling
    340 **  it requires compiling the driver
    341 **  with this set to TRUE.
    342 */
    343 static bool ixgbe_rsc_enable = FALSE;
    344 
    345 /* Keep running tab on them for sanity check */
    346 static int ixgbe_total_ports;
    347 
    348 #ifdef IXGBE_FDIR
    349 /*
    350 ** For Flow Director: this is the
    351 ** number of TX packets we sample
    352 ** for the filter pool; at the default
    353 ** rate every 20th packet will be probed.
    354 **
    355 ** This feature can be disabled by
    356 ** setting this to 0.
    357 */
    358 static int atr_sample_rate = 20;
    359 /*
    360 ** Flow Director actually 'steals'
    361 ** part of the packet buffer as its
    362 ** filter pool; this variable controls
    363 ** how much it uses:
    364 **  0 = 64K, 1 = 128K, 2 = 256K
    365 */
    366 static int fdir_pballoc = 1;
    367 #endif
    368 
    369 #ifdef DEV_NETMAP
    370 /*
    371  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    372  * be a reference on how to implement netmap support in a driver.
    373  * Additional comments are in ixgbe_netmap.h .
    374  *
    375  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    376  * that extend the standard driver.
    377  */
    378 #include <dev/netmap/ixgbe_netmap.h>
    379 #endif /* DEV_NETMAP */
    380 
    381 /*********************************************************************
    382  *  Device identification routine
    383  *
    384  *  ixgbe_probe determines if the driver should be loaded on the
    385  *  adapter, based on the PCI vendor/device ID of the adapter.
    386  *
    387  *  return 1 on success, 0 on failure
    388  *********************************************************************/
    389 
    390 static int
    391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    392 {
    393 	const struct pci_attach_args *pa = aux;
    394 
    395 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    396 }
    397 
    398 static ixgbe_vendor_info_t *
    399 ixgbe_lookup(const struct pci_attach_args *pa)
    400 {
    401 	pcireg_t subid;
    402 	ixgbe_vendor_info_t *ent;
    403 
    404 	INIT_DEBUGOUT("ixgbe_probe: begin");
    405 
    406 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    407 		return NULL;
    408 
    409 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    410 
    411 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    412 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    413 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    414 
    415 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    416 		     ent->subvendor_id == 0) &&
    417 
    418 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    419 		     ent->subdevice_id == 0)) {
    420 			++ixgbe_total_ports;
    421 			return ent;
    422 		}
    423 	}
    424 	return NULL;
    425 }
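        /*
        ** Note: a subvendor/subdevice ID of 0 in the table acts as a
        ** wildcard above, so the stock entries match any subsystem ID.
        */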
    426 
    427 
    428 static void
    429 ixgbe_sysctl_attach(struct adapter *adapter)
    430 {
    431 	struct sysctllog **log;
    432 	const struct sysctlnode *rnode, *cnode;
    433 	device_t dev;
    434 
    435 	dev = adapter->dev;
    436 	log = &adapter->sysctllog;
    437 
    438 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    439 		aprint_error_dev(dev, "could not create sysctl root\n");
    440 		return;
    441 	}
    442 
    443 	if (sysctl_createv(log, 0, &rnode, &cnode,
    444 	    CTLFLAG_READONLY, CTLTYPE_INT,
    445 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    446 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    447 		aprint_error_dev(dev, "could not create sysctl\n");
    448 
    449 	if (sysctl_createv(log, 0, &rnode, &cnode,
    450 	    CTLFLAG_READONLY, CTLTYPE_INT,
    451 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    452 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    453 		aprint_error_dev(dev, "could not create sysctl\n");
    454 
    455 	if (sysctl_createv(log, 0, &rnode, &cnode,
    456 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    457 	    "fc", SYSCTL_DESCR("Flow Control"),
    458 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    459 		aprint_error_dev(dev, "could not create sysctl\n");
    460 
    461 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    462 	 * XXX It's that way in the FreeBSD driver that this derives from.
    463 	 */
    464 	if (sysctl_createv(log, 0, &rnode, &cnode,
    465 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    466 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    467 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    468 		aprint_error_dev(dev, "could not create sysctl\n");
    469 
    470 	if (sysctl_createv(log, 0, &rnode, &cnode,
    471 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    472 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    473 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    474 		aprint_error_dev(dev, "could not create sysctl\n");
    475 
    476 	if (sysctl_createv(log, 0, &rnode, &cnode,
    477 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    478 	    "ts", SYSCTL_DESCR("Thermal Test"),
    479 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    480 		aprint_error_dev(dev, "could not create sysctl\n");
    481 }
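        /*
        ** Illustrative usage (assuming the usual device naming, e.g.
        ** ixg0): these nodes hang off the per-device subtree created
        ** by ixgbe_sysctl_instance(), so one would expect, e.g.:
        **
        **	sysctl hw.ixg0.fc
        **	sysctl -w hw.ixg0.advertise_speed=2
        **
        ** The exact parent path depends on ixgbe_sysctl_instance().
        */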
    482 
    483 /*********************************************************************
    484  *  Device initialization routine
    485  *
    486  *  The attach entry point is called when the driver is being loaded.
    487  *  This routine identifies the type of hardware, allocates all resources
    488  *  and initializes the hardware.
    489  *
    490  *  (in NetBSD this attach entry point returns void; errors are printed)
    491  *********************************************************************/
    492 
    493 static void
    494 ixgbe_attach(device_t parent, device_t dev, void *aux)
    495 {
    496 	struct adapter *adapter;
    497 	struct ixgbe_hw *hw;
    498 	int             error = 0;
    499 	u16		csum;
    500 	u32		ctrl_ext;
    501 	ixgbe_vendor_info_t *ent;
    502 	const struct pci_attach_args *pa = aux;
    503 
    504 	INIT_DEBUGOUT("ixgbe_attach: begin");
    505 
    506 	/* Allocate, clear, and link in our adapter structure */
    507 	adapter = device_private(dev);
    508 	adapter->dev = adapter->osdep.dev = dev;
    509 	hw = &adapter->hw;
    510 	adapter->osdep.pc = pa->pa_pc;
    511 	adapter->osdep.tag = pa->pa_tag;
    512 	adapter->osdep.dmat = pa->pa_dmat;
    513 
    514 	ent = ixgbe_lookup(pa);
    515 
    516 	KASSERT(ent != NULL);
    517 
    518 	aprint_normal(": %s, Version - %s\n",
    519 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    520 
    521 	/* Core Lock Init*/
    522 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    523 
    524 	/* SYSCTL APIs */
    525 
    526 	ixgbe_sysctl_attach(adapter);
    527 
    528 	/* Set up the timer callout */
    529 	callout_init(&adapter->timer, 0);
    530 
    531 	/* Determine hardware revision */
    532 	ixgbe_identify_hardware(adapter);
    533 
    534 	/* Do base PCI setup - map BAR0 */
    535 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    536 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    537 		error = ENXIO;
    538 		goto err_out;
    539 	}
    540 
    541 	/* Do descriptor calc and sanity checks */
    542 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    543 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    544 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    545 		adapter->num_tx_desc = DEFAULT_TXD;
    546 	} else
    547 		adapter->num_tx_desc = ixgbe_txd;
    548 
    549 	/*
    550 	** With many RX rings it is easy to exceed the
    551 	** system mbuf allocation. Tuning nmbclusters
    552 	** can alleviate this.
    553 	*/
    554 	if (nmbclusters > 0 ) {
    555 		int s;
    556 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    557 		if (s > nmbclusters) {
    558 			aprint_error_dev(dev, "RX Descriptors exceed "
    559 			    "system mbuf max, using default instead!\n");
    560 			ixgbe_rxd = DEFAULT_RXD;
    561 		}
    562 	}
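        	/* For example, 2 ports * 8 queues * 2048 descriptors
        	 * would want 32768 clusters, which exceeds many default
        	 * nmbclusters settings. */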
    563 
    564 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
    565 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
    566 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    567 		adapter->num_rx_desc = DEFAULT_RXD;
    568 	} else
    569 		adapter->num_rx_desc = ixgbe_rxd;
    570 
    571 	/* Allocate our TX/RX Queues */
    572 	if (ixgbe_allocate_queues(adapter)) {
    573 		error = ENOMEM;
    574 		goto err_out;
    575 	}
    576 
    577 	/* Allocate multicast array memory. */
    578 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    579 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    580 	if (adapter->mta == NULL) {
    581 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    582 		error = ENOMEM;
    583 		goto err_late;
    584 	}
    585 
    586 	/* Initialize the shared code */
    587 	error = ixgbe_init_shared_code(hw);
    588 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    589 		/*
    590 		** No optics in this port, set up
    591 		** so the timer routine will probe
    592 		** for later insertion.
    593 		*/
    594 		adapter->sfp_probe = TRUE;
    595 		error = 0;
    596 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    597 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    598 		error = EIO;
    599 		goto err_late;
    600 	} else if (error) {
    601 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    602 		error = EIO;
    603 		goto err_late;
    604 	}
    605 
    606 	/* Make sure we have a good EEPROM before we read from it */
    607 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    608 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    609 		error = EIO;
    610 		goto err_late;
    611 	}
    612 
    613 	error = ixgbe_init_hw(hw);
    614 	switch (error) {
    615 	case IXGBE_ERR_EEPROM_VERSION:
    616 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    617 		    "LOM.  Please be aware there may be issues associated "
    618 		    "with your hardware.\nIf you are experiencing problems "
    619 		    "please contact your Intel or hardware representative "
    620 		    "who provided you with this hardware.\n");
    621 		break;
    622 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    623 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    624 		error = EIO;
    625 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    626 		goto err_late;
    627 	case IXGBE_ERR_SFP_NOT_PRESENT:
    628 		device_printf(dev,"No SFP+ Module found\n");
    629 		/* falls thru */
    630 	default:
    631 		break;
    632 	}
    633 
    634 	/* Detect and set physical type */
    635 	ixgbe_setup_optics(adapter);
    636 
    637 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    638 		error = ixgbe_allocate_msix(adapter, pa);
    639 	else
    640 		error = ixgbe_allocate_legacy(adapter, pa);
    641 	if (error)
    642 		goto err_late;
    643 
    644 	/* Setup OS specific network interface */
    645 	if (ixgbe_setup_interface(dev, adapter) != 0)
    646 		goto err_late;
    647 
    648 	/* Initialize statistics */
    649 	ixgbe_update_stats_counters(adapter);
    650 
    651         /* Print PCIE bus type/speed/width info */
    652 	ixgbe_get_bus_info(hw);
    653 	aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
    654 	    ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
    655 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
    656 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
    657 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
    658 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
    659 	    ("Unknown"));
    660 
    661 	if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
    662 	    (hw->bus.speed == ixgbe_bus_speed_2500)) {
    663 		aprint_error_dev(dev, "PCI-Express bandwidth available"
    664 		    " for this card\n     is not sufficient for"
    665 		    " optimal performance.\n");
    666 		aprint_error_dev(dev, "For optimal performance a x8 "
    667 		    "PCIE, or x4 PCIE 2 slot is required.\n");
    668         }
    669 
    670 	/* Set an initial default flow control value */
    671 	adapter->fc =  ixgbe_fc_full;
    672 
    673 	/* let hardware know driver is loaded */
    674 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    675 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    676 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    677 
    678 	ixgbe_add_hw_stats(adapter);
    679 
    680 #ifdef DEV_NETMAP
    681 	ixgbe_netmap_attach(adapter);
    682 #endif /* DEV_NETMAP */
    683 	INIT_DEBUGOUT("ixgbe_attach: end");
    684 	return;
    685 err_late:
    686 	ixgbe_free_transmit_structures(adapter);
    687 	ixgbe_free_receive_structures(adapter);
    688 err_out:
    689 	if (adapter->ifp != NULL)
    690 		if_free(adapter->ifp);
    691 	ixgbe_free_pci_resources(adapter);
    692 	if (adapter->mta != NULL)
    693 		free(adapter->mta, M_DEVBUF);
    694 	return;
    695 
    696 }
    697 
    698 /*********************************************************************
    699  *  Device removal routine
    700  *
    701  *  The detach entry point is called when the driver is being removed.
    702  *  This routine stops the adapter and deallocates all the resources
    703  *  that were allocated for driver operation.
    704  *
    705  *  return 0 on success, positive on failure
    706  *********************************************************************/
    707 
    708 static int
    709 ixgbe_detach(device_t dev, int flags)
    710 {
    711 	struct adapter *adapter = device_private(dev);
    712 	struct rx_ring *rxr = adapter->rx_rings;
    713 	struct ixgbe_hw_stats *stats = &adapter->stats;
    714 	struct ix_queue *que = adapter->queues;
    715 	struct tx_ring *txr = adapter->tx_rings;
    716 	u32	ctrl_ext;
    717 
    718 	INIT_DEBUGOUT("ixgbe_detach: begin");
    719 
    720 #if NVLAN > 0
    721 	/* Make sure VLANs are not using driver */
    722 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    723 		;	/* nothing to do: no VLANs */
    724 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    725 		vlan_ifdetach(adapter->ifp);
    726 	else {
    727 		aprint_error_dev(dev, "VLANs in use\n");
    728 		return EBUSY;
    729 	}
    730 #endif
    731 
    732 	IXGBE_CORE_LOCK(adapter);
    733 	ixgbe_stop(adapter);
    734 	IXGBE_CORE_UNLOCK(adapter);
    735 
    736 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    737 #ifndef IXGBE_LEGACY_TX
    738 		softint_disestablish(txr->txq_si);
    739 #endif
    740 		softint_disestablish(que->que_si);
    741 	}
    742 
    743 	/* Drain the Link queue */
    744 	softint_disestablish(adapter->link_si);
    745 	softint_disestablish(adapter->mod_si);
    746 	softint_disestablish(adapter->msf_si);
    747 #ifdef IXGBE_FDIR
    748 	softint_disestablish(adapter->fdir_si);
    749 #endif
    750 
    751 	/* let hardware know driver is unloading */
    752 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    753 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    754 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    755 
    756 	ether_ifdetach(adapter->ifp);
    757 	callout_halt(&adapter->timer, NULL);
    758 #ifdef DEV_NETMAP
    759 	netmap_detach(adapter->ifp);
    760 #endif /* DEV_NETMAP */
    761 	ixgbe_free_pci_resources(adapter);
    762 #if 0	/* XXX the NetBSD port is probably missing something here */
    763 	bus_generic_detach(dev);
    764 #endif
    765 	if_detach(adapter->ifp);
    766 
    767 	sysctl_teardown(&adapter->sysctllog);
    768 	evcnt_detach(&adapter->handleq);
    769 	evcnt_detach(&adapter->req);
    770 	evcnt_detach(&adapter->morerx);
    771 	evcnt_detach(&adapter->moretx);
    772 	evcnt_detach(&adapter->txloops);
    773 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    774 	evcnt_detach(&adapter->m_defrag_failed);
    775 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    776 	evcnt_detach(&adapter->einval_tx_dma_setup);
    777 	evcnt_detach(&adapter->other_tx_dma_setup);
    778 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    779 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    780 	evcnt_detach(&adapter->watchdog_events);
    781 	evcnt_detach(&adapter->tso_err);
    782 	evcnt_detach(&adapter->link_irq);
    783 
    784 	txr = adapter->tx_rings;
    785 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    786 		evcnt_detach(&txr->no_desc_avail);
    787 		evcnt_detach(&txr->total_packets);
    788 		evcnt_detach(&txr->tso_tx);
    789 
    790 		if (i < __arraycount(adapter->stats.mpc)) {
    791 			evcnt_detach(&adapter->stats.mpc[i]);
    792 		}
    793 		if (i < __arraycount(adapter->stats.pxontxc)) {
    794 			evcnt_detach(&adapter->stats.pxontxc[i]);
    795 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    796 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    797 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    798 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    799 		}
    800 		if (i < __arraycount(adapter->stats.qprc)) {
    801 			evcnt_detach(&adapter->stats.qprc[i]);
    802 			evcnt_detach(&adapter->stats.qptc[i]);
    803 			evcnt_detach(&adapter->stats.qbrc[i]);
    804 			evcnt_detach(&adapter->stats.qbtc[i]);
    805 			evcnt_detach(&adapter->stats.qprdc[i]);
    806 		}
    807 
    808 		evcnt_detach(&rxr->rx_packets);
    809 		evcnt_detach(&rxr->rx_bytes);
    810 		evcnt_detach(&rxr->no_jmbuf);
    811 		evcnt_detach(&rxr->rx_discarded);
    812 		evcnt_detach(&rxr->rx_irq);
    813 	}
    814 	evcnt_detach(&stats->ipcs);
    815 	evcnt_detach(&stats->l4cs);
    816 	evcnt_detach(&stats->ipcs_bad);
    817 	evcnt_detach(&stats->l4cs_bad);
    818 	evcnt_detach(&stats->intzero);
    819 	evcnt_detach(&stats->legint);
    820 	evcnt_detach(&stats->crcerrs);
    821 	evcnt_detach(&stats->illerrc);
    822 	evcnt_detach(&stats->errbc);
    823 	evcnt_detach(&stats->mspdc);
    824 	evcnt_detach(&stats->mlfc);
    825 	evcnt_detach(&stats->mrfc);
    826 	evcnt_detach(&stats->rlec);
    827 	evcnt_detach(&stats->lxontxc);
    828 	evcnt_detach(&stats->lxonrxc);
    829 	evcnt_detach(&stats->lxofftxc);
    830 	evcnt_detach(&stats->lxoffrxc);
    831 
    832 	/* Packet Reception Stats */
    833 	evcnt_detach(&stats->tor);
    834 	evcnt_detach(&stats->gorc);
    835 	evcnt_detach(&stats->tpr);
    836 	evcnt_detach(&stats->gprc);
    837 	evcnt_detach(&stats->mprc);
    838 	evcnt_detach(&stats->bprc);
    839 	evcnt_detach(&stats->prc64);
    840 	evcnt_detach(&stats->prc127);
    841 	evcnt_detach(&stats->prc255);
    842 	evcnt_detach(&stats->prc511);
    843 	evcnt_detach(&stats->prc1023);
    844 	evcnt_detach(&stats->prc1522);
    845 	evcnt_detach(&stats->ruc);
    846 	evcnt_detach(&stats->rfc);
    847 	evcnt_detach(&stats->roc);
    848 	evcnt_detach(&stats->rjc);
    849 	evcnt_detach(&stats->mngprc);
    850 	evcnt_detach(&stats->xec);
    851 
    852 	/* Packet Transmission Stats */
    853 	evcnt_detach(&stats->gotc);
    854 	evcnt_detach(&stats->tpt);
    855 	evcnt_detach(&stats->gptc);
    856 	evcnt_detach(&stats->bptc);
    857 	evcnt_detach(&stats->mptc);
    858 	evcnt_detach(&stats->mngptc);
    859 	evcnt_detach(&stats->ptc64);
    860 	evcnt_detach(&stats->ptc127);
    861 	evcnt_detach(&stats->ptc255);
    862 	evcnt_detach(&stats->ptc511);
    863 	evcnt_detach(&stats->ptc1023);
    864 	evcnt_detach(&stats->ptc1522);
    865 
    866 	ixgbe_free_transmit_structures(adapter);
    867 	ixgbe_free_receive_structures(adapter);
    868 	free(adapter->mta, M_DEVBUF);
    869 
    870 	IXGBE_CORE_LOCK_DESTROY(adapter);
    871 	return (0);
    872 }
    873 
    874 /*********************************************************************
    875  *
    876  *  Shutdown entry point
    877  *
    878  **********************************************************************/
    879 
    880 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
    881 static int
    882 ixgbe_shutdown(device_t dev)
    883 {
    884 	struct adapter *adapter = device_private(dev);
    885 	IXGBE_CORE_LOCK(adapter);
    886 	ixgbe_stop(adapter);
    887 	IXGBE_CORE_UNLOCK(adapter);
    888 	return (0);
    889 }
    890 #endif
    891 
    892 
    893 #ifdef IXGBE_LEGACY_TX
    894 /*********************************************************************
    895  *  Transmit entry point
    896  *
    897  *  ixgbe_start is called by the stack to initiate a transmit.
    898  *  The driver will remain in this routine as long as there are
    899  *  packets to transmit and transmit resources are available.
    900  *  In case resources are not available stack is notified and
    901  *  the packet is requeued.
    902  **********************************************************************/
    903 
    904 static void
    905 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    906 {
    907 	int rc;
    908 	struct mbuf    *m_head;
    909 	struct adapter *adapter = txr->adapter;
    910 
    911 	IXGBE_TX_LOCK_ASSERT(txr);
    912 
    913 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    914 		return;
    915 	if (!adapter->link_active)
    916 		return;
    917 
    918 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    919 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    920 			break;
    921 
    922 		IFQ_POLL(&ifp->if_snd, m_head);
    923 		if (m_head == NULL)
    924 			break;
    925 
    926 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    927 			break;
    928 		}
    929 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    930 		if (rc == EFBIG) {
    931 			struct mbuf *mtmp;
    932 
    933 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    934 				m_head = mtmp;
    935 				rc = ixgbe_xmit(txr, m_head);
    936 				if (rc != 0)
    937 					adapter->efbig2_tx_dma_setup.ev_count++;
    938 			} else
    939 				adapter->m_defrag_failed.ev_count++;
    940 		}
    941 		if (rc != 0) {
    942 			m_freem(m_head);
    943 			continue;
    944 		}
    945 
    946 		/* Send a copy of the frame to the BPF listener */
    947 		bpf_mtap(ifp, m_head);
    948 
    949 		/* Set watchdog on */
    950 		getmicrotime(&txr->watchdog_time);
    951 		txr->queue_status = IXGBE_QUEUE_WORKING;
    952 
    953 	}
    954 	return;
    955 }
    956 
    957 /*
    958  * Legacy TX start - called by the stack; this
    959  * always uses the first tx ring, and should
    960  * not be used with multiqueue tx enabled.
    961  */
    962 static void
    963 ixgbe_start(struct ifnet *ifp)
    964 {
    965 	struct adapter *adapter = ifp->if_softc;
    966 	struct tx_ring	*txr = adapter->tx_rings;
    967 
    968 	if (ifp->if_flags & IFF_RUNNING) {
    969 		IXGBE_TX_LOCK(txr);
    970 		ixgbe_start_locked(txr, ifp);
    971 		IXGBE_TX_UNLOCK(txr);
    972 	}
    973 	return;
    974 }
    975 
    976 #else /* ! IXGBE_LEGACY_TX */
    977 
    978 /*
    979 ** Multiqueue Transmit driver
    980 **
    981 */
    982 static int
    983 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    984 {
    985 	struct adapter	*adapter = ifp->if_softc;
    986 	struct ix_queue	*que;
    987 	struct tx_ring	*txr;
    988 	int 		i = 0, err = 0;
    989 
    990 	/* Which queue to use */
    991 	if ((m->m_flags & M_FLOWID) != 0)
    992 		i = m->m_pkthdr.flowid % adapter->num_queues;
    993 	else
    994 		i = cpu_index(curcpu()) % adapter->num_queues;
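        	/* (The flow ID keeps a flow pinned to one ring; the CPU
        	 * fallback spreads the load across rings.) */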
    995 
    996 	txr = &adapter->tx_rings[i];
    997 	que = &adapter->queues[i];
    998 
    999 	if (IXGBE_TX_TRYLOCK(txr)) {
   1000 		err = ixgbe_mq_start_locked(ifp, txr, m);
   1001 		IXGBE_TX_UNLOCK(txr);
   1002 	} else {
   1003 		err = drbr_enqueue(ifp, txr->br, m);
   1004 		softint_schedule(txr->txq_si);
   1005 	}
   1006 
   1007 	return (err);
   1008 }
   1009 
   1010 static int
   1011 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
   1012 {
   1013 	struct adapter  *adapter = txr->adapter;
   1014         struct mbuf     *next;
   1015         int             enqueued, err = 0;
   1016 
   1017 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1018 	    adapter->link_active == 0) {
   1019 		if (m != NULL)
   1020 			err = drbr_enqueue(ifp, txr->br, m);
   1021 		return (err);
   1022 	}
   1023 
   1024 	enqueued = 0;
   1025 	if (m != NULL) {
   1026 		err = drbr_enqueue(ifp, txr->br, m);
   1027 		if (err) {
   1028 			return (err);
   1029 		}
   1030 	}
   1031 
   1032 	/* Process the queue */
   1033 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1034 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1035 			if (next == NULL) {
   1036 				drbr_advance(ifp, txr->br);
   1037 			} else {
   1038 				drbr_putback(ifp, txr->br, next);
   1039 			}
   1040 			break;
   1041 		}
   1042 		drbr_advance(ifp, txr->br);
   1043 		enqueued++;
   1044 		/* Send a copy of the frame to the BPF listener */
   1045 		bpf_mtap(ifp, next);
   1046 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1047 			break;
   1048 		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
   1049 			ixgbe_txeof(txr);
   1050 	}
   1051 
   1052 	if (enqueued > 0) {
   1053 		/* Set watchdog on */
   1054 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1055 		getmicrotime(&txr->watchdog_time);
   1056 	}
   1057 
   1058 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1059 		ixgbe_txeof(txr);
   1060 
   1061 	return (err);
   1062 }
   1063 
   1064 /*
   1065  * Called from a softint (a taskqueue on FreeBSD) to drain queued transmit packets.
   1066  */
   1067 static void
   1068 ixgbe_deferred_mq_start(void *arg)
   1069 {
   1070 	struct tx_ring *txr = arg;
   1071 	struct adapter *adapter = txr->adapter;
   1072 	struct ifnet *ifp = adapter->ifp;
   1073 
   1074 	IXGBE_TX_LOCK(txr);
   1075 	if (!drbr_empty(ifp, txr->br))
   1076 		ixgbe_mq_start_locked(ifp, txr, NULL);
   1077 	IXGBE_TX_UNLOCK(txr);
   1078 }
   1079 
   1080 /*
   1081 ** Flush all ring buffers
   1082 */
   1083 static void
   1084 ixgbe_qflush(struct ifnet *ifp)
   1085 {
   1086 	struct adapter	*adapter = ifp->if_softc;
   1087 	struct tx_ring	*txr = adapter->tx_rings;
   1088 	struct mbuf	*m;
   1089 
   1090 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1091 		IXGBE_TX_LOCK(txr);
   1092 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1093 			m_freem(m);
   1094 		IXGBE_TX_UNLOCK(txr);
   1095 	}
   1096 	if_qflush(ifp);
   1097 }
   1098 #endif /* IXGBE_LEGACY_TX */
   1099 
   1100 static int
   1101 ixgbe_ifflags_cb(struct ethercom *ec)
   1102 {
   1103 	struct ifnet *ifp = &ec->ec_if;
   1104 	struct adapter *adapter = ifp->if_softc;
   1105 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1106 
   1107 	IXGBE_CORE_LOCK(adapter);
   1108 
   1109 	if (change != 0)
   1110 		adapter->if_flags = ifp->if_flags;
   1111 
   1112 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1113 		rc = ENETRESET;
   1114 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1115 		ixgbe_set_promisc(adapter);
   1116 
   1117 	/* Set up VLAN support and filter */
   1118 	ixgbe_setup_vlan_hw_support(adapter);
   1119 
   1120 	IXGBE_CORE_UNLOCK(adapter);
   1121 
   1122 	return rc;
   1123 }
   1124 
   1125 /*********************************************************************
   1126  *  Ioctl entry point
   1127  *
   1128  *  ixgbe_ioctl is called when the user wants to configure the
   1129  *  interface.
   1130  *
   1131  *  return 0 on success, positive on failure
   1132  **********************************************************************/
   1133 
   1134 static int
   1135 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1136 {
   1137 	struct adapter	*adapter = ifp->if_softc;
   1138 	struct ixgbe_hw *hw = &adapter->hw;
   1139 	struct ifcapreq *ifcr = data;
   1140 	struct ifreq	*ifr = data;
   1141 	int             error = 0;
   1142 	int l4csum_en;
   1143 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1144 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1145 
   1146 	switch (command) {
   1147 	case SIOCSIFFLAGS:
   1148 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1149 		break;
   1150 	case SIOCADDMULTI:
   1151 	case SIOCDELMULTI:
   1152 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1153 		break;
   1154 	case SIOCSIFMEDIA:
   1155 	case SIOCGIFMEDIA:
   1156 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1157 		break;
   1158 	case SIOCSIFCAP:
   1159 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1160 		break;
   1161 	case SIOCSIFMTU:
   1162 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1163 		break;
   1164 	default:
   1165 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1166 		break;
   1167 	}
   1168 
   1169 	switch (command) {
   1170 	case SIOCSIFMEDIA:
   1171 	case SIOCGIFMEDIA:
   1172 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1173 	case SIOCGI2C:
   1174 	{
   1175 		struct ixgbe_i2c_req	i2c;
   1176 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1177 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1178 		if (error)
   1179 			break;
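        		/* 0xA0 and 0xA2 are the standard SFP EEPROM addresses
        		 * (SFF-8472 identification and diagnostic pages). */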
   1180 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
   1181 			error = EINVAL;
   1182 			break;
   1183 		}
   1184 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
   1185 		    i2c.dev_addr, i2c.data);
   1186 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1187 		break;
   1188 	}
   1189 	case SIOCSIFCAP:
   1190 		/* Layer-4 Rx checksum offload has to be turned on and
   1191 		 * off as a unit.
   1192 		 */
   1193 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1194 		if (l4csum_en != l4csum && l4csum_en != 0)
   1195 			return EINVAL;
   1196 		/*FALLTHROUGH*/
   1197 	case SIOCADDMULTI:
   1198 	case SIOCDELMULTI:
   1199 	case SIOCSIFFLAGS:
   1200 	case SIOCSIFMTU:
   1201 	default:
   1202 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1203 			return error;
   1204 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1205 			;
   1206 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1207 			IXGBE_CORE_LOCK(adapter);
   1208 			ixgbe_init_locked(adapter);
   1209 			IXGBE_CORE_UNLOCK(adapter);
   1210 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1211 			/*
   1212 			 * Multicast list has changed; set the hardware filter
   1213 			 * accordingly.
   1214 			 */
   1215 			IXGBE_CORE_LOCK(adapter);
   1216 			ixgbe_disable_intr(adapter);
   1217 			ixgbe_set_multi(adapter);
   1218 			ixgbe_enable_intr(adapter);
   1219 			IXGBE_CORE_UNLOCK(adapter);
   1220 		}
   1221 		return 0;
   1222 	}
   1223 
   1224 	return error;
   1225 }
   1226 
   1227 /*********************************************************************
   1228  *  Init entry point
   1229  *
   1230  *  This routine is used in two ways. It is used by the stack as
   1231  *  init entry point in network interface structure. It is also used
   1232  *  by the driver as a hw/sw initialization routine to get to a
   1233  *  consistent state.
   1234  *
   1235  *  return 0 on success, positive on failure
   1236  **********************************************************************/
   1237 #define IXGBE_MHADD_MFS_SHIFT 16
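        /* The maximum frame size (MFS) occupies the upper 16 bits of
         * the MHADD register, written in ixgbe_init_locked() below. */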
   1238 
   1239 static void
   1240 ixgbe_init_locked(struct adapter *adapter)
   1241 {
   1242 	struct ifnet   *ifp = adapter->ifp;
   1243 	device_t 	dev = adapter->dev;
   1244 	struct ixgbe_hw *hw = &adapter->hw;
   1245 	u32		k, txdctl, mhadd, gpie;
   1246 	u32		rxdctl, rxctrl;
   1247 
   1248 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1249 
   1250 	KASSERT(mutex_owned(&adapter->core_mtx));
   1251 	INIT_DEBUGOUT("ixgbe_init: begin");
   1252 	hw->adapter_stopped = FALSE;
   1253 	ixgbe_stop_adapter(hw);
   1254         callout_stop(&adapter->timer);
   1255 
   1256 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1257 	adapter->max_frame_size =
   1258 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1259 
   1260         /* reprogram the RAR[0] in case user changed it. */
   1261         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1262 
   1263 	/* Get the latest mac address, User can use a LAA */
   1264 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1265 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1266 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1267 	hw->addr_ctrl.rar_used_count = 1;
   1268 
   1269 	/* Prepare transmit descriptors and buffers */
   1270 	if (ixgbe_setup_transmit_structures(adapter)) {
   1271 		device_printf(dev,"Could not setup transmit structures\n");
   1272 		ixgbe_stop(adapter);
   1273 		return;
   1274 	}
   1275 
   1276 	ixgbe_init_hw(hw);
   1277 	ixgbe_initialize_transmit_units(adapter);
   1278 
   1279 	/* Setup Multicast table */
   1280 	ixgbe_set_multi(adapter);
   1281 
   1282 	/*
   1283 	** Determine the correct mbuf pool
   1284 	** for doing jumbo frames
   1285 	*/
   1286 	if (adapter->max_frame_size <= 2048)
   1287 		adapter->rx_mbuf_sz = MCLBYTES;
   1288 	else if (adapter->max_frame_size <= 4096)
   1289 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1290 	else if (adapter->max_frame_size <= 9216)
   1291 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1292 	else
   1293 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1294 
   1295 	/* Prepare receive descriptors and buffers */
   1296 	if (ixgbe_setup_receive_structures(adapter)) {
   1297 		device_printf(dev,"Could not setup receive structures\n");
   1298 		ixgbe_stop(adapter);
   1299 		return;
   1300 	}
   1301 
   1302 	/* Configure RX settings */
   1303 	ixgbe_initialize_receive_units(adapter);
   1304 
   1305 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1306 
   1307 	/* Enable Fan Failure Interrupt */
   1308 	gpie |= IXGBE_SDP1_GPIEN;
   1309 
   1310 	/* Add for Thermal detection */
   1311 	if (hw->mac.type == ixgbe_mac_82599EB)
   1312 		gpie |= IXGBE_SDP2_GPIEN;
   1313 
   1314 	/* Thermal Failure Detection */
   1315 	if (hw->mac.type == ixgbe_mac_X540)
   1316 		gpie |= IXGBE_SDP0_GPIEN;
   1317 
   1318 	if (adapter->msix > 1) {
   1319 		/* Enable Enhanced MSIX mode */
   1320 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1321 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1322 		    IXGBE_GPIE_OCD;
   1323 	}
   1324 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1325 
   1326 	/* Set MTU size */
   1327 	if (ifp->if_mtu > ETHERMTU) {
   1328 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1329 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1330 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1331 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1332 	}
   1333 
   1334 	/* Now enable all the queues */
   1335 
   1336 	for (int i = 0; i < adapter->num_queues; i++) {
   1337 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1338 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1339 		/* Set WTHRESH to 8, burst writeback */
   1340 		txdctl |= (8 << 16);
   1341 		/*
   1342 		 * When the internal queue falls below PTHRESH (32),
   1343 		 * start prefetching as long as there are at least
   1344 		 * HTHRESH (1) buffers ready. The values are taken
   1345 		 * from the Intel linux driver 3.8.21.
   1346 		 * Prefetching enables tx line rate even with 1 queue.
   1347 		 */
   1348 		txdctl |= (32 << 0) | (1 << 8);
   1349 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1350 	}
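        	/*
        	 * For reference: in TXDCTL, PTHRESH is bits 6:0, HTHRESH
        	 * is bits 14:8 and WTHRESH is bits 22:16, which is what
        	 * the (32 << 0) | (1 << 8) and (8 << 16) writes above
        	 * encode.
        	 */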
   1351 
   1352 	for (int i = 0; i < adapter->num_queues; i++) {
   1353 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1354 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1355 			/*
   1356 			** PTHRESH = 32 (0x20)
   1357 			** HTHRESH = 4
   1358 			** WTHRESH = 8
   1359 			*/
   1360 			rxdctl &= ~0x3FFFFF;
   1361 			rxdctl |= 0x080420;
   1362 		}
   1363 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1364 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1365 		/* XXX I don't trust this loop, and I don't trust the
   1366 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1367 		 */
   1368 		for (k = 0; k < 10; k++) {
   1369 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1370 			    IXGBE_RXDCTL_ENABLE)
   1371 				break;
   1372 			else
   1373 				msec_delay(1);
   1374 		}
   1375 		wmb();
   1376 #ifdef DEV_NETMAP
   1377 		/*
   1378 		 * In netmap mode, we must preserve the buffers made
   1379 		 * available to userspace before the if_init()
   1380 		 * (this is true by default on the TX side, because
   1381 		 * init makes all buffers available to userspace).
   1382 		 *
   1383 		 * netmap_reset() and the device specific routines
   1384 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1385 		 * buffers at the end of the NIC ring, so here we
   1386 		 * must set the RDT (tail) register to make sure
   1387 		 * they are not overwritten.
   1388 		 *
   1389 		 * In this driver the NIC ring starts at RDH = 0,
   1390 		 * RDT points to the last slot available for reception (?),
   1391 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1392 		 */
   1393 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1394 			struct netmap_adapter *na = NA(adapter->ifp);
   1395 			struct netmap_kring *kring = &na->rx_rings[i];
   1396 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
   1397 
   1398 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1399 		} else
   1400 #endif /* DEV_NETMAP */
   1401 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1402 	}
   1403 
   1404 	/* Set up VLAN support and filter */
   1405 	ixgbe_setup_vlan_hw_support(adapter);
   1406 
   1407 	/* Enable Receive engine */
   1408 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1409 	if (hw->mac.type == ixgbe_mac_82598EB)
   1410 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1411 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1412 	ixgbe_enable_rx_dma(hw, rxctrl);
   1413 
   1414 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1415 
   1416 	/* Set up MSI/X routing */
   1417 	if (ixgbe_enable_msix)  {
   1418 		ixgbe_configure_ivars(adapter);
   1419 		/* Set up auto-mask */
   1420 		if (hw->mac.type == ixgbe_mac_82598EB)
   1421 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1422 		else {
   1423 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1424 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1425 		}
   1426 	} else {  /* Simple settings for Legacy/MSI */
   1427                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1428                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1429 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1430 	}
   1431 
   1432 #ifdef IXGBE_FDIR
   1433 	/* Init Flow director */
   1434 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1435 		u32 hdrm = 32 << fdir_pballoc;
   1436 
   1437 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1438 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1439 	}
   1440 #endif
   1441 
   1442 	/*
   1443 	** Check on any SFP devices that
   1444 	** need to be kick-started
   1445 	*/
   1446 	if (hw->phy.type == ixgbe_phy_none) {
   1447 		int err = hw->phy.ops.identify(hw);
   1448 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1449                 	device_printf(dev,
   1450 			    "Unsupported SFP+ module type was detected.\n");
   1451 			return;
   1452         	}
   1453 	}
   1454 
   1455 	/* Set moderation on the Link interrupt */
   1456 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1457 
   1458 	/* Config/Enable Link */
   1459 	ixgbe_config_link(adapter);
   1460 
   1461 	/* Hardware Packet Buffer & Flow Control setup */
   1462 	{
   1463 		u32 rxpb, frame, size, tmp;
   1464 
   1465 		frame = adapter->max_frame_size;
   1466 
   1467 		/* Calculate High Water */
   1468 		if (hw->mac.type == ixgbe_mac_X540)
   1469 			tmp = IXGBE_DV_X540(frame, frame);
   1470 		else
   1471 			tmp = IXGBE_DV(frame, frame);
   1472 		size = IXGBE_BT2KB(tmp);
   1473 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1474 		hw->fc.high_water[0] = rxpb - size;
   1475 
   1476 		/* Now calculate Low Water */
   1477 		if (hw->mac.type == ixgbe_mac_X540)
   1478 			tmp = IXGBE_LOW_DV_X540(frame);
   1479 		else
   1480 			tmp = IXGBE_LOW_DV(frame);
   1481 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1482 
   1483 		hw->fc.requested_mode = adapter->fc;
   1484 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1485 		hw->fc.send_xon = TRUE;
   1486 	}
   1487 	/* Initialize the FC settings */
   1488 	ixgbe_start_hw(hw);
   1489 
   1490 	/* And now turn on interrupts */
   1491 	ixgbe_enable_intr(adapter);
   1492 
   1493 	/* Now inform the stack we're ready */
   1494 	ifp->if_flags |= IFF_RUNNING;
   1495 
   1496 	return;
   1497 }
   1498 
   1499 static int
   1500 ixgbe_init(struct ifnet *ifp)
   1501 {
   1502 	struct adapter *adapter = ifp->if_softc;
   1503 
   1504 	IXGBE_CORE_LOCK(adapter);
   1505 	ixgbe_init_locked(adapter);
   1506 	IXGBE_CORE_UNLOCK(adapter);
   1507 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1508 }
   1509 
   1510 
   1511 /*
   1512 **
   1513 ** MSIX Interrupt Handlers and Tasklets
   1514 **
   1515 */
   1516 
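         /*
          * Note: the 82598 has a single 32-bit EIMS/EIMC/EICS register for
          * its queue vectors; the 82599 and X540 widen the vector space to
          * 64 bits split across the _EX(0) (vectors 0-31) and _EX(1)
          * (vectors 32-63) register pairs, so the helpers below write the
          * 64-bit queue mask in two halves.
          */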
   1517 static inline void
   1518 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1519 {
   1520 	struct ixgbe_hw *hw = &adapter->hw;
   1521 	u64	queue = (u64)(1ULL << vector);
   1522 	u32	mask;
   1523 
    1524 	if (hw->mac.type == ixgbe_mac_82598EB) {
    1525 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
    1526 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
    1527 	} else {
    1528 		mask = (queue & 0xFFFFFFFF);
    1529 		if (mask)
    1530 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
    1531 		mask = (queue >> 32);
    1532 		if (mask)
    1533 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
    1534 	}
   1535 }
   1536 
   1537 __unused static inline void
   1538 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1539 {
   1540 	struct ixgbe_hw *hw = &adapter->hw;
   1541 	u64	queue = (u64)(1ULL << vector);
   1542 	u32	mask;
   1543 
    1544 	if (hw->mac.type == ixgbe_mac_82598EB) {
    1545 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
    1546 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
    1547 	} else {
    1548 		mask = (queue & 0xFFFFFFFF);
    1549 		if (mask)
    1550 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
    1551 		mask = (queue >> 32);
    1552 		if (mask)
    1553 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
    1554 	}
   1555 }
   1556 
   1557 static inline void
   1558 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
   1559 {
   1560 	u32 mask;
   1561 
   1562 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   1563 		mask = (IXGBE_EIMS_RTX_QUEUE & queues);
   1564 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
   1565 	} else {
   1566 		mask = (queues & 0xFFFFFFFF);
   1567 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
   1568 		mask = (queues >> 32);
   1569 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
   1570 	}
   1571 }
   1572 
   1573 
   1574 static void
   1575 ixgbe_handle_que(void *context)
   1576 {
   1577 	struct ix_queue *que = context;
   1578 	struct adapter  *adapter = que->adapter;
   1579 	struct tx_ring  *txr = que->txr;
   1580 	struct ifnet    *ifp = adapter->ifp;
   1581 	bool		more;
   1582 
   1583 	adapter->handleq.ev_count++;
   1584 
   1585 	if (ifp->if_flags & IFF_RUNNING) {
   1586 		more = ixgbe_rxeof(que);
   1587 		IXGBE_TX_LOCK(txr);
   1588 		ixgbe_txeof(txr);
   1589 #ifndef IXGBE_LEGACY_TX
   1590 		if (!drbr_empty(ifp, txr->br))
   1591 			ixgbe_mq_start_locked(ifp, txr, NULL);
   1592 #else
   1593 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1594 			ixgbe_start_locked(txr, ifp);
   1595 #endif
   1596 		IXGBE_TX_UNLOCK(txr);
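         		/*
         		 * If the RX ring still has work, reschedule ourselves
         		 * and leave this queue's interrupt masked until the
         		 * backlog drains.
         		 */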
   1597 		if (more) {
   1598 			adapter->req.ev_count++;
   1599 			softint_schedule(que->que_si);
   1600 			return;
   1601 		}
   1602 	}
   1603 
   1604 	/* Reenable this interrupt */
   1605 	ixgbe_enable_queue(adapter, que->msix);
   1606 	return;
   1607 }
   1608 
   1609 
   1610 /*********************************************************************
   1611  *
   1612  *  Legacy Interrupt Service routine
   1613  *
   1614  **********************************************************************/
   1615 
   1616 static int
   1617 ixgbe_legacy_irq(void *arg)
   1618 {
   1619 	struct ix_queue *que = arg;
   1620 	struct adapter	*adapter = que->adapter;
   1621 	struct ifnet   *ifp = adapter->ifp;
   1622 	struct ixgbe_hw	*hw = &adapter->hw;
    1623 	struct tx_ring	*txr = adapter->tx_rings;
   1624 	bool		more_tx = false, more_rx = false;
   1625 	u32       	reg_eicr, loop = MAX_LOOP;
   1626 
   1627 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1628 
   1629 	adapter->stats.legint.ev_count++;
   1630 	++que->irqs;
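         	/* EICR reads as zero when the (shared) interrupt was not ours. */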
   1631 	if (reg_eicr == 0) {
   1632 		adapter->stats.intzero.ev_count++;
   1633 		if ((ifp->if_flags & IFF_UP) != 0)
   1634 			ixgbe_enable_intr(adapter);
   1635 		return 0;
   1636 	}
   1637 
   1638 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1639 		more_rx = ixgbe_rxeof(que);
   1640 
   1641 		IXGBE_TX_LOCK(txr);
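         		/*
         		 * Bound the TX cleanup work: retry ixgbe_txeof() at
         		 * most MAX_LOOP times while it reports work pending.
         		 */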
   1642 		do {
   1643 			adapter->txloops.ev_count++;
   1644 			more_tx = ixgbe_txeof(txr);
   1645 		} while (loop-- && more_tx);
   1646 		IXGBE_TX_UNLOCK(txr);
   1647 	}
   1648 
   1649 	if (more_rx || more_tx) {
   1650 		if (more_rx)
   1651 			adapter->morerx.ev_count++;
   1652 		if (more_tx)
   1653 			adapter->moretx.ev_count++;
   1654 		softint_schedule(que->que_si);
   1655 	}
   1656 
   1657 	/* Check for fan failure */
   1658 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1659 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1660                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1661 		    "REPLACE IMMEDIATELY!!\n");
   1662 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1663 	}
   1664 
   1665 	/* Link status change */
   1666 	if (reg_eicr & IXGBE_EICR_LSC)
   1667 		softint_schedule(adapter->link_si);
   1668 
   1669 	ixgbe_enable_intr(adapter);
   1670 	return 1;
   1671 }
   1672 
   1673 
   1674 #if defined(NETBSD_MSI_OR_MSIX)
   1675 /*********************************************************************
   1676  *
   1677  *  MSIX Queue Interrupt Service routine
   1678  *
   1679  **********************************************************************/
   1680 void
   1681 ixgbe_msix_que(void *arg)
   1682 {
   1683 	struct ix_queue	*que = arg;
   1684 	struct adapter  *adapter = que->adapter;
   1685 	struct tx_ring	*txr = que->txr;
   1686 	struct rx_ring	*rxr = que->rxr;
   1687 	bool		more_tx, more_rx;
   1688 	u32		newitr = 0;
   1689 
   1690 	ixgbe_disable_queue(adapter, que->msix);
   1691 	++que->irqs;
   1692 
   1693 	more_rx = ixgbe_rxeof(que);
   1694 
   1695 	IXGBE_TX_LOCK(txr);
   1696 	more_tx = ixgbe_txeof(txr);
   1697 	/*
   1698 	** Make certain that if the stack
   1699 	** has anything queued the task gets
   1700 	** scheduled to handle it.
   1701 	*/
   1702 #ifdef IXGBE_LEGACY_TX
   1703 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1704 #else
   1705 	if (!drbr_empty(adapter->ifp, txr->br))
   1706 #endif
   1707 		more_tx = 1;
   1708 	IXGBE_TX_UNLOCK(txr);
   1709 
   1710 	/* Do AIM now? */
   1711 
   1712 	if (ixgbe_enable_aim == FALSE)
   1713 		goto no_calc;
   1714 	/*
   1715 	** Do Adaptive Interrupt Moderation:
   1716         **  - Write out last calculated setting
   1717 	**  - Calculate based on average size over
   1718 	**    the last interval.
   1719 	*/
    1720 	if (que->eitr_setting)
    1721 		IXGBE_WRITE_REG(&adapter->hw,
    1722 		    IXGBE_EITR(que->msix), que->eitr_setting);
    1723 
    1724 	que->eitr_setting = 0;
    1725 
    1726 	/* Idle, do nothing */
    1727 	if ((txr->bytes == 0) && (rxr->bytes == 0))
    1728 		goto no_calc;
   1729 
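         	/*
         	 * Worked example (illustrative numbers only): an average
         	 * frame of 1500 bytes gives newitr = 1500 + 24 = 1524;
         	 * that is above the 300-1200 mid range, so it is halved to
         	 * 762 before being written to EITR on the next interrupt.
         	 */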
    1730 	if ((txr->bytes) && (txr->packets))
    1731 		newitr = txr->bytes / txr->packets;
    1732 	if ((rxr->bytes) && (rxr->packets))
    1733 		newitr = max(newitr,
    1734 		    (rxr->bytes / rxr->packets));
    1735 	newitr += 24; /* account for hardware frame, crc */
    1736 
    1737 	/* set an upper boundary */
    1738 	newitr = min(newitr, 3000);
    1739 
    1740 	/* Be nice to the mid range */
    1741 	if ((newitr > 300) && (newitr < 1200))
    1742 		newitr = (newitr / 3);
    1743 	else
    1744 		newitr = (newitr / 2);
    1745 
    1746 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
    1747 		newitr |= newitr << 16;
    1748 	else
    1749 		newitr |= IXGBE_EITR_CNT_WDIS;
    1750 
    1751 	/* save for next interrupt */
    1752 	que->eitr_setting = newitr;
    1753 
    1754 	/* Reset state */
    1755 	txr->bytes = 0;
    1756 	txr->packets = 0;
    1757 	rxr->bytes = 0;
    1758 	rxr->packets = 0;
   1759 
   1760 no_calc:
   1761 	if (more_tx || more_rx)
   1762 		softint_schedule(que->que_si);
   1763 	else /* Reenable this interrupt */
   1764 		ixgbe_enable_queue(adapter, que->msix);
   1765 	return;
   1766 }
   1767 
   1768 
   1769 static void
   1770 ixgbe_msix_link(void *arg)
   1771 {
   1772 	struct adapter	*adapter = arg;
   1773 	struct ixgbe_hw *hw = &adapter->hw;
   1774 	u32		reg_eicr;
   1775 
   1776 	++adapter->link_irq.ev_count;
   1777 
   1778 	/* First get the cause */
   1779 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1780 	/* Clear interrupt with write */
   1781 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1782 
   1783 	/* Link status change */
   1784 	if (reg_eicr & IXGBE_EICR_LSC)
   1785 		softint_schedule(adapter->link_si);
   1786 
   1787 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1788 #ifdef IXGBE_FDIR
   1789 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1790 			/* This is probably overkill :) */
   1791 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1792 				return;
   1793                 	/* Disable the interrupt */
   1794 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1795 			softint_schedule(adapter->fdir_si);
   1796 		} else
   1797 #endif
   1798 		if (reg_eicr & IXGBE_EICR_ECC) {
   1799                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1800 			    "Please Reboot!!\n");
   1801 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   1802 		} else
   1803 
   1804 		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1805                 	/* Clear the interrupt */
   1806                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1807 			softint_schedule(adapter->msf_si);
   1808         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1809                 	/* Clear the interrupt */
   1810                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1811 			softint_schedule(adapter->mod_si);
   1812 		}
   1813         }
   1814 
   1815 	/* Check for fan failure */
   1816 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1817 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1818                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1819 		    "REPLACE IMMEDIATELY!!\n");
   1820 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1821 	}
   1822 
   1823 	/* Check for over temp condition */
   1824 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1825 	    (reg_eicr & IXGBE_EICR_TS)) {
   1826 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1827 		    "PHY IS SHUT DOWN!!\n");
   1828 		device_printf(adapter->dev, "System shutdown required\n");
   1829 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1830 	}
   1831 
   1832 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1833 	return;
   1834 }
   1835 #endif
   1836 
   1837 /*********************************************************************
   1838  *
   1839  *  Media Ioctl callback
   1840  *
   1841  *  This routine is called whenever the user queries the status of
   1842  *  the interface using ifconfig.
   1843  *
   1844  **********************************************************************/
   1845 static void
   1846 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1847 {
   1848 	struct adapter *adapter = ifp->if_softc;
   1849 
   1850 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1851 	IXGBE_CORE_LOCK(adapter);
   1852 	ixgbe_update_link_status(adapter);
   1853 
   1854 	ifmr->ifm_status = IFM_AVALID;
   1855 	ifmr->ifm_active = IFM_ETHER;
   1856 
   1857 	if (!adapter->link_active) {
   1858 		IXGBE_CORE_UNLOCK(adapter);
   1859 		return;
   1860 	}
   1861 
   1862 	ifmr->ifm_status |= IFM_ACTIVE;
   1863 
   1864 	switch (adapter->link_speed) {
   1865 		case IXGBE_LINK_SPEED_100_FULL:
   1866 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1867 			break;
   1868 		case IXGBE_LINK_SPEED_1GB_FULL:
   1869 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1870 			break;
   1871 		case IXGBE_LINK_SPEED_10GB_FULL:
   1872 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1873 			break;
   1874 	}
   1875 
   1876 	IXGBE_CORE_UNLOCK(adapter);
   1877 
   1878 	return;
   1879 }
   1880 
   1881 /*********************************************************************
   1882  *
   1883  *  Media Ioctl callback
   1884  *
    1885  *  This routine is called when the user changes speed/duplex using
    1886  *  the media/mediaopt options with ifconfig.
   1887  *
   1888  **********************************************************************/
   1889 static int
   1890 ixgbe_media_change(struct ifnet * ifp)
   1891 {
   1892 	struct adapter *adapter = ifp->if_softc;
   1893 	struct ifmedia *ifm = &adapter->media;
   1894 
   1895 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1896 
   1897 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1898 		return (EINVAL);
   1899 
    1900 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
    1901 	case IFM_AUTO:
    1902 		adapter->hw.phy.autoneg_advertised =
    1903 		    IXGBE_LINK_SPEED_100_FULL |
    1904 		    IXGBE_LINK_SPEED_1GB_FULL |
    1905 		    IXGBE_LINK_SPEED_10GB_FULL;
    1906 		break;
    1907 	default:
    1908 		device_printf(adapter->dev, "Only auto media type\n");
    1909 		return (EINVAL);
    1910 	}
   1911 
   1912 	return (0);
   1913 }
   1914 
   1915 /*********************************************************************
   1916  *
   1917  *  This routine maps the mbufs to tx descriptors, allowing the
   1918  *  TX engine to transmit the packets.
   1919  *  	- return 0 on success, positive on failure
   1920  *
   1921  **********************************************************************/
   1922 
   1923 static int
   1924 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1925 {
   1926 	struct m_tag *mtag;
   1927 	struct adapter  *adapter = txr->adapter;
   1928 	struct ethercom *ec = &adapter->osdep.ec;
   1929 	u32		olinfo_status = 0, cmd_type_len;
   1930 	int             i, j, error;
   1931 	int		first;
   1932 	bus_dmamap_t	map;
   1933 	struct ixgbe_tx_buf *txbuf;
   1934 	union ixgbe_adv_tx_desc *txd = NULL;
   1935 
   1936 	/* Basic descriptor defines */
    1937 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    1938 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
    1939 
    1940 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
    1941 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
    1942 
    1943 	/*
    1944 	 * Important to capture the first descriptor
    1945 	 * used because it will contain the index of
    1946 	 * the one we tell the hardware to report back
    1947 	 */
    1948 	first = txr->next_avail_desc;
   1949 	txbuf = &txr->tx_buffers[first];
   1950 	map = txbuf->map;
   1951 
   1952 	/*
   1953 	 * Map the packet for DMA.
   1954 	 */
   1955 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1956 	    m_head, BUS_DMA_NOWAIT);
   1957 
   1958 	if (__predict_false(error)) {
   1959 
   1960 		switch (error) {
   1961 		case EAGAIN:
   1962 			adapter->eagain_tx_dma_setup.ev_count++;
   1963 			return EAGAIN;
   1964 		case ENOMEM:
   1965 			adapter->enomem_tx_dma_setup.ev_count++;
   1966 			return EAGAIN;
   1967 		case EFBIG:
   1968 			/*
   1969 			 * XXX Try it again?
   1970 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   1971 			 */
   1972 			adapter->efbig_tx_dma_setup.ev_count++;
   1973 			return error;
   1974 		case EINVAL:
   1975 			adapter->einval_tx_dma_setup.ev_count++;
   1976 			return error;
   1977 		default:
   1978 			adapter->other_tx_dma_setup.ev_count++;
   1979 			return error;
   1980 		}
   1981 	}
   1982 
   1983 	/* Make certain there are enough descriptors */
   1984 	if (map->dm_nsegs > txr->tx_avail - 2) {
   1985 		txr->no_desc_avail.ev_count++;
   1986 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   1987 		return EAGAIN;
   1988 	}
   1989 
   1990 	/*
   1991 	** Set up the appropriate offload context
   1992 	** this will consume the first descriptor
   1993 	*/
   1994 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   1995 	if (__predict_false(error)) {
   1996 		return (error);
   1997 	}
   1998 
   1999 #ifdef IXGBE_FDIR
   2000 	/* Do the flow director magic */
   2001 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2002 		++txr->atr_count;
   2003 		if (txr->atr_count >= atr_sample_rate) {
   2004 			ixgbe_atr(txr, m_head);
   2005 			txr->atr_count = 0;
   2006 		}
   2007 	}
   2008 #endif
   2009 
   2010 	i = txr->next_avail_desc;
   2011 	for (j = 0; j < map->dm_nsegs; j++) {
   2012 		bus_size_t seglen;
   2013 		bus_addr_t segaddr;
   2014 
   2015 		txbuf = &txr->tx_buffers[i];
   2016 		txd = &txr->tx_base[i];
   2017 		seglen = map->dm_segs[j].ds_len;
   2018 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2019 
   2020 		txd->read.buffer_addr = segaddr;
   2021 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
    2022 		    cmd_type_len | seglen);
   2023 		txd->read.olinfo_status = htole32(olinfo_status);
   2024 
   2025 		if (++i == txr->num_desc)
   2026 			i = 0;
   2027 	}
   2028 
   2029 	txd->read.cmd_type_len |=
   2030 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2031 	txr->tx_avail -= map->dm_nsegs;
   2032 	txr->next_avail_desc = i;
   2033 
   2034 	txbuf->m_head = m_head;
   2035 	/*
   2036 	** Here we swap the map so the last descriptor,
   2037 	** which gets the completion interrupt has the
   2038 	** real map, and the first descriptor gets the
   2039 	** unused map from this descriptor.
   2040 	*/
   2041 	txr->tx_buffers[first].map = txbuf->map;
   2042 	txbuf->map = map;
   2043 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2044 	    BUS_DMASYNC_PREWRITE);
   2045 
    2046 	/* Set the EOP descriptor that will be marked done */
    2047 	txbuf = &txr->tx_buffers[first];
    2048 	txbuf->eop = txd;
    2049 
    2050 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2051 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2052 	/*
   2053 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2054 	 * hardware that this frame is available to transmit.
   2055 	 */
   2056 	++txr->total_packets.ev_count;
   2057 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2058 
   2059 	return 0;
   2060 }
   2061 
   2062 static void
   2063 ixgbe_set_promisc(struct adapter *adapter)
   2064 {
   2065 	struct ether_multi *enm;
   2066 	struct ether_multistep step;
   2067 	u_int32_t       reg_rctl;
   2068 	struct ethercom *ec = &adapter->osdep.ec;
   2069 	struct ifnet   *ifp = adapter->ifp;
   2070 	int		mcnt = 0;
   2071 
   2072 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2073 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2074 	if (ifp->if_flags & IFF_ALLMULTI)
   2075 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2076 	else {
   2077 		ETHER_FIRST_MULTI(step, ec, enm);
   2078 		while (enm != NULL) {
   2079 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2080 				break;
   2081 			mcnt++;
   2082 			ETHER_NEXT_MULTI(step, enm);
   2083 		}
   2084 	}
   2085 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2086 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2087 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2088 
   2089 	if (ifp->if_flags & IFF_PROMISC) {
   2090 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2091 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2092 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2093 		reg_rctl |= IXGBE_FCTRL_MPE;
   2094 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2095 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2096 	}
   2097 	return;
   2098 }
   2099 
   2100 
   2101 /*********************************************************************
   2102  *  Multicast Update
   2103  *
    2104  *  This routine is called whenever the multicast address list is updated.
   2105  *
   2106  **********************************************************************/
   2107 #define IXGBE_RAR_ENTRIES 16
   2108 
   2109 static void
   2110 ixgbe_set_multi(struct adapter *adapter)
   2111 {
   2112 	struct ether_multi *enm;
   2113 	struct ether_multistep step;
   2114 	u32	fctrl;
   2115 	u8	*mta;
   2116 	u8	*update_ptr;
   2117 	int	mcnt = 0;
   2118 	struct ethercom *ec = &adapter->osdep.ec;
   2119 	struct ifnet   *ifp = adapter->ifp;
   2120 
   2121 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2122 
   2123 	mta = adapter->mta;
   2124 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2125 	    MAX_NUM_MULTICAST_ADDRESSES);
   2126 
   2127 	ifp->if_flags &= ~IFF_ALLMULTI;
   2128 	ETHER_FIRST_MULTI(step, ec, enm);
   2129 	while (enm != NULL) {
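         		/*
         		 * A range of addresses (enm_addrlo != enm_addrhi)
         		 * cannot be expressed in the hardware filter table,
         		 * so fall back to receiving all multicast traffic.
         		 */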
   2130 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2131 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2132 			ETHER_ADDR_LEN) != 0)) {
   2133 			ifp->if_flags |= IFF_ALLMULTI;
   2134 			break;
   2135 		}
   2136 		bcopy(enm->enm_addrlo,
   2137 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2138 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2139 		mcnt++;
   2140 		ETHER_NEXT_MULTI(step, enm);
   2141 	}
   2142 
   2143 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2144 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2145 	if (ifp->if_flags & IFF_PROMISC)
   2146 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2147 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2148 		fctrl |= IXGBE_FCTRL_MPE;
   2149 	}
   2150 
   2151 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2152 
   2153 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2154 		update_ptr = mta;
   2155 		ixgbe_update_mc_addr_list(&adapter->hw,
   2156 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2157 	}
   2158 
   2159 	return;
   2160 }
   2161 
   2162 /*
    2163  * This is an iterator function needed by the multicast shared
    2164  * code. It simply feeds the shared code routine the addresses in
    2165  * the mta array of ixgbe_set_multi() one by one.
   2166  */
   2167 static u8 *
   2168 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2169 {
   2170 	u8 *addr = *update_ptr;
   2171 	u8 *newptr;
   2172 	*vmdq = 0;
   2173 
   2174 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2175 	*update_ptr = newptr;
   2176 	return addr;
   2177 }
   2178 
   2179 
   2180 /*********************************************************************
   2181  *  Timer routine
   2182  *
    2183  *  This routine checks for link status, updates statistics,
   2184  *  and runs the watchdog check.
   2185  *
   2186  **********************************************************************/
   2187 
   2188 static void
   2189 ixgbe_local_timer1(void *arg)
   2190 {
   2191 	struct adapter	*adapter = arg;
   2192 	device_t	dev = adapter->dev;
   2193 	struct ix_queue *que = adapter->queues;
   2194 	struct tx_ring	*txr = adapter->tx_rings;
   2195 	int		hung = 0, paused = 0;
   2196 
   2197 	KASSERT(mutex_owned(&adapter->core_mtx));
   2198 
   2199 	/* Check for pluggable optics */
   2200 	if (adapter->sfp_probe)
   2201 		if (!ixgbe_sfp_probe(adapter))
   2202 			goto out; /* Nothing to do */
   2203 
   2204 	ixgbe_update_link_status(adapter);
   2205 	ixgbe_update_stats_counters(adapter);
   2206 
   2207 	/*
   2208 	 * If the interface has been paused
   2209 	 * then don't do the watchdog check
   2210 	 */
   2211 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2212 		paused = 1;
   2213 
   2214 	/*
   2215 	** Check the TX queues status
   2216 	**      - watchdog only if all queues show hung
   2217 	*/
   2218 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2219 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2220 		    (paused == 0))
   2221 			++hung;
   2222 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2223 			softint_schedule(que->que_si);
   2224 	}
    2225 	/* Only trigger the watchdog if all queues show hung */
   2226 	if (hung == adapter->num_queues)
   2227 		goto watchdog;
   2228 
   2229 out:
   2230 	ixgbe_rearm_queues(adapter, adapter->que_mask);
   2231 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2232 	return;
   2233 
   2234 watchdog:
   2235 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
    2236 	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
    2237 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
    2238 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
    2239 	device_printf(dev, "TX(%d) desc avail = %d, "
    2240 	    "Next TX to Clean = %d\n",
    2241 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2242 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2243 	adapter->watchdog_events.ev_count++;
   2244 	ixgbe_init_locked(adapter);
   2245 }
   2246 
   2247 static void
   2248 ixgbe_local_timer(void *arg)
   2249 {
   2250 	struct adapter *adapter = arg;
   2251 
   2252 	IXGBE_CORE_LOCK(adapter);
   2253 	ixgbe_local_timer1(adapter);
   2254 	IXGBE_CORE_UNLOCK(adapter);
   2255 }
   2256 
   2257 /*
   2258 ** Note: this routine updates the OS on the link state
   2259 **	the real check of the hardware only happens with
   2260 **	a link interrupt.
   2261 */
   2262 static void
   2263 ixgbe_update_link_status(struct adapter *adapter)
   2264 {
   2265 	struct ifnet	*ifp = adapter->ifp;
   2266 	device_t dev = adapter->dev;
    2267 
    2269 	if (adapter->link_up) {
    2270 		if (adapter->link_active == FALSE) {
    2271 			if (bootverbose)
    2272 				device_printf(dev, "Link is up %d Gbps %s\n",
    2273 				    ((adapter->link_speed == 128)? 10:1),
    2274 				    "Full Duplex");
   2275 			adapter->link_active = TRUE;
   2276 			/* Update any Flow Control changes */
   2277 			ixgbe_fc_enable(&adapter->hw);
   2278 			if_link_state_change(ifp, LINK_STATE_UP);
   2279 		}
   2280 	} else { /* Link down */
   2281 		if (adapter->link_active == TRUE) {
   2282 			if (bootverbose)
    2283 				device_printf(dev, "Link is Down\n");
   2284 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2285 			adapter->link_active = FALSE;
   2286 		}
   2287 	}
   2288 
   2289 	return;
   2290 }
   2291 
   2292 
   2293 static void
   2294 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2295 {
   2296 	struct adapter *adapter = ifp->if_softc;
   2297 
   2298 	IXGBE_CORE_LOCK(adapter);
   2299 	ixgbe_stop(adapter);
   2300 	IXGBE_CORE_UNLOCK(adapter);
   2301 }
   2302 
   2303 /*********************************************************************
   2304  *
   2305  *  This routine disables all traffic on the adapter by issuing a
   2306  *  global reset on the MAC and deallocates TX/RX buffers.
   2307  *
   2308  **********************************************************************/
   2309 
   2310 static void
   2311 ixgbe_stop(void *arg)
   2312 {
   2313 	struct ifnet   *ifp;
   2314 	struct adapter *adapter = arg;
   2315 	struct ixgbe_hw *hw = &adapter->hw;
   2316 	ifp = adapter->ifp;
   2317 
   2318 	KASSERT(mutex_owned(&adapter->core_mtx));
   2319 
   2320 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2321 	ixgbe_disable_intr(adapter);
   2322 	callout_stop(&adapter->timer);
   2323 
   2324 	/* Let the stack know...*/
   2325 	ifp->if_flags &= ~IFF_RUNNING;
   2326 
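         	/*
         	 * Reset the MAC; the reset path marks the adapter stopped,
         	 * so clear that flag again to let the explicit
         	 * ixgbe_stop_adapter() call below run the full stop sequence.
         	 */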
   2327 	ixgbe_reset_hw(hw);
   2328 	hw->adapter_stopped = FALSE;
   2329 	ixgbe_stop_adapter(hw);
   2330 	/* Turn off the laser */
   2331 	if (hw->phy.multispeed_fiber)
   2332 		ixgbe_disable_tx_laser(hw);
   2333 
   2334 	/* reprogram the RAR[0] in case user changed it. */
   2335 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2336 
   2337 	return;
   2338 }
   2339 
   2340 
   2341 /*********************************************************************
   2342  *
   2343  *  Determine hardware revision.
   2344  *
   2345  **********************************************************************/
   2346 static void
   2347 ixgbe_identify_hardware(struct adapter *adapter)
   2348 {
   2349 	pcitag_t tag;
   2350 	pci_chipset_tag_t pc;
   2351 	pcireg_t subid, id;
   2352 	struct ixgbe_hw *hw = &adapter->hw;
   2353 
   2354 	pc = adapter->osdep.pc;
   2355 	tag = adapter->osdep.tag;
   2356 
   2357 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2358 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2359 
   2360 	/* Save off the information about this board */
   2361 	hw->vendor_id = PCI_VENDOR(id);
   2362 	hw->device_id = PCI_PRODUCT(id);
   2363 	hw->revision_id =
   2364 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2365 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2366 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2367 
   2368 	/* We need this here to set the num_segs below */
   2369 	ixgbe_set_mac_type(hw);
   2370 
   2371 	/* Pick up the 82599 and VF settings */
   2372 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2373 		hw->phy.smart_speed = ixgbe_smart_speed;
   2374 		adapter->num_segs = IXGBE_82599_SCATTER;
   2375 	} else
   2376 		adapter->num_segs = IXGBE_82598_SCATTER;
   2377 
   2378 	return;
   2379 }
   2380 
   2381 /*********************************************************************
   2382  *
   2383  *  Determine optic type
   2384  *
   2385  **********************************************************************/
   2386 static void
   2387 ixgbe_setup_optics(struct adapter *adapter)
   2388 {
   2389 	struct ixgbe_hw *hw = &adapter->hw;
   2390 	int		layer;
   2391 
   2392 	layer = ixgbe_get_supported_physical_layer(hw);
   2393 
   2394 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2395 		adapter->optics = IFM_10G_T;
   2396 		return;
   2397 	}
   2398 
   2399 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2400 		adapter->optics = IFM_1000_T;
   2401 		return;
   2402 	}
   2403 
   2404 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2405 		adapter->optics = IFM_1000_SX;
   2406 		return;
   2407 	}
   2408 
   2409 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2410 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2411 		adapter->optics = IFM_10G_LR;
   2412 		return;
   2413 	}
   2414 
   2415 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2416 		adapter->optics = IFM_10G_SR;
   2417 		return;
   2418 	}
   2419 
   2420 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2421 		adapter->optics = IFM_10G_TWINAX;
   2422 		return;
   2423 	}
   2424 
   2425 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2426 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2427 		adapter->optics = IFM_10G_CX4;
   2428 		return;
   2429 	}
   2430 
   2431 	/* If we get here just set the default */
   2432 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2433 	return;
   2434 }
   2435 
   2436 /*********************************************************************
   2437  *
   2438  *  Setup the Legacy or MSI Interrupt handler
   2439  *
   2440  **********************************************************************/
   2441 static int
   2442 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2443 {
   2444 	device_t	dev = adapter->dev;
   2445 	struct		ix_queue *que = adapter->queues;
   2446 #ifndef IXGBE_LEGACY_TX
   2447 	struct tx_ring		*txr = adapter->tx_rings;
   2448 #endif
   2449 	char intrbuf[PCI_INTRSTR_LEN];
   2450 #if 0
   2451 	int		rid = 0;
   2452 
   2453 	/* MSI RID at 1 */
   2454 	if (adapter->msix == 1)
   2455 		rid = 1;
   2456 #endif
   2457 
   2458 	/* We allocate a single interrupt resource */
   2459  	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2460 		aprint_error_dev(dev, "unable to map interrupt\n");
   2461 		return ENXIO;
   2462 	} else {
   2463 		aprint_normal_dev(dev, "interrupting at %s\n",
   2464 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2465 			intrbuf, sizeof(intrbuf)));
   2466 	}
   2467 
   2468 	/*
   2469 	 * Try allocating a fast interrupt and the associated deferred
   2470 	 * processing contexts.
   2471 	 */
   2472 #ifndef IXGBE_LEGACY_TX
   2473 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2474 	    txr);
   2475 #endif
   2476 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2477 
   2478 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2479 	adapter->link_si =
   2480 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2481 	adapter->mod_si =
   2482 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2483 	adapter->msf_si =
   2484 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2485 
   2486 #ifdef IXGBE_FDIR
   2487 	adapter->fdir_si =
   2488 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2489 #endif
   2490 	if (que->que_si == NULL ||
   2491 	    adapter->link_si == NULL ||
   2492 	    adapter->mod_si == NULL ||
   2493 #ifdef IXGBE_FDIR
   2494 	    adapter->fdir_si == NULL ||
   2495 #endif
   2496 	    adapter->msf_si == NULL) {
   2497 		aprint_error_dev(dev,
   2498 		    "could not establish software interrupts\n");
   2499 		return ENXIO;
   2500 	}
   2501 
   2502 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2503 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2504 	if (adapter->osdep.intr == NULL) {
   2505 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2506 		softint_disestablish(que->que_si);
   2507 		softint_disestablish(adapter->link_si);
   2508 		softint_disestablish(adapter->mod_si);
   2509 		softint_disestablish(adapter->msf_si);
   2510 #ifdef IXGBE_FDIR
   2511 		softint_disestablish(adapter->fdir_si);
   2512 #endif
   2513 		return ENXIO;
   2514 	}
   2515 	/* For simplicity in the handlers */
   2516 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2517 
   2518 	return (0);
   2519 }
   2520 
   2521 
   2522 /*********************************************************************
   2523  *
   2524  *  Setup MSIX Interrupt resources and handlers
   2525  *
   2526  **********************************************************************/
   2527 static int
   2528 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2529 {
   2530 #if !defined(NETBSD_MSI_OR_MSIX)
   2531 	return 0;
   2532 #else
   2533 	device_t        dev = adapter->dev;
    2534 	struct ix_queue	*que = adapter->queues;
    2535 	struct tx_ring	*txr = adapter->tx_rings;
   2536 	int 		error, rid, vector = 0;
   2537 
   2538 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2539 		rid = vector + 1;
   2540 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
   2541 		    RF_SHAREABLE | RF_ACTIVE);
   2542 		if (que->res == NULL) {
    2543 			aprint_error_dev(dev, "Unable to allocate"
    2544 			    " bus resource: que interrupt [%d]\n", vector);
   2545 			return (ENXIO);
   2546 		}
   2547 		/* Set the handler function */
   2548 		error = bus_setup_intr(dev, que->res,
   2549 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2550 		    ixgbe_msix_que, que, &que->tag);
   2551 		if (error) {
   2552 			que->res = NULL;
   2553 			aprint_error_dev(dev,
   2554 			    "Failed to register QUE handler\n");
   2555 			return error;
   2556 		}
   2557 #if __FreeBSD_version >= 800504
   2558 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
   2559 #endif
   2560 		que->msix = vector;
    2561 		adapter->que_mask |= (u64)(1ULL << que->msix);
   2562 		/*
   2563 		** Bind the msix vector, and thus the
   2564 		** ring to the corresponding cpu.
   2565 		*/
   2566 		if (adapter->num_queues > 1)
   2567 			bus_bind_intr(dev, que->res, i);
   2568 
   2569 #ifndef IXGBE_LEGACY_TX
   2570 		txr->txq_si = softint_establish(SOFTINT_NET,
   2571 		    ixgbe_deferred_mq_start, txr);
   2572 #endif
   2573 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2574 		    que);
   2575 		if (que->que_si == NULL) {
   2576 			aprint_error_dev(dev,
   2577 			    "could not establish software interrupt\n");
   2578 		}
   2579 	}
   2580 
   2581 	/* and Link */
   2582 	rid = vector + 1;
   2583 	adapter->res = bus_alloc_resource_any(dev,
   2584     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
   2585 	if (!adapter->res) {
    2586 		aprint_error_dev(dev, "Unable to allocate bus resource: "
   2587 		    "Link interrupt [%d]\n", rid);
   2588 		return (ENXIO);
   2589 	}
   2590 	/* Set the link handler function */
   2591 	error = bus_setup_intr(dev, adapter->res,
   2592 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2593 	    ixgbe_msix_link, adapter, &adapter->tag);
   2594 	if (error) {
   2595 		adapter->res = NULL;
   2596 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2597 		return (error);
   2598 	}
   2599 #if __FreeBSD_version >= 800504
   2600 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
   2601 #endif
   2602 	adapter->linkvec = vector;
   2603 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2604 	adapter->link_si =
   2605 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2606 	adapter->mod_si =
   2607 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2608 	adapter->msf_si =
   2609 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2610 #ifdef IXGBE_FDIR
   2611 	adapter->fdir_si =
   2612 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2613 #endif
   2614 
   2615 	return (0);
   2616 #endif
   2617 }
   2618 
   2619 /*
   2620  * Setup Either MSI/X or MSI
   2621  */
   2622 static int
   2623 ixgbe_setup_msix(struct adapter *adapter)
   2624 {
   2625 #if !defined(NETBSD_MSI_OR_MSIX)
   2626 	return 0;
   2627 #else
   2628 	device_t dev = adapter->dev;
   2629 	int rid, want, queues, msgs;
   2630 
   2631 	/* Override by tuneable */
   2632 	if (ixgbe_enable_msix == 0)
   2633 		goto msi;
   2634 
   2635 	/* First try MSI/X */
   2636 	rid = PCI_BAR(MSIX_82598_BAR);
   2637 	adapter->msix_mem = bus_alloc_resource_any(dev,
   2638 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
    2639 	if (!adapter->msix_mem) {
   2640 		rid += 4;	/* 82599 maps in higher BAR */
   2641 		adapter->msix_mem = bus_alloc_resource_any(dev,
   2642 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2643 	}
    2644 	if (!adapter->msix_mem) {
   2645 		/* May not be enabled */
   2646 		device_printf(adapter->dev,
    2647 		    "Unable to map MSIX table\n");
   2648 		goto msi;
   2649 	}
   2650 
   2651 	msgs = pci_msix_count(dev);
   2652 	if (msgs == 0) { /* system has msix disabled */
   2653 		bus_release_resource(dev, SYS_RES_MEMORY,
   2654 		    rid, adapter->msix_mem);
   2655 		adapter->msix_mem = NULL;
   2656 		goto msi;
   2657 	}
   2658 
   2659 	/* Figure out a reasonable auto config value */
   2660 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
   2661 
   2662 	if (ixgbe_num_queues != 0)
   2663 		queues = ixgbe_num_queues;
   2664 	/* Set max queues to 8 when autoconfiguring */
   2665 	else if ((ixgbe_num_queues == 0) && (queues > 8))
   2666 		queues = 8;
   2667 
   2668 	/*
   2669 	** Want one vector (RX/TX pair) per queue
   2670 	** plus an additional for Link.
   2671 	*/
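         	/*
         	 * Illustrative example: 8 CPUs and 16 available messages
         	 * give queues = 8 and want = 9, one vector per RX/TX pair
         	 * plus the link vector.
         	 */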
   2672 	want = queues + 1;
   2673 	if (msgs >= want)
   2674 		msgs = want;
   2675 	else {
    2676 		device_printf(adapter->dev,
   2677 		    "MSIX Configuration Problem, "
   2678 		    "%d vectors but %d queues wanted!\n",
   2679 		    msgs, want);
   2680 		return (0); /* Will go to Legacy setup */
   2681 	}
   2682 	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
    2683 		device_printf(adapter->dev,
   2684 		    "Using MSIX interrupts with %d vectors\n", msgs);
   2685 		adapter->num_queues = queues;
   2686 		return (msgs);
   2687 	}
   2688 msi:
    2689 	msgs = pci_msi_count(dev);
    2690 	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
    2691 		device_printf(adapter->dev, "Using an MSI interrupt\n");
    2692 	else
    2693 		device_printf(adapter->dev, "Using a Legacy interrupt\n");
   2694 	return (msgs);
   2695 #endif
   2696 }
   2697 
   2698 
   2699 static int
   2700 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2701 {
   2702 	pcireg_t	memtype;
   2703 	device_t        dev = adapter->dev;
   2704 	bus_addr_t addr;
   2705 	int flags;
   2706 
   2707 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2708 	switch (memtype) {
   2709 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2710 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2711 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2712 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2713 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2714 			goto map_err;
   2715 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2716 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2717 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2718 		}
   2719 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2720 		     adapter->osdep.mem_size, flags,
   2721 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2722 map_err:
   2723 			adapter->osdep.mem_size = 0;
   2724 			aprint_error_dev(dev, "unable to map BAR0\n");
   2725 			return ENXIO;
   2726 		}
   2727 		break;
   2728 	default:
   2729 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2730 		return ENXIO;
   2731 	}
   2732 
   2733 	/* Legacy defaults */
   2734 	adapter->num_queues = 1;
   2735 	adapter->hw.back = &adapter->osdep;
   2736 
   2737 	/*
   2738 	** Now setup MSI or MSI/X, should
   2739 	** return us the number of supported
   2740 	** vectors. (Will be 1 for MSI)
   2741 	*/
   2742 	adapter->msix = ixgbe_setup_msix(adapter);
   2743 	return (0);
   2744 }
   2745 
   2746 static void
   2747 ixgbe_free_pci_resources(struct adapter * adapter)
   2748 {
   2749 #if defined(NETBSD_MSI_OR_MSIX)
    2750 	struct ix_queue	*que = adapter->queues;
   2751 	device_t	dev = adapter->dev;
   2752 #endif
   2753 	int		rid;
   2754 
   2755 #if defined(NETBSD_MSI_OR_MSIX)
   2756 	int		 memrid;
   2757 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2758 		memrid = PCI_BAR(MSIX_82598_BAR);
   2759 	else
   2760 		memrid = PCI_BAR(MSIX_82599_BAR);
   2761 
   2762 	/*
   2763 	** There is a slight possibility of a failure mode
   2764 	** in attach that will result in entering this function
   2765 	** before interrupt resources have been initialized, and
    2766 	** in that case we do not want to execute the loops below.
   2767 	** We can detect this reliably by the state of the adapter
   2768 	** res pointer.
   2769 	*/
   2770 	if (adapter->res == NULL)
   2771 		goto mem;
   2772 
   2773 	/*
   2774 	**  Release all msix queue resources:
   2775 	*/
   2776 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2777 		rid = que->msix + 1;
   2778 		if (que->tag != NULL) {
   2779 			bus_teardown_intr(dev, que->res, que->tag);
   2780 			que->tag = NULL;
   2781 		}
   2782 		if (que->res != NULL)
   2783 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2784 	}
   2785 #endif
   2786 
   2787 	/* Clean the Legacy or Link interrupt last */
   2788 	if (adapter->linkvec) /* we are doing MSIX */
   2789 		rid = adapter->linkvec + 1;
   2790 	else
    2791 		rid = (adapter->msix != 0) ? 1 : 0;
   2792 
   2793 	pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2794 	adapter->osdep.intr = NULL;
   2795 
   2796 #if defined(NETBSD_MSI_OR_MSIX)
   2797 mem:
   2798 	if (adapter->msix)
   2799 		pci_release_msi(dev);
   2800 
   2801 	if (adapter->msix_mem != NULL)
   2802 		bus_release_resource(dev, SYS_RES_MEMORY,
   2803 		    memrid, adapter->msix_mem);
   2804 #endif
   2805 
   2806 	if (adapter->osdep.mem_size != 0) {
   2807 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2808 		    adapter->osdep.mem_bus_space_handle,
   2809 		    adapter->osdep.mem_size);
   2810 	}
   2811 
   2812 	return;
   2813 }
   2814 
   2815 /*********************************************************************
   2816  *
   2817  *  Setup networking device structure and register an interface.
   2818  *
   2819  **********************************************************************/
   2820 static int
   2821 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2822 {
   2823 	struct ethercom *ec = &adapter->osdep.ec;
   2824 	struct ixgbe_hw *hw = &adapter->hw;
   2825 	struct ifnet   *ifp;
   2826 
   2827 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2828 
   2829 	ifp = adapter->ifp = &ec->ec_if;
   2830 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2831 	ifp->if_baudrate = IF_Gbps(10);
   2832 	ifp->if_init = ixgbe_init;
   2833 	ifp->if_stop = ixgbe_ifstop;
   2834 	ifp->if_softc = adapter;
   2835 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2836 	ifp->if_ioctl = ixgbe_ioctl;
   2837 #ifndef IXGBE_LEGACY_TX
   2838 	ifp->if_transmit = ixgbe_mq_start;
   2839 	ifp->if_qflush = ixgbe_qflush;
   2840 #else
   2841 	ifp->if_start = ixgbe_start;
   2842 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2843 #endif
   2844 
   2845 	if_attach(ifp);
   2846 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2847 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2848 
   2849 	adapter->max_frame_size =
   2850 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2851 
   2852 	/*
   2853 	 * Tell the upper layer(s) we support long frames.
   2854 	 */
   2855 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2856 
   2857 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2858 	ifp->if_capenable = 0;
   2859 
   2860 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2861 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2862 	ifp->if_capabilities |= IFCAP_LRO;
   2863 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2864 	    		    | ETHERCAP_VLAN_MTU;
   2865 	ec->ec_capenable = ec->ec_capabilities;
   2866 
   2867 	/*
    2868 	** Don't turn this on by default: if vlans are
    2869 	** created on another pseudo device (e.g. lagg)
    2870 	** then vlan events are not passed through, which
    2871 	** breaks operation, but with HW FILTER off it works.
    2872 	** If using vlans directly on the ixgbe driver you
    2873 	** can enable this and get full hardware tag filtering.
   2874 	*/
   2875 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2876 
   2877 	/*
   2878 	 * Specify the media types supported by this adapter and register
   2879 	 * callbacks to update media and link information
   2880 	 */
   2881 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2882 		     ixgbe_media_status);
   2883 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2884 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2885 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2886 		ifmedia_add(&adapter->media,
   2887 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2888 		ifmedia_add(&adapter->media,
   2889 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2890 	}
   2891 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   2892 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   2893 
   2894 	return (0);
   2895 }
   2896 
   2897 static void
   2898 ixgbe_config_link(struct adapter *adapter)
   2899 {
   2900 	struct ixgbe_hw *hw = &adapter->hw;
   2901 	u32	autoneg, err = 0;
   2902 	bool	sfp, negotiate;
   2903 
   2904 	sfp = ixgbe_is_sfp(hw);
   2905 
   2906 	if (sfp) {
   2907 		void *ip;
   2908 
   2909 		if (hw->phy.multispeed_fiber) {
   2910 			hw->mac.ops.setup_sfp(hw);
   2911 			ixgbe_enable_tx_laser(hw);
   2912 			ip = adapter->msf_si;
   2913 		} else {
   2914 			ip = adapter->mod_si;
   2915 		}
   2916 
   2917 		kpreempt_disable();
   2918 		softint_schedule(ip);
   2919 		kpreempt_enable();
   2920 	} else {
   2921 		if (hw->mac.ops.check_link)
   2922 			err = ixgbe_check_link(hw, &adapter->link_speed,
   2923 			    &adapter->link_up, FALSE);
   2924 		if (err)
   2925 			goto out;
   2926 		autoneg = hw->phy.autoneg_advertised;
   2927 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   2928                 	err  = hw->mac.ops.get_link_capabilities(hw,
   2929 			    &autoneg, &negotiate);
   2930 		else
   2931 			negotiate = 0;
   2932 		if (err)
   2933 			goto out;
   2934 		if (hw->mac.ops.setup_link)
   2935                 	err = hw->mac.ops.setup_link(hw,
   2936 			    autoneg, adapter->link_up);
   2937 	}
   2938 out:
   2939 	return;
   2940 }
   2941 
   2942 /********************************************************************
   2943  * Manage DMA'able memory.
   2944  *******************************************************************/
   2945 
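         /*
          * Each allocation walks the usual NetBSD bus_dma sequence: create a
          * tag, allocate raw memory segments, map them into kernel VA, create
          * a DMA map, and load the map to learn the device-visible address.
          * The fail_* labels below unwind exactly the steps already completed.
          */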
   2946 static int
   2947 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2948 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2949 {
   2950 	device_t dev = adapter->dev;
   2951 	int             r, rsegs;
   2952 
   2953 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2954 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2955 			       size,	/* maxsize */
   2956 			       1,	/* nsegments */
   2957 			       size,	/* maxsegsize */
   2958 			       BUS_DMA_ALLOCNOW,	/* flags */
   2959 			       &dma->dma_tag);
   2960 	if (r != 0) {
   2961 		aprint_error_dev(dev,
   2962 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2963 		goto fail_0;
   2964 	}
   2965 
   2966 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2967 		size,
   2968 		dma->dma_tag->dt_alignment,
   2969 		dma->dma_tag->dt_boundary,
   2970 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2971 	if (r != 0) {
   2972 		aprint_error_dev(dev,
   2973 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2974 		goto fail_1;
   2975 	}
   2976 
   2977 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2978 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2979 	if (r != 0) {
   2980 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2981 		    __func__, r);
   2982 		goto fail_2;
   2983 	}
   2984 
   2985 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2986 	if (r != 0) {
    2987 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
   2988 		    __func__, r);
   2989 		goto fail_3;
   2990 	}
   2991 
   2992 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2993 			    size,
   2994 			    NULL,
   2995 			    mapflags | BUS_DMA_NOWAIT);
   2996 	if (r != 0) {
   2997 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2998 		    __func__, r);
   2999 		goto fail_4;
   3000 	}
   3001 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3002 	dma->dma_size = size;
   3003 	return 0;
   3004 fail_4:
   3005 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3006 fail_3:
   3007 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3008 fail_2:
   3009 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3010 fail_1:
   3011 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3012 fail_0:
   3013 	return r;
   3014 }
   3015 
   3016 static void
   3017 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3018 {
   3019 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3020 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3021 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3022 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3023 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3024 }
   3025 
   3026 
   3027 /*********************************************************************
   3028  *
   3029  *  Allocate memory for the transmit and receive rings, and then
   3030  *  the descriptors associated with each, called only once at attach.
   3031  *
   3032  **********************************************************************/
   3033 static int
   3034 ixgbe_allocate_queues(struct adapter *adapter)
   3035 {
   3036 	device_t	dev = adapter->dev;
   3037 	struct ix_queue	*que;
   3038 	struct tx_ring	*txr;
   3039 	struct rx_ring	*rxr;
   3040 	int rsize, tsize, error = IXGBE_SUCCESS;
   3041 	int txconf = 0, rxconf = 0;
   3042 
    3043 	/* First allocate the top level queue structs */
    3044 	if (!(adapter->queues =
    3045 	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
    3046 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
    3047 		aprint_error_dev(dev, "Unable to allocate queue memory\n");
    3048 		error = ENOMEM;
    3049 		goto fail;
    3050 	}
   3051 
    3052 	/* Next allocate the TX ring struct memory */
   3053 	if (!(adapter->tx_rings =
   3054 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3055 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3056 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3057 		error = ENOMEM;
   3058 		goto tx_fail;
   3059 	}
   3060 
    3061 	/* And then the RX ring struct memory */
   3062 	if (!(adapter->rx_rings =
   3063 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3064 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3065 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3066 		error = ENOMEM;
   3067 		goto rx_fail;
   3068 	}
   3069 
   3070 	/* For the ring itself */
   3071 	tsize = roundup2(adapter->num_tx_desc *
   3072 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   3073 
   3074 	/*
   3075 	 * Now set up the TX queues, txconf is needed to handle the
   3076 	 * possibility that things fail midcourse and we need to
   3077 	 * undo memory gracefully
   3078 	 */
   3079 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3080 		/* Set up some basics */
   3081 		txr = &adapter->tx_rings[i];
   3082 		txr->adapter = adapter;
   3083 		txr->me = i;
   3084 		txr->num_desc = adapter->num_tx_desc;
   3085 
   3086 		/* Initialize the TX side lock */
   3087 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3088 		    device_xname(dev), txr->me);
   3089 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3090 
   3091 		if (ixgbe_dma_malloc(adapter, tsize,
   3092 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3093 			aprint_error_dev(dev,
   3094 			    "Unable to allocate TX Descriptor memory\n");
   3095 			error = ENOMEM;
   3096 			goto err_tx_desc;
   3097 		}
   3098 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3099 		bzero((void *)txr->tx_base, tsize);
   3100 
    3101 		/* Now allocate transmit buffers for the ring */
    3102 		if (ixgbe_allocate_transmit_buffers(txr)) {
    3103 			aprint_error_dev(dev,
    3104 			    "Critical Failure setting up transmit buffers\n");
    3105 			error = ENOMEM;
    3106 			goto err_tx_desc;
    3107 		}
   3108 #ifndef IXGBE_LEGACY_TX
   3109 		/* Allocate a buf ring */
   3110 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3111 		    M_WAITOK, &txr->tx_mtx);
   3112 		if (txr->br == NULL) {
   3113 			aprint_error_dev(dev,
   3114 			    "Critical Failure setting up buf ring\n");
   3115 			error = ENOMEM;
   3116 			goto err_tx_desc;
    3117 		}
   3118 #endif
   3119 	}
   3120 
   3121 	/*
   3122 	 * Next the RX queues...
   3123 	 */
   3124 	rsize = roundup2(adapter->num_rx_desc *
   3125 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3126 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3127 		rxr = &adapter->rx_rings[i];
   3128 		/* Set up some basics */
   3129 		rxr->adapter = adapter;
   3130 		rxr->me = i;
   3131 		rxr->num_desc = adapter->num_rx_desc;
   3132 
   3133 		/* Initialize the RX side lock */
   3134 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3135 		    device_xname(dev), rxr->me);
   3136 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3137 
   3138 		if (ixgbe_dma_malloc(adapter, rsize,
   3139 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3140 			aprint_error_dev(dev,
   3141 			    "Unable to allocate RxDescriptor memory\n");
   3142 			error = ENOMEM;
   3143 			goto err_rx_desc;
   3144 		}
   3145 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3146 		bzero((void *)rxr->rx_base, rsize);
   3147 
    3148 		/* Allocate receive buffers for the ring */
   3149 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3150 			aprint_error_dev(dev,
   3151 			    "Critical Failure setting up receive buffers\n");
   3152 			error = ENOMEM;
   3153 			goto err_rx_desc;
   3154 		}
   3155 	}
   3156 
   3157 	/*
   3158 	** Finally set up the queue holding structs
   3159 	*/
   3160 	for (int i = 0; i < adapter->num_queues; i++) {
   3161 		que = &adapter->queues[i];
   3162 		que->adapter = adapter;
   3163 		que->txr = &adapter->tx_rings[i];
   3164 		que->rxr = &adapter->rx_rings[i];
   3165 	}
   3166 
   3167 	return (0);
   3168 
   3169 err_rx_desc:
   3170 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3171 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3172 err_tx_desc:
   3173 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3174 		ixgbe_dma_free(adapter, &txr->txdma);
   3175 	free(adapter->rx_rings, M_DEVBUF);
   3176 rx_fail:
   3177 	free(adapter->tx_rings, M_DEVBUF);
   3178 tx_fail:
   3179 	free(adapter->queues, M_DEVBUF);
   3180 fail:
   3181 	return (error);
   3182 }
   3183 
   3184 /*********************************************************************
   3185  *
   3186  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3187  *  the information needed to transmit a packet on the wire. This is
   3188  *  called only once at attach, setup is done every reset.
   3189  *
   3190  **********************************************************************/
   3191 static int
   3192 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3193 {
   3194 	struct adapter *adapter = txr->adapter;
   3195 	device_t dev = adapter->dev;
   3196 	struct ixgbe_tx_buf *txbuf;
   3197 	int error, i;
   3198 
   3199 	/*
   3200 	 * Setup DMA descriptor areas.
   3201 	 */
   3202 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3203 			       1, 0,		/* alignment, bounds */
   3204 			       IXGBE_TSO_SIZE,		/* maxsize */
   3205 			       adapter->num_segs,	/* nsegments */
   3206 			       PAGE_SIZE,		/* maxsegsize */
   3207 			       0,			/* flags */
   3208 			       &txr->txtag))) {
    3209 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3210 		goto fail;
   3211 	}
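	/*
	 * Note: the tag above bounds every packet mapping: at most
	 * adapter->num_segs scatter/gather segments of PAGE_SIZE each,
	 * IXGBE_TSO_SIZE bytes in total, which should cover the largest
	 * TSO send we hand to the hardware (values come from ixgbe.h).
	 */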
   3212 
   3213 	if (!(txr->tx_buffers =
   3214 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3215 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3216 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3217 		error = ENOMEM;
   3218 		goto fail;
   3219 	}
   3220 
    3221 	/* Create the descriptor buffer DMA maps */
   3222 	txbuf = txr->tx_buffers;
   3223 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3224 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3225 		if (error != 0) {
   3226 			aprint_error_dev(dev,
   3227 			    "Unable to create TX DMA map (%d)\n", error);
   3228 			goto fail;
   3229 		}
   3230 	}
   3231 
   3232 	return 0;
   3233 fail:
    3234 	/* Free everything; this handles the case where we failed partway */
   3235 	ixgbe_free_transmit_structures(adapter);
   3236 	return (error);
   3237 }
   3238 
   3239 /*********************************************************************
   3240  *
   3241  *  Initialize a transmit ring.
   3242  *
   3243  **********************************************************************/
   3244 static void
   3245 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3246 {
   3247 	struct adapter *adapter = txr->adapter;
   3248 	struct ixgbe_tx_buf *txbuf;
   3249 	int i;
   3250 #ifdef DEV_NETMAP
   3251 	struct netmap_adapter *na = NA(adapter->ifp);
   3252 	struct netmap_slot *slot;
   3253 #endif /* DEV_NETMAP */
   3254 
   3255 	/* Clear the old ring contents */
   3256 	IXGBE_TX_LOCK(txr);
   3257 #ifdef DEV_NETMAP
   3258 	/*
   3259 	 * (under lock): if in netmap mode, do some consistency
   3260 	 * checks and set slot to entry 0 of the netmap ring.
   3261 	 */
   3262 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3263 #endif /* DEV_NETMAP */
   3264 	bzero((void *)txr->tx_base,
   3265 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3266 	/* Reset indices */
   3267 	txr->next_avail_desc = 0;
   3268 	txr->next_to_clean = 0;
   3269 
   3270 	/* Free any existing tx buffers. */
    3271 	txbuf = txr->tx_buffers;
   3272 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3273 		if (txbuf->m_head != NULL) {
   3274 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3275 			    0, txbuf->m_head->m_pkthdr.len,
   3276 			    BUS_DMASYNC_POSTWRITE);
   3277 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3278 			m_freem(txbuf->m_head);
   3279 			txbuf->m_head = NULL;
   3280 		}
   3281 #ifdef DEV_NETMAP
   3282 		/*
   3283 		 * In netmap mode, set the map for the packet buffer.
   3284 		 * NOTE: Some drivers (not this one) also need to set
   3285 		 * the physical buffer address in the NIC ring.
   3286 		 * Slots in the netmap ring (indexed by "si") are
   3287 		 * kring->nkr_hwofs positions "ahead" wrt the
   3288 		 * corresponding slot in the NIC ring. In some drivers
   3289 		 * (not here) nkr_hwofs can be negative. Function
   3290 		 * netmap_idx_n2k() handles wraparounds properly.
   3291 		 */
   3292 		if (slot) {
   3293 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3294 			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
   3295 		}
   3296 #endif /* DEV_NETMAP */
   3297 		/* Clear the EOP descriptor pointer */
   3298 		txbuf->eop = NULL;
    3299 	}
   3300 
   3301 #ifdef IXGBE_FDIR
   3302 	/* Set the rate at which we sample packets */
   3303 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3304 		txr->atr_sample = atr_sample_rate;
   3305 #endif
   3306 
   3307 	/* Set number of descriptors available */
   3308 	txr->tx_avail = adapter->num_tx_desc;
   3309 
   3310 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3311 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3312 	IXGBE_TX_UNLOCK(txr);
   3313 }
   3314 
   3315 /*********************************************************************
   3316  *
   3317  *  Initialize all transmit rings.
   3318  *
   3319  **********************************************************************/
   3320 static int
   3321 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3322 {
   3323 	struct tx_ring *txr = adapter->tx_rings;
   3324 
   3325 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3326 		ixgbe_setup_transmit_ring(txr);
   3327 
   3328 	return (0);
   3329 }
   3330 
   3331 /*********************************************************************
   3332  *
   3333  *  Enable transmit unit.
   3334  *
   3335  **********************************************************************/
   3336 static void
   3337 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3338 {
   3339 	struct tx_ring	*txr = adapter->tx_rings;
   3340 	struct ixgbe_hw	*hw = &adapter->hw;
   3341 
   3342 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3343 
   3344 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3345 		u64	tdba = txr->txdma.dma_paddr;
   3346 		u32	txctrl;
   3347 
   3348 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3349 		       (tdba & 0x00000000ffffffffULL));
   3350 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3351 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3352 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3353 
   3354 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3355 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3356 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3357 
   3358 		/* Setup Transmit Descriptor Cmd Settings */
   3359 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3360 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3361 
   3362 		/* Set the processing limit */
   3363 		txr->process_limit = ixgbe_tx_process_limit;
   3364 
   3365 		/* Disable Head Writeback */
   3366 		switch (hw->mac.type) {
   3367 		case ixgbe_mac_82598EB:
   3368 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3369 			break;
   3370 		case ixgbe_mac_82599EB:
   3371 		case ixgbe_mac_X540:
   3372 		default:
   3373 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3374 			break;
    3375 		}
   3376 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3377 		switch (hw->mac.type) {
   3378 		case ixgbe_mac_82598EB:
   3379 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3380 			break;
   3381 		case ixgbe_mac_82599EB:
   3382 		case ixgbe_mac_X540:
   3383 		default:
   3384 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3385 			break;
   3386 		}
   3387 
   3388 	}
   3389 
   3390 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3391 		u32 dmatxctl, rttdcs;
   3392 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3393 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3394 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3395 		/* Disable arbiter to set MTQC */
   3396 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3397 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3398 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3399 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3400 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3401 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3402 	}
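	/*
	 * The ARBDIS set/clear bracket above follows Intel's programming
	 * guidance (an assumption based on the 82599/X540 datasheets):
	 * MTQC may only be changed while the TX descriptor-plane arbiter
	 * is disabled.
	 */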
   3403 
   3404 	return;
   3405 }
   3406 
   3407 /*********************************************************************
   3408  *
   3409  *  Free all transmit rings.
   3410  *
   3411  **********************************************************************/
   3412 static void
   3413 ixgbe_free_transmit_structures(struct adapter *adapter)
   3414 {
   3415 	struct tx_ring *txr = adapter->tx_rings;
   3416 
   3417 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3418 		ixgbe_free_transmit_buffers(txr);
   3419 		ixgbe_dma_free(adapter, &txr->txdma);
   3420 		IXGBE_TX_LOCK_DESTROY(txr);
   3421 	}
   3422 	free(adapter->tx_rings, M_DEVBUF);
   3423 }
   3424 
   3425 /*********************************************************************
   3426  *
   3427  *  Free transmit ring related data structures.
   3428  *
   3429  **********************************************************************/
   3430 static void
   3431 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3432 {
   3433 	struct adapter *adapter = txr->adapter;
   3434 	struct ixgbe_tx_buf *tx_buffer;
   3435 	int             i;
   3436 
   3437 	INIT_DEBUGOUT("free_transmit_ring: begin");
   3438 
   3439 	if (txr->tx_buffers == NULL)
   3440 		return;
   3441 
   3442 	tx_buffer = txr->tx_buffers;
   3443 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3444 		if (tx_buffer->m_head != NULL) {
   3445 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3446 			    0, tx_buffer->m_head->m_pkthdr.len,
   3447 			    BUS_DMASYNC_POSTWRITE);
   3448 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3449 			m_freem(tx_buffer->m_head);
   3450 			tx_buffer->m_head = NULL;
   3451 			if (tx_buffer->map != NULL) {
   3452 				ixgbe_dmamap_destroy(txr->txtag,
   3453 				    tx_buffer->map);
   3454 				tx_buffer->map = NULL;
   3455 			}
   3456 		} else if (tx_buffer->map != NULL) {
   3457 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3458 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3459 			tx_buffer->map = NULL;
   3460 		}
   3461 	}
   3462 #ifndef IXGBE_LEGACY_TX
   3463 	if (txr->br != NULL)
   3464 		buf_ring_free(txr->br, M_DEVBUF);
   3465 #endif
   3466 	if (txr->tx_buffers != NULL) {
   3467 		free(txr->tx_buffers, M_DEVBUF);
   3468 		txr->tx_buffers = NULL;
   3469 	}
   3470 	if (txr->txtag != NULL) {
   3471 		ixgbe_dma_tag_destroy(txr->txtag);
   3472 		txr->txtag = NULL;
   3473 	}
   3474 	return;
   3475 }
   3476 
   3477 /*********************************************************************
   3478  *
   3479  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3480  *
   3481  **********************************************************************/
   3482 
   3483 static int
   3484 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3485     u32 *cmd_type_len, u32 *olinfo_status)
   3486 {
   3487 	struct m_tag *mtag;
   3488 	struct adapter *adapter = txr->adapter;
   3489 	struct ethercom *ec = &adapter->osdep.ec;
   3490 	struct ixgbe_adv_tx_context_desc *TXD;
   3491 	struct ether_vlan_header *eh;
   3492 	struct ip ip;
   3493 	struct ip6_hdr ip6;
   3494 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3495 	int	ehdrlen, ip_hlen = 0;
   3496 	u16	etype;
   3497 	u8	ipproto __diagused = 0;
   3498 	int	offload = TRUE;
   3499 	int	ctxd = txr->next_avail_desc;
   3500 	u16	vtag = 0;
   3501 
   3502 	/* First check if TSO is to be used */
   3503 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3504 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3505 
   3506 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3507 		offload = FALSE;
   3508 
   3509 	/* Indicate the whole packet as payload when not doing TSO */
    3510 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3511 
   3512 	/* Now ready a context descriptor */
   3513 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3514 
   3515 	/*
   3516 	** In advanced descriptors the vlan tag must
   3517 	** be placed into the context descriptor. Hence
   3518 	** we need to make one even if not doing offloads.
   3519 	*/
   3520 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3521 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3522 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3523 	} else if (offload == FALSE) /* ... no offload to do */
   3524 		return 0;
   3525 
   3526 	/*
   3527 	 * Determine where frame payload starts.
   3528 	 * Jump over vlan headers if already present,
   3529 	 * helpful for QinQ too.
   3530 	 */
   3531 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3532 	eh = mtod(mp, struct ether_vlan_header *);
   3533 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3534 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3535 		etype = ntohs(eh->evl_proto);
   3536 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3537 	} else {
   3538 		etype = ntohs(eh->evl_encap_proto);
   3539 		ehdrlen = ETHER_HDR_LEN;
   3540 	}
   3541 
   3542 	/* Set the ether header length */
   3543 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
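	/*
	 * Layout sketch of vlan_macip_lens (assuming the shift constants
	 * from ixgbe_type.h: VLAN at bit 16, MACLEN at bit 9, IPLEN in
	 * the low bits): once ip_hlen is OR'd in below, an untagged IPv4
	 * frame with no IP options packs as (14 << 9) | 20.
	 */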
   3544 
   3545 	switch (etype) {
   3546 	case ETHERTYPE_IP:
   3547 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3548 		ip_hlen = ip.ip_hl << 2;
   3549 		ipproto = ip.ip_p;
   3550 #if 0
   3551 		ip.ip_sum = 0;
   3552 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3553 #else
   3554 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3555 		    ip.ip_sum == 0);
   3556 #endif
   3557 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3558 		break;
   3559 	case ETHERTYPE_IPV6:
   3560 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3561 		ip_hlen = sizeof(ip6);
   3562 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3563 		ipproto = ip6.ip6_nxt;
   3564 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3565 		break;
   3566 	default:
   3567 		break;
   3568 	}
   3569 
   3570 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3571 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3572 
   3573 	vlan_macip_lens |= ip_hlen;
   3574 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3575 
   3576 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3577 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3578 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3579 		KASSERT(ipproto == IPPROTO_TCP);
   3580 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3581 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3582 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3583 		KASSERT(ipproto == IPPROTO_UDP);
   3584 	}
   3585 
   3586 	/* Now copy bits into descriptor */
   3587 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3588 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3589 	TXD->seqnum_seed = htole32(0);
   3590 	TXD->mss_l4len_idx = htole32(0);
   3591 
   3592 	/* We've consumed the first desc, adjust counters */
   3593 	if (++ctxd == txr->num_desc)
   3594 		ctxd = 0;
   3595 	txr->next_avail_desc = ctxd;
   3596 	--txr->tx_avail;
   3597 
    3598 	return 0;
   3599 }
   3600 
   3601 /**********************************************************************
   3602  *
   3603  *  Setup work for hardware segmentation offload (TSO) on
   3604  *  adapters using advanced tx descriptors
   3605  *
   3606  **********************************************************************/
   3607 static int
   3608 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3609     u32 *cmd_type_len, u32 *olinfo_status)
   3610 {
   3611 	struct m_tag *mtag;
   3612 	struct adapter *adapter = txr->adapter;
   3613 	struct ethercom *ec = &adapter->osdep.ec;
   3614 	struct ixgbe_adv_tx_context_desc *TXD;
   3615 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3616 	u32 mss_l4len_idx = 0, paylen;
   3617 	u16 vtag = 0, eh_type;
   3618 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3619 	struct ether_vlan_header *eh;
   3620 #ifdef INET6
   3621 	struct ip6_hdr *ip6;
   3622 #endif
   3623 #ifdef INET
   3624 	struct ip *ip;
   3625 #endif
   3626 	struct tcphdr *th;
   3627 
   3628 
   3629 	/*
   3630 	 * Determine where frame payload starts.
   3631 	 * Jump over vlan headers if already present
   3632 	 */
   3633 	eh = mtod(mp, struct ether_vlan_header *);
   3634 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3635 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3636 		eh_type = eh->evl_proto;
   3637 	} else {
   3638 		ehdrlen = ETHER_HDR_LEN;
   3639 		eh_type = eh->evl_encap_proto;
   3640 	}
   3641 
   3642 	switch (ntohs(eh_type)) {
   3643 #ifdef INET6
   3644 	case ETHERTYPE_IPV6:
   3645 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3646 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3647 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3648 			return (ENXIO);
    3649 		ip_hlen = sizeof(struct ip6_hdr);
    3651 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3652 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3653 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3654 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3655 		break;
   3656 #endif
   3657 #ifdef INET
   3658 	case ETHERTYPE_IP:
   3659 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3660 		if (ip->ip_p != IPPROTO_TCP)
   3661 			return (ENXIO);
   3662 		ip->ip_sum = 0;
   3663 		ip_hlen = ip->ip_hl << 2;
   3664 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3665 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3666 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3667 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3668 		/* Tell transmit desc to also do IPv4 checksum. */
   3669 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3670 		break;
   3671 #endif
   3672 	default:
   3673 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3674 		    __func__, ntohs(eh_type));
   3675 		break;
   3676 	}
   3677 
   3678 	ctxd = txr->next_avail_desc;
   3679 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3680 
   3681 	tcp_hlen = th->th_off << 2;
   3682 
   3683 	/* This is used in the transmit desc in encap */
   3684 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
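	/*
	 * PAYLEN for TSO counts the TCP payload only, hence the header
	 * subtraction above; e.g. a plain Ethernet/IPv4/TCP segment
	 * drops 14 + 20 + 20 = 54 header bytes from m_pkthdr.len.
	 */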
   3685 
   3686 	/* VLAN MACLEN IPLEN */
   3687 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3688 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
    3689 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3690 	}
   3691 
   3692 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3693 	vlan_macip_lens |= ip_hlen;
   3694 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3695 
   3696 	/* ADV DTYPE TUCMD */
   3697 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3698 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3699 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3700 
   3701 	/* MSS L4LEN IDX */
   3702 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3703 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3704 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3705 
   3706 	TXD->seqnum_seed = htole32(0);
   3707 
   3708 	if (++ctxd == txr->num_desc)
   3709 		ctxd = 0;
   3710 
   3711 	txr->tx_avail--;
   3712 	txr->next_avail_desc = ctxd;
   3713 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3714 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3715 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3716 	++txr->tso_tx.ev_count;
   3717 	return (0);
   3718 }
   3719 
   3720 #ifdef IXGBE_FDIR
    3721 /*
    3722 ** This routine parses packet headers so that Flow
    3723 ** Director can make a hashed filter table entry
    3724 ** allowing traffic flows to be identified and kept
    3725 ** on the same cpu.  Parsing every packet would be a
    3726 ** performance hit, so we only sample one in every
    3727 ** IXGBE_FDIR_RATE packets.
    3728 */
   3729 static void
   3730 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3731 {
   3732 	struct adapter			*adapter = txr->adapter;
   3733 	struct ix_queue			*que;
   3734 	struct ip			*ip;
   3735 	struct tcphdr			*th;
   3736 	struct udphdr			*uh;
   3737 	struct ether_vlan_header	*eh;
   3738 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3739 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3740 	int  				ehdrlen, ip_hlen;
   3741 	u16				etype;
   3742 
   3743 	eh = mtod(mp, struct ether_vlan_header *);
   3744 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3745 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3746 		etype = eh->evl_proto;
   3747 	} else {
   3748 		ehdrlen = ETHER_HDR_LEN;
   3749 		etype = eh->evl_encap_proto;
   3750 	}
   3751 
   3752 	/* Only handling IPv4 */
   3753 	if (etype != htons(ETHERTYPE_IP))
   3754 		return;
   3755 
   3756 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3757 	ip_hlen = ip->ip_hl << 2;
   3758 
   3759 	/* check if we're UDP or TCP */
   3760 	switch (ip->ip_p) {
   3761 	case IPPROTO_TCP:
   3762 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3763 		/* src and dst are inverted */
   3764 		common.port.dst ^= th->th_sport;
   3765 		common.port.src ^= th->th_dport;
   3766 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3767 		break;
   3768 	case IPPROTO_UDP:
   3769 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3770 		/* src and dst are inverted */
   3771 		common.port.dst ^= uh->uh_sport;
   3772 		common.port.src ^= uh->uh_dport;
   3773 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3774 		break;
   3775 	default:
   3776 		return;
   3777 	}
   3778 
   3779 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3780 	if (mp->m_pkthdr.ether_vtag)
   3781 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3782 	else
   3783 		common.flex_bytes ^= etype;
   3784 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3785 
   3786 	que = &adapter->queues[txr->me];
   3787 	/*
   3788 	** This assumes the Rx queue and Tx
   3789 	** queue are bound to the same CPU
   3790 	*/
   3791 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3792 	    input, common, que->msix);
   3793 }
   3794 #endif /* IXGBE_FDIR */
   3795 
   3796 /**********************************************************************
   3797  *
   3798  *  Examine each tx_buffer in the used queue. If the hardware is done
   3799  *  processing the packet then free associated resources. The
   3800  *  tx_buffer is put back on the free queue.
   3801  *
   3802  **********************************************************************/
   3803 static bool
   3804 ixgbe_txeof(struct tx_ring *txr)
   3805 {
   3806 	struct adapter		*adapter = txr->adapter;
   3807 	struct ifnet		*ifp = adapter->ifp;
   3808 	u32			work, processed = 0;
   3809 	u16			limit = txr->process_limit;
   3810 	struct ixgbe_tx_buf	*buf;
   3811 	union ixgbe_adv_tx_desc *txd;
   3812 	struct timeval now, elapsed;
   3813 
   3814 	KASSERT(mutex_owned(&txr->tx_mtx));
   3815 
   3816 #ifdef DEV_NETMAP
   3817 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3818 		struct netmap_adapter *na = NA(ifp);
   3819 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3820 		txd = txr->tx_base;
   3821 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3822 		    BUS_DMASYNC_POSTREAD);
   3823 		/*
   3824 		 * In netmap mode, all the work is done in the context
   3825 		 * of the client thread. Interrupt handlers only wake up
   3826 		 * clients, which may be sleeping on individual rings
   3827 		 * or on a global resource for all rings.
   3828 		 * To implement tx interrupt mitigation, we wake up the client
   3829 		 * thread roughly every half ring, even if the NIC interrupts
   3830 		 * more frequently. This is implemented as follows:
   3831 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3832 		 *   the slot that should wake up the thread (nkr_num_slots
   3833 		 *   means the user thread should not be woken up);
   3834 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3835 		 *   or the slot has the DD bit set.
   3836 		 *
   3837 		 * When the driver has separate locks, we need to
   3838 		 * release and re-acquire txlock to avoid deadlocks.
   3839 		 * XXX see if we can find a better way.
   3840 		 */
   3841 		if (!netmap_mitigate ||
   3842 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3843 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3844 			netmap_tx_irq(ifp, txr->me |
   3845 			    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
   3846 		}
   3847 		return FALSE;
   3848 	}
   3849 #endif /* DEV_NETMAP */
   3850 
   3851 	if (txr->tx_avail == txr->num_desc) {
   3852 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3853 		return false;
   3854 	}
   3855 
   3856 	/* Get work starting point */
   3857 	work = txr->next_to_clean;
   3858 	buf = &txr->tx_buffers[work];
   3859 	txd = &txr->tx_base[work];
   3860 	work -= txr->num_desc; /* The distance to ring end */
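	/*
	 * 'work' is unsigned, so the subtraction above leaves it at
	 * (index - num_desc) modulo 2^32; incrementing it once per
	 * descriptor makes it reach exactly 0 when the scan passes the
	 * end of the ring, which is what the __predict_false(!work)
	 * tests below detect. The final 'work += txr->num_desc'
	 * converts it back into a real ring index.
	 */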
   3861         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3862 	    BUS_DMASYNC_POSTREAD);
   3863 	do {
    3864 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3865 		if (eop == NULL) /* No work */
   3866 			break;
   3867 
   3868 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3869 			break;	/* I/O not complete */
   3870 
   3871 		if (buf->m_head) {
   3872 			txr->bytes +=
   3873 			    buf->m_head->m_pkthdr.len;
   3874 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3875 			    buf->map,
   3876 			    0, buf->m_head->m_pkthdr.len,
   3877 			    BUS_DMASYNC_POSTWRITE);
   3878 			ixgbe_dmamap_unload(txr->txtag,
   3879 			    buf->map);
   3880 			m_freem(buf->m_head);
   3881 			buf->m_head = NULL;
   3882 			/*
   3883 			 * NetBSD: Don't override buf->map with NULL here.
   3884 			 * It'll panic when a ring runs one lap around.
   3885 			 */
   3886 		}
   3887 		buf->eop = NULL;
   3888 		++txr->tx_avail;
   3889 
   3890 		/* We clean the range if multi segment */
   3891 		while (txd != eop) {
   3892 			++txd;
   3893 			++buf;
   3894 			++work;
   3895 			/* wrap the ring? */
   3896 			if (__predict_false(!work)) {
   3897 				work -= txr->num_desc;
   3898 				buf = txr->tx_buffers;
   3899 				txd = txr->tx_base;
   3900 			}
   3901 			if (buf->m_head) {
   3902 				txr->bytes +=
   3903 				    buf->m_head->m_pkthdr.len;
   3904 				bus_dmamap_sync(txr->txtag->dt_dmat,
   3905 				    buf->map,
   3906 				    0, buf->m_head->m_pkthdr.len,
   3907 				    BUS_DMASYNC_POSTWRITE);
   3908 				ixgbe_dmamap_unload(txr->txtag,
   3909 				    buf->map);
   3910 				m_freem(buf->m_head);
   3911 				buf->m_head = NULL;
   3912 				/*
   3913 				 * NetBSD: Don't override buf->map with NULL
   3914 				 * here. It'll panic when a ring runs one lap
   3915 				 * around.
   3916 				 */
   3917 			}
   3918 			++txr->tx_avail;
   3919 			buf->eop = NULL;
   3920 
   3921 		}
   3922 		++txr->packets;
   3923 		++processed;
   3924 		++ifp->if_opackets;
   3925 		getmicrotime(&txr->watchdog_time);
   3926 
   3927 		/* Try the next packet */
   3928 		++txd;
   3929 		++buf;
   3930 		++work;
   3931 		/* reset with a wrap */
   3932 		if (__predict_false(!work)) {
   3933 			work -= txr->num_desc;
   3934 			buf = txr->tx_buffers;
   3935 			txd = txr->tx_base;
   3936 		}
   3937 		prefetch(txd);
   3938 	} while (__predict_true(--limit));
   3939 
   3940 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3941 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3942 
   3943 	work += txr->num_desc;
   3944 	txr->next_to_clean = work;
   3945 
    3946 	/*
    3947 	** Watchdog calculation: we know there's work
    3948 	** outstanding, or the early return above would
    3949 	** have been taken, so nothing processed for too
    3950 	** long indicates a hang.
    3951 	*/
   3952 	getmicrotime(&now);
   3953 	timersub(&now, &txr->watchdog_time, &elapsed);
   3954 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   3955 		txr->queue_status = IXGBE_QUEUE_HUNG;
   3956 
   3957 	if (txr->tx_avail == txr->num_desc) {
   3958 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3959 		return false;
   3960 	}
   3961 
   3962 	return true;
   3963 }
   3964 
   3965 /*********************************************************************
   3966  *
   3967  *  Refresh mbuf buffers for RX descriptor rings
    3968  *   - now keeps its own state so discards due to resource
    3969  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    3970  *     it just returns, keeping its placeholder, so it can simply
    3971  *     be called again to retry.
   3972  *
   3973  **********************************************************************/
   3974 static void
   3975 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   3976 {
   3977 	struct adapter		*adapter = rxr->adapter;
   3978 	struct ixgbe_rx_buf	*rxbuf;
   3979 	struct mbuf		*mp;
   3980 	int			i, j, error;
   3981 	bool			refreshed = false;
   3982 
   3983 	i = j = rxr->next_to_refresh;
   3984 	/* Control the loop with one beyond */
   3985 	if (++j == rxr->num_desc)
   3986 		j = 0;
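	/*
	 * 'j' stays one slot ahead of 'i'; next_to_refresh (i) is only
	 * advanced once slot i actually holds a buffer, so a failed
	 * allocation leaves the placeholder in place and a later call
	 * simply retries from the same spot.
	 */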
   3987 
   3988 	while (j != limit) {
   3989 		rxbuf = &rxr->rx_buffers[i];
   3990 		if (rxbuf->buf == NULL) {
   3991 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   3992 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   3993 			if (mp == NULL) {
   3994 				rxr->no_jmbuf.ev_count++;
   3995 				goto update;
   3996 			}
   3997 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   3998 				m_adj(mp, ETHER_ALIGN);
   3999 		} else
   4000 			mp = rxbuf->buf;
   4001 
   4002 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4003 		/* If we're dealing with an mbuf that was copied rather
   4004 		 * than replaced, there's no need to go through busdma.
   4005 		 */
   4006 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4007 			/* Get the memory mapping */
   4008 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4009 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4010 			if (error != 0) {
   4011 				printf("Refresh mbufs: payload dmamap load"
   4012 				    " failure - %d\n", error);
   4013 				m_free(mp);
   4014 				rxbuf->buf = NULL;
   4015 				goto update;
   4016 			}
   4017 			rxbuf->buf = mp;
   4018 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4019 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4020 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4021 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4022 		} else {
   4023 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4024 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4025 		}
   4026 
   4027 		refreshed = true;
   4028 		/* Next is precalculated */
   4029 		i = j;
   4030 		rxr->next_to_refresh = i;
   4031 		if (++j == rxr->num_desc)
   4032 			j = 0;
   4033 	}
   4034 update:
   4035 	if (refreshed) /* Update hardware tail index */
   4036 		IXGBE_WRITE_REG(&adapter->hw,
   4037 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4038 	return;
   4039 }
   4040 
   4041 /*********************************************************************
   4042  *
   4043  *  Allocate memory for rx_buffer structures. Since we use one
    4044  *  rx_buffer per received packet, the maximum number of rx_buffers
   4045  *  that we'll need is equal to the number of receive descriptors
   4046  *  that we've allocated.
   4047  *
   4048  **********************************************************************/
   4049 static int
   4050 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4051 {
   4052 	struct	adapter 	*adapter = rxr->adapter;
   4053 	device_t 		dev = adapter->dev;
   4054 	struct ixgbe_rx_buf 	*rxbuf;
   4055 	int             	i, bsize, error;
   4056 
   4057 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4058 	if (!(rxr->rx_buffers =
   4059 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4060 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4061 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4062 		error = ENOMEM;
   4063 		goto fail;
   4064 	}
   4065 
   4066 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4067 				   1, 0,	/* alignment, bounds */
   4068 				   MJUM16BYTES,		/* maxsize */
   4069 				   1,			/* nsegments */
   4070 				   MJUM16BYTES,		/* maxsegsize */
   4071 				   0,			/* flags */
   4072 				   &rxr->ptag))) {
   4073 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4074 		goto fail;
   4075 	}
   4076 
    4077 	for (i = 0; i < rxr->num_desc; i++) {
   4078 		rxbuf = &rxr->rx_buffers[i];
   4079 		error = ixgbe_dmamap_create(rxr->ptag,
   4080 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4081 		if (error) {
   4082 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4083 			goto fail;
   4084 		}
   4085 	}
   4086 
   4087 	return (0);
   4088 
   4089 fail:
   4090 	/* Frees all, but can handle partial completion */
   4091 	ixgbe_free_receive_structures(adapter);
   4092 	return (error);
   4093 }
   4094 
   4095 /*
   4096 ** Used to detect a descriptor that has
   4097 ** been merged by Hardware RSC.
   4098 */
   4099 static inline u32
   4100 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4101 {
   4102 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4103 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4104 }
   4105 
   4106 /*********************************************************************
   4107  *
   4108  *  Initialize Hardware RSC (LRO) feature on 82599
   4109  *  for an RX ring, this is toggled by the LRO capability
   4110  *  even though it is transparent to the stack.
   4111  *
   4112  *  NOTE: since this HW feature only works with IPV4 and
   4113  *        our testing has shown soft LRO to be as effective
   4114  *        I have decided to disable this by default.
   4115  *
   4116  **********************************************************************/
   4117 static void
   4118 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4119 {
   4120 	struct	adapter 	*adapter = rxr->adapter;
   4121 	struct	ixgbe_hw	*hw = &adapter->hw;
   4122 	u32			rscctrl, rdrxctl;
   4123 
   4124 	/* If turning LRO/RSC off we need to disable it */
   4125 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4126 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    4127 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared bit back, otherwise disabling is a no-op */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
    4128 		return;
   4129 	}
   4130 
   4131 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4132 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4133 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4134 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4135 #endif /* DEV_NETMAP */
   4136 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4137 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4138 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4139 
   4140 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4141 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4142 	/*
   4143 	** Limit the total number of descriptors that
   4144 	** can be combined, so it does not exceed 64K
   4145 	*/
   4146 	if (rxr->mbuf_sz == MCLBYTES)
   4147 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4148 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4149 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4150 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4151 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4152 	else  /* Using 16K cluster */
   4153 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
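	/*
	** Arithmetic check (a sketch, assuming the standard cluster
	** sizes): 16 x 2KB = 32KB, 8 x 4KB = 32KB, 4 x 9KB = 36KB and
	** 1 x 16KB = 16KB -- every combination stays below the 64K
	** RSC limit noted above.
	*/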
   4154 
   4155 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4156 
   4157 	/* Enable TCP header recognition */
   4158 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4159 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4160 	    IXGBE_PSRTYPE_TCPHDR));
   4161 
   4162 	/* Disable RSC for ACK packets */
   4163 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4164 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4165 
   4166 	rxr->hw_rsc = TRUE;
   4167 }
   4168 
   4169 
   4170 static void
   4171 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4172 {
   4173 	struct ixgbe_rx_buf       *rxbuf;
   4174 	int i;
   4175 
   4176 	for (i = 0; i < rxr->num_desc; i++) {
   4177 		rxbuf = &rxr->rx_buffers[i];
   4178 		if (rxbuf->buf != NULL) {
   4179 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4180 			    0, rxbuf->buf->m_pkthdr.len,
   4181 			    BUS_DMASYNC_POSTREAD);
   4182 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4183 			rxbuf->buf->m_flags |= M_PKTHDR;
   4184 			m_freem(rxbuf->buf);
   4185 			rxbuf->buf = NULL;
   4186 		}
   4187 	}
   4188 }
   4189 
   4190 
   4191 /*********************************************************************
   4192  *
   4193  *  Initialize a receive ring and its buffers.
   4194  *
   4195  **********************************************************************/
   4196 static int
   4197 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4198 {
   4199 	struct	adapter 	*adapter;
   4200 	struct ixgbe_rx_buf	*rxbuf;
   4201 #ifdef LRO
   4202 	struct ifnet		*ifp;
   4203 	struct lro_ctrl		*lro = &rxr->lro;
   4204 #endif /* LRO */
   4205 	int			rsize, error = 0;
   4206 #ifdef DEV_NETMAP
   4207 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4208 	struct netmap_slot *slot;
   4209 #endif /* DEV_NETMAP */
   4210 
   4211 	adapter = rxr->adapter;
   4212 #ifdef LRO
   4213 	ifp = adapter->ifp;
   4214 #endif /* LRO */
   4215 
   4216 	/* Clear the ring contents */
   4217 	IXGBE_RX_LOCK(rxr);
   4218 #ifdef DEV_NETMAP
   4219 	/* same as in ixgbe_setup_transmit_ring() */
   4220 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4221 #endif /* DEV_NETMAP */
   4222 	rsize = roundup2(adapter->num_rx_desc *
   4223 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4224 	bzero((void *)rxr->rx_base, rsize);
   4225 	/* Cache the size */
   4226 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4227 
   4228 	/* Free current RX buffer structs and their mbufs */
   4229 	ixgbe_free_receive_ring(rxr);
   4230 
   4231 	IXGBE_RX_UNLOCK(rxr);
   4232 
   4233 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4234 	 * or size of jumbo mbufs may have changed.
   4235 	 */
   4236 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4237 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4238 
   4239 	IXGBE_RX_LOCK(rxr);
   4240 
   4241 	/* Now replenish the mbufs */
   4242 	for (int j = 0; j != rxr->num_desc; ++j) {
   4243 		struct mbuf	*mp;
   4244 
   4245 		rxbuf = &rxr->rx_buffers[j];
   4246 #ifdef DEV_NETMAP
   4247 		/*
   4248 		 * In netmap mode, fill the map and set the buffer
   4249 		 * address in the NIC ring, considering the offset
   4250 		 * between the netmap and NIC rings (see comment in
   4251 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4252 		 * an mbuf, so end the block with a continue;
   4253 		 */
   4254 		if (slot) {
   4255 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4256 			uint64_t paddr;
   4257 			void *addr;
   4258 
   4259 			addr = PNMB(slot + sj, &paddr);
   4260 			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
   4261 			/* Update descriptor */
   4262 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4263 			continue;
   4264 		}
   4265 #endif /* DEV_NETMAP */
   4266 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4267 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4268 		if (rxbuf->buf == NULL) {
   4269 			error = ENOBUFS;
    4270 			goto fail;
   4271 		}
   4272 		mp = rxbuf->buf;
   4273 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4274 		/* Get the memory mapping */
   4275 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4276 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4277 		if (error != 0)
    4278 			goto fail;
   4279 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4280 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4281 		/* Update descriptor */
   4282 		rxr->rx_base[j].read.pkt_addr =
   4283 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4284 	}
   4285 
   4286 
   4287 	/* Setup our descriptor indices */
   4288 	rxr->next_to_check = 0;
   4289 	rxr->next_to_refresh = 0;
   4290 	rxr->lro_enabled = FALSE;
   4291 	rxr->rx_copies.ev_count = 0;
   4292 	rxr->rx_bytes.ev_count = 0;
   4293 	rxr->discard = FALSE;
   4294 	rxr->vtag_strip = FALSE;
   4295 
   4296 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4297 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4298 
   4299 	/*
   4300 	** Now set up the LRO interface:
   4301 	*/
   4302 	if (ixgbe_rsc_enable)
   4303 		ixgbe_setup_hw_rsc(rxr);
   4304 #ifdef LRO
   4305 	else if (ifp->if_capenable & IFCAP_LRO) {
   4306 		device_t dev = adapter->dev;
   4307 		int err = tcp_lro_init(lro);
   4308 		if (err) {
   4309 			device_printf(dev, "LRO Initialization failed!\n");
   4310 			goto fail;
   4311 		}
   4312 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4313 		rxr->lro_enabled = TRUE;
   4314 		lro->ifp = adapter->ifp;
   4315 	}
   4316 #endif /* LRO */
   4317 
   4318 	IXGBE_RX_UNLOCK(rxr);
   4319 	return (0);
   4320 
   4321 fail:
   4322 	ixgbe_free_receive_ring(rxr);
   4323 	IXGBE_RX_UNLOCK(rxr);
   4324 	return (error);
   4325 }
   4326 
   4327 /*********************************************************************
   4328  *
   4329  *  Initialize all receive rings.
   4330  *
   4331  **********************************************************************/
   4332 static int
   4333 ixgbe_setup_receive_structures(struct adapter *adapter)
   4334 {
   4335 	struct rx_ring *rxr = adapter->rx_rings;
   4336 	int j;
   4337 
   4338 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4339 		if (ixgbe_setup_receive_ring(rxr))
   4340 			goto fail;
   4341 
   4342 	return (0);
   4343 fail:
    4344 	/*
    4345 	 * Free the RX buffers allocated so far. We only need to handle
    4346 	 * the rings that completed; the failing ring has already
    4347 	 * cleaned up after itself. Ring 'j' failed, so it's the terminus.
    4348 	 */
   4349 	for (int i = 0; i < j; ++i) {
   4350 		rxr = &adapter->rx_rings[i];
   4351 		ixgbe_free_receive_ring(rxr);
   4352 	}
   4353 
   4354 	return (ENOBUFS);
   4355 }
   4356 
   4357 /*********************************************************************
   4358  *
   4359  *  Setup receive registers and features.
   4360  *
   4361  **********************************************************************/
   4362 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4363 
   4364 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
   4365 
   4366 static void
   4367 ixgbe_initialize_receive_units(struct adapter *adapter)
   4368 {
   4369 	int i;
   4370 	struct	rx_ring	*rxr = adapter->rx_rings;
   4371 	struct ixgbe_hw	*hw = &adapter->hw;
   4372 	struct ifnet   *ifp = adapter->ifp;
   4373 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4374 	u32		reta, mrqc = 0, hlreg, r[10];
   4375 
   4376 
   4377 	/*
   4378 	 * Make sure receives are disabled while
   4379 	 * setting up the descriptor ring
   4380 	 */
   4381 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4382 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4383 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4384 
   4385 	/* Enable broadcasts */
   4386 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4387 	fctrl |= IXGBE_FCTRL_BAM;
   4388 	fctrl |= IXGBE_FCTRL_DPF;
   4389 	fctrl |= IXGBE_FCTRL_PMCF;
   4390 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4391 
   4392 	/* Set for Jumbo Frames? */
   4393 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4394 	if (ifp->if_mtu > ETHERMTU)
   4395 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4396 	else
   4397 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4398 #ifdef DEV_NETMAP
   4399 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4400 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4401 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4402 	else
   4403 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4404 #endif /* DEV_NETMAP */
   4405 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4406 
   4407 	bufsz = (adapter->rx_mbuf_sz +
   4408 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
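	/*
	 * BSIZEPKT is programmed in 1 KB units (IXGBE_SRRCTL_BSIZEPKT_SHIFT
	 * is 10 in ixgbe_type.h), so a standard 2048-byte cluster yields
	 * bufsz == 2; BSIZEPKT_ROUNDUP rounds odd sizes up to the next unit.
	 */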
   4409 
   4410 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4411 		u64 rdba = rxr->rxdma.dma_paddr;
   4412 
   4413 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4414 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4415 			       (rdba & 0x00000000ffffffffULL));
   4416 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4417 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4418 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4419 
   4420 		/* Set up the SRRCTL register */
   4421 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4422 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4423 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4424 		srrctl |= bufsz;
   4425 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4426 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4427 
   4428 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4429 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4430 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4431 
   4432 		/* Set the processing limit */
   4433 		rxr->process_limit = ixgbe_rx_process_limit;
   4434 	}
   4435 
   4436 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4437 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4438 			      IXGBE_PSRTYPE_UDPHDR |
   4439 			      IXGBE_PSRTYPE_IPV4HDR |
   4440 			      IXGBE_PSRTYPE_IPV6HDR;
   4441 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4442 	}
   4443 
   4444 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4445 
   4446 	/* Setup RSS */
   4447 	if (adapter->num_queues > 1) {
   4448 		int j;
   4449 		reta = 0;
   4450 
   4451 		/* set up random bits */
   4452 		cprng_fast(&r, sizeof(r));
   4453 
   4454 		/* Set up the redirection table */
   4455 		for (i = 0, j = 0; i < 128; i++, j++) {
   4456 			if (j == adapter->num_queues) j = 0;
   4457 			reta = (reta << 8) | (j * 0x11);
   4458 			if ((i & 3) == 3)
   4459 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4460 		}
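		/*
		 * The RETA is 128 byte-wide entries packed four per 32-bit
		 * register, shifted in above and flushed on every fourth
		 * entry. Multiplying by 0x11 replicates the queue index in
		 * both nibbles of the byte (an assumption: this keeps the
		 * value correct whether the MAC consumes 4 or 8 bits of
		 * each entry).
		 */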
   4461 
   4462 		/* Now fill our hash function seeds */
   4463 		for (i = 0; i < 10; i++)
   4464 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
   4465 
   4466 		/* Perform hash on these packet types */
   4467 		mrqc = IXGBE_MRQC_RSSEN
   4468 		     | IXGBE_MRQC_RSS_FIELD_IPV4
   4469 		     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4470 		     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4471 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4472 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4473 		     | IXGBE_MRQC_RSS_FIELD_IPV6
   4474 		     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4475 		     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4476 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4477 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4478 
   4479 		/* RSS and RX IPP Checksum are mutually exclusive */
   4480 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4481 	}
   4482 
   4483 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4484 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4485 
   4486 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4487 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4488 
   4489 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4490 
   4491 	return;
   4492 }
   4493 
   4494 /*********************************************************************
   4495  *
   4496  *  Free all receive rings.
   4497  *
   4498  **********************************************************************/
   4499 static void
   4500 ixgbe_free_receive_structures(struct adapter *adapter)
   4501 {
   4502 	struct rx_ring *rxr = adapter->rx_rings;
   4503 
   4504 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4505 #ifdef LRO
   4506 		struct lro_ctrl		*lro = &rxr->lro;
   4507 #endif /* LRO */
   4508 		ixgbe_free_receive_buffers(rxr);
   4509 #ifdef LRO
   4510 		/* Free LRO memory */
   4511 		tcp_lro_free(lro);
   4512 #endif /* LRO */
   4513 		/* Free the ring memory as well */
   4514 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4515 		IXGBE_RX_LOCK_DESTROY(rxr);
   4516 	}
   4517 
   4518 	free(adapter->rx_rings, M_DEVBUF);
   4519 }
   4520 
   4521 
   4522 /*********************************************************************
   4523  *
   4524  *  Free receive ring data structures
   4525  *
   4526  **********************************************************************/
   4527 static void
   4528 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4529 {
   4530 	struct adapter		*adapter = rxr->adapter;
   4531 	struct ixgbe_rx_buf	*rxbuf;
   4532 
    4533 	INIT_DEBUGOUT("free_receive_buffers: begin");
   4534 
   4535 	/* Cleanup any existing buffers */
   4536 	if (rxr->rx_buffers != NULL) {
   4537 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4538 			rxbuf = &rxr->rx_buffers[i];
   4539 			if (rxbuf->buf != NULL) {
   4540 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4541 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4542 				    BUS_DMASYNC_POSTREAD);
   4543 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4544 				rxbuf->buf->m_flags |= M_PKTHDR;
   4545 				m_freem(rxbuf->buf);
   4546 			}
   4547 			rxbuf->buf = NULL;
   4548 			if (rxbuf->pmap != NULL) {
   4549 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4550 				rxbuf->pmap = NULL;
   4551 			}
   4552 		}
    4554 		free(rxr->rx_buffers, M_DEVBUF);
    4555 		rxr->rx_buffers = NULL;
   4557 	}
   4558 
   4559 	if (rxr->ptag != NULL) {
   4560 		ixgbe_dma_tag_destroy(rxr->ptag);
   4561 		rxr->ptag = NULL;
   4562 	}
   4563 
   4564 	return;
   4565 }
   4566 
   4567 static __inline void
   4568 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4569 {
   4570 	int s;
   4571 
   4572 #ifdef LRO
   4573 	struct adapter	*adapter = ifp->if_softc;
   4574 	struct ethercom *ec = &adapter->osdep.ec;
   4575 
    4576 	/*
    4577 	 * At the moment LRO is only for IP/TCP packets, and the TCP
    4578 	 * checksum of the packet must have been computed by hardware.
    4579 	 * Also the frame must not carry a VLAN tag in its ethernet
    4580 	 * header.  In case of IPv6 we do not yet support ext. hdrs.
    4581 	 */
    4582 	if (rxr->lro_enabled &&
    4583 	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
    4584 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
    4585 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4586 	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
    4587 	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
    4588 	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
    4589 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
    4590 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
    4591 		/*
    4592 		 * Send to the stack if:
    4593 		 *  - LRO not enabled, or
    4594 		 *  - no LRO resources, or
    4595 		 *  - lro enqueue fails
    4596 		 */
    4597 		if (rxr->lro.lro_cnt != 0)
    4598 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
    4599 				return;
    4600 	}
   4600 #endif /* LRO */
   4601 
   4602 	IXGBE_RX_UNLOCK(rxr);
   4603 
   4604 	s = splnet();
   4605 	/* Pass this up to any BPF listeners. */
   4606 	bpf_mtap(ifp, m);
   4607 	(*ifp->if_input)(ifp, m);
   4608 	splx(s);
   4609 
   4610 	IXGBE_RX_LOCK(rxr);
   4611 }
   4612 
   4613 static __inline void
   4614 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4615 {
   4616 	struct ixgbe_rx_buf	*rbuf;
   4617 
   4618 	rbuf = &rxr->rx_buffers[i];
   4619 
    4620 	if (rbuf->fmp != NULL) {	/* Partial chain? */
    4621 		rbuf->fmp->m_flags |= M_PKTHDR;
    4622 		m_freem(rbuf->fmp);
    4623 		rbuf->fmp = NULL;
    4624 	}
   4625 
   4626 	/*
   4627 	** With advanced descriptors the writeback
    4628 	** clobbers the buffer addrs, so it's easier
   4629 	** to just free the existing mbufs and take
   4630 	** the normal refresh path to get new buffers
   4631 	** and mapping.
   4632 	*/
   4633 	if (rbuf->buf) {
   4634 		m_free(rbuf->buf);
   4635 		rbuf->buf = NULL;
   4636 	}
   4637 
   4638 	return;
   4639 }
   4640 
   4641 
   4642 /*********************************************************************
   4643  *
    4644  *  This routine executes in interrupt context. It replenishes
    4645  *  the mbufs in the descriptor ring and passes data that has
    4646  *  been DMA'd into host memory up to the upper layer.
   4647  *
   4648  *  We loop at most count times if count is > 0, or until done if
   4649  *  count < 0.
   4650  *
   4651  *  Return TRUE for more work, FALSE for all clean.
   4652  *********************************************************************/
   4653 static bool
   4654 ixgbe_rxeof(struct ix_queue *que)
   4655 {
   4656 	struct adapter		*adapter = que->adapter;
   4657 	struct rx_ring		*rxr = que->rxr;
   4658 	struct ifnet		*ifp = adapter->ifp;
   4659 #ifdef LRO
   4660 	struct lro_ctrl		*lro = &rxr->lro;
   4661 	struct lro_entry	*queued;
   4662 #endif /* LRO */
   4663 	int			i, nextp, processed = 0;
   4664 	u32			staterr = 0;
   4665 	u16			count = rxr->process_limit;
   4666 	union ixgbe_adv_rx_desc	*cur;
   4667 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4668 
   4669 	IXGBE_RX_LOCK(rxr);
   4670 
   4671 #ifdef DEV_NETMAP
   4672 	/* Same as the txeof routine: wakeup clients on intr. */
   4673 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
   4674 		return (FALSE);
   4675 #endif /* DEV_NETMAP */
   4676 	for (i = rxr->next_to_check; count != 0;) {
   4677 		struct mbuf	*sendmp, *mp;
   4678 		u32		rsc, ptype;
   4679 		u16		len;
   4680 		u16		vtag = 0;
   4681 		bool		eop;
   4682 
   4683 		/* Sync the ring. */
   4684 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4685 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4686 
   4687 		cur = &rxr->rx_base[i];
   4688 		staterr = le32toh(cur->wb.upper.status_error);
   4689 
   4690 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4691 			break;
   4692 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4693 			break;
   4694 
   4695 		count--;
   4696 		sendmp = NULL;
   4697 		nbuf = NULL;
   4698 		rsc = 0;
   4699 		cur->wb.upper.status_error = 0;
   4700 		rbuf = &rxr->rx_buffers[i];
   4701 		mp = rbuf->buf;
   4702 
   4703 		len = le16toh(cur->wb.upper.length);
   4704 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4705 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4706 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4707 
   4708 		/* Make sure bad packets are discarded */
   4709 		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
   4710 		    (rxr->discard)) {
   4711 			rxr->rx_discarded.ev_count++;
   4712 			if (eop)
   4713 				rxr->discard = FALSE;
   4714 			else
   4715 				rxr->discard = TRUE;
   4716 			ixgbe_rx_discard(rxr, i);
   4717 			goto next_desc;
   4718 		}
   4719 
   4720 		/*
    4721 		** On the 82599, which supports a hardware
    4722 		** LRO (called HW RSC), packets need not
    4723 		** be fragmented across sequential
    4724 		** descriptors; rather, the next descriptor
    4725 		** is indicated in bits of the current one.
    4726 		** This also means that we might process
    4727 		** more than one packet at a time, something
    4728 		** that has never been true before; it
    4729 		** required eliminating global chain pointers
    4730 		** in favor of what we are doing here.  -jfv
   4731 		*/
   4732 		if (!eop) {
   4733 			/*
   4734 			** Figure out the next descriptor
   4735 			** of this frame.
   4736 			*/
   4737 			if (rxr->hw_rsc == TRUE) {
   4738 				rsc = ixgbe_rsc_count(cur);
   4739 				rxr->rsc_num += (rsc - 1);
   4740 			}
   4741 			if (rsc) { /* Get hardware index */
   4742 				nextp = ((staterr &
   4743 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4744 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4745 			} else { /* Just sequential */
   4746 				nextp = i + 1;
   4747 				if (nextp == adapter->num_rx_desc)
   4748 					nextp = 0;
   4749 			}
   4750 			nbuf = &rxr->rx_buffers[nextp];
   4751 			prefetch(nbuf);
   4752 		}
   4753 		/*
   4754 		** Rather than using the fmp/lmp global pointers
   4755 		** we now keep the head of a packet chain in the
   4756 		** buffer struct and pass this along from one
   4757 		** descriptor to the next, until we get EOP.
   4758 		*/
   4759 		mp->m_len = len;
   4760 		/*
   4761 		** See if there is a stored head
   4762 		** that determines what we are
   4763 		*/
   4764 		sendmp = rbuf->fmp;
   4765 
   4766 		if (sendmp != NULL) {  /* secondary frag */
   4767 			rbuf->buf = rbuf->fmp = NULL;
   4768 			mp->m_flags &= ~M_PKTHDR;
   4769 			sendmp->m_pkthdr.len += mp->m_len;
   4770 		} else {
   4771 			/*
   4772 			 * Optimize.  This might be a small packet,
   4773 			 * maybe just a TCP ACK.  Do a fast copy that
   4774 			 * is cache aligned into a new mbuf, and
   4775 			 * leave the old mbuf+cluster for re-use.
   4776 			 */
   4777 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4778 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4779 				if (sendmp != NULL) {
   4780 					sendmp->m_data +=
   4781 					    IXGBE_RX_COPY_ALIGN;
   4782 					ixgbe_bcopy(mp->m_data,
   4783 					    sendmp->m_data, len);
   4784 					sendmp->m_len = len;
   4785 					rxr->rx_copies.ev_count++;
   4786 					rbuf->flags |= IXGBE_RX_COPY;
   4787 				}
   4788 			}
   4789 			if (sendmp == NULL) {
   4790 				rbuf->buf = rbuf->fmp = NULL;
   4791 				sendmp = mp;
   4792 			}
   4793 
   4794 			/* first desc of a non-ps chain */
   4795 			sendmp->m_flags |= M_PKTHDR;
   4796 			sendmp->m_pkthdr.len = mp->m_len;
   4797 		}
   4798 		++processed;
   4799 		/* Pass the head pointer on */
   4800 		if (eop == 0) {
   4801 			nbuf->fmp = sendmp;
   4802 			sendmp = NULL;
   4803 			mp->m_next = nbuf->buf;
   4804 		} else { /* Sending this frame */
   4805 			sendmp->m_pkthdr.rcvif = ifp;
   4806 			ifp->if_ipackets++;
   4807 			rxr->rx_packets.ev_count++;
   4808 			/* capture data for AIM */
   4809 			rxr->bytes += sendmp->m_pkthdr.len;
   4810 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   4811 			/* Process vlan info */
   4812 			if ((rxr->vtag_strip) &&
   4813 			    (staterr & IXGBE_RXD_STAT_VP))
   4814 				vtag = le16toh(cur->wb.upper.vlan);
   4815 			if (vtag) {
   4816 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   4817 				    printf("%s: could not apply VLAN "
   4818 					"tag", __func__));
   4819 			}
   4820 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   4821 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   4822 				   &adapter->stats);
   4823 			}
   4824 #if __FreeBSD_version >= 800000
   4825 			sendmp->m_pkthdr.flowid = que->msix;
   4826 			sendmp->m_flags |= M_FLOWID;
   4827 #endif
   4828 		}
   4829 next_desc:
   4830 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4831 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4832 
   4833 		/* Advance our pointers to the next descriptor. */
   4834 		if (++i == rxr->num_desc)
   4835 			i = 0;
   4836 
   4837 		/* Now send to the stack or do LRO */
   4838 		if (sendmp != NULL) {
   4839 			rxr->next_to_check = i;
   4840 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   4841 			i = rxr->next_to_check;
   4842 		}
   4843 
    4844 		/* Refresh mbufs every 8 descriptors */
   4845 		if (processed == 8) {
   4846 			ixgbe_refresh_mbufs(rxr, i);
   4847 			processed = 0;
   4848 		}
   4849 	}
   4850 
   4851 	/* Refresh any remaining buf structs */
   4852 	if (ixgbe_rx_unrefreshed(rxr))
   4853 		ixgbe_refresh_mbufs(rxr, i);
   4854 
   4855 	rxr->next_to_check = i;
   4856 
   4857 #ifdef LRO
   4858 	/*
   4859 	 * Flush any outstanding LRO work
   4860 	 */
   4861 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   4862 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   4863 		tcp_lro_flush(lro, queued);
   4864 	}
   4865 #endif /* LRO */
   4866 
   4867 	IXGBE_RX_UNLOCK(rxr);
   4868 
   4869 	/*
   4870 	** We still have cleaning to do?
   4871 	** Schedule another interrupt if so.
   4872 	*/
   4873 	if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
   4874 		ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
   4875 		return true;
   4876 	}
   4877 
   4878 	return false;
   4879 }
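/*
 * Illustrative numbers (hypothetical, not driver defaults): with
 * num_desc = 1024 and process_limit = 256, a single call of
 * ixgbe_rxeof() consumes at most 256 descriptors, refreshing mbufs
 * after every 8 it completes, and returns true (so the queue is
 * rearmed via ixgbe_rearm_queues()) if the next descriptor already
 * has DD set when the budget runs out.
 */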
   4880 
   4881 
   4882 /*********************************************************************
   4883  *
   4884  *  Verify that the hardware indicated that the checksum is valid.
    4885  *  Inform the stack of the checksum status so that it does not
    4886  *  spend time verifying the checksum again.
   4887  *
   4888  *********************************************************************/
   4889 static void
   4890 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   4891     struct ixgbe_hw_stats *stats)
   4892 {
   4893 	u16	status = (u16) staterr;
   4894 	u8	errors = (u8) (staterr >> 24);
   4895 #if 0
   4896 	bool	sctp = FALSE;
   4897 
   4898 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4899 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   4900 		sctp = TRUE;
   4901 #endif
   4902 
   4903 	if (status & IXGBE_RXD_STAT_IPCS) {
   4904 		stats->ipcs.ev_count++;
   4905 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   4906 			/* IP Checksum Good */
   4907 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   4908 
   4909 		} else {
   4910 			stats->ipcs_bad.ev_count++;
   4911 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   4912 		}
   4913 	}
   4914 	if (status & IXGBE_RXD_STAT_L4CS) {
   4915 		stats->l4cs.ev_count++;
   4916 		u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   4917 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   4918 			mp->m_pkthdr.csum_flags |= type;
   4919 		} else {
   4920 			stats->l4cs_bad.ev_count++;
   4921 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   4922 		}
   4923 	}
   4924 	return;
   4925 }
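/*
 * Worked example of the decoding above: the writeback status_error
 * dword keeps the status field in bits 15:0 and the extended error
 * field in bits 31:24.  A good TCP/IPv4 frame therefore arrives with
 * IXGBE_RXD_STAT_IPCS and IXGBE_RXD_STAT_L4CS set in status and
 * neither IXGBE_RXD_ERR_IPE nor IXGBE_RXD_ERR_TCPE set in errors, so
 * the mbuf ends up flagged M_CSUM_IPv4 plus the TCP/UDP flags with no
 * *_BAD bits.
 */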
   4926 
   4927 
   4928 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   4929 /*
    4930 ** This routine is run via a vlan config EVENT,
   4931 ** it enables us to use the HW Filter table since
   4932 ** we can get the vlan id. This just creates the
   4933 ** entry in the soft version of the VFTA, init will
   4934 ** repopulate the real table.
   4935 */
   4936 static void
   4937 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4938 {
   4939 	struct adapter	*adapter = ifp->if_softc;
   4940 	u16		index, bit;
   4941 
   4942 	if (ifp->if_softc !=  arg)   /* Not our event */
   4943 		return;
   4944 
   4945 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4946 		return;
   4947 
   4948 	IXGBE_CORE_LOCK(adapter);
   4949 	index = (vtag >> 5) & 0x7F;
   4950 	bit = vtag & 0x1F;
   4951 	adapter->shadow_vfta[index] |= (1 << bit);
   4952 	ixgbe_init_locked(adapter);
   4953 	IXGBE_CORE_UNLOCK(adapter);
   4954 }
   4955 
   4956 /*
    4957 ** This routine is run via a vlan
    4958 ** unconfig EVENT; remove our entry
   4959 ** in the soft vfta.
   4960 */
   4961 static void
   4962 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4963 {
   4964 	struct adapter	*adapter = ifp->if_softc;
   4965 	u16		index, bit;
   4966 
   4967 	if (ifp->if_softc !=  arg)
   4968 		return;
   4969 
   4970 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4971 		return;
   4972 
   4973 	IXGBE_CORE_LOCK(adapter);
   4974 	index = (vtag >> 5) & 0x7F;
   4975 	bit = vtag & 0x1F;
   4976 	adapter->shadow_vfta[index] &= ~(1 << bit);
   4977 	/* Re-init to load the changes */
   4978 	ixgbe_init_locked(adapter);
   4979 	IXGBE_CORE_UNLOCK(adapter);
   4980 }
   4981 #endif
   4982 
   4983 static void
   4984 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   4985 {
   4986 	struct ethercom *ec = &adapter->osdep.ec;
   4987 	struct ixgbe_hw *hw = &adapter->hw;
   4988 	struct rx_ring	*rxr;
   4989 	u32		ctrl;
   4990 
   4991 	/*
    4992 	** We get here through init_locked, meaning
    4993 	** a soft reset; this has already cleared
    4994 	** the VFTA and other state, so if no
    4995 	** vlans have been registered, do nothing.
   4996 	*/
   4997 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   4998 		return;
   4999 	}
   5000 
   5001 	/*
    5002 	** A soft reset zeroes out the VFTA, so
   5003 	** we need to repopulate it now.
   5004 	*/
   5005 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5006 		if (adapter->shadow_vfta[i] != 0)
   5007 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5008 			    adapter->shadow_vfta[i]);
   5009 
   5010 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5011 	/* Enable the Filter Table if enabled */
   5012 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5013 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5014 		ctrl |= IXGBE_VLNCTRL_VFE;
   5015 	}
   5016 	if (hw->mac.type == ixgbe_mac_82598EB)
   5017 		ctrl |= IXGBE_VLNCTRL_VME;
   5018 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5019 
   5020 	/* Setup the queues for vlans */
   5021 	for (int i = 0; i < adapter->num_queues; i++) {
   5022 		rxr = &adapter->rx_rings[i];
   5023 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5024 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5025 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5026 			ctrl |= IXGBE_RXDCTL_VME;
   5027 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5028 		}
   5029 		rxr->vtag_strip = TRUE;
   5030 	}
   5031 }
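/*
 * Worked example of the VFTA indexing used above (and in the disabled
 * register/unregister handlers): for vtag 100, index = (100 >> 5) &
 * 0x7F = 3 and bit = 100 & 0x1F = 4, so membership is bit 4 of
 * shadow_vfta[3], which the loop above replays into the hardware
 * VFTA(3) register after a soft reset.
 */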
   5032 
   5033 static void
   5034 ixgbe_enable_intr(struct adapter *adapter)
   5035 {
   5036 	struct ixgbe_hw	*hw = &adapter->hw;
   5037 	struct ix_queue	*que = adapter->queues;
   5038 	u32		mask, fwsm;
   5039 
   5040 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5041 	/* Enable Fan Failure detection */
   5042 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
    5043 		mask |= IXGBE_EIMS_GPI_SDP1;
   5044 
   5045 	switch (adapter->hw.mac.type) {
   5046 		case ixgbe_mac_82599EB:
   5047 			mask |= IXGBE_EIMS_ECC;
   5048 			mask |= IXGBE_EIMS_GPI_SDP0;
   5049 			mask |= IXGBE_EIMS_GPI_SDP1;
   5050 			mask |= IXGBE_EIMS_GPI_SDP2;
   5051 #ifdef IXGBE_FDIR
   5052 			mask |= IXGBE_EIMS_FLOW_DIR;
   5053 #endif
   5054 			break;
   5055 		case ixgbe_mac_X540:
   5056 			mask |= IXGBE_EIMS_ECC;
   5057 			/* Detect if Thermal Sensor is enabled */
   5058 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5059 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5060 				mask |= IXGBE_EIMS_TS;
   5061 #ifdef IXGBE_FDIR
   5062 			mask |= IXGBE_EIMS_FLOW_DIR;
   5063 #endif
   5064 		/* falls through */
   5065 		default:
   5066 			break;
   5067 	}
   5068 
   5069 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5070 
   5071 	/* With RSS we use auto clear */
   5072 	if (adapter->msix_mem) {
   5073 		mask = IXGBE_EIMS_ENABLE_MASK;
   5074 		/* Don't autoclear Link */
   5075 		mask &= ~IXGBE_EIMS_OTHER;
   5076 		mask &= ~IXGBE_EIMS_LSC;
   5077 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5078 	}
   5079 
   5080 	/*
    5081 	** Now enable all queues; this is done separately to
    5082 	** allow handling of the extended (beyond 32) MSIX
    5083 	** vectors that can be used by the 82599.
    5084 	*/
    5085 	for (int i = 0; i < adapter->num_queues; i++, que++)
    5086 		ixgbe_enable_queue(adapter, que->msix);
   5087 
   5088 	IXGBE_WRITE_FLUSH(hw);
   5089 
   5090 	return;
   5091 }
   5092 
   5093 static void
   5094 ixgbe_disable_intr(struct adapter *adapter)
   5095 {
   5096 	if (adapter->msix_mem)
   5097 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5098 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5099 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5100 	} else {
   5101 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5102 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5103 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5104 	}
   5105 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5106 	return;
   5107 }
   5108 
   5109 u16
   5110 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5111 {
   5112 	switch (reg % 4) {
   5113 	case 0:
   5114 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5115 		    __BITS(15, 0);
   5116 	case 2:
   5117 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5118 		    reg - 2), __BITS(31, 16));
   5119 	default:
    5120 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5121 		break;
   5122 	}
   5123 }
   5124 
   5125 void
   5126 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5127 {
   5128 	pcireg_t old;
   5129 
   5130 	switch (reg % 4) {
   5131 	case 0:
   5132 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5133 		    __BITS(31, 16);
   5134 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5135 		break;
   5136 	case 2:
   5137 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5138 		    __BITS(15, 0);
   5139 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5140 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5141 		break;
   5142 	default:
    5143 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5144 		break;
   5145 	}
   5146 
   5147 	return;
   5148 }
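#if 0
/*
 * Usage sketch (hypothetical offsets, not compiled in): 16-bit config
 * accesses are folded into 32-bit configuration-space cycles by the
 * two helpers above.
 */
static void
ixgbe_cfg_example(struct ixgbe_hw *hw)
{
	u16 lo, hi;

	lo = ixgbe_read_pci_cfg(hw, 0xA8);  /* bits 15:0 of dword 0xA8 */
	hi = ixgbe_read_pci_cfg(hw, 0xAA);  /* bits 31:16 of same dword */
	/* The write read-modify-writes, preserving the low half at 0xA8. */
	ixgbe_write_pci_cfg(hw, 0xAA, hi);
}
#endif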
   5149 
   5150 /*
   5151 ** Setup the correct IVAR register for a particular MSIX interrupt
   5152 **   (yes this is all very magic and confusing :)
   5153 **  - entry is the register array entry
   5154 **  - vector is the MSIX vector for this queue
   5155 **  - type is RX/TX/MISC
   5156 */
   5157 static void
   5158 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5159 {
   5160 	struct ixgbe_hw *hw = &adapter->hw;
   5161 	u32 ivar, index;
   5162 
   5163 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5164 
   5165 	switch (hw->mac.type) {
   5166 
   5167 	case ixgbe_mac_82598EB:
   5168 		if (type == -1)
   5169 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5170 		else
   5171 			entry += (type * 64);
   5172 		index = (entry >> 2) & 0x1F;
   5173 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5174 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5175 		ivar |= (vector << (8 * (entry & 0x3)));
   5176 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5177 		break;
   5178 
   5179 	case ixgbe_mac_82599EB:
   5180 	case ixgbe_mac_X540:
   5181 		if (type == -1) { /* MISC IVAR */
   5182 			index = (entry & 1) * 8;
   5183 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5184 			ivar &= ~(0xFF << index);
   5185 			ivar |= (vector << index);
   5186 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5187 		} else {	/* RX/TX IVARS */
   5188 			index = (16 * (entry & 1)) + (8 * type);
   5189 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5190 			ivar &= ~(0xFF << index);
   5191 			ivar |= (vector << index);
   5192 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
   5193 		}
    5194 		break;
   5195 	default:
   5196 		break;
   5197 	}
   5198 }
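/*
 * Worked example for the 82599/X540 branch above: for queue entry 5,
 * TX (type 1), the byte offset is (16 * (5 & 1)) + (8 * 1) = 24, so
 * the vector (with IXGBE_IVAR_ALLOC_VAL or'ed in) lands in bits 31:24
 * of IVAR(5 >> 1) = IVAR(2); the matching RX entry for queue 5 sits
 * eight bits below, in bits 23:16.
 */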
   5199 
   5200 static void
   5201 ixgbe_configure_ivars(struct adapter *adapter)
   5202 {
   5203 	struct  ix_queue *que = adapter->queues;
   5204 	u32 newitr;
   5205 
   5206 	if (ixgbe_max_interrupt_rate > 0)
   5207 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5208 	else
   5209 		newitr = 0;
   5210 
    5211 	for (int i = 0; i < adapter->num_queues; i++, que++) {
    5212 		/* First the RX queue entry */
    5213 		ixgbe_set_ivar(adapter, i, que->msix, 0);
    5214 		/* ... and the TX */
    5215 		ixgbe_set_ivar(adapter, i, que->msix, 1);
    5216 		/* Set an Initial EITR value */
    5217 		IXGBE_WRITE_REG(&adapter->hw,
    5218 		    IXGBE_EITR(que->msix), newitr);
   5219 	}
   5220 
   5221 	/* For the Link interrupt */
    5222 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5223 }
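/*
 * EITR arithmetic sketch (hypothetical rate): with
 * ixgbe_max_interrupt_rate = 8000, newitr = (4000000 / 8000) & 0x0FF8
 * = 0x1F0.  The sysctl handler later in this file inverts this:
 * usec = (0x1F0 & 0x0FF8) >> 3 = 62 and rate = 500000 / 62 = 8064,
 * the small error coming from masking off the low three bits of the
 * interval field.
 */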
   5224 
   5225 /*
    5226 ** ixgbe_sfp_probe - called from the local timer to
    5227 ** determine whether a port has had optics inserted.
   5228 */
   5229 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5230 {
   5231 	struct ixgbe_hw	*hw = &adapter->hw;
   5232 	device_t	dev = adapter->dev;
   5233 	bool		result = FALSE;
   5234 
   5235 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5236 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5237 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5238 		if (ret)
    5239 			goto out;
    5240 		ret = hw->phy.ops.reset(hw);
    5241 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5242 			device_printf(dev, "Unsupported SFP+ module detected!\n");
    5243 			device_printf(dev, "Reload driver with supported module.\n");
    5244 			adapter->sfp_probe = FALSE;
    5245 			goto out;
    5246 		} else
    5247 			device_printf(dev, "SFP+ module detected!\n");
   5248 		/* We now have supported optics */
   5249 		adapter->sfp_probe = FALSE;
   5250 		/* Set the optics type so system reports correctly */
   5251 		ixgbe_setup_optics(adapter);
   5252 		result = TRUE;
   5253 	}
   5254 out:
   5255 	return (result);
   5256 }
   5257 
   5258 /*
   5259 ** Tasklet handler for MSIX Link interrupts
    5260 **  - run outside interrupt context since it might sleep
   5261 */
   5262 static void
   5263 ixgbe_handle_link(void *context)
   5264 {
   5265 	struct adapter  *adapter = context;
   5266 
   5267 	if (ixgbe_check_link(&adapter->hw,
   5268 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
    5269 		ixgbe_update_link_status(adapter);
   5270 }
   5271 
   5272 /*
   5273 ** Tasklet for handling SFP module interrupts
   5274 */
   5275 static void
   5276 ixgbe_handle_mod(void *context)
   5277 {
   5278 	struct adapter  *adapter = context;
   5279 	struct ixgbe_hw *hw = &adapter->hw;
   5280 	device_t	dev = adapter->dev;
   5281 	u32 err;
   5282 
   5283 	err = hw->phy.ops.identify_sfp(hw);
   5284 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5285 		device_printf(dev,
   5286 		    "Unsupported SFP+ module type was detected.\n");
   5287 		return;
   5288 	}
   5289 	err = hw->mac.ops.setup_sfp(hw);
   5290 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5291 		device_printf(dev,
   5292 		    "Setup failure - unsupported SFP+ module type.\n");
   5293 		return;
   5294 	}
   5295 	softint_schedule(adapter->msf_si);
   5296 	return;
   5297 }
   5298 
   5299 
   5300 /*
   5301 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5302 */
   5303 static void
   5304 ixgbe_handle_msf(void *context)
   5305 {
   5306 	struct adapter  *adapter = context;
   5307 	struct ixgbe_hw *hw = &adapter->hw;
   5308 	u32 autoneg;
   5309 	bool negotiate;
   5310 
   5311 	autoneg = hw->phy.autoneg_advertised;
   5312 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5313 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5314 	else
   5315 		negotiate = 0;
   5316 	if (hw->mac.ops.setup_link)
   5317 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5318 	return;
   5319 }
   5320 
   5321 #ifdef IXGBE_FDIR
   5322 /*
   5323 ** Tasklet for reinitializing the Flow Director filter table
   5324 */
   5325 static void
   5326 ixgbe_reinit_fdir(void *context)
   5327 {
   5328 	struct adapter  *adapter = context;
   5329 	struct ifnet   *ifp = adapter->ifp;
   5330 
   5331 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5332 		return;
   5333 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5334 	adapter->fdir_reinit = 0;
   5335 	/* re-enable flow director interrupts */
   5336 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5337 	/* Restart the interface */
   5338 	ifp->if_flags |= IFF_RUNNING;
   5339 	return;
   5340 }
   5341 #endif
   5342 
   5343 /**********************************************************************
   5344  *
   5345  *  Update the board statistics counters.
   5346  *
   5347  **********************************************************************/
   5348 static void
   5349 ixgbe_update_stats_counters(struct adapter *adapter)
   5350 {
   5351 	struct ifnet   *ifp = adapter->ifp;
   5352 	struct ixgbe_hw *hw = &adapter->hw;
   5353 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5354 	u64  total_missed_rx = 0;
   5355 	uint64_t crcerrs, rlec;
   5356 
   5357 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5358 	adapter->stats.crcerrs.ev_count += crcerrs;
   5359 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5360 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5361 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5362 
   5363 	/*
   5364 	** Note: these are for the 8 possible traffic classes,
    5365 	**	 which the current implementation does not use,
    5366 	**	 therefore only class 0 should read real data.
   5367 	*/
   5368 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5369 		int j = i % adapter->num_queues;
   5370 		u32 mp;
   5371 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5372 		/* missed_rx tallies misses for the gprc workaround */
   5373 		missed_rx += mp;
   5374 		/* global total per queue */
    5375 		adapter->stats.mpc[j].ev_count += mp;
   5376 		/* Running comprehensive total for stats display */
   5377 		total_missed_rx += mp;
   5378 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5379 			adapter->stats.rnbc[j] +=
   5380 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5381 			adapter->stats.qbtc[j].ev_count +=
   5382 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5383 			adapter->stats.qbrc[j].ev_count +=
   5384 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5385 			adapter->stats.pxonrxc[j].ev_count +=
   5386 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5387 		} else {
   5388 			adapter->stats.pxonrxc[j].ev_count +=
   5389 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5390 		}
   5391 		adapter->stats.pxontxc[j].ev_count +=
   5392 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5393 		adapter->stats.pxofftxc[j].ev_count +=
   5394 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5395 		adapter->stats.pxoffrxc[j].ev_count +=
   5396 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5397 		adapter->stats.pxon2offc[j].ev_count +=
   5398 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5399 	}
   5400 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5401 		int j = i % adapter->num_queues;
   5402 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5403 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5404 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5405 	}
   5406 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5407 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5408 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5409 	adapter->stats.rlec.ev_count += rlec;
   5410 
   5411 	/* Hardware workaround, gprc counts missed packets */
   5412 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5413 
   5414 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5415 	adapter->stats.lxontxc.ev_count += lxon;
   5416 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5417 	adapter->stats.lxofftxc.ev_count += lxoff;
   5418 	total = lxon + lxoff;
   5419 
   5420 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5421 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5422 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5423 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5424 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5425 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5426 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5427 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5428 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5429 	} else {
   5430 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5431 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5432 		/* 82598 only has a counter in the high register */
   5433 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5434 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5435 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5436 	}
   5437 
   5438 	/*
   5439 	 * Workaround: mprc hardware is incorrectly counting
   5440 	 * broadcasts, so for now we subtract those.
   5441 	 */
   5442 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5443 	adapter->stats.bprc.ev_count += bprc;
   5444 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5445 
   5446 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5447 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5448 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5449 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5450 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5451 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5452 
   5453 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5454 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5455 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5456 
   5457 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5458 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5459 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5460 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5461 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5462 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5463 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5464 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5465 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5466 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5467 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5468 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5469 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5470 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5471 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5472 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5473 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5474 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5475 
   5476 	/* Only read FCOE on 82599 */
   5477 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5478 		adapter->stats.fcoerpdc.ev_count +=
   5479 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5480 		adapter->stats.fcoeprc.ev_count +=
   5481 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5482 		adapter->stats.fcoeptc.ev_count +=
   5483 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5484 		adapter->stats.fcoedwrc.ev_count +=
   5485 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5486 		adapter->stats.fcoedwtc.ev_count +=
   5487 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5488 	}
   5489 
   5490 	/* Fill out the OS statistics structure */
   5491 	/*
   5492 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5493 	 * adapter->stats counters. It's required to make ifconfig -z
    5494 	 * (SIOCZIFDATA) work.
   5495 	 */
   5496 	ifp->if_collisions = 0;
   5497 
   5498 	/* Rx Errors */
   5499 	ifp->if_iqdrops += total_missed_rx;
   5500 	ifp->if_ierrors += crcerrs + rlec;
   5501 }
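/*
 * Note on the wide counters above: gorc, gotc and tor are split
 * across a low/high register pair on 82599/X540; the low half is read
 * first and the high half folded in at bit 32, e.g.
 * gorc += GORCL + ((u64)GORCH << 32).  On the 82598 only the high
 * register carries a usable count, hence the separate branch.
 */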
   5502 
   5503 /** ixgbe_sysctl_tdh_handler - Handler function
   5504  *  Retrieves the TDH value from the hardware
   5505  */
   5506 static int
   5507 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5508 {
   5509 	struct sysctlnode node;
   5510 	uint32_t val;
   5511 	struct tx_ring *txr;
   5512 
   5513 	node = *rnode;
   5514 	txr = (struct tx_ring *)node.sysctl_data;
   5515 	if (txr == NULL)
   5516 		return 0;
   5517 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5518 	node.sysctl_data = &val;
   5519 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5520 }
   5521 
   5522 /** ixgbe_sysctl_tdt_handler - Handler function
   5523  *  Retrieves the TDT value from the hardware
   5524  */
   5525 static int
   5526 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5527 {
   5528 	struct sysctlnode node;
   5529 	uint32_t val;
   5530 	struct tx_ring *txr;
   5531 
   5532 	node = *rnode;
   5533 	txr = (struct tx_ring *)node.sysctl_data;
   5534 	if (txr == NULL)
   5535 		return 0;
   5536 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5537 	node.sysctl_data = &val;
   5538 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5539 }
   5540 
   5541 /** ixgbe_sysctl_rdh_handler - Handler function
   5542  *  Retrieves the RDH value from the hardware
   5543  */
   5544 static int
   5545 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5546 {
   5547 	struct sysctlnode node;
   5548 	uint32_t val;
   5549 	struct rx_ring *rxr;
   5550 
   5551 	node = *rnode;
   5552 	rxr = (struct rx_ring *)node.sysctl_data;
   5553 	if (rxr == NULL)
   5554 		return 0;
   5555 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5556 	node.sysctl_data = &val;
   5557 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5558 }
   5559 
   5560 /** ixgbe_sysctl_rdt_handler - Handler function
   5561  *  Retrieves the RDT value from the hardware
   5562  */
   5563 static int
   5564 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5565 {
   5566 	struct sysctlnode node;
   5567 	uint32_t val;
   5568 	struct rx_ring *rxr;
   5569 
   5570 	node = *rnode;
   5571 	rxr = (struct rx_ring *)node.sysctl_data;
   5572 	if (rxr == NULL)
   5573 		return 0;
   5574 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5575 	node.sysctl_data = &val;
   5576 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5577 }
   5578 
   5579 static int
   5580 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5581 {
   5582 	int error;
   5583 	struct sysctlnode node;
   5584 	struct ix_queue *que;
   5585 	uint32_t reg, usec, rate;
   5586 
   5587 	node = *rnode;
   5588 	que = (struct ix_queue *)node.sysctl_data;
   5589 	if (que == NULL)
   5590 		return 0;
   5591 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5592 	usec = ((reg & 0x0FF8) >> 3);
   5593 	if (usec > 0)
   5594 		rate = 500000 / usec;
   5595 	else
   5596 		rate = 0;
   5597 	node.sysctl_data = &rate;
   5598 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5599 	if (error)
   5600 		return error;
   5601 	reg &= ~0xfff; /* default, no limitation */
   5602 	ixgbe_max_interrupt_rate = 0;
   5603 	if (rate > 0 && rate < 500000) {
   5604 		if (rate < 1000)
   5605 			rate = 1000;
   5606 		ixgbe_max_interrupt_rate = rate;
    5607 		reg |= ((4000000 / rate) & 0x0FF8);
   5608 	}
   5609 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5610 	return 0;
   5611 }
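/*
 * Usage sketch (assuming the device attached as ixg0; the per-queue
 * node is created in ixgbe_add_hw_stats() below):
 *
 *	# sysctl hw.ixg0.queue0.interrupt_rate
 *	# sysctl -w hw.ixg0.queue0.interrupt_rate=8000
 *
 * Writing 0, or any value of 500000 and up, removes the limit;
 * values below 1000 are clamped to 1000.
 */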
   5612 
   5613 const struct sysctlnode *
   5614 ixgbe_sysctl_instance(struct adapter *adapter)
   5615 {
   5616 	const char *dvname;
   5617 	struct sysctllog **log;
   5618 	int rc;
   5619 	const struct sysctlnode *rnode;
   5620 
   5621 	log = &adapter->sysctllog;
   5622 	dvname = device_xname(adapter->dev);
   5623 
   5624 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5625 	    0, CTLTYPE_NODE, dvname,
   5626 	    SYSCTL_DESCR("ixgbe information and settings"),
   5627 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5628 		goto err;
   5629 
   5630 	return rnode;
   5631 err:
   5632 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5633 	return NULL;
   5634 }
   5635 
   5636 /*
   5637  * Add sysctl variables, one per statistic, to the system.
   5638  */
   5639 static void
   5640 ixgbe_add_hw_stats(struct adapter *adapter)
   5641 {
   5642 	device_t dev = adapter->dev;
   5643 	const struct sysctlnode *rnode, *cnode;
   5644 	struct sysctllog **log = &adapter->sysctllog;
   5645 	struct tx_ring *txr = adapter->tx_rings;
   5646 	struct rx_ring *rxr = adapter->rx_rings;
   5647 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5648 
   5649 	/* Driver Statistics */
   5650 #if 0
   5651 	/* These counters are not updated by the software */
   5652 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5653 			CTLFLAG_RD, &adapter->dropped_pkts,
   5654 			"Driver dropped packets");
   5655 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   5656 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   5657 			"???");
   5658 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   5659 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   5660 			"???");
   5661 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   5662 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   5663 			"???");
   5664 #endif
   5665 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   5666 	    NULL, device_xname(dev), "Handled queue in softint");
   5667 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   5668 	    NULL, device_xname(dev), "Requeued in softint");
   5669 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   5670 	    NULL, device_xname(dev), "Interrupt handler more rx");
   5671 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   5672 	    NULL, device_xname(dev), "Interrupt handler more tx");
   5673 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   5674 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   5675 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   5676 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   5677 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   5678 	    NULL, device_xname(dev), "m_defrag() failed");
   5679 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   5680 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   5681 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   5682 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   5683 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   5684 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   5685 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   5686 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   5687 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   5688 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   5689 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   5690 	    NULL, device_xname(dev), "Watchdog timeouts");
   5691 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   5692 	    NULL, device_xname(dev), "TSO errors");
   5693 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   5694 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   5695 
   5696 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   5697 		snprintf(adapter->queues[i].evnamebuf,
   5698 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   5699 		    device_xname(dev), i);
   5700 		snprintf(adapter->queues[i].namebuf,
   5701 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   5702 
   5703 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   5704 			aprint_error_dev(dev, "could not create sysctl root\n");
   5705 			break;
   5706 		}
   5707 
   5708 		if (sysctl_createv(log, 0, &rnode, &rnode,
   5709 		    0, CTLTYPE_NODE,
   5710 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   5711 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   5712 			break;
   5713 
   5714 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5715 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   5716 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   5717 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   5718 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   5719 			break;
   5720 
   5721 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5722 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   5723 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   5724 			NULL, 0, &(adapter->queues[i].irqs),
   5725 		    0, CTL_CREATE, CTL_EOL) != 0)
   5726 			break;
   5727 
   5728 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5729 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5730 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   5731 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   5732 		    0, CTL_CREATE, CTL_EOL) != 0)
   5733 			break;
   5734 
   5735 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5736 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5737 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   5738 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   5739 		    0, CTL_CREATE, CTL_EOL) != 0)
   5740 			break;
   5741 
   5742 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   5743 		    NULL, device_xname(dev), "TSO");
   5744 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   5745 		    NULL, adapter->queues[i].evnamebuf,
   5746 		    "Queue No Descriptor Available");
   5747 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   5748 		    NULL, adapter->queues[i].evnamebuf,
   5749 		    "Queue Packets Transmitted");
   5750 
   5751 #ifdef LRO
   5752 		struct lro_ctrl *lro = &rxr->lro;
   5753 #endif /* LRO */
   5754 
   5755 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5756 		    CTLFLAG_READONLY,
   5757 		    CTLTYPE_INT,
   5758 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   5759 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   5760 		    CTL_CREATE, CTL_EOL) != 0)
   5761 			break;
   5762 
   5763 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5764 		    CTLFLAG_READONLY,
   5765 		    CTLTYPE_INT,
   5766 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   5767 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   5768 		    CTL_CREATE, CTL_EOL) != 0)
   5769 			break;
   5770 
   5771 		if (i < __arraycount(adapter->stats.mpc)) {
   5772 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   5773 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5774 			    "Missed Packet Count");
   5775 		}
   5776 		if (i < __arraycount(adapter->stats.pxontxc)) {
   5777 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   5778 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5779 			    "pxontxc");
   5780 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   5781 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5782 			    "pxonrxc");
   5783 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   5784 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5785 			    "pxofftxc");
   5786 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   5787 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5788 			    "pxoffrxc");
   5789 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   5790 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5791 			    "pxon2offc");
   5792 		}
   5793 		if (i < __arraycount(adapter->stats.qprc)) {
   5794 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   5795 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5796 			    "qprc");
   5797 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   5798 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5799 			    "qptc");
   5800 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   5801 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5802 			    "qbrc");
   5803 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   5804 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5805 			    "qbtc");
   5806 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   5807 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5808 			    "qprdc");
   5809 		}
   5810 
   5811 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   5812 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   5813 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   5814 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   5815 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   5816 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   5817 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   5818 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   5819 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   5820 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   5821 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   5822 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   5823 #ifdef LRO
   5824 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   5825 				CTLFLAG_RD, &lro->lro_queued, 0,
   5826 				"LRO Queued");
   5827 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   5828 				CTLFLAG_RD, &lro->lro_flushed, 0,
   5829 				"LRO Flushed");
   5830 #endif /* LRO */
   5831 	}
   5832 
    5833 	/* MAC stats get their own sub node */
   5834 
   5835 
   5836 	snprintf(stats->namebuf,
   5837 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   5838 
   5839 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   5840 	    stats->namebuf, "rx csum offload - IP");
   5841 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   5842 	    stats->namebuf, "rx csum offload - L4");
   5843 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   5844 	    stats->namebuf, "rx csum offload - IP bad");
   5845 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   5846 	    stats->namebuf, "rx csum offload - L4 bad");
   5847 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   5848 	    stats->namebuf, "Interrupt conditions zero");
   5849 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   5850 	    stats->namebuf, "Legacy interrupts");
   5851 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   5852 	    stats->namebuf, "CRC Errors");
   5853 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   5854 	    stats->namebuf, "Illegal Byte Errors");
   5855 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   5856 	    stats->namebuf, "Byte Errors");
   5857 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   5858 	    stats->namebuf, "MAC Short Packets Discarded");
   5859 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   5860 	    stats->namebuf, "MAC Local Faults");
   5861 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   5862 	    stats->namebuf, "MAC Remote Faults");
   5863 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   5864 	    stats->namebuf, "Receive Length Errors");
   5865 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   5866 	    stats->namebuf, "Link XON Transmitted");
   5867 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   5868 	    stats->namebuf, "Link XON Received");
   5869 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   5870 	    stats->namebuf, "Link XOFF Transmitted");
   5871 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   5872 	    stats->namebuf, "Link XOFF Received");
   5873 
   5874 	/* Packet Reception Stats */
   5875 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   5876 	    stats->namebuf, "Total Octets Received");
   5877 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   5878 	    stats->namebuf, "Good Octets Received");
   5879 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   5880 	    stats->namebuf, "Total Packets Received");
   5881 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   5882 	    stats->namebuf, "Good Packets Received");
   5883 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   5884 	    stats->namebuf, "Multicast Packets Received");
   5885 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   5886 	    stats->namebuf, "Broadcast Packets Received");
   5887 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
    5888 	    stats->namebuf, "64 byte frames received");
   5889 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   5890 	    stats->namebuf, "65-127 byte frames received");
   5891 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   5892 	    stats->namebuf, "128-255 byte frames received");
   5893 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   5894 	    stats->namebuf, "256-511 byte frames received");
   5895 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   5896 	    stats->namebuf, "512-1023 byte frames received");
   5897 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
    5898 	    stats->namebuf, "1024-1522 byte frames received");
   5899 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   5900 	    stats->namebuf, "Receive Undersized");
   5901 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
    5902 	    stats->namebuf, "Fragmented Packets Received");
   5903 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   5904 	    stats->namebuf, "Oversized Packets Received");
   5905 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   5906 	    stats->namebuf, "Received Jabber");
   5907 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   5908 	    stats->namebuf, "Management Packets Received");
   5909 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   5910 	    stats->namebuf, "Checksum Errors");
   5911 
   5912 	/* Packet Transmission Stats */
   5913 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   5914 	    stats->namebuf, "Good Octets Transmitted");
   5915 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   5916 	    stats->namebuf, "Total Packets Transmitted");
   5917 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   5918 	    stats->namebuf, "Good Packets Transmitted");
   5919 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   5920 	    stats->namebuf, "Broadcast Packets Transmitted");
   5921 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   5922 	    stats->namebuf, "Multicast Packets Transmitted");
   5923 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   5924 	    stats->namebuf, "Management Packets Transmitted");
   5925 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
    5926 	    stats->namebuf, "64 byte frames transmitted");
   5927 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   5928 	    stats->namebuf, "65-127 byte frames transmitted");
   5929 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   5930 	    stats->namebuf, "128-255 byte frames transmitted");
   5931 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   5932 	    stats->namebuf, "256-511 byte frames transmitted");
   5933 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   5934 	    stats->namebuf, "512-1023 byte frames transmitted");
   5935 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   5936 	    stats->namebuf, "1024-1522 byte frames transmitted");
   5937 }
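/*
 * The event counters attached above are not sysctl leaves; on NetBSD
 * they can be inspected with vmstat -e, keyed by the per-queue
 * evnamebuf and per-MAC namebuf prefixes built here.
 */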
   5938 
   5939 /*
   5940 ** Set flow control using sysctl:
   5941 ** Flow control values:
   5942 ** 	0 - off
   5943 **	1 - rx pause
   5944 **	2 - tx pause
   5945 **	3 - full
   5946 */
   5947 static int
   5948 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   5949 {
   5950 	struct sysctlnode node;
   5951 	int error, last;
   5952 	struct adapter *adapter;
   5953 
   5954 	node = *rnode;
   5955 	adapter = (struct adapter *)node.sysctl_data;
   5956 	node.sysctl_data = &adapter->fc;
   5957 	last = adapter->fc;
   5958 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5959 	if (error != 0 || newp == NULL)
   5960 		return error;
   5961 
   5962 	/* Don't bother if it's not changed */
   5963 	if (adapter->fc == last)
   5964 		return (0);
   5965 
   5966 	switch (adapter->fc) {
   5967 		case ixgbe_fc_rx_pause:
   5968 		case ixgbe_fc_tx_pause:
   5969 		case ixgbe_fc_full:
   5970 			adapter->hw.fc.requested_mode = adapter->fc;
   5971 			if (adapter->num_queues > 1)
   5972 				ixgbe_disable_rx_drop(adapter);
   5973 			break;
   5974 		case ixgbe_fc_none:
   5975 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
   5976 			if (adapter->num_queues > 1)
   5977 				ixgbe_enable_rx_drop(adapter);
   5978 			break;
   5979 		default:
   5980 			adapter->fc = last;
   5981 			return (EINVAL);
   5982 	}
   5983 	/* Don't autoneg if forcing a value */
   5984 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   5985 	ixgbe_fc_enable(&adapter->hw);
   5986 	return 0;
   5987 }
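/*
 * Usage sketch (hypothetical node path; the fc node attachment is not
 * shown in this section):
 *
 *	# sysctl -w hw.ixg0.fc=3	(request full flow control)
 *
 * Note the side effect above: selecting any pause mode disables the
 * per-queue RX drop feature when multiple queues are in use, while
 * ixgbe_fc_none re-enables it.
 */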
   5988 
   5989 /*
   5990 ** Control link advertise speed:
   5991 **	1 - advertise only 1G
   5992 **	2 - advertise 100Mb
   5993 **	3 - advertise normal
   5994 */
   5995 static int
   5996 ixgbe_set_advertise(SYSCTLFN_ARGS)
   5997 {
   5998 	struct sysctlnode	node;
   5999 	int			t, error = 0;
   6000 	struct adapter		*adapter;
   6001 	device_t		dev;
   6002 	struct ixgbe_hw		*hw;
   6003 	ixgbe_link_speed	speed, last;
   6004 
   6005 	node = *rnode;
   6006 	adapter = (struct adapter *)node.sysctl_data;
   6007 	dev = adapter->dev;
   6008 	hw = &adapter->hw;
   6009 	last = adapter->advertise;
   6010 	t = adapter->advertise;
   6011 	node.sysctl_data = &t;
   6012 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6013 	if (error != 0 || newp == NULL)
   6014 		return error;
   6015 
   6016 	if (adapter->advertise == last) /* no change */
   6017 		return (0);
   6018 
   6019 	if (t == -1)
   6020 		return 0;
   6021 
   6022 	adapter->advertise = t;
   6023 
   6024 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
    6025 	    (hw->phy.multispeed_fiber)))
   6026 		return (EINVAL);
   6027 
   6028 	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
   6029 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
   6030 		return (EINVAL);
   6031 	}
   6032 
   6033 	if (adapter->advertise == 1)
    6034 		speed = IXGBE_LINK_SPEED_1GB_FULL;
    6035 	else if (adapter->advertise == 2)
    6036 		speed = IXGBE_LINK_SPEED_100_FULL;
    6037 	else if (adapter->advertise == 3)
    6038 		speed = IXGBE_LINK_SPEED_1GB_FULL |
    6039 		    IXGBE_LINK_SPEED_10GB_FULL;
    6040 	else {	/* bogus value */
   6041 		adapter->advertise = last;
   6042 		return (EINVAL);
   6043 	}
   6044 
   6045 	hw->mac.autotry_restart = TRUE;
   6046 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6047 
   6048 	return 0;
   6049 }
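/*
 * Example of the checks above: writing 2 (100Mb) succeeds only on an
 * X540 copper port; on an 82599 multispeed-fiber port it returns
 * EINVAL.  A value outside 1..3 (other than -1, which is ignored)
 * restores the previous setting before returning EINVAL.
 */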
   6050 
   6051 /*
   6052 ** Thermal Shutdown Trigger
   6053 **   - cause a Thermal Overtemp IRQ
   6054 */
   6055 static int
   6056 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6057 {
   6058 	struct sysctlnode node;
   6059 	int		error, fire = 0;
   6060 	struct adapter	*adapter;
   6061 	struct ixgbe_hw *hw;
   6062 
   6063 	node = *rnode;
   6064 	adapter = (struct adapter *)node.sysctl_data;
   6065 	hw = &adapter->hw;
   6066 
   6067 	if (hw->mac.type != ixgbe_mac_X540)
   6068 		return (0);
   6069 
   6070 	node.sysctl_data = &fire;
   6071 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6072 	if ((error) || (newp == NULL))
   6073 		return (error);
   6074 
   6075 	if (fire) {
   6076 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6077 		reg |= IXGBE_EICR_TS;
   6078 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6079 	}
   6080 
   6081 	return (0);
   6082 }
   6083 
   6084 /*
   6085 ** Enable the hardware to drop packets when the buffer is
    6086 ** full.  This is useful with multiple queues, so that no single
    6087 ** queue being full stalls the entire RX engine.  We only
    6088 ** enable this when multiqueue is in use AND flow control is
   6089 ** disabled.
   6090 */
   6091 static void
   6092 ixgbe_enable_rx_drop(struct adapter *adapter)
   6093 {
    6094 	struct ixgbe_hw *hw = &adapter->hw;
    6095 
    6096 	for (int i = 0; i < adapter->num_queues; i++) {
    6097 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
    6098 		srrctl |= IXGBE_SRRCTL_DROP_EN;
    6099 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6100 	}
   6101 }
   6102 
   6103 static void
   6104 ixgbe_disable_rx_drop(struct adapter *adapter)
   6105 {
    6106 	struct ixgbe_hw *hw = &adapter->hw;
    6107 
    6108 	for (int i = 0; i < adapter->num_queues; i++) {
    6109 		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
    6110 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
    6111 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6112 	}
   6113 }
   6114