ixgbe.c revision 1.29
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
     62 /*$NetBSD: ixgbe.c,v 1.29 2015/05/06 09:21:22 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 #include "vlan.h"
     69 
     70 /*********************************************************************
     71  *  Set this to one to display debug statistics
     72  *********************************************************************/
     73 int             ixgbe_display_debug_stats = 0;
     74 
     75 /*********************************************************************
     76  *  Driver version
     77  *********************************************************************/
     78 char ixgbe_driver_version[] = "2.5.8 - HEAD";
     79 
     80 /*********************************************************************
     81  *  PCI Device ID Table
     82  *
     83  *  Used by probe to select devices to load on
     84  *  Last field stores an index into ixgbe_strings
     85  *  Last entry must be all 0s
     86  *
     87  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     88  *********************************************************************/
     89 
     90 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     91 {
     92 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     93 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    115 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    116 	/* required last entry */
    117 	{0, 0, 0, 0, 0}
    118 };
    119 
    120 /*********************************************************************
    121  *  Table of branding strings
    122  *********************************************************************/
    123 
    124 static const char    *ixgbe_strings[] = {
    125 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
    126 };
    127 
    128 /*********************************************************************
    129  *  Function prototypes
    130  *********************************************************************/
    131 static int      ixgbe_probe(device_t, cfdata_t, void *);
    132 static void     ixgbe_attach(device_t, device_t, void *);
    133 static int      ixgbe_detach(device_t, int);
    134 #if 0
    135 static int      ixgbe_shutdown(device_t);
    136 #endif
    137 #if IXGBE_LEGACY_TX
    138 static void     ixgbe_start(struct ifnet *);
    139 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    140 #else
    141 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    142 static int	ixgbe_mq_start_locked(struct ifnet *,
    143                     struct tx_ring *, struct mbuf *);
    144 static void	ixgbe_qflush(struct ifnet *);
    145 static void	ixgbe_deferred_mq_start(void *);
    146 #endif
    147 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    148 static void	ixgbe_ifstop(struct ifnet *, int);
    149 static int	ixgbe_init(struct ifnet *);
    150 static void	ixgbe_init_locked(struct adapter *);
    151 static void     ixgbe_stop(void *);
    152 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    153 static int      ixgbe_media_change(struct ifnet *);
    154 static void     ixgbe_identify_hardware(struct adapter *);
    155 static int      ixgbe_allocate_pci_resources(struct adapter *,
    156 		    const struct pci_attach_args *);
    157 static int      ixgbe_allocate_msix(struct adapter *,
    158 		    const struct pci_attach_args *);
    159 static int      ixgbe_allocate_legacy(struct adapter *,
    160 		    const struct pci_attach_args *);
    161 static int	ixgbe_allocate_queues(struct adapter *);
    162 static int	ixgbe_setup_msix(struct adapter *);
    163 static void	ixgbe_free_pci_resources(struct adapter *);
    164 static void	ixgbe_local_timer(void *);
    165 static int	ixgbe_setup_interface(device_t, struct adapter *);
    166 static void	ixgbe_config_link(struct adapter *);
    167 
    168 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    169 static int	ixgbe_setup_transmit_structures(struct adapter *);
    170 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    171 static void     ixgbe_initialize_transmit_units(struct adapter *);
    172 static void     ixgbe_free_transmit_structures(struct adapter *);
    173 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    174 
    175 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    176 static int      ixgbe_setup_receive_structures(struct adapter *);
    177 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    178 static void     ixgbe_initialize_receive_units(struct adapter *);
    179 static void     ixgbe_free_receive_structures(struct adapter *);
    180 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    181 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    182 
    183 static void     ixgbe_enable_intr(struct adapter *);
    184 static void     ixgbe_disable_intr(struct adapter *);
    185 static void     ixgbe_update_stats_counters(struct adapter *);
    186 static bool	ixgbe_txeof(struct tx_ring *);
    187 static bool	ixgbe_rxeof(struct ix_queue *);
    188 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    189 		    struct ixgbe_hw_stats *);
    190 static void     ixgbe_set_promisc(struct adapter *);
    191 static void     ixgbe_set_multi(struct adapter *);
    192 static void     ixgbe_update_link_status(struct adapter *);
    193 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    194 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    195 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    196 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    197 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    198 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    199 		    struct ixgbe_dma_alloc *, int);
    200 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    201 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    202 		    struct mbuf *, u32 *, u32 *);
    203 static int	ixgbe_tso_setup(struct tx_ring *,
    204 		    struct mbuf *, u32 *, u32 *);
    205 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    206 static void	ixgbe_configure_ivars(struct adapter *);
    207 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    208 
    209 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    210 #if 0
    211 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    212 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    213 #endif
    214 
    215 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    216 
    217 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    218 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    219 		    struct mbuf *, u32);
    220 
    221 static void	ixgbe_enable_rx_drop(struct adapter *);
    222 static void	ixgbe_disable_rx_drop(struct adapter *);
    223 
    224 /* Support for pluggable optic modules */
    225 static bool	ixgbe_sfp_probe(struct adapter *);
    226 static void	ixgbe_setup_optics(struct adapter *);
    227 
     229 /* Legacy (single vector) interrupt handler */
    229 static int	ixgbe_legacy_irq(void *);
    230 
    231 #if defined(NETBSD_MSI_OR_MSIX)
    232 /* The MSI/X Interrupt handlers */
    233 static void	ixgbe_msix_que(void *);
    234 static void	ixgbe_msix_link(void *);
    235 #endif
    236 
    237 /* Software interrupts for deferred work */
    238 static void	ixgbe_handle_que(void *);
    239 static void	ixgbe_handle_link(void *);
    240 static void	ixgbe_handle_msf(void *);
    241 static void	ixgbe_handle_mod(void *);
    242 
    243 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    244 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    245 
    246 #ifdef IXGBE_FDIR
    247 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    248 static void	ixgbe_reinit_fdir(void *, int);
    249 #endif
    250 
    251 /*********************************************************************
    252  *  FreeBSD Device Interface Entry Points
    253  *********************************************************************/
    254 
    255 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    256     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    257     DVF_DETACH_SHUTDOWN);
    258 
    259 #if 0
    260 devclass_t ixgbe_devclass;
    261 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    262 
    263 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    264 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    265 #endif
    266 
    267 /*
    268 ** TUNEABLE PARAMETERS:
    269 */
    270 
    271 /*
     272 ** AIM: Adaptive Interrupt Moderation,
     273 ** which means that the interrupt rate
     274 ** is varied over time based on the
     275 ** traffic for that interrupt vector.
    276 */
    277 static int ixgbe_enable_aim = TRUE;
    278 #define TUNABLE_INT(__x, __y)
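         /*
          * NetBSD has no FreeBSD-style loader tunables, so TUNABLE_INT() is
          * defined away above and the TUNABLE_INT() uses below are no-ops;
          * the initializers here are effectively compile-time defaults.
          */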
    279 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
    280 
    281 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    282 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
    283 
    284 /* How many packets rxeof tries to clean at a time */
    285 static int ixgbe_rx_process_limit = 256;
    286 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    287 
    288 /* How many packets txeof tries to clean at a time */
    289 static int ixgbe_tx_process_limit = 256;
    290 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    291 
    292 /*
     293 ** Smart speed setting, default to on.
     294 ** This only works as a compile-time option
     295 ** right now, since it is used during attach;
     296 ** set this to 'ixgbe_smart_speed_off' to
     297 ** disable.
    298 */
    299 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    300 
    301 /*
    302  * MSIX should be the default for best performance,
    303  * but this allows it to be forced off for testing.
    304  */
    305 static int ixgbe_enable_msix = 1;
    306 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    307 
    308 #if defined(NETBSD_MSI_OR_MSIX)
    309 /*
     310  * Number of Queues: when set to 0 it
     311  * autoconfigures based on the number
     312  * of CPUs, with a max of 8. This can
     313  * be overridden manually here.
    314  */
    315 static int ixgbe_num_queues = 0;
    316 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    317 #endif
    318 
    319 /*
     320 ** Number of TX descriptors per ring;
     321 ** set higher than RX as this seems
     322 ** the better performing choice.
    323 */
    324 static int ixgbe_txd = PERFORM_TXD;
    325 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
    326 
    327 /* Number of RX descriptors per ring */
    328 static int ixgbe_rxd = PERFORM_RXD;
    329 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    330 
    331 /*
    332 ** HW RSC control:
    333 **  this feature only works with
    334 **  IPv4, and only on 82599 and later.
     335 **  Also, this will cause IP forwarding to
     336 **  fail, and unlike LRO it cannot be
     337 **  controlled by the stack. For all these
     338 **  reasons it is best left off, with no
     339 **  tuneable interface; enabling it requires
     340 **  recompiling with this set to TRUE.
    342 */
    343 static bool ixgbe_rsc_enable = FALSE;
    344 
    345 /* Keep running tab on them for sanity check */
    346 static int ixgbe_total_ports;
    347 
    348 #ifdef IXGBE_FDIR
    349 /*
    350 ** For Flow Director: this is the
    351 ** number of TX packets we sample
     352 ** for the filter pool; this means
     353 ** every 20th packet will be probed.
    354 **
    355 ** This feature can be disabled by
    356 ** setting this to 0.
    357 */
    358 static int atr_sample_rate = 20;
    359 /*
    360 ** Flow Director actually 'steals'
    361 ** part of the packet buffer as its
     362 ** filter pool; this variable controls
    363 ** how much it uses:
    364 **  0 = 64K, 1 = 128K, 2 = 256K
    365 */
    366 static int fdir_pballoc = 1;
    367 #endif
    368 
    369 #ifdef DEV_NETMAP
    370 /*
    371  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    372  * be a reference on how to implement netmap support in a driver.
    373  * Additional comments are in ixgbe_netmap.h .
    374  *
    375  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    376  * that extend the standard driver.
    377  */
    378 #include <dev/netmap/ixgbe_netmap.h>
    379 #endif /* DEV_NETMAP */
    380 
    381 /*********************************************************************
    382  *  Device identification routine
    383  *
     384  *  ixgbe_probe determines whether the driver should be loaded on
     385  *  an adapter based on the PCI vendor/device ID of the adapter.
    386  *
    387  *  return 1 on success, 0 on failure
    388  *********************************************************************/
    389 
    390 static int
    391 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    392 {
    393 	const struct pci_attach_args *pa = aux;
    394 
    395 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    396 }
    397 
    398 static ixgbe_vendor_info_t *
    399 ixgbe_lookup(const struct pci_attach_args *pa)
    400 {
    401 	pcireg_t subid;
    402 	ixgbe_vendor_info_t *ent;
    403 
    404 	INIT_DEBUGOUT("ixgbe_probe: begin");
    405 
    406 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    407 		return NULL;
    408 
    409 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    410 
    411 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    412 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    413 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    414 
    415 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    416 		     ent->subvendor_id == 0) &&
    417 
    418 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    419 		     ent->subdevice_id == 0)) {
    420 			++ixgbe_total_ports;
    421 			return ent;
    422 		}
    423 	}
    424 	return NULL;
    425 }
    426 
    427 
    428 static void
    429 ixgbe_sysctl_attach(struct adapter *adapter)
    430 {
    431 	struct sysctllog **log;
    432 	const struct sysctlnode *rnode, *cnode;
    433 	device_t dev;
    434 
    435 	dev = adapter->dev;
    436 	log = &adapter->sysctllog;
    437 
    438 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    439 		aprint_error_dev(dev, "could not create sysctl root\n");
    440 		return;
    441 	}
    442 
    443 	if (sysctl_createv(log, 0, &rnode, &cnode,
    444 	    CTLFLAG_READONLY, CTLTYPE_INT,
    445 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    446 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    447 		aprint_error_dev(dev, "could not create sysctl\n");
    448 
    449 	if (sysctl_createv(log, 0, &rnode, &cnode,
    450 	    CTLFLAG_READONLY, CTLTYPE_INT,
    451 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    452 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    453 		aprint_error_dev(dev, "could not create sysctl\n");
    454 
    455 	if (sysctl_createv(log, 0, &rnode, &cnode,
    456 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    457 	    "fc", SYSCTL_DESCR("Flow Control"),
    458 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    459 		aprint_error_dev(dev, "could not create sysctl\n");
    460 
    461 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    462 	 * XXX It's that way in the FreeBSD driver that this derives from.
    463 	 */
    464 	if (sysctl_createv(log, 0, &rnode, &cnode,
    465 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    466 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    467 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    468 		aprint_error_dev(dev, "could not create sysctl\n");
    469 
    470 	if (sysctl_createv(log, 0, &rnode, &cnode,
    471 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    472 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    473 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    474 		aprint_error_dev(dev, "could not create sysctl\n");
    475 
    476 	if (sysctl_createv(log, 0, &rnode, &cnode,
    477 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    478 	    "ts", SYSCTL_DESCR("Thermal Test"),
    479 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    480 		aprint_error_dev(dev, "could not create sysctl\n");
    481 }
    482 
    483 /*********************************************************************
    484  *  Device initialization routine
    485  *
    486  *  The attach entry point is called when the driver is being loaded.
    487  *  This routine identifies the type of hardware, allocates all resources
    488  *  and initializes the hardware.
    489  *
    490  *  return 0 on success, positive on failure
    491  *********************************************************************/
    492 
    493 static void
    494 ixgbe_attach(device_t parent, device_t dev, void *aux)
    495 {
    496 	struct adapter *adapter;
    497 	struct ixgbe_hw *hw;
    498 	int             error = 0;
    499 	u16		csum;
    500 	u32		ctrl_ext;
    501 	ixgbe_vendor_info_t *ent;
    502 	const struct pci_attach_args *pa = aux;
    503 
    504 	INIT_DEBUGOUT("ixgbe_attach: begin");
    505 
    506 	/* Allocate, clear, and link in our adapter structure */
    507 	adapter = device_private(dev);
    508 	adapter->dev = adapter->osdep.dev = dev;
    509 	hw = &adapter->hw;
    510 	adapter->osdep.pc = pa->pa_pc;
    511 	adapter->osdep.tag = pa->pa_tag;
    512 	adapter->osdep.dmat = pa->pa_dmat;
    513 
    514 	ent = ixgbe_lookup(pa);
    515 
    516 	KASSERT(ent != NULL);
    517 
    518 	aprint_normal(": %s, Version - %s\n",
    519 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    520 
     521 	/* Core Lock Init */
    522 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    523 
    524 	/* SYSCTL APIs */
    525 
    526 	ixgbe_sysctl_attach(adapter);
    527 
    528 	/* Set up the timer callout */
    529 	callout_init(&adapter->timer, 0);
    530 
    531 	/* Determine hardware revision */
    532 	ixgbe_identify_hardware(adapter);
    533 
    534 	/* Do base PCI setup - map BAR0 */
    535 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    536 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    537 		error = ENXIO;
    538 		goto err_out;
    539 	}
    540 
    541 	/* Do descriptor calc and sanity checks */
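         	/* The ring byte length must be a multiple of DBA_ALIGN and the
         	 * descriptor count must stay within the MIN/MAX supported range. */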
    542 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    543 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    544 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    545 		adapter->num_tx_desc = DEFAULT_TXD;
    546 	} else
    547 		adapter->num_tx_desc = ixgbe_txd;
    548 
    549 	/*
    550 	** With many RX rings it is easy to exceed the
    551 	** system mbuf allocation. Tuning nmbclusters
    552 	** can alleviate this.
    553 	*/
    554 	if (nmbclusters > 0 ) {
    555 		int s;
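         		/* Rough estimate of clusters needed: RX descriptors per
         		 * ring times rings, for every ixgbe port in the system. */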
    556 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    557 		if (s > nmbclusters) {
    558 			aprint_error_dev(dev, "RX Descriptors exceed "
    559 			    "system mbuf max, using default instead!\n");
    560 			ixgbe_rxd = DEFAULT_RXD;
    561 		}
    562 	}
    563 
    564 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
     565 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
    566 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    567 		adapter->num_rx_desc = DEFAULT_RXD;
    568 	} else
    569 		adapter->num_rx_desc = ixgbe_rxd;
    570 
    571 	/* Allocate our TX/RX Queues */
    572 	if (ixgbe_allocate_queues(adapter)) {
    573 		error = ENOMEM;
    574 		goto err_out;
    575 	}
    576 
    577 	/* Allocate multicast array memory. */
    578 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    579 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    580 	if (adapter->mta == NULL) {
    581 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    582 		error = ENOMEM;
    583 		goto err_late;
    584 	}
    585 
    586 	/* Initialize the shared code */
    587 	error = ixgbe_init_shared_code(hw);
    588 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    589 		/*
    590 		** No optics in this port, set up
    591 		** so the timer routine will probe
    592 		** for later insertion.
    593 		*/
    594 		adapter->sfp_probe = TRUE;
    595 		error = 0;
    596 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    597 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    598 		error = EIO;
    599 		goto err_late;
    600 	} else if (error) {
    601 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    602 		error = EIO;
    603 		goto err_late;
    604 	}
    605 
    606 	/* Make sure we have a good EEPROM before we read from it */
    607 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    608 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    609 		error = EIO;
    610 		goto err_late;
    611 	}
    612 
    613 	error = ixgbe_init_hw(hw);
    614 	switch (error) {
    615 	case IXGBE_ERR_EEPROM_VERSION:
    616 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    617 		    "LOM.  Please be aware there may be issues associated "
    618 		    "with your hardware.\n If you are experiencing problems "
    619 		    "please contact your Intel or hardware representative "
    620 		    "who provided you with this hardware.\n");
    621 		break;
    622 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    623 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    624 		error = EIO;
    625 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    626 		goto err_late;
    627 	case IXGBE_ERR_SFP_NOT_PRESENT:
    628 		device_printf(dev,"No SFP+ Module found\n");
    629 		/* falls thru */
    630 	default:
    631 		break;
    632 	}
    633 
    634 	/* Detect and set physical type */
    635 	ixgbe_setup_optics(adapter);
    636 
    637 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    638 		error = ixgbe_allocate_msix(adapter, pa);
    639 	else
    640 		error = ixgbe_allocate_legacy(adapter, pa);
    641 	if (error)
    642 		goto err_late;
    643 
    644 	/* Setup OS specific network interface */
    645 	if (ixgbe_setup_interface(dev, adapter) != 0)
    646 		goto err_late;
    647 
    648 	/* Initialize statistics */
    649 	ixgbe_update_stats_counters(adapter);
    650 
    651         /* Print PCIE bus type/speed/width info */
    652 	ixgbe_get_bus_info(hw);
    653 	aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
    654 	    ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
    655 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
    656 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
    657 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
    658 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
    659 	    ("Unknown"));
    660 
    661 	if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
    662 	    (hw->bus.speed == ixgbe_bus_speed_2500)) {
    663 		aprint_error_dev(dev, "PCI-Express bandwidth available"
    664 		    " for this card\n     is not sufficient for"
    665 		    " optimal performance.\n");
    666 		aprint_error_dev(dev, "For optimal performance a x8 "
    667 		    "PCIE, or x4 PCIE 2 slot is required.\n");
    668         }
    669 
    670 	/* Set an initial default flow control value */
    671 	adapter->fc =  ixgbe_fc_full;
    672 
    673 	/* let hardware know driver is loaded */
    674 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    675 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    676 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    677 
    678 	ixgbe_add_hw_stats(adapter);
    679 
    680 #ifdef DEV_NETMAP
    681 	ixgbe_netmap_attach(adapter);
    682 #endif /* DEV_NETMAP */
    683 	INIT_DEBUGOUT("ixgbe_attach: end");
    684 	return;
    685 err_late:
    686 	ixgbe_free_transmit_structures(adapter);
    687 	ixgbe_free_receive_structures(adapter);
    688 err_out:
    689 	if (adapter->ifp != NULL)
    690 		if_free(adapter->ifp);
    691 	ixgbe_free_pci_resources(adapter);
    692 	if (adapter->mta != NULL)
    693 		free(adapter->mta, M_DEVBUF);
    694 	return;
    695 
    696 }
    697 
    698 /*********************************************************************
    699  *  Device removal routine
    700  *
    701  *  The detach entry point is called when the driver is being removed.
    702  *  This routine stops the adapter and deallocates all the resources
    703  *  that were allocated for driver operation.
    704  *
    705  *  return 0 on success, positive on failure
    706  *********************************************************************/
    707 
    708 static int
    709 ixgbe_detach(device_t dev, int flags)
    710 {
    711 	struct adapter *adapter = device_private(dev);
    712 	struct rx_ring *rxr = adapter->rx_rings;
    713 	struct ixgbe_hw_stats *stats = &adapter->stats;
    714 	struct ix_queue *que = adapter->queues;
    715 	struct tx_ring *txr = adapter->tx_rings;
    716 	u32	ctrl_ext;
    717 
    718 	INIT_DEBUGOUT("ixgbe_detach: begin");
    719 
    720 #if NVLAN > 0
    721 	/* Make sure VLANs are not using driver */
    722 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    723 		;	/* nothing to do: no VLANs */
    724 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    725 		vlan_ifdetach(adapter->ifp);
    726 	else {
    727 		aprint_error_dev(dev, "VLANs in use\n");
    728 		return EBUSY;
    729 	}
    730 #endif
    731 
    732 	IXGBE_CORE_LOCK(adapter);
    733 	ixgbe_stop(adapter);
    734 	IXGBE_CORE_UNLOCK(adapter);
    735 
    736 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    737 #ifndef IXGBE_LEGACY_TX
    738 		softint_disestablish(txr->txq_si);
    739 #endif
    740 		softint_disestablish(que->que_si);
    741 	}
    742 
    743 	/* Drain the Link queue */
    744 	softint_disestablish(adapter->link_si);
    745 	softint_disestablish(adapter->mod_si);
    746 	softint_disestablish(adapter->msf_si);
    747 #ifdef IXGBE_FDIR
    748 	softint_disestablish(adapter->fdir_si);
    749 #endif
    750 
    751 	/* let hardware know driver is unloading */
    752 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    753 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    754 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    755 
    756 	ether_ifdetach(adapter->ifp);
    757 	callout_halt(&adapter->timer, NULL);
    758 #ifdef DEV_NETMAP
    759 	netmap_detach(adapter->ifp);
    760 #endif /* DEV_NETMAP */
    761 	ixgbe_free_pci_resources(adapter);
    762 #if 0	/* XXX the NetBSD port is probably missing something here */
    763 	bus_generic_detach(dev);
    764 #endif
    765 	if_detach(adapter->ifp);
    766 
    767 	sysctl_teardown(&adapter->sysctllog);
    768 	evcnt_detach(&adapter->handleq);
    769 	evcnt_detach(&adapter->req);
    770 	evcnt_detach(&adapter->morerx);
    771 	evcnt_detach(&adapter->moretx);
    772 	evcnt_detach(&adapter->txloops);
    773 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    774 	evcnt_detach(&adapter->m_defrag_failed);
    775 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    776 	evcnt_detach(&adapter->einval_tx_dma_setup);
    777 	evcnt_detach(&adapter->other_tx_dma_setup);
    778 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    779 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    780 	evcnt_detach(&adapter->watchdog_events);
    781 	evcnt_detach(&adapter->tso_err);
    782 	evcnt_detach(&adapter->link_irq);
    783 
    784 	txr = adapter->tx_rings;
    785 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    786 		evcnt_detach(&txr->no_desc_avail);
    787 		evcnt_detach(&txr->total_packets);
    788 		evcnt_detach(&txr->tso_tx);
    789 
    790 		if (i < __arraycount(adapter->stats.mpc)) {
    791 			evcnt_detach(&adapter->stats.mpc[i]);
    792 		}
    793 		if (i < __arraycount(adapter->stats.pxontxc)) {
    794 			evcnt_detach(&adapter->stats.pxontxc[i]);
    795 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    796 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    797 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    798 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    799 		}
    800 		if (i < __arraycount(adapter->stats.qprc)) {
    801 			evcnt_detach(&adapter->stats.qprc[i]);
    802 			evcnt_detach(&adapter->stats.qptc[i]);
    803 			evcnt_detach(&adapter->stats.qbrc[i]);
    804 			evcnt_detach(&adapter->stats.qbtc[i]);
    805 			evcnt_detach(&adapter->stats.qprdc[i]);
    806 		}
    807 
    808 		evcnt_detach(&rxr->rx_packets);
    809 		evcnt_detach(&rxr->rx_bytes);
    810 		evcnt_detach(&rxr->no_jmbuf);
    811 		evcnt_detach(&rxr->rx_discarded);
    812 		evcnt_detach(&rxr->rx_irq);
    813 	}
    814 	evcnt_detach(&stats->ipcs);
    815 	evcnt_detach(&stats->l4cs);
    816 	evcnt_detach(&stats->ipcs_bad);
    817 	evcnt_detach(&stats->l4cs_bad);
    818 	evcnt_detach(&stats->intzero);
    819 	evcnt_detach(&stats->legint);
    820 	evcnt_detach(&stats->crcerrs);
    821 	evcnt_detach(&stats->illerrc);
    822 	evcnt_detach(&stats->errbc);
    823 	evcnt_detach(&stats->mspdc);
    824 	evcnt_detach(&stats->mlfc);
    825 	evcnt_detach(&stats->mrfc);
    826 	evcnt_detach(&stats->rlec);
    827 	evcnt_detach(&stats->lxontxc);
    828 	evcnt_detach(&stats->lxonrxc);
    829 	evcnt_detach(&stats->lxofftxc);
    830 	evcnt_detach(&stats->lxoffrxc);
    831 
    832 	/* Packet Reception Stats */
    833 	evcnt_detach(&stats->tor);
    834 	evcnt_detach(&stats->gorc);
    835 	evcnt_detach(&stats->tpr);
    836 	evcnt_detach(&stats->gprc);
    837 	evcnt_detach(&stats->mprc);
    838 	evcnt_detach(&stats->bprc);
    839 	evcnt_detach(&stats->prc64);
    840 	evcnt_detach(&stats->prc127);
    841 	evcnt_detach(&stats->prc255);
    842 	evcnt_detach(&stats->prc511);
    843 	evcnt_detach(&stats->prc1023);
    844 	evcnt_detach(&stats->prc1522);
    845 	evcnt_detach(&stats->ruc);
    846 	evcnt_detach(&stats->rfc);
    847 	evcnt_detach(&stats->roc);
    848 	evcnt_detach(&stats->rjc);
    849 	evcnt_detach(&stats->mngprc);
    850 	evcnt_detach(&stats->xec);
    851 
    852 	/* Packet Transmission Stats */
    853 	evcnt_detach(&stats->gotc);
    854 	evcnt_detach(&stats->tpt);
    855 	evcnt_detach(&stats->gptc);
    856 	evcnt_detach(&stats->bptc);
    857 	evcnt_detach(&stats->mptc);
    858 	evcnt_detach(&stats->mngptc);
    859 	evcnt_detach(&stats->ptc64);
    860 	evcnt_detach(&stats->ptc127);
    861 	evcnt_detach(&stats->ptc255);
    862 	evcnt_detach(&stats->ptc511);
    863 	evcnt_detach(&stats->ptc1023);
    864 	evcnt_detach(&stats->ptc1522);
    865 
    866 	ixgbe_free_transmit_structures(adapter);
    867 	ixgbe_free_receive_structures(adapter);
    868 	free(adapter->mta, M_DEVBUF);
    869 
    870 	IXGBE_CORE_LOCK_DESTROY(adapter);
    871 	return (0);
    872 }
    873 
    874 /*********************************************************************
    875  *
    876  *  Shutdown entry point
    877  *
    878  **********************************************************************/
    879 
    880 #if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
    881 static int
    882 ixgbe_shutdown(device_t dev)
    883 {
    884 	struct adapter *adapter = device_private(dev);
    885 	IXGBE_CORE_LOCK(adapter);
    886 	ixgbe_stop(adapter);
    887 	IXGBE_CORE_UNLOCK(adapter);
    888 	return (0);
    889 }
    890 #endif
    891 
    892 
    893 #ifdef IXGBE_LEGACY_TX
    894 /*********************************************************************
    895  *  Transmit entry point
    896  *
    897  *  ixgbe_start is called by the stack to initiate a transmit.
    898  *  The driver will remain in this routine as long as there are
    899  *  packets to transmit and transmit resources are available.
    900  *  In case resources are not available stack is notified and
    901  *  the packet is requeued.
    902  **********************************************************************/
    903 
    904 static void
    905 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    906 {
    907 	int rc;
    908 	struct mbuf    *m_head;
    909 	struct adapter *adapter = txr->adapter;
    910 
    911 	IXGBE_TX_LOCK_ASSERT(txr);
    912 
    913 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    914 		return;
    915 	if (!adapter->link_active)
    916 		return;
    917 
    918 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    919 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    920 			break;
    921 
    922 		IFQ_POLL(&ifp->if_snd, m_head);
    923 		if (m_head == NULL)
    924 			break;
    925 
    926 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    927 			break;
    928 		}
    929 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    930 		if (rc == EFBIG) {
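         			/*
         			 * EFBIG means the chain had too many segments for
         			 * the DMA map; try to compact it with m_defrag()
         			 * and retry the transmit once before dropping it.
         			 */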
    931 			struct mbuf *mtmp;
    932 
    933 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    934 				m_head = mtmp;
    935 				rc = ixgbe_xmit(txr, m_head);
    936 				if (rc != 0)
    937 					adapter->efbig2_tx_dma_setup.ev_count++;
    938 			} else
    939 				adapter->m_defrag_failed.ev_count++;
    940 		}
    941 		if (rc != 0) {
    942 			m_freem(m_head);
    943 			continue;
    944 		}
    945 
    946 		/* Send a copy of the frame to the BPF listener */
    947 		bpf_mtap(ifp, m_head);
    948 
    949 		/* Set watchdog on */
    950 		getmicrotime(&txr->watchdog_time);
    951 		txr->queue_status = IXGBE_QUEUE_WORKING;
    952 
    953 	}
    954 	return;
    955 }
    956 
    957 /*
     958  * Legacy TX start - called by the stack; this
     959  * always uses the first tx ring and should
     960  * not be used with multiqueue tx enabled.
    961  */
    962 static void
    963 ixgbe_start(struct ifnet *ifp)
    964 {
    965 	struct adapter *adapter = ifp->if_softc;
    966 	struct tx_ring	*txr = adapter->tx_rings;
    967 
    968 	if (ifp->if_flags & IFF_RUNNING) {
    969 		IXGBE_TX_LOCK(txr);
    970 		ixgbe_start_locked(txr, ifp);
    971 		IXGBE_TX_UNLOCK(txr);
    972 	}
    973 	return;
    974 }
    975 
    976 #else /* ! IXGBE_LEGACY_TX */
    977 
    978 /*
    979 ** Multiqueue Transmit driver
    980 **
    981 */
    982 static int
    983 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    984 {
    985 	struct adapter	*adapter = ifp->if_softc;
    986 	struct ix_queue	*que;
    987 	struct tx_ring	*txr;
    988 	int 		i = 0, err = 0;
    989 
    990 	/* Which queue to use */
    991 	if ((m->m_flags & M_FLOWID) != 0)
    992 		i = m->m_pkthdr.flowid % adapter->num_queues;
    993 	else
    994 		i = cpu_index(curcpu()) % adapter->num_queues;
    995 
    996 	txr = &adapter->tx_rings[i];
    997 	que = &adapter->queues[i];
    998 
    999 	if (IXGBE_TX_TRYLOCK(txr)) {
   1000 		err = ixgbe_mq_start_locked(ifp, txr, m);
   1001 		IXGBE_TX_UNLOCK(txr);
   1002 	} else {
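         		/*
         		 * The TX lock is busy: enqueue the mbuf on the ring's
         		 * buf_ring and let ixgbe_deferred_mq_start() drain it
         		 * from softint context once the lock is free.
         		 */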
   1003 		err = drbr_enqueue(ifp, txr->br, m);
   1004 		softint_schedule(txr->txq_si);
   1005 	}
   1006 
   1007 	return (err);
   1008 }
   1009 
   1010 static int
   1011 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
   1012 {
   1013 	struct adapter  *adapter = txr->adapter;
   1014         struct mbuf     *next;
   1015         int             enqueued, err = 0;
   1016 
   1017 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1018 	    adapter->link_active == 0) {
   1019 		if (m != NULL)
   1020 			err = drbr_enqueue(ifp, txr->br, m);
   1021 		return (err);
   1022 	}
   1023 
   1024 	enqueued = 0;
   1025 	if (m != NULL) {
   1026 		err = drbr_enqueue(ifp, txr->br, m);
   1027 		if (err) {
   1028 			return (err);
   1029 		}
   1030 	}
   1031 
   1032 	/* Process the queue */
   1033 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1034 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1035 			if (next == NULL) {
   1036 				drbr_advance(ifp, txr->br);
   1037 			} else {
   1038 				drbr_putback(ifp, txr->br, next);
   1039 			}
   1040 			break;
   1041 		}
   1042 		drbr_advance(ifp, txr->br);
   1043 		enqueued++;
   1044 		/* Send a copy of the frame to the BPF listener */
   1045 		bpf_mtap(ifp, next);
   1046 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1047 			break;
   1048 		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
   1049 			ixgbe_txeof(txr);
   1050 	}
   1051 
   1052 	if (enqueued > 0) {
   1053 		/* Set watchdog on */
   1054 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1055 		getmicrotime(&txr->watchdog_time);
   1056 	}
   1057 
   1058 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1059 		ixgbe_txeof(txr);
   1060 
   1061 	return (err);
   1062 }
   1063 
   1064 /*
    1065  * Called from a softint to drain queued transmit packets.
   1066  */
   1067 static void
   1068 ixgbe_deferred_mq_start(void *arg)
   1069 {
   1070 	struct tx_ring *txr = arg;
   1071 	struct adapter *adapter = txr->adapter;
   1072 	struct ifnet *ifp = adapter->ifp;
   1073 
   1074 	IXGBE_TX_LOCK(txr);
   1075 	if (!drbr_empty(ifp, txr->br))
   1076 		ixgbe_mq_start_locked(ifp, txr, NULL);
   1077 	IXGBE_TX_UNLOCK(txr);
   1078 }
   1079 
   1080 /*
   1081 ** Flush all ring buffers
   1082 */
   1083 static void
   1084 ixgbe_qflush(struct ifnet *ifp)
   1085 {
   1086 	struct adapter	*adapter = ifp->if_softc;
   1087 	struct tx_ring	*txr = adapter->tx_rings;
   1088 	struct mbuf	*m;
   1089 
   1090 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1091 		IXGBE_TX_LOCK(txr);
   1092 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1093 			m_freem(m);
   1094 		IXGBE_TX_UNLOCK(txr);
   1095 	}
   1096 	if_qflush(ifp);
   1097 }
   1098 #endif /* IXGBE_LEGACY_TX */
   1099 
   1100 static int
   1101 ixgbe_ifflags_cb(struct ethercom *ec)
   1102 {
   1103 	struct ifnet *ifp = &ec->ec_if;
   1104 	struct adapter *adapter = ifp->if_softc;
   1105 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1106 
   1107 	IXGBE_CORE_LOCK(adapter);
   1108 
   1109 	if (change != 0)
   1110 		adapter->if_flags = ifp->if_flags;
   1111 
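         	/*
         	 * A change to any flag outside the unchangeable set (and
         	 * IFF_DEBUG) needs a full reinit, so report ENETRESET; a bare
         	 * PROMISC/ALLMULTI toggle is handled directly below.
         	 */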
   1112 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1113 		rc = ENETRESET;
   1114 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1115 		ixgbe_set_promisc(adapter);
   1116 
   1117 	/* Set up VLAN support and filter */
   1118 	ixgbe_setup_vlan_hw_support(adapter);
   1119 
   1120 	IXGBE_CORE_UNLOCK(adapter);
   1121 
   1122 	return rc;
   1123 }
   1124 
   1125 /*********************************************************************
   1126  *  Ioctl entry point
   1127  *
   1128  *  ixgbe_ioctl is called when the user wants to configure the
   1129  *  interface.
   1130  *
   1131  *  return 0 on success, positive on failure
   1132  **********************************************************************/
   1133 
   1134 static int
   1135 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1136 {
   1137 	struct adapter	*adapter = ifp->if_softc;
   1138 	struct ixgbe_hw *hw = &adapter->hw;
   1139 	struct ifcapreq *ifcr = data;
   1140 	struct ifreq	*ifr = data;
   1141 	int             error = 0;
   1142 	int l4csum_en;
   1143 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1144 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1145 
   1146 	switch (command) {
   1147 	case SIOCSIFFLAGS:
   1148 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1149 		break;
   1150 	case SIOCADDMULTI:
   1151 	case SIOCDELMULTI:
   1152 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1153 		break;
   1154 	case SIOCSIFMEDIA:
   1155 	case SIOCGIFMEDIA:
   1156 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1157 		break;
   1158 	case SIOCSIFCAP:
   1159 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1160 		break;
   1161 	case SIOCSIFMTU:
   1162 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1163 		break;
   1164 	default:
   1165 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1166 		break;
   1167 	}
   1168 
   1169 	switch (command) {
   1170 	case SIOCSIFMEDIA:
   1171 	case SIOCGIFMEDIA:
   1172 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1173 	case SIOCGI2C:
   1174 	{
   1175 		struct ixgbe_i2c_req	i2c;
   1176 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1177 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1178 		if (error)
   1179 			break;
    1180 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
   1181 			error = EINVAL;
   1182 			break;
   1183 		}
   1184 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
   1185 		    i2c.dev_addr, i2c.data);
   1186 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1187 		break;
   1188 	}
   1189 	case SIOCSIFCAP:
   1190 		/* Layer-4 Rx checksum offload has to be turned on and
   1191 		 * off as a unit.
   1192 		 */
   1193 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1194 		if (l4csum_en != l4csum && l4csum_en != 0)
   1195 			return EINVAL;
   1196 		/*FALLTHROUGH*/
   1197 	case SIOCADDMULTI:
   1198 	case SIOCDELMULTI:
   1199 	case SIOCSIFFLAGS:
   1200 	case SIOCSIFMTU:
   1201 	default:
   1202 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1203 			return error;
   1204 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1205 			;
   1206 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1207 			IXGBE_CORE_LOCK(adapter);
   1208 			ixgbe_init_locked(adapter);
   1209 			IXGBE_CORE_UNLOCK(adapter);
   1210 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1211 			/*
   1212 			 * Multicast list has changed; set the hardware filter
   1213 			 * accordingly.
   1214 			 */
   1215 			IXGBE_CORE_LOCK(adapter);
   1216 			ixgbe_disable_intr(adapter);
   1217 			ixgbe_set_multi(adapter);
   1218 			ixgbe_enable_intr(adapter);
   1219 			IXGBE_CORE_UNLOCK(adapter);
   1220 		}
   1221 		return 0;
   1222 	}
   1223 
   1224 	return error;
   1225 }
   1226 
   1227 /*********************************************************************
   1228  *  Init entry point
   1229  *
    1230  *  This routine is used in two ways. It is used by the stack as the
    1231  *  init entry point in the network interface structure. It is also used
   1232  *  by the driver as a hw/sw initialization routine to get to a
   1233  *  consistent state.
   1234  *
   1235  *  return 0 on success, positive on failure
   1236  **********************************************************************/
   1237 #define IXGBE_MHADD_MFS_SHIFT 16
   1238 
   1239 static void
   1240 ixgbe_init_locked(struct adapter *adapter)
   1241 {
   1242 	struct ifnet   *ifp = adapter->ifp;
   1243 	device_t 	dev = adapter->dev;
   1244 	struct ixgbe_hw *hw = &adapter->hw;
   1245 	u32		k, txdctl, mhadd, gpie;
   1246 	u32		rxdctl, rxctrl;
   1247 
   1248 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1249 
   1250 	KASSERT(mutex_owned(&adapter->core_mtx));
   1251 	INIT_DEBUGOUT("ixgbe_init: begin");
   1252 	hw->adapter_stopped = FALSE;
   1253 	ixgbe_stop_adapter(hw);
   1254         callout_stop(&adapter->timer);
   1255 
   1256 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1257 	adapter->max_frame_size =
   1258 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1259 
   1260         /* reprogram the RAR[0] in case user changed it. */
   1261         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1262 
   1263 	/* Get the latest mac address, User can use a LAA */
   1264 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1265 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1266 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1267 	hw->addr_ctrl.rar_used_count = 1;
   1268 
   1269 	/* Prepare transmit descriptors and buffers */
   1270 	if (ixgbe_setup_transmit_structures(adapter)) {
   1271 		device_printf(dev,"Could not setup transmit structures\n");
   1272 		ixgbe_stop(adapter);
   1273 		return;
   1274 	}
   1275 
   1276 	ixgbe_init_hw(hw);
   1277 	ixgbe_initialize_transmit_units(adapter);
   1278 
   1279 	/* Setup Multicast table */
   1280 	ixgbe_set_multi(adapter);
   1281 
   1282 	/*
   1283 	** Determine the correct mbuf pool
   1284 	** for doing jumbo frames
   1285 	*/
   1286 	if (adapter->max_frame_size <= 2048)
   1287 		adapter->rx_mbuf_sz = MCLBYTES;
   1288 	else if (adapter->max_frame_size <= 4096)
   1289 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1290 	else if (adapter->max_frame_size <= 9216)
   1291 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1292 	else
   1293 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1294 
   1295 	/* Prepare receive descriptors and buffers */
   1296 	if (ixgbe_setup_receive_structures(adapter)) {
   1297 		device_printf(dev,"Could not setup receive structures\n");
   1298 		ixgbe_stop(adapter);
   1299 		return;
   1300 	}
   1301 
   1302 	/* Configure RX settings */
   1303 	ixgbe_initialize_receive_units(adapter);
   1304 
   1305 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1306 
   1307 	/* Enable Fan Failure Interrupt */
   1308 	gpie |= IXGBE_SDP1_GPIEN;
   1309 
   1310 	/* Add for Thermal detection */
   1311 	if (hw->mac.type == ixgbe_mac_82599EB)
   1312 		gpie |= IXGBE_SDP2_GPIEN;
   1313 
   1314 	/* Thermal Failure Detection */
   1315 	if (hw->mac.type == ixgbe_mac_X540)
   1316 		gpie |= IXGBE_SDP0_GPIEN;
   1317 
   1318 	if (adapter->msix > 1) {
   1319 		/* Enable Enhanced MSIX mode */
   1320 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1321 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1322 		    IXGBE_GPIE_OCD;
   1323 	}
   1324 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1325 
   1326 	/* Set MTU size */
   1327 	if (ifp->if_mtu > ETHERMTU) {
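         		/* MHADD.MFS (the upper 16 bits) holds the max frame size */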
   1328 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1329 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1330 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1331 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1332 	}
   1333 
   1334 	/* Now enable all the queues */
   1335 
   1336 	for (int i = 0; i < adapter->num_queues; i++) {
   1337 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1338 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1339 		/* Set WTHRESH to 8, burst writeback */
   1340 		txdctl |= (8 << 16);
   1341 		/*
   1342 		 * When the internal queue falls below PTHRESH (32),
   1343 		 * start prefetching as long as there are at least
   1344 		 * HTHRESH (1) buffers ready. The values are taken
   1345 		 * from the Intel linux driver 3.8.21.
   1346 		 * Prefetching enables tx line rate even with 1 queue.
   1347 		 */
   1348 		txdctl |= (32 << 0) | (1 << 8);
   1349 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1350 	}
   1351 
   1352 	for (int i = 0; i < adapter->num_queues; i++) {
   1353 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1354 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1355 			/*
   1356 			** PTHRESH = 21
   1357 			** HTHRESH = 4
   1358 			** WTHRESH = 8
   1359 			*/
   1360 			rxdctl &= ~0x3FFFFF;
   1361 			rxdctl |= 0x080420;
   1362 		}
   1363 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1364 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1365 		/* XXX I don't trust this loop, and I don't trust the
   1366 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1367 		 */
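         		/* Poll up to 10ms for the hardware to acknowledge
         		 * RXDCTL.ENABLE before the tail pointer is written below. */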
   1368 		for (k = 0; k < 10; k++) {
   1369 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1370 			    IXGBE_RXDCTL_ENABLE)
   1371 				break;
   1372 			else
   1373 				msec_delay(1);
   1374 		}
   1375 		wmb();
   1376 #ifdef DEV_NETMAP
   1377 		/*
   1378 		 * In netmap mode, we must preserve the buffers made
   1379 		 * available to userspace before the if_init()
   1380 		 * (this is true by default on the TX side, because
   1381 		 * init makes all buffers available to userspace).
   1382 		 *
   1383 		 * netmap_reset() and the device specific routines
   1384 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1385 		 * buffers at the end of the NIC ring, so here we
   1386 		 * must set the RDT (tail) register to make sure
   1387 		 * they are not overwritten.
   1388 		 *
   1389 		 * In this driver the NIC ring starts at RDH = 0,
   1390 		 * RDT points to the last slot available for reception (?),
   1391 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1392 		 */
   1393 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1394 			struct netmap_adapter *na = NA(adapter->ifp);
   1395 			struct netmap_kring *kring = &na->rx_rings[i];
   1396 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
   1397 
   1398 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1399 		} else
   1400 #endif /* DEV_NETMAP */
   1401 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1402 	}
   1403 
   1404 	/* Set up VLAN support and filter */
   1405 	ixgbe_setup_vlan_hw_support(adapter);
   1406 
   1407 	/* Enable Receive engine */
   1408 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1409 	if (hw->mac.type == ixgbe_mac_82598EB)
   1410 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1411 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1412 	ixgbe_enable_rx_dma(hw, rxctrl);
   1413 
   1414 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1415 
   1416 	/* Set up MSI/X routing */
   1417 	if (ixgbe_enable_msix)  {
   1418 		ixgbe_configure_ivars(adapter);
   1419 		/* Set up auto-mask */
   1420 		if (hw->mac.type == ixgbe_mac_82598EB)
   1421 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1422 		else {
   1423 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1424 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1425 		}
   1426 	} else {  /* Simple settings for Legacy/MSI */
   1427                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1428                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1429 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1430 	}
   1431 
   1432 #ifdef IXGBE_FDIR
   1433 	/* Init Flow director */
   1434 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1435 		u32 hdrm = 32 << fdir_pballoc;
   1436 
   1437 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1438 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1439 	}
   1440 #endif
   1441 
   1442 	/*
   1443 	** Check on any SFP devices that
   1444 	** need to be kick-started
   1445 	*/
   1446 	if (hw->phy.type == ixgbe_phy_none) {
   1447 		int err = hw->phy.ops.identify(hw);
   1448 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1449                 	device_printf(dev,
   1450 			    "Unsupported SFP+ module type was detected.\n");
   1451 			return;
   1452         	}
   1453 	}
   1454 
   1455 	/* Set moderation on the Link interrupt */
   1456 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1457 
   1458 	/* Config/Enable Link */
   1459 	ixgbe_config_link(adapter);
   1460 
   1461 	/* Hardware Packet Buffer & Flow Control setup */
   1462 	{
   1463 		u32 rxpb, frame, size, tmp;
   1464 
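         		/*
         		 * The high water mark is the RX packet buffer size (in KB)
         		 * minus the worst-case delay value for one max-sized frame;
         		 * the low water mark comes from the matching LOW_DV macro.
         		 */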
   1465 		frame = adapter->max_frame_size;
   1466 
   1467 		/* Calculate High Water */
   1468 		if (hw->mac.type == ixgbe_mac_X540)
   1469 			tmp = IXGBE_DV_X540(frame, frame);
   1470 		else
   1471 			tmp = IXGBE_DV(frame, frame);
   1472 		size = IXGBE_BT2KB(tmp);
   1473 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1474 		hw->fc.high_water[0] = rxpb - size;
   1475 
   1476 		/* Now calculate Low Water */
   1477 		if (hw->mac.type == ixgbe_mac_X540)
   1478 			tmp = IXGBE_LOW_DV_X540(frame);
   1479 		else
   1480 			tmp = IXGBE_LOW_DV(frame);
   1481 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1482 
   1483 		hw->fc.requested_mode = adapter->fc;
   1484 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1485 		hw->fc.send_xon = TRUE;
   1486 	}
   1487 	/* Initialize the FC settings */
   1488 	ixgbe_start_hw(hw);
   1489 
   1490 	/* And now turn on interrupts */
   1491 	ixgbe_enable_intr(adapter);
   1492 
   1493 	/* Now inform the stack we're ready */
   1494 	ifp->if_flags |= IFF_RUNNING;
   1495 
   1496 	return;
   1497 }
   1498 
   1499 static int
   1500 ixgbe_init(struct ifnet *ifp)
   1501 {
   1502 	struct adapter *adapter = ifp->if_softc;
   1503 
   1504 	IXGBE_CORE_LOCK(adapter);
   1505 	ixgbe_init_locked(adapter);
   1506 	IXGBE_CORE_UNLOCK(adapter);
   1507 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1508 }
   1509 
   1510 
   1511 /*
   1512 **
   1513 ** MSIX Interrupt Handlers and Tasklets
   1514 **
   1515 */
   1516 
   1517 static inline void
   1518 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1519 {
   1520 	struct ixgbe_hw *hw = &adapter->hw;
   1521 	u64	queue = (u64)(1ULL << vector);
   1522 	u32	mask;
   1523 
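         	/*
         	 * The 82598 has a single 32-bit EIMS register, while later
         	 * MACs split the 64-bit queue mask across EIMS_EX(0)
         	 * (low 32 bits) and EIMS_EX(1) (high 32 bits).
         	 */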
   1524 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1525                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1526                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1527 	} else {
   1528                 mask = (queue & 0xFFFFFFFF);
   1529                 if (mask)
   1530                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1531                 mask = (queue >> 32);
   1532                 if (mask)
   1533                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1534 	}
   1535 }
   1536 
   1537 __unused static inline void
   1538 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1539 {
   1540 	struct ixgbe_hw *hw = &adapter->hw;
   1541 	u64	queue = (u64)(1ULL << vector);
   1542 	u32	mask;
   1543 
   1544 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1545                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1546                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1547 	} else {
   1548                 mask = (queue & 0xFFFFFFFF);
   1549                 if (mask)
   1550                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1551                 mask = (queue >> 32);
   1552                 if (mask)
   1553                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1554 	}
   1555 }
   1556 
   1557 static inline void
   1558 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
   1559 {
   1560 	u32 mask;
   1561 
   1562 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   1563 		mask = (IXGBE_EIMS_RTX_QUEUE & queues);
   1564 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
   1565 	} else {
   1566 		mask = (queues & 0xFFFFFFFF);
   1567 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
   1568 		mask = (queues >> 32);
   1569 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
   1570 	}
   1571 }
   1572 
   1573 
   1574 static void
   1575 ixgbe_handle_que(void *context)
   1576 {
   1577 	struct ix_queue *que = context;
   1578 	struct adapter  *adapter = que->adapter;
   1579 	struct tx_ring  *txr = que->txr;
   1580 	struct ifnet    *ifp = adapter->ifp;
   1581 	bool		more;
   1582 
   1583 	adapter->handleq.ev_count++;
   1584 
   1585 	if (ifp->if_flags & IFF_RUNNING) {
   1586 		more = ixgbe_rxeof(que);
   1587 		IXGBE_TX_LOCK(txr);
   1588 		ixgbe_txeof(txr);
   1589 #ifndef IXGBE_LEGACY_TX
   1590 		if (!drbr_empty(ifp, txr->br))
   1591 			ixgbe_mq_start_locked(ifp, txr, NULL);
   1592 #else
   1593 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1594 			ixgbe_start_locked(txr, ifp);
   1595 #endif
   1596 		IXGBE_TX_UNLOCK(txr);
   1597 		if (more) {
   1598 			adapter->req.ev_count++;
   1599 			softint_schedule(que->que_si);
   1600 			return;
   1601 		}
   1602 	}
   1603 
   1604 	/* Reenable this interrupt */
   1605 	ixgbe_enable_queue(adapter, que->msix);
   1606 	return;
   1607 }
   1608 
   1609 
   1610 /*********************************************************************
   1611  *
   1612  *  Legacy Interrupt Service routine
   1613  *
   1614  **********************************************************************/
   1615 
   1616 static int
   1617 ixgbe_legacy_irq(void *arg)
   1618 {
   1619 	struct ix_queue *que = arg;
   1620 	struct adapter	*adapter = que->adapter;
   1621 	struct ifnet   *ifp = adapter->ifp;
   1622 	struct ixgbe_hw	*hw = &adapter->hw;
   1623 	struct 		tx_ring *txr = adapter->tx_rings;
   1624 	bool		more_tx = false, more_rx = false;
   1625 	u32       	reg_eicr, loop = MAX_LOOP;
   1626 
   1627 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1628 
   1629 	adapter->stats.legint.ev_count++;
   1630 	++que->irqs;
   1631 	if (reg_eicr == 0) {
   1632 		adapter->stats.intzero.ev_count++;
   1633 		if ((ifp->if_flags & IFF_UP) != 0)
   1634 			ixgbe_enable_intr(adapter);
   1635 		return 0;
   1636 	}
   1637 
   1638 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1639 		more_rx = ixgbe_rxeof(que);
   1640 
   1641 		IXGBE_TX_LOCK(txr);
   1642 		do {
   1643 			adapter->txloops.ev_count++;
   1644 			more_tx = ixgbe_txeof(txr);
   1645 		} while (loop-- && more_tx);
   1646 		IXGBE_TX_UNLOCK(txr);
   1647 	}
   1648 
   1649 	if (more_rx || more_tx) {
   1650 		if (more_rx)
   1651 			adapter->morerx.ev_count++;
   1652 		if (more_tx)
   1653 			adapter->moretx.ev_count++;
   1654 		softint_schedule(que->que_si);
   1655 	}
   1656 
   1657 	/* Check for fan failure */
   1658 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1659 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1660                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1661 		    "REPLACE IMMEDIATELY!!\n");
   1662 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1663 	}
   1664 
   1665 	/* Link status change */
   1666 	if (reg_eicr & IXGBE_EICR_LSC)
   1667 		softint_schedule(adapter->link_si);
   1668 
   1669 	ixgbe_enable_intr(adapter);
   1670 	return 1;
   1671 }
   1672 
   1673 
   1674 #if defined(NETBSD_MSI_OR_MSIX)
   1675 /*********************************************************************
   1676  *
   1677  *  MSIX Queue Interrupt Service routine
   1678  *
   1679  **********************************************************************/
   1680 void
   1681 ixgbe_msix_que(void *arg)
   1682 {
   1683 	struct ix_queue	*que = arg;
   1684 	struct adapter  *adapter = que->adapter;
   1685 	struct tx_ring	*txr = que->txr;
   1686 	struct rx_ring	*rxr = que->rxr;
   1687 	bool		more_tx, more_rx;
   1688 	u32		newitr = 0;
   1689 
   1690 	ixgbe_disable_queue(adapter, que->msix);
   1691 	++que->irqs;
   1692 
   1693 	more_rx = ixgbe_rxeof(que);
   1694 
   1695 	IXGBE_TX_LOCK(txr);
   1696 	more_tx = ixgbe_txeof(txr);
   1697 	/*
   1698 	** Make certain that if the stack
   1699 	** has anything queued the task gets
   1700 	** scheduled to handle it.
   1701 	*/
   1702 #ifdef IXGBE_LEGACY_TX
   1703 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1704 #else
   1705 	if (!drbr_empty(adapter->ifp, txr->br))
   1706 #endif
   1707 		more_tx = 1;
   1708 	IXGBE_TX_UNLOCK(txr);
   1709 
   1710 	/* Do AIM now? */
   1711 
   1712 	if (ixgbe_enable_aim == FALSE)
   1713 		goto no_calc;
   1714 	/*
   1715 	** Do Adaptive Interrupt Moderation:
   1716         **  - Write out last calculated setting
   1717 	**  - Calculate based on average size over
   1718 	**    the last interval.
   1719 	*/
   1720         if (que->eitr_setting)
   1721                 IXGBE_WRITE_REG(&adapter->hw,
   1722                     IXGBE_EITR(que->msix), que->eitr_setting);
   1723 
   1724         que->eitr_setting = 0;
   1725 
   1726         /* Idle, do nothing */
   1727         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1728                 goto no_calc;
   1729 
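         	/*
         	 * Derive the new interrupt rate from the average packet size
         	 * (bytes per packet) seen on this queue since the last
         	 * interrupt; the larger of the TX and RX averages is used.
         	 */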
   1730 	if ((txr->bytes) && (txr->packets))
   1731                	newitr = txr->bytes/txr->packets;
   1732 	if ((rxr->bytes) && (rxr->packets))
   1733 		newitr = max(newitr,
   1734 		    (rxr->bytes / rxr->packets));
   1735 	newitr += 24; /* account for hardware frame, crc */
   1736 
   1737 	/* set an upper boundary */
   1738 	newitr = min(newitr, 3000);
   1739 
   1740 	/* Be nice to the mid range */
   1741 	if ((newitr > 300) && (newitr < 1200))
   1742 		newitr = (newitr / 3);
   1743 	else
   1744 		newitr = (newitr / 2);
   1745 
   1746         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1747                 newitr |= newitr << 16;
   1748         else
   1749                 newitr |= IXGBE_EITR_CNT_WDIS;
   1750 
   1751         /* save for next interrupt */
   1752         que->eitr_setting = newitr;
   1753 
   1754         /* Reset state */
   1755         txr->bytes = 0;
   1756         txr->packets = 0;
   1757         rxr->bytes = 0;
   1758         rxr->packets = 0;
   1759 
   1760 no_calc:
   1761 	if (more_tx || more_rx)
   1762 		softint_schedule(que->que_si);
   1763 	else /* Reenable this interrupt */
   1764 		ixgbe_enable_queue(adapter, que->msix);
   1765 	return;
   1766 }
   1767 
   1768 
   1769 static void
   1770 ixgbe_msix_link(void *arg)
   1771 {
   1772 	struct adapter	*adapter = arg;
   1773 	struct ixgbe_hw *hw = &adapter->hw;
   1774 	u32		reg_eicr;
   1775 
   1776 	++adapter->link_irq.ev_count;
   1777 
   1778 	/* First get the cause */
   1779 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1780 	/* Clear interrupt with write */
   1781 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1782 
   1783 	/* Link status change */
   1784 	if (reg_eicr & IXGBE_EICR_LSC)
   1785 		softint_schedule(adapter->link_si);
   1786 
   1787 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1788 #ifdef IXGBE_FDIR
   1789 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1790 			/* This is probably overkill :) */
   1791 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1792 				return;
   1793                 	/* Disable the interrupt */
   1794 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1795 			softint_schedule(adapter->fdir_si);
   1796 		} else
   1797 #endif
   1798 		if (reg_eicr & IXGBE_EICR_ECC) {
   1799                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1800 			    "Please Reboot!!\n");
   1801 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   1802 		} else
   1803 
   1804 		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1805                 	/* Clear the interrupt */
   1806                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1807 			softint_schedule(adapter->msf_si);
   1808         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1809                 	/* Clear the interrupt */
   1810                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1811 			softint_schedule(adapter->mod_si);
   1812 		}
   1813         }
   1814 
   1815 	/* Check for fan failure */
   1816 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1817 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1818                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1819 		    "REPLACE IMMEDIATELY!!\n");
   1820 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1821 	}
   1822 
   1823 	/* Check for over temp condition */
   1824 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1825 	    (reg_eicr & IXGBE_EICR_TS)) {
   1826 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1827 		    "PHY IS SHUT DOWN!!\n");
   1828 		device_printf(adapter->dev, "System shutdown required\n");
   1829 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1830 	}
   1831 
   1832 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1833 	return;
   1834 }
   1835 #endif
   1836 
   1837 /*********************************************************************
   1838  *
   1839  *  Media Ioctl callback
   1840  *
   1841  *  This routine is called whenever the user queries the status of
   1842  *  the interface using ifconfig.
   1843  *
   1844  **********************************************************************/
   1845 static void
   1846 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1847 {
   1848 	struct adapter *adapter = ifp->if_softc;
   1849 
   1850 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1851 	IXGBE_CORE_LOCK(adapter);
   1852 	ixgbe_update_link_status(adapter);
   1853 
   1854 	ifmr->ifm_status = IFM_AVALID;
   1855 	ifmr->ifm_active = IFM_ETHER;
   1856 
   1857 	if (!adapter->link_active) {
   1858 		IXGBE_CORE_UNLOCK(adapter);
   1859 		return;
   1860 	}
   1861 
   1862 	ifmr->ifm_status |= IFM_ACTIVE;
   1863 
   1864 	switch (adapter->link_speed) {
   1865 		case IXGBE_LINK_SPEED_100_FULL:
   1866 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1867 			break;
   1868 		case IXGBE_LINK_SPEED_1GB_FULL:
   1869 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1870 			break;
   1871 		case IXGBE_LINK_SPEED_10GB_FULL:
   1872 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1873 			break;
   1874 	}
   1875 
   1876 	IXGBE_CORE_UNLOCK(adapter);
   1877 
   1878 	return;
   1879 }
   1880 
   1881 /*********************************************************************
   1882  *
   1883  *  Media Ioctl callback
   1884  *
   1885  *  This routine is called when the user changes speed/duplex using
    1886  *  the media/mediaopt option with ifconfig.
   1887  *
   1888  **********************************************************************/
   1889 static int
   1890 ixgbe_media_change(struct ifnet * ifp)
   1891 {
   1892 	struct adapter *adapter = ifp->if_softc;
   1893 	struct ifmedia *ifm = &adapter->media;
   1894 
   1895 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1896 
   1897 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1898 		return (EINVAL);
   1899 
   1900         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1901         case IFM_AUTO:
   1902                 adapter->hw.phy.autoneg_advertised =
   1903 		    IXGBE_LINK_SPEED_100_FULL |
   1904 		    IXGBE_LINK_SPEED_1GB_FULL |
   1905 		    IXGBE_LINK_SPEED_10GB_FULL;
   1906                 break;
   1907         default:
   1908                 device_printf(adapter->dev, "Only auto media type\n");
   1909 		return (EINVAL);
   1910         }
   1911 
   1912 	return (0);
   1913 }
   1914 
   1915 /*********************************************************************
   1916  *
   1917  *  This routine maps the mbufs to tx descriptors, allowing the
   1918  *  TX engine to transmit the packets.
   1919  *  	- return 0 on success, positive on failure
   1920  *
   1921  **********************************************************************/
   1922 
   1923 static int
   1924 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1925 {
   1926 	struct m_tag *mtag;
   1927 	struct adapter  *adapter = txr->adapter;
   1928 	struct ethercom *ec = &adapter->osdep.ec;
   1929 	u32		olinfo_status = 0, cmd_type_len;
   1930 	int             i, j, error;
   1931 	int		first;
   1932 	bus_dmamap_t	map;
   1933 	struct ixgbe_tx_buf *txbuf;
   1934 	union ixgbe_adv_tx_desc *txd = NULL;
   1935 
   1936 	/* Basic descriptor defines */
   1937         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1938 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1939 
   1940 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1941         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1942 
   1943         /*
   1944          * Important to capture the first descriptor
   1945          * used because it will contain the index of
   1946          * the one we tell the hardware to report back
   1947          */
   1948         first = txr->next_avail_desc;
   1949 	txbuf = &txr->tx_buffers[first];
   1950 	map = txbuf->map;
   1951 
   1952 	/*
   1953 	 * Map the packet for DMA.
   1954 	 */
   1955 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1956 	    m_head, BUS_DMA_NOWAIT);
   1957 
   1958 	if (__predict_false(error)) {
   1959 
   1960 		switch (error) {
   1961 		case EAGAIN:
   1962 			adapter->eagain_tx_dma_setup.ev_count++;
   1963 			return EAGAIN;
   1964 		case ENOMEM:
   1965 			adapter->enomem_tx_dma_setup.ev_count++;
   1966 			return EAGAIN;
   1967 		case EFBIG:
   1968 			/*
   1969 			 * XXX Try it again?
   1970 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   1971 			 */
   1972 			adapter->efbig_tx_dma_setup.ev_count++;
   1973 			return error;
   1974 		case EINVAL:
   1975 			adapter->einval_tx_dma_setup.ev_count++;
   1976 			return error;
   1977 		default:
   1978 			adapter->other_tx_dma_setup.ev_count++;
   1979 			return error;
   1980 		case 0:
   1981 			break;
   1982 		}
   1983 	}
   1984 
   1985 	/* Make certain there are enough descriptors */
   1986 	if (map->dm_nsegs > txr->tx_avail - 2) {
   1987 		txr->no_desc_avail.ev_count++;
   1988 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   1989 		return EAGAIN;
   1990 	}
   1991 
   1992 	/*
    1993 	** Set up the appropriate offload context;
    1994 	** this will consume the first descriptor.
   1995 	*/
   1996 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   1997 	if (__predict_false(error)) {
   1998 		return (error);
   1999 	}
   2000 
   2001 #ifdef IXGBE_FDIR
   2002 	/* Do the flow director magic */
   2003 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2004 		++txr->atr_count;
   2005 		if (txr->atr_count >= atr_sample_rate) {
   2006 			ixgbe_atr(txr, m_head);
   2007 			txr->atr_count = 0;
   2008 		}
   2009 	}
   2010 #endif
   2011 
   2012 	i = txr->next_avail_desc;
   2013 	for (j = 0; j < map->dm_nsegs; j++) {
   2014 		bus_size_t seglen;
   2015 		bus_addr_t segaddr;
   2016 
   2017 		txbuf = &txr->tx_buffers[i];
   2018 		txd = &txr->tx_base[i];
   2019 		seglen = map->dm_segs[j].ds_len;
   2020 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2021 
   2022 		txd->read.buffer_addr = segaddr;
   2023 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
   2024 		    cmd_type_len |seglen);
   2025 		txd->read.olinfo_status = htole32(olinfo_status);
   2026 
   2027 		if (++i == txr->num_desc)
   2028 			i = 0;
   2029 	}
   2030 
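         	/*
         	 * Mark the last descriptor of the frame as End Of Packet and
         	 * request a descriptor writeback (RS) so the completion can
         	 * be detected later in ixgbe_txeof().
         	 */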
   2031 	txd->read.cmd_type_len |=
   2032 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2033 	txr->tx_avail -= map->dm_nsegs;
   2034 	txr->next_avail_desc = i;
   2035 
   2036 	txbuf->m_head = m_head;
   2037 	/*
   2038 	** Here we swap the map so the last descriptor,
    2039 	** which gets the completion interrupt, has the
   2040 	** real map, and the first descriptor gets the
   2041 	** unused map from this descriptor.
   2042 	*/
   2043 	txr->tx_buffers[first].map = txbuf->map;
   2044 	txbuf->map = map;
   2045 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2046 	    BUS_DMASYNC_PREWRITE);
   2047 
   2048         /* Set the EOP descriptor that will be marked done */
   2049         txbuf = &txr->tx_buffers[first];
   2050 	txbuf->eop = txd;
   2051 
   2052         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2053 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2054 	/*
   2055 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2056 	 * hardware that this frame is available to transmit.
   2057 	 */
   2058 	++txr->total_packets.ev_count;
   2059 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2060 
   2061 	return 0;
   2062 }
   2063 
   2064 static void
   2065 ixgbe_set_promisc(struct adapter *adapter)
   2066 {
   2067 	struct ether_multi *enm;
   2068 	struct ether_multistep step;
   2069 	u_int32_t       reg_rctl;
   2070 	struct ethercom *ec = &adapter->osdep.ec;
   2071 	struct ifnet   *ifp = adapter->ifp;
   2072 	int		mcnt = 0;
   2073 
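         	/*
         	 * Start from the current FCTRL value with unicast promiscuous
         	 * cleared, then count the multicast entries to decide whether
         	 * multicast promiscuous (MPE) can be cleared as well, before
         	 * honouring IFF_PROMISC/IFF_ALLMULTI below.
         	 */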
   2074 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2075 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2076 	if (ifp->if_flags & IFF_ALLMULTI)
   2077 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2078 	else {
   2079 		ETHER_FIRST_MULTI(step, ec, enm);
   2080 		while (enm != NULL) {
   2081 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2082 				break;
   2083 			mcnt++;
   2084 			ETHER_NEXT_MULTI(step, enm);
   2085 		}
   2086 	}
   2087 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2088 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2089 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2090 
   2091 	if (ifp->if_flags & IFF_PROMISC) {
   2092 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2093 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2094 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2095 		reg_rctl |= IXGBE_FCTRL_MPE;
   2096 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2097 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2098 	}
   2099 	return;
   2100 }
   2101 
   2102 
   2103 /*********************************************************************
   2104  *  Multicast Update
   2105  *
    2106  *  This routine is called whenever the multicast address list is updated.
   2107  *
   2108  **********************************************************************/
   2109 #define IXGBE_RAR_ENTRIES 16
   2110 
   2111 static void
   2112 ixgbe_set_multi(struct adapter *adapter)
   2113 {
   2114 	struct ether_multi *enm;
   2115 	struct ether_multistep step;
   2116 	u32	fctrl;
   2117 	u8	*mta;
   2118 	u8	*update_ptr;
   2119 	int	mcnt = 0;
   2120 	struct ethercom *ec = &adapter->osdep.ec;
   2121 	struct ifnet   *ifp = adapter->ifp;
   2122 
   2123 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2124 
   2125 	mta = adapter->mta;
   2126 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2127 	    MAX_NUM_MULTICAST_ADDRESSES);
   2128 
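         	/*
         	 * Walk the multicast list; if it overflows the filter table
         	 * or contains an address range, fall back to accepting all
         	 * multicast traffic (IFF_ALLMULTI).
         	 */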
   2129 	ifp->if_flags &= ~IFF_ALLMULTI;
   2130 	ETHER_FIRST_MULTI(step, ec, enm);
   2131 	while (enm != NULL) {
   2132 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2133 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2134 			ETHER_ADDR_LEN) != 0)) {
   2135 			ifp->if_flags |= IFF_ALLMULTI;
   2136 			break;
   2137 		}
   2138 		bcopy(enm->enm_addrlo,
   2139 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2140 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2141 		mcnt++;
   2142 		ETHER_NEXT_MULTI(step, enm);
   2143 	}
   2144 
   2145 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2146 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2147 	if (ifp->if_flags & IFF_PROMISC)
   2148 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2149 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2150 		fctrl |= IXGBE_FCTRL_MPE;
   2151 	}
   2152 
   2153 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2154 
   2155 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2156 		update_ptr = mta;
   2157 		ixgbe_update_mc_addr_list(&adapter->hw,
   2158 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2159 	}
   2160 
   2161 	return;
   2162 }
   2163 
   2164 /*
    2165  * This is an iterator function needed by the multicast
    2166  * shared code. It simply feeds the shared code routine the
    2167  * addresses in the array built by ixgbe_set_multi() one by one.
   2168  */
   2169 static u8 *
   2170 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2171 {
   2172 	u8 *addr = *update_ptr;
   2173 	u8 *newptr;
   2174 	*vmdq = 0;
   2175 
   2176 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2177 	*update_ptr = newptr;
   2178 	return addr;
   2179 }
   2180 
   2181 
   2182 /*********************************************************************
   2183  *  Timer routine
   2184  *
    2185  *  This routine checks for link status, updates statistics,
   2186  *  and runs the watchdog check.
   2187  *
   2188  **********************************************************************/
   2189 
   2190 static void
   2191 ixgbe_local_timer1(void *arg)
   2192 {
   2193 	struct adapter	*adapter = arg;
   2194 	device_t	dev = adapter->dev;
   2195 	struct ix_queue *que = adapter->queues;
   2196 	struct tx_ring	*txr = adapter->tx_rings;
   2197 	int		hung = 0, paused = 0;
   2198 
   2199 	KASSERT(mutex_owned(&adapter->core_mtx));
   2200 
   2201 	/* Check for pluggable optics */
   2202 	if (adapter->sfp_probe)
   2203 		if (!ixgbe_sfp_probe(adapter))
   2204 			goto out; /* Nothing to do */
   2205 
   2206 	ixgbe_update_link_status(adapter);
   2207 	ixgbe_update_stats_counters(adapter);
   2208 
   2209 	/*
   2210 	 * If the interface has been paused
   2211 	 * then don't do the watchdog check
   2212 	 */
   2213 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2214 		paused = 1;
   2215 
   2216 	/*
   2217 	** Check the TX queues status
   2218 	**      - watchdog only if all queues show hung
   2219 	*/
   2220 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2221 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2222 		    (paused == 0))
   2223 			++hung;
   2224 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2225 			softint_schedule(que->que_si);
   2226 	}
    2227 	/* Only truly watchdog if all queues show hung */
   2228 	if (hung == adapter->num_queues)
   2229 		goto watchdog;
   2230 
   2231 out:
   2232 	ixgbe_rearm_queues(adapter, adapter->que_mask);
   2233 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2234 	return;
   2235 
   2236 watchdog:
   2237 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
   2238 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2239 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2240 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
   2241 	device_printf(dev,"TX(%d) desc avail = %d,"
   2242 	    "Next TX to Clean = %d\n",
   2243 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2244 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2245 	adapter->watchdog_events.ev_count++;
   2246 	ixgbe_init_locked(adapter);
   2247 }
   2248 
   2249 static void
   2250 ixgbe_local_timer(void *arg)
   2251 {
   2252 	struct adapter *adapter = arg;
   2253 
   2254 	IXGBE_CORE_LOCK(adapter);
   2255 	ixgbe_local_timer1(adapter);
   2256 	IXGBE_CORE_UNLOCK(adapter);
   2257 }
   2258 
   2259 /*
    2260 ** Note: this routine updates the OS on the link state;
   2261 **	the real check of the hardware only happens with
   2262 **	a link interrupt.
   2263 */
   2264 static void
   2265 ixgbe_update_link_status(struct adapter *adapter)
   2266 {
   2267 	struct ifnet	*ifp = adapter->ifp;
   2268 	device_t dev = adapter->dev;
   2269 
   2270 
   2271 	if (adapter->link_up){
   2272 		if (adapter->link_active == FALSE) {
   2273 			if (bootverbose)
   2274 				device_printf(dev,"Link is up %d Gbps %s \n",
   2275 				    ((adapter->link_speed == 128)? 10:1),
   2276 				    "Full Duplex");
   2277 			adapter->link_active = TRUE;
   2278 			/* Update any Flow Control changes */
   2279 			ixgbe_fc_enable(&adapter->hw);
   2280 			if_link_state_change(ifp, LINK_STATE_UP);
   2281 		}
   2282 	} else { /* Link down */
   2283 		if (adapter->link_active == TRUE) {
   2284 			if (bootverbose)
   2285 				device_printf(dev,"Link is Down\n");
   2286 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2287 			adapter->link_active = FALSE;
   2288 		}
   2289 	}
   2290 
   2291 	return;
   2292 }
   2293 
   2294 
   2295 static void
   2296 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2297 {
   2298 	struct adapter *adapter = ifp->if_softc;
   2299 
   2300 	IXGBE_CORE_LOCK(adapter);
   2301 	ixgbe_stop(adapter);
   2302 	IXGBE_CORE_UNLOCK(adapter);
   2303 }
   2304 
   2305 /*********************************************************************
   2306  *
   2307  *  This routine disables all traffic on the adapter by issuing a
   2308  *  global reset on the MAC and deallocates TX/RX buffers.
   2309  *
   2310  **********************************************************************/
   2311 
   2312 static void
   2313 ixgbe_stop(void *arg)
   2314 {
   2315 	struct ifnet   *ifp;
   2316 	struct adapter *adapter = arg;
   2317 	struct ixgbe_hw *hw = &adapter->hw;
   2318 	ifp = adapter->ifp;
   2319 
   2320 	KASSERT(mutex_owned(&adapter->core_mtx));
   2321 
   2322 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2323 	ixgbe_disable_intr(adapter);
   2324 	callout_stop(&adapter->timer);
   2325 
   2326 	/* Let the stack know...*/
   2327 	ifp->if_flags &= ~IFF_RUNNING;
   2328 
   2329 	ixgbe_reset_hw(hw);
   2330 	hw->adapter_stopped = FALSE;
   2331 	ixgbe_stop_adapter(hw);
   2332 	/* Turn off the laser */
   2333 	if (hw->phy.multispeed_fiber)
   2334 		ixgbe_disable_tx_laser(hw);
   2335 
   2336 	/* reprogram the RAR[0] in case user changed it. */
   2337 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2338 
   2339 	return;
   2340 }
   2341 
   2342 
   2343 /*********************************************************************
   2344  *
   2345  *  Determine hardware revision.
   2346  *
   2347  **********************************************************************/
   2348 static void
   2349 ixgbe_identify_hardware(struct adapter *adapter)
   2350 {
   2351 	pcitag_t tag;
   2352 	pci_chipset_tag_t pc;
   2353 	pcireg_t subid, id;
   2354 	struct ixgbe_hw *hw = &adapter->hw;
   2355 
   2356 	pc = adapter->osdep.pc;
   2357 	tag = adapter->osdep.tag;
   2358 
   2359 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2360 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2361 
   2362 	/* Save off the information about this board */
   2363 	hw->vendor_id = PCI_VENDOR(id);
   2364 	hw->device_id = PCI_PRODUCT(id);
   2365 	hw->revision_id =
   2366 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2367 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2368 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2369 
   2370 	/* We need this here to set the num_segs below */
   2371 	ixgbe_set_mac_type(hw);
   2372 
   2373 	/* Pick up the 82599 and VF settings */
   2374 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2375 		hw->phy.smart_speed = ixgbe_smart_speed;
   2376 		adapter->num_segs = IXGBE_82599_SCATTER;
   2377 	} else
   2378 		adapter->num_segs = IXGBE_82598_SCATTER;
   2379 
   2380 	return;
   2381 }
   2382 
   2383 /*********************************************************************
   2384  *
   2385  *  Determine optic type
   2386  *
   2387  **********************************************************************/
   2388 static void
   2389 ixgbe_setup_optics(struct adapter *adapter)
   2390 {
   2391 	struct ixgbe_hw *hw = &adapter->hw;
   2392 	int		layer;
   2393 
   2394 	layer = ixgbe_get_supported_physical_layer(hw);
   2395 
   2396 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2397 		adapter->optics = IFM_10G_T;
   2398 		return;
   2399 	}
   2400 
   2401 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2402 		adapter->optics = IFM_1000_T;
   2403 		return;
   2404 	}
   2405 
   2406 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2407 		adapter->optics = IFM_1000_SX;
   2408 		return;
   2409 	}
   2410 
   2411 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2412 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2413 		adapter->optics = IFM_10G_LR;
   2414 		return;
   2415 	}
   2416 
   2417 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2418 		adapter->optics = IFM_10G_SR;
   2419 		return;
   2420 	}
   2421 
   2422 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2423 		adapter->optics = IFM_10G_TWINAX;
   2424 		return;
   2425 	}
   2426 
   2427 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2428 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2429 		adapter->optics = IFM_10G_CX4;
   2430 		return;
   2431 	}
   2432 
   2433 	/* If we get here just set the default */
   2434 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2435 	return;
   2436 }
   2437 
   2438 /*********************************************************************
   2439  *
   2440  *  Setup the Legacy or MSI Interrupt handler
   2441  *
   2442  **********************************************************************/
   2443 static int
   2444 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2445 {
   2446 	device_t	dev = adapter->dev;
   2447 	struct		ix_queue *que = adapter->queues;
   2448 #ifndef IXGBE_LEGACY_TX
   2449 	struct tx_ring		*txr = adapter->tx_rings;
   2450 #endif
   2451 	char intrbuf[PCI_INTRSTR_LEN];
   2452 #if 0
   2453 	int		rid = 0;
   2454 
   2455 	/* MSI RID at 1 */
   2456 	if (adapter->msix == 1)
   2457 		rid = 1;
   2458 #endif
   2459 
   2460 	/* We allocate a single interrupt resource */
   2461  	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2462 		aprint_error_dev(dev, "unable to map interrupt\n");
   2463 		return ENXIO;
   2464 	} else {
   2465 		aprint_normal_dev(dev, "interrupting at %s\n",
   2466 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2467 			intrbuf, sizeof(intrbuf)));
   2468 	}
   2469 
   2470 	/*
   2471 	 * Try allocating a fast interrupt and the associated deferred
   2472 	 * processing contexts.
   2473 	 */
   2474 #ifndef IXGBE_LEGACY_TX
   2475 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2476 	    txr);
   2477 #endif
   2478 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2479 
   2480 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2481 	adapter->link_si =
   2482 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2483 	adapter->mod_si =
   2484 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2485 	adapter->msf_si =
   2486 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2487 
   2488 #ifdef IXGBE_FDIR
   2489 	adapter->fdir_si =
   2490 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2491 #endif
   2492 	if (que->que_si == NULL ||
   2493 	    adapter->link_si == NULL ||
   2494 	    adapter->mod_si == NULL ||
   2495 #ifdef IXGBE_FDIR
   2496 	    adapter->fdir_si == NULL ||
   2497 #endif
   2498 	    adapter->msf_si == NULL) {
   2499 		aprint_error_dev(dev,
   2500 		    "could not establish software interrupts\n");
   2501 		return ENXIO;
   2502 	}
   2503 
   2504 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2505 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2506 	if (adapter->osdep.intr == NULL) {
   2507 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2508 		softint_disestablish(que->que_si);
   2509 		softint_disestablish(adapter->link_si);
   2510 		softint_disestablish(adapter->mod_si);
   2511 		softint_disestablish(adapter->msf_si);
   2512 #ifdef IXGBE_FDIR
   2513 		softint_disestablish(adapter->fdir_si);
   2514 #endif
   2515 		return ENXIO;
   2516 	}
   2517 	/* For simplicity in the handlers */
   2518 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2519 
   2520 	return (0);
   2521 }
   2522 
   2523 
   2524 /*********************************************************************
   2525  *
   2526  *  Setup MSIX Interrupt resources and handlers
   2527  *
   2528  **********************************************************************/
   2529 static int
   2530 ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
   2531 {
   2532 #if !defined(NETBSD_MSI_OR_MSIX)
   2533 	return 0;
   2534 #else
   2535 	device_t        dev = adapter->dev;
   2536 	struct 		ix_queue *que = adapter->queues;
   2537 	struct  	tx_ring *txr = adapter->tx_rings;
   2538 	int 		error, rid, vector = 0;
   2539 
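         	/* One MSI-X vector per queue (TX/RX pair); the final vector,
         	 * set up after this loop, handles link and other causes. */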
   2540 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
   2541 		rid = vector + 1;
   2542 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
   2543 		    RF_SHAREABLE | RF_ACTIVE);
   2544 		if (que->res == NULL) {
   2545 			aprint_error_dev(dev,"Unable to allocate"
   2546 		    	    " bus resource: que interrupt [%d]\n", vector);
   2547 			return (ENXIO);
   2548 		}
   2549 		/* Set the handler function */
   2550 		error = bus_setup_intr(dev, que->res,
   2551 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2552 		    ixgbe_msix_que, que, &que->tag);
   2553 		if (error) {
   2554 			que->res = NULL;
   2555 			aprint_error_dev(dev,
   2556 			    "Failed to register QUE handler\n");
   2557 			return error;
   2558 		}
   2559 #if __FreeBSD_version >= 800504
   2560 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
   2561 #endif
   2562 		que->msix = vector;
    2563         	adapter->que_mask |= (u64)(1ULL << que->msix);
   2564 		/*
   2565 		** Bind the msix vector, and thus the
   2566 		** ring to the corresponding cpu.
   2567 		*/
   2568 		if (adapter->num_queues > 1)
   2569 			bus_bind_intr(dev, que->res, i);
   2570 
   2571 #ifndef IXGBE_LEGACY_TX
   2572 		txr->txq_si = softint_establish(SOFTINT_NET,
   2573 		    ixgbe_deferred_mq_start, txr);
   2574 #endif
   2575 		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
   2576 		    que);
   2577 		if (que->que_si == NULL) {
   2578 			aprint_error_dev(dev,
   2579 			    "could not establish software interrupt\n");
   2580 		}
   2581 	}
   2582 
   2583 	/* and Link */
   2584 	rid = vector + 1;
   2585 	adapter->res = bus_alloc_resource_any(dev,
   2586     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
   2587 	if (!adapter->res) {
   2588 		aprint_error_dev(dev,"Unable to allocate bus resource: "
   2589 		    "Link interrupt [%d]\n", rid);
   2590 		return (ENXIO);
   2591 	}
   2592 	/* Set the link handler function */
   2593 	error = bus_setup_intr(dev, adapter->res,
   2594 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
   2595 	    ixgbe_msix_link, adapter, &adapter->tag);
   2596 	if (error) {
   2597 		adapter->res = NULL;
   2598 		aprint_error_dev(dev, "Failed to register LINK handler\n");
   2599 		return (error);
   2600 	}
   2601 #if __FreeBSD_version >= 800504
   2602 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
   2603 #endif
   2604 	adapter->linkvec = vector;
   2605 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2606 	adapter->link_si =
   2607 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2608 	adapter->mod_si =
   2609 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2610 	adapter->msf_si =
   2611 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2612 #ifdef IXGBE_FDIR
   2613 	adapter->fdir_si =
   2614 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2615 #endif
   2616 
   2617 	return (0);
   2618 #endif
   2619 }
   2620 
   2621 /*
   2622  * Setup Either MSI/X or MSI
   2623  */
   2624 static int
   2625 ixgbe_setup_msix(struct adapter *adapter)
   2626 {
   2627 #if !defined(NETBSD_MSI_OR_MSIX)
   2628 	return 0;
   2629 #else
   2630 	device_t dev = adapter->dev;
   2631 	int rid, want, queues, msgs;
   2632 
   2633 	/* Override by tuneable */
   2634 	if (ixgbe_enable_msix == 0)
   2635 		goto msi;
   2636 
   2637 	/* First try MSI/X */
   2638 	rid = PCI_BAR(MSIX_82598_BAR);
   2639 	adapter->msix_mem = bus_alloc_resource_any(dev,
   2640 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2641        	if (!adapter->msix_mem) {
   2642 		rid += 4;	/* 82599 maps in higher BAR */
   2643 		adapter->msix_mem = bus_alloc_resource_any(dev,
   2644 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
   2645 	}
   2646        	if (!adapter->msix_mem) {
   2647 		/* May not be enabled */
   2648 		device_printf(adapter->dev,
   2649 		    "Unable to map MSIX table \n");
   2650 		goto msi;
   2651 	}
   2652 
   2653 	msgs = pci_msix_count(dev);
   2654 	if (msgs == 0) { /* system has msix disabled */
   2655 		bus_release_resource(dev, SYS_RES_MEMORY,
   2656 		    rid, adapter->msix_mem);
   2657 		adapter->msix_mem = NULL;
   2658 		goto msi;
   2659 	}
   2660 
   2661 	/* Figure out a reasonable auto config value */
   2662 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
   2663 
   2664 	if (ixgbe_num_queues != 0)
   2665 		queues = ixgbe_num_queues;
   2666 	/* Set max queues to 8 when autoconfiguring */
   2667 	else if ((ixgbe_num_queues == 0) && (queues > 8))
   2668 		queues = 8;
   2669 
   2670 	/*
   2671 	** Want one vector (RX/TX pair) per queue
   2672 	** plus an additional for Link.
   2673 	*/
   2674 	want = queues + 1;
   2675 	if (msgs >= want)
   2676 		msgs = want;
   2677 	else {
   2678                	device_printf(adapter->dev,
   2679 		    "MSIX Configuration Problem, "
   2680 		    "%d vectors but %d queues wanted!\n",
   2681 		    msgs, want);
   2682 		return (0); /* Will go to Legacy setup */
   2683 	}
   2684 	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
   2685                	device_printf(adapter->dev,
   2686 		    "Using MSIX interrupts with %d vectors\n", msgs);
   2687 		adapter->num_queues = queues;
   2688 		return (msgs);
   2689 	}
   2690 msi:
   2691        	msgs = pci_msi_count(dev);
   2692        	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
   2693                	device_printf(adapter->dev,"Using an MSI interrupt\n");
   2694 	else
   2695                	device_printf(adapter->dev,"Using a Legacy interrupt\n");
   2696 	return (msgs);
   2697 #endif
   2698 }
   2699 
   2700 
   2701 static int
   2702 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2703 {
   2704 	pcireg_t	memtype;
   2705 	device_t        dev = adapter->dev;
   2706 	bus_addr_t addr;
   2707 	int flags;
   2708 
   2709 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2710 	switch (memtype) {
   2711 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2712 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2713 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2714 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2715 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2716 			goto map_err;
   2717 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2718 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2719 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2720 		}
   2721 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2722 		     adapter->osdep.mem_size, flags,
   2723 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2724 map_err:
   2725 			adapter->osdep.mem_size = 0;
   2726 			aprint_error_dev(dev, "unable to map BAR0\n");
   2727 			return ENXIO;
   2728 		}
   2729 		break;
   2730 	default:
   2731 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2732 		return ENXIO;
   2733 	}
   2734 
   2735 	/* Legacy defaults */
   2736 	adapter->num_queues = 1;
   2737 	adapter->hw.back = &adapter->osdep;
   2738 
   2739 	/*
    2740 	** Now set up MSI or MSI/X; this should
    2741 	** return the number of supported
    2742 	** vectors. (Will be 1 for MSI.)
   2743 	*/
   2744 	adapter->msix = ixgbe_setup_msix(adapter);
   2745 	return (0);
   2746 }
   2747 
   2748 static void
   2749 ixgbe_free_pci_resources(struct adapter * adapter)
   2750 {
   2751 #if defined(NETBSD_MSI_OR_MSIX)
   2752 	struct 		ix_queue *que = adapter->queues;
   2753 	device_t	dev = adapter->dev;
   2754 #endif
   2755 	int		rid;
   2756 
   2757 #if defined(NETBSD_MSI_OR_MSIX)
   2758 	int		 memrid;
   2759 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2760 		memrid = PCI_BAR(MSIX_82598_BAR);
   2761 	else
   2762 		memrid = PCI_BAR(MSIX_82599_BAR);
   2763 
   2764 	/*
   2765 	** There is a slight possibility of a failure mode
   2766 	** in attach that will result in entering this function
   2767 	** before interrupt resources have been initialized, and
    2768 	** in that case we do not want to execute the loops below.
    2769 	** We can detect this reliably by the state of the adapter's
    2770 	** res pointer.
   2771 	*/
   2772 	if (adapter->res == NULL)
   2773 		goto mem;
   2774 
   2775 	/*
   2776 	**  Release all msix queue resources:
   2777 	*/
   2778 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2779 		rid = que->msix + 1;
   2780 		if (que->tag != NULL) {
   2781 			bus_teardown_intr(dev, que->res, que->tag);
   2782 			que->tag = NULL;
   2783 		}
   2784 		if (que->res != NULL)
   2785 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2786 	}
   2787 #endif
   2788 
   2789 	/* Clean the Legacy or Link interrupt last */
   2790 	if (adapter->linkvec) /* we are doing MSIX */
   2791 		rid = adapter->linkvec + 1;
   2792 	else
   2793 		(adapter->msix != 0) ? (rid = 1):(rid = 0);
   2794 
   2795 	pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2796 	adapter->osdep.intr = NULL;
   2797 
   2798 #if defined(NETBSD_MSI_OR_MSIX)
   2799 mem:
   2800 	if (adapter->msix)
   2801 		pci_release_msi(dev);
   2802 
   2803 	if (adapter->msix_mem != NULL)
   2804 		bus_release_resource(dev, SYS_RES_MEMORY,
   2805 		    memrid, adapter->msix_mem);
   2806 #endif
   2807 
   2808 	if (adapter->osdep.mem_size != 0) {
   2809 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2810 		    adapter->osdep.mem_bus_space_handle,
   2811 		    adapter->osdep.mem_size);
   2812 	}
   2813 
   2814 	return;
   2815 }
   2816 
   2817 /*********************************************************************
   2818  *
   2819  *  Setup networking device structure and register an interface.
   2820  *
   2821  **********************************************************************/
   2822 static int
   2823 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2824 {
   2825 	struct ethercom *ec = &adapter->osdep.ec;
   2826 	struct ixgbe_hw *hw = &adapter->hw;
   2827 	struct ifnet   *ifp;
   2828 
   2829 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2830 
   2831 	ifp = adapter->ifp = &ec->ec_if;
   2832 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2833 	ifp->if_baudrate = IF_Gbps(10);
   2834 	ifp->if_init = ixgbe_init;
   2835 	ifp->if_stop = ixgbe_ifstop;
   2836 	ifp->if_softc = adapter;
   2837 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2838 	ifp->if_ioctl = ixgbe_ioctl;
   2839 #ifndef IXGBE_LEGACY_TX
   2840 	ifp->if_transmit = ixgbe_mq_start;
   2841 	ifp->if_qflush = ixgbe_qflush;
   2842 #else
   2843 	ifp->if_start = ixgbe_start;
   2844 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2845 #endif
   2846 
   2847 	if_attach(ifp);
   2848 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2849 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2850 
   2851 	adapter->max_frame_size =
   2852 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2853 
   2854 	/*
   2855 	 * Tell the upper layer(s) we support long frames.
   2856 	 */
   2857 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2858 
   2859 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2860 	ifp->if_capenable = 0;
   2861 
   2862 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2863 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2864 	ifp->if_capabilities |= IFCAP_LRO;
   2865 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2866 	    		    | ETHERCAP_VLAN_MTU;
   2867 	ec->ec_capenable = ec->ec_capabilities;
   2868 
   2869 	/*
    2870 	** Don't turn this on by default: if vlans are
    2871 	** created on another pseudo device (e.g. lagg),
    2872 	** vlan events are not passed through and operation
    2873 	** breaks, but with HW FILTER off it works. If
    2874 	** using vlans directly on the ixgbe driver you can
    2875 	** enable this and get full hardware tag filtering.
   2876 	*/
   2877 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2878 
   2879 	/*
   2880 	 * Specify the media types supported by this adapter and register
   2881 	 * callbacks to update media and link information
   2882 	 */
   2883 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2884 		     ixgbe_media_status);
   2885 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2886 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2887 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2888 		ifmedia_add(&adapter->media,
   2889 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2890 		ifmedia_add(&adapter->media,
   2891 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2892 	}
   2893 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   2894 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   2895 
   2896 	return (0);
   2897 }
   2898 
   2899 static void
   2900 ixgbe_config_link(struct adapter *adapter)
   2901 {
   2902 	struct ixgbe_hw *hw = &adapter->hw;
   2903 	u32	autoneg, err = 0;
   2904 	bool	sfp, negotiate;
   2905 
   2906 	sfp = ixgbe_is_sfp(hw);
   2907 
   2908 	if (sfp) {
   2909 		void *ip;
   2910 
   2911 		if (hw->phy.multispeed_fiber) {
   2912 			hw->mac.ops.setup_sfp(hw);
   2913 			ixgbe_enable_tx_laser(hw);
   2914 			ip = adapter->msf_si;
   2915 		} else {
   2916 			ip = adapter->mod_si;
   2917 		}
   2918 
   2919 		kpreempt_disable();
   2920 		softint_schedule(ip);
   2921 		kpreempt_enable();
   2922 	} else {
   2923 		if (hw->mac.ops.check_link)
   2924 			err = ixgbe_check_link(hw, &adapter->link_speed,
   2925 			    &adapter->link_up, FALSE);
   2926 		if (err)
   2927 			goto out;
   2928 		autoneg = hw->phy.autoneg_advertised;
   2929 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   2930                 	err  = hw->mac.ops.get_link_capabilities(hw,
   2931 			    &autoneg, &negotiate);
   2932 		else
   2933 			negotiate = 0;
   2934 		if (err)
   2935 			goto out;
   2936 		if (hw->mac.ops.setup_link)
   2937                 	err = hw->mac.ops.setup_link(hw,
   2938 			    autoneg, adapter->link_up);
   2939 	}
   2940 out:
   2941 	return;
   2942 }
   2943 
   2944 /********************************************************************
   2945  * Manage DMA'able memory.
   2946  *******************************************************************/
   2947 
   2948 static int
   2949 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2950 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2951 {
   2952 	device_t dev = adapter->dev;
   2953 	int             r, rsegs;
   2954 
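         	/*
         	 * Standard bus_dma(9) sequence: create a DMA tag, allocate
         	 * and map the memory, then create a map and load it.  Each
         	 * failure path unwinds the earlier steps via the fail_*
         	 * labels below.
         	 */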
   2955 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2956 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2957 			       size,	/* maxsize */
   2958 			       1,	/* nsegments */
   2959 			       size,	/* maxsegsize */
   2960 			       BUS_DMA_ALLOCNOW,	/* flags */
   2961 			       &dma->dma_tag);
   2962 	if (r != 0) {
   2963 		aprint_error_dev(dev,
   2964 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2965 		goto fail_0;
   2966 	}
   2967 
   2968 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2969 		size,
   2970 		dma->dma_tag->dt_alignment,
   2971 		dma->dma_tag->dt_boundary,
   2972 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2973 	if (r != 0) {
   2974 		aprint_error_dev(dev,
   2975 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2976 		goto fail_1;
   2977 	}
   2978 
   2979 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2980 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2981 	if (r != 0) {
   2982 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2983 		    __func__, r);
   2984 		goto fail_2;
   2985 	}
   2986 
   2987 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2988 	if (r != 0) {
   2989 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2990 		    __func__, r);
   2991 		goto fail_3;
   2992 	}
   2993 
   2994 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2995 			    size,
   2996 			    NULL,
   2997 			    mapflags | BUS_DMA_NOWAIT);
   2998 	if (r != 0) {
   2999 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   3000 		    __func__, r);
   3001 		goto fail_4;
   3002 	}
   3003 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3004 	dma->dma_size = size;
   3005 	return 0;
   3006 fail_4:
   3007 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3008 fail_3:
   3009 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3010 fail_2:
   3011 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3012 fail_1:
   3013 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3014 fail_0:
   3015 	return r;
   3016 }
   3017 
   3018 static void
   3019 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3020 {
   3021 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3022 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3023 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3024 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3025 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3026 }
   3027 
   3028 
   3029 /*********************************************************************
   3030  *
   3031  *  Allocate memory for the transmit and receive rings, and then
   3032  *  the descriptors associated with each, called only once at attach.
   3033  *
   3034  **********************************************************************/
   3035 static int
   3036 ixgbe_allocate_queues(struct adapter *adapter)
   3037 {
   3038 	device_t	dev = adapter->dev;
   3039 	struct ix_queue	*que;
   3040 	struct tx_ring	*txr;
   3041 	struct rx_ring	*rxr;
   3042 	int rsize, tsize, error = IXGBE_SUCCESS;
   3043 	int txconf = 0, rxconf = 0;
   3044 
   3045         /* First allocate the top level queue structs */
   3046         if (!(adapter->queues =
   3047             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
   3048             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3049                 aprint_error_dev(dev, "Unable to allocate queue memory\n");
   3050                 error = ENOMEM;
   3051                 goto fail;
   3052         }
   3053 
   3054 	/* First allocate the TX ring struct memory */
   3055 	if (!(adapter->tx_rings =
   3056 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
   3057 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3058 		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
   3059 		error = ENOMEM;
   3060 		goto tx_fail;
   3061 	}
   3062 
   3063 	/* Next allocate the RX */
   3064 	if (!(adapter->rx_rings =
   3065 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
   3066 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3067 		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
   3068 		error = ENOMEM;
   3069 		goto rx_fail;
   3070 	}
   3071 
   3072 	/* For the ring itself */
   3073 	tsize = roundup2(adapter->num_tx_desc *
   3074 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
   3075 
   3076 	/*
    3077 	 * Now set up the TX queues; txconf is needed to handle the
    3078 	 * possibility that things fail midcourse and we need to
    3079 	 * undo the memory allocations gracefully.
   3080 	 */
   3081 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
   3082 		/* Set up some basics */
   3083 		txr = &adapter->tx_rings[i];
   3084 		txr->adapter = adapter;
   3085 		txr->me = i;
   3086 		txr->num_desc = adapter->num_tx_desc;
   3087 
   3088 		/* Initialize the TX side lock */
   3089 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
   3090 		    device_xname(dev), txr->me);
   3091 		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
   3092 
   3093 		if (ixgbe_dma_malloc(adapter, tsize,
   3094 			&txr->txdma, BUS_DMA_NOWAIT)) {
   3095 			aprint_error_dev(dev,
   3096 			    "Unable to allocate TX Descriptor memory\n");
   3097 			error = ENOMEM;
   3098 			goto err_tx_desc;
   3099 		}
   3100 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
   3101 		bzero((void *)txr->tx_base, tsize);
   3102 
   3103         	/* Now allocate transmit buffers for the ring */
   3104         	if (ixgbe_allocate_transmit_buffers(txr)) {
   3105 			aprint_error_dev(dev,
   3106 			    "Critical Failure setting up transmit buffers\n");
   3107 			error = ENOMEM;
   3108 			goto err_tx_desc;
   3109         	}
   3110 #ifndef IXGBE_LEGACY_TX
   3111 		/* Allocate a buf ring */
   3112 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
   3113 		    M_WAITOK, &txr->tx_mtx);
   3114 		if (txr->br == NULL) {
   3115 			aprint_error_dev(dev,
   3116 			    "Critical Failure setting up buf ring\n");
   3117 			error = ENOMEM;
   3118 			goto err_tx_desc;
   3119         	}
   3120 #endif
   3121 	}
   3122 
   3123 	/*
   3124 	 * Next the RX queues...
   3125 	 */
   3126 	rsize = roundup2(adapter->num_rx_desc *
   3127 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   3128 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
   3129 		rxr = &adapter->rx_rings[i];
   3130 		/* Set up some basics */
   3131 		rxr->adapter = adapter;
   3132 		rxr->me = i;
   3133 		rxr->num_desc = adapter->num_rx_desc;
   3134 
   3135 		/* Initialize the RX side lock */
   3136 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
   3137 		    device_xname(dev), rxr->me);
   3138 		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
   3139 
   3140 		if (ixgbe_dma_malloc(adapter, rsize,
   3141 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
   3142 			aprint_error_dev(dev,
    3143 			    "Unable to allocate RX Descriptor memory\n");
   3144 			error = ENOMEM;
   3145 			goto err_rx_desc;
   3146 		}
   3147 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
   3148 		bzero((void *)rxr->rx_base, rsize);
   3149 
    3150 		/* Allocate receive buffers for the ring */
   3151 		if (ixgbe_allocate_receive_buffers(rxr)) {
   3152 			aprint_error_dev(dev,
   3153 			    "Critical Failure setting up receive buffers\n");
   3154 			error = ENOMEM;
   3155 			goto err_rx_desc;
   3156 		}
   3157 	}
   3158 
   3159 	/*
   3160 	** Finally set up the queue holding structs
   3161 	*/
   3162 	for (int i = 0; i < adapter->num_queues; i++) {
   3163 		que = &adapter->queues[i];
   3164 		que->adapter = adapter;
   3165 		que->txr = &adapter->tx_rings[i];
   3166 		que->rxr = &adapter->rx_rings[i];
   3167 	}
   3168 
   3169 	return (0);
   3170 
   3171 err_rx_desc:
   3172 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
   3173 		ixgbe_dma_free(adapter, &rxr->rxdma);
   3174 err_tx_desc:
   3175 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
   3176 		ixgbe_dma_free(adapter, &txr->txdma);
   3177 	free(adapter->rx_rings, M_DEVBUF);
   3178 rx_fail:
   3179 	free(adapter->tx_rings, M_DEVBUF);
   3180 tx_fail:
   3181 	free(adapter->queues, M_DEVBUF);
   3182 fail:
   3183 	return (error);
   3184 }
   3185 
   3186 /*********************************************************************
   3187  *
   3188  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3189  *  the information needed to transmit a packet on the wire. This is
    3190  *  called only once at attach; setup is done on every reset.
   3191  *
   3192  **********************************************************************/
   3193 static int
   3194 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3195 {
   3196 	struct adapter *adapter = txr->adapter;
   3197 	device_t dev = adapter->dev;
   3198 	struct ixgbe_tx_buf *txbuf;
   3199 	int error, i;
   3200 
   3201 	/*
   3202 	 * Setup DMA descriptor areas.
   3203 	 */
   3204 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3205 			       1, 0,		/* alignment, bounds */
   3206 			       IXGBE_TSO_SIZE,		/* maxsize */
   3207 			       adapter->num_segs,	/* nsegments */
   3208 			       PAGE_SIZE,		/* maxsegsize */
   3209 			       0,			/* flags */
   3210 			       &txr->txtag))) {
    3211 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
   3212 		goto fail;
   3213 	}
   3214 
   3215 	if (!(txr->tx_buffers =
   3216 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3217 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3218 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3219 		error = ENOMEM;
   3220 		goto fail;
   3221 	}
   3222 
   3223         /* Create the descriptor buffer dma maps */
   3224 	txbuf = txr->tx_buffers;
   3225 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3226 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3227 		if (error != 0) {
   3228 			aprint_error_dev(dev,
   3229 			    "Unable to create TX DMA map (%d)\n", error);
   3230 			goto fail;
   3231 		}
   3232 	}
   3233 
   3234 	return 0;
   3235 fail:
    3236 	/* Free everything; this handles the case where we failed partway through */
   3237 	ixgbe_free_transmit_structures(adapter);
   3238 	return (error);
   3239 }
   3240 
   3241 /*********************************************************************
   3242  *
   3243  *  Initialize a transmit ring.
   3244  *
   3245  **********************************************************************/
   3246 static void
   3247 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3248 {
   3249 	struct adapter *adapter = txr->adapter;
   3250 	struct ixgbe_tx_buf *txbuf;
   3251 	int i;
   3252 #ifdef DEV_NETMAP
   3253 	struct netmap_adapter *na = NA(adapter->ifp);
   3254 	struct netmap_slot *slot;
   3255 #endif /* DEV_NETMAP */
   3256 
   3257 	/* Clear the old ring contents */
   3258 	IXGBE_TX_LOCK(txr);
   3259 #ifdef DEV_NETMAP
   3260 	/*
   3261 	 * (under lock): if in netmap mode, do some consistency
   3262 	 * checks and set slot to entry 0 of the netmap ring.
   3263 	 */
   3264 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3265 #endif /* DEV_NETMAP */
   3266 	bzero((void *)txr->tx_base,
   3267 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3268 	/* Reset indices */
   3269 	txr->next_avail_desc = 0;
   3270 	txr->next_to_clean = 0;
   3271 
   3272 	/* Free any existing tx buffers. */
   3273         txbuf = txr->tx_buffers;
   3274 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3275 		if (txbuf->m_head != NULL) {
   3276 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3277 			    0, txbuf->m_head->m_pkthdr.len,
   3278 			    BUS_DMASYNC_POSTWRITE);
   3279 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3280 			m_freem(txbuf->m_head);
   3281 			txbuf->m_head = NULL;
   3282 		}
   3283 #ifdef DEV_NETMAP
   3284 		/*
   3285 		 * In netmap mode, set the map for the packet buffer.
   3286 		 * NOTE: Some drivers (not this one) also need to set
   3287 		 * the physical buffer address in the NIC ring.
   3288 		 * Slots in the netmap ring (indexed by "si") are
   3289 		 * kring->nkr_hwofs positions "ahead" wrt the
   3290 		 * corresponding slot in the NIC ring. In some drivers
   3291 		 * (not here) nkr_hwofs can be negative. Function
   3292 		 * netmap_idx_n2k() handles wraparounds properly.
   3293 		 */
   3294 		if (slot) {
   3295 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3296 			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
   3297 		}
   3298 #endif /* DEV_NETMAP */
   3299 		/* Clear the EOP descriptor pointer */
   3300 		txbuf->eop = NULL;
   3301         }
   3302 
   3303 #ifdef IXGBE_FDIR
   3304 	/* Set the rate at which we sample packets */
   3305 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3306 		txr->atr_sample = atr_sample_rate;
   3307 #endif
   3308 
   3309 	/* Set number of descriptors available */
   3310 	txr->tx_avail = adapter->num_tx_desc;
   3311 
   3312 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3313 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3314 	IXGBE_TX_UNLOCK(txr);
   3315 }
   3316 
   3317 /*********************************************************************
   3318  *
   3319  *  Initialize all transmit rings.
   3320  *
   3321  **********************************************************************/
   3322 static int
   3323 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3324 {
   3325 	struct tx_ring *txr = adapter->tx_rings;
   3326 
   3327 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3328 		ixgbe_setup_transmit_ring(txr);
   3329 
   3330 	return (0);
   3331 }
   3332 
   3333 /*********************************************************************
   3334  *
   3335  *  Enable transmit unit.
   3336  *
   3337  **********************************************************************/
   3338 static void
   3339 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3340 {
   3341 	struct tx_ring	*txr = adapter->tx_rings;
   3342 	struct ixgbe_hw	*hw = &adapter->hw;
   3343 
   3344 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3345 
   3346 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3347 		u64	tdba = txr->txdma.dma_paddr;
   3348 		u32	txctrl;
   3349 
   3350 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3351 		       (tdba & 0x00000000ffffffffULL));
   3352 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3353 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3354 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3355 
   3356 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3357 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3358 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3359 
   3360 		/* Setup Transmit Descriptor Cmd Settings */
   3361 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3362 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3363 
   3364 		/* Set the processing limit */
   3365 		txr->process_limit = ixgbe_tx_process_limit;
   3366 
   3367 		/* Disable Head Writeback */
   3368 		switch (hw->mac.type) {
   3369 		case ixgbe_mac_82598EB:
   3370 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3371 			break;
   3372 		case ixgbe_mac_82599EB:
   3373 		case ixgbe_mac_X540:
   3374 		default:
   3375 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3376 			break;
   3377                 }
   3378 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3379 		switch (hw->mac.type) {
   3380 		case ixgbe_mac_82598EB:
   3381 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3382 			break;
   3383 		case ixgbe_mac_82599EB:
   3384 		case ixgbe_mac_X540:
   3385 		default:
   3386 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3387 			break;
   3388 		}
   3389 
   3390 	}
   3391 
   3392 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3393 		u32 dmatxctl, rttdcs;
   3394 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3395 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3396 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3397 		/* Disable arbiter to set MTQC */
   3398 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3399 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3400 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
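         		/* MTQC_64Q_1PB: 64 transmit queues sharing a single packet buffer */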
   3401 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3402 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3403 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3404 	}
   3405 
   3406 	return;
   3407 }
   3408 
   3409 /*********************************************************************
   3410  *
   3411  *  Free all transmit rings.
   3412  *
   3413  **********************************************************************/
   3414 static void
   3415 ixgbe_free_transmit_structures(struct adapter *adapter)
   3416 {
   3417 	struct tx_ring *txr = adapter->tx_rings;
   3418 
   3419 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3420 		ixgbe_free_transmit_buffers(txr);
   3421 		ixgbe_dma_free(adapter, &txr->txdma);
   3422 		IXGBE_TX_LOCK_DESTROY(txr);
   3423 	}
   3424 	free(adapter->tx_rings, M_DEVBUF);
   3425 }
   3426 
   3427 /*********************************************************************
   3428  *
   3429  *  Free transmit ring related data structures.
   3430  *
   3431  **********************************************************************/
   3432 static void
   3433 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3434 {
   3435 	struct adapter *adapter = txr->adapter;
   3436 	struct ixgbe_tx_buf *tx_buffer;
   3437 	int             i;
   3438 
   3439 	INIT_DEBUGOUT("free_transmit_ring: begin");
   3440 
   3441 	if (txr->tx_buffers == NULL)
   3442 		return;
   3443 
   3444 	tx_buffer = txr->tx_buffers;
   3445 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3446 		if (tx_buffer->m_head != NULL) {
   3447 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3448 			    0, tx_buffer->m_head->m_pkthdr.len,
   3449 			    BUS_DMASYNC_POSTWRITE);
   3450 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3451 			m_freem(tx_buffer->m_head);
   3452 			tx_buffer->m_head = NULL;
   3453 			if (tx_buffer->map != NULL) {
   3454 				ixgbe_dmamap_destroy(txr->txtag,
   3455 				    tx_buffer->map);
   3456 				tx_buffer->map = NULL;
   3457 			}
   3458 		} else if (tx_buffer->map != NULL) {
   3459 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3460 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3461 			tx_buffer->map = NULL;
   3462 		}
   3463 	}
   3464 #ifndef IXGBE_LEGACY_TX
   3465 	if (txr->br != NULL)
   3466 		buf_ring_free(txr->br, M_DEVBUF);
   3467 #endif
   3468 	if (txr->tx_buffers != NULL) {
   3469 		free(txr->tx_buffers, M_DEVBUF);
   3470 		txr->tx_buffers = NULL;
   3471 	}
   3472 	if (txr->txtag != NULL) {
   3473 		ixgbe_dma_tag_destroy(txr->txtag);
   3474 		txr->txtag = NULL;
   3475 	}
   3476 	return;
   3477 }
   3478 
   3479 /*********************************************************************
   3480  *
   3481  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3482  *
   3483  **********************************************************************/
   3484 
   3485 static int
   3486 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3487     u32 *cmd_type_len, u32 *olinfo_status)
   3488 {
   3489 	struct m_tag *mtag;
   3490 	struct adapter *adapter = txr->adapter;
   3491 	struct ethercom *ec = &adapter->osdep.ec;
   3492 	struct ixgbe_adv_tx_context_desc *TXD;
   3493 	struct ether_vlan_header *eh;
   3494 	struct ip ip;
   3495 	struct ip6_hdr ip6;
   3496 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3497 	int	ehdrlen, ip_hlen = 0;
   3498 	u16	etype;
   3499 	u8	ipproto __diagused = 0;
   3500 	int	offload = TRUE;
   3501 	int	ctxd = txr->next_avail_desc;
   3502 	u16	vtag = 0;
   3503 
   3504 	/* First check if TSO is to be used */
   3505 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3506 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3507 
   3508 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3509 		offload = FALSE;
   3510 
   3511 	/* Indicate the whole packet as payload when not doing TSO */
   3512        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3513 
   3514 	/* Now ready a context descriptor */
   3515 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3516 
   3517 	/*
   3518 	** In advanced descriptors the vlan tag must
   3519 	** be placed into the context descriptor. Hence
   3520 	** we need to make one even if not doing offloads.
   3521 	*/
   3522 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3523 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3524 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3525 	} else if (offload == FALSE) /* ... no offload to do */
   3526 		return 0;
   3527 
   3528 	/*
   3529 	 * Determine where frame payload starts.
   3530 	 * Jump over vlan headers if already present,
   3531 	 * helpful for QinQ too.
   3532 	 */
   3533 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3534 	eh = mtod(mp, struct ether_vlan_header *);
   3535 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3536 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3537 		etype = ntohs(eh->evl_proto);
   3538 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3539 	} else {
   3540 		etype = ntohs(eh->evl_encap_proto);
   3541 		ehdrlen = ETHER_HDR_LEN;
   3542 	}
   3543 
   3544 	/* Set the ether header length */
   3545 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3546 
   3547 	switch (etype) {
   3548 	case ETHERTYPE_IP:
   3549 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3550 		ip_hlen = ip.ip_hl << 2;
   3551 		ipproto = ip.ip_p;
   3552 #if 0
   3553 		ip.ip_sum = 0;
   3554 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3555 #else
   3556 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3557 		    ip.ip_sum == 0);
   3558 #endif
   3559 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3560 		break;
   3561 	case ETHERTYPE_IPV6:
   3562 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3563 		ip_hlen = sizeof(ip6);
   3564 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3565 		ipproto = ip6.ip6_nxt;
   3566 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3567 		break;
   3568 	default:
   3569 		break;
   3570 	}
   3571 
   3572 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3573 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3574 
   3575 	vlan_macip_lens |= ip_hlen;
   3576 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3577 
   3578 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3579 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3580 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3581 		KASSERT(ipproto == IPPROTO_TCP);
   3582 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3583 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3584 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3585 		KASSERT(ipproto == IPPROTO_UDP);
   3586 	}
   3587 
   3588 	/* Now copy bits into descriptor */
   3589 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3590 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3591 	TXD->seqnum_seed = htole32(0);
   3592 	TXD->mss_l4len_idx = htole32(0);
   3593 
   3594 	/* We've consumed the first desc, adjust counters */
   3595 	if (++ctxd == txr->num_desc)
   3596 		ctxd = 0;
   3597 	txr->next_avail_desc = ctxd;
   3598 	--txr->tx_avail;
   3599 
   3600         return 0;
   3601 }
   3602 
   3603 /**********************************************************************
   3604  *
   3605  *  Setup work for hardware segmentation offload (TSO) on
   3606  *  adapters using advanced tx descriptors
   3607  *
   3608  **********************************************************************/
   3609 static int
   3610 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3611     u32 *cmd_type_len, u32 *olinfo_status)
   3612 {
   3613 	struct m_tag *mtag;
   3614 	struct adapter *adapter = txr->adapter;
   3615 	struct ethercom *ec = &adapter->osdep.ec;
   3616 	struct ixgbe_adv_tx_context_desc *TXD;
   3617 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3618 	u32 mss_l4len_idx = 0, paylen;
   3619 	u16 vtag = 0, eh_type;
   3620 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3621 	struct ether_vlan_header *eh;
   3622 #ifdef INET6
   3623 	struct ip6_hdr *ip6;
   3624 #endif
   3625 #ifdef INET
   3626 	struct ip *ip;
   3627 #endif
   3628 	struct tcphdr *th;
   3629 
   3630 
   3631 	/*
   3632 	 * Determine where frame payload starts.
   3633 	 * Jump over vlan headers if already present
   3634 	 */
   3635 	eh = mtod(mp, struct ether_vlan_header *);
   3636 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3637 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3638 		eh_type = eh->evl_proto;
   3639 	} else {
   3640 		ehdrlen = ETHER_HDR_LEN;
   3641 		eh_type = eh->evl_encap_proto;
   3642 	}
   3643 
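         	/*
         	 * Seed th_sum with the pseudo-header checksum (addresses and
         	 * protocol, no length) so the hardware can complete the TCP
         	 * checksum for each TSO segment.
         	 */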
   3644 	switch (ntohs(eh_type)) {
   3645 #ifdef INET6
   3646 	case ETHERTYPE_IPV6:
   3647 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3648 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3649 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3650 			return (ENXIO);
    3651 		ip_hlen = sizeof(struct ip6_hdr);
   3653 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3654 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3655 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3656 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3657 		break;
   3658 #endif
   3659 #ifdef INET
   3660 	case ETHERTYPE_IP:
   3661 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3662 		if (ip->ip_p != IPPROTO_TCP)
   3663 			return (ENXIO);
   3664 		ip->ip_sum = 0;
   3665 		ip_hlen = ip->ip_hl << 2;
   3666 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3667 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3668 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3669 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3670 		/* Tell transmit desc to also do IPv4 checksum. */
   3671 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3672 		break;
   3673 #endif
   3674 	default:
   3675 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3676 		    __func__, ntohs(eh_type));
   3677 		break;
   3678 	}
   3679 
   3680 	ctxd = txr->next_avail_desc;
   3681 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3682 
   3683 	tcp_hlen = th->th_off << 2;
   3684 
   3685 	/* This is used in the transmit desc in encap */
   3686 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3687 
   3688 	/* VLAN MACLEN IPLEN */
   3689 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3690 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3691                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3692 	}
   3693 
   3694 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3695 	vlan_macip_lens |= ip_hlen;
   3696 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3697 
   3698 	/* ADV DTYPE TUCMD */
   3699 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3700 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3701 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3702 
   3703 	/* MSS L4LEN IDX */
   3704 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3705 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3706 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3707 
   3708 	TXD->seqnum_seed = htole32(0);
   3709 
   3710 	if (++ctxd == txr->num_desc)
   3711 		ctxd = 0;
   3712 
   3713 	txr->tx_avail--;
   3714 	txr->next_avail_desc = ctxd;
   3715 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3716 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3717 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3718 	++txr->tso_tx.ev_count;
   3719 	return (0);
   3720 }
   3721 
   3722 #ifdef IXGBE_FDIR
   3723 /*
   3724 ** This routine parses packet headers so that Flow
   3725 ** Director can make a hashed filter table entry
   3726 ** allowing traffic flows to be identified and kept
    3727 ** on the same CPU.  Doing this for every packet
    3728 ** would be a performance hit, so we only do it for
    3729 ** one out of every IXGBE_FDIR_RATE packets.
   3730 */
   3731 static void
   3732 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
   3733 {
   3734 	struct adapter			*adapter = txr->adapter;
   3735 	struct ix_queue			*que;
   3736 	struct ip			*ip;
   3737 	struct tcphdr			*th;
   3738 	struct udphdr			*uh;
   3739 	struct ether_vlan_header	*eh;
   3740 	union ixgbe_atr_hash_dword	input = {.dword = 0};
   3741 	union ixgbe_atr_hash_dword	common = {.dword = 0};
   3742 	int  				ehdrlen, ip_hlen;
   3743 	u16				etype;
   3744 
   3745 	eh = mtod(mp, struct ether_vlan_header *);
   3746 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3747 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3748 		etype = eh->evl_proto;
   3749 	} else {
   3750 		ehdrlen = ETHER_HDR_LEN;
   3751 		etype = eh->evl_encap_proto;
   3752 	}
   3753 
   3754 	/* Only handling IPv4 */
   3755 	if (etype != htons(ETHERTYPE_IP))
   3756 		return;
   3757 
   3758 	ip = (struct ip *)(mp->m_data + ehdrlen);
   3759 	ip_hlen = ip->ip_hl << 2;
   3760 
   3761 	/* check if we're UDP or TCP */
   3762 	switch (ip->ip_p) {
   3763 	case IPPROTO_TCP:
   3764 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3765 		/* src and dst are inverted */
   3766 		common.port.dst ^= th->th_sport;
   3767 		common.port.src ^= th->th_dport;
   3768 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
   3769 		break;
   3770 	case IPPROTO_UDP:
   3771 		uh = (struct udphdr *)((char *)ip + ip_hlen);
   3772 		/* src and dst are inverted */
   3773 		common.port.dst ^= uh->uh_sport;
   3774 		common.port.src ^= uh->uh_dport;
   3775 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
   3776 		break;
   3777 	default:
   3778 		return;
   3779 	}
   3780 
   3781 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
   3782 	if (mp->m_pkthdr.ether_vtag)
   3783 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
   3784 	else
   3785 		common.flex_bytes ^= etype;
   3786 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
   3787 
   3788 	que = &adapter->queues[txr->me];
   3789 	/*
   3790 	** This assumes the Rx queue and Tx
   3791 	** queue are bound to the same CPU
   3792 	*/
   3793 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
   3794 	    input, common, que->msix);
   3795 }
   3796 #endif /* IXGBE_FDIR */
   3797 
   3798 /**********************************************************************
   3799  *
   3800  *  Examine each tx_buffer in the used queue. If the hardware is done
   3801  *  processing the packet then free associated resources. The
   3802  *  tx_buffer is put back on the free queue.
   3803  *
   3804  **********************************************************************/
   3805 static bool
   3806 ixgbe_txeof(struct tx_ring *txr)
   3807 {
   3808 	struct adapter		*adapter = txr->adapter;
   3809 	struct ifnet		*ifp = adapter->ifp;
   3810 	u32			work, processed = 0;
   3811 	u16			limit = txr->process_limit;
   3812 	struct ixgbe_tx_buf	*buf;
   3813 	union ixgbe_adv_tx_desc *txd;
   3814 	struct timeval now, elapsed;
   3815 
   3816 	KASSERT(mutex_owned(&txr->tx_mtx));
   3817 
   3818 #ifdef DEV_NETMAP
   3819 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3820 		struct netmap_adapter *na = NA(ifp);
   3821 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3822 		txd = txr->tx_base;
   3823 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3824 		    BUS_DMASYNC_POSTREAD);
   3825 		/*
   3826 		 * In netmap mode, all the work is done in the context
   3827 		 * of the client thread. Interrupt handlers only wake up
   3828 		 * clients, which may be sleeping on individual rings
   3829 		 * or on a global resource for all rings.
   3830 		 * To implement tx interrupt mitigation, we wake up the client
   3831 		 * thread roughly every half ring, even if the NIC interrupts
   3832 		 * more frequently. This is implemented as follows:
   3833 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3834 		 *   the slot that should wake up the thread (nkr_num_slots
   3835 		 *   means the user thread should not be woken up);
   3836 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3837 		 *   or the slot has the DD bit set.
   3838 		 *
   3839 		 * When the driver has separate locks, we need to
   3840 		 * release and re-acquire txlock to avoid deadlocks.
   3841 		 * XXX see if we can find a better way.
   3842 		 */
   3843 		if (!netmap_mitigate ||
   3844 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3845 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3846 			netmap_tx_irq(ifp, txr->me |
   3847 			    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
   3848 		}
   3849 		return FALSE;
   3850 	}
   3851 #endif /* DEV_NETMAP */
   3852 
   3853 	if (txr->tx_avail == txr->num_desc) {
   3854 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3855 		return false;
   3856 	}
   3857 
   3858 	/* Get work starting point */
   3859 	work = txr->next_to_clean;
   3860 	buf = &txr->tx_buffers[work];
   3861 	txd = &txr->tx_base[work];
   3862 	work -= txr->num_desc; /* The distance to ring end */
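         	/*
         	 * 'work' now holds the ring index biased by -num_desc: it counts
         	 * up toward zero, and reaching zero means the scan wrapped past
         	 * the last descriptor, so the pointers are reset to the ring
         	 * start.  The bias is removed again before next_to_clean is set.
         	 */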
   3863         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3864 	    BUS_DMASYNC_POSTREAD);
   3865 	do {
    3866 		union ixgbe_adv_tx_desc *eop = buf->eop;
   3867 		if (eop == NULL) /* No work */
   3868 			break;
   3869 
   3870 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3871 			break;	/* I/O not complete */
   3872 
   3873 		if (buf->m_head) {
   3874 			txr->bytes +=
   3875 			    buf->m_head->m_pkthdr.len;
   3876 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3877 			    buf->map,
   3878 			    0, buf->m_head->m_pkthdr.len,
   3879 			    BUS_DMASYNC_POSTWRITE);
   3880 			ixgbe_dmamap_unload(txr->txtag,
   3881 			    buf->map);
   3882 			m_freem(buf->m_head);
   3883 			buf->m_head = NULL;
   3884 			/*
    3885 			 * NetBSD: Don't overwrite buf->map with NULL here;
    3886 			 * it would panic once the ring wraps around.
   3887 			 */
   3888 		}
   3889 		buf->eop = NULL;
   3890 		++txr->tx_avail;
   3891 
   3892 		/* We clean the range if multi segment */
   3893 		while (txd != eop) {
   3894 			++txd;
   3895 			++buf;
   3896 			++work;
   3897 			/* wrap the ring? */
   3898 			if (__predict_false(!work)) {
   3899 				work -= txr->num_desc;
   3900 				buf = txr->tx_buffers;
   3901 				txd = txr->tx_base;
   3902 			}
   3903 			if (buf->m_head) {
   3904 				txr->bytes +=
   3905 				    buf->m_head->m_pkthdr.len;
   3906 				bus_dmamap_sync(txr->txtag->dt_dmat,
   3907 				    buf->map,
   3908 				    0, buf->m_head->m_pkthdr.len,
   3909 				    BUS_DMASYNC_POSTWRITE);
   3910 				ixgbe_dmamap_unload(txr->txtag,
   3911 				    buf->map);
   3912 				m_freem(buf->m_head);
   3913 				buf->m_head = NULL;
   3914 				/*
    3915 				 * NetBSD: Don't overwrite buf->map with NULL
    3916 				 * here; it would panic once the ring wraps
    3917 				 * around.
   3918 				 */
   3919 			}
   3920 			++txr->tx_avail;
   3921 			buf->eop = NULL;
   3922 
   3923 		}
   3924 		++txr->packets;
   3925 		++processed;
   3926 		++ifp->if_opackets;
   3927 		getmicrotime(&txr->watchdog_time);
   3928 
   3929 		/* Try the next packet */
   3930 		++txd;
   3931 		++buf;
   3932 		++work;
   3933 		/* reset with a wrap */
   3934 		if (__predict_false(!work)) {
   3935 			work -= txr->num_desc;
   3936 			buf = txr->tx_buffers;
   3937 			txd = txr->tx_base;
   3938 		}
   3939 		prefetch(txd);
   3940 	} while (__predict_true(--limit));
   3941 
   3942 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3943 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3944 
   3945 	work += txr->num_desc;
   3946 	txr->next_to_clean = work;
   3947 
   3948 	/*
    3949 	** Watchdog calculation: we know there is work
    3950 	** outstanding or the first return would have been
    3951 	** taken, so nothing processed for too long
    3952 	** indicates a hang.
   3953 	*/
   3954 	getmicrotime(&now);
   3955 	timersub(&now, &txr->watchdog_time, &elapsed);
   3956 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   3957 		txr->queue_status = IXGBE_QUEUE_HUNG;
   3958 
   3959 	if (txr->tx_avail == txr->num_desc) {
   3960 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3961 		return false;
   3962 	}
   3963 
   3964 	return true;
   3965 }
   3966 
   3967 /*********************************************************************
   3968  *
   3969  *  Refresh mbuf buffers for RX descriptor rings
   3970  *   - now keeps its own state so discards due to resource
    3971  *     exhaustion are unnecessary; if an mbuf cannot be obtained
    3972  *     it just returns, keeping its placeholder, so it can simply
    3973  *     be called again later to retry.
   3974  *
   3975  **********************************************************************/
   3976 static void
   3977 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   3978 {
   3979 	struct adapter		*adapter = rxr->adapter;
   3980 	struct ixgbe_rx_buf	*rxbuf;
   3981 	struct mbuf		*mp;
   3982 	int			i, j, error;
   3983 	bool			refreshed = false;
   3984 
   3985 	i = j = rxr->next_to_refresh;
   3986 	/* Control the loop with one beyond */
   3987 	if (++j == rxr->num_desc)
   3988 		j = 0;
   3989 
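         	/*
         	 * 'j' is kept one descriptor ahead of 'i': 'i' indexes the buffer
         	 * being refreshed while 'j' pre-computes the next index, and the
         	 * loop exits once 'j' reaches 'limit'.
         	 */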
   3990 	while (j != limit) {
   3991 		rxbuf = &rxr->rx_buffers[i];
   3992 		if (rxbuf->buf == NULL) {
   3993 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   3994 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   3995 			if (mp == NULL) {
   3996 				rxr->no_jmbuf.ev_count++;
   3997 				goto update;
   3998 			}
   3999 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   4000 				m_adj(mp, ETHER_ALIGN);
   4001 		} else
   4002 			mp = rxbuf->buf;
   4003 
   4004 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4005 		/* If we're dealing with an mbuf that was copied rather
   4006 		 * than replaced, there's no need to go through busdma.
   4007 		 */
   4008 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4009 			/* Get the memory mapping */
   4010 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4011 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4012 			if (error != 0) {
   4013 				printf("Refresh mbufs: payload dmamap load"
   4014 				    " failure - %d\n", error);
   4015 				m_free(mp);
   4016 				rxbuf->buf = NULL;
   4017 				goto update;
   4018 			}
   4019 			rxbuf->buf = mp;
   4020 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4021 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4022 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4023 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4024 		} else {
   4025 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4026 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4027 		}
   4028 
   4029 		refreshed = true;
   4030 		/* Next is precalculated */
   4031 		i = j;
   4032 		rxr->next_to_refresh = i;
   4033 		if (++j == rxr->num_desc)
   4034 			j = 0;
   4035 	}
   4036 update:
   4037 	if (refreshed) /* Update hardware tail index */
   4038 		IXGBE_WRITE_REG(&adapter->hw,
   4039 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4040 	return;
   4041 }
   4042 
   4043 /*********************************************************************
   4044  *
   4045  *  Allocate memory for rx_buffer structures. Since we use one
   4046  *  rx_buffer per received packet, the maximum number of rx_buffer's
   4047  *  that we'll need is equal to the number of receive descriptors
   4048  *  that we've allocated.
   4049  *
   4050  **********************************************************************/
   4051 static int
   4052 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4053 {
   4054 	struct	adapter 	*adapter = rxr->adapter;
   4055 	device_t 		dev = adapter->dev;
   4056 	struct ixgbe_rx_buf 	*rxbuf;
   4057 	int             	i, bsize, error;
   4058 
   4059 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4060 	if (!(rxr->rx_buffers =
   4061 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4062 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4063 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4064 		error = ENOMEM;
   4065 		goto fail;
   4066 	}
   4067 
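         	/* Each RX buffer maps as one contiguous segment, sized for a 16 KB jumbo cluster */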
   4068 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4069 				   1, 0,	/* alignment, bounds */
   4070 				   MJUM16BYTES,		/* maxsize */
   4071 				   1,			/* nsegments */
   4072 				   MJUM16BYTES,		/* maxsegsize */
   4073 				   0,			/* flags */
   4074 				   &rxr->ptag))) {
   4075 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4076 		goto fail;
   4077 	}
   4078 
    4079 	for (i = 0; i < rxr->num_desc; i++) {
   4080 		rxbuf = &rxr->rx_buffers[i];
   4081 		error = ixgbe_dmamap_create(rxr->ptag,
   4082 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4083 		if (error) {
   4084 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4085 			goto fail;
   4086 		}
   4087 	}
   4088 
   4089 	return (0);
   4090 
   4091 fail:
   4092 	/* Frees all, but can handle partial completion */
   4093 	ixgbe_free_receive_structures(adapter);
   4094 	return (error);
   4095 }
   4096 
   4097 /*
   4098 ** Used to detect a descriptor that has
   4099 ** been merged by Hardware RSC.
   4100 */
   4101 static inline u32
   4102 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4103 {
   4104 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4105 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4106 }
   4107 
   4108 /*********************************************************************
   4109  *
   4110  *  Initialize Hardware RSC (LRO) feature on 82599
    4111  *  Initialize the Hardware RSC (LRO) feature on 82599
    4112  *  for an RX ring; it is toggled by the LRO capability
    4113  *  even though it is transparent to the stack.
    4114  *
    4115  *  NOTE: since this HW feature only works with IPv4 and
    4116  *        our testing has shown soft LRO to be as effective,
    4117  *        it is disabled by default.
   4118  **********************************************************************/
   4119 static void
   4120 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4121 {
   4122 	struct	adapter 	*adapter = rxr->adapter;
   4123 	struct	ixgbe_hw	*hw = &adapter->hw;
   4124 	u32			rscctrl, rdrxctl;
   4125 
   4126 	/* If turning LRO/RSC off we need to disable it */
   4127 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4128 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
    4129 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
         		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4130 		return;
   4131 	}
   4132 
   4133 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4134 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4135 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4136 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4137 #endif /* DEV_NETMAP */
   4138 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4139 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4140 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4141 
   4142 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4143 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4144 	/*
   4145 	** Limit the total number of descriptors that
   4146 	** can be combined, so it does not exceed 64K
   4147 	*/
   4148 	if (rxr->mbuf_sz == MCLBYTES)
   4149 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4150 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4151 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4152 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4153 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4154 	else  /* Using 16K cluster */
   4155 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4156 
   4157 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4158 
   4159 	/* Enable TCP header recognition */
   4160 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4161 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4162 	    IXGBE_PSRTYPE_TCPHDR));
   4163 
   4164 	/* Disable RSC for ACK packets */
   4165 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4166 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4167 
   4168 	rxr->hw_rsc = TRUE;
   4169 }
   4170 
   4171 
   4172 static void
   4173 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4174 {
   4175 	struct ixgbe_rx_buf       *rxbuf;
   4176 	int i;
   4177 
   4178 	for (i = 0; i < rxr->num_desc; i++) {
   4179 		rxbuf = &rxr->rx_buffers[i];
   4180 		if (rxbuf->buf != NULL) {
   4181 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4182 			    0, rxbuf->buf->m_pkthdr.len,
   4183 			    BUS_DMASYNC_POSTREAD);
   4184 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4185 			rxbuf->buf->m_flags |= M_PKTHDR;
   4186 			m_freem(rxbuf->buf);
   4187 			rxbuf->buf = NULL;
   4188 		}
   4189 	}
   4190 }
   4191 
   4192 
   4193 /*********************************************************************
   4194  *
   4195  *  Initialize a receive ring and its buffers.
   4196  *
   4197  **********************************************************************/
   4198 static int
   4199 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4200 {
   4201 	struct	adapter 	*adapter;
   4202 	struct ixgbe_rx_buf	*rxbuf;
   4203 #ifdef LRO
   4204 	struct ifnet		*ifp;
   4205 	struct lro_ctrl		*lro = &rxr->lro;
   4206 #endif /* LRO */
   4207 	int			rsize, error = 0;
   4208 #ifdef DEV_NETMAP
   4209 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4210 	struct netmap_slot *slot;
   4211 #endif /* DEV_NETMAP */
   4212 
   4213 	adapter = rxr->adapter;
   4214 #ifdef LRO
   4215 	ifp = adapter->ifp;
   4216 #endif /* LRO */
   4217 
   4218 	/* Clear the ring contents */
   4219 	IXGBE_RX_LOCK(rxr);
   4220 #ifdef DEV_NETMAP
   4221 	/* same as in ixgbe_setup_transmit_ring() */
   4222 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4223 #endif /* DEV_NETMAP */
   4224 	rsize = roundup2(adapter->num_rx_desc *
   4225 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4226 	bzero((void *)rxr->rx_base, rsize);
   4227 	/* Cache the size */
   4228 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4229 
   4230 	/* Free current RX buffer structs and their mbufs */
   4231 	ixgbe_free_receive_ring(rxr);
   4232 
   4233 	IXGBE_RX_UNLOCK(rxr);
   4234 
   4235 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4236 	 * or size of jumbo mbufs may have changed.
   4237 	 */
   4238 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4239 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4240 
   4241 	IXGBE_RX_LOCK(rxr);
   4242 
   4243 	/* Now replenish the mbufs */
   4244 	for (int j = 0; j != rxr->num_desc; ++j) {
   4245 		struct mbuf	*mp;
   4246 
   4247 		rxbuf = &rxr->rx_buffers[j];
   4248 #ifdef DEV_NETMAP
   4249 		/*
   4250 		 * In netmap mode, fill the map and set the buffer
   4251 		 * address in the NIC ring, considering the offset
   4252 		 * between the netmap and NIC rings (see comment in
   4253 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4254 		 * an mbuf, so end the block with a continue;
   4255 		 */
   4256 		if (slot) {
   4257 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4258 			uint64_t paddr;
   4259 			void *addr;
   4260 
   4261 			addr = PNMB(slot + sj, &paddr);
   4262 			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
   4263 			/* Update descriptor */
   4264 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4265 			continue;
   4266 		}
   4267 #endif /* DEV_NETMAP */
   4268 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4269 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4270 		if (rxbuf->buf == NULL) {
   4271 			error = ENOBUFS;
   4272                         goto fail;
   4273 		}
   4274 		mp = rxbuf->buf;
   4275 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4276 		/* Get the memory mapping */
   4277 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4278 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4279 		if (error != 0)
   4280                         goto fail;
   4281 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4282 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4283 		/* Update descriptor */
   4284 		rxr->rx_base[j].read.pkt_addr =
   4285 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4286 	}
   4287 
   4288 
   4289 	/* Setup our descriptor indices */
   4290 	rxr->next_to_check = 0;
   4291 	rxr->next_to_refresh = 0;
   4292 	rxr->lro_enabled = FALSE;
   4293 	rxr->rx_copies.ev_count = 0;
   4294 	rxr->rx_bytes.ev_count = 0;
   4295 	rxr->discard = FALSE;
   4296 	rxr->vtag_strip = FALSE;
   4297 
   4298 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4299 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4300 
   4301 	/*
   4302 	** Now set up the LRO interface:
   4303 	*/
   4304 	if (ixgbe_rsc_enable)
   4305 		ixgbe_setup_hw_rsc(rxr);
   4306 #ifdef LRO
   4307 	else if (ifp->if_capenable & IFCAP_LRO) {
   4308 		device_t dev = adapter->dev;
   4309 		int err = tcp_lro_init(lro);
   4310 		if (err) {
   4311 			device_printf(dev, "LRO Initialization failed!\n");
   4312 			goto fail;
   4313 		}
   4314 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4315 		rxr->lro_enabled = TRUE;
   4316 		lro->ifp = adapter->ifp;
   4317 	}
   4318 #endif /* LRO */
   4319 
   4320 	IXGBE_RX_UNLOCK(rxr);
   4321 	return (0);
   4322 
   4323 fail:
   4324 	ixgbe_free_receive_ring(rxr);
   4325 	IXGBE_RX_UNLOCK(rxr);
   4326 	return (error);
   4327 }
   4328 
   4329 /*********************************************************************
   4330  *
   4331  *  Initialize all receive rings.
   4332  *
   4333  **********************************************************************/
   4334 static int
   4335 ixgbe_setup_receive_structures(struct adapter *adapter)
   4336 {
   4337 	struct rx_ring *rxr = adapter->rx_rings;
   4338 	int j;
   4339 
   4340 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4341 		if (ixgbe_setup_receive_ring(rxr))
   4342 			goto fail;
   4343 
   4344 	return (0);
   4345 fail:
   4346 	/*
    4347 	 * Free RX buffers allocated so far; we will only handle
    4348 	 * the rings that completed, since the failing case will have
    4349 	 * cleaned up for itself. 'j' failed, so it's the terminus.
   4350 	 */
   4351 	for (int i = 0; i < j; ++i) {
   4352 		rxr = &adapter->rx_rings[i];
   4353 		ixgbe_free_receive_ring(rxr);
   4354 	}
   4355 
   4356 	return (ENOBUFS);
   4357 }
   4358 
   4359 /*********************************************************************
   4360  *
   4361  *  Setup receive registers and features.
   4362  *
   4363  **********************************************************************/
   4364 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
   4365 
   4366 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
   4367 
   4368 static void
   4369 ixgbe_initialize_receive_units(struct adapter *adapter)
   4370 {
   4371 	int i;
   4372 	struct	rx_ring	*rxr = adapter->rx_rings;
   4373 	struct ixgbe_hw	*hw = &adapter->hw;
   4374 	struct ifnet   *ifp = adapter->ifp;
   4375 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4376 	u32		reta, mrqc = 0, hlreg, r[10];
   4377 
   4378 
   4379 	/*
   4380 	 * Make sure receives are disabled while
   4381 	 * setting up the descriptor ring
   4382 	 */
   4383 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4384 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4385 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4386 
   4387 	/* Enable broadcasts */
   4388 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4389 	fctrl |= IXGBE_FCTRL_BAM;
   4390 	fctrl |= IXGBE_FCTRL_DPF;
   4391 	fctrl |= IXGBE_FCTRL_PMCF;
   4392 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4393 
   4394 	/* Set for Jumbo Frames? */
   4395 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4396 	if (ifp->if_mtu > ETHERMTU)
   4397 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4398 	else
   4399 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4400 #ifdef DEV_NETMAP
   4401 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4402 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4403 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4404 	else
   4405 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4406 #endif /* DEV_NETMAP */
   4407 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4408 
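         	/* SRRCTL takes the RX buffer size in 1 KB units, so round up */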
   4409 	bufsz = (adapter->rx_mbuf_sz +
   4410 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4411 
   4412 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4413 		u64 rdba = rxr->rxdma.dma_paddr;
   4414 
   4415 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4416 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4417 			       (rdba & 0x00000000ffffffffULL));
   4418 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4419 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4420 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4421 
   4422 		/* Set up the SRRCTL register */
   4423 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4424 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4425 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4426 		srrctl |= bufsz;
   4427 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4428 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4429 
   4430 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4431 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4432 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4433 
   4434 		/* Set the processing limit */
   4435 		rxr->process_limit = ixgbe_rx_process_limit;
   4436 	}
   4437 
   4438 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4439 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4440 			      IXGBE_PSRTYPE_UDPHDR |
   4441 			      IXGBE_PSRTYPE_IPV4HDR |
   4442 			      IXGBE_PSRTYPE_IPV6HDR;
   4443 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4444 	}
   4445 
   4446 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4447 
   4448 	/* Setup RSS */
   4449 	if (adapter->num_queues > 1) {
   4450 		int j;
   4451 		reta = 0;
   4452 
   4453 		/* set up random bits */
   4454 		cprng_fast(&r, sizeof(r));
   4455 
   4456 		/* Set up the redirection table */
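         		/* 128 one-byte entries, packed four per 32-bit RETA register */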
   4457 		for (i = 0, j = 0; i < 128; i++, j++) {
   4458 			if (j == adapter->num_queues) j = 0;
   4459 			reta = (reta << 8) | (j * 0x11);
   4460 			if ((i & 3) == 3)
   4461 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4462 		}
   4463 
   4464 		/* Now fill our hash function seeds */
   4465 		for (i = 0; i < 10; i++)
   4466 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
   4467 
   4468 		/* Perform hash on these packet types */
   4469 		mrqc = IXGBE_MRQC_RSSEN
   4470 		     | IXGBE_MRQC_RSS_FIELD_IPV4
   4471 		     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4472 		     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4473 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4474 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4475 		     | IXGBE_MRQC_RSS_FIELD_IPV6
   4476 		     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4477 		     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4478 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4479 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4480 
   4481 		/* RSS and RX IPP Checksum are mutually exclusive */
   4482 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4483 	}
   4484 
   4485 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4486 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4487 
   4488 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4489 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4490 
   4491 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4492 
   4493 	return;
   4494 }
   4495 
   4496 /*********************************************************************
   4497  *
   4498  *  Free all receive rings.
   4499  *
   4500  **********************************************************************/
   4501 static void
   4502 ixgbe_free_receive_structures(struct adapter *adapter)
   4503 {
   4504 	struct rx_ring *rxr = adapter->rx_rings;
   4505 
   4506 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4507 #ifdef LRO
   4508 		struct lro_ctrl		*lro = &rxr->lro;
   4509 #endif /* LRO */
   4510 		ixgbe_free_receive_buffers(rxr);
   4511 #ifdef LRO
   4512 		/* Free LRO memory */
   4513 		tcp_lro_free(lro);
   4514 #endif /* LRO */
   4515 		/* Free the ring memory as well */
   4516 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4517 		IXGBE_RX_LOCK_DESTROY(rxr);
   4518 	}
   4519 
   4520 	free(adapter->rx_rings, M_DEVBUF);
   4521 }
   4522 
   4523 
   4524 /*********************************************************************
   4525  *
   4526  *  Free receive ring data structures
   4527  *
   4528  **********************************************************************/
   4529 static void
   4530 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4531 {
   4532 	struct adapter		*adapter = rxr->adapter;
   4533 	struct ixgbe_rx_buf	*rxbuf;
   4534 
   4535 	INIT_DEBUGOUT("free_receive_structures: begin");
   4536 
   4537 	/* Cleanup any existing buffers */
   4538 	if (rxr->rx_buffers != NULL) {
   4539 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4540 			rxbuf = &rxr->rx_buffers[i];
   4541 			if (rxbuf->buf != NULL) {
   4542 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4543 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4544 				    BUS_DMASYNC_POSTREAD);
   4545 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4546 				rxbuf->buf->m_flags |= M_PKTHDR;
   4547 				m_freem(rxbuf->buf);
   4548 			}
   4549 			rxbuf->buf = NULL;
   4550 			if (rxbuf->pmap != NULL) {
   4551 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4552 				rxbuf->pmap = NULL;
   4553 			}
   4554 		}
   4555 		if (rxr->rx_buffers != NULL) {
   4556 			free(rxr->rx_buffers, M_DEVBUF);
   4557 			rxr->rx_buffers = NULL;
   4558 		}
   4559 	}
   4560 
   4561 	if (rxr->ptag != NULL) {
   4562 		ixgbe_dma_tag_destroy(rxr->ptag);
   4563 		rxr->ptag = NULL;
   4564 	}
   4565 
   4566 	return;
   4567 }
   4568 
   4569 static __inline void
   4570 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4571 {
   4572 	int s;
   4573 
   4574 #ifdef LRO
   4575 	struct adapter	*adapter = ifp->if_softc;
   4576 	struct ethercom *ec = &adapter->osdep.ec;
   4577 
   4578         /*
    4579          * At the moment LRO is only for IP/TCP packets whose TCP checksum has
    4580          * been computed by hardware, and which carry no VLAN tag in the
    4581          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   4582          */
   4583         if (rxr->lro_enabled &&
   4584             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   4585             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4586             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4587             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   4588             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4589             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   4590             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   4591             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   4592                 /*
   4593                  * Send to the stack if:
   4594                  **  - LRO not enabled, or
   4595                  **  - no LRO resources, or
   4596                  **  - lro enqueue fails
   4597                  */
   4598                 if (rxr->lro.lro_cnt != 0)
   4599                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   4600                                 return;
   4601         }
   4602 #endif /* LRO */
   4603 
   4604 	IXGBE_RX_UNLOCK(rxr);
   4605 
   4606 	s = splnet();
   4607 	/* Pass this up to any BPF listeners. */
   4608 	bpf_mtap(ifp, m);
   4609 	(*ifp->if_input)(ifp, m);
   4610 	splx(s);
   4611 
   4612 	IXGBE_RX_LOCK(rxr);
   4613 }
   4614 
   4615 static __inline void
   4616 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4617 {
   4618 	struct ixgbe_rx_buf	*rbuf;
   4619 
   4620 	rbuf = &rxr->rx_buffers[i];
   4621 
   4622         if (rbuf->fmp != NULL) {/* Partial chain ? */
   4623 		rbuf->fmp->m_flags |= M_PKTHDR;
   4624                 m_freem(rbuf->fmp);
   4625                 rbuf->fmp = NULL;
   4626 	}
   4627 
   4628 	/*
   4629 	** With advanced descriptors the writeback
    4630 	** clobbers the buffer addresses, so it's easier
   4631 	** to just free the existing mbufs and take
   4632 	** the normal refresh path to get new buffers
   4633 	** and mapping.
   4634 	*/
   4635 	if (rbuf->buf) {
   4636 		m_free(rbuf->buf);
   4637 		rbuf->buf = NULL;
   4638 	}
   4639 
   4640 	return;
   4641 }
   4642 
   4643 
   4644 /*********************************************************************
   4645  *
   4646  *  This routine executes in interrupt context. It replenishes
    4647  *  the mbufs in the descriptor ring and sends data which has been
    4648  *  DMA'ed into host memory up to the upper layer.
   4649  *
   4650  *  We loop at most count times if count is > 0, or until done if
   4651  *  count < 0.
   4652  *
   4653  *  Return TRUE for more work, FALSE for all clean.
   4654  *********************************************************************/
   4655 static bool
   4656 ixgbe_rxeof(struct ix_queue *que)
   4657 {
   4658 	struct adapter		*adapter = que->adapter;
   4659 	struct rx_ring		*rxr = que->rxr;
   4660 	struct ifnet		*ifp = adapter->ifp;
   4661 #ifdef LRO
   4662 	struct lro_ctrl		*lro = &rxr->lro;
   4663 	struct lro_entry	*queued;
   4664 #endif /* LRO */
   4665 	int			i, nextp, processed = 0;
   4666 	u32			staterr = 0;
   4667 	u16			count = rxr->process_limit;
   4668 	union ixgbe_adv_rx_desc	*cur;
   4669 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4670 
   4671 	IXGBE_RX_LOCK(rxr);
   4672 
   4673 #ifdef DEV_NETMAP
   4674 	/* Same as the txeof routine: wakeup clients on intr. */
   4675 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
   4676 		return (FALSE);
   4677 #endif /* DEV_NETMAP */
   4678 	for (i = rxr->next_to_check; count != 0;) {
   4679 		struct mbuf	*sendmp, *mp;
   4680 		u32		rsc, ptype;
   4681 		u16		len;
   4682 		u16		vtag = 0;
   4683 		bool		eop;
   4684 
   4685 		/* Sync the ring. */
   4686 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4687 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4688 
   4689 		cur = &rxr->rx_base[i];
   4690 		staterr = le32toh(cur->wb.upper.status_error);
   4691 
   4692 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4693 			break;
   4694 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4695 			break;
   4696 
   4697 		count--;
   4698 		sendmp = NULL;
   4699 		nbuf = NULL;
   4700 		rsc = 0;
   4701 		cur->wb.upper.status_error = 0;
   4702 		rbuf = &rxr->rx_buffers[i];
   4703 		mp = rbuf->buf;
   4704 
   4705 		len = le16toh(cur->wb.upper.length);
   4706 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4707 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4708 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4709 
   4710 		/* Make sure bad packets are discarded */
   4711 		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
   4712 		    (rxr->discard)) {
   4713 			rxr->rx_discarded.ev_count++;
   4714 			if (eop)
   4715 				rxr->discard = FALSE;
   4716 			else
   4717 				rxr->discard = TRUE;
   4718 			ixgbe_rx_discard(rxr, i);
   4719 			goto next_desc;
   4720 		}
   4721 
   4722 		/*
    4723 		** On the 82599, which supports hardware
    4724 		** LRO (called HW RSC), packets need not
    4725 		** be fragmented across sequential
    4726 		** descriptors; rather, the next descriptor
    4727 		** is indicated in bits of the descriptor.
    4728 		** This also means that we might process
    4729 		** more than one packet at a time, something
    4730 		** that has never been true before; it
    4731 		** required eliminating global chain pointers
   4732 		** in favor of what we are doing here.  -jfv
   4733 		*/
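         		/*
         		** Illustration (a sketch of the !eop handling below): with
         		** RSC active, the NEXTP field of staterr names the descriptor
         		** that continues this frame, so e.g. descriptor 2 may chain
         		** to descriptor 9; without RSC the continuation is simply the
         		** next slot, (i + 1) modulo num_rx_desc.
         		*/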
   4734 		if (!eop) {
   4735 			/*
   4736 			** Figure out the next descriptor
   4737 			** of this frame.
   4738 			*/
   4739 			if (rxr->hw_rsc == TRUE) {
   4740 				rsc = ixgbe_rsc_count(cur);
   4741 				rxr->rsc_num += (rsc - 1);
   4742 			}
   4743 			if (rsc) { /* Get hardware index */
   4744 				nextp = ((staterr &
   4745 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4746 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4747 			} else { /* Just sequential */
   4748 				nextp = i + 1;
   4749 				if (nextp == adapter->num_rx_desc)
   4750 					nextp = 0;
   4751 			}
   4752 			nbuf = &rxr->rx_buffers[nextp];
   4753 			prefetch(nbuf);
   4754 		}
   4755 		/*
   4756 		** Rather than using the fmp/lmp global pointers
   4757 		** we now keep the head of a packet chain in the
   4758 		** buffer struct and pass this along from one
   4759 		** descriptor to the next, until we get EOP.
   4760 		*/
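         		/*
         		** e.g. for a frame spanning three descriptors: the first has
         		** fmp == NULL and becomes the head, the head is then parked
         		** in the next buffer's fmp, and only at EOP is the assembled
         		** chain handed to ixgbe_rx_input().
         		*/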
   4761 		mp->m_len = len;
   4762 		/*
   4763 		** See if there is a stored head
   4764 		** that determines what we are
   4765 		*/
   4766 		sendmp = rbuf->fmp;
   4767 
   4768 		if (sendmp != NULL) {  /* secondary frag */
   4769 			rbuf->buf = rbuf->fmp = NULL;
   4770 			mp->m_flags &= ~M_PKTHDR;
   4771 			sendmp->m_pkthdr.len += mp->m_len;
   4772 		} else {
   4773 			/*
   4774 			 * Optimize.  This might be a small packet,
   4775 			 * maybe just a TCP ACK.  Do a fast copy that
   4776 			 * is cache aligned into a new mbuf, and
   4777 			 * leave the old mbuf+cluster for re-use.
   4778 			 */
   4779 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4780 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4781 				if (sendmp != NULL) {
   4782 					sendmp->m_data +=
   4783 					    IXGBE_RX_COPY_ALIGN;
   4784 					ixgbe_bcopy(mp->m_data,
   4785 					    sendmp->m_data, len);
   4786 					sendmp->m_len = len;
   4787 					rxr->rx_copies.ev_count++;
   4788 					rbuf->flags |= IXGBE_RX_COPY;
   4789 				}
   4790 			}
   4791 			if (sendmp == NULL) {
   4792 				rbuf->buf = rbuf->fmp = NULL;
   4793 				sendmp = mp;
   4794 			}
   4795 
   4796 			/* first desc of a non-ps chain */
   4797 			sendmp->m_flags |= M_PKTHDR;
   4798 			sendmp->m_pkthdr.len = mp->m_len;
   4799 		}
   4800 		++processed;
   4801 		/* Pass the head pointer on */
   4802 		if (eop == 0) {
   4803 			nbuf->fmp = sendmp;
   4804 			sendmp = NULL;
   4805 			mp->m_next = nbuf->buf;
   4806 		} else { /* Sending this frame */
   4807 			sendmp->m_pkthdr.rcvif = ifp;
   4808 			ifp->if_ipackets++;
   4809 			rxr->rx_packets.ev_count++;
   4810 			/* capture data for AIM */
   4811 			rxr->bytes += sendmp->m_pkthdr.len;
   4812 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   4813 			/* Process vlan info */
   4814 			if ((rxr->vtag_strip) &&
   4815 			    (staterr & IXGBE_RXD_STAT_VP))
   4816 				vtag = le16toh(cur->wb.upper.vlan);
   4817 			if (vtag) {
   4818 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   4819 				    printf("%s: could not apply VLAN "
   4820 					"tag", __func__));
   4821 			}
   4822 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   4823 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   4824 				   &adapter->stats);
   4825 			}
   4826 #if __FreeBSD_version >= 800000
   4827 			sendmp->m_pkthdr.flowid = que->msix;
   4828 			sendmp->m_flags |= M_FLOWID;
   4829 #endif
   4830 		}
   4831 next_desc:
   4832 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4833 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4834 
   4835 		/* Advance our pointers to the next descriptor. */
   4836 		if (++i == rxr->num_desc)
   4837 			i = 0;
   4838 
   4839 		/* Now send to the stack or do LRO */
   4840 		if (sendmp != NULL) {
   4841 			rxr->next_to_check = i;
   4842 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   4843 			i = rxr->next_to_check;
   4844 		}
   4845 
    4846 		/* Every 8 descriptors we refresh the mbufs */
   4847 		if (processed == 8) {
   4848 			ixgbe_refresh_mbufs(rxr, i);
   4849 			processed = 0;
   4850 		}
   4851 	}
   4852 
   4853 	/* Refresh any remaining buf structs */
   4854 	if (ixgbe_rx_unrefreshed(rxr))
   4855 		ixgbe_refresh_mbufs(rxr, i);
   4856 
   4857 	rxr->next_to_check = i;
   4858 
   4859 #ifdef LRO
   4860 	/*
   4861 	 * Flush any outstanding LRO work
   4862 	 */
   4863 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   4864 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   4865 		tcp_lro_flush(lro, queued);
   4866 	}
   4867 #endif /* LRO */
   4868 
   4869 	IXGBE_RX_UNLOCK(rxr);
   4870 
   4871 	/*
   4872 	** We still have cleaning to do?
   4873 	** Schedule another interrupt if so.
   4874 	*/
   4875 	if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
   4876 		ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
   4877 		return true;
   4878 	}
   4879 
   4880 	return false;
   4881 }
   4882 
   4883 
   4884 /*********************************************************************
   4885  *
   4886  *  Verify that the hardware indicated that the checksum is valid.
   4887  *  Inform the stack about the status of checksum so that stack
    4888  *  Inform the stack about the status of the checksum so that the
    4889  *  stack doesn't spend time verifying it again.
   4890  *********************************************************************/
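         /*
         ** NetBSD note: the M_CSUM_* flags set below tell the stack that the
         ** hardware has already checked the checksum; the *_BAD variants mark
         ** a checksum that was checked and found to be bad.
         */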
   4891 static void
   4892 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   4893     struct ixgbe_hw_stats *stats)
   4894 {
   4895 	u16	status = (u16) staterr;
   4896 	u8	errors = (u8) (staterr >> 24);
   4897 #if 0
   4898 	bool	sctp = FALSE;
   4899 
   4900 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4901 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   4902 		sctp = TRUE;
   4903 #endif
   4904 
   4905 	if (status & IXGBE_RXD_STAT_IPCS) {
   4906 		stats->ipcs.ev_count++;
   4907 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   4908 			/* IP Checksum Good */
   4909 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   4910 
   4911 		} else {
   4912 			stats->ipcs_bad.ev_count++;
   4913 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   4914 		}
   4915 	}
   4916 	if (status & IXGBE_RXD_STAT_L4CS) {
   4917 		stats->l4cs.ev_count++;
   4918 		u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   4919 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   4920 			mp->m_pkthdr.csum_flags |= type;
   4921 		} else {
   4922 			stats->l4cs_bad.ev_count++;
   4923 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   4924 		}
   4925 	}
   4926 	return;
   4927 }
   4928 
   4929 
   4930 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   4931 /*
    4932 ** This routine is run via a vlan config EVENT;
    4933 ** it enables us to use the HW Filter table since
    4934 ** we can get the vlan id. This just creates the
    4935 ** entry in the soft version of the VFTA; init will
    4936 ** repopulate the real table.
   4937 */
   4938 static void
   4939 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4940 {
   4941 	struct adapter	*adapter = ifp->if_softc;
   4942 	u16		index, bit;
   4943 
   4944 	if (ifp->if_softc !=  arg)   /* Not our event */
   4945 		return;
   4946 
   4947 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4948 		return;
   4949 
   4950 	IXGBE_CORE_LOCK(adapter);
   4951 	index = (vtag >> 5) & 0x7F;
   4952 	bit = vtag & 0x1F;
   4953 	adapter->shadow_vfta[index] |= (1 << bit);
   4954 	ixgbe_init_locked(adapter);
   4955 	IXGBE_CORE_UNLOCK(adapter);
   4956 }
   4957 
   4958 /*
    4959 ** This routine is run via a vlan
    4960 ** unconfig EVENT; remove our entry
   4961 ** in the soft vfta.
   4962 */
   4963 static void
   4964 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4965 {
   4966 	struct adapter	*adapter = ifp->if_softc;
   4967 	u16		index, bit;
   4968 
   4969 	if (ifp->if_softc !=  arg)
   4970 		return;
   4971 
   4972 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4973 		return;
   4974 
   4975 	IXGBE_CORE_LOCK(adapter);
   4976 	index = (vtag >> 5) & 0x7F;
   4977 	bit = vtag & 0x1F;
   4978 	adapter->shadow_vfta[index] &= ~(1 << bit);
   4979 	/* Re-init to load the changes */
   4980 	ixgbe_init_locked(adapter);
   4981 	IXGBE_CORE_UNLOCK(adapter);
   4982 }
   4983 #endif
   4984 
   4985 static void
   4986 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   4987 {
   4988 	struct ethercom *ec = &adapter->osdep.ec;
   4989 	struct ixgbe_hw *hw = &adapter->hw;
   4990 	struct rx_ring	*rxr;
   4991 	u32		ctrl;
   4992 
   4993 	/*
    4994 	** We get here through init_locked, meaning
    4995 	** a soft reset; this has already cleared
    4996 	** the VFTA and other state, so if no
    4997 	** VLANs have been registered, do nothing.
   4998 	*/
   4999 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   5000 		return;
   5001 	}
   5002 
   5003 	/*
    5004 	** A soft reset zeroes out the VFTA, so
   5005 	** we need to repopulate it now.
   5006 	*/
   5007 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5008 		if (adapter->shadow_vfta[i] != 0)
   5009 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5010 			    adapter->shadow_vfta[i]);
   5011 
   5012 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5013 	/* Enable the Filter Table if enabled */
   5014 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5015 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5016 		ctrl |= IXGBE_VLNCTRL_VFE;
   5017 	}
   5018 	if (hw->mac.type == ixgbe_mac_82598EB)
   5019 		ctrl |= IXGBE_VLNCTRL_VME;
   5020 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5021 
   5022 	/* Setup the queues for vlans */
   5023 	for (int i = 0; i < adapter->num_queues; i++) {
   5024 		rxr = &adapter->rx_rings[i];
    5025 		/* On 82599 the VLAN enable is per-queue in RXDCTL */
   5026 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5027 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5028 			ctrl |= IXGBE_RXDCTL_VME;
   5029 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5030 		}
   5031 		rxr->vtag_strip = TRUE;
   5032 	}
   5033 }
   5034 
   5035 static void
   5036 ixgbe_enable_intr(struct adapter *adapter)
   5037 {
   5038 	struct ixgbe_hw	*hw = &adapter->hw;
   5039 	struct ix_queue	*que = adapter->queues;
   5040 	u32		mask, fwsm;
   5041 
   5042 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5043 	/* Enable Fan Failure detection */
   5044 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
   5045 		    mask |= IXGBE_EIMS_GPI_SDP1;
   5046 
   5047 	switch (adapter->hw.mac.type) {
   5048 		case ixgbe_mac_82599EB:
   5049 			mask |= IXGBE_EIMS_ECC;
   5050 			mask |= IXGBE_EIMS_GPI_SDP0;
   5051 			mask |= IXGBE_EIMS_GPI_SDP1;
   5052 			mask |= IXGBE_EIMS_GPI_SDP2;
   5053 #ifdef IXGBE_FDIR
   5054 			mask |= IXGBE_EIMS_FLOW_DIR;
   5055 #endif
   5056 			break;
   5057 		case ixgbe_mac_X540:
   5058 			mask |= IXGBE_EIMS_ECC;
   5059 			/* Detect if Thermal Sensor is enabled */
   5060 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5061 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5062 				mask |= IXGBE_EIMS_TS;
   5063 #ifdef IXGBE_FDIR
   5064 			mask |= IXGBE_EIMS_FLOW_DIR;
   5065 #endif
   5066 		/* falls through */
   5067 		default:
   5068 			break;
   5069 	}
   5070 
   5071 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5072 
   5073 	/* With RSS we use auto clear */
   5074 	if (adapter->msix_mem) {
   5075 		mask = IXGBE_EIMS_ENABLE_MASK;
   5076 		/* Don't autoclear Link */
   5077 		mask &= ~IXGBE_EIMS_OTHER;
   5078 		mask &= ~IXGBE_EIMS_LSC;
   5079 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5080 	}
   5081 
   5082 	/*
    5083 	** Now enable all queues; this is done separately to
    5084 	** allow for handling the extended (beyond 32) MSIX
    5085 	** vectors that can be used by the 82599.
   5086 	*/
   5087         for (int i = 0; i < adapter->num_queues; i++, que++)
   5088                 ixgbe_enable_queue(adapter, que->msix);
   5089 
   5090 	IXGBE_WRITE_FLUSH(hw);
   5091 
   5092 	return;
   5093 }
   5094 
   5095 static void
   5096 ixgbe_disable_intr(struct adapter *adapter)
   5097 {
   5098 	if (adapter->msix_mem)
   5099 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5100 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5101 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5102 	} else {
   5103 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5104 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5105 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5106 	}
   5107 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5108 	return;
   5109 }
   5110 
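         
         /*
         ** The shared Intel code expects 16-bit PCI config space accessors,
         ** while NetBSD's pci_conf_read()/pci_conf_write() operate on aligned
         ** 32-bit registers, so the helpers below extract (and, for writes,
         ** read-modify-write) the requested 16-bit half of the containing
         ** 32-bit word.
         */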
   5111 u16
   5112 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5113 {
   5114 	switch (reg % 4) {
   5115 	case 0:
   5116 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5117 		    __BITS(15, 0);
   5118 	case 2:
   5119 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5120 		    reg - 2), __BITS(31, 16));
   5121 	default:
    5122 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5123 		break;
   5124 	}
   5125 }
   5126 
   5127 void
   5128 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5129 {
   5130 	pcireg_t old;
   5131 
   5132 	switch (reg % 4) {
   5133 	case 0:
   5134 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5135 		    __BITS(31, 16);
   5136 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5137 		break;
   5138 	case 2:
   5139 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5140 		    __BITS(15, 0);
   5141 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5142 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5143 		break;
   5144 	default:
    5145 		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
   5146 		break;
   5147 	}
   5148 
   5149 	return;
   5150 }
   5151 
   5152 /*
   5153 ** Setup the correct IVAR register for a particular MSIX interrupt
   5154 **   (yes this is all very magic and confusing :)
   5155 **  - entry is the register array entry
   5156 **  - vector is the MSIX vector for this queue
   5157 **  - type is RX/TX/MISC
   5158 */
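         /*
         ** Example of the 82599/X540 mapping implemented below (a sketch):
         **   queue 3 RX (entry 3, type 0) -> IVAR(1), bits 23:16
         **   queue 3 TX (entry 3, type 1) -> IVAR(1), bits 31:24
         ** i.e. each IVAR register packs four byte-wide vector entries.
         */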
   5159 static void
   5160 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5161 {
   5162 	struct ixgbe_hw *hw = &adapter->hw;
   5163 	u32 ivar, index;
   5164 
   5165 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5166 
   5167 	switch (hw->mac.type) {
   5168 
   5169 	case ixgbe_mac_82598EB:
   5170 		if (type == -1)
   5171 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5172 		else
   5173 			entry += (type * 64);
   5174 		index = (entry >> 2) & 0x1F;
   5175 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5176 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5177 		ivar |= (vector << (8 * (entry & 0x3)));
   5178 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5179 		break;
   5180 
   5181 	case ixgbe_mac_82599EB:
   5182 	case ixgbe_mac_X540:
   5183 		if (type == -1) { /* MISC IVAR */
   5184 			index = (entry & 1) * 8;
   5185 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5186 			ivar &= ~(0xFF << index);
   5187 			ivar |= (vector << index);
   5188 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5189 		} else {	/* RX/TX IVARS */
   5190 			index = (16 * (entry & 1)) + (8 * type);
   5191 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5192 			ivar &= ~(0xFF << index);
   5193 			ivar |= (vector << index);
   5194 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
   5195 		}
   5196 
   5197 	default:
   5198 		break;
   5199 	}
   5200 }
   5201 
   5202 static void
   5203 ixgbe_configure_ivars(struct adapter *adapter)
   5204 {
   5205 	struct  ix_queue *que = adapter->queues;
   5206 	u32 newitr;
   5207 
   5208 	if (ixgbe_max_interrupt_rate > 0)
   5209 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5210 	else
   5211 		newitr = 0;
   5212 
   5213         for (int i = 0; i < adapter->num_queues; i++, que++) {
   5214 		/* First the RX queue entry */
   5215                 ixgbe_set_ivar(adapter, i, que->msix, 0);
   5216 		/* ... and the TX */
   5217 		ixgbe_set_ivar(adapter, i, que->msix, 1);
   5218 		/* Set an Initial EITR value */
   5219                 IXGBE_WRITE_REG(&adapter->hw,
   5220                     IXGBE_EITR(que->msix), newitr);
   5221 	}
   5222 
   5223 	/* For the Link interrupt */
   5224         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5225 }
   5226 
   5227 /*
   5228 ** ixgbe_sfp_probe - called in the local timer to
   5229 ** determine if a port had optics inserted.
   5230 */
   5231 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5232 {
   5233 	struct ixgbe_hw	*hw = &adapter->hw;
   5234 	device_t	dev = adapter->dev;
   5235 	bool		result = FALSE;
   5236 
   5237 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5238 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5239 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5240 		if (ret)
   5241                         goto out;
   5242 		ret = hw->phy.ops.reset(hw);
   5243 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    5244 			device_printf(dev, "Unsupported SFP+ module detected!\n");
   5245 			device_printf(dev, "Reload driver with supported module.\n");
   5246 			adapter->sfp_probe = FALSE;
   5247                         goto out;
   5248 		} else
    5249 			device_printf(dev, "SFP+ module detected!\n");
   5250 		/* We now have supported optics */
   5251 		adapter->sfp_probe = FALSE;
   5252 		/* Set the optics type so system reports correctly */
   5253 		ixgbe_setup_optics(adapter);
   5254 		result = TRUE;
   5255 	}
   5256 out:
   5257 	return (result);
   5258 }
   5259 
   5260 /*
   5261 ** Tasklet handler for MSIX Link interrupts
   5262 **  - do outside interrupt since it might sleep
   5263 */
   5264 static void
   5265 ixgbe_handle_link(void *context)
   5266 {
   5267 	struct adapter  *adapter = context;
   5268 
   5269 	if (ixgbe_check_link(&adapter->hw,
   5270 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
   5271 	    ixgbe_update_link_status(adapter);
   5272 }
   5273 
   5274 /*
   5275 ** Tasklet for handling SFP module interrupts
   5276 */
   5277 static void
   5278 ixgbe_handle_mod(void *context)
   5279 {
   5280 	struct adapter  *adapter = context;
   5281 	struct ixgbe_hw *hw = &adapter->hw;
   5282 	device_t	dev = adapter->dev;
   5283 	u32 err;
   5284 
   5285 	err = hw->phy.ops.identify_sfp(hw);
   5286 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5287 		device_printf(dev,
   5288 		    "Unsupported SFP+ module type was detected.\n");
   5289 		return;
   5290 	}
   5291 	err = hw->mac.ops.setup_sfp(hw);
   5292 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5293 		device_printf(dev,
   5294 		    "Setup failure - unsupported SFP+ module type.\n");
   5295 		return;
   5296 	}
   5297 	softint_schedule(adapter->msf_si);
   5298 	return;
   5299 }
   5300 
   5301 
   5302 /*
   5303 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5304 */
   5305 static void
   5306 ixgbe_handle_msf(void *context)
   5307 {
   5308 	struct adapter  *adapter = context;
   5309 	struct ixgbe_hw *hw = &adapter->hw;
   5310 	u32 autoneg;
   5311 	bool negotiate;
   5312 
   5313 	autoneg = hw->phy.autoneg_advertised;
   5314 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5315 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5316 	else
   5317 		negotiate = 0;
   5318 	if (hw->mac.ops.setup_link)
   5319 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5320 	return;
   5321 }
   5322 
   5323 #ifdef IXGBE_FDIR
   5324 /*
   5325 ** Tasklet for reinitializing the Flow Director filter table
   5326 */
   5327 static void
   5328 ixgbe_reinit_fdir(void *context)
   5329 {
   5330 	struct adapter  *adapter = context;
   5331 	struct ifnet   *ifp = adapter->ifp;
   5332 
   5333 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5334 		return;
   5335 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5336 	adapter->fdir_reinit = 0;
   5337 	/* re-enable flow director interrupts */
   5338 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5339 	/* Restart the interface */
   5340 	ifp->if_flags |= IFF_RUNNING;
   5341 	return;
   5342 }
   5343 #endif
   5344 
   5345 /**********************************************************************
   5346  *
   5347  *  Update the board statistics counters.
   5348  *
   5349  **********************************************************************/
   5350 static void
   5351 ixgbe_update_stats_counters(struct adapter *adapter)
   5352 {
   5353 	struct ifnet   *ifp = adapter->ifp;
   5354 	struct ixgbe_hw *hw = &adapter->hw;
   5355 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
   5356 	u64  total_missed_rx = 0;
   5357 	uint64_t crcerrs, rlec;
   5358 
   5359 	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
   5360 	adapter->stats.crcerrs.ev_count += crcerrs;
   5361 	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
   5362 	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
   5363 	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);
   5364 
   5365 	/*
   5366 	** Note: these are for the 8 possible traffic classes,
    5367 	**	 which are unused in the current implementation,
    5368 	**	 so only class 0 should read real data.
   5369 	*/
   5370 	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
   5371 		int j = i % adapter->num_queues;
   5372 		u32 mp;
   5373 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
   5374 		/* missed_rx tallies misses for the gprc workaround */
   5375 		missed_rx += mp;
   5376 		/* global total per queue */
   5377         	adapter->stats.mpc[j].ev_count += mp;
   5378 		/* Running comprehensive total for stats display */
   5379 		total_missed_rx += mp;
   5380 		if (hw->mac.type == ixgbe_mac_82598EB) {
   5381 			adapter->stats.rnbc[j] +=
   5382 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
   5383 			adapter->stats.qbtc[j].ev_count +=
   5384 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
   5385 			adapter->stats.qbrc[j].ev_count +=
   5386 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
   5387 			adapter->stats.pxonrxc[j].ev_count +=
   5388 			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
   5389 		} else {
   5390 			adapter->stats.pxonrxc[j].ev_count +=
   5391 			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
   5392 		}
   5393 		adapter->stats.pxontxc[j].ev_count +=
   5394 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
   5395 		adapter->stats.pxofftxc[j].ev_count +=
   5396 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
   5397 		adapter->stats.pxoffrxc[j].ev_count +=
   5398 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
   5399 		adapter->stats.pxon2offc[j].ev_count +=
   5400 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
   5401 	}
   5402 	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
   5403 		int j = i % adapter->num_queues;
   5404 		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
   5405 		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
   5406 		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
   5407 	}
   5408 	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
   5409 	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
   5410 	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
   5411 	adapter->stats.rlec.ev_count += rlec;
   5412 
   5413 	/* Hardware workaround, gprc counts missed packets */
   5414 	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;
   5415 
   5416 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
   5417 	adapter->stats.lxontxc.ev_count += lxon;
   5418 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
   5419 	adapter->stats.lxofftxc.ev_count += lxoff;
   5420 	total = lxon + lxoff;
   5421 
   5422 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5423 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
   5424 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
   5425 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
   5426 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
   5427 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
   5428 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
   5429 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
   5430 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
   5431 	} else {
   5432 		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
   5433 		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
   5434 		/* 82598 only has a counter in the high register */
   5435 		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
   5436 		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
   5437 		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
   5438 	}
   5439 
   5440 	/*
    5441 	 * Workaround: the hardware incorrectly counts
    5442 	 * broadcasts in mprc, so for now we subtract those.
   5443 	 */
   5444 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
   5445 	adapter->stats.bprc.ev_count += bprc;
   5446 	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);
   5447 
   5448 	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
   5449 	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
   5450 	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
   5451 	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
   5452 	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
   5453 	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);
   5454 
   5455 	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
   5456 	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
   5457 	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;
   5458 
   5459 	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
   5460 	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
   5461 	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
   5462 	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
   5463 	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
   5464 	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
   5465 	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
   5466 	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
   5467 	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
   5468 	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
   5469 	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
   5470 	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
   5471 	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
   5472 	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
   5473 	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
   5474 	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
   5475 	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
   5476 	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);
   5477 
   5478 	/* Only read FCOE on 82599 */
   5479 	if (hw->mac.type != ixgbe_mac_82598EB) {
   5480 		adapter->stats.fcoerpdc.ev_count +=
   5481 		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
   5482 		adapter->stats.fcoeprc.ev_count +=
   5483 		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
   5484 		adapter->stats.fcoeptc.ev_count +=
   5485 		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
   5486 		adapter->stats.fcoedwrc.ev_count +=
   5487 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
   5488 		adapter->stats.fcoedwtc.ev_count +=
   5489 		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
   5490 	}
   5491 
   5492 	/* Fill out the OS statistics structure */
   5493 	/*
   5494 	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
   5495 	 * adapter->stats counters. It's required to make ifconfig -z
    5496 	 * (SIOCZIFDATA) work.
   5497 	 */
   5498 	ifp->if_collisions = 0;
   5499 
   5500 	/* Rx Errors */
   5501 	ifp->if_iqdrops += total_missed_rx;
   5502 	ifp->if_ierrors += crcerrs + rlec;
   5503 }
   5504 
   5505 /** ixgbe_sysctl_tdh_handler - Handler function
   5506  *  Retrieves the TDH value from the hardware
   5507  */
   5508 static int
   5509 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5510 {
   5511 	struct sysctlnode node;
   5512 	uint32_t val;
   5513 	struct tx_ring *txr;
   5514 
   5515 	node = *rnode;
   5516 	txr = (struct tx_ring *)node.sysctl_data;
   5517 	if (txr == NULL)
   5518 		return 0;
   5519 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5520 	node.sysctl_data = &val;
   5521 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5522 }
   5523 
   5524 /** ixgbe_sysctl_tdt_handler - Handler function
   5525  *  Retrieves the TDT value from the hardware
   5526  */
   5527 static int
   5528 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5529 {
   5530 	struct sysctlnode node;
   5531 	uint32_t val;
   5532 	struct tx_ring *txr;
   5533 
   5534 	node = *rnode;
   5535 	txr = (struct tx_ring *)node.sysctl_data;
   5536 	if (txr == NULL)
   5537 		return 0;
   5538 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5539 	node.sysctl_data = &val;
   5540 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5541 }
   5542 
   5543 /** ixgbe_sysctl_rdh_handler - Handler function
   5544  *  Retrieves the RDH value from the hardware
   5545  */
   5546 static int
   5547 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5548 {
   5549 	struct sysctlnode node;
   5550 	uint32_t val;
   5551 	struct rx_ring *rxr;
   5552 
   5553 	node = *rnode;
   5554 	rxr = (struct rx_ring *)node.sysctl_data;
   5555 	if (rxr == NULL)
   5556 		return 0;
   5557 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5558 	node.sysctl_data = &val;
   5559 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5560 }
   5561 
   5562 /** ixgbe_sysctl_rdt_handler - Handler function
   5563  *  Retrieves the RDT value from the hardware
   5564  */
   5565 static int
   5566 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5567 {
   5568 	struct sysctlnode node;
   5569 	uint32_t val;
   5570 	struct rx_ring *rxr;
   5571 
   5572 	node = *rnode;
   5573 	rxr = (struct rx_ring *)node.sysctl_data;
   5574 	if (rxr == NULL)
   5575 		return 0;
   5576 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5577 	node.sysctl_data = &val;
   5578 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5579 }
   5580 
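         /*
         ** Sysctl handler for the per-queue interrupt rate: report the rate
         ** implied by the current EITR interval and, on a write, program a
         ** new one. A written value of 0 (or anything >= 500000) removes the
         ** limit; otherwise the rate is clamped to at least 1000 ints/sec.
         */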
   5581 static int
   5582 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5583 {
   5584 	int error;
   5585 	struct sysctlnode node;
   5586 	struct ix_queue *que;
   5587 	uint32_t reg, usec, rate;
   5588 
   5589 	node = *rnode;
   5590 	que = (struct ix_queue *)node.sysctl_data;
   5591 	if (que == NULL)
   5592 		return 0;
   5593 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5594 	usec = ((reg & 0x0FF8) >> 3);
   5595 	if (usec > 0)
   5596 		rate = 500000 / usec;
   5597 	else
   5598 		rate = 0;
   5599 	node.sysctl_data = &rate;
   5600 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5601 	if (error)
   5602 		return error;
   5603 	reg &= ~0xfff; /* default, no limitation */
   5604 	ixgbe_max_interrupt_rate = 0;
   5605 	if (rate > 0 && rate < 500000) {
   5606 		if (rate < 1000)
   5607 			rate = 1000;
   5608 		ixgbe_max_interrupt_rate = rate;
   5609 		reg |= ((4000000/rate) & 0xff8 );
   5610 	}
   5611 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5612 	return 0;
   5613 }
   5614 
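         /*
         ** Return the device's sysctl root node (hw.<devicename>), creating
         ** it if needed; the per-queue and statistics nodes below hang off
         ** this node. Returns NULL (and logs) if creation fails.
         */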
   5615 const struct sysctlnode *
   5616 ixgbe_sysctl_instance(struct adapter *adapter)
   5617 {
   5618 	const char *dvname;
   5619 	struct sysctllog **log;
   5620 	int rc;
   5621 	const struct sysctlnode *rnode;
   5622 
   5623 	log = &adapter->sysctllog;
   5624 	dvname = device_xname(adapter->dev);
   5625 
   5626 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5627 	    0, CTLTYPE_NODE, dvname,
   5628 	    SYSCTL_DESCR("ixgbe information and settings"),
   5629 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5630 		goto err;
   5631 
   5632 	return rnode;
   5633 err:
   5634 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5635 	return NULL;
   5636 }
   5637 
   5638 /*
   5639  * Add sysctl variables, one per statistic, to the system.
   5640  */
   5641 static void
   5642 ixgbe_add_hw_stats(struct adapter *adapter)
   5643 {
   5644 	device_t dev = adapter->dev;
   5645 	const struct sysctlnode *rnode, *cnode;
   5646 	struct sysctllog **log = &adapter->sysctllog;
   5647 	struct tx_ring *txr = adapter->tx_rings;
   5648 	struct rx_ring *rxr = adapter->rx_rings;
   5649 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5650 
   5651 	/* Driver Statistics */
   5652 #if 0
   5653 	/* These counters are not updated by the software */
   5654 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5655 			CTLFLAG_RD, &adapter->dropped_pkts,
   5656 			"Driver dropped packets");
   5657 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   5658 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   5659 			"???");
   5660 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   5661 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   5662 			"???");
   5663 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   5664 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   5665 			"???");
   5666 #endif
   5667 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   5668 	    NULL, device_xname(dev), "Handled queue in softint");
   5669 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   5670 	    NULL, device_xname(dev), "Requeued in softint");
   5671 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   5672 	    NULL, device_xname(dev), "Interrupt handler more rx");
   5673 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   5674 	    NULL, device_xname(dev), "Interrupt handler more tx");
   5675 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   5676 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   5677 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   5678 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   5679 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   5680 	    NULL, device_xname(dev), "m_defrag() failed");
   5681 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   5682 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   5683 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   5684 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   5685 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   5686 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   5687 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   5688 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   5689 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   5690 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   5691 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   5692 	    NULL, device_xname(dev), "Watchdog timeouts");
   5693 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   5694 	    NULL, device_xname(dev), "TSO errors");
   5695 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   5696 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   5697 
   5698 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   5699 		snprintf(adapter->queues[i].evnamebuf,
   5700 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   5701 		    device_xname(dev), i);
   5702 		snprintf(adapter->queues[i].namebuf,
   5703 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   5704 
   5705 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   5706 			aprint_error_dev(dev, "could not create sysctl root\n");
   5707 			break;
   5708 		}
   5709 
   5710 		if (sysctl_createv(log, 0, &rnode, &rnode,
   5711 		    0, CTLTYPE_NODE,
   5712 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   5713 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   5714 			break;
   5715 
   5716 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5717 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   5718 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   5719 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   5720 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   5721 			break;
   5722 
   5723 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5724 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   5725 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   5726 			NULL, 0, &(adapter->queues[i].irqs),
   5727 		    0, CTL_CREATE, CTL_EOL) != 0)
   5728 			break;
   5729 
   5730 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5731 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5732 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   5733 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   5734 		    0, CTL_CREATE, CTL_EOL) != 0)
   5735 			break;
   5736 
   5737 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5738 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5739 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   5740 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   5741 		    0, CTL_CREATE, CTL_EOL) != 0)
   5742 			break;
   5743 
   5744 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   5745 		    NULL, device_xname(dev), "TSO");
   5746 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   5747 		    NULL, adapter->queues[i].evnamebuf,
   5748 		    "Queue No Descriptor Available");
   5749 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   5750 		    NULL, adapter->queues[i].evnamebuf,
   5751 		    "Queue Packets Transmitted");
   5752 
   5753 #ifdef LRO
   5754 		struct lro_ctrl *lro = &rxr->lro;
   5755 #endif /* LRO */
   5756 
   5757 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5758 		    CTLFLAG_READONLY,
   5759 		    CTLTYPE_INT,
   5760 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   5761 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   5762 		    CTL_CREATE, CTL_EOL) != 0)
   5763 			break;
   5764 
   5765 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5766 		    CTLFLAG_READONLY,
   5767 		    CTLTYPE_INT,
   5768 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   5769 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   5770 		    CTL_CREATE, CTL_EOL) != 0)
   5771 			break;
   5772 
   5773 		if (i < __arraycount(adapter->stats.mpc)) {
   5774 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   5775 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5776 			    "Missed Packet Count");
   5777 		}
   5778 		if (i < __arraycount(adapter->stats.pxontxc)) {
   5779 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   5780 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5781 			    "pxontxc");
   5782 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   5783 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5784 			    "pxonrxc");
   5785 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   5786 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5787 			    "pxofftxc");
   5788 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   5789 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5790 			    "pxoffrxc");
   5791 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   5792 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5793 			    "pxon2offc");
   5794 		}
   5795 		if (i < __arraycount(adapter->stats.qprc)) {
   5796 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   5797 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5798 			    "qprc");
   5799 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   5800 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5801 			    "qptc");
   5802 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   5803 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5804 			    "qbrc");
   5805 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   5806 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5807 			    "qbtc");
   5808 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   5809 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5810 			    "qprdc");
   5811 		}
   5812 
   5813 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   5814 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   5815 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   5816 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   5817 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   5818 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   5819 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   5820 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   5821 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   5822 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   5823 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   5824 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   5825 #ifdef LRO
   5826 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   5827 				CTLFLAG_RD, &lro->lro_queued, 0,
   5828 				"LRO Queued");
   5829 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   5830 				CTLFLAG_RD, &lro->lro_flushed, 0,
   5831 				"LRO Flushed");
   5832 #endif /* LRO */
   5833 	}
   5834 
    5835 	/* MAC stats get their own sub node */
   5836 
   5837 
   5838 	snprintf(stats->namebuf,
   5839 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   5840 
   5841 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   5842 	    stats->namebuf, "rx csum offload - IP");
   5843 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   5844 	    stats->namebuf, "rx csum offload - L4");
   5845 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   5846 	    stats->namebuf, "rx csum offload - IP bad");
   5847 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   5848 	    stats->namebuf, "rx csum offload - L4 bad");
   5849 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   5850 	    stats->namebuf, "Interrupt conditions zero");
   5851 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   5852 	    stats->namebuf, "Legacy interrupts");
   5853 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   5854 	    stats->namebuf, "CRC Errors");
   5855 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   5856 	    stats->namebuf, "Illegal Byte Errors");
   5857 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   5858 	    stats->namebuf, "Byte Errors");
   5859 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   5860 	    stats->namebuf, "MAC Short Packets Discarded");
   5861 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   5862 	    stats->namebuf, "MAC Local Faults");
   5863 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   5864 	    stats->namebuf, "MAC Remote Faults");
   5865 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   5866 	    stats->namebuf, "Receive Length Errors");
   5867 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   5868 	    stats->namebuf, "Link XON Transmitted");
   5869 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   5870 	    stats->namebuf, "Link XON Received");
   5871 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   5872 	    stats->namebuf, "Link XOFF Transmitted");
   5873 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   5874 	    stats->namebuf, "Link XOFF Received");
   5875 
   5876 	/* Packet Reception Stats */
   5877 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   5878 	    stats->namebuf, "Total Octets Received");
   5879 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   5880 	    stats->namebuf, "Good Octets Received");
   5881 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   5882 	    stats->namebuf, "Total Packets Received");
   5883 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   5884 	    stats->namebuf, "Good Packets Received");
   5885 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   5886 	    stats->namebuf, "Multicast Packets Received");
   5887 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   5888 	    stats->namebuf, "Broadcast Packets Received");
   5889 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
   5890 	    stats->namebuf, "64 byte frames received ");
   5891 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   5892 	    stats->namebuf, "65-127 byte frames received");
   5893 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   5894 	    stats->namebuf, "128-255 byte frames received");
   5895 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   5896 	    stats->namebuf, "256-511 byte frames received");
   5897 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   5898 	    stats->namebuf, "512-1023 byte frames received");
   5899 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
    5900 	    stats->namebuf, "1024-1522 byte frames received");
   5901 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   5902 	    stats->namebuf, "Receive Undersized");
   5903 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
   5904 	    stats->namebuf, "Fragmented Packets Received ");
   5905 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   5906 	    stats->namebuf, "Oversized Packets Received");
   5907 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   5908 	    stats->namebuf, "Received Jabber");
   5909 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   5910 	    stats->namebuf, "Management Packets Received");
   5911 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   5912 	    stats->namebuf, "Checksum Errors");
   5913 
   5914 	/* Packet Transmission Stats */
   5915 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   5916 	    stats->namebuf, "Good Octets Transmitted");
   5917 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   5918 	    stats->namebuf, "Total Packets Transmitted");
   5919 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   5920 	    stats->namebuf, "Good Packets Transmitted");
   5921 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   5922 	    stats->namebuf, "Broadcast Packets Transmitted");
   5923 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   5924 	    stats->namebuf, "Multicast Packets Transmitted");
   5925 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   5926 	    stats->namebuf, "Management Packets Transmitted");
   5927 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
   5928 	    stats->namebuf, "64 byte frames transmitted ");
   5929 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   5930 	    stats->namebuf, "65-127 byte frames transmitted");
   5931 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   5932 	    stats->namebuf, "128-255 byte frames transmitted");
   5933 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   5934 	    stats->namebuf, "256-511 byte frames transmitted");
   5935 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   5936 	    stats->namebuf, "512-1023 byte frames transmitted");
   5937 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   5938 	    stats->namebuf, "1024-1522 byte frames transmitted");
   5939 }
   5940 
   5941 /*
   5942 ** Set flow control using sysctl:
   5943 ** Flow control values:
   5944 ** 	0 - off
   5945 **	1 - rx pause
   5946 **	2 - tx pause
   5947 **	3 - full
   5948 */
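         /*
         ** Usage sketch (assuming the "fc" leaf is attached under the device
         ** node created by ixgbe_sysctl_instance(), which happens elsewhere
         ** in the driver):
         **	sysctl -w hw.ixg0.fc=3		# request full rx/tx pause
         */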
   5949 static int
   5950 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   5951 {
   5952 	struct sysctlnode node;
   5953 	int error, last;
   5954 	struct adapter *adapter;
   5955 
   5956 	node = *rnode;
   5957 	adapter = (struct adapter *)node.sysctl_data;
   5958 	node.sysctl_data = &adapter->fc;
   5959 	last = adapter->fc;
   5960 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5961 	if (error != 0 || newp == NULL)
   5962 		return error;
   5963 
   5964 	/* Don't bother if it's not changed */
   5965 	if (adapter->fc == last)
   5966 		return (0);
   5967 
   5968 	switch (adapter->fc) {
   5969 		case ixgbe_fc_rx_pause:
   5970 		case ixgbe_fc_tx_pause:
   5971 		case ixgbe_fc_full:
   5972 			adapter->hw.fc.requested_mode = adapter->fc;
   5973 			if (adapter->num_queues > 1)
   5974 				ixgbe_disable_rx_drop(adapter);
   5975 			break;
   5976 		case ixgbe_fc_none:
   5977 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
   5978 			if (adapter->num_queues > 1)
   5979 				ixgbe_enable_rx_drop(adapter);
   5980 			break;
   5981 		default:
   5982 			adapter->fc = last;
   5983 			return (EINVAL);
   5984 	}
   5985 	/* Don't autoneg if forcing a value */
   5986 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   5987 	ixgbe_fc_enable(&adapter->hw);
   5988 	return 0;
   5989 }
   5990 
   5991 /*
   5992 ** Control link advertise speed:
   5993 **	1 - advertise only 1G
   5994 **	2 - advertise 100Mb
   5995 **	3 - advertise normal
   5996 */
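         /*
         ** Usage sketch (assuming the leaf name used where this handler is
         ** attached elsewhere in the driver): writing 1 restricts
         ** autonegotiation to 1G; this only applies to copper and
         ** multispeed-fiber ports.
         */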
   5997 static int
   5998 ixgbe_set_advertise(SYSCTLFN_ARGS)
   5999 {
   6000 	struct sysctlnode	node;
   6001 	int			t, error = 0;
   6002 	struct adapter		*adapter;
   6003 	device_t		dev;
   6004 	struct ixgbe_hw		*hw;
   6005 	ixgbe_link_speed	speed, last;
   6006 
   6007 	node = *rnode;
   6008 	adapter = (struct adapter *)node.sysctl_data;
   6009 	dev = adapter->dev;
   6010 	hw = &adapter->hw;
   6011 	last = adapter->advertise;
   6012 	t = adapter->advertise;
   6013 	node.sysctl_data = &t;
   6014 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6015 	if (error != 0 || newp == NULL)
   6016 		return error;
   6017 
    6018 	if (t == last) /* no change */
   6019 		return (0);
   6020 
   6021 	if (t == -1)
   6022 		return 0;
   6023 
   6024 	adapter->advertise = t;
   6025 
   6026 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
   6027             (hw->phy.multispeed_fiber)))
   6028 		return (EINVAL);
   6029 
   6030 	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
   6031 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
   6032 		return (EINVAL);
   6033 	}
   6034 
   6035 	if (adapter->advertise == 1)
   6036                 speed = IXGBE_LINK_SPEED_1GB_FULL;
   6037 	else if (adapter->advertise == 2)
   6038                 speed = IXGBE_LINK_SPEED_100_FULL;
   6039 	else if (adapter->advertise == 3)
   6040                 speed = IXGBE_LINK_SPEED_1GB_FULL |
   6041 			IXGBE_LINK_SPEED_10GB_FULL;
   6042 	else {/* bogus value */
   6043 		adapter->advertise = last;
   6044 		return (EINVAL);
   6045 	}
   6046 
   6047 	hw->mac.autotry_restart = TRUE;
   6048 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6049 
   6050 	return 0;
   6051 }
   6052 
   6053 /*
   6054 ** Thermal Shutdown Trigger
   6055 **   - cause a Thermal Overtemp IRQ
   6056 */
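         /*
         ** Writing a non-zero value through the sysctl leaf this handler is
         ** attached to sets IXGBE_EICR_TS in EICS, simulating an overtemp
         ** interrupt for testing (X540 only; other MACs return immediately).
         */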
   6057 static int
   6058 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6059 {
   6060 	struct sysctlnode node;
   6061 	int		error, fire = 0;
   6062 	struct adapter	*adapter;
   6063 	struct ixgbe_hw *hw;
   6064 
   6065 	node = *rnode;
   6066 	adapter = (struct adapter *)node.sysctl_data;
   6067 	hw = &adapter->hw;
   6068 
   6069 	if (hw->mac.type != ixgbe_mac_X540)
   6070 		return (0);
   6071 
   6072 	node.sysctl_data = &fire;
   6073 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6074 	if ((error) || (newp == NULL))
   6075 		return (error);
   6076 
   6077 	if (fire) {
   6078 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6079 		reg |= IXGBE_EICR_TS;
   6080 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6081 	}
   6082 
   6083 	return (0);
   6084 }
   6085 
   6086 /*
   6087 ** Enable the hardware to drop packets when the buffer is
    6088 ** full. This is useful with multiple queues, so that no
    6089 ** single full queue stalls the entire RX engine. We only
    6090 ** enable this when multiqueue is in use AND when Flow
    6091 ** Control is disabled.
   6092 */
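         /*
         ** See ixgbe_set_flowcntl() above: these are toggled only when more
         ** than one queue is configured, with drop enabled for fc=none and
         ** disabled whenever a pause mode is requested.
         */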
   6093 static void
   6094 ixgbe_enable_rx_drop(struct adapter *adapter)
   6095 {
   6096         struct ixgbe_hw *hw = &adapter->hw;
   6097 
   6098 	for (int i = 0; i < adapter->num_queues; i++) {
   6099         	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6100         	srrctl |= IXGBE_SRRCTL_DROP_EN;
   6101         	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6102 	}
   6103 }
   6104 
   6105 static void
   6106 ixgbe_disable_rx_drop(struct adapter *adapter)
   6107 {
   6108         struct ixgbe_hw *hw = &adapter->hw;
   6109 
   6110 	for (int i = 0; i < adapter->num_queues; i++) {
   6111         	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6112         	srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   6113         	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6114 	}
   6115 }
   6116