Home | History | Annotate | Line # | Download | only in ixgbe
ixgbe.c revision 1.28
      1 /******************************************************************************
      2 
      3   Copyright (c) 2001-2013, Intel Corporation
      4   All rights reserved.
      5 
      6   Redistribution and use in source and binary forms, with or without
      7   modification, are permitted provided that the following conditions are met:
      8 
      9    1. Redistributions of source code must retain the above copyright notice,
     10       this list of conditions and the following disclaimer.
     11 
     12    2. Redistributions in binary form must reproduce the above copyright
     13       notice, this list of conditions and the following disclaimer in the
     14       documentation and/or other materials provided with the distribution.
     15 
     16    3. Neither the name of the Intel Corporation nor the names of its
     17       contributors may be used to endorse or promote products derived from
     18       this software without specific prior written permission.
     19 
     20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30   POSSIBILITY OF SUCH DAMAGE.
     31 
     32 ******************************************************************************/
     33 /*
     34  * Copyright (c) 2011 The NetBSD Foundation, Inc.
     35  * All rights reserved.
     36  *
     37  * This code is derived from software contributed to The NetBSD Foundation
     38  * by Coyote Point Systems, Inc.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  *
     49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     59  * POSSIBILITY OF SUCH DAMAGE.
     60  */
     61 /*$FreeBSD: head/sys/dev/ixgbe/ixgbe.c 250108 2013-04-30 16:18:29Z luigi $*/
     62 /*$NetBSD: ixgbe.c,v 1.28 2015/04/24 07:00:51 msaitoh Exp $*/
     63 
     64 #include "opt_inet.h"
     65 #include "opt_inet6.h"
     66 
     67 #include "ixgbe.h"
     68 
     69 /*********************************************************************
     70  *  Set this to one to display debug statistics
     71  *********************************************************************/
     72 int             ixgbe_display_debug_stats = 0;
     73 
     74 /*********************************************************************
     75  *  Driver version
     76  *********************************************************************/
     77 char ixgbe_driver_version[] = "2.5.8 - HEAD";
     78 
     79 /*********************************************************************
     80  *  PCI Device ID Table
     81  *
     82  *  Used by probe to select devices to load on
     83  *  Last field stores an index into ixgbe_strings
     84  *  Last entry must be all 0s
     85  *
     86  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
     87  *********************************************************************/
     88 
     89 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
     90 {
     91 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
     92 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
     93 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
     94 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
     95 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
     96 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
     97 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
     98 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
     99 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
    100 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
    101 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
    102 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
    103 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
    104 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
    105 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
    106 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
    107 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
    108 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
    109 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
    110 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
    111 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
    112 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
    113 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
    114 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
    115 	/* required last entry */
    116 	{0, 0, 0, 0, 0}
    117 };
    118 
/*********************************************************************
 *  Table of branding strings
 *
 *  Indexed by the "String Index" (last) field of an
 *  ixgbe_vendor_info_array entry; ixgbe_attach prints the selected
 *  string in its attach banner.  Both the strings and the table
 *  slots are read-only.
 *********************************************************************/

static const char * const ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};
    126 
    127 /*********************************************************************
    128  *  Function prototypes
    129  *********************************************************************/
    130 static int      ixgbe_probe(device_t, cfdata_t, void *);
    131 static void     ixgbe_attach(device_t, device_t, void *);
    132 static int      ixgbe_detach(device_t, int);
    133 #if 0
    134 static int      ixgbe_shutdown(device_t);
    135 #endif
    136 #if IXGBE_LEGACY_TX
    137 static void     ixgbe_start(struct ifnet *);
    138 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
    139 #else
    140 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
    141 static int	ixgbe_mq_start_locked(struct ifnet *,
    142                     struct tx_ring *, struct mbuf *);
    143 static void	ixgbe_qflush(struct ifnet *);
    144 static void	ixgbe_deferred_mq_start(void *);
    145 #endif
    146 static int      ixgbe_ioctl(struct ifnet *, u_long, void *);
    147 static void	ixgbe_ifstop(struct ifnet *, int);
    148 static int	ixgbe_init(struct ifnet *);
    149 static void	ixgbe_init_locked(struct adapter *);
    150 static void     ixgbe_stop(void *);
    151 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
    152 static int      ixgbe_media_change(struct ifnet *);
    153 static void     ixgbe_identify_hardware(struct adapter *);
    154 static int      ixgbe_allocate_pci_resources(struct adapter *,
    155 		    const struct pci_attach_args *);
    156 static int      ixgbe_allocate_msix(struct adapter *,
    157 		    const struct pci_attach_args *);
    158 static int      ixgbe_allocate_legacy(struct adapter *,
    159 		    const struct pci_attach_args *);
    160 static int	ixgbe_allocate_queues(struct adapter *);
    161 static int	ixgbe_setup_msix(struct adapter *);
    162 static void	ixgbe_free_pci_resources(struct adapter *);
    163 static void	ixgbe_local_timer(void *);
    164 static int	ixgbe_setup_interface(device_t, struct adapter *);
    165 static void	ixgbe_config_link(struct adapter *);
    166 
    167 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
    168 static int	ixgbe_setup_transmit_structures(struct adapter *);
    169 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
    170 static void     ixgbe_initialize_transmit_units(struct adapter *);
    171 static void     ixgbe_free_transmit_structures(struct adapter *);
    172 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
    173 
    174 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
    175 static int      ixgbe_setup_receive_structures(struct adapter *);
    176 static int	ixgbe_setup_receive_ring(struct rx_ring *);
    177 static void     ixgbe_initialize_receive_units(struct adapter *);
    178 static void     ixgbe_free_receive_structures(struct adapter *);
    179 static void     ixgbe_free_receive_buffers(struct rx_ring *);
    180 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
    181 
    182 static void     ixgbe_enable_intr(struct adapter *);
    183 static void     ixgbe_disable_intr(struct adapter *);
    184 static void     ixgbe_update_stats_counters(struct adapter *);
    185 static bool	ixgbe_txeof(struct tx_ring *);
    186 static bool	ixgbe_rxeof(struct ix_queue *);
    187 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32,
    188 		    struct ixgbe_hw_stats *);
    189 static void     ixgbe_set_promisc(struct adapter *);
    190 static void     ixgbe_set_multi(struct adapter *);
    191 static void     ixgbe_update_link_status(struct adapter *);
    192 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
    193 static int      ixgbe_xmit(struct tx_ring *, struct mbuf *);
    194 static int	ixgbe_set_flowcntl(SYSCTLFN_PROTO);
    195 static int	ixgbe_set_advertise(SYSCTLFN_PROTO);
    196 static int	ixgbe_set_thermal_test(SYSCTLFN_PROTO);
    197 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
    198 		    struct ixgbe_dma_alloc *, int);
    199 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
    200 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
    201 		    struct mbuf *, u32 *, u32 *);
    202 static int	ixgbe_tso_setup(struct tx_ring *,
    203 		    struct mbuf *, u32 *, u32 *);
    204 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
    205 static void	ixgbe_configure_ivars(struct adapter *);
    206 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
    207 
    208 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
    209 #if 0
    210 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
    211 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
    212 #endif
    213 
    214 static void     ixgbe_add_hw_stats(struct adapter *adapter);
    215 
    216 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
    217 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    218 		    struct mbuf *, u32);
    219 
    220 static void	ixgbe_enable_rx_drop(struct adapter *);
    221 static void	ixgbe_disable_rx_drop(struct adapter *);
    222 
    223 /* Support for pluggable optic modules */
    224 static bool	ixgbe_sfp_probe(struct adapter *);
    225 static void	ixgbe_setup_optics(struct adapter *);
    226 
    227 /* Legacy (single vector) interrupt handler */
    228 static int	ixgbe_legacy_irq(void *);
    229 
    230 #if defined(NETBSD_MSI_OR_MSIX)
    231 /* The MSI/X Interrupt handlers */
    232 static void	ixgbe_msix_que(void *);
    233 static void	ixgbe_msix_link(void *);
    234 #endif
    235 
    236 /* Software interrupts for deferred work */
    237 static void	ixgbe_handle_que(void *);
    238 static void	ixgbe_handle_link(void *);
    239 static void	ixgbe_handle_msf(void *);
    240 static void	ixgbe_handle_mod(void *);
    241 
    242 const struct sysctlnode *ixgbe_sysctl_instance(struct adapter *);
    243 static ixgbe_vendor_info_t *ixgbe_lookup(const struct pci_attach_args *);
    244 
    245 #ifdef IXGBE_FDIR
    246 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
    247 static void	ixgbe_reinit_fdir(void *, int);
    248 #endif
    249 
    250 /*********************************************************************
    251  *  FreeBSD Device Interface Entry Points
    252  *********************************************************************/
    253 
    254 CFATTACH_DECL3_NEW(ixg, sizeof(struct adapter),
    255     ixgbe_probe, ixgbe_attach, ixgbe_detach, NULL, NULL, NULL,
    256     DVF_DETACH_SHUTDOWN);
    257 
    258 #if 0
    259 devclass_t ixgbe_devclass;
    260 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
    261 
    262 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
    263 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
    264 #endif
    265 
    266 /*
    267 ** TUNEABLE PARAMETERS:
    268 */
    269 
    270 /*
    271 ** AIM: Adaptive Interrupt Moderation
    272 ** which means that the interrupt rate
    273 ** is varied over time based on the
    274 ** traffic for that interrupt vector
    275 */
    276 static int ixgbe_enable_aim = TRUE;
    277 #define TUNABLE_INT(__x, __y)
    278 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
    279 
    280 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
    281 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
    282 
    283 /* How many packets rxeof tries to clean at a time */
    284 static int ixgbe_rx_process_limit = 256;
    285 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
    286 
    287 /* How many packets txeof tries to clean at a time */
    288 static int ixgbe_tx_process_limit = 256;
    289 TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
    290 
    291 /*
    292 ** Smart speed setting, default to on
    293 ** this only works as a compile option
    294 ** right now as it's done during attach, set
    295 ** this to 'ixgbe_smart_speed_off' to
    296 ** disable.
    297 */
    298 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
    299 
    300 /*
    301  * MSIX should be the default for best performance,
    302  * but this allows it to be forced off for testing.
    303  */
    304 static int ixgbe_enable_msix = 1;
    305 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
    306 
    307 #if defined(NETBSD_MSI_OR_MSIX)
    308 /*
    309  * Number of Queues, can be set to 0,
    310  * it then autoconfigures based on the
    311  * number of cpus with a max of 8. This
    312  * can be overridden manually here.
    313  */
    314 static int ixgbe_num_queues = 0;
    315 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
    316 #endif
    317 
    318 /*
    319 ** Number of TX descriptors per ring,
    320 ** setting higher than RX as this seems
    321 ** the better performing choice.
    322 */
    323 static int ixgbe_txd = PERFORM_TXD;
    324 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
    325 
    326 /* Number of RX descriptors per ring */
    327 static int ixgbe_rxd = PERFORM_RXD;
    328 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
    329 
    330 /*
    331 ** HW RSC control:
    332 **  this feature only works with
    333 **  IPv4, and only on 82599 and later.
    334 **  Also this will cause IP forwarding to
    335 **  fail and that can't be controlled by
    336 **  the stack as LRO can. For all these
    337 **  reasons I've deemed it best to leave
    338 **  this off and not bother with a tuneable
    339 **  interface, this would need to be compiled
    340 **  to enable.
    341 */
    342 static bool ixgbe_rsc_enable = FALSE;
    343 
    344 /* Keep running tab on them for sanity check */
    345 static int ixgbe_total_ports;
    346 
    347 #ifdef IXGBE_FDIR
    348 /*
    349 ** For Flow Director: this is the
    350 ** number of TX packets we sample
    351 ** for the filter pool, this means
    352 ** every 20th packet will be probed.
    353 **
    354 ** This feature can be disabled by
    355 ** setting this to 0.
    356 */
    357 static int atr_sample_rate = 20;
    358 /*
    359 ** Flow Director actually 'steals'
    360 ** part of the packet buffer as its
    361 ** filter pool, this variable controls
    362 ** how much it uses:
    363 **  0 = 64K, 1 = 128K, 2 = 256K
    364 */
    365 static int fdir_pballoc = 1;
    366 #endif
    367 
    368 #ifdef DEV_NETMAP
    369 /*
    370  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
    371  * be a reference on how to implement netmap support in a driver.
    372  * Additional comments are in ixgbe_netmap.h .
    373  *
    374  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
    375  * that extend the standard driver.
    376  */
    377 #include <dev/netmap/ixgbe_netmap.h>
    378 #endif /* DEV_NETMAP */
    379 
    380 /*********************************************************************
    381  *  Device identification routine
    382  *
    383  *  ixgbe_probe determines if the driver should be loaded on
    384  *  adapter based on PCI vendor/device id of the adapter.
    385  *
    386  *  return 1 on success, 0 on failure
    387  *********************************************************************/
    388 
    389 static int
    390 ixgbe_probe(device_t dev, cfdata_t cf, void *aux)
    391 {
    392 	const struct pci_attach_args *pa = aux;
    393 
    394 	return (ixgbe_lookup(pa) != NULL) ? 1 : 0;
    395 }
    396 
    397 static ixgbe_vendor_info_t *
    398 ixgbe_lookup(const struct pci_attach_args *pa)
    399 {
    400 	pcireg_t subid;
    401 	ixgbe_vendor_info_t *ent;
    402 
    403 	INIT_DEBUGOUT("ixgbe_probe: begin");
    404 
    405 	if (PCI_VENDOR(pa->pa_id) != IXGBE_INTEL_VENDOR_ID)
    406 		return NULL;
    407 
    408 	subid = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
    409 
    410 	for (ent = ixgbe_vendor_info_array; ent->vendor_id != 0; ent++) {
    411 		if (PCI_VENDOR(pa->pa_id) == ent->vendor_id &&
    412 		    PCI_PRODUCT(pa->pa_id) == ent->device_id &&
    413 
    414 		    (PCI_SUBSYS_VENDOR(subid) == ent->subvendor_id ||
    415 		     ent->subvendor_id == 0) &&
    416 
    417 		    (PCI_SUBSYS_ID(subid) == ent->subdevice_id ||
    418 		     ent->subdevice_id == 0)) {
    419 			++ixgbe_total_ports;
    420 			return ent;
    421 		}
    422 	}
    423 	return NULL;
    424 }
    425 
    426 
    427 static void
    428 ixgbe_sysctl_attach(struct adapter *adapter)
    429 {
    430 	struct sysctllog **log;
    431 	const struct sysctlnode *rnode, *cnode;
    432 	device_t dev;
    433 
    434 	dev = adapter->dev;
    435 	log = &adapter->sysctllog;
    436 
    437 	if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
    438 		aprint_error_dev(dev, "could not create sysctl root\n");
    439 		return;
    440 	}
    441 
    442 	if (sysctl_createv(log, 0, &rnode, &cnode,
    443 	    CTLFLAG_READONLY, CTLTYPE_INT,
    444 	    "num_rx_desc", SYSCTL_DESCR("Number of rx descriptors"),
    445 	    NULL, 0, &adapter->num_rx_desc, 0, CTL_CREATE, CTL_EOL) != 0)
    446 		aprint_error_dev(dev, "could not create sysctl\n");
    447 
    448 	if (sysctl_createv(log, 0, &rnode, &cnode,
    449 	    CTLFLAG_READONLY, CTLTYPE_INT,
    450 	    "num_queues", SYSCTL_DESCR("Number of queues"),
    451 	    NULL, 0, &adapter->num_queues, 0, CTL_CREATE, CTL_EOL) != 0)
    452 		aprint_error_dev(dev, "could not create sysctl\n");
    453 
    454 	if (sysctl_createv(log, 0, &rnode, &cnode,
    455 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    456 	    "fc", SYSCTL_DESCR("Flow Control"),
    457 	    ixgbe_set_flowcntl, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    458 		aprint_error_dev(dev, "could not create sysctl\n");
    459 
    460 	/* XXX This is an *instance* sysctl controlling a *global* variable.
    461 	 * XXX It's that way in the FreeBSD driver that this derives from.
    462 	 */
    463 	if (sysctl_createv(log, 0, &rnode, &cnode,
    464 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    465 	    "enable_aim", SYSCTL_DESCR("Interrupt Moderation"),
    466 	    NULL, 0, &ixgbe_enable_aim, 0, CTL_CREATE, CTL_EOL) != 0)
    467 		aprint_error_dev(dev, "could not create sysctl\n");
    468 
    469 	if (sysctl_createv(log, 0, &rnode, &cnode,
    470 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    471 	    "advertise_speed", SYSCTL_DESCR("Link Speed"),
    472 	    ixgbe_set_advertise, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    473 		aprint_error_dev(dev, "could not create sysctl\n");
    474 
    475 	if (sysctl_createv(log, 0, &rnode, &cnode,
    476 	    CTLFLAG_READWRITE, CTLTYPE_INT,
    477 	    "ts", SYSCTL_DESCR("Thermal Test"),
    478 	    ixgbe_set_thermal_test, 0, (void *)adapter, 0, CTL_CREATE, CTL_EOL) != 0)
    479 		aprint_error_dev(dev, "could not create sysctl\n");
    480 }
    481 
    482 /*********************************************************************
    483  *  Device initialization routine
    484  *
    485  *  The attach entry point is called when the driver is being loaded.
    486  *  This routine identifies the type of hardware, allocates all resources
    487  *  and initializes the hardware.
    488  *
    489  *  return 0 on success, positive on failure
    490  *********************************************************************/
    491 
    492 static void
    493 ixgbe_attach(device_t parent, device_t dev, void *aux)
    494 {
    495 	struct adapter *adapter;
    496 	struct ixgbe_hw *hw;
    497 	int             error = 0;
    498 	u16		csum;
    499 	u32		ctrl_ext;
    500 	ixgbe_vendor_info_t *ent;
    501 	const struct pci_attach_args *pa = aux;
    502 
    503 	INIT_DEBUGOUT("ixgbe_attach: begin");
    504 
    505 	/* Allocate, clear, and link in our adapter structure */
    506 	adapter = device_private(dev);
    507 	adapter->dev = adapter->osdep.dev = dev;
    508 	hw = &adapter->hw;
    509 	adapter->osdep.pc = pa->pa_pc;
    510 	adapter->osdep.tag = pa->pa_tag;
    511 	adapter->osdep.dmat = pa->pa_dmat;
    512 
    513 	ent = ixgbe_lookup(pa);
    514 
    515 	KASSERT(ent != NULL);
    516 
    517 	aprint_normal(": %s, Version - %s\n",
    518 	    ixgbe_strings[ent->index], ixgbe_driver_version);
    519 
    520 	/* Core Lock Init*/
    521 	IXGBE_CORE_LOCK_INIT(adapter, device_xname(dev));
    522 
    523 	/* SYSCTL APIs */
    524 
    525 	ixgbe_sysctl_attach(adapter);
    526 
    527 	/* Set up the timer callout */
    528 	callout_init(&adapter->timer, 0);
    529 
    530 	/* Determine hardware revision */
    531 	ixgbe_identify_hardware(adapter);
    532 
    533 	/* Do base PCI setup - map BAR0 */
    534 	if (ixgbe_allocate_pci_resources(adapter, pa)) {
    535 		aprint_error_dev(dev, "Allocation of PCI resources failed\n");
    536 		error = ENXIO;
    537 		goto err_out;
    538 	}
    539 
    540 	/* Do descriptor calc and sanity checks */
    541 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
    542 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
    543 		aprint_error_dev(dev, "TXD config issue, using default!\n");
    544 		adapter->num_tx_desc = DEFAULT_TXD;
    545 	} else
    546 		adapter->num_tx_desc = ixgbe_txd;
    547 
    548 	/*
    549 	** With many RX rings it is easy to exceed the
    550 	** system mbuf allocation. Tuning nmbclusters
    551 	** can alleviate this.
    552 	*/
    553 	if (nmbclusters > 0 ) {
    554 		int s;
    555 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
    556 		if (s > nmbclusters) {
    557 			aprint_error_dev(dev, "RX Descriptors exceed "
    558 			    "system mbuf max, using default instead!\n");
    559 			ixgbe_rxd = DEFAULT_RXD;
    560 		}
    561 	}
    562 
    563 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
    564 	    ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
    565 		aprint_error_dev(dev, "RXD config issue, using default!\n");
    566 		adapter->num_rx_desc = DEFAULT_RXD;
    567 	} else
    568 		adapter->num_rx_desc = ixgbe_rxd;
    569 
    570 	/* Allocate our TX/RX Queues */
    571 	if (ixgbe_allocate_queues(adapter)) {
    572 		error = ENOMEM;
    573 		goto err_out;
    574 	}
    575 
    576 	/* Allocate multicast array memory. */
    577 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
    578 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
    579 	if (adapter->mta == NULL) {
    580 		aprint_error_dev(dev, "Cannot allocate multicast setup array\n");
    581 		error = ENOMEM;
    582 		goto err_late;
    583 	}
    584 
    585 	/* Initialize the shared code */
    586 	error = ixgbe_init_shared_code(hw);
    587 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
    588 		/*
    589 		** No optics in this port, set up
    590 		** so the timer routine will probe
    591 		** for later insertion.
    592 		*/
    593 		adapter->sfp_probe = TRUE;
    594 		error = 0;
    595 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    596 		aprint_error_dev(dev,"Unsupported SFP+ module detected!\n");
    597 		error = EIO;
    598 		goto err_late;
    599 	} else if (error) {
    600 		aprint_error_dev(dev,"Unable to initialize the shared code\n");
    601 		error = EIO;
    602 		goto err_late;
    603 	}
    604 
    605 	/* Make sure we have a good EEPROM before we read from it */
    606 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
    607 		aprint_error_dev(dev,"The EEPROM Checksum Is Not Valid\n");
    608 		error = EIO;
    609 		goto err_late;
    610 	}
    611 
    612 	error = ixgbe_init_hw(hw);
    613 	switch (error) {
    614 	case IXGBE_ERR_EEPROM_VERSION:
    615 		aprint_error_dev(dev, "This device is a pre-production adapter/"
    616 		    "LOM.  Please be aware there may be issues associated "
    617 		    "with your hardware.\n If you are experiencing problems "
    618 		    "please contact your Intel or hardware representative "
    619 		    "who provided you with this hardware.\n");
    620 		break;
    621 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
    622 		aprint_error_dev(dev,"Unsupported SFP+ Module\n");
    623 		error = EIO;
    624 		aprint_error_dev(dev,"Hardware Initialization Failure\n");
    625 		goto err_late;
    626 	case IXGBE_ERR_SFP_NOT_PRESENT:
    627 		device_printf(dev,"No SFP+ Module found\n");
    628 		/* falls thru */
    629 	default:
    630 		break;
    631 	}
    632 
    633 	/* Detect and set physical type */
    634 	ixgbe_setup_optics(adapter);
    635 
    636 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
    637 		error = ixgbe_allocate_msix(adapter, pa);
    638 	else
    639 		error = ixgbe_allocate_legacy(adapter, pa);
    640 	if (error)
    641 		goto err_late;
    642 
    643 	/* Setup OS specific network interface */
    644 	if (ixgbe_setup_interface(dev, adapter) != 0)
    645 		goto err_late;
    646 
    647 	/* Initialize statistics */
    648 	ixgbe_update_stats_counters(adapter);
    649 
    650         /* Print PCIE bus type/speed/width info */
    651 	ixgbe_get_bus_info(hw);
    652 	aprint_normal_dev(dev,"PCI Express Bus: Speed %s %s\n",
    653 	    ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
    654 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
    655 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
    656 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
    657 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
    658 	    ("Unknown"));
    659 
    660 	if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
    661 	    (hw->bus.speed == ixgbe_bus_speed_2500)) {
    662 		aprint_error_dev(dev, "PCI-Express bandwidth available"
    663 		    " for this card\n     is not sufficient for"
    664 		    " optimal performance.\n");
    665 		aprint_error_dev(dev, "For optimal performance a x8 "
    666 		    "PCIE, or x4 PCIE 2 slot is required.\n");
    667         }
    668 
    669 	/* Set an initial default flow control value */
    670 	adapter->fc =  ixgbe_fc_full;
    671 
    672 	/* let hardware know driver is loaded */
    673 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
    674 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
    675 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
    676 
    677 	ixgbe_add_hw_stats(adapter);
    678 
    679 #ifdef DEV_NETMAP
    680 	ixgbe_netmap_attach(adapter);
    681 #endif /* DEV_NETMAP */
    682 	INIT_DEBUGOUT("ixgbe_attach: end");
    683 	return;
    684 err_late:
    685 	ixgbe_free_transmit_structures(adapter);
    686 	ixgbe_free_receive_structures(adapter);
    687 err_out:
    688 	if (adapter->ifp != NULL)
    689 		if_free(adapter->ifp);
    690 	ixgbe_free_pci_resources(adapter);
    691 	if (adapter->mta != NULL)
    692 		free(adapter->mta, M_DEVBUF);
    693 	return;
    694 
    695 }
    696 
    697 /*********************************************************************
    698  *  Device removal routine
    699  *
    700  *  The detach entry point is called when the driver is being removed.
    701  *  This routine stops the adapter and deallocates all the resources
    702  *  that were allocated for driver operation.
    703  *
    704  *  return 0 on success, positive on failure
    705  *********************************************************************/
    706 
    707 static int
    708 ixgbe_detach(device_t dev, int flags)
    709 {
    710 	struct adapter *adapter = device_private(dev);
    711 	struct rx_ring *rxr = adapter->rx_rings;
    712 	struct ixgbe_hw_stats *stats = &adapter->stats;
    713 	struct ix_queue *que = adapter->queues;
    714 	struct tx_ring *txr = adapter->tx_rings;
    715 	u32	ctrl_ext;
    716 
    717 	INIT_DEBUGOUT("ixgbe_detach: begin");
    718 
    719 	/* Make sure VLANs are not using driver */
    720 	if (!VLAN_ATTACHED(&adapter->osdep.ec))
    721 		;	/* nothing to do: no VLANs */
    722 	else if ((flags & (DETACH_SHUTDOWN|DETACH_FORCE)) != 0)
    723 		vlan_ifdetach(adapter->ifp);
    724 	else {
    725 		aprint_error_dev(dev, "VLANs in use\n");
    726 		return EBUSY;
    727 	}
    728 
    729 	IXGBE_CORE_LOCK(adapter);
    730 	ixgbe_stop(adapter);
    731 	IXGBE_CORE_UNLOCK(adapter);
    732 
    733 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
    734 #ifndef IXGBE_LEGACY_TX
    735 		softint_disestablish(txr->txq_si);
    736 #endif
    737 		softint_disestablish(que->que_si);
    738 	}
    739 
    740 	/* Drain the Link queue */
    741 	softint_disestablish(adapter->link_si);
    742 	softint_disestablish(adapter->mod_si);
    743 	softint_disestablish(adapter->msf_si);
    744 #ifdef IXGBE_FDIR
    745 	softint_disestablish(adapter->fdir_si);
    746 #endif
    747 
    748 	/* let hardware know driver is unloading */
    749 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
    750 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
    751 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
    752 
    753 	ether_ifdetach(adapter->ifp);
    754 	callout_halt(&adapter->timer, NULL);
    755 #ifdef DEV_NETMAP
    756 	netmap_detach(adapter->ifp);
    757 #endif /* DEV_NETMAP */
    758 	ixgbe_free_pci_resources(adapter);
    759 #if 0	/* XXX the NetBSD port is probably missing something here */
    760 	bus_generic_detach(dev);
    761 #endif
    762 	if_detach(adapter->ifp);
    763 
    764 	sysctl_teardown(&adapter->sysctllog);
    765 	evcnt_detach(&adapter->handleq);
    766 	evcnt_detach(&adapter->req);
    767 	evcnt_detach(&adapter->morerx);
    768 	evcnt_detach(&adapter->moretx);
    769 	evcnt_detach(&adapter->txloops);
    770 	evcnt_detach(&adapter->efbig_tx_dma_setup);
    771 	evcnt_detach(&adapter->m_defrag_failed);
    772 	evcnt_detach(&adapter->efbig2_tx_dma_setup);
    773 	evcnt_detach(&adapter->einval_tx_dma_setup);
    774 	evcnt_detach(&adapter->other_tx_dma_setup);
    775 	evcnt_detach(&adapter->eagain_tx_dma_setup);
    776 	evcnt_detach(&adapter->enomem_tx_dma_setup);
    777 	evcnt_detach(&adapter->watchdog_events);
    778 	evcnt_detach(&adapter->tso_err);
    779 	evcnt_detach(&adapter->link_irq);
    780 
    781 	txr = adapter->tx_rings;
    782 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
    783 		evcnt_detach(&txr->no_desc_avail);
    784 		evcnt_detach(&txr->total_packets);
    785 		evcnt_detach(&txr->tso_tx);
    786 
    787 		if (i < __arraycount(adapter->stats.mpc)) {
    788 			evcnt_detach(&adapter->stats.mpc[i]);
    789 		}
    790 		if (i < __arraycount(adapter->stats.pxontxc)) {
    791 			evcnt_detach(&adapter->stats.pxontxc[i]);
    792 			evcnt_detach(&adapter->stats.pxonrxc[i]);
    793 			evcnt_detach(&adapter->stats.pxofftxc[i]);
    794 			evcnt_detach(&adapter->stats.pxoffrxc[i]);
    795 			evcnt_detach(&adapter->stats.pxon2offc[i]);
    796 		}
    797 		if (i < __arraycount(adapter->stats.qprc)) {
    798 			evcnt_detach(&adapter->stats.qprc[i]);
    799 			evcnt_detach(&adapter->stats.qptc[i]);
    800 			evcnt_detach(&adapter->stats.qbrc[i]);
    801 			evcnt_detach(&adapter->stats.qbtc[i]);
    802 			evcnt_detach(&adapter->stats.qprdc[i]);
    803 		}
    804 
    805 		evcnt_detach(&rxr->rx_packets);
    806 		evcnt_detach(&rxr->rx_bytes);
    807 		evcnt_detach(&rxr->no_jmbuf);
    808 		evcnt_detach(&rxr->rx_discarded);
    809 		evcnt_detach(&rxr->rx_irq);
    810 	}
    811 	evcnt_detach(&stats->ipcs);
    812 	evcnt_detach(&stats->l4cs);
    813 	evcnt_detach(&stats->ipcs_bad);
    814 	evcnt_detach(&stats->l4cs_bad);
    815 	evcnt_detach(&stats->intzero);
    816 	evcnt_detach(&stats->legint);
    817 	evcnt_detach(&stats->crcerrs);
    818 	evcnt_detach(&stats->illerrc);
    819 	evcnt_detach(&stats->errbc);
    820 	evcnt_detach(&stats->mspdc);
    821 	evcnt_detach(&stats->mlfc);
    822 	evcnt_detach(&stats->mrfc);
    823 	evcnt_detach(&stats->rlec);
    824 	evcnt_detach(&stats->lxontxc);
    825 	evcnt_detach(&stats->lxonrxc);
    826 	evcnt_detach(&stats->lxofftxc);
    827 	evcnt_detach(&stats->lxoffrxc);
    828 
    829 	/* Packet Reception Stats */
    830 	evcnt_detach(&stats->tor);
    831 	evcnt_detach(&stats->gorc);
    832 	evcnt_detach(&stats->tpr);
    833 	evcnt_detach(&stats->gprc);
    834 	evcnt_detach(&stats->mprc);
    835 	evcnt_detach(&stats->bprc);
    836 	evcnt_detach(&stats->prc64);
    837 	evcnt_detach(&stats->prc127);
    838 	evcnt_detach(&stats->prc255);
    839 	evcnt_detach(&stats->prc511);
    840 	evcnt_detach(&stats->prc1023);
    841 	evcnt_detach(&stats->prc1522);
    842 	evcnt_detach(&stats->ruc);
    843 	evcnt_detach(&stats->rfc);
    844 	evcnt_detach(&stats->roc);
    845 	evcnt_detach(&stats->rjc);
    846 	evcnt_detach(&stats->mngprc);
    847 	evcnt_detach(&stats->xec);
    848 
    849 	/* Packet Transmission Stats */
    850 	evcnt_detach(&stats->gotc);
    851 	evcnt_detach(&stats->tpt);
    852 	evcnt_detach(&stats->gptc);
    853 	evcnt_detach(&stats->bptc);
    854 	evcnt_detach(&stats->mptc);
    855 	evcnt_detach(&stats->mngptc);
    856 	evcnt_detach(&stats->ptc64);
    857 	evcnt_detach(&stats->ptc127);
    858 	evcnt_detach(&stats->ptc255);
    859 	evcnt_detach(&stats->ptc511);
    860 	evcnt_detach(&stats->ptc1023);
    861 	evcnt_detach(&stats->ptc1522);
    862 
    863 	ixgbe_free_transmit_structures(adapter);
    864 	ixgbe_free_receive_structures(adapter);
    865 	free(adapter->mta, M_DEVBUF);
    866 
    867 	IXGBE_CORE_LOCK_DESTROY(adapter);
    868 	return (0);
    869 }
    870 
/*********************************************************************
 *
 *  Shutdown entry point
 *
 *  Currently compiled out.  When enabled it stops the adapter under
 *  the core lock and reports success.
 *
 **********************************************************************/

#if 0 /* XXX NetBSD ought to register something like this through pmf(9) */
static int
ixgbe_shutdown(device_t dev)
{
	struct adapter *sc = device_private(dev);

	IXGBE_CORE_LOCK(sc);
	ixgbe_stop(sc);
	IXGBE_CORE_UNLOCK(sc);

	return (0);
}
#endif
    888 
    889 
    890 #ifdef IXGBE_LEGACY_TX
    891 /*********************************************************************
    892  *  Transmit entry point
    893  *
    894  *  ixgbe_start is called by the stack to initiate a transmit.
    895  *  The driver will remain in this routine as long as there are
    896  *  packets to transmit and transmit resources are available.
    897  *  In case resources are not available stack is notified and
    898  *  the packet is requeued.
    899  **********************************************************************/
    900 
    901 static void
    902 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
    903 {
    904 	int rc;
    905 	struct mbuf    *m_head;
    906 	struct adapter *adapter = txr->adapter;
    907 
    908 	IXGBE_TX_LOCK_ASSERT(txr);
    909 
    910 	if ((ifp->if_flags & IFF_RUNNING) == 0)
    911 		return;
    912 	if (!adapter->link_active)
    913 		return;
    914 
    915 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
    916 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
    917 			break;
    918 
    919 		IFQ_POLL(&ifp->if_snd, m_head);
    920 		if (m_head == NULL)
    921 			break;
    922 
    923 		if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
    924 			break;
    925 		}
    926 		IFQ_DEQUEUE(&ifp->if_snd, m_head);
    927 		if (rc == EFBIG) {
    928 			struct mbuf *mtmp;
    929 
    930 			if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
    931 				m_head = mtmp;
    932 				rc = ixgbe_xmit(txr, m_head);
    933 				if (rc != 0)
    934 					adapter->efbig2_tx_dma_setup.ev_count++;
    935 			} else
    936 				adapter->m_defrag_failed.ev_count++;
    937 		}
    938 		if (rc != 0) {
    939 			m_freem(m_head);
    940 			continue;
    941 		}
    942 
    943 		/* Send a copy of the frame to the BPF listener */
    944 		bpf_mtap(ifp, m_head);
    945 
    946 		/* Set watchdog on */
    947 		getmicrotime(&txr->watchdog_time);
    948 		txr->queue_status = IXGBE_QUEUE_WORKING;
    949 
    950 	}
    951 	return;
    952 }
    953 
    954 /*
    955  * Legacy TX start - called by the stack, this
    956  * always uses the first tx ring, and should
    957  * not be used with multiqueue tx enabled.
    958  */
    959 static void
    960 ixgbe_start(struct ifnet *ifp)
    961 {
    962 	struct adapter *adapter = ifp->if_softc;
    963 	struct tx_ring	*txr = adapter->tx_rings;
    964 
    965 	if (ifp->if_flags & IFF_RUNNING) {
    966 		IXGBE_TX_LOCK(txr);
    967 		ixgbe_start_locked(txr, ifp);
    968 		IXGBE_TX_UNLOCK(txr);
    969 	}
    970 	return;
    971 }
    972 
    973 #else /* ! IXGBE_LEGACY_TX */
    974 
    975 /*
    976 ** Multiqueue Transmit driver
    977 **
    978 */
    979 static int
    980 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
    981 {
    982 	struct adapter	*adapter = ifp->if_softc;
    983 	struct ix_queue	*que;
    984 	struct tx_ring	*txr;
    985 	int 		i = 0, err = 0;
    986 
    987 	/* Which queue to use */
    988 	if ((m->m_flags & M_FLOWID) != 0)
    989 		i = m->m_pkthdr.flowid % adapter->num_queues;
    990 	else
    991 		i = cpu_index(curcpu()) % adapter->num_queues;
    992 
    993 	txr = &adapter->tx_rings[i];
    994 	que = &adapter->queues[i];
    995 
    996 	if (IXGBE_TX_TRYLOCK(txr)) {
    997 		err = ixgbe_mq_start_locked(ifp, txr, m);
    998 		IXGBE_TX_UNLOCK(txr);
    999 	} else {
   1000 		err = drbr_enqueue(ifp, txr->br, m);
   1001 		softint_schedule(txr->txq_si);
   1002 	}
   1003 
   1004 	return (err);
   1005 }
   1006 
   1007 static int
   1008 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
   1009 {
   1010 	struct adapter  *adapter = txr->adapter;
   1011         struct mbuf     *next;
   1012         int             enqueued, err = 0;
   1013 
   1014 	if (((ifp->if_flags & IFF_RUNNING) == 0) ||
   1015 	    adapter->link_active == 0) {
   1016 		if (m != NULL)
   1017 			err = drbr_enqueue(ifp, txr->br, m);
   1018 		return (err);
   1019 	}
   1020 
   1021 	enqueued = 0;
   1022 	if (m != NULL) {
   1023 		err = drbr_enqueue(ifp, txr->br, m);
   1024 		if (err) {
   1025 			return (err);
   1026 		}
   1027 	}
   1028 
   1029 	/* Process the queue */
   1030 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
   1031 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
   1032 			if (next == NULL) {
   1033 				drbr_advance(ifp, txr->br);
   1034 			} else {
   1035 				drbr_putback(ifp, txr->br, next);
   1036 			}
   1037 			break;
   1038 		}
   1039 		drbr_advance(ifp, txr->br);
   1040 		enqueued++;
   1041 		/* Send a copy of the frame to the BPF listener */
   1042 		bpf_mtap(ifp, next);
   1043 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1044 			break;
   1045 		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
   1046 			ixgbe_txeof(txr);
   1047 	}
   1048 
   1049 	if (enqueued > 0) {
   1050 		/* Set watchdog on */
   1051 		txr->queue_status = IXGBE_QUEUE_WORKING;
   1052 		getmicrotime(&txr->watchdog_time);
   1053 	}
   1054 
   1055 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
   1056 		ixgbe_txeof(txr);
   1057 
   1058 	return (err);
   1059 }
   1060 
   1061 /*
   1062  * Called from a taskqueue to drain queued transmit packets.
   1063  */
   1064 static void
   1065 ixgbe_deferred_mq_start(void *arg)
   1066 {
   1067 	struct tx_ring *txr = arg;
   1068 	struct adapter *adapter = txr->adapter;
   1069 	struct ifnet *ifp = adapter->ifp;
   1070 
   1071 	IXGBE_TX_LOCK(txr);
   1072 	if (!drbr_empty(ifp, txr->br))
   1073 		ixgbe_mq_start_locked(ifp, txr, NULL);
   1074 	IXGBE_TX_UNLOCK(txr);
   1075 }
   1076 
   1077 /*
   1078 ** Flush all ring buffers
   1079 */
   1080 static void
   1081 ixgbe_qflush(struct ifnet *ifp)
   1082 {
   1083 	struct adapter	*adapter = ifp->if_softc;
   1084 	struct tx_ring	*txr = adapter->tx_rings;
   1085 	struct mbuf	*m;
   1086 
   1087 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   1088 		IXGBE_TX_LOCK(txr);
   1089 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
   1090 			m_freem(m);
   1091 		IXGBE_TX_UNLOCK(txr);
   1092 	}
   1093 	if_qflush(ifp);
   1094 }
   1095 #endif /* IXGBE_LEGACY_TX */
   1096 
   1097 static int
   1098 ixgbe_ifflags_cb(struct ethercom *ec)
   1099 {
   1100 	struct ifnet *ifp = &ec->ec_if;
   1101 	struct adapter *adapter = ifp->if_softc;
   1102 	int change = ifp->if_flags ^ adapter->if_flags, rc = 0;
   1103 
   1104 	IXGBE_CORE_LOCK(adapter);
   1105 
   1106 	if (change != 0)
   1107 		adapter->if_flags = ifp->if_flags;
   1108 
   1109 	if ((change & ~(IFF_CANTCHANGE|IFF_DEBUG)) != 0)
   1110 		rc = ENETRESET;
   1111 	else if ((change & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
   1112 		ixgbe_set_promisc(adapter);
   1113 
   1114 	/* Set up VLAN support and filter */
   1115 	ixgbe_setup_vlan_hw_support(adapter);
   1116 
   1117 	IXGBE_CORE_UNLOCK(adapter);
   1118 
   1119 	return rc;
   1120 }
   1121 
   1122 /*********************************************************************
   1123  *  Ioctl entry point
   1124  *
   1125  *  ixgbe_ioctl is called when the user wants to configure the
   1126  *  interface.
   1127  *
   1128  *  return 0 on success, positive on failure
   1129  **********************************************************************/
   1130 
   1131 static int
   1132 ixgbe_ioctl(struct ifnet * ifp, u_long command, void *data)
   1133 {
   1134 	struct adapter	*adapter = ifp->if_softc;
   1135 	struct ixgbe_hw *hw = &adapter->hw;
   1136 	struct ifcapreq *ifcr = data;
   1137 	struct ifreq	*ifr = data;
   1138 	int             error = 0;
   1139 	int l4csum_en;
   1140 	const int l4csum = IFCAP_CSUM_TCPv4_Rx|IFCAP_CSUM_UDPv4_Rx|
   1141 	     IFCAP_CSUM_TCPv6_Rx|IFCAP_CSUM_UDPv6_Rx;
   1142 
   1143 	switch (command) {
   1144 	case SIOCSIFFLAGS:
   1145 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
   1146 		break;
   1147 	case SIOCADDMULTI:
   1148 	case SIOCDELMULTI:
   1149 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
   1150 		break;
   1151 	case SIOCSIFMEDIA:
   1152 	case SIOCGIFMEDIA:
   1153 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
   1154 		break;
   1155 	case SIOCSIFCAP:
   1156 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
   1157 		break;
   1158 	case SIOCSIFMTU:
   1159 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
   1160 		break;
   1161 	default:
   1162 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
   1163 		break;
   1164 	}
   1165 
   1166 	switch (command) {
   1167 	case SIOCSIFMEDIA:
   1168 	case SIOCGIFMEDIA:
   1169 		return ifmedia_ioctl(ifp, ifr, &adapter->media, command);
   1170 	case SIOCGI2C:
   1171 	{
   1172 		struct ixgbe_i2c_req	i2c;
   1173 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
   1174 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
   1175 		if (error)
   1176 			break;
   1177 		if ((i2c.dev_addr != 0xA0) || (i2c.dev_addr != 0xA2)){
   1178 			error = EINVAL;
   1179 			break;
   1180 		}
   1181 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
   1182 		    i2c.dev_addr, i2c.data);
   1183 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
   1184 		break;
   1185 	}
   1186 	case SIOCSIFCAP:
   1187 		/* Layer-4 Rx checksum offload has to be turned on and
   1188 		 * off as a unit.
   1189 		 */
   1190 		l4csum_en = ifcr->ifcr_capenable & l4csum;
   1191 		if (l4csum_en != l4csum && l4csum_en != 0)
   1192 			return EINVAL;
   1193 		/*FALLTHROUGH*/
   1194 	case SIOCADDMULTI:
   1195 	case SIOCDELMULTI:
   1196 	case SIOCSIFFLAGS:
   1197 	case SIOCSIFMTU:
   1198 	default:
   1199 		if ((error = ether_ioctl(ifp, command, data)) != ENETRESET)
   1200 			return error;
   1201 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   1202 			;
   1203 		else if (command == SIOCSIFCAP || command == SIOCSIFMTU) {
   1204 			IXGBE_CORE_LOCK(adapter);
   1205 			ixgbe_init_locked(adapter);
   1206 			IXGBE_CORE_UNLOCK(adapter);
   1207 		} else if (command == SIOCADDMULTI || command == SIOCDELMULTI) {
   1208 			/*
   1209 			 * Multicast list has changed; set the hardware filter
   1210 			 * accordingly.
   1211 			 */
   1212 			IXGBE_CORE_LOCK(adapter);
   1213 			ixgbe_disable_intr(adapter);
   1214 			ixgbe_set_multi(adapter);
   1215 			ixgbe_enable_intr(adapter);
   1216 			IXGBE_CORE_UNLOCK(adapter);
   1217 		}
   1218 		return 0;
   1219 	}
   1220 
   1221 	return error;
   1222 }
   1223 
   1224 /*********************************************************************
   1225  *  Init entry point
   1226  *
   1227  *  This routine is used in two ways. It is used by the stack as
   1228  *  init entry point in network interface structure. It is also used
   1229  *  by the driver as a hw/sw initialization routine to get to a
   1230  *  consistent state.
   1231  *
   1232  *  return 0 on success, positive on failure
   1233  **********************************************************************/
   1234 #define IXGBE_MHADD_MFS_SHIFT 16
   1235 
   1236 static void
   1237 ixgbe_init_locked(struct adapter *adapter)
   1238 {
   1239 	struct ifnet   *ifp = adapter->ifp;
   1240 	device_t 	dev = adapter->dev;
   1241 	struct ixgbe_hw *hw = &adapter->hw;
   1242 	u32		k, txdctl, mhadd, gpie;
   1243 	u32		rxdctl, rxctrl;
   1244 
   1245 	/* XXX check IFF_UP and IFF_RUNNING, power-saving state! */
   1246 
   1247 	KASSERT(mutex_owned(&adapter->core_mtx));
   1248 	INIT_DEBUGOUT("ixgbe_init: begin");
   1249 	hw->adapter_stopped = FALSE;
   1250 	ixgbe_stop_adapter(hw);
   1251         callout_stop(&adapter->timer);
   1252 
   1253 	/* XXX I moved this here from the SIOCSIFMTU case in ixgbe_ioctl(). */
   1254 	adapter->max_frame_size =
   1255 		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   1256 
   1257         /* reprogram the RAR[0] in case user changed it. */
   1258         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   1259 
   1260 	/* Get the latest mac address, User can use a LAA */
   1261 	memcpy(hw->mac.addr, CLLADDR(adapter->ifp->if_sadl),
   1262 	    IXGBE_ETH_LENGTH_OF_ADDRESS);
   1263 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
   1264 	hw->addr_ctrl.rar_used_count = 1;
   1265 
   1266 	/* Prepare transmit descriptors and buffers */
   1267 	if (ixgbe_setup_transmit_structures(adapter)) {
   1268 		device_printf(dev,"Could not setup transmit structures\n");
   1269 		ixgbe_stop(adapter);
   1270 		return;
   1271 	}
   1272 
   1273 	ixgbe_init_hw(hw);
   1274 	ixgbe_initialize_transmit_units(adapter);
   1275 
   1276 	/* Setup Multicast table */
   1277 	ixgbe_set_multi(adapter);
   1278 
   1279 	/*
   1280 	** Determine the correct mbuf pool
   1281 	** for doing jumbo frames
   1282 	*/
   1283 	if (adapter->max_frame_size <= 2048)
   1284 		adapter->rx_mbuf_sz = MCLBYTES;
   1285 	else if (adapter->max_frame_size <= 4096)
   1286 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
   1287 	else if (adapter->max_frame_size <= 9216)
   1288 		adapter->rx_mbuf_sz = MJUM9BYTES;
   1289 	else
   1290 		adapter->rx_mbuf_sz = MJUM16BYTES;
   1291 
   1292 	/* Prepare receive descriptors and buffers */
   1293 	if (ixgbe_setup_receive_structures(adapter)) {
   1294 		device_printf(dev,"Could not setup receive structures\n");
   1295 		ixgbe_stop(adapter);
   1296 		return;
   1297 	}
   1298 
   1299 	/* Configure RX settings */
   1300 	ixgbe_initialize_receive_units(adapter);
   1301 
   1302 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
   1303 
   1304 	/* Enable Fan Failure Interrupt */
   1305 	gpie |= IXGBE_SDP1_GPIEN;
   1306 
   1307 	/* Add for Thermal detection */
   1308 	if (hw->mac.type == ixgbe_mac_82599EB)
   1309 		gpie |= IXGBE_SDP2_GPIEN;
   1310 
   1311 	/* Thermal Failure Detection */
   1312 	if (hw->mac.type == ixgbe_mac_X540)
   1313 		gpie |= IXGBE_SDP0_GPIEN;
   1314 
   1315 	if (adapter->msix > 1) {
   1316 		/* Enable Enhanced MSIX mode */
   1317 		gpie |= IXGBE_GPIE_MSIX_MODE;
   1318 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
   1319 		    IXGBE_GPIE_OCD;
   1320 	}
   1321 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
   1322 
   1323 	/* Set MTU size */
   1324 	if (ifp->if_mtu > ETHERMTU) {
   1325 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
   1326 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
   1327 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
   1328 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
   1329 	}
   1330 
   1331 	/* Now enable all the queues */
   1332 
   1333 	for (int i = 0; i < adapter->num_queues; i++) {
   1334 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
   1335 		txdctl |= IXGBE_TXDCTL_ENABLE;
   1336 		/* Set WTHRESH to 8, burst writeback */
   1337 		txdctl |= (8 << 16);
   1338 		/*
   1339 		 * When the internal queue falls below PTHRESH (32),
   1340 		 * start prefetching as long as there are at least
   1341 		 * HTHRESH (1) buffers ready. The values are taken
   1342 		 * from the Intel linux driver 3.8.21.
   1343 		 * Prefetching enables tx line rate even with 1 queue.
   1344 		 */
   1345 		txdctl |= (32 << 0) | (1 << 8);
   1346 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
   1347 	}
   1348 
   1349 	for (int i = 0; i < adapter->num_queues; i++) {
   1350 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   1351 		if (hw->mac.type == ixgbe_mac_82598EB) {
   1352 			/*
   1353 			** PTHRESH = 21
   1354 			** HTHRESH = 4
   1355 			** WTHRESH = 8
   1356 			*/
   1357 			rxdctl &= ~0x3FFFFF;
   1358 			rxdctl |= 0x080420;
   1359 		}
   1360 		rxdctl |= IXGBE_RXDCTL_ENABLE;
   1361 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
   1362 		/* XXX I don't trust this loop, and I don't trust the
   1363 		 * XXX memory barrier.  What is this meant to do? --dyoung
   1364 		 */
   1365 		for (k = 0; k < 10; k++) {
   1366 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
   1367 			    IXGBE_RXDCTL_ENABLE)
   1368 				break;
   1369 			else
   1370 				msec_delay(1);
   1371 		}
   1372 		wmb();
   1373 #ifdef DEV_NETMAP
   1374 		/*
   1375 		 * In netmap mode, we must preserve the buffers made
   1376 		 * available to userspace before the if_init()
   1377 		 * (this is true by default on the TX side, because
   1378 		 * init makes all buffers available to userspace).
   1379 		 *
   1380 		 * netmap_reset() and the device specific routines
   1381 		 * (e.g. ixgbe_setup_receive_rings()) map these
   1382 		 * buffers at the end of the NIC ring, so here we
   1383 		 * must set the RDT (tail) register to make sure
   1384 		 * they are not overwritten.
   1385 		 *
   1386 		 * In this driver the NIC ring starts at RDH = 0,
   1387 		 * RDT points to the last slot available for reception (?),
   1388 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
   1389 		 */
   1390 		if (ifp->if_capenable & IFCAP_NETMAP) {
   1391 			struct netmap_adapter *na = NA(adapter->ifp);
   1392 			struct netmap_kring *kring = &na->rx_rings[i];
   1393 			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
   1394 
   1395 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
   1396 		} else
   1397 #endif /* DEV_NETMAP */
   1398 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
   1399 	}
   1400 
   1401 	/* Set up VLAN support and filter */
   1402 	ixgbe_setup_vlan_hw_support(adapter);
   1403 
   1404 	/* Enable Receive engine */
   1405 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   1406 	if (hw->mac.type == ixgbe_mac_82598EB)
   1407 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
   1408 	rxctrl |= IXGBE_RXCTRL_RXEN;
   1409 	ixgbe_enable_rx_dma(hw, rxctrl);
   1410 
   1411 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   1412 
   1413 	/* Set up MSI/X routing */
   1414 	if (ixgbe_enable_msix)  {
   1415 		ixgbe_configure_ivars(adapter);
   1416 		/* Set up auto-mask */
   1417 		if (hw->mac.type == ixgbe_mac_82598EB)
   1418 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1419 		else {
   1420 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
   1421 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
   1422 		}
   1423 	} else {  /* Simple settings for Legacy/MSI */
   1424                 ixgbe_set_ivar(adapter, 0, 0, 0);
   1425                 ixgbe_set_ivar(adapter, 0, 0, 1);
   1426 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
   1427 	}
   1428 
   1429 #ifdef IXGBE_FDIR
   1430 	/* Init Flow director */
   1431 	if (hw->mac.type != ixgbe_mac_82598EB) {
   1432 		u32 hdrm = 32 << fdir_pballoc;
   1433 
   1434 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
   1435 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
   1436 	}
   1437 #endif
   1438 
   1439 	/*
   1440 	** Check on any SFP devices that
   1441 	** need to be kick-started
   1442 	*/
   1443 	if (hw->phy.type == ixgbe_phy_none) {
   1444 		int err = hw->phy.ops.identify(hw);
   1445 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   1446                 	device_printf(dev,
   1447 			    "Unsupported SFP+ module type was detected.\n");
   1448 			return;
   1449         	}
   1450 	}
   1451 
   1452 	/* Set moderation on the Link interrupt */
   1453 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
   1454 
   1455 	/* Config/Enable Link */
   1456 	ixgbe_config_link(adapter);
   1457 
   1458 	/* Hardware Packet Buffer & Flow Control setup */
   1459 	{
   1460 		u32 rxpb, frame, size, tmp;
   1461 
   1462 		frame = adapter->max_frame_size;
   1463 
   1464 		/* Calculate High Water */
   1465 		if (hw->mac.type == ixgbe_mac_X540)
   1466 			tmp = IXGBE_DV_X540(frame, frame);
   1467 		else
   1468 			tmp = IXGBE_DV(frame, frame);
   1469 		size = IXGBE_BT2KB(tmp);
   1470 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
   1471 		hw->fc.high_water[0] = rxpb - size;
   1472 
   1473 		/* Now calculate Low Water */
   1474 		if (hw->mac.type == ixgbe_mac_X540)
   1475 			tmp = IXGBE_LOW_DV_X540(frame);
   1476 		else
   1477 			tmp = IXGBE_LOW_DV(frame);
   1478 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
   1479 
   1480 		hw->fc.requested_mode = adapter->fc;
   1481 		hw->fc.pause_time = IXGBE_FC_PAUSE;
   1482 		hw->fc.send_xon = TRUE;
   1483 	}
   1484 	/* Initialize the FC settings */
   1485 	ixgbe_start_hw(hw);
   1486 
   1487 	/* And now turn on interrupts */
   1488 	ixgbe_enable_intr(adapter);
   1489 
   1490 	/* Now inform the stack we're ready */
   1491 	ifp->if_flags |= IFF_RUNNING;
   1492 
   1493 	return;
   1494 }
   1495 
   1496 static int
   1497 ixgbe_init(struct ifnet *ifp)
   1498 {
   1499 	struct adapter *adapter = ifp->if_softc;
   1500 
   1501 	IXGBE_CORE_LOCK(adapter);
   1502 	ixgbe_init_locked(adapter);
   1503 	IXGBE_CORE_UNLOCK(adapter);
   1504 	return 0;	/* XXX ixgbe_init_locked cannot fail?  really? */
   1505 }
   1506 
   1507 
   1508 /*
   1509 **
   1510 ** MSIX Interrupt Handlers and Tasklets
   1511 **
   1512 */
   1513 
   1514 static inline void
   1515 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
   1516 {
   1517 	struct ixgbe_hw *hw = &adapter->hw;
   1518 	u64	queue = (u64)(1ULL << vector);
   1519 	u32	mask;
   1520 
   1521 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1522                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1523                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   1524 	} else {
   1525                 mask = (queue & 0xFFFFFFFF);
   1526                 if (mask)
   1527                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
   1528                 mask = (queue >> 32);
   1529                 if (mask)
   1530                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
   1531 	}
   1532 }
   1533 
   1534 __unused static inline void
   1535 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
   1536 {
   1537 	struct ixgbe_hw *hw = &adapter->hw;
   1538 	u64	queue = (u64)(1ULL << vector);
   1539 	u32	mask;
   1540 
   1541 	if (hw->mac.type == ixgbe_mac_82598EB) {
   1542                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
   1543                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
   1544 	} else {
   1545                 mask = (queue & 0xFFFFFFFF);
   1546                 if (mask)
   1547                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
   1548                 mask = (queue >> 32);
   1549                 if (mask)
   1550                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
   1551 	}
   1552 }
   1553 
   1554 static inline void
   1555 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
   1556 {
   1557 	u32 mask;
   1558 
   1559 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   1560 		mask = (IXGBE_EIMS_RTX_QUEUE & queues);
   1561 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
   1562 	} else {
   1563 		mask = (queues & 0xFFFFFFFF);
   1564 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
   1565 		mask = (queues >> 32);
   1566 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
   1567 	}
   1568 }
   1569 
   1570 
   1571 static void
   1572 ixgbe_handle_que(void *context)
   1573 {
   1574 	struct ix_queue *que = context;
   1575 	struct adapter  *adapter = que->adapter;
   1576 	struct tx_ring  *txr = que->txr;
   1577 	struct ifnet    *ifp = adapter->ifp;
   1578 	bool		more;
   1579 
   1580 	adapter->handleq.ev_count++;
   1581 
   1582 	if (ifp->if_flags & IFF_RUNNING) {
   1583 		more = ixgbe_rxeof(que);
   1584 		IXGBE_TX_LOCK(txr);
   1585 		ixgbe_txeof(txr);
   1586 #ifndef IXGBE_LEGACY_TX
   1587 		if (!drbr_empty(ifp, txr->br))
   1588 			ixgbe_mq_start_locked(ifp, txr, NULL);
   1589 #else
   1590 		if (!IFQ_IS_EMPTY(&ifp->if_snd))
   1591 			ixgbe_start_locked(txr, ifp);
   1592 #endif
   1593 		IXGBE_TX_UNLOCK(txr);
   1594 		if (more) {
   1595 			adapter->req.ev_count++;
   1596 			softint_schedule(que->que_si);
   1597 			return;
   1598 		}
   1599 	}
   1600 
   1601 	/* Reenable this interrupt */
   1602 	ixgbe_enable_queue(adapter, que->msix);
   1603 	return;
   1604 }
   1605 
   1606 
   1607 /*********************************************************************
   1608  *
   1609  *  Legacy Interrupt Service routine
   1610  *
   1611  **********************************************************************/
   1612 
   1613 static int
   1614 ixgbe_legacy_irq(void *arg)
   1615 {
   1616 	struct ix_queue *que = arg;
   1617 	struct adapter	*adapter = que->adapter;
   1618 	struct ifnet   *ifp = adapter->ifp;
   1619 	struct ixgbe_hw	*hw = &adapter->hw;
   1620 	struct 		tx_ring *txr = adapter->tx_rings;
   1621 	bool		more_tx = false, more_rx = false;
   1622 	u32       	reg_eicr, loop = MAX_LOOP;
   1623 
   1624 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
   1625 
   1626 	adapter->stats.legint.ev_count++;
   1627 	++que->irqs;
   1628 	if (reg_eicr == 0) {
   1629 		adapter->stats.intzero.ev_count++;
   1630 		if ((ifp->if_flags & IFF_UP) != 0)
   1631 			ixgbe_enable_intr(adapter);
   1632 		return 0;
   1633 	}
   1634 
   1635 	if ((ifp->if_flags & IFF_RUNNING) != 0) {
   1636 		more_rx = ixgbe_rxeof(que);
   1637 
   1638 		IXGBE_TX_LOCK(txr);
   1639 		do {
   1640 			adapter->txloops.ev_count++;
   1641 			more_tx = ixgbe_txeof(txr);
   1642 		} while (loop-- && more_tx);
   1643 		IXGBE_TX_UNLOCK(txr);
   1644 	}
   1645 
   1646 	if (more_rx || more_tx) {
   1647 		if (more_rx)
   1648 			adapter->morerx.ev_count++;
   1649 		if (more_tx)
   1650 			adapter->moretx.ev_count++;
   1651 		softint_schedule(que->que_si);
   1652 	}
   1653 
   1654 	/* Check for fan failure */
   1655 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
   1656 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1657                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1658 		    "REPLACE IMMEDIATELY!!\n");
   1659 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
   1660 	}
   1661 
   1662 	/* Link status change */
   1663 	if (reg_eicr & IXGBE_EICR_LSC)
   1664 		softint_schedule(adapter->link_si);
   1665 
   1666 	ixgbe_enable_intr(adapter);
   1667 	return 1;
   1668 }
   1669 
   1670 
   1671 #if defined(NETBSD_MSI_OR_MSIX)
   1672 /*********************************************************************
   1673  *
   1674  *  MSIX Queue Interrupt Service routine
   1675  *
   1676  **********************************************************************/
   1677 void
   1678 ixgbe_msix_que(void *arg)
   1679 {
   1680 	struct ix_queue	*que = arg;
   1681 	struct adapter  *adapter = que->adapter;
   1682 	struct tx_ring	*txr = que->txr;
   1683 	struct rx_ring	*rxr = que->rxr;
   1684 	bool		more_tx, more_rx;
   1685 	u32		newitr = 0;
   1686 
   1687 	ixgbe_disable_queue(adapter, que->msix);
   1688 	++que->irqs;
   1689 
   1690 	more_rx = ixgbe_rxeof(que);
   1691 
   1692 	IXGBE_TX_LOCK(txr);
   1693 	more_tx = ixgbe_txeof(txr);
   1694 	/*
   1695 	** Make certain that if the stack
   1696 	** has anything queued the task gets
   1697 	** scheduled to handle it.
   1698 	*/
   1699 #ifdef IXGBE_LEGACY_TX
   1700 	if (!IFQ_IS_EMPTY(&adapter->ifp->if_snd))
   1701 #else
   1702 	if (!drbr_empty(adapter->ifp, txr->br))
   1703 #endif
   1704 		more_tx = 1;
   1705 	IXGBE_TX_UNLOCK(txr);
   1706 
   1707 	/* Do AIM now? */
   1708 
   1709 	if (ixgbe_enable_aim == FALSE)
   1710 		goto no_calc;
   1711 	/*
   1712 	** Do Adaptive Interrupt Moderation:
   1713         **  - Write out last calculated setting
   1714 	**  - Calculate based on average size over
   1715 	**    the last interval.
   1716 	*/
   1717         if (que->eitr_setting)
   1718                 IXGBE_WRITE_REG(&adapter->hw,
   1719                     IXGBE_EITR(que->msix), que->eitr_setting);
   1720 
   1721         que->eitr_setting = 0;
   1722 
   1723         /* Idle, do nothing */
   1724         if ((txr->bytes == 0) && (rxr->bytes == 0))
   1725                 goto no_calc;
   1726 
   1727 	if ((txr->bytes) && (txr->packets))
   1728                	newitr = txr->bytes/txr->packets;
   1729 	if ((rxr->bytes) && (rxr->packets))
   1730 		newitr = max(newitr,
   1731 		    (rxr->bytes / rxr->packets));
   1732 	newitr += 24; /* account for hardware frame, crc */
   1733 
   1734 	/* set an upper boundary */
   1735 	newitr = min(newitr, 3000);
   1736 
   1737 	/* Be nice to the mid range */
   1738 	if ((newitr > 300) && (newitr < 1200))
   1739 		newitr = (newitr / 3);
   1740 	else
   1741 		newitr = (newitr / 2);
   1742 
   1743         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   1744                 newitr |= newitr << 16;
   1745         else
   1746                 newitr |= IXGBE_EITR_CNT_WDIS;
   1747 
   1748         /* save for next interrupt */
   1749         que->eitr_setting = newitr;
   1750 
   1751         /* Reset state */
   1752         txr->bytes = 0;
   1753         txr->packets = 0;
   1754         rxr->bytes = 0;
   1755         rxr->packets = 0;
   1756 
   1757 no_calc:
   1758 	if (more_tx || more_rx)
   1759 		softint_schedule(que->que_si);
   1760 	else /* Reenable this interrupt */
   1761 		ixgbe_enable_queue(adapter, que->msix);
   1762 	return;
   1763 }
   1764 
   1765 
   1766 static void
   1767 ixgbe_msix_link(void *arg)
   1768 {
   1769 	struct adapter	*adapter = arg;
   1770 	struct ixgbe_hw *hw = &adapter->hw;
   1771 	u32		reg_eicr;
   1772 
   1773 	++adapter->link_irq.ev_count;
   1774 
   1775 	/* First get the cause */
   1776 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
   1777 	/* Clear interrupt with write */
   1778 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
   1779 
   1780 	/* Link status change */
   1781 	if (reg_eicr & IXGBE_EICR_LSC)
   1782 		softint_schedule(adapter->link_si);
   1783 
   1784 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   1785 #ifdef IXGBE_FDIR
   1786 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
   1787 			/* This is probably overkill :) */
   1788 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
   1789 				return;
   1790                 	/* Disable the interrupt */
   1791 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
   1792 			softint_schedule(adapter->fdir_si);
   1793 		} else
   1794 #endif
   1795 		if (reg_eicr & IXGBE_EICR_ECC) {
   1796                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
   1797 			    "Please Reboot!!\n");
   1798 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
   1799 		} else
   1800 
   1801 		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
   1802                 	/* Clear the interrupt */
   1803                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1804 			softint_schedule(adapter->msf_si);
   1805         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
   1806                 	/* Clear the interrupt */
   1807                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
   1808 			softint_schedule(adapter->mod_si);
   1809 		}
   1810         }
   1811 
   1812 	/* Check for fan failure */
   1813 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
   1814 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
   1815                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
   1816 		    "REPLACE IMMEDIATELY!!\n");
   1817 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
   1818 	}
   1819 
   1820 	/* Check for over temp condition */
   1821 	if ((hw->mac.type == ixgbe_mac_X540) &&
   1822 	    (reg_eicr & IXGBE_EICR_TS)) {
   1823 		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
   1824 		    "PHY IS SHUT DOWN!!\n");
   1825 		device_printf(adapter->dev, "System shutdown required\n");
   1826 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
   1827 	}
   1828 
   1829 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
   1830 	return;
   1831 }
   1832 #endif
   1833 
   1834 /*********************************************************************
   1835  *
   1836  *  Media Ioctl callback
   1837  *
   1838  *  This routine is called whenever the user queries the status of
   1839  *  the interface using ifconfig.
   1840  *
   1841  **********************************************************************/
   1842 static void
   1843 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
   1844 {
   1845 	struct adapter *adapter = ifp->if_softc;
   1846 
   1847 	INIT_DEBUGOUT("ixgbe_media_status: begin");
   1848 	IXGBE_CORE_LOCK(adapter);
   1849 	ixgbe_update_link_status(adapter);
   1850 
   1851 	ifmr->ifm_status = IFM_AVALID;
   1852 	ifmr->ifm_active = IFM_ETHER;
   1853 
   1854 	if (!adapter->link_active) {
   1855 		IXGBE_CORE_UNLOCK(adapter);
   1856 		return;
   1857 	}
   1858 
   1859 	ifmr->ifm_status |= IFM_ACTIVE;
   1860 
   1861 	switch (adapter->link_speed) {
   1862 		case IXGBE_LINK_SPEED_100_FULL:
   1863 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
   1864 			break;
   1865 		case IXGBE_LINK_SPEED_1GB_FULL:
   1866 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
   1867 			break;
   1868 		case IXGBE_LINK_SPEED_10GB_FULL:
   1869 			ifmr->ifm_active |= adapter->optics | IFM_FDX;
   1870 			break;
   1871 	}
   1872 
   1873 	IXGBE_CORE_UNLOCK(adapter);
   1874 
   1875 	return;
   1876 }
   1877 
   1878 /*********************************************************************
   1879  *
   1880  *  Media Ioctl callback
   1881  *
   1882  *  This routine is called when the user changes speed/duplex using
   1883  *  media/mediaopt option with ifconfig.
   1884  *
   1885  **********************************************************************/
   1886 static int
   1887 ixgbe_media_change(struct ifnet * ifp)
   1888 {
   1889 	struct adapter *adapter = ifp->if_softc;
   1890 	struct ifmedia *ifm = &adapter->media;
   1891 
   1892 	INIT_DEBUGOUT("ixgbe_media_change: begin");
   1893 
   1894 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
   1895 		return (EINVAL);
   1896 
   1897         switch (IFM_SUBTYPE(ifm->ifm_media)) {
   1898         case IFM_AUTO:
   1899                 adapter->hw.phy.autoneg_advertised =
   1900 		    IXGBE_LINK_SPEED_100_FULL |
   1901 		    IXGBE_LINK_SPEED_1GB_FULL |
   1902 		    IXGBE_LINK_SPEED_10GB_FULL;
   1903                 break;
   1904         default:
   1905                 device_printf(adapter->dev, "Only auto media type\n");
   1906 		return (EINVAL);
   1907         }
   1908 
   1909 	return (0);
   1910 }
   1911 
   1912 /*********************************************************************
   1913  *
   1914  *  This routine maps the mbufs to tx descriptors, allowing the
   1915  *  TX engine to transmit the packets.
   1916  *  	- return 0 on success, positive on failure
   1917  *
   1918  **********************************************************************/
   1919 
   1920 static int
   1921 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
   1922 {
   1923 	struct m_tag *mtag;
   1924 	struct adapter  *adapter = txr->adapter;
   1925 	struct ethercom *ec = &adapter->osdep.ec;
   1926 	u32		olinfo_status = 0, cmd_type_len;
   1927 	int             i, j, error;
   1928 	int		first;
   1929 	bus_dmamap_t	map;
   1930 	struct ixgbe_tx_buf *txbuf;
   1931 	union ixgbe_adv_tx_desc *txd = NULL;
   1932 
   1933 	/* Basic descriptor defines */
   1934         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
   1935 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
   1936 
   1937 	if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
   1938         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
   1939 
   1940         /*
   1941          * Important to capture the first descriptor
   1942          * used because it will contain the index of
   1943          * the one we tell the hardware to report back
   1944          */
   1945         first = txr->next_avail_desc;
   1946 	txbuf = &txr->tx_buffers[first];
   1947 	map = txbuf->map;
   1948 
   1949 	/*
   1950 	 * Map the packet for DMA.
   1951 	 */
   1952 	error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
   1953 	    m_head, BUS_DMA_NOWAIT);
   1954 
   1955 	if (__predict_false(error)) {
   1956 
   1957 		switch (error) {
   1958 		case EAGAIN:
   1959 			adapter->eagain_tx_dma_setup.ev_count++;
   1960 			return EAGAIN;
   1961 		case ENOMEM:
   1962 			adapter->enomem_tx_dma_setup.ev_count++;
   1963 			return EAGAIN;
   1964 		case EFBIG:
   1965 			/*
   1966 			 * XXX Try it again?
   1967 			 * do m_defrag() and retry bus_dmamap_load_mbuf().
   1968 			 */
   1969 			adapter->efbig_tx_dma_setup.ev_count++;
   1970 			return error;
   1971 		case EINVAL:
   1972 			adapter->einval_tx_dma_setup.ev_count++;
   1973 			return error;
   1974 		default:
   1975 			adapter->other_tx_dma_setup.ev_count++;
   1976 			return error;
   1977 		case 0:
   1978 			break;
   1979 		}
   1980 	}
   1981 
   1982 	/* Make certain there are enough descriptors */
   1983 	if (map->dm_nsegs > txr->tx_avail - 2) {
   1984 		txr->no_desc_avail.ev_count++;
   1985 		ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   1986 		return EAGAIN;
   1987 	}
   1988 
   1989 	/*
   1990 	** Set up the appropriate offload context
   1991 	** this will consume the first descriptor
   1992 	*/
   1993 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
   1994 	if (__predict_false(error)) {
   1995 		return (error);
   1996 	}
   1997 
   1998 #ifdef IXGBE_FDIR
   1999 	/* Do the flow director magic */
   2000 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
   2001 		++txr->atr_count;
   2002 		if (txr->atr_count >= atr_sample_rate) {
   2003 			ixgbe_atr(txr, m_head);
   2004 			txr->atr_count = 0;
   2005 		}
   2006 	}
   2007 #endif
   2008 
   2009 	i = txr->next_avail_desc;
   2010 	for (j = 0; j < map->dm_nsegs; j++) {
   2011 		bus_size_t seglen;
   2012 		bus_addr_t segaddr;
   2013 
   2014 		txbuf = &txr->tx_buffers[i];
   2015 		txd = &txr->tx_base[i];
   2016 		seglen = map->dm_segs[j].ds_len;
   2017 		segaddr = htole64(map->dm_segs[j].ds_addr);
   2018 
   2019 		txd->read.buffer_addr = segaddr;
   2020 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
   2021 		    cmd_type_len |seglen);
   2022 		txd->read.olinfo_status = htole32(olinfo_status);
   2023 
   2024 		if (++i == txr->num_desc)
   2025 			i = 0;
   2026 	}
   2027 
   2028 	txd->read.cmd_type_len |=
   2029 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
   2030 	txr->tx_avail -= map->dm_nsegs;
   2031 	txr->next_avail_desc = i;
   2032 
   2033 	txbuf->m_head = m_head;
   2034 	/*
   2035 	** Here we swap the map so the last descriptor,
   2036 	** which gets the completion interrupt has the
   2037 	** real map, and the first descriptor gets the
   2038 	** unused map from this descriptor.
   2039 	*/
   2040 	txr->tx_buffers[first].map = txbuf->map;
   2041 	txbuf->map = map;
   2042 	bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
   2043 	    BUS_DMASYNC_PREWRITE);
   2044 
   2045         /* Set the EOP descriptor that will be marked done */
   2046         txbuf = &txr->tx_buffers[first];
   2047 	txbuf->eop = txd;
   2048 
   2049         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   2050 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   2051 	/*
   2052 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
   2053 	 * hardware that this frame is available to transmit.
   2054 	 */
   2055 	++txr->total_packets.ev_count;
   2056 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
   2057 
   2058 	return 0;
   2059 }
   2060 
   2061 static void
   2062 ixgbe_set_promisc(struct adapter *adapter)
   2063 {
   2064 	struct ether_multi *enm;
   2065 	struct ether_multistep step;
   2066 	u_int32_t       reg_rctl;
   2067 	struct ethercom *ec = &adapter->osdep.ec;
   2068 	struct ifnet   *ifp = adapter->ifp;
   2069 	int		mcnt = 0;
   2070 
   2071 	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2072 	reg_rctl &= (~IXGBE_FCTRL_UPE);
   2073 	if (ifp->if_flags & IFF_ALLMULTI)
   2074 		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
   2075 	else {
   2076 		ETHER_FIRST_MULTI(step, ec, enm);
   2077 		while (enm != NULL) {
   2078 			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
   2079 				break;
   2080 			mcnt++;
   2081 			ETHER_NEXT_MULTI(step, enm);
   2082 		}
   2083 	}
   2084 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
   2085 		reg_rctl &= (~IXGBE_FCTRL_MPE);
   2086 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2087 
   2088 	if (ifp->if_flags & IFF_PROMISC) {
   2089 		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2090 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2091 	} else if (ifp->if_flags & IFF_ALLMULTI) {
   2092 		reg_rctl |= IXGBE_FCTRL_MPE;
   2093 		reg_rctl &= ~IXGBE_FCTRL_UPE;
   2094 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
   2095 	}
   2096 	return;
   2097 }
   2098 
   2099 
   2100 /*********************************************************************
   2101  *  Multicast Update
   2102  *
   2103  *  This routine is called whenever multicast address list is updated.
   2104  *
   2105  **********************************************************************/
   2106 #define IXGBE_RAR_ENTRIES 16
   2107 
   2108 static void
   2109 ixgbe_set_multi(struct adapter *adapter)
   2110 {
   2111 	struct ether_multi *enm;
   2112 	struct ether_multistep step;
   2113 	u32	fctrl;
   2114 	u8	*mta;
   2115 	u8	*update_ptr;
   2116 	int	mcnt = 0;
   2117 	struct ethercom *ec = &adapter->osdep.ec;
   2118 	struct ifnet   *ifp = adapter->ifp;
   2119 
   2120 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
   2121 
   2122 	mta = adapter->mta;
   2123 	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
   2124 	    MAX_NUM_MULTICAST_ADDRESSES);
   2125 
   2126 	ifp->if_flags &= ~IFF_ALLMULTI;
   2127 	ETHER_FIRST_MULTI(step, ec, enm);
   2128 	while (enm != NULL) {
   2129 		if ((mcnt == MAX_NUM_MULTICAST_ADDRESSES) ||
   2130 		    (memcmp(enm->enm_addrlo, enm->enm_addrhi,
   2131 			ETHER_ADDR_LEN) != 0)) {
   2132 			ifp->if_flags |= IFF_ALLMULTI;
   2133 			break;
   2134 		}
   2135 		bcopy(enm->enm_addrlo,
   2136 		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
   2137 		    IXGBE_ETH_LENGTH_OF_ADDRESS);
   2138 		mcnt++;
   2139 		ETHER_NEXT_MULTI(step, enm);
   2140 	}
   2141 
   2142 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
   2143 	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2144 	if (ifp->if_flags & IFF_PROMISC)
   2145 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
   2146 	else if (ifp->if_flags & IFF_ALLMULTI) {
   2147 		fctrl |= IXGBE_FCTRL_MPE;
   2148 	}
   2149 
   2150 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
   2151 
   2152 	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
   2153 		update_ptr = mta;
   2154 		ixgbe_update_mc_addr_list(&adapter->hw,
   2155 		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
   2156 	}
   2157 
   2158 	return;
   2159 }
   2160 
   2161 /*
   2162  * This is an iterator function now needed by the multicast
   2163  * shared code. It simply feeds the shared code routine the
   2164  * addresses in the array of ixgbe_set_multi() one by one.
   2165  */
   2166 static u8 *
   2167 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
   2168 {
   2169 	u8 *addr = *update_ptr;
   2170 	u8 *newptr;
   2171 	*vmdq = 0;
   2172 
   2173 	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
   2174 	*update_ptr = newptr;
   2175 	return addr;
   2176 }
   2177 
   2178 
   2179 /*********************************************************************
   2180  *  Timer routine
   2181  *
   2182  *  This routine checks for link status, updates statistics,
   2183  *  and runs the watchdog check.
   2184  *
   2185  **********************************************************************/
   2186 
   2187 static void
   2188 ixgbe_local_timer1(void *arg)
   2189 {
   2190 	struct adapter	*adapter = arg;
   2191 	device_t	dev = adapter->dev;
   2192 	struct ix_queue *que = adapter->queues;
   2193 	struct tx_ring	*txr = adapter->tx_rings;
   2194 	int		hung = 0, paused = 0;
   2195 
   2196 	KASSERT(mutex_owned(&adapter->core_mtx));
   2197 
   2198 	/* Check for pluggable optics */
   2199 	if (adapter->sfp_probe)
   2200 		if (!ixgbe_sfp_probe(adapter))
   2201 			goto out; /* Nothing to do */
   2202 
   2203 	ixgbe_update_link_status(adapter);
   2204 	ixgbe_update_stats_counters(adapter);
   2205 
   2206 	/*
   2207 	 * If the interface has been paused
   2208 	 * then don't do the watchdog check
   2209 	 */
   2210 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
   2211 		paused = 1;
   2212 
   2213 	/*
   2214 	** Check the TX queues status
   2215 	**      - watchdog only if all queues show hung
   2216 	*/
   2217 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
   2218 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
   2219 		    (paused == 0))
   2220 			++hung;
   2221 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
   2222 			softint_schedule(que->que_si);
   2223 	}
   2224 	/* Only truely watchdog if all queues show hung */
   2225 	if (hung == adapter->num_queues)
   2226 		goto watchdog;
   2227 
   2228 out:
   2229 	ixgbe_rearm_queues(adapter, adapter->que_mask);
   2230 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
   2231 	return;
   2232 
   2233 watchdog:
   2234 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
   2235 	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
   2236 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
   2237 	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
   2238 	device_printf(dev,"TX(%d) desc avail = %d,"
   2239 	    "Next TX to Clean = %d\n",
   2240 	    txr->me, txr->tx_avail, txr->next_to_clean);
   2241 	adapter->ifp->if_flags &= ~IFF_RUNNING;
   2242 	adapter->watchdog_events.ev_count++;
   2243 	ixgbe_init_locked(adapter);
   2244 }
   2245 
/*
 * Callout entry point for the periodic timer: takes the core lock
 * around ixgbe_local_timer1().  arg is the struct adapter.
 */
static void
ixgbe_local_timer(void *arg)
{
	struct adapter *sc = arg;

	IXGBE_CORE_LOCK(sc);
	ixgbe_local_timer1(sc);
	IXGBE_CORE_UNLOCK(sc);
}
   2255 
   2256 /*
   2257 ** Note: this routine updates the OS on the link state
   2258 **	the real check of the hardware only happens with
   2259 **	a link interrupt.
   2260 */
   2261 static void
   2262 ixgbe_update_link_status(struct adapter *adapter)
   2263 {
   2264 	struct ifnet	*ifp = adapter->ifp;
   2265 	device_t dev = adapter->dev;
   2266 
   2267 
   2268 	if (adapter->link_up){
   2269 		if (adapter->link_active == FALSE) {
   2270 			if (bootverbose)
   2271 				device_printf(dev,"Link is up %d Gbps %s \n",
   2272 				    ((adapter->link_speed == 128)? 10:1),
   2273 				    "Full Duplex");
   2274 			adapter->link_active = TRUE;
   2275 			/* Update any Flow Control changes */
   2276 			ixgbe_fc_enable(&adapter->hw);
   2277 			if_link_state_change(ifp, LINK_STATE_UP);
   2278 		}
   2279 	} else { /* Link down */
   2280 		if (adapter->link_active == TRUE) {
   2281 			if (bootverbose)
   2282 				device_printf(dev,"Link is Down\n");
   2283 			if_link_state_change(ifp, LINK_STATE_DOWN);
   2284 			adapter->link_active = FALSE;
   2285 		}
   2286 	}
   2287 
   2288 	return;
   2289 }
   2290 
   2291 
   2292 static void
   2293 ixgbe_ifstop(struct ifnet *ifp, int disable)
   2294 {
   2295 	struct adapter *adapter = ifp->if_softc;
   2296 
   2297 	IXGBE_CORE_LOCK(adapter);
   2298 	ixgbe_stop(adapter);
   2299 	IXGBE_CORE_UNLOCK(adapter);
   2300 }
   2301 
   2302 /*********************************************************************
   2303  *
   2304  *  This routine disables all traffic on the adapter by issuing a
   2305  *  global reset on the MAC and deallocates TX/RX buffers.
   2306  *
   2307  **********************************************************************/
   2308 
   2309 static void
   2310 ixgbe_stop(void *arg)
   2311 {
   2312 	struct ifnet   *ifp;
   2313 	struct adapter *adapter = arg;
   2314 	struct ixgbe_hw *hw = &adapter->hw;
   2315 	ifp = adapter->ifp;
   2316 
   2317 	KASSERT(mutex_owned(&adapter->core_mtx));
   2318 
   2319 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
   2320 	ixgbe_disable_intr(adapter);
   2321 	callout_stop(&adapter->timer);
   2322 
   2323 	/* Let the stack know...*/
   2324 	ifp->if_flags &= ~IFF_RUNNING;
   2325 
   2326 	ixgbe_reset_hw(hw);
   2327 	hw->adapter_stopped = FALSE;
   2328 	ixgbe_stop_adapter(hw);
   2329 	/* Turn off the laser */
   2330 	if (hw->phy.multispeed_fiber)
   2331 		ixgbe_disable_tx_laser(hw);
   2332 
   2333 	/* reprogram the RAR[0] in case user changed it. */
   2334 	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
   2335 
   2336 	return;
   2337 }
   2338 
   2339 
   2340 /*********************************************************************
   2341  *
   2342  *  Determine hardware revision.
   2343  *
   2344  **********************************************************************/
   2345 static void
   2346 ixgbe_identify_hardware(struct adapter *adapter)
   2347 {
   2348 	pcitag_t tag;
   2349 	pci_chipset_tag_t pc;
   2350 	pcireg_t subid, id;
   2351 	struct ixgbe_hw *hw = &adapter->hw;
   2352 
   2353 	pc = adapter->osdep.pc;
   2354 	tag = adapter->osdep.tag;
   2355 
   2356 	id = pci_conf_read(pc, tag, PCI_ID_REG);
   2357 	subid = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
   2358 
   2359 	/* Save off the information about this board */
   2360 	hw->vendor_id = PCI_VENDOR(id);
   2361 	hw->device_id = PCI_PRODUCT(id);
   2362 	hw->revision_id =
   2363 	    PCI_REVISION(pci_conf_read(pc, tag, PCI_CLASS_REG));
   2364 	hw->subsystem_vendor_id = PCI_SUBSYS_VENDOR(subid);
   2365 	hw->subsystem_device_id = PCI_SUBSYS_ID(subid);
   2366 
   2367 	/* We need this here to set the num_segs below */
   2368 	ixgbe_set_mac_type(hw);
   2369 
   2370 	/* Pick up the 82599 and VF settings */
   2371 	if (hw->mac.type != ixgbe_mac_82598EB) {
   2372 		hw->phy.smart_speed = ixgbe_smart_speed;
   2373 		adapter->num_segs = IXGBE_82599_SCATTER;
   2374 	} else
   2375 		adapter->num_segs = IXGBE_82598_SCATTER;
   2376 
   2377 	return;
   2378 }
   2379 
   2380 /*********************************************************************
   2381  *
   2382  *  Determine optic type
   2383  *
   2384  **********************************************************************/
   2385 static void
   2386 ixgbe_setup_optics(struct adapter *adapter)
   2387 {
   2388 	struct ixgbe_hw *hw = &adapter->hw;
   2389 	int		layer;
   2390 
   2391 	layer = ixgbe_get_supported_physical_layer(hw);
   2392 
   2393 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
   2394 		adapter->optics = IFM_10G_T;
   2395 		return;
   2396 	}
   2397 
   2398 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
   2399 		adapter->optics = IFM_1000_T;
   2400 		return;
   2401 	}
   2402 
   2403 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
   2404 		adapter->optics = IFM_1000_SX;
   2405 		return;
   2406 	}
   2407 
   2408 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
   2409 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
   2410 		adapter->optics = IFM_10G_LR;
   2411 		return;
   2412 	}
   2413 
   2414 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
   2415 		adapter->optics = IFM_10G_SR;
   2416 		return;
   2417 	}
   2418 
   2419 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
   2420 		adapter->optics = IFM_10G_TWINAX;
   2421 		return;
   2422 	}
   2423 
   2424 	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
   2425 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
   2426 		adapter->optics = IFM_10G_CX4;
   2427 		return;
   2428 	}
   2429 
   2430 	/* If we get here just set the default */
   2431 	adapter->optics = IFM_ETHER | IFM_AUTO;
   2432 	return;
   2433 }
   2434 
   2435 /*********************************************************************
   2436  *
   2437  *  Setup the Legacy or MSI Interrupt handler
   2438  *
   2439  **********************************************************************/
   2440 static int
   2441 ixgbe_allocate_legacy(struct adapter *adapter, const struct pci_attach_args *pa)
   2442 {
   2443 	device_t	dev = adapter->dev;
   2444 	struct		ix_queue *que = adapter->queues;
   2445 #ifndef IXGBE_LEGACY_TX
   2446 	struct tx_ring		*txr = adapter->tx_rings;
   2447 #endif
   2448 	char intrbuf[PCI_INTRSTR_LEN];
   2449 #if 0
   2450 	int		rid = 0;
   2451 
   2452 	/* MSI RID at 1 */
   2453 	if (adapter->msix == 1)
   2454 		rid = 1;
   2455 #endif
   2456 
   2457 	/* We allocate a single interrupt resource */
   2458  	if (pci_intr_map(pa, &adapter->osdep.ih) != 0) {
   2459 		aprint_error_dev(dev, "unable to map interrupt\n");
   2460 		return ENXIO;
   2461 	} else {
   2462 		aprint_normal_dev(dev, "interrupting at %s\n",
   2463 		    pci_intr_string(adapter->osdep.pc, adapter->osdep.ih,
   2464 			intrbuf, sizeof(intrbuf)));
   2465 	}
   2466 
   2467 	/*
   2468 	 * Try allocating a fast interrupt and the associated deferred
   2469 	 * processing contexts.
   2470 	 */
   2471 #ifndef IXGBE_LEGACY_TX
   2472 	txr->txq_si = softint_establish(SOFTINT_NET, ixgbe_deferred_mq_start,
   2473 	    txr);
   2474 #endif
   2475 	que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que, que);
   2476 
   2477 	/* Tasklets for Link, SFP and Multispeed Fiber */
   2478 	adapter->link_si =
   2479 	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
   2480 	adapter->mod_si =
   2481 	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
   2482 	adapter->msf_si =
   2483 	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
   2484 
   2485 #ifdef IXGBE_FDIR
   2486 	adapter->fdir_si =
   2487 	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
   2488 #endif
   2489 	if (que->que_si == NULL ||
   2490 	    adapter->link_si == NULL ||
   2491 	    adapter->mod_si == NULL ||
   2492 #ifdef IXGBE_FDIR
   2493 	    adapter->fdir_si == NULL ||
   2494 #endif
   2495 	    adapter->msf_si == NULL) {
   2496 		aprint_error_dev(dev,
   2497 		    "could not establish software interrupts\n");
   2498 		return ENXIO;
   2499 	}
   2500 
   2501 	adapter->osdep.intr = pci_intr_establish(adapter->osdep.pc,
   2502 	    adapter->osdep.ih, IPL_NET, ixgbe_legacy_irq, que);
   2503 	if (adapter->osdep.intr == NULL) {
   2504 		aprint_error_dev(dev, "failed to register interrupt handler\n");
   2505 		softint_disestablish(que->que_si);
   2506 		softint_disestablish(adapter->link_si);
   2507 		softint_disestablish(adapter->mod_si);
   2508 		softint_disestablish(adapter->msf_si);
   2509 #ifdef IXGBE_FDIR
   2510 		softint_disestablish(adapter->fdir_si);
   2511 #endif
   2512 		return ENXIO;
   2513 	}
   2514 	/* For simplicity in the handlers */
   2515 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
   2516 
   2517 	return (0);
   2518 }
   2519 
   2520 
   2521 /*********************************************************************
   2522  *
   2523  *  Setup MSIX Interrupt resources and handlers
   2524  *
   2525  **********************************************************************/
/*
 * Set up one MSI-X vector per queue plus one for link events, and the
 * software interrupts that go with them.
 *
 * Without NETBSD_MSI_OR_MSIX this is a stub that returns 0.  Otherwise
 * returns 0 on success, ENXIO when an interrupt resource cannot be
 * allocated, or the error from bus_setup_intr().
 *
 * NOTE(review): a failed softint_establish() for que->que_si is only
 * logged here; confirm that later code tolerates a NULL que_si.
 */
static int
ixgbe_allocate_msix(struct adapter *adapter, const struct pci_attach_args *pa)
{
#if !defined(NETBSD_MSI_OR_MSIX)
	return 0;
#else
	device_t        dev = adapter->dev;
	struct 		ix_queue *que = adapter->queues;
	struct  	tx_ring *txr = adapter->tx_rings;
	int 		error, rid, vector = 0;

	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
		/* Resource ids are the vector number plus one. */
		rid = vector + 1;
		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (que->res == NULL) {
			aprint_error_dev(dev,"Unable to allocate"
		    	    " bus resource: que interrupt [%d]\n", vector);
			return (ENXIO);
		}
		/* Set the handler function */
		error = bus_setup_intr(dev, que->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    ixgbe_msix_que, que, &que->tag);
		if (error) {
			que->res = NULL;
			aprint_error_dev(dev,
			    "Failed to register QUE handler\n");
			return error;
		}
#if __FreeBSD_version >= 800504
		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
#endif
		que->msix = vector;
		/*
		 * Widen to 64 bits before shifting: shifting a plain int
		 * is wrong (and undefined) for vector numbers >= 31.
		 */
		adapter->que_mask |= ((u64)1 << que->msix);
		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		*/
		if (adapter->num_queues > 1)
			bus_bind_intr(dev, que->res, i);

#ifndef IXGBE_LEGACY_TX
		txr->txq_si = softint_establish(SOFTINT_NET,
		    ixgbe_deferred_mq_start, txr);
#endif
		que->que_si = softint_establish(SOFTINT_NET, ixgbe_handle_que,
		    que);
		if (que->que_si == NULL) {
			aprint_error_dev(dev,
			    "could not establish software interrupt\n");
		}
	}

	/* and Link */
	rid = vector + 1;
	adapter->res = bus_alloc_resource_any(dev,
    	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!adapter->res) {
		aprint_error_dev(dev,"Unable to allocate bus resource: "
		    "Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
	/* Set the link handler function */
	error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    ixgbe_msix_link, adapter, &adapter->tag);
	if (error) {
		adapter->res = NULL;
		aprint_error_dev(dev, "Failed to register LINK handler\n");
		return (error);
	}
#if __FreeBSD_version >= 800504
	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
	/* The link vector is the one after the last queue vector. */
	adapter->linkvec = vector;
	/* Tasklets for Link, SFP and Multispeed Fiber */
	adapter->link_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_link, adapter);
	adapter->mod_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_mod, adapter);
	adapter->msf_si =
	    softint_establish(SOFTINT_NET, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
	adapter->fdir_si =
	    softint_establish(SOFTINT_NET, ixgbe_reinit_fdir, adapter);
#endif

	return (0);
#endif
}
   2617 
/*
 * Setup Either MSI/X or MSI
 *
 * Tries MSI-X first (unless disabled through ixgbe_enable_msix), wanting
 * one vector per queue plus one for link, and falls back to a single MSI.
 *
 * Returns the number of vectors that were actually allocated: the count
 * reported by pci_alloc_msix() for MSI-X, 1 for MSI, and 0 when nothing
 * was allocated and the caller has to use a legacy interrupt.  On MSI-X
 * success adapter->num_queues is set to the queue count chosen here.
 * Without NETBSD_MSI_OR_MSIX this always returns 0.
 */
static int
ixgbe_setup_msix(struct adapter *adapter)
{
#if !defined(NETBSD_MSI_OR_MSIX)
	return 0;
#else
	device_t dev = adapter->dev;
	int rid, want, queues, msgs;

	/* Override by tuneable */
	if (ixgbe_enable_msix == 0)
		goto msi;

	/* First try MSI/X */
	rid = PCI_BAR(MSIX_82598_BAR);
	adapter->msix_mem = bus_alloc_resource_any(dev,
	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		rid += 4;	/* 82599 maps in higher BAR */
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	}
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table \n");
		goto msi;
	}

	msgs = pci_msix_count(dev);
	if (msgs == 0) { /* system has msix disabled */
		bus_release_resource(dev, SYS_RES_MEMORY,
		    rid, adapter->msix_mem);
		adapter->msix_mem = NULL;
		goto msi;
	}

	/* Figure out a reasonable auto config value */
	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;

	if (ixgbe_num_queues != 0)
		queues = ixgbe_num_queues;
	/* Set max queues to 8 when autoconfiguring */
	else if (queues > 8)
		queues = 8;

	/*
	** Want one vector (RX/TX pair) per queue
	** plus an additional for Link.
	*/
	want = queues + 1;
	if (msgs >= want)
		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors but %d queues wanted!\n",
		    msgs, want);
		return (0); /* Will go to Legacy setup */
	}
	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = queues;
		return (msgs);
	}
msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
		device_printf(adapter->dev,"Using an MSI interrupt\n");
		return (msgs);
	}
	/*
	 * Nothing was allocated.  Report 0 rather than the raw MSI count:
	 * a non-zero value is read as "MSI/MSI-X in use" (see the
	 * adapter->msix tests in ixgbe_free_pci_resources()).
	 */
	device_printf(adapter->dev,"Using a Legacy interrupt\n");
	return (0);
#endif
}
   2696 
   2697 
   2698 static int
   2699 ixgbe_allocate_pci_resources(struct adapter *adapter, const struct pci_attach_args *pa)
   2700 {
   2701 	pcireg_t	memtype;
   2702 	device_t        dev = adapter->dev;
   2703 	bus_addr_t addr;
   2704 	int flags;
   2705 
   2706 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_BAR(0));
   2707 	switch (memtype) {
   2708 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
   2709 	case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
   2710 		adapter->osdep.mem_bus_space_tag = pa->pa_memt;
   2711 		if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_BAR(0),
   2712 	              memtype, &addr, &adapter->osdep.mem_size, &flags) != 0)
   2713 			goto map_err;
   2714 		if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
   2715 			aprint_normal_dev(dev, "clearing prefetchable bit\n");
   2716 			flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
   2717 		}
   2718 		if (bus_space_map(adapter->osdep.mem_bus_space_tag, addr,
   2719 		     adapter->osdep.mem_size, flags,
   2720 		     &adapter->osdep.mem_bus_space_handle) != 0) {
   2721 map_err:
   2722 			adapter->osdep.mem_size = 0;
   2723 			aprint_error_dev(dev, "unable to map BAR0\n");
   2724 			return ENXIO;
   2725 		}
   2726 		break;
   2727 	default:
   2728 		aprint_error_dev(dev, "unexpected type on BAR0\n");
   2729 		return ENXIO;
   2730 	}
   2731 
   2732 	/* Legacy defaults */
   2733 	adapter->num_queues = 1;
   2734 	adapter->hw.back = &adapter->osdep;
   2735 
   2736 	/*
   2737 	** Now setup MSI or MSI/X, should
   2738 	** return us the number of supported
   2739 	** vectors. (Will be 1 for MSI)
   2740 	*/
   2741 	adapter->msix = ixgbe_setup_msix(adapter);
   2742 	return (0);
   2743 }
   2744 
   2745 static void
   2746 ixgbe_free_pci_resources(struct adapter * adapter)
   2747 {
   2748 #if defined(NETBSD_MSI_OR_MSIX)
   2749 	struct 		ix_queue *que = adapter->queues;
   2750 	device_t	dev = adapter->dev;
   2751 #endif
   2752 	int		rid;
   2753 
   2754 #if defined(NETBSD_MSI_OR_MSIX)
   2755 	int		 memrid;
   2756 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
   2757 		memrid = PCI_BAR(MSIX_82598_BAR);
   2758 	else
   2759 		memrid = PCI_BAR(MSIX_82599_BAR);
   2760 
   2761 	/*
   2762 	** There is a slight possibility of a failure mode
   2763 	** in attach that will result in entering this function
   2764 	** before interrupt resources have been initialized, and
   2765 	** in that case we do not want to execute the loops below
   2766 	** We can detect this reliably by the state of the adapter
   2767 	** res pointer.
   2768 	*/
   2769 	if (adapter->res == NULL)
   2770 		goto mem;
   2771 
   2772 	/*
   2773 	**  Release all msix queue resources:
   2774 	*/
   2775 	for (int i = 0; i < adapter->num_queues; i++, que++) {
   2776 		rid = que->msix + 1;
   2777 		if (que->tag != NULL) {
   2778 			bus_teardown_intr(dev, que->res, que->tag);
   2779 			que->tag = NULL;
   2780 		}
   2781 		if (que->res != NULL)
   2782 			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
   2783 	}
   2784 #endif
   2785 
   2786 	/* Clean the Legacy or Link interrupt last */
   2787 	if (adapter->linkvec) /* we are doing MSIX */
   2788 		rid = adapter->linkvec + 1;
   2789 	else
   2790 		(adapter->msix != 0) ? (rid = 1):(rid = 0);
   2791 
   2792 	pci_intr_disestablish(adapter->osdep.pc, adapter->osdep.intr);
   2793 	adapter->osdep.intr = NULL;
   2794 
   2795 #if defined(NETBSD_MSI_OR_MSIX)
   2796 mem:
   2797 	if (adapter->msix)
   2798 		pci_release_msi(dev);
   2799 
   2800 	if (adapter->msix_mem != NULL)
   2801 		bus_release_resource(dev, SYS_RES_MEMORY,
   2802 		    memrid, adapter->msix_mem);
   2803 #endif
   2804 
   2805 	if (adapter->osdep.mem_size != 0) {
   2806 		bus_space_unmap(adapter->osdep.mem_bus_space_tag,
   2807 		    adapter->osdep.mem_bus_space_handle,
   2808 		    adapter->osdep.mem_size);
   2809 	}
   2810 
   2811 	return;
   2812 }
   2813 
   2814 /*********************************************************************
   2815  *
   2816  *  Setup networking device structure and register an interface.
   2817  *
   2818  **********************************************************************/
   2819 static int
   2820 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
   2821 {
   2822 	struct ethercom *ec = &adapter->osdep.ec;
   2823 	struct ixgbe_hw *hw = &adapter->hw;
   2824 	struct ifnet   *ifp;
   2825 
   2826 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
   2827 
   2828 	ifp = adapter->ifp = &ec->ec_if;
   2829 	strlcpy(ifp->if_xname, device_xname(dev), IFNAMSIZ);
   2830 	ifp->if_baudrate = IF_Gbps(10);
   2831 	ifp->if_init = ixgbe_init;
   2832 	ifp->if_stop = ixgbe_ifstop;
   2833 	ifp->if_softc = adapter;
   2834 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
   2835 	ifp->if_ioctl = ixgbe_ioctl;
   2836 #ifndef IXGBE_LEGACY_TX
   2837 	ifp->if_transmit = ixgbe_mq_start;
   2838 	ifp->if_qflush = ixgbe_qflush;
   2839 #else
   2840 	ifp->if_start = ixgbe_start;
   2841 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
   2842 #endif
   2843 
   2844 	if_attach(ifp);
   2845 	ether_ifattach(ifp, adapter->hw.mac.addr);
   2846 	ether_set_ifflags_cb(ec, ixgbe_ifflags_cb);
   2847 
   2848 	adapter->max_frame_size =
   2849 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
   2850 
   2851 	/*
   2852 	 * Tell the upper layer(s) we support long frames.
   2853 	 */
   2854 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
   2855 
   2856 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSOv4 | IFCAP_TSOv6;
   2857 	ifp->if_capenable = 0;
   2858 
   2859 	ec->ec_capabilities |= ETHERCAP_VLAN_HWCSUM;
   2860 	ec->ec_capabilities |= ETHERCAP_JUMBO_MTU;
   2861 	ifp->if_capabilities |= IFCAP_LRO;
   2862 	ec->ec_capabilities |= ETHERCAP_VLAN_HWTAGGING
   2863 	    		    | ETHERCAP_VLAN_MTU;
   2864 	ec->ec_capenable = ec->ec_capabilities;
   2865 
   2866 	/*
   2867 	** Don't turn this on by default, if vlans are
   2868 	** created on another pseudo device (eg. lagg)
   2869 	** then vlan events are not passed thru, breaking
   2870 	** operation, but with HW FILTER off it works. If
   2871 	** using vlans directly on the ixgbe driver you can
   2872 	** enable this and get full hardware tag filtering.
   2873 	*/
   2874 	ec->ec_capabilities |= ETHERCAP_VLAN_HWFILTER;
   2875 
   2876 	/*
   2877 	 * Specify the media types supported by this adapter and register
   2878 	 * callbacks to update media and link information
   2879 	 */
   2880 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
   2881 		     ixgbe_media_status);
   2882 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
   2883 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
   2884 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
   2885 		ifmedia_add(&adapter->media,
   2886 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
   2887 		ifmedia_add(&adapter->media,
   2888 		    IFM_ETHER | IFM_1000_T, 0, NULL);
   2889 	}
   2890 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
   2891 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
   2892 
   2893 	return (0);
   2894 }
   2895 
   2896 static void
   2897 ixgbe_config_link(struct adapter *adapter)
   2898 {
   2899 	struct ixgbe_hw *hw = &adapter->hw;
   2900 	u32	autoneg, err = 0;
   2901 	bool	sfp, negotiate;
   2902 
   2903 	sfp = ixgbe_is_sfp(hw);
   2904 
   2905 	if (sfp) {
   2906 		void *ip;
   2907 
   2908 		if (hw->phy.multispeed_fiber) {
   2909 			hw->mac.ops.setup_sfp(hw);
   2910 			ixgbe_enable_tx_laser(hw);
   2911 			ip = adapter->msf_si;
   2912 		} else {
   2913 			ip = adapter->mod_si;
   2914 		}
   2915 
   2916 		kpreempt_disable();
   2917 		softint_schedule(ip);
   2918 		kpreempt_enable();
   2919 	} else {
   2920 		if (hw->mac.ops.check_link)
   2921 			err = ixgbe_check_link(hw, &adapter->link_speed,
   2922 			    &adapter->link_up, FALSE);
   2923 		if (err)
   2924 			goto out;
   2925 		autoneg = hw->phy.autoneg_advertised;
   2926 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   2927                 	err  = hw->mac.ops.get_link_capabilities(hw,
   2928 			    &autoneg, &negotiate);
   2929 		else
   2930 			negotiate = 0;
   2931 		if (err)
   2932 			goto out;
   2933 		if (hw->mac.ops.setup_link)
   2934                 	err = hw->mac.ops.setup_link(hw,
   2935 			    autoneg, adapter->link_up);
   2936 	}
   2937 out:
   2938 	return;
   2939 }
   2940 
   2941 /********************************************************************
   2942  * Manage DMA'able memory.
   2943  *******************************************************************/
   2944 
   2945 static int
   2946 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
   2947 		struct ixgbe_dma_alloc *dma, const int mapflags)
   2948 {
   2949 	device_t dev = adapter->dev;
   2950 	int             r, rsegs;
   2951 
   2952 	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   2953 			       DBA_ALIGN, 0,	/* alignment, bounds */
   2954 			       size,	/* maxsize */
   2955 			       1,	/* nsegments */
   2956 			       size,	/* maxsegsize */
   2957 			       BUS_DMA_ALLOCNOW,	/* flags */
   2958 			       &dma->dma_tag);
   2959 	if (r != 0) {
   2960 		aprint_error_dev(dev,
   2961 		    "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
   2962 		goto fail_0;
   2963 	}
   2964 
   2965 	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
   2966 		size,
   2967 		dma->dma_tag->dt_alignment,
   2968 		dma->dma_tag->dt_boundary,
   2969 		&dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
   2970 	if (r != 0) {
   2971 		aprint_error_dev(dev,
   2972 		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
   2973 		goto fail_1;
   2974 	}
   2975 
   2976 	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
   2977 	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
   2978 	if (r != 0) {
   2979 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2980 		    __func__, r);
   2981 		goto fail_2;
   2982 	}
   2983 
   2984 	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
   2985 	if (r != 0) {
   2986 		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
   2987 		    __func__, r);
   2988 		goto fail_3;
   2989 	}
   2990 
   2991 	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
   2992 			    size,
   2993 			    NULL,
   2994 			    mapflags | BUS_DMA_NOWAIT);
   2995 	if (r != 0) {
   2996 		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
   2997 		    __func__, r);
   2998 		goto fail_4;
   2999 	}
   3000 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
   3001 	dma->dma_size = size;
   3002 	return 0;
   3003 fail_4:
   3004 	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
   3005 fail_3:
   3006 	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
   3007 fail_2:
   3008 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
   3009 fail_1:
   3010 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3011 fail_0:
   3012 	return r;
   3013 }
   3014 
   3015 static void
   3016 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
   3017 {
   3018 	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
   3019 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   3020 	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
   3021 	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
   3022 	ixgbe_dma_tag_destroy(dma->dma_tag);
   3023 }
   3024 
   3025 
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  Allocates adapter->queues, adapter->tx_rings and adapter->rx_rings
 *  (adapter->num_queues zeroed entries each), then for every ring
 *  initializes its mutex, allocates its descriptor DMA area and its
 *  buffers, and finally links each queue to its TX/RX ring pair.
 *
 *  Returns 0 on success or ENOMEM on any failure.
 *
 **********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	/* Number of TX/RX rings whose loop iteration completed. */
	int txconf = 0, rxconf = 0;

        /* First allocate the top level queue structs */
        if (!(adapter->queues =
            (struct ix_queue *) malloc(sizeof(struct ix_queue) *
            adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                aprint_error_dev(dev, "Unable to allocate queue memory\n");
                error = ENOMEM;
                goto fail;
        }

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 *
	 * txconf is only incremented once an iteration has finished, so
	 * on a mid-iteration failure the ring being set up is not
	 * included in the unwind loop at err_tx_desc.
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_xname(dev), txr->me);
		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);

		if (ixgbe_dma_malloc(adapter, tsize,
			&txr->txdma, BUS_DMA_NOWAIT)) {
			aprint_error_dev(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

        	/* Now allocate transmit buffers for the ring */
        	if (ixgbe_allocate_transmit_buffers(txr)) {
			aprint_error_dev(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
        	}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			aprint_error_dev(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
        	}
#endif
	}

	/*
	 * Next the RX queues...
	 * (rxconf plays the same role for RX that txconf does for TX)
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_xname(dev), rxr->me);
		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);

		if (ixgbe_dma_malloc(adapter, rsize,
			&rxr->rxdma, BUS_DMA_NOWAIT)) {
			aprint_error_dev(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

        	/* Allocate receive buffers for the ring*/
		if (ixgbe_allocate_receive_buffers(rxr)) {
			aprint_error_dev(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

	/*
	 * Unwind.  Only the descriptor DMA areas of completed rings are
	 * released here; buffers, buf rings and mutexes set up above are
	 * not touched by these loops.
	 * NOTE(review): the loops dereference adapter->rx_rings and
	 * adapter->tx_rings, so they rely on the failing callee not having
	 * released ring memory itself -- confirm against
	 * ixgbe_allocate_transmit_buffers()/ixgbe_allocate_receive_buffers().
	 */
err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
	/* rx_fail: the RX ring array was not allocated; free TX and queues */
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
	/* tx_fail: the TX ring array was not allocated; free queues only */
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
   3182 
   3183 /*********************************************************************
   3184  *
   3185  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
   3186  *  the information needed to transmit a packet on the wire. This is
   3187  *  called only once at attach, setup is done every reset.
   3188  *
   3189  **********************************************************************/
   3190 static int
   3191 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
   3192 {
   3193 	struct adapter *adapter = txr->adapter;
   3194 	device_t dev = adapter->dev;
   3195 	struct ixgbe_tx_buf *txbuf;
   3196 	int error, i;
   3197 
   3198 	/*
   3199 	 * Setup DMA descriptor areas.
   3200 	 */
   3201 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   3202 			       1, 0,		/* alignment, bounds */
   3203 			       IXGBE_TSO_SIZE,		/* maxsize */
   3204 			       adapter->num_segs,	/* nsegments */
   3205 			       PAGE_SIZE,		/* maxsegsize */
   3206 			       0,			/* flags */
   3207 			       &txr->txtag))) {
   3208 		aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
   3209 		goto fail;
   3210 	}
   3211 
   3212 	if (!(txr->tx_buffers =
   3213 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
   3214 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
   3215 		aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
   3216 		error = ENOMEM;
   3217 		goto fail;
   3218 	}
   3219 
   3220         /* Create the descriptor buffer dma maps */
   3221 	txbuf = txr->tx_buffers;
   3222 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
   3223 		error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
   3224 		if (error != 0) {
   3225 			aprint_error_dev(dev,
   3226 			    "Unable to create TX DMA map (%d)\n", error);
   3227 			goto fail;
   3228 		}
   3229 	}
   3230 
   3231 	return 0;
   3232 fail:
   3233 	/* We free all, it handles case where we are in the middle */
   3234 	ixgbe_free_transmit_structures(adapter);
   3235 	return (error);
   3236 }
   3237 
   3238 /*********************************************************************
   3239  *
   3240  *  Initialize a transmit ring.
   3241  *
   3242  **********************************************************************/
   3243 static void
   3244 ixgbe_setup_transmit_ring(struct tx_ring *txr)
   3245 {
   3246 	struct adapter *adapter = txr->adapter;
   3247 	struct ixgbe_tx_buf *txbuf;
   3248 	int i;
   3249 #ifdef DEV_NETMAP
   3250 	struct netmap_adapter *na = NA(adapter->ifp);
   3251 	struct netmap_slot *slot;
   3252 #endif /* DEV_NETMAP */
   3253 
   3254 	/* Clear the old ring contents */
   3255 	IXGBE_TX_LOCK(txr);
   3256 #ifdef DEV_NETMAP
   3257 	/*
   3258 	 * (under lock): if in netmap mode, do some consistency
   3259 	 * checks and set slot to entry 0 of the netmap ring.
   3260 	 */
   3261 	slot = netmap_reset(na, NR_TX, txr->me, 0);
   3262 #endif /* DEV_NETMAP */
   3263 	bzero((void *)txr->tx_base,
   3264 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
   3265 	/* Reset indices */
   3266 	txr->next_avail_desc = 0;
   3267 	txr->next_to_clean = 0;
   3268 
   3269 	/* Free any existing tx buffers. */
   3270         txbuf = txr->tx_buffers;
   3271 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
   3272 		if (txbuf->m_head != NULL) {
   3273 			bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
   3274 			    0, txbuf->m_head->m_pkthdr.len,
   3275 			    BUS_DMASYNC_POSTWRITE);
   3276 			ixgbe_dmamap_unload(txr->txtag, txbuf->map);
   3277 			m_freem(txbuf->m_head);
   3278 			txbuf->m_head = NULL;
   3279 		}
   3280 #ifdef DEV_NETMAP
   3281 		/*
   3282 		 * In netmap mode, set the map for the packet buffer.
   3283 		 * NOTE: Some drivers (not this one) also need to set
   3284 		 * the physical buffer address in the NIC ring.
   3285 		 * Slots in the netmap ring (indexed by "si") are
   3286 		 * kring->nkr_hwofs positions "ahead" wrt the
   3287 		 * corresponding slot in the NIC ring. In some drivers
   3288 		 * (not here) nkr_hwofs can be negative. Function
   3289 		 * netmap_idx_n2k() handles wraparounds properly.
   3290 		 */
   3291 		if (slot) {
   3292 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
   3293 			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
   3294 		}
   3295 #endif /* DEV_NETMAP */
   3296 		/* Clear the EOP descriptor pointer */
   3297 		txbuf->eop = NULL;
   3298         }
   3299 
   3300 #ifdef IXGBE_FDIR
   3301 	/* Set the rate at which we sample packets */
   3302 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
   3303 		txr->atr_sample = atr_sample_rate;
   3304 #endif
   3305 
   3306 	/* Set number of descriptors available */
   3307 	txr->tx_avail = adapter->num_tx_desc;
   3308 
   3309 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3310 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3311 	IXGBE_TX_UNLOCK(txr);
   3312 }
   3313 
   3314 /*********************************************************************
   3315  *
   3316  *  Initialize all transmit rings.
   3317  *
   3318  **********************************************************************/
   3319 static int
   3320 ixgbe_setup_transmit_structures(struct adapter *adapter)
   3321 {
   3322 	struct tx_ring *txr = adapter->tx_rings;
   3323 
   3324 	for (int i = 0; i < adapter->num_queues; i++, txr++)
   3325 		ixgbe_setup_transmit_ring(txr);
   3326 
   3327 	return (0);
   3328 }
   3329 
   3330 /*********************************************************************
   3331  *
   3332  *  Enable transmit unit.
   3333  *
   3334  **********************************************************************/
   3335 static void
   3336 ixgbe_initialize_transmit_units(struct adapter *adapter)
   3337 {
   3338 	struct tx_ring	*txr = adapter->tx_rings;
   3339 	struct ixgbe_hw	*hw = &adapter->hw;
   3340 
   3341 	/* Setup the Base and Length of the Tx Descriptor Ring */
   3342 
   3343 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3344 		u64	tdba = txr->txdma.dma_paddr;
   3345 		u32	txctrl;
   3346 
   3347 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
   3348 		       (tdba & 0x00000000ffffffffULL));
   3349 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
   3350 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
   3351 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
   3352 
   3353 		/* Setup the HW Tx Head and Tail descriptor pointers */
   3354 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
   3355 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
   3356 
   3357 		/* Setup Transmit Descriptor Cmd Settings */
   3358 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
   3359 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3360 
   3361 		/* Set the processing limit */
   3362 		txr->process_limit = ixgbe_tx_process_limit;
   3363 
   3364 		/* Disable Head Writeback */
   3365 		switch (hw->mac.type) {
   3366 		case ixgbe_mac_82598EB:
   3367 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
   3368 			break;
   3369 		case ixgbe_mac_82599EB:
   3370 		case ixgbe_mac_X540:
   3371 		default:
   3372 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
   3373 			break;
   3374                 }
   3375 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
   3376 		switch (hw->mac.type) {
   3377 		case ixgbe_mac_82598EB:
   3378 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
   3379 			break;
   3380 		case ixgbe_mac_82599EB:
   3381 		case ixgbe_mac_X540:
   3382 		default:
   3383 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
   3384 			break;
   3385 		}
   3386 
   3387 	}
   3388 
   3389 	if (hw->mac.type != ixgbe_mac_82598EB) {
   3390 		u32 dmatxctl, rttdcs;
   3391 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
   3392 		dmatxctl |= IXGBE_DMATXCTL_TE;
   3393 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
   3394 		/* Disable arbiter to set MTQC */
   3395 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
   3396 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
   3397 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3398 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
   3399 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
   3400 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
   3401 	}
   3402 
   3403 	return;
   3404 }
   3405 
   3406 /*********************************************************************
   3407  *
   3408  *  Free all transmit rings.
   3409  *
   3410  **********************************************************************/
   3411 static void
   3412 ixgbe_free_transmit_structures(struct adapter *adapter)
   3413 {
   3414 	struct tx_ring *txr = adapter->tx_rings;
   3415 
   3416 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
   3417 		ixgbe_free_transmit_buffers(txr);
   3418 		ixgbe_dma_free(adapter, &txr->txdma);
   3419 		IXGBE_TX_LOCK_DESTROY(txr);
   3420 	}
   3421 	free(adapter->tx_rings, M_DEVBUF);
   3422 }
   3423 
   3424 /*********************************************************************
   3425  *
   3426  *  Free transmit ring related data structures.
   3427  *
   3428  **********************************************************************/
   3429 static void
   3430 ixgbe_free_transmit_buffers(struct tx_ring *txr)
   3431 {
   3432 	struct adapter *adapter = txr->adapter;
   3433 	struct ixgbe_tx_buf *tx_buffer;
   3434 	int             i;
   3435 
   3436 	INIT_DEBUGOUT("free_transmit_ring: begin");
   3437 
   3438 	if (txr->tx_buffers == NULL)
   3439 		return;
   3440 
   3441 	tx_buffer = txr->tx_buffers;
   3442 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
   3443 		if (tx_buffer->m_head != NULL) {
   3444 			bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
   3445 			    0, tx_buffer->m_head->m_pkthdr.len,
   3446 			    BUS_DMASYNC_POSTWRITE);
   3447 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3448 			m_freem(tx_buffer->m_head);
   3449 			tx_buffer->m_head = NULL;
   3450 			if (tx_buffer->map != NULL) {
   3451 				ixgbe_dmamap_destroy(txr->txtag,
   3452 				    tx_buffer->map);
   3453 				tx_buffer->map = NULL;
   3454 			}
   3455 		} else if (tx_buffer->map != NULL) {
   3456 			ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
   3457 			ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
   3458 			tx_buffer->map = NULL;
   3459 		}
   3460 	}
   3461 #ifndef IXGBE_LEGACY_TX
   3462 	if (txr->br != NULL)
   3463 		buf_ring_free(txr->br, M_DEVBUF);
   3464 #endif
   3465 	if (txr->tx_buffers != NULL) {
   3466 		free(txr->tx_buffers, M_DEVBUF);
   3467 		txr->tx_buffers = NULL;
   3468 	}
   3469 	if (txr->txtag != NULL) {
   3470 		ixgbe_dma_tag_destroy(txr->txtag);
   3471 		txr->txtag = NULL;
   3472 	}
   3473 	return;
   3474 }
   3475 
   3476 /*********************************************************************
   3477  *
   3478  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
   3479  *
   3480  **********************************************************************/
   3481 
   3482 static int
   3483 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
   3484     u32 *cmd_type_len, u32 *olinfo_status)
   3485 {
   3486 	struct m_tag *mtag;
   3487 	struct adapter *adapter = txr->adapter;
   3488 	struct ethercom *ec = &adapter->osdep.ec;
   3489 	struct ixgbe_adv_tx_context_desc *TXD;
   3490 	struct ether_vlan_header *eh;
   3491 	struct ip ip;
   3492 	struct ip6_hdr ip6;
   3493 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3494 	int	ehdrlen, ip_hlen = 0;
   3495 	u16	etype;
   3496 	u8	ipproto __diagused = 0;
   3497 	int	offload = TRUE;
   3498 	int	ctxd = txr->next_avail_desc;
   3499 	u16	vtag = 0;
   3500 
   3501 	/* First check if TSO is to be used */
   3502 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
   3503 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
   3504 
   3505 	if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
   3506 		offload = FALSE;
   3507 
   3508 	/* Indicate the whole packet as payload when not doing TSO */
   3509        	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3510 
   3511 	/* Now ready a context descriptor */
   3512 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3513 
   3514 	/*
   3515 	** In advanced descriptors the vlan tag must
   3516 	** be placed into the context descriptor. Hence
   3517 	** we need to make one even if not doing offloads.
   3518 	*/
   3519 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3520 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3521 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3522 	} else if (offload == FALSE) /* ... no offload to do */
   3523 		return 0;
   3524 
   3525 	/*
   3526 	 * Determine where frame payload starts.
   3527 	 * Jump over vlan headers if already present,
   3528 	 * helpful for QinQ too.
   3529 	 */
   3530 	KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
   3531 	eh = mtod(mp, struct ether_vlan_header *);
   3532 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3533 		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
   3534 		etype = ntohs(eh->evl_proto);
   3535 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3536 	} else {
   3537 		etype = ntohs(eh->evl_encap_proto);
   3538 		ehdrlen = ETHER_HDR_LEN;
   3539 	}
   3540 
   3541 	/* Set the ether header length */
   3542 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3543 
   3544 	switch (etype) {
   3545 	case ETHERTYPE_IP:
   3546 		m_copydata(mp, ehdrlen, sizeof(ip), &ip);
   3547 		ip_hlen = ip.ip_hl << 2;
   3548 		ipproto = ip.ip_p;
   3549 #if 0
   3550 		ip.ip_sum = 0;
   3551 		m_copyback(mp, ehdrlen, sizeof(ip), &ip);
   3552 #else
   3553 		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
   3554 		    ip.ip_sum == 0);
   3555 #endif
   3556 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3557 		break;
   3558 	case ETHERTYPE_IPV6:
   3559 		m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
   3560 		ip_hlen = sizeof(ip6);
   3561 		/* XXX-BZ this will go badly in case of ext hdrs. */
   3562 		ipproto = ip6.ip6_nxt;
   3563 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3564 		break;
   3565 	default:
   3566 		break;
   3567 	}
   3568 
   3569 	if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
   3570 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3571 
   3572 	vlan_macip_lens |= ip_hlen;
   3573 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3574 
   3575 	if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
   3576 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3577 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3578 		KASSERT(ipproto == IPPROTO_TCP);
   3579 	} else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
   3580 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
   3581 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3582 		KASSERT(ipproto == IPPROTO_UDP);
   3583 	}
   3584 
   3585 	/* Now copy bits into descriptor */
   3586 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3587 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3588 	TXD->seqnum_seed = htole32(0);
   3589 	TXD->mss_l4len_idx = htole32(0);
   3590 
   3591 	/* We've consumed the first desc, adjust counters */
   3592 	if (++ctxd == txr->num_desc)
   3593 		ctxd = 0;
   3594 	txr->next_avail_desc = ctxd;
   3595 	--txr->tx_avail;
   3596 
   3597         return 0;
   3598 }
   3599 
   3600 /**********************************************************************
   3601  *
   3602  *  Setup work for hardware segmentation offload (TSO) on
   3603  *  adapters using advanced tx descriptors
   3604  *
   3605  **********************************************************************/
   3606 static int
   3607 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
   3608     u32 *cmd_type_len, u32 *olinfo_status)
   3609 {
   3610 	struct m_tag *mtag;
   3611 	struct adapter *adapter = txr->adapter;
   3612 	struct ethercom *ec = &adapter->osdep.ec;
   3613 	struct ixgbe_adv_tx_context_desc *TXD;
   3614 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
   3615 	u32 mss_l4len_idx = 0, paylen;
   3616 	u16 vtag = 0, eh_type;
   3617 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
   3618 	struct ether_vlan_header *eh;
   3619 #ifdef INET6
   3620 	struct ip6_hdr *ip6;
   3621 #endif
   3622 #ifdef INET
   3623 	struct ip *ip;
   3624 #endif
   3625 	struct tcphdr *th;
   3626 
   3627 
   3628 	/*
   3629 	 * Determine where frame payload starts.
   3630 	 * Jump over vlan headers if already present
   3631 	 */
   3632 	eh = mtod(mp, struct ether_vlan_header *);
   3633 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
   3634 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
   3635 		eh_type = eh->evl_proto;
   3636 	} else {
   3637 		ehdrlen = ETHER_HDR_LEN;
   3638 		eh_type = eh->evl_encap_proto;
   3639 	}
   3640 
   3641 	switch (ntohs(eh_type)) {
   3642 #ifdef INET6
   3643 	case ETHERTYPE_IPV6:
   3644 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3645 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
   3646 		if (ip6->ip6_nxt != IPPROTO_TCP)
   3647 			return (ENXIO);
   3648 		ip_hlen = sizeof(struct ip6_hdr);
   3649 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
   3650 		th = (struct tcphdr *)((char *)ip6 + ip_hlen);
   3651 		th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
   3652 		    &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
   3653 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
   3654 		break;
   3655 #endif
   3656 #ifdef INET
   3657 	case ETHERTYPE_IP:
   3658 		ip = (struct ip *)(mp->m_data + ehdrlen);
   3659 		if (ip->ip_p != IPPROTO_TCP)
   3660 			return (ENXIO);
   3661 		ip->ip_sum = 0;
   3662 		ip_hlen = ip->ip_hl << 2;
   3663 		th = (struct tcphdr *)((char *)ip + ip_hlen);
   3664 		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
   3665 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
   3666 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
   3667 		/* Tell transmit desc to also do IPv4 checksum. */
   3668 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
   3669 		break;
   3670 #endif
   3671 	default:
   3672 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
   3673 		    __func__, ntohs(eh_type));
   3674 		break;
   3675 	}
   3676 
   3677 	ctxd = txr->next_avail_desc;
   3678 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
   3679 
   3680 	tcp_hlen = th->th_off << 2;
   3681 
   3682 	/* This is used in the transmit desc in encap */
   3683 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
   3684 
   3685 	/* VLAN MACLEN IPLEN */
   3686 	if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
   3687 		vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
   3688                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
   3689 	}
   3690 
   3691 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
   3692 	vlan_macip_lens |= ip_hlen;
   3693 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
   3694 
   3695 	/* ADV DTYPE TUCMD */
   3696 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
   3697 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
   3698 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
   3699 
   3700 	/* MSS L4LEN IDX */
   3701 	mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
   3702 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
   3703 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
   3704 
   3705 	TXD->seqnum_seed = htole32(0);
   3706 
   3707 	if (++ctxd == txr->num_desc)
   3708 		ctxd = 0;
   3709 
   3710 	txr->tx_avail--;
   3711 	txr->next_avail_desc = ctxd;
   3712 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
   3713 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
   3714 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
   3715 	++txr->tso_tx.ev_count;
   3716 	return (0);
   3717 }
   3718 
#ifdef IXGBE_FDIR
/*
** Flow Director hook for the transmit path.
**
** Inspects the headers of an outgoing IPv4 TCP or UDP packet and
** passes a signature built from them, together with the MSI-X
** vector of the queue owning this ring, to
** ixgbe_fdir_add_signature_filter_82599() so that the flow can be
** identified and kept on the same cpu.  Every other kind of packet
** is ignored.  Parsing headers would be a performance hit, but it
** is only done at IXGBE_FDIR_RATE of packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*sc = txr->adapter;
	struct ether_vlan_header	*evh;
	struct ip			*iph;
	char				*l4hdr;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				l2len;
	u16				proto;
	u8				l4proto;

	/* Find the L3 header, stepping over an in-band VLAN header */
	evh = mtod(mp, struct ether_vlan_header *);
	if (evh->evl_encap_proto != htons(ETHERTYPE_VLAN)) {
		l2len = ETHER_HDR_LEN;
		proto = evh->evl_encap_proto;
	} else {
		l2len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		proto = evh->evl_proto;
	}

	/* Only IPv4 is handled (proto is compared in network order) */
	if (proto != htons(ETHERTYPE_IP))
		return;

	iph = (struct ip *)(mp->m_data + l2len);
	l4hdr = (char *)iph + (iph->ip_hl << 2);
	l4proto = iph->ip_p;

	/*
	 * TCP or UDP only.  The ports are deliberately swapped (the
	 * packet's source port goes into dst and vice versa) --
	 * presumably so the signature matches the receive direction.
	 */
	if (l4proto == IPPROTO_TCP) {
		struct tcphdr *tcp = (struct tcphdr *)l4hdr;

		common.port.dst ^= tcp->th_sport;
		common.port.src ^= tcp->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
	} else if (l4proto == IPPROTO_UDP) {
		struct udphdr *udp = (struct udphdr *)l4hdr;

		common.port.dst ^= udp->uh_sport;
		common.port.src ^= udp->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
	} else {
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	common.flex_bytes ^=
	    mp->m_pkthdr.ether_vtag ? htons(ETHERTYPE_VLAN) : proto;
	common.ip ^= iph->ip_src.s_addr ^ iph->ip_dst.s_addr;

	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&sc->hw,
	    input, common, sc->queues[txr->me].msix);
}
#endif /* IXGBE_FDIR */
   3794 
   3795 /**********************************************************************
   3796  *
   3797  *  Examine each tx_buffer in the used queue. If the hardware is done
   3798  *  processing the packet then free associated resources. The
   3799  *  tx_buffer is put back on the free queue.
   3800  *
   3801  **********************************************************************/
   3802 static bool
   3803 ixgbe_txeof(struct tx_ring *txr)
   3804 {
   3805 	struct adapter		*adapter = txr->adapter;
   3806 	struct ifnet		*ifp = adapter->ifp;
   3807 	u32			work, processed = 0;
   3808 	u16			limit = txr->process_limit;
   3809 	struct ixgbe_tx_buf	*buf;
   3810 	union ixgbe_adv_tx_desc *txd;
   3811 	struct timeval now, elapsed;
   3812 
   3813 	KASSERT(mutex_owned(&txr->tx_mtx));
   3814 
   3815 #ifdef DEV_NETMAP
   3816 	if (ifp->if_capenable & IFCAP_NETMAP) {
   3817 		struct netmap_adapter *na = NA(ifp);
   3818 		struct netmap_kring *kring = &na->tx_rings[txr->me];
   3819 		txd = txr->tx_base;
   3820 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3821 		    BUS_DMASYNC_POSTREAD);
   3822 		/*
   3823 		 * In netmap mode, all the work is done in the context
   3824 		 * of the client thread. Interrupt handlers only wake up
   3825 		 * clients, which may be sleeping on individual rings
   3826 		 * or on a global resource for all rings.
   3827 		 * To implement tx interrupt mitigation, we wake up the client
   3828 		 * thread roughly every half ring, even if the NIC interrupts
   3829 		 * more frequently. This is implemented as follows:
   3830 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
   3831 		 *   the slot that should wake up the thread (nkr_num_slots
   3832 		 *   means the user thread should not be woken up);
   3833 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
   3834 		 *   or the slot has the DD bit set.
   3835 		 *
   3836 		 * When the driver has separate locks, we need to
   3837 		 * release and re-acquire txlock to avoid deadlocks.
   3838 		 * XXX see if we can find a better way.
   3839 		 */
   3840 		if (!netmap_mitigate ||
   3841 		    (kring->nr_kflags < kring->nkr_num_slots &&
   3842 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
   3843 			netmap_tx_irq(ifp, txr->me |
   3844 			    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT));
   3845 		}
   3846 		return FALSE;
   3847 	}
   3848 #endif /* DEV_NETMAP */
   3849 
   3850 	if (txr->tx_avail == txr->num_desc) {
   3851 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3852 		return false;
   3853 	}
   3854 
   3855 	/* Get work starting point */
   3856 	work = txr->next_to_clean;
   3857 	buf = &txr->tx_buffers[work];
   3858 	txd = &txr->tx_base[work];
   3859 	work -= txr->num_desc; /* The distance to ring end */
   3860         ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3861 	    BUS_DMASYNC_POSTREAD);
   3862 	do {
   3863 		union ixgbe_adv_tx_desc *eop= buf->eop;
   3864 		if (eop == NULL) /* No work */
   3865 			break;
   3866 
   3867 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
   3868 			break;	/* I/O not complete */
   3869 
   3870 		if (buf->m_head) {
   3871 			txr->bytes +=
   3872 			    buf->m_head->m_pkthdr.len;
   3873 			bus_dmamap_sync(txr->txtag->dt_dmat,
   3874 			    buf->map,
   3875 			    0, buf->m_head->m_pkthdr.len,
   3876 			    BUS_DMASYNC_POSTWRITE);
   3877 			ixgbe_dmamap_unload(txr->txtag,
   3878 			    buf->map);
   3879 			m_freem(buf->m_head);
   3880 			buf->m_head = NULL;
   3881 			/*
   3882 			 * NetBSD: Don't override buf->map with NULL here.
   3883 			 * It'll panic when a ring runs one lap around.
   3884 			 */
   3885 		}
   3886 		buf->eop = NULL;
   3887 		++txr->tx_avail;
   3888 
   3889 		/* We clean the range if multi segment */
   3890 		while (txd != eop) {
   3891 			++txd;
   3892 			++buf;
   3893 			++work;
   3894 			/* wrap the ring? */
   3895 			if (__predict_false(!work)) {
   3896 				work -= txr->num_desc;
   3897 				buf = txr->tx_buffers;
   3898 				txd = txr->tx_base;
   3899 			}
   3900 			if (buf->m_head) {
   3901 				txr->bytes +=
   3902 				    buf->m_head->m_pkthdr.len;
   3903 				bus_dmamap_sync(txr->txtag->dt_dmat,
   3904 				    buf->map,
   3905 				    0, buf->m_head->m_pkthdr.len,
   3906 				    BUS_DMASYNC_POSTWRITE);
   3907 				ixgbe_dmamap_unload(txr->txtag,
   3908 				    buf->map);
   3909 				m_freem(buf->m_head);
   3910 				buf->m_head = NULL;
   3911 				/*
   3912 				 * NetBSD: Don't override buf->map with NULL
   3913 				 * here. It'll panic when a ring runs one lap
   3914 				 * around.
   3915 				 */
   3916 			}
   3917 			++txr->tx_avail;
   3918 			buf->eop = NULL;
   3919 
   3920 		}
   3921 		++txr->packets;
   3922 		++processed;
   3923 		++ifp->if_opackets;
   3924 		getmicrotime(&txr->watchdog_time);
   3925 
   3926 		/* Try the next packet */
   3927 		++txd;
   3928 		++buf;
   3929 		++work;
   3930 		/* reset with a wrap */
   3931 		if (__predict_false(!work)) {
   3932 			work -= txr->num_desc;
   3933 			buf = txr->tx_buffers;
   3934 			txd = txr->tx_base;
   3935 		}
   3936 		prefetch(txd);
   3937 	} while (__predict_true(--limit));
   3938 
   3939 	ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
   3940 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   3941 
   3942 	work += txr->num_desc;
   3943 	txr->next_to_clean = work;
   3944 
   3945 	/*
   3946 	** Watchdog calculation, we know there's
   3947 	** work outstanding or the first return
   3948 	** would have been taken, so none processed
   3949 	** for too long indicates a hang.
   3950 	*/
   3951 	getmicrotime(&now);
   3952 	timersub(&now, &txr->watchdog_time, &elapsed);
   3953 	if (!processed && tvtohz(&elapsed) > IXGBE_WATCHDOG)
   3954 		txr->queue_status = IXGBE_QUEUE_HUNG;
   3955 
   3956 	if (txr->tx_avail == txr->num_desc) {
   3957 		txr->queue_status = IXGBE_QUEUE_IDLE;
   3958 		return false;
   3959 	}
   3960 
   3961 	return true;
   3962 }
   3963 
   3964 /*********************************************************************
   3965  *
   3966  *  Refresh mbuf buffers for RX descriptor rings
   3967  *   - now keeps its own state so discards due to resource
   3968  *     exhaustion are unnecessary, if an mbuf cannot be obtained
   3969  *     it just returns, keeping its placeholder, thus it can simply
   3970  *     be recalled to try again.
   3971  *
   3972  **********************************************************************/
   3973 static void
   3974 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
   3975 {
   3976 	struct adapter		*adapter = rxr->adapter;
   3977 	struct ixgbe_rx_buf	*rxbuf;
   3978 	struct mbuf		*mp;
   3979 	int			i, j, error;
   3980 	bool			refreshed = false;
   3981 
   3982 	i = j = rxr->next_to_refresh;
   3983 	/* Control the loop with one beyond */
   3984 	if (++j == rxr->num_desc)
   3985 		j = 0;
   3986 
   3987 	while (j != limit) {
   3988 		rxbuf = &rxr->rx_buffers[i];
   3989 		if (rxbuf->buf == NULL) {
   3990 			mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   3991 			    MT_DATA, M_PKTHDR, rxr->mbuf_sz);
   3992 			if (mp == NULL) {
   3993 				rxr->no_jmbuf.ev_count++;
   3994 				goto update;
   3995 			}
   3996 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
   3997 				m_adj(mp, ETHER_ALIGN);
   3998 		} else
   3999 			mp = rxbuf->buf;
   4000 
   4001 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4002 		/* If we're dealing with an mbuf that was copied rather
   4003 		 * than replaced, there's no need to go through busdma.
   4004 		 */
   4005 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
   4006 			/* Get the memory mapping */
   4007 			error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4008 			    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4009 			if (error != 0) {
   4010 				printf("Refresh mbufs: payload dmamap load"
   4011 				    " failure - %d\n", error);
   4012 				m_free(mp);
   4013 				rxbuf->buf = NULL;
   4014 				goto update;
   4015 			}
   4016 			rxbuf->buf = mp;
   4017 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4018 			    0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
   4019 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
   4020 			    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4021 		} else {
   4022 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
   4023 			rxbuf->flags &= ~IXGBE_RX_COPY;
   4024 		}
   4025 
   4026 		refreshed = true;
   4027 		/* Next is precalculated */
   4028 		i = j;
   4029 		rxr->next_to_refresh = i;
   4030 		if (++j == rxr->num_desc)
   4031 			j = 0;
   4032 	}
   4033 update:
   4034 	if (refreshed) /* Update hardware tail index */
   4035 		IXGBE_WRITE_REG(&adapter->hw,
   4036 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
   4037 	return;
   4038 }
   4039 
   4040 /*********************************************************************
   4041  *
   4042  *  Allocate memory for rx_buffer structures. Since we use one
   4043  *  rx_buffer per received packet, the maximum number of rx_buffer's
   4044  *  that we'll need is equal to the number of receive descriptors
   4045  *  that we've allocated.
   4046  *
   4047  **********************************************************************/
   4048 static int
   4049 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
   4050 {
   4051 	struct	adapter 	*adapter = rxr->adapter;
   4052 	device_t 		dev = adapter->dev;
   4053 	struct ixgbe_rx_buf 	*rxbuf;
   4054 	int             	i, bsize, error;
   4055 
   4056 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
   4057 	if (!(rxr->rx_buffers =
   4058 	    (struct ixgbe_rx_buf *) malloc(bsize,
   4059 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
   4060 		aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
   4061 		error = ENOMEM;
   4062 		goto fail;
   4063 	}
   4064 
   4065 	if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
   4066 				   1, 0,	/* alignment, bounds */
   4067 				   MJUM16BYTES,		/* maxsize */
   4068 				   1,			/* nsegments */
   4069 				   MJUM16BYTES,		/* maxsegsize */
   4070 				   0,			/* flags */
   4071 				   &rxr->ptag))) {
   4072 		aprint_error_dev(dev, "Unable to create RX DMA tag\n");
   4073 		goto fail;
   4074 	}
   4075 
   4076 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
   4077 		rxbuf = &rxr->rx_buffers[i];
   4078 		error = ixgbe_dmamap_create(rxr->ptag,
   4079 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
   4080 		if (error) {
   4081 			aprint_error_dev(dev, "Unable to create RX dma map\n");
   4082 			goto fail;
   4083 		}
   4084 	}
   4085 
   4086 	return (0);
   4087 
   4088 fail:
   4089 	/* Frees all, but can handle partial completion */
   4090 	ixgbe_free_receive_structures(adapter);
   4091 	return (error);
   4092 }
   4093 
   4094 /*
   4095 ** Used to detect a descriptor that has
   4096 ** been merged by Hardware RSC.
   4097 */
   4098 static inline u32
   4099 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
   4100 {
   4101 	return (le32toh(rx->wb.lower.lo_dword.data) &
   4102 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
   4103 }
   4104 
   4105 /*********************************************************************
   4106  *
   4107  *  Initialize Hardware RSC (LRO) feature on 82599
   4108  *  for an RX ring, this is toggled by the LRO capability
   4109  *  even though it is transparent to the stack.
   4110  *
   4111  *  NOTE: since this HW feature only works with IPV4 and
   4112  *        our testing has shown soft LRO to be as effective
   4113  *        I have decided to disable this by default.
   4114  *
   4115  **********************************************************************/
   4116 static void
   4117 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
   4118 {
   4119 	struct	adapter 	*adapter = rxr->adapter;
   4120 	struct	ixgbe_hw	*hw = &adapter->hw;
   4121 	u32			rscctrl, rdrxctl;
   4122 
   4123 	/* If turning LRO/RSC off we need to disable it */
   4124 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
   4125 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4126 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
   4127 		return;
   4128 	}
   4129 
   4130 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
   4131 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
   4132 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
   4133 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4134 #endif /* DEV_NETMAP */
   4135 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
   4136 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
   4137 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
   4138 
   4139 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
   4140 	rscctrl |= IXGBE_RSCCTL_RSCEN;
   4141 	/*
   4142 	** Limit the total number of descriptors that
   4143 	** can be combined, so it does not exceed 64K
   4144 	*/
   4145 	if (rxr->mbuf_sz == MCLBYTES)
   4146 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
   4147 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
   4148 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
   4149 	else if (rxr->mbuf_sz == MJUM9BYTES)
   4150 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
   4151 	else  /* Using 16K cluster */
   4152 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
   4153 
   4154 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
   4155 
   4156 	/* Enable TCP header recognition */
   4157 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
   4158 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
   4159 	    IXGBE_PSRTYPE_TCPHDR));
   4160 
   4161 	/* Disable RSC for ACK packets */
   4162 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
   4163 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
   4164 
   4165 	rxr->hw_rsc = TRUE;
   4166 }
   4167 
   4168 
   4169 static void
   4170 ixgbe_free_receive_ring(struct rx_ring *rxr)
   4171 {
   4172 	struct ixgbe_rx_buf       *rxbuf;
   4173 	int i;
   4174 
   4175 	for (i = 0; i < rxr->num_desc; i++) {
   4176 		rxbuf = &rxr->rx_buffers[i];
   4177 		if (rxbuf->buf != NULL) {
   4178 			bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4179 			    0, rxbuf->buf->m_pkthdr.len,
   4180 			    BUS_DMASYNC_POSTREAD);
   4181 			ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4182 			rxbuf->buf->m_flags |= M_PKTHDR;
   4183 			m_freem(rxbuf->buf);
   4184 			rxbuf->buf = NULL;
   4185 		}
   4186 	}
   4187 }
   4188 
   4189 
   4190 /*********************************************************************
   4191  *
   4192  *  Initialize a receive ring and its buffers.
   4193  *
   4194  **********************************************************************/
   4195 static int
   4196 ixgbe_setup_receive_ring(struct rx_ring *rxr)
   4197 {
   4198 	struct	adapter 	*adapter;
   4199 	struct ixgbe_rx_buf	*rxbuf;
   4200 #ifdef LRO
   4201 	struct ifnet		*ifp;
   4202 	struct lro_ctrl		*lro = &rxr->lro;
   4203 #endif /* LRO */
   4204 	int			rsize, error = 0;
   4205 #ifdef DEV_NETMAP
   4206 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
   4207 	struct netmap_slot *slot;
   4208 #endif /* DEV_NETMAP */
   4209 
   4210 	adapter = rxr->adapter;
   4211 #ifdef LRO
   4212 	ifp = adapter->ifp;
   4213 #endif /* LRO */
   4214 
   4215 	/* Clear the ring contents */
   4216 	IXGBE_RX_LOCK(rxr);
   4217 #ifdef DEV_NETMAP
   4218 	/* same as in ixgbe_setup_transmit_ring() */
   4219 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
   4220 #endif /* DEV_NETMAP */
   4221 	rsize = roundup2(adapter->num_rx_desc *
   4222 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
   4223 	bzero((void *)rxr->rx_base, rsize);
   4224 	/* Cache the size */
   4225 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
   4226 
   4227 	/* Free current RX buffer structs and their mbufs */
   4228 	ixgbe_free_receive_ring(rxr);
   4229 
   4230 	IXGBE_RX_UNLOCK(rxr);
   4231 
   4232 	/* Now reinitialize our supply of jumbo mbufs.  The number
   4233 	 * or size of jumbo mbufs may have changed.
   4234 	 */
   4235 	ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
   4236 	    2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
   4237 
   4238 	IXGBE_RX_LOCK(rxr);
   4239 
   4240 	/* Now replenish the mbufs */
   4241 	for (int j = 0; j != rxr->num_desc; ++j) {
   4242 		struct mbuf	*mp;
   4243 
   4244 		rxbuf = &rxr->rx_buffers[j];
   4245 #ifdef DEV_NETMAP
   4246 		/*
   4247 		 * In netmap mode, fill the map and set the buffer
   4248 		 * address in the NIC ring, considering the offset
   4249 		 * between the netmap and NIC rings (see comment in
   4250 		 * ixgbe_setup_transmit_ring() ). No need to allocate
   4251 		 * an mbuf, so end the block with a continue;
   4252 		 */
   4253 		if (slot) {
   4254 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
   4255 			uint64_t paddr;
   4256 			void *addr;
   4257 
   4258 			addr = PNMB(slot + sj, &paddr);
   4259 			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
   4260 			/* Update descriptor */
   4261 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
   4262 			continue;
   4263 		}
   4264 #endif /* DEV_NETMAP */
   4265 		rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
   4266 		    MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
   4267 		if (rxbuf->buf == NULL) {
   4268 			error = ENOBUFS;
   4269                         goto fail;
   4270 		}
   4271 		mp = rxbuf->buf;
   4272 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
   4273 		/* Get the memory mapping */
   4274 		error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
   4275 		    rxbuf->pmap, mp, BUS_DMA_NOWAIT);
   4276 		if (error != 0)
   4277                         goto fail;
   4278 		bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
   4279 		    0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
   4280 		/* Update descriptor */
   4281 		rxr->rx_base[j].read.pkt_addr =
   4282 		    htole64(rxbuf->pmap->dm_segs[0].ds_addr);
   4283 	}
   4284 
   4285 
   4286 	/* Setup our descriptor indices */
   4287 	rxr->next_to_check = 0;
   4288 	rxr->next_to_refresh = 0;
   4289 	rxr->lro_enabled = FALSE;
   4290 	rxr->rx_copies.ev_count = 0;
   4291 	rxr->rx_bytes.ev_count = 0;
   4292 	rxr->discard = FALSE;
   4293 	rxr->vtag_strip = FALSE;
   4294 
   4295 	ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4296 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4297 
   4298 	/*
   4299 	** Now set up the LRO interface:
   4300 	*/
   4301 	if (ixgbe_rsc_enable)
   4302 		ixgbe_setup_hw_rsc(rxr);
   4303 #ifdef LRO
   4304 	else if (ifp->if_capenable & IFCAP_LRO) {
   4305 		device_t dev = adapter->dev;
   4306 		int err = tcp_lro_init(lro);
   4307 		if (err) {
   4308 			device_printf(dev, "LRO Initialization failed!\n");
   4309 			goto fail;
   4310 		}
   4311 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
   4312 		rxr->lro_enabled = TRUE;
   4313 		lro->ifp = adapter->ifp;
   4314 	}
   4315 #endif /* LRO */
   4316 
   4317 	IXGBE_RX_UNLOCK(rxr);
   4318 	return (0);
   4319 
   4320 fail:
   4321 	ixgbe_free_receive_ring(rxr);
   4322 	IXGBE_RX_UNLOCK(rxr);
   4323 	return (error);
   4324 }
   4325 
   4326 /*********************************************************************
   4327  *
   4328  *  Initialize all receive rings.
   4329  *
   4330  **********************************************************************/
   4331 static int
   4332 ixgbe_setup_receive_structures(struct adapter *adapter)
   4333 {
   4334 	struct rx_ring *rxr = adapter->rx_rings;
   4335 	int j;
   4336 
   4337 	for (j = 0; j < adapter->num_queues; j++, rxr++)
   4338 		if (ixgbe_setup_receive_ring(rxr))
   4339 			goto fail;
   4340 
   4341 	return (0);
   4342 fail:
   4343 	/*
   4344 	 * Free RX buffers allocated so far, we will only handle
   4345 	 * the rings that completed, the failing case will have
   4346 	 * cleaned up for itself. 'j' failed, so its the terminus.
   4347 	 */
   4348 	for (int i = 0; i < j; ++i) {
   4349 		rxr = &adapter->rx_rings[i];
   4350 		ixgbe_free_receive_ring(rxr);
   4351 	}
   4352 
   4353 	return (ENOBUFS);
   4354 }
   4355 
   4356 /*********************************************************************
   4357  *
   4358  *  Setup receive registers and features.
   4359  *
   4360  **********************************************************************/
/* Shift for the SRRCTL header buffer size field. */
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2

/*
 * Added to a buffer size before shifting right by
 * IXGBE_SRRCTL_BSIZEPKT_SHIFT so that the result is rounded up
 * instead of truncated.
 */
#define BSIZEPKT_ROUNDUP ((1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) - 1)
   4364 
   4365 static void
   4366 ixgbe_initialize_receive_units(struct adapter *adapter)
   4367 {
   4368 	int i;
   4369 	struct	rx_ring	*rxr = adapter->rx_rings;
   4370 	struct ixgbe_hw	*hw = &adapter->hw;
   4371 	struct ifnet   *ifp = adapter->ifp;
   4372 	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
   4373 	u32		reta, mrqc = 0, hlreg, r[10];
   4374 
   4375 
   4376 	/*
   4377 	 * Make sure receives are disabled while
   4378 	 * setting up the descriptor ring
   4379 	 */
   4380 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
   4381 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
   4382 	    rxctrl & ~IXGBE_RXCTRL_RXEN);
   4383 
   4384 	/* Enable broadcasts */
   4385 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
   4386 	fctrl |= IXGBE_FCTRL_BAM;
   4387 	fctrl |= IXGBE_FCTRL_DPF;
   4388 	fctrl |= IXGBE_FCTRL_PMCF;
   4389 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
   4390 
   4391 	/* Set for Jumbo Frames? */
   4392 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
   4393 	if (ifp->if_mtu > ETHERMTU)
   4394 		hlreg |= IXGBE_HLREG0_JUMBOEN;
   4395 	else
   4396 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
   4397 #ifdef DEV_NETMAP
   4398 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
   4399 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
   4400 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
   4401 	else
   4402 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
   4403 #endif /* DEV_NETMAP */
   4404 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
   4405 
   4406 	bufsz = (adapter->rx_mbuf_sz +
   4407 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
   4408 
   4409 	for (i = 0; i < adapter->num_queues; i++, rxr++) {
   4410 		u64 rdba = rxr->rxdma.dma_paddr;
   4411 
   4412 		/* Setup the Base and Length of the Rx Descriptor Ring */
   4413 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
   4414 			       (rdba & 0x00000000ffffffffULL));
   4415 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
   4416 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
   4417 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
   4418 
   4419 		/* Set up the SRRCTL register */
   4420 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   4421 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
   4422 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
   4423 		srrctl |= bufsz;
   4424 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
   4425 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   4426 
   4427 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
   4428 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
   4429 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
   4430 
   4431 		/* Set the processing limit */
   4432 		rxr->process_limit = ixgbe_rx_process_limit;
   4433 	}
   4434 
   4435 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
   4436 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
   4437 			      IXGBE_PSRTYPE_UDPHDR |
   4438 			      IXGBE_PSRTYPE_IPV4HDR |
   4439 			      IXGBE_PSRTYPE_IPV6HDR;
   4440 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
   4441 	}
   4442 
   4443 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
   4444 
   4445 	/* Setup RSS */
   4446 	if (adapter->num_queues > 1) {
   4447 		int j;
   4448 		reta = 0;
   4449 
   4450 		/* set up random bits */
   4451 		cprng_fast(&r, sizeof(r));
   4452 
   4453 		/* Set up the redirection table */
   4454 		for (i = 0, j = 0; i < 128; i++, j++) {
   4455 			if (j == adapter->num_queues) j = 0;
   4456 			reta = (reta << 8) | (j * 0x11);
   4457 			if ((i & 3) == 3)
   4458 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
   4459 		}
   4460 
   4461 		/* Now fill our hash function seeds */
   4462 		for (i = 0; i < 10; i++)
   4463 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), r[i]);
   4464 
   4465 		/* Perform hash on these packet types */
   4466 		mrqc = IXGBE_MRQC_RSSEN
   4467 		     | IXGBE_MRQC_RSS_FIELD_IPV4
   4468 		     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
   4469 		     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
   4470 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
   4471 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
   4472 		     | IXGBE_MRQC_RSS_FIELD_IPV6
   4473 		     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
   4474 		     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
   4475 		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
   4476 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
   4477 
   4478 		/* RSS and RX IPP Checksum are mutually exclusive */
   4479 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4480 	}
   4481 
   4482 	if (ifp->if_capenable & IFCAP_RXCSUM)
   4483 		rxcsum |= IXGBE_RXCSUM_PCSD;
   4484 
   4485 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
   4486 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
   4487 
   4488 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
   4489 
   4490 	return;
   4491 }
   4492 
   4493 /*********************************************************************
   4494  *
   4495  *  Free all receive rings.
   4496  *
   4497  **********************************************************************/
   4498 static void
   4499 ixgbe_free_receive_structures(struct adapter *adapter)
   4500 {
   4501 	struct rx_ring *rxr = adapter->rx_rings;
   4502 
   4503 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
   4504 #ifdef LRO
   4505 		struct lro_ctrl		*lro = &rxr->lro;
   4506 #endif /* LRO */
   4507 		ixgbe_free_receive_buffers(rxr);
   4508 #ifdef LRO
   4509 		/* Free LRO memory */
   4510 		tcp_lro_free(lro);
   4511 #endif /* LRO */
   4512 		/* Free the ring memory as well */
   4513 		ixgbe_dma_free(adapter, &rxr->rxdma);
   4514 		IXGBE_RX_LOCK_DESTROY(rxr);
   4515 	}
   4516 
   4517 	free(adapter->rx_rings, M_DEVBUF);
   4518 }
   4519 
   4520 
   4521 /*********************************************************************
   4522  *
   4523  *  Free receive ring data structures
   4524  *
   4525  **********************************************************************/
   4526 static void
   4527 ixgbe_free_receive_buffers(struct rx_ring *rxr)
   4528 {
   4529 	struct adapter		*adapter = rxr->adapter;
   4530 	struct ixgbe_rx_buf	*rxbuf;
   4531 
   4532 	INIT_DEBUGOUT("free_receive_structures: begin");
   4533 
   4534 	/* Cleanup any existing buffers */
   4535 	if (rxr->rx_buffers != NULL) {
   4536 		for (int i = 0; i < adapter->num_rx_desc; i++) {
   4537 			rxbuf = &rxr->rx_buffers[i];
   4538 			if (rxbuf->buf != NULL) {
   4539 				bus_dmamap_sync(rxr->ptag->dt_dmat,
   4540 				    rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
   4541 				    BUS_DMASYNC_POSTREAD);
   4542 				ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
   4543 				rxbuf->buf->m_flags |= M_PKTHDR;
   4544 				m_freem(rxbuf->buf);
   4545 			}
   4546 			rxbuf->buf = NULL;
   4547 			if (rxbuf->pmap != NULL) {
   4548 				ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
   4549 				rxbuf->pmap = NULL;
   4550 			}
   4551 		}
   4552 		if (rxr->rx_buffers != NULL) {
   4553 			free(rxr->rx_buffers, M_DEVBUF);
   4554 			rxr->rx_buffers = NULL;
   4555 		}
   4556 	}
   4557 
   4558 	if (rxr->ptag != NULL) {
   4559 		ixgbe_dma_tag_destroy(rxr->ptag);
   4560 		rxr->ptag = NULL;
   4561 	}
   4562 
   4563 	return;
   4564 }
   4565 
   4566 static __inline void
   4567 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
   4568 {
   4569 	int s;
   4570 
   4571 #ifdef LRO
   4572 	struct adapter	*adapter = ifp->if_softc;
   4573 	struct ethercom *ec = &adapter->osdep.ec;
   4574 
   4575         /*
   4576          * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
   4577          * should be computed by hardware. Also it should not have VLAN tag in
   4578          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
   4579          */
   4580         if (rxr->lro_enabled &&
   4581             (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
   4582             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4583             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4584             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
   4585             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
   4586             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
   4587             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
   4588             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
   4589                 /*
   4590                  * Send to the stack if:
   4591                  **  - LRO not enabled, or
   4592                  **  - no LRO resources, or
   4593                  **  - lro enqueue fails
   4594                  */
   4595                 if (rxr->lro.lro_cnt != 0)
   4596                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
   4597                                 return;
   4598         }
   4599 #endif /* LRO */
   4600 
   4601 	IXGBE_RX_UNLOCK(rxr);
   4602 
   4603 	s = splnet();
   4604 	/* Pass this up to any BPF listeners. */
   4605 	bpf_mtap(ifp, m);
   4606 	(*ifp->if_input)(ifp, m);
   4607 	splx(s);
   4608 
   4609 	IXGBE_RX_LOCK(rxr);
   4610 }
   4611 
   4612 static __inline void
   4613 ixgbe_rx_discard(struct rx_ring *rxr, int i)
   4614 {
   4615 	struct ixgbe_rx_buf	*rbuf;
   4616 
   4617 	rbuf = &rxr->rx_buffers[i];
   4618 
   4619         if (rbuf->fmp != NULL) {/* Partial chain ? */
   4620 		rbuf->fmp->m_flags |= M_PKTHDR;
   4621                 m_freem(rbuf->fmp);
   4622                 rbuf->fmp = NULL;
   4623 	}
   4624 
   4625 	/*
   4626 	** With advanced descriptors the writeback
   4627 	** clobbers the buffer addrs, so its easier
   4628 	** to just free the existing mbufs and take
   4629 	** the normal refresh path to get new buffers
   4630 	** and mapping.
   4631 	*/
   4632 	if (rbuf->buf) {
   4633 		m_free(rbuf->buf);
   4634 		rbuf->buf = NULL;
   4635 	}
   4636 
   4637 	return;
   4638 }
   4639 
   4640 
   4641 /*********************************************************************
   4642  *
   4643  *  This routine executes in interrupt context. It replenishes
   4644  *  the mbufs in the descriptor and sends data which has been
   4645  *  dma'ed into host memory to upper layer.
   4646  *
   4647  *  We loop at most count times if count is > 0, or until done if
   4648  *  count < 0.
   4649  *
   4650  *  Return TRUE for more work, FALSE for all clean.
   4651  *********************************************************************/
   4652 static bool
   4653 ixgbe_rxeof(struct ix_queue *que)
   4654 {
   4655 	struct adapter		*adapter = que->adapter;
   4656 	struct rx_ring		*rxr = que->rxr;
   4657 	struct ifnet		*ifp = adapter->ifp;
   4658 #ifdef LRO
   4659 	struct lro_ctrl		*lro = &rxr->lro;
   4660 	struct lro_entry	*queued;
   4661 #endif /* LRO */
   4662 	int			i, nextp, processed = 0;
   4663 	u32			staterr = 0;
   4664 	u16			count = rxr->process_limit;
   4665 	union ixgbe_adv_rx_desc	*cur;
   4666 	struct ixgbe_rx_buf	*rbuf, *nbuf;
   4667 
   4668 	IXGBE_RX_LOCK(rxr);
   4669 
   4670 #ifdef DEV_NETMAP
   4671 	/* Same as the txeof routine: wakeup clients on intr. */
   4672 	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
   4673 		return (FALSE);
   4674 #endif /* DEV_NETMAP */
   4675 	for (i = rxr->next_to_check; count != 0;) {
   4676 		struct mbuf	*sendmp, *mp;
   4677 		u32		rsc, ptype;
   4678 		u16		len;
   4679 		u16		vtag = 0;
   4680 		bool		eop;
   4681 
   4682 		/* Sync the ring. */
   4683 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4684 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
   4685 
   4686 		cur = &rxr->rx_base[i];
   4687 		staterr = le32toh(cur->wb.upper.status_error);
   4688 
   4689 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
   4690 			break;
   4691 		if ((ifp->if_flags & IFF_RUNNING) == 0)
   4692 			break;
   4693 
   4694 		count--;
   4695 		sendmp = NULL;
   4696 		nbuf = NULL;
   4697 		rsc = 0;
   4698 		cur->wb.upper.status_error = 0;
   4699 		rbuf = &rxr->rx_buffers[i];
   4700 		mp = rbuf->buf;
   4701 
   4702 		len = le16toh(cur->wb.upper.length);
   4703 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
   4704 		    IXGBE_RXDADV_PKTTYPE_MASK;
   4705 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
   4706 
   4707 		/* Make sure bad packets are discarded */
   4708 		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
   4709 		    (rxr->discard)) {
   4710 			rxr->rx_discarded.ev_count++;
   4711 			if (eop)
   4712 				rxr->discard = FALSE;
   4713 			else
   4714 				rxr->discard = TRUE;
   4715 			ixgbe_rx_discard(rxr, i);
   4716 			goto next_desc;
   4717 		}
   4718 
   4719 		/*
   4720 		** On 82599 which supports a hardware
   4721 		** LRO (called HW RSC), packets need
   4722 		** not be fragmented across sequential
   4723 		** descriptors, rather the next descriptor
   4724 		** is indicated in bits of the descriptor.
   4725 		** This also means that we might proceses
   4726 		** more than one packet at a time, something
   4727 		** that has never been true before, it
   4728 		** required eliminating global chain pointers
   4729 		** in favor of what we are doing here.  -jfv
   4730 		*/
   4731 		if (!eop) {
   4732 			/*
   4733 			** Figure out the next descriptor
   4734 			** of this frame.
   4735 			*/
   4736 			if (rxr->hw_rsc == TRUE) {
   4737 				rsc = ixgbe_rsc_count(cur);
   4738 				rxr->rsc_num += (rsc - 1);
   4739 			}
   4740 			if (rsc) { /* Get hardware index */
   4741 				nextp = ((staterr &
   4742 				    IXGBE_RXDADV_NEXTP_MASK) >>
   4743 				    IXGBE_RXDADV_NEXTP_SHIFT);
   4744 			} else { /* Just sequential */
   4745 				nextp = i + 1;
   4746 				if (nextp == adapter->num_rx_desc)
   4747 					nextp = 0;
   4748 			}
   4749 			nbuf = &rxr->rx_buffers[nextp];
   4750 			prefetch(nbuf);
   4751 		}
   4752 		/*
   4753 		** Rather than using the fmp/lmp global pointers
   4754 		** we now keep the head of a packet chain in the
   4755 		** buffer struct and pass this along from one
   4756 		** descriptor to the next, until we get EOP.
   4757 		*/
   4758 		mp->m_len = len;
   4759 		/*
   4760 		** See if there is a stored head
   4761 		** that determines what we are
   4762 		*/
   4763 		sendmp = rbuf->fmp;
   4764 
   4765 		if (sendmp != NULL) {  /* secondary frag */
   4766 			rbuf->buf = rbuf->fmp = NULL;
   4767 			mp->m_flags &= ~M_PKTHDR;
   4768 			sendmp->m_pkthdr.len += mp->m_len;
   4769 		} else {
   4770 			/*
   4771 			 * Optimize.  This might be a small packet,
   4772 			 * maybe just a TCP ACK.  Do a fast copy that
   4773 			 * is cache aligned into a new mbuf, and
   4774 			 * leave the old mbuf+cluster for re-use.
   4775 			 */
   4776 			if (eop && len <= IXGBE_RX_COPY_LEN) {
   4777 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
   4778 				if (sendmp != NULL) {
   4779 					sendmp->m_data +=
   4780 					    IXGBE_RX_COPY_ALIGN;
   4781 					ixgbe_bcopy(mp->m_data,
   4782 					    sendmp->m_data, len);
   4783 					sendmp->m_len = len;
   4784 					rxr->rx_copies.ev_count++;
   4785 					rbuf->flags |= IXGBE_RX_COPY;
   4786 				}
   4787 			}
   4788 			if (sendmp == NULL) {
   4789 				rbuf->buf = rbuf->fmp = NULL;
   4790 				sendmp = mp;
   4791 			}
   4792 
   4793 			/* first desc of a non-ps chain */
   4794 			sendmp->m_flags |= M_PKTHDR;
   4795 			sendmp->m_pkthdr.len = mp->m_len;
   4796 		}
   4797 		++processed;
   4798 		/* Pass the head pointer on */
   4799 		if (eop == 0) {
   4800 			nbuf->fmp = sendmp;
   4801 			sendmp = NULL;
   4802 			mp->m_next = nbuf->buf;
   4803 		} else { /* Sending this frame */
   4804 			sendmp->m_pkthdr.rcvif = ifp;
   4805 			ifp->if_ipackets++;
   4806 			rxr->rx_packets.ev_count++;
   4807 			/* capture data for AIM */
   4808 			rxr->bytes += sendmp->m_pkthdr.len;
   4809 			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
   4810 			/* Process vlan info */
   4811 			if ((rxr->vtag_strip) &&
   4812 			    (staterr & IXGBE_RXD_STAT_VP))
   4813 				vtag = le16toh(cur->wb.upper.vlan);
   4814 			if (vtag) {
   4815 				VLAN_INPUT_TAG(ifp, sendmp, vtag,
   4816 				    printf("%s: could not apply VLAN "
   4817 					"tag", __func__));
   4818 			}
   4819 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
   4820 				ixgbe_rx_checksum(staterr, sendmp, ptype,
   4821 				   &adapter->stats);
   4822 			}
   4823 #if __FreeBSD_version >= 800000
   4824 			sendmp->m_pkthdr.flowid = que->msix;
   4825 			sendmp->m_flags |= M_FLOWID;
   4826 #endif
   4827 		}
   4828 next_desc:
   4829 		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
   4830 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   4831 
   4832 		/* Advance our pointers to the next descriptor. */
   4833 		if (++i == rxr->num_desc)
   4834 			i = 0;
   4835 
   4836 		/* Now send to the stack or do LRO */
   4837 		if (sendmp != NULL) {
   4838 			rxr->next_to_check = i;
   4839 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
   4840 			i = rxr->next_to_check;
   4841 		}
   4842 
   4843                /* Every 8 descriptors we go to refresh mbufs */
   4844 		if (processed == 8) {
   4845 			ixgbe_refresh_mbufs(rxr, i);
   4846 			processed = 0;
   4847 		}
   4848 	}
   4849 
   4850 	/* Refresh any remaining buf structs */
   4851 	if (ixgbe_rx_unrefreshed(rxr))
   4852 		ixgbe_refresh_mbufs(rxr, i);
   4853 
   4854 	rxr->next_to_check = i;
   4855 
   4856 #ifdef LRO
   4857 	/*
   4858 	 * Flush any outstanding LRO work
   4859 	 */
   4860 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
   4861 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
   4862 		tcp_lro_flush(lro, queued);
   4863 	}
   4864 #endif /* LRO */
   4865 
   4866 	IXGBE_RX_UNLOCK(rxr);
   4867 
   4868 	/*
   4869 	** We still have cleaning to do?
   4870 	** Schedule another interrupt if so.
   4871 	*/
   4872 	if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
   4873 		ixgbe_rearm_queues(adapter, (u64)(1ULL << que->msix));
   4874 		return true;
   4875 	}
   4876 
   4877 	return false;
   4878 }
   4879 
   4880 
   4881 /*********************************************************************
   4882  *
   4883  *  Verify that the hardware indicated that the checksum is valid.
   4884  *  Inform the stack about the status of checksum so that stack
   4885  *  doesn't spend time verifying the checksum.
   4886  *
   4887  *********************************************************************/
   4888 static void
   4889 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
   4890     struct ixgbe_hw_stats *stats)
   4891 {
   4892 	u16	status = (u16) staterr;
   4893 	u8	errors = (u8) (staterr >> 24);
   4894 #if 0
   4895 	bool	sctp = FALSE;
   4896 
   4897 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
   4898 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
   4899 		sctp = TRUE;
   4900 #endif
   4901 
   4902 	if (status & IXGBE_RXD_STAT_IPCS) {
   4903 		stats->ipcs.ev_count++;
   4904 		if (!(errors & IXGBE_RXD_ERR_IPE)) {
   4905 			/* IP Checksum Good */
   4906 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
   4907 
   4908 		} else {
   4909 			stats->ipcs_bad.ev_count++;
   4910 			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
   4911 		}
   4912 	}
   4913 	if (status & IXGBE_RXD_STAT_L4CS) {
   4914 		stats->l4cs.ev_count++;
   4915 		u16 type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
   4916 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
   4917 			mp->m_pkthdr.csum_flags |= type;
   4918 		} else {
   4919 			stats->l4cs_bad.ev_count++;
   4920 			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
   4921 		}
   4922 	}
   4923 	return;
   4924 }
   4925 
   4926 
   4927 #if 0	/* XXX Badly need to overhaul vlan(4) on NetBSD. */
   4928 /*
   4929 ** This routine is run via an vlan config EVENT,
   4930 ** it enables us to use the HW Filter table since
   4931 ** we can get the vlan id. This just creates the
   4932 ** entry in the soft version of the VFTA, init will
   4933 ** repopulate the real table.
   4934 */
   4935 static void
   4936 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4937 {
   4938 	struct adapter	*adapter = ifp->if_softc;
   4939 	u16		index, bit;
   4940 
   4941 	if (ifp->if_softc !=  arg)   /* Not our event */
   4942 		return;
   4943 
   4944 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4945 		return;
   4946 
   4947 	IXGBE_CORE_LOCK(adapter);
   4948 	index = (vtag >> 5) & 0x7F;
   4949 	bit = vtag & 0x1F;
   4950 	adapter->shadow_vfta[index] |= (1 << bit);
   4951 	ixgbe_init_locked(adapter);
   4952 	IXGBE_CORE_UNLOCK(adapter);
   4953 }
   4954 
   4955 /*
   4956 ** This routine is run via an vlan
   4957 ** unconfig EVENT, remove our entry
   4958 ** in the soft vfta.
   4959 */
   4960 static void
   4961 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
   4962 {
   4963 	struct adapter	*adapter = ifp->if_softc;
   4964 	u16		index, bit;
   4965 
   4966 	if (ifp->if_softc !=  arg)
   4967 		return;
   4968 
   4969 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
   4970 		return;
   4971 
   4972 	IXGBE_CORE_LOCK(adapter);
   4973 	index = (vtag >> 5) & 0x7F;
   4974 	bit = vtag & 0x1F;
   4975 	adapter->shadow_vfta[index] &= ~(1 << bit);
   4976 	/* Re-init to load the changes */
   4977 	ixgbe_init_locked(adapter);
   4978 	IXGBE_CORE_UNLOCK(adapter);
   4979 }
   4980 #endif
   4981 
   4982 static void
   4983 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
   4984 {
   4985 	struct ethercom *ec = &adapter->osdep.ec;
   4986 	struct ixgbe_hw *hw = &adapter->hw;
   4987 	struct rx_ring	*rxr;
   4988 	u32		ctrl;
   4989 
   4990 	/*
   4991 	** We get here thru init_locked, meaning
   4992 	** a soft reset, this has already cleared
   4993 	** the VFTA and other state, so if there
   4994 	** have been no vlan's registered do nothing.
   4995 	*/
   4996 	if (!VLAN_ATTACHED(&adapter->osdep.ec)) {
   4997 		return;
   4998 	}
   4999 
   5000 	/*
   5001 	** A soft reset zero's out the VFTA, so
   5002 	** we need to repopulate it now.
   5003 	*/
   5004 	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
   5005 		if (adapter->shadow_vfta[i] != 0)
   5006 			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
   5007 			    adapter->shadow_vfta[i]);
   5008 
   5009 	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
   5010 	/* Enable the Filter Table if enabled */
   5011 	if (ec->ec_capenable & ETHERCAP_VLAN_HWFILTER) {
   5012 		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
   5013 		ctrl |= IXGBE_VLNCTRL_VFE;
   5014 	}
   5015 	if (hw->mac.type == ixgbe_mac_82598EB)
   5016 		ctrl |= IXGBE_VLNCTRL_VME;
   5017 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
   5018 
   5019 	/* Setup the queues for vlans */
   5020 	for (int i = 0; i < adapter->num_queues; i++) {
   5021 		rxr = &adapter->rx_rings[i];
   5022 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
   5023 		if (hw->mac.type != ixgbe_mac_82598EB) {
   5024 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
   5025 			ctrl |= IXGBE_RXDCTL_VME;
   5026 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
   5027 		}
   5028 		rxr->vtag_strip = TRUE;
   5029 	}
   5030 }
   5031 
   5032 static void
   5033 ixgbe_enable_intr(struct adapter *adapter)
   5034 {
   5035 	struct ixgbe_hw	*hw = &adapter->hw;
   5036 	struct ix_queue	*que = adapter->queues;
   5037 	u32		mask, fwsm;
   5038 
   5039 	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
   5040 	/* Enable Fan Failure detection */
   5041 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
   5042 		    mask |= IXGBE_EIMS_GPI_SDP1;
   5043 
   5044 	switch (adapter->hw.mac.type) {
   5045 		case ixgbe_mac_82599EB:
   5046 			mask |= IXGBE_EIMS_ECC;
   5047 			mask |= IXGBE_EIMS_GPI_SDP0;
   5048 			mask |= IXGBE_EIMS_GPI_SDP1;
   5049 			mask |= IXGBE_EIMS_GPI_SDP2;
   5050 #ifdef IXGBE_FDIR
   5051 			mask |= IXGBE_EIMS_FLOW_DIR;
   5052 #endif
   5053 			break;
   5054 		case ixgbe_mac_X540:
   5055 			mask |= IXGBE_EIMS_ECC;
   5056 			/* Detect if Thermal Sensor is enabled */
   5057 			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
   5058 			if (fwsm & IXGBE_FWSM_TS_ENABLED)
   5059 				mask |= IXGBE_EIMS_TS;
   5060 #ifdef IXGBE_FDIR
   5061 			mask |= IXGBE_EIMS_FLOW_DIR;
   5062 #endif
   5063 		/* falls through */
   5064 		default:
   5065 			break;
   5066 	}
   5067 
   5068 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
   5069 
   5070 	/* With RSS we use auto clear */
   5071 	if (adapter->msix_mem) {
   5072 		mask = IXGBE_EIMS_ENABLE_MASK;
   5073 		/* Don't autoclear Link */
   5074 		mask &= ~IXGBE_EIMS_OTHER;
   5075 		mask &= ~IXGBE_EIMS_LSC;
   5076 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
   5077 	}
   5078 
   5079 	/*
   5080 	** Now enable all queues, this is done separately to
   5081 	** allow for handling the extended (beyond 32) MSIX
   5082 	** vectors that can be used by 82599
   5083 	*/
   5084         for (int i = 0; i < adapter->num_queues; i++, que++)
   5085                 ixgbe_enable_queue(adapter, que->msix);
   5086 
   5087 	IXGBE_WRITE_FLUSH(hw);
   5088 
   5089 	return;
   5090 }
   5091 
   5092 static void
   5093 ixgbe_disable_intr(struct adapter *adapter)
   5094 {
   5095 	if (adapter->msix_mem)
   5096 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
   5097 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
   5098 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
   5099 	} else {
   5100 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
   5101 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
   5102 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
   5103 	}
   5104 	IXGBE_WRITE_FLUSH(&adapter->hw);
   5105 	return;
   5106 }
   5107 
   5108 u16
   5109 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
   5110 {
   5111 	switch (reg % 4) {
   5112 	case 0:
   5113 		return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5114 		    __BITS(15, 0);
   5115 	case 2:
   5116 		return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
   5117 		    reg - 2), __BITS(31, 16));
   5118 	default:
   5119 		panic("%s: invalid register (%" PRIx32, __func__, reg);
   5120 		break;
   5121 	}
   5122 }
   5123 
   5124 void
   5125 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
   5126 {
   5127 	pcireg_t old;
   5128 
   5129 	switch (reg % 4) {
   5130 	case 0:
   5131 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
   5132 		    __BITS(31, 16);
   5133 		pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
   5134 		break;
   5135 	case 2:
   5136 		old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
   5137 		    __BITS(15, 0);
   5138 		pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
   5139 		    __SHIFTIN(value, __BITS(31, 16)) | old);
   5140 		break;
   5141 	default:
   5142 		panic("%s: invalid register (%" PRIx32, __func__, reg);
   5143 		break;
   5144 	}
   5145 
   5146 	return;
   5147 }
   5148 
   5149 /*
   5150 ** Setup the correct IVAR register for a particular MSIX interrupt
   5151 **   (yes this is all very magic and confusing :)
   5152 **  - entry is the register array entry
   5153 **  - vector is the MSIX vector for this queue
   5154 **  - type is RX/TX/MISC
   5155 */
   5156 static void
   5157 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
   5158 {
   5159 	struct ixgbe_hw *hw = &adapter->hw;
   5160 	u32 ivar, index;
   5161 
   5162 	vector |= IXGBE_IVAR_ALLOC_VAL;
   5163 
   5164 	switch (hw->mac.type) {
   5165 
   5166 	case ixgbe_mac_82598EB:
   5167 		if (type == -1)
   5168 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
   5169 		else
   5170 			entry += (type * 64);
   5171 		index = (entry >> 2) & 0x1F;
   5172 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
   5173 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
   5174 		ivar |= (vector << (8 * (entry & 0x3)));
   5175 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
   5176 		break;
   5177 
   5178 	case ixgbe_mac_82599EB:
   5179 	case ixgbe_mac_X540:
   5180 		if (type == -1) { /* MISC IVAR */
   5181 			index = (entry & 1) * 8;
   5182 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
   5183 			ivar &= ~(0xFF << index);
   5184 			ivar |= (vector << index);
   5185 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
   5186 		} else {	/* RX/TX IVARS */
   5187 			index = (16 * (entry & 1)) + (8 * type);
   5188 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
   5189 			ivar &= ~(0xFF << index);
   5190 			ivar |= (vector << index);
   5191 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
   5192 		}
   5193 
   5194 	default:
   5195 		break;
   5196 	}
   5197 }
   5198 
   5199 static void
   5200 ixgbe_configure_ivars(struct adapter *adapter)
   5201 {
   5202 	struct  ix_queue *que = adapter->queues;
   5203 	u32 newitr;
   5204 
   5205 	if (ixgbe_max_interrupt_rate > 0)
   5206 		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
   5207 	else
   5208 		newitr = 0;
   5209 
   5210         for (int i = 0; i < adapter->num_queues; i++, que++) {
   5211 		/* First the RX queue entry */
   5212                 ixgbe_set_ivar(adapter, i, que->msix, 0);
   5213 		/* ... and the TX */
   5214 		ixgbe_set_ivar(adapter, i, que->msix, 1);
   5215 		/* Set an Initial EITR value */
   5216                 IXGBE_WRITE_REG(&adapter->hw,
   5217                     IXGBE_EITR(que->msix), newitr);
   5218 	}
   5219 
   5220 	/* For the Link interrupt */
   5221         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
   5222 }
   5223 
   5224 /*
   5225 ** ixgbe_sfp_probe - called in the local timer to
   5226 ** determine if a port had optics inserted.
   5227 */
   5228 static bool ixgbe_sfp_probe(struct adapter *adapter)
   5229 {
   5230 	struct ixgbe_hw	*hw = &adapter->hw;
   5231 	device_t	dev = adapter->dev;
   5232 	bool		result = FALSE;
   5233 
   5234 	if ((hw->phy.type == ixgbe_phy_nl) &&
   5235 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
   5236 		s32 ret = hw->phy.ops.identify_sfp(hw);
   5237 		if (ret)
   5238                         goto out;
   5239 		ret = hw->phy.ops.reset(hw);
   5240 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5241 			device_printf(dev,"Unsupported SFP+ module detected!");
   5242 			device_printf(dev, "Reload driver with supported module.\n");
   5243 			adapter->sfp_probe = FALSE;
   5244                         goto out;
   5245 		} else
   5246 			device_printf(dev,"SFP+ module detected!\n");
   5247 		/* We now have supported optics */
   5248 		adapter->sfp_probe = FALSE;
   5249 		/* Set the optics type so system reports correctly */
   5250 		ixgbe_setup_optics(adapter);
   5251 		result = TRUE;
   5252 	}
   5253 out:
   5254 	return (result);
   5255 }
   5256 
   5257 /*
   5258 ** Tasklet handler for MSIX Link interrupts
   5259 **  - do outside interrupt since it might sleep
   5260 */
   5261 static void
   5262 ixgbe_handle_link(void *context)
   5263 {
   5264 	struct adapter  *adapter = context;
   5265 
   5266 	if (ixgbe_check_link(&adapter->hw,
   5267 	    &adapter->link_speed, &adapter->link_up, 0) == 0)
   5268 	    ixgbe_update_link_status(adapter);
   5269 }
   5270 
   5271 /*
   5272 ** Tasklet for handling SFP module interrupts
   5273 */
   5274 static void
   5275 ixgbe_handle_mod(void *context)
   5276 {
   5277 	struct adapter  *adapter = context;
   5278 	struct ixgbe_hw *hw = &adapter->hw;
   5279 	device_t	dev = adapter->dev;
   5280 	u32 err;
   5281 
   5282 	err = hw->phy.ops.identify_sfp(hw);
   5283 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5284 		device_printf(dev,
   5285 		    "Unsupported SFP+ module type was detected.\n");
   5286 		return;
   5287 	}
   5288 	err = hw->mac.ops.setup_sfp(hw);
   5289 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
   5290 		device_printf(dev,
   5291 		    "Setup failure - unsupported SFP+ module type.\n");
   5292 		return;
   5293 	}
   5294 	softint_schedule(adapter->msf_si);
   5295 	return;
   5296 }
   5297 
   5298 
   5299 /*
   5300 ** Tasklet for handling MSF (multispeed fiber) interrupts
   5301 */
   5302 static void
   5303 ixgbe_handle_msf(void *context)
   5304 {
   5305 	struct adapter  *adapter = context;
   5306 	struct ixgbe_hw *hw = &adapter->hw;
   5307 	u32 autoneg;
   5308 	bool negotiate;
   5309 
   5310 	autoneg = hw->phy.autoneg_advertised;
   5311 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
   5312 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
   5313 	else
   5314 		negotiate = 0;
   5315 	if (hw->mac.ops.setup_link)
   5316 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
   5317 	return;
   5318 }
   5319 
   5320 #ifdef IXGBE_FDIR
   5321 /*
   5322 ** Tasklet for reinitializing the Flow Director filter table
   5323 */
   5324 static void
   5325 ixgbe_reinit_fdir(void *context)
   5326 {
   5327 	struct adapter  *adapter = context;
   5328 	struct ifnet   *ifp = adapter->ifp;
   5329 
   5330 	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
   5331 		return;
   5332 	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
   5333 	adapter->fdir_reinit = 0;
   5334 	/* re-enable flow director interrupts */
   5335 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
   5336 	/* Restart the interface */
   5337 	ifp->if_flags |= IFF_RUNNING;
   5338 	return;
   5339 }
   5340 #endif
   5341 
/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 *  Reads the hardware statistics registers and adds each value to the
 *  matching counter in adapter->stats, then folds the missed-packet,
 *  CRC-error and length-error deltas into the ifnet error counters.
 *
 *  NOTE(review): every register value is accumulated with "+=", which
 *  presumes the registers report the count since the previous read
 *  (clear-on-read) -- confirm against the datasheet before reordering
 *  or duplicating any read below.
 *
 **********************************************************************/
static void
ixgbe_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp = adapter->ifp;
	struct ixgbe_hw *hw = &adapter->hw;
	u32  missed_rx = 0, bprc, lxon, lxoff, total;
	u64  total_missed_rx = 0;
	uint64_t crcerrs, rlec;

	/* crcerrs is kept in a local because it also feeds if_ierrors below */
	crcerrs = IXGBE_READ_REG(hw, IXGBE_CRCERRS);
	adapter->stats.crcerrs.ev_count += crcerrs;
	adapter->stats.illerrc.ev_count += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
	adapter->stats.errbc.ev_count += IXGBE_READ_REG(hw, IXGBE_ERRBC);
	adapter->stats.mspdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MSPDC);

	/*
	** Note: these are for the 8 possible traffic classes,
	**	 which in current implementation is unused,
	**	 therefore only 0 should read real data.
	*/
	for (int i = 0; i < __arraycount(adapter->stats.mpc); i++) {
		/*
		 * Register index i is folded onto counter slot j so that
		 * only slots [0, num_queues) are ever written.
		 */
		int j = i % adapter->num_queues;
		u32 mp;
		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
		/* missed_rx tallies misses for the gprc workaround */
		missed_rx += mp;
		/* global total per queue */
        	adapter->stats.mpc[j].ev_count += mp;
		/* Running comprehensive total for stats display */
		total_missed_rx += mp;
		/* 82598 and later MACs expose different per-index registers */
		if (hw->mac.type == ixgbe_mac_82598EB) {
			adapter->stats.rnbc[j] +=
			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
			adapter->stats.qbtc[j].ev_count +=
			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
			adapter->stats.qbrc[j].ev_count +=
			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
			adapter->stats.pxonrxc[j].ev_count +=
			    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
		} else {
			adapter->stats.pxonrxc[j].ev_count +=
			    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
		}
		adapter->stats.pxontxc[j].ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
		adapter->stats.pxofftxc[j].ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
		adapter->stats.pxoffrxc[j].ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
		adapter->stats.pxon2offc[j].ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
	}
	/* Per-queue packet counters, folded onto num_queues slots as above */
	for (int i = 0; i < __arraycount(adapter->stats.qprc); i++) {
		int j = i % adapter->num_queues;
		adapter->stats.qprc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
		adapter->stats.qptc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
		adapter->stats.qprdc[j].ev_count += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
	}
	adapter->stats.mlfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MLFC);
	adapter->stats.mrfc.ev_count += IXGBE_READ_REG(hw, IXGBE_MRFC);
	/* rlec is kept in a local because it also feeds if_ierrors below */
	rlec = IXGBE_READ_REG(hw, IXGBE_RLEC);
	adapter->stats.rlec.ev_count += rlec;

	/* Hardware workaround, gprc counts missed packets */
	adapter->stats.gprc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPRC) - missed_rx;

	/*
	 * total = link XON + XOFF frames transmitted in this sample.  It is
	 * subtracted from the transmit packet counters (and, scaled by
	 * ETHER_MIN_LEN, from the transmit octet counter) further down --
	 * presumably because the hardware includes pause frames in those
	 * counters; TODO confirm.
	 */
	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
	adapter->stats.lxontxc.ev_count += lxon;
	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
	adapter->stats.lxofftxc.ev_count += lxoff;
	total = lxon + lxoff;

	if (hw->mac.type != ixgbe_mac_82598EB) {
		/* Octet counters are assembled from a low/high register pair */
		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32) - total * ETHER_MIN_LEN;
		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORL) +
		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
	} else {
		adapter->stats.lxonrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
		adapter->stats.lxoffrxc.ev_count += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
		/* 82598 only has a counter in the high register */
		adapter->stats.gorc.ev_count += IXGBE_READ_REG(hw, IXGBE_GORCH);
		adapter->stats.gotc.ev_count += IXGBE_READ_REG(hw, IXGBE_GOTCH) - total * ETHER_MIN_LEN;
		adapter->stats.tor.ev_count += IXGBE_READ_REG(hw, IXGBE_TORH);
	}

	/*
	 * Workaround: mprc hardware is incorrectly counting
	 * broadcasts, so for now we subtract those.
	 * (The subtraction is applied on 82598 only.)
	 */
	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
	adapter->stats.bprc.ev_count += bprc;
	adapter->stats.mprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPRC) - ((hw->mac.type == ixgbe_mac_82598EB) ? bprc : 0);

	/* Received-frame size histogram */
	adapter->stats.prc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC64);
	adapter->stats.prc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC127);
	adapter->stats.prc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC255);
	adapter->stats.prc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC511);
	adapter->stats.prc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1023);
	adapter->stats.prc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PRC1522);

	/* Transmit counters adjusted by the XON/XOFF total computed above */
	adapter->stats.gptc.ev_count += IXGBE_READ_REG(hw, IXGBE_GPTC) - total;
	adapter->stats.mptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MPTC) - total;
	adapter->stats.ptc64.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC64) - total;

	adapter->stats.ruc.ev_count += IXGBE_READ_REG(hw, IXGBE_RUC);
	adapter->stats.rfc.ev_count += IXGBE_READ_REG(hw, IXGBE_RFC);
	adapter->stats.roc.ev_count += IXGBE_READ_REG(hw, IXGBE_ROC);
	adapter->stats.rjc.ev_count += IXGBE_READ_REG(hw, IXGBE_RJC);
	adapter->stats.mngprc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
	adapter->stats.mngpdc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
	adapter->stats.mngptc.ev_count += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
	adapter->stats.tpr.ev_count += IXGBE_READ_REG(hw, IXGBE_TPR);
	adapter->stats.tpt.ev_count += IXGBE_READ_REG(hw, IXGBE_TPT);
	adapter->stats.ptc127.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC127);
	adapter->stats.ptc255.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC255);
	adapter->stats.ptc511.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC511);
	adapter->stats.ptc1023.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1023);
	adapter->stats.ptc1522.ev_count += IXGBE_READ_REG(hw, IXGBE_PTC1522);
	adapter->stats.bptc.ev_count += IXGBE_READ_REG(hw, IXGBE_BPTC);
	adapter->stats.xec.ev_count += IXGBE_READ_REG(hw, IXGBE_XEC);
	adapter->stats.fccrc.ev_count += IXGBE_READ_REG(hw, IXGBE_FCCRC);
	adapter->stats.fclast.ev_count += IXGBE_READ_REG(hw, IXGBE_FCLAST);

	/* Only read FCOE on 82599 */
	if (hw->mac.type != ixgbe_mac_82598EB) {
		adapter->stats.fcoerpdc.ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
		adapter->stats.fcoeprc.ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
		adapter->stats.fcoeptc.ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
		adapter->stats.fcoedwrc.ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
		adapter->stats.fcoedwtc.ev_count +=
		    IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
	}

	/* Fill out the OS statistics structure */
	/*
	 * NetBSD: Don't override if_{i|o}{packets|bytes|mcasts} with
	 * adapter->stats counters. It's required to make ifconfig -z
	 * (SOICZIFDATA) work.
	 */
	ifp->if_collisions = 0;

	/* Rx Errors */
	/* Only this sample's deltas are added, not the running evcnt totals */
	ifp->if_iqdrops += total_missed_rx;
	ifp->if_ierrors += crcerrs + rlec;
}
   5501 
   5502 /** ixgbe_sysctl_tdh_handler - Handler function
   5503  *  Retrieves the TDH value from the hardware
   5504  */
   5505 static int
   5506 ixgbe_sysctl_tdh_handler(SYSCTLFN_ARGS)
   5507 {
   5508 	struct sysctlnode node;
   5509 	uint32_t val;
   5510 	struct tx_ring *txr;
   5511 
   5512 	node = *rnode;
   5513 	txr = (struct tx_ring *)node.sysctl_data;
   5514 	if (txr == NULL)
   5515 		return 0;
   5516 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
   5517 	node.sysctl_data = &val;
   5518 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5519 }
   5520 
   5521 /** ixgbe_sysctl_tdt_handler - Handler function
   5522  *  Retrieves the TDT value from the hardware
   5523  */
   5524 static int
   5525 ixgbe_sysctl_tdt_handler(SYSCTLFN_ARGS)
   5526 {
   5527 	struct sysctlnode node;
   5528 	uint32_t val;
   5529 	struct tx_ring *txr;
   5530 
   5531 	node = *rnode;
   5532 	txr = (struct tx_ring *)node.sysctl_data;
   5533 	if (txr == NULL)
   5534 		return 0;
   5535 	val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
   5536 	node.sysctl_data = &val;
   5537 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5538 }
   5539 
   5540 /** ixgbe_sysctl_rdh_handler - Handler function
   5541  *  Retrieves the RDH value from the hardware
   5542  */
   5543 static int
   5544 ixgbe_sysctl_rdh_handler(SYSCTLFN_ARGS)
   5545 {
   5546 	struct sysctlnode node;
   5547 	uint32_t val;
   5548 	struct rx_ring *rxr;
   5549 
   5550 	node = *rnode;
   5551 	rxr = (struct rx_ring *)node.sysctl_data;
   5552 	if (rxr == NULL)
   5553 		return 0;
   5554 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
   5555 	node.sysctl_data = &val;
   5556 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5557 }
   5558 
   5559 /** ixgbe_sysctl_rdt_handler - Handler function
   5560  *  Retrieves the RDT value from the hardware
   5561  */
   5562 static int
   5563 ixgbe_sysctl_rdt_handler(SYSCTLFN_ARGS)
   5564 {
   5565 	struct sysctlnode node;
   5566 	uint32_t val;
   5567 	struct rx_ring *rxr;
   5568 
   5569 	node = *rnode;
   5570 	rxr = (struct rx_ring *)node.sysctl_data;
   5571 	if (rxr == NULL)
   5572 		return 0;
   5573 	val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
   5574 	node.sysctl_data = &val;
   5575 	return sysctl_lookup(SYSCTLFN_CALL(&node));
   5576 }
   5577 
   5578 static int
   5579 ixgbe_sysctl_interrupt_rate_handler(SYSCTLFN_ARGS)
   5580 {
   5581 	int error;
   5582 	struct sysctlnode node;
   5583 	struct ix_queue *que;
   5584 	uint32_t reg, usec, rate;
   5585 
   5586 	node = *rnode;
   5587 	que = (struct ix_queue *)node.sysctl_data;
   5588 	if (que == NULL)
   5589 		return 0;
   5590 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
   5591 	usec = ((reg & 0x0FF8) >> 3);
   5592 	if (usec > 0)
   5593 		rate = 500000 / usec;
   5594 	else
   5595 		rate = 0;
   5596 	node.sysctl_data = &rate;
   5597 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5598 	if (error)
   5599 		return error;
   5600 	reg &= ~0xfff; /* default, no limitation */
   5601 	ixgbe_max_interrupt_rate = 0;
   5602 	if (rate > 0 && rate < 500000) {
   5603 		if (rate < 1000)
   5604 			rate = 1000;
   5605 		ixgbe_max_interrupt_rate = rate;
   5606 		reg |= ((4000000/rate) & 0xff8 );
   5607 	}
   5608 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
   5609 	return 0;
   5610 }
   5611 
   5612 const struct sysctlnode *
   5613 ixgbe_sysctl_instance(struct adapter *adapter)
   5614 {
   5615 	const char *dvname;
   5616 	struct sysctllog **log;
   5617 	int rc;
   5618 	const struct sysctlnode *rnode;
   5619 
   5620 	log = &adapter->sysctllog;
   5621 	dvname = device_xname(adapter->dev);
   5622 
   5623 	if ((rc = sysctl_createv(log, 0, NULL, &rnode,
   5624 	    0, CTLTYPE_NODE, dvname,
   5625 	    SYSCTL_DESCR("ixgbe information and settings"),
   5626 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL)) != 0)
   5627 		goto err;
   5628 
   5629 	return rnode;
   5630 err:
   5631 	printf("%s: sysctl_createv failed, rc = %d\n", __func__, rc);
   5632 	return NULL;
   5633 }
   5634 
   5635 /*
   5636  * Add sysctl variables, one per statistic, to the system.
   5637  */
   5638 static void
   5639 ixgbe_add_hw_stats(struct adapter *adapter)
   5640 {
   5641 	device_t dev = adapter->dev;
   5642 	const struct sysctlnode *rnode, *cnode;
   5643 	struct sysctllog **log = &adapter->sysctllog;
   5644 	struct tx_ring *txr = adapter->tx_rings;
   5645 	struct rx_ring *rxr = adapter->rx_rings;
   5646 	struct ixgbe_hw_stats *stats = &adapter->stats;
   5647 
   5648 	/* Driver Statistics */
   5649 #if 0
   5650 	/* These counters are not updated by the software */
   5651 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
   5652 			CTLFLAG_RD, &adapter->dropped_pkts,
   5653 			"Driver dropped packets");
   5654 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_header_failed",
   5655 			CTLFLAG_RD, &adapter->mbuf_header_failed,
   5656 			"???");
   5657 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_packet_failed",
   5658 			CTLFLAG_RD, &adapter->mbuf_packet_failed,
   5659 			"???");
   5660 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_map_avail",
   5661 			CTLFLAG_RD, &adapter->no_tx_map_avail,
   5662 			"???");
   5663 #endif
   5664 	evcnt_attach_dynamic(&adapter->handleq, EVCNT_TYPE_MISC,
   5665 	    NULL, device_xname(dev), "Handled queue in softint");
   5666 	evcnt_attach_dynamic(&adapter->req, EVCNT_TYPE_MISC,
   5667 	    NULL, device_xname(dev), "Requeued in softint");
   5668 	evcnt_attach_dynamic(&adapter->morerx, EVCNT_TYPE_MISC,
   5669 	    NULL, device_xname(dev), "Interrupt handler more rx");
   5670 	evcnt_attach_dynamic(&adapter->moretx, EVCNT_TYPE_MISC,
   5671 	    NULL, device_xname(dev), "Interrupt handler more tx");
   5672 	evcnt_attach_dynamic(&adapter->txloops, EVCNT_TYPE_MISC,
   5673 	    NULL, device_xname(dev), "Interrupt handler tx loops");
   5674 	evcnt_attach_dynamic(&adapter->efbig_tx_dma_setup, EVCNT_TYPE_MISC,
   5675 	    NULL, device_xname(dev), "Driver tx dma soft fail EFBIG");
   5676 	evcnt_attach_dynamic(&adapter->m_defrag_failed, EVCNT_TYPE_MISC,
   5677 	    NULL, device_xname(dev), "m_defrag() failed");
   5678 	evcnt_attach_dynamic(&adapter->efbig2_tx_dma_setup, EVCNT_TYPE_MISC,
   5679 	    NULL, device_xname(dev), "Driver tx dma hard fail EFBIG");
   5680 	evcnt_attach_dynamic(&adapter->einval_tx_dma_setup, EVCNT_TYPE_MISC,
   5681 	    NULL, device_xname(dev), "Driver tx dma hard fail EINVAL");
   5682 	evcnt_attach_dynamic(&adapter->other_tx_dma_setup, EVCNT_TYPE_MISC,
   5683 	    NULL, device_xname(dev), "Driver tx dma hard fail other");
   5684 	evcnt_attach_dynamic(&adapter->eagain_tx_dma_setup, EVCNT_TYPE_MISC,
   5685 	    NULL, device_xname(dev), "Driver tx dma soft fail EAGAIN");
   5686 	evcnt_attach_dynamic(&adapter->enomem_tx_dma_setup, EVCNT_TYPE_MISC,
   5687 	    NULL, device_xname(dev), "Driver tx dma soft fail ENOMEM");
   5688 	evcnt_attach_dynamic(&adapter->watchdog_events, EVCNT_TYPE_MISC,
   5689 	    NULL, device_xname(dev), "Watchdog timeouts");
   5690 	evcnt_attach_dynamic(&adapter->tso_err, EVCNT_TYPE_MISC,
   5691 	    NULL, device_xname(dev), "TSO errors");
   5692 	evcnt_attach_dynamic(&adapter->link_irq, EVCNT_TYPE_MISC,
   5693 	    NULL, device_xname(dev), "Link MSIX IRQ Handled");
   5694 
   5695 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
   5696 		snprintf(adapter->queues[i].evnamebuf,
   5697 		    sizeof(adapter->queues[i].evnamebuf), "%s queue%d",
   5698 		    device_xname(dev), i);
   5699 		snprintf(adapter->queues[i].namebuf,
   5700 		    sizeof(adapter->queues[i].namebuf), "queue%d", i);
   5701 
   5702 		if ((rnode = ixgbe_sysctl_instance(adapter)) == NULL) {
   5703 			aprint_error_dev(dev, "could not create sysctl root\n");
   5704 			break;
   5705 		}
   5706 
   5707 		if (sysctl_createv(log, 0, &rnode, &rnode,
   5708 		    0, CTLTYPE_NODE,
   5709 		    adapter->queues[i].namebuf, SYSCTL_DESCR("Queue Name"),
   5710 		    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL) != 0)
   5711 			break;
   5712 
   5713 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5714 		    CTLFLAG_READWRITE, CTLTYPE_INT,
   5715 		    "interrupt_rate", SYSCTL_DESCR("Interrupt Rate"),
   5716 		    ixgbe_sysctl_interrupt_rate_handler, 0,
   5717 		    (void *)&adapter->queues[i], 0, CTL_CREATE, CTL_EOL) != 0)
   5718 			break;
   5719 
   5720 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5721 		    CTLFLAG_READONLY, CTLTYPE_QUAD,
   5722 		    "irqs", SYSCTL_DESCR("irqs on this queue"),
   5723 			NULL, 0, &(adapter->queues[i].irqs),
   5724 		    0, CTL_CREATE, CTL_EOL) != 0)
   5725 			break;
   5726 
   5727 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5728 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5729 		    "txd_head", SYSCTL_DESCR("Transmit Descriptor Head"),
   5730 		    ixgbe_sysctl_tdh_handler, 0, (void *)txr,
   5731 		    0, CTL_CREATE, CTL_EOL) != 0)
   5732 			break;
   5733 
   5734 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5735 		    CTLFLAG_READONLY, CTLTYPE_INT,
   5736 		    "txd_tail", SYSCTL_DESCR("Transmit Descriptor Tail"),
   5737 		    ixgbe_sysctl_tdt_handler, 0, (void *)txr,
   5738 		    0, CTL_CREATE, CTL_EOL) != 0)
   5739 			break;
   5740 
   5741 		evcnt_attach_dynamic(&txr->tso_tx, EVCNT_TYPE_MISC,
   5742 		    NULL, device_xname(dev), "TSO");
   5743 		evcnt_attach_dynamic(&txr->no_desc_avail, EVCNT_TYPE_MISC,
   5744 		    NULL, adapter->queues[i].evnamebuf,
   5745 		    "Queue No Descriptor Available");
   5746 		evcnt_attach_dynamic(&txr->total_packets, EVCNT_TYPE_MISC,
   5747 		    NULL, adapter->queues[i].evnamebuf,
   5748 		    "Queue Packets Transmitted");
   5749 
   5750 #ifdef LRO
   5751 		struct lro_ctrl *lro = &rxr->lro;
   5752 #endif /* LRO */
   5753 
   5754 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5755 		    CTLFLAG_READONLY,
   5756 		    CTLTYPE_INT,
   5757 		    "rxd_head", SYSCTL_DESCR("Receive Descriptor Head"),
   5758 		    ixgbe_sysctl_rdh_handler, 0, (void *)rxr, 0,
   5759 		    CTL_CREATE, CTL_EOL) != 0)
   5760 			break;
   5761 
   5762 		if (sysctl_createv(log, 0, &rnode, &cnode,
   5763 		    CTLFLAG_READONLY,
   5764 		    CTLTYPE_INT,
   5765 		    "rxd_tail", SYSCTL_DESCR("Receive Descriptor Tail"),
   5766 		    ixgbe_sysctl_rdt_handler, 0, (void *)rxr, 0,
   5767 		    CTL_CREATE, CTL_EOL) != 0)
   5768 			break;
   5769 
   5770 		if (i < __arraycount(adapter->stats.mpc)) {
   5771 			evcnt_attach_dynamic(&adapter->stats.mpc[i],
   5772 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5773 			    "Missed Packet Count");
   5774 		}
   5775 		if (i < __arraycount(adapter->stats.pxontxc)) {
   5776 			evcnt_attach_dynamic(&adapter->stats.pxontxc[i],
   5777 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5778 			    "pxontxc");
   5779 			evcnt_attach_dynamic(&adapter->stats.pxonrxc[i],
   5780 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5781 			    "pxonrxc");
   5782 			evcnt_attach_dynamic(&adapter->stats.pxofftxc[i],
   5783 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5784 			    "pxofftxc");
   5785 			evcnt_attach_dynamic(&adapter->stats.pxoffrxc[i],
   5786 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5787 			    "pxoffrxc");
   5788 			evcnt_attach_dynamic(&adapter->stats.pxon2offc[i],
   5789 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5790 			    "pxon2offc");
   5791 		}
   5792 		if (i < __arraycount(adapter->stats.qprc)) {
   5793 			evcnt_attach_dynamic(&adapter->stats.qprc[i],
   5794 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5795 			    "qprc");
   5796 			evcnt_attach_dynamic(&adapter->stats.qptc[i],
   5797 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5798 			    "qptc");
   5799 			evcnt_attach_dynamic(&adapter->stats.qbrc[i],
   5800 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5801 			    "qbrc");
   5802 			evcnt_attach_dynamic(&adapter->stats.qbtc[i],
   5803 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5804 			    "qbtc");
   5805 			evcnt_attach_dynamic(&adapter->stats.qprdc[i],
   5806 			    EVCNT_TYPE_MISC, NULL, adapter->queues[i].evnamebuf,
   5807 			    "qprdc");
   5808 		}
   5809 
   5810 		evcnt_attach_dynamic(&rxr->rx_packets, EVCNT_TYPE_MISC,
   5811 		    NULL, adapter->queues[i].evnamebuf, "Queue Packets Received");
   5812 		evcnt_attach_dynamic(&rxr->rx_bytes, EVCNT_TYPE_MISC,
   5813 		    NULL, adapter->queues[i].evnamebuf, "Queue Bytes Received");
   5814 		evcnt_attach_dynamic(&rxr->rx_copies, EVCNT_TYPE_MISC,
   5815 		    NULL, adapter->queues[i].evnamebuf, "Copied RX Frames");
   5816 		evcnt_attach_dynamic(&rxr->no_jmbuf, EVCNT_TYPE_MISC,
   5817 		    NULL, adapter->queues[i].evnamebuf, "Rx no jumbo mbuf");
   5818 		evcnt_attach_dynamic(&rxr->rx_discarded, EVCNT_TYPE_MISC,
   5819 		    NULL, adapter->queues[i].evnamebuf, "Rx discarded");
   5820 		evcnt_attach_dynamic(&rxr->rx_irq, EVCNT_TYPE_MISC,
   5821 		    NULL, adapter->queues[i].evnamebuf, "Rx interrupts");
   5822 #ifdef LRO
   5823 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
   5824 				CTLFLAG_RD, &lro->lro_queued, 0,
   5825 				"LRO Queued");
   5826 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
   5827 				CTLFLAG_RD, &lro->lro_flushed, 0,
   5828 				"LRO Flushed");
   5829 #endif /* LRO */
   5830 	}
   5831 
   5832 	/* MAC stats get the own sub node */
   5833 
   5834 
   5835 	snprintf(stats->namebuf,
   5836 	    sizeof(stats->namebuf), "%s MAC Statistics", device_xname(dev));
   5837 
   5838 	evcnt_attach_dynamic(&stats->ipcs, EVCNT_TYPE_MISC, NULL,
   5839 	    stats->namebuf, "rx csum offload - IP");
   5840 	evcnt_attach_dynamic(&stats->l4cs, EVCNT_TYPE_MISC, NULL,
   5841 	    stats->namebuf, "rx csum offload - L4");
   5842 	evcnt_attach_dynamic(&stats->ipcs_bad, EVCNT_TYPE_MISC, NULL,
   5843 	    stats->namebuf, "rx csum offload - IP bad");
   5844 	evcnt_attach_dynamic(&stats->l4cs_bad, EVCNT_TYPE_MISC, NULL,
   5845 	    stats->namebuf, "rx csum offload - L4 bad");
   5846 	evcnt_attach_dynamic(&stats->intzero, EVCNT_TYPE_MISC, NULL,
   5847 	    stats->namebuf, "Interrupt conditions zero");
   5848 	evcnt_attach_dynamic(&stats->legint, EVCNT_TYPE_MISC, NULL,
   5849 	    stats->namebuf, "Legacy interrupts");
   5850 	evcnt_attach_dynamic(&stats->crcerrs, EVCNT_TYPE_MISC, NULL,
   5851 	    stats->namebuf, "CRC Errors");
   5852 	evcnt_attach_dynamic(&stats->illerrc, EVCNT_TYPE_MISC, NULL,
   5853 	    stats->namebuf, "Illegal Byte Errors");
   5854 	evcnt_attach_dynamic(&stats->errbc, EVCNT_TYPE_MISC, NULL,
   5855 	    stats->namebuf, "Byte Errors");
   5856 	evcnt_attach_dynamic(&stats->mspdc, EVCNT_TYPE_MISC, NULL,
   5857 	    stats->namebuf, "MAC Short Packets Discarded");
   5858 	evcnt_attach_dynamic(&stats->mlfc, EVCNT_TYPE_MISC, NULL,
   5859 	    stats->namebuf, "MAC Local Faults");
   5860 	evcnt_attach_dynamic(&stats->mrfc, EVCNT_TYPE_MISC, NULL,
   5861 	    stats->namebuf, "MAC Remote Faults");
   5862 	evcnt_attach_dynamic(&stats->rlec, EVCNT_TYPE_MISC, NULL,
   5863 	    stats->namebuf, "Receive Length Errors");
   5864 	evcnt_attach_dynamic(&stats->lxontxc, EVCNT_TYPE_MISC, NULL,
   5865 	    stats->namebuf, "Link XON Transmitted");
   5866 	evcnt_attach_dynamic(&stats->lxonrxc, EVCNT_TYPE_MISC, NULL,
   5867 	    stats->namebuf, "Link XON Received");
   5868 	evcnt_attach_dynamic(&stats->lxofftxc, EVCNT_TYPE_MISC, NULL,
   5869 	    stats->namebuf, "Link XOFF Transmitted");
   5870 	evcnt_attach_dynamic(&stats->lxoffrxc, EVCNT_TYPE_MISC, NULL,
   5871 	    stats->namebuf, "Link XOFF Received");
   5872 
   5873 	/* Packet Reception Stats */
   5874 	evcnt_attach_dynamic(&stats->tor, EVCNT_TYPE_MISC, NULL,
   5875 	    stats->namebuf, "Total Octets Received");
   5876 	evcnt_attach_dynamic(&stats->gorc, EVCNT_TYPE_MISC, NULL,
   5877 	    stats->namebuf, "Good Octets Received");
   5878 	evcnt_attach_dynamic(&stats->tpr, EVCNT_TYPE_MISC, NULL,
   5879 	    stats->namebuf, "Total Packets Received");
   5880 	evcnt_attach_dynamic(&stats->gprc, EVCNT_TYPE_MISC, NULL,
   5881 	    stats->namebuf, "Good Packets Received");
   5882 	evcnt_attach_dynamic(&stats->mprc, EVCNT_TYPE_MISC, NULL,
   5883 	    stats->namebuf, "Multicast Packets Received");
   5884 	evcnt_attach_dynamic(&stats->bprc, EVCNT_TYPE_MISC, NULL,
   5885 	    stats->namebuf, "Broadcast Packets Received");
   5886 	evcnt_attach_dynamic(&stats->prc64, EVCNT_TYPE_MISC, NULL,
   5887 	    stats->namebuf, "64 byte frames received ");
   5888 	evcnt_attach_dynamic(&stats->prc127, EVCNT_TYPE_MISC, NULL,
   5889 	    stats->namebuf, "65-127 byte frames received");
   5890 	evcnt_attach_dynamic(&stats->prc255, EVCNT_TYPE_MISC, NULL,
   5891 	    stats->namebuf, "128-255 byte frames received");
   5892 	evcnt_attach_dynamic(&stats->prc511, EVCNT_TYPE_MISC, NULL,
   5893 	    stats->namebuf, "256-511 byte frames received");
   5894 	evcnt_attach_dynamic(&stats->prc1023, EVCNT_TYPE_MISC, NULL,
   5895 	    stats->namebuf, "512-1023 byte frames received");
   5896 	evcnt_attach_dynamic(&stats->prc1522, EVCNT_TYPE_MISC, NULL,
   5897 	    stats->namebuf, "1023-1522 byte frames received");
   5898 	evcnt_attach_dynamic(&stats->ruc, EVCNT_TYPE_MISC, NULL,
   5899 	    stats->namebuf, "Receive Undersized");
   5900 	evcnt_attach_dynamic(&stats->rfc, EVCNT_TYPE_MISC, NULL,
   5901 	    stats->namebuf, "Fragmented Packets Received ");
   5902 	evcnt_attach_dynamic(&stats->roc, EVCNT_TYPE_MISC, NULL,
   5903 	    stats->namebuf, "Oversized Packets Received");
   5904 	evcnt_attach_dynamic(&stats->rjc, EVCNT_TYPE_MISC, NULL,
   5905 	    stats->namebuf, "Received Jabber");
   5906 	evcnt_attach_dynamic(&stats->mngprc, EVCNT_TYPE_MISC, NULL,
   5907 	    stats->namebuf, "Management Packets Received");
   5908 	evcnt_attach_dynamic(&stats->xec, EVCNT_TYPE_MISC, NULL,
   5909 	    stats->namebuf, "Checksum Errors");
   5910 
   5911 	/* Packet Transmission Stats */
   5912 	evcnt_attach_dynamic(&stats->gotc, EVCNT_TYPE_MISC, NULL,
   5913 	    stats->namebuf, "Good Octets Transmitted");
   5914 	evcnt_attach_dynamic(&stats->tpt, EVCNT_TYPE_MISC, NULL,
   5915 	    stats->namebuf, "Total Packets Transmitted");
   5916 	evcnt_attach_dynamic(&stats->gptc, EVCNT_TYPE_MISC, NULL,
   5917 	    stats->namebuf, "Good Packets Transmitted");
   5918 	evcnt_attach_dynamic(&stats->bptc, EVCNT_TYPE_MISC, NULL,
   5919 	    stats->namebuf, "Broadcast Packets Transmitted");
   5920 	evcnt_attach_dynamic(&stats->mptc, EVCNT_TYPE_MISC, NULL,
   5921 	    stats->namebuf, "Multicast Packets Transmitted");
   5922 	evcnt_attach_dynamic(&stats->mngptc, EVCNT_TYPE_MISC, NULL,
   5923 	    stats->namebuf, "Management Packets Transmitted");
   5924 	evcnt_attach_dynamic(&stats->ptc64, EVCNT_TYPE_MISC, NULL,
   5925 	    stats->namebuf, "64 byte frames transmitted ");
   5926 	evcnt_attach_dynamic(&stats->ptc127, EVCNT_TYPE_MISC, NULL,
   5927 	    stats->namebuf, "65-127 byte frames transmitted");
   5928 	evcnt_attach_dynamic(&stats->ptc255, EVCNT_TYPE_MISC, NULL,
   5929 	    stats->namebuf, "128-255 byte frames transmitted");
   5930 	evcnt_attach_dynamic(&stats->ptc511, EVCNT_TYPE_MISC, NULL,
   5931 	    stats->namebuf, "256-511 byte frames transmitted");
   5932 	evcnt_attach_dynamic(&stats->ptc1023, EVCNT_TYPE_MISC, NULL,
   5933 	    stats->namebuf, "512-1023 byte frames transmitted");
   5934 	evcnt_attach_dynamic(&stats->ptc1522, EVCNT_TYPE_MISC, NULL,
   5935 	    stats->namebuf, "1024-1522 byte frames transmitted");
   5936 }
   5937 
   5938 /*
   5939 ** Set flow control using sysctl:
   5940 ** Flow control values:
   5941 ** 	0 - off
   5942 **	1 - rx pause
   5943 **	2 - tx pause
   5944 **	3 - full
   5945 */
   5946 static int
   5947 ixgbe_set_flowcntl(SYSCTLFN_ARGS)
   5948 {
   5949 	struct sysctlnode node;
   5950 	int error, last;
   5951 	struct adapter *adapter;
   5952 
   5953 	node = *rnode;
   5954 	adapter = (struct adapter *)node.sysctl_data;
   5955 	node.sysctl_data = &adapter->fc;
   5956 	last = adapter->fc;
   5957 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   5958 	if (error != 0 || newp == NULL)
   5959 		return error;
   5960 
   5961 	/* Don't bother if it's not changed */
   5962 	if (adapter->fc == last)
   5963 		return (0);
   5964 
   5965 	switch (adapter->fc) {
   5966 		case ixgbe_fc_rx_pause:
   5967 		case ixgbe_fc_tx_pause:
   5968 		case ixgbe_fc_full:
   5969 			adapter->hw.fc.requested_mode = adapter->fc;
   5970 			if (adapter->num_queues > 1)
   5971 				ixgbe_disable_rx_drop(adapter);
   5972 			break;
   5973 		case ixgbe_fc_none:
   5974 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
   5975 			if (adapter->num_queues > 1)
   5976 				ixgbe_enable_rx_drop(adapter);
   5977 			break;
   5978 		default:
   5979 			adapter->fc = last;
   5980 			return (EINVAL);
   5981 	}
   5982 	/* Don't autoneg if forcing a value */
   5983 	adapter->hw.fc.disable_fc_autoneg = TRUE;
   5984 	ixgbe_fc_enable(&adapter->hw);
   5985 	return 0;
   5986 }
   5987 
   5988 /*
   5989 ** Control link advertise speed:
   5990 **	1 - advertise only 1G
   5991 **	2 - advertise 100Mb
   5992 **	3 - advertise normal
   5993 */
   5994 static int
   5995 ixgbe_set_advertise(SYSCTLFN_ARGS)
   5996 {
   5997 	struct sysctlnode	node;
   5998 	int			t, error = 0;
   5999 	struct adapter		*adapter;
   6000 	device_t		dev;
   6001 	struct ixgbe_hw		*hw;
   6002 	ixgbe_link_speed	speed, last;
   6003 
   6004 	node = *rnode;
   6005 	adapter = (struct adapter *)node.sysctl_data;
   6006 	dev = adapter->dev;
   6007 	hw = &adapter->hw;
   6008 	last = adapter->advertise;
   6009 	t = adapter->advertise;
   6010 	node.sysctl_data = &t;
   6011 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6012 	if (error != 0 || newp == NULL)
   6013 		return error;
   6014 
   6015 	if (adapter->advertise == last) /* no change */
   6016 		return (0);
   6017 
   6018 	if (t == -1)
   6019 		return 0;
   6020 
   6021 	adapter->advertise = t;
   6022 
   6023 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
   6024             (hw->phy.multispeed_fiber)))
   6025 		return (EINVAL);
   6026 
   6027 	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
   6028 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
   6029 		return (EINVAL);
   6030 	}
   6031 
   6032 	if (adapter->advertise == 1)
   6033                 speed = IXGBE_LINK_SPEED_1GB_FULL;
   6034 	else if (adapter->advertise == 2)
   6035                 speed = IXGBE_LINK_SPEED_100_FULL;
   6036 	else if (adapter->advertise == 3)
   6037                 speed = IXGBE_LINK_SPEED_1GB_FULL |
   6038 			IXGBE_LINK_SPEED_10GB_FULL;
   6039 	else {/* bogus value */
   6040 		adapter->advertise = last;
   6041 		return (EINVAL);
   6042 	}
   6043 
   6044 	hw->mac.autotry_restart = TRUE;
   6045 	hw->mac.ops.setup_link(hw, speed, TRUE);
   6046 
   6047 	return 0;
   6048 }
   6049 
   6050 /*
   6051 ** Thermal Shutdown Trigger
   6052 **   - cause a Thermal Overtemp IRQ
   6053 */
   6054 static int
   6055 ixgbe_set_thermal_test(SYSCTLFN_ARGS)
   6056 {
   6057 	struct sysctlnode node;
   6058 	int		error, fire = 0;
   6059 	struct adapter	*adapter;
   6060 	struct ixgbe_hw *hw;
   6061 
   6062 	node = *rnode;
   6063 	adapter = (struct adapter *)node.sysctl_data;
   6064 	hw = &adapter->hw;
   6065 
   6066 	if (hw->mac.type != ixgbe_mac_X540)
   6067 		return (0);
   6068 
   6069 	node.sysctl_data = &fire;
   6070 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   6071 	if ((error) || (newp == NULL))
   6072 		return (error);
   6073 
   6074 	if (fire) {
   6075 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
   6076 		reg |= IXGBE_EICR_TS;
   6077 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
   6078 	}
   6079 
   6080 	return (0);
   6081 }
   6082 
   6083 /*
   6084 ** Enable the hardware to drop packets when the buffer is
   6085 ** full. This is useful when multiqueue,so that no single
   6086 ** queue being full stalls the entire RX engine. We only
   6087 ** enable this when Multiqueue AND when Flow Control is
   6088 ** disabled.
   6089 */
   6090 static void
   6091 ixgbe_enable_rx_drop(struct adapter *adapter)
   6092 {
   6093         struct ixgbe_hw *hw = &adapter->hw;
   6094 
   6095 	for (int i = 0; i < adapter->num_queues; i++) {
   6096         	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6097         	srrctl |= IXGBE_SRRCTL_DROP_EN;
   6098         	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6099 	}
   6100 }
   6101 
   6102 static void
   6103 ixgbe_disable_rx_drop(struct adapter *adapter)
   6104 {
   6105         struct ixgbe_hw *hw = &adapter->hw;
   6106 
   6107 	for (int i = 0; i < adapter->num_queues; i++) {
   6108         	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
   6109         	srrctl &= ~IXGBE_SRRCTL_DROP_EN;
   6110         	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
   6111 	}
   6112 }
   6113