      1 /*      $NetBSD: if_xge.c,v 1.34 2020/03/01 15:54:18 thorpej Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2004, SUNET, Swedish University Computer Network.
      5  * All rights reserved.
      6  *
      7  * Written by Anders Magnusson for SUNET, Swedish University Computer Network.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *      This product includes software developed for the NetBSD Project by
     20  *      SUNET, Swedish University Computer Network.
     21  * 4. The name of SUNET may not be used to endorse or promote products
     22  *    derived from this software without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     26  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     27  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SUNET
     28  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     32  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     33  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     34  * POSSIBILITY OF SUCH DAMAGE.
     35  */
     36 
     37 /*
     38  * Device driver for the S2io Xframe Ten Gigabit Ethernet controller.
     39  *
     40  * TODO (in no specific order):
     41  *	HW VLAN support.
     42  *	IPv6 HW cksum.
     43  */
     44 
     45 #include <sys/cdefs.h>
     46 __KERNEL_RCSID(0, "$NetBSD: if_xge.c,v 1.34 2020/03/01 15:54:18 thorpej Exp $");
     47 
     48 
     49 #include <sys/param.h>
     50 #include <sys/systm.h>
     51 #include <sys/mbuf.h>
     52 #include <sys/malloc.h>
     53 #include <sys/kernel.h>
     54 #include <sys/proc.h>
     55 #include <sys/socket.h>
     56 #include <sys/device.h>
     57 
     58 #include <net/if.h>
     59 #include <net/if_dl.h>
     60 #include <net/if_media.h>
     61 #include <net/if_ether.h>
     62 #include <net/bpf.h>
     63 
     64 #include <sys/bus.h>
     65 #include <sys/intr.h>
     66 #include <machine/endian.h>
     67 
     68 #include <dev/mii/mii.h>
     69 #include <dev/mii/miivar.h>
     70 
     71 #include <dev/pci/pcivar.h>
     72 #include <dev/pci/pcireg.h>
     73 #include <dev/pci/pcidevs.h>
     74 
     75 #include <dev/pci/if_xgereg.h>
     76 
     77 /*
     78  * Some tunable constants, tune with care!
     79  */
     80 #define RX_MODE		RX_MODE_1  /* Receive mode (buffer usage, see below) */
     81 #define NRXDESCS	1016	   /* # of receive descriptors (requested) */
     82 #define NTXDESCS	8192	   /* Number of transmit descriptors */
     83 #define NTXFRAGS	100	   /* Max fragments per packet */
     84 #define XGE_EVENT_COUNTERS	   /* Instrumentation */
     85 
     86 /*
     87  * Receive buffer modes; 1, 3 or 5 buffers.
     88  */
     89 #define RX_MODE_1 1
     90 #define RX_MODE_3 3
     91 #define RX_MODE_5 5
     92 
     93 /*
     94  * Use clever macros to avoid a bunch of #ifdef's.
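 * With RX_MODE set to RX_MODE_1 they expand rxdesc to rxd1 and
 * rxd_4k to rxd1_4k, i.e. the descriptor layouts that match the
 * configured buffer mode.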
     95  */
     96 #define XCONCAT3(x, y, z) x ## y ## z
     97 #define CONCAT3(x, y, z) XCONCAT3(x, y, z)
     98 #define NDESC_BUFMODE CONCAT3(NDESC_, RX_MODE, BUFMODE)
     99 #define rxd_4k CONCAT3(rxd, RX_MODE, _4k)
    100 #define rxdesc ___CONCAT(rxd, RX_MODE)
    101 
    102 #define NEXTTX(x)	(((x)+1) % NTXDESCS)
    103 #define NRXFRAGS	RX_MODE /* hardware imposed frags */
    104 #define NRXPAGES	((NRXDESCS/NDESC_BUFMODE)+1)
    105 #define NRXREAL		(NRXPAGES*NDESC_BUFMODE)
    106 #define RXMAPSZ		(NRXPAGES*PAGE_SIZE)
    107 
    108 #ifdef XGE_EVENT_COUNTERS
    109 #define XGE_EVCNT_INCR(ev)	(ev)->ev_count++
    110 #else
    111 #define XGE_EVCNT_INCR(ev)	/* nothing */
    112 #endif
    113 
    114 /*
 * Magic values to work around a bug where the MAC address can't be
 * read correctly.  They come from the Linux driver.
    117  */
    118 static uint64_t fix_mac[] = {
    119 	0x0060000000000000ULL, 0x0060600000000000ULL,
    120 	0x0040600000000000ULL, 0x0000600000000000ULL,
    121 	0x0020600000000000ULL, 0x0060600000000000ULL,
    122 	0x0020600000000000ULL, 0x0060600000000000ULL,
    123 	0x0020600000000000ULL, 0x0060600000000000ULL,
    124 	0x0020600000000000ULL, 0x0060600000000000ULL,
    125 	0x0020600000000000ULL, 0x0060600000000000ULL,
    126 	0x0020600000000000ULL, 0x0060600000000000ULL,
    127 	0x0020600000000000ULL, 0x0060600000000000ULL,
    128 	0x0020600000000000ULL, 0x0060600000000000ULL,
    129 	0x0020600000000000ULL, 0x0060600000000000ULL,
    130 	0x0020600000000000ULL, 0x0060600000000000ULL,
    131 	0x0020600000000000ULL, 0x0000600000000000ULL,
    132 	0x0040600000000000ULL, 0x0060600000000000ULL,
    133 };
    134 
    135 
    136 struct xge_softc {
    137 	device_t sc_dev;
    138 	struct ethercom sc_ethercom;
    139 #define sc_if sc_ethercom.ec_if
    140 	bus_dma_tag_t sc_dmat;
    141 	bus_space_tag_t sc_st;
    142 	bus_space_handle_t sc_sh;
    143 	bus_space_tag_t sc_txt;
    144 	bus_space_handle_t sc_txh;
    145 	void *sc_ih;
    146 
    147 	struct ifmedia xena_media;
    148 	pcireg_t sc_pciregs[16];
    149 
    150 	/* Transmit structures */
    151 	struct txd *sc_txd[NTXDESCS];	/* transmit frags array */
    152 	bus_addr_t sc_txdp[NTXDESCS];	/* bus address of transmit frags */
    153 	bus_dmamap_t sc_txm[NTXDESCS];	/* transmit frags map */
    154 	struct mbuf *sc_txb[NTXDESCS];	/* transmit mbuf pointer */
    155 	int sc_nexttx, sc_lasttx;
    156 	bus_dmamap_t sc_txmap;		/* transmit descriptor map */
    157 
    158 	/* Receive data */
    159 	bus_dmamap_t sc_rxmap;		/* receive descriptor map */
    160 	struct rxd_4k *sc_rxd_4k[NRXPAGES]; /* receive desc pages */
    161 	bus_dmamap_t sc_rxm[NRXREAL];	/* receive buffer map */
    162 	struct mbuf *sc_rxb[NRXREAL];	/* mbufs on receive descriptors */
    163 	int sc_nextrx;			/* next descriptor to check */
    164 
    165 #ifdef XGE_EVENT_COUNTERS
    166 	struct evcnt sc_intr;	/* # of interrupts */
    167 	struct evcnt sc_txintr;	/* # of transmit interrupts */
    168 	struct evcnt sc_rxintr;	/* # of receive interrupts */
    169 	struct evcnt sc_txqe;	/* # of xmit intrs when board queue empty */
    170 #endif
    171 };
    172 
    173 static int xge_match(device_t parent, cfdata_t cf, void *aux);
    174 static void xge_attach(device_t parent, device_t self, void *aux);
    175 static int xge_alloc_txmem(struct xge_softc *);
    176 static int xge_alloc_rxmem(struct xge_softc *);
    177 static void xge_start(struct ifnet *);
    178 static void xge_stop(struct ifnet *, int);
    179 static int xge_add_rxbuf(struct xge_softc *, int);
    180 static void xge_mcast_filter(struct xge_softc *sc);
    181 static int xge_setup_xgxs(struct xge_softc *sc);
    182 static int xge_ioctl(struct ifnet *ifp, u_long cmd, void *data);
    183 static int xge_init(struct ifnet *ifp);
    184 static void xge_ifmedia_status(struct ifnet *, struct ifmediareq *);
    185 static int xge_xgmii_mediachange(struct ifnet *);
static int xge_intr(void *);
    187 
    188 /*
    189  * Helpers to address registers.
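 * The chip registers are 64 bits wide but are accessed as two 32-bit
 * bus_space operations: the low word at `csr', the high word at csr+4.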
    190  */
    191 #define PIF_WCSR(csr, val)	pif_wcsr(sc, csr, val)
    192 #define PIF_RCSR(csr)		pif_rcsr(sc, csr)
    193 #define TXP_WCSR(csr, val)	txp_wcsr(sc, csr, val)
    194 #define PIF_WKEY(csr, val)	pif_wkey(sc, csr, val)
    195 
    196 static inline void
    197 pif_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
    198 {
    199 	uint32_t lval, hval;
    200 
    201 	lval = val&0xffffffff;
    202 	hval = val>>32;
    203 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
    204 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
    205 }
    206 
    207 static inline uint64_t
    208 pif_rcsr(struct xge_softc *sc, bus_size_t csr)
    209 {
    210 	uint64_t val, val2;
    211 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, csr);
    212 	val2 = bus_space_read_4(sc->sc_st, sc->sc_sh, csr+4);
    213 	val |= (val2 << 32);
    214 	return val;
    215 }
    216 
    217 static inline void
    218 txp_wcsr(struct xge_softc *sc, bus_size_t csr, uint64_t val)
    219 {
    220 	uint32_t lval, hval;
    221 
    222 	lval = val&0xffffffff;
    223 	hval = val>>32;
    224 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr, lval);
    225 	bus_space_write_4(sc->sc_txt, sc->sc_txh, csr+4, hval);
    226 }
    227 
    228 
    229 static inline void
    230 pif_wkey(struct xge_softc *sc, bus_size_t csr, uint64_t val)
    231 {
    232 	uint32_t lval, hval;
    233 
    234 	lval = val&0xffffffff;
    235 	hval = val>>32;
    236 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
    237 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr, lval);
    238 	PIF_WCSR(RMAC_CFG_KEY, RMAC_KEY_VALUE);
    239 	bus_space_write_4(sc->sc_st, sc->sc_sh, csr+4, hval);
    240 }
    241 
    242 
    243 CFATTACH_DECL_NEW(xge, sizeof(struct xge_softc),
    244     xge_match, xge_attach, NULL, NULL);
    245 
    246 #define XNAME device_xname(sc->sc_dev)
    247 
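/*
 * The receive descriptors live in 4k pages, NDESC_BUFMODE descriptors
 * per page; these macros locate and sync descriptor `desc' within the
 * single receive descriptor DMA map.
 */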
    248 #define XGE_RXSYNC(desc, what) \
    249 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxmap, \
    250 	(desc/NDESC_BUFMODE) * XGE_PAGE + sizeof(struct rxdesc) * \
    251 	(desc%NDESC_BUFMODE), sizeof(struct rxdesc), what)
    252 #define XGE_RXD(desc)	&sc->sc_rxd_4k[desc/NDESC_BUFMODE]-> \
    253 	r4_rxd[desc%NDESC_BUFMODE]
    254 
    255 /*
    256  * Non-tunable constants.
    257  */
    258 #define XGE_MAX_MTU		9600
    259 #define	XGE_IP_MAXPACKET	65535	/* same as IP_MAXPACKET */
    260 
    261 static int
    262 xge_match(device_t parent, cfdata_t cf, void *aux)
    263 {
    264 	struct pci_attach_args *pa = aux;
    265 
    266 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_S2IO &&
    267 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_S2IO_XFRAME)
    268 		return 1;
    269 
    270 	return 0;
    271 }
    272 
    273 void
    274 xge_attach(device_t parent, device_t self, void *aux)
    275 {
    276 	struct pci_attach_args *pa = aux;
    277 	struct xge_softc *sc;
    278 	struct ifnet *ifp;
    279 	pcireg_t memtype;
    280 	pci_intr_handle_t ih;
    281 	const char *intrstr = NULL;
    282 	pci_chipset_tag_t pc = pa->pa_pc;
    283 	uint8_t enaddr[ETHER_ADDR_LEN];
    284 	uint64_t val;
    285 	int i;
    286 	char intrbuf[PCI_INTRSTR_LEN];
    287 
    288 	sc = device_private(self);
    289 	sc->sc_dev = self;
    290 
    291 	if (pci_dma64_available(pa))
    292 		sc->sc_dmat = pa->pa_dmat64;
    293 	else
    294 		sc->sc_dmat = pa->pa_dmat;
    295 
    296 	/* Get BAR0 address */
    297 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_PIF_BAR);
    298 	if (pci_mapreg_map(pa, XGE_PIF_BAR, memtype, 0,
    299 	    &sc->sc_st, &sc->sc_sh, 0, 0)) {
    300 		aprint_error("%s: unable to map PIF BAR registers\n", XNAME);
    301 		return;
    302 	}
    303 
    304 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, XGE_TXP_BAR);
    305 	if (pci_mapreg_map(pa, XGE_TXP_BAR, memtype, 0,
    306 	    &sc->sc_txt, &sc->sc_txh, 0, 0)) {
    307 		aprint_error("%s: unable to map TXP BAR registers\n", XNAME);
    308 		return;
    309 	}
    310 
    311 	/* Save PCI config space */
    312 	for (i = 0; i < 64; i += 4)
    313 		sc->sc_pciregs[i/4] = pci_conf_read(pa->pa_pc, pa->pa_tag, i);
    314 
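	/*
	 * Configure the register byte swapper for little-endian hosts;
	 * big-endian hosts can use the chip defaults.  The read-back of
	 * PIF_RD_SWAPPER_Fb below verifies that the setting took effect.
	 */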
    315 #if BYTE_ORDER == LITTLE_ENDIAN
    316 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
    317 	val &= ~(TxF_R_SE | RxF_W_SE);
    318 	PIF_WCSR(SWAPPER_CTRL, val);
    319 	PIF_WCSR(SWAPPER_CTRL, val);
    320 #elif BYTE_ORDER == BIG_ENDIAN
    321 	/* do nothing */
    322 #else
    323 #error bad endianness!
    324 #endif
    325 
    326 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
    327 		aprint_error("%s: failed configuring endian, %llx != %llx!\n",
    328 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
    329 		return;
    330 	}
    331 
    332 	/*
	 * The MAC address may read back as all FF's, which is not good.
	 * Resolve it by writing some magic values to GPIO_CONTROL and
	 * forcing a chip reset so the serial EEPROM is read in again.
    336 	 */
    337 	for (i = 0; i < sizeof(fix_mac)/sizeof(fix_mac[0]); i++) {
    338 		PIF_WCSR(GPIO_CONTROL, fix_mac[i]);
    339 		PIF_RCSR(GPIO_CONTROL);
    340 	}
    341 
    342 	/*
    343 	 * Reset the chip and restore the PCI registers.
    344 	 */
    345 	PIF_WCSR(SW_RESET, 0xa5a5a50000000000ULL);
    346 	DELAY(500000);
    347 	for (i = 0; i < 64; i += 4)
    348 		pci_conf_write(pa->pa_pc, pa->pa_tag, i, sc->sc_pciregs[i/4]);
    349 
    350 	/*
    351 	 * Restore the byte order registers.
    352 	 */
    353 #if BYTE_ORDER == LITTLE_ENDIAN
    354 	val = (uint64_t)0xFFFFFFFFFFFFFFFFULL;
    355 	val &= ~(TxF_R_SE | RxF_W_SE);
    356 	PIF_WCSR(SWAPPER_CTRL, val);
    357 	PIF_WCSR(SWAPPER_CTRL, val);
    358 #elif BYTE_ORDER == BIG_ENDIAN
    359 	/* do nothing */
    360 #else
    361 #error bad endianness!
    362 #endif
    363 
    364 	if ((val = PIF_RCSR(PIF_RD_SWAPPER_Fb)) != SWAPPER_MAGIC) {
    365 		aprint_error("%s: failed configuring endian2, %llx != %llx!\n",
    366 		    XNAME, (unsigned long long)val, SWAPPER_MAGIC);
    367 		return;
    368 	}
    369 
    370 	/*
    371 	 * XGXS initialization.
    372 	 */
    373 	/* 29, reset */
    374 	PIF_WCSR(SW_RESET, 0);
    375 	DELAY(500000);
    376 
    377 	/* 30, configure XGXS transceiver */
    378 	xge_setup_xgxs(sc);
    379 
    380 	/* 33, program MAC address (not needed here) */
    381 	/* Get ethernet address */
    382 	PIF_WCSR(RMAC_ADDR_CMD_MEM,
    383 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(0));
    384 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
    385 		;
    386 	val = PIF_RCSR(RMAC_ADDR_DATA0_MEM);
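	/* The station address is in the upper 48 bits of the register. */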
    387 	for (i = 0; i < ETHER_ADDR_LEN; i++)
    388 		enaddr[i] = (uint8_t)(val >> (56 - (8*i)));
    389 
    390 	/*
    391 	 * Get memory for transmit descriptor lists.
    392 	 */
    393 	if (xge_alloc_txmem(sc)) {
    394 		aprint_error("%s: failed allocating txmem.\n", XNAME);
    395 		return;
    396 	}
    397 
    398 	/* 9 and 10 - set FIFO number/prio */
    399 	PIF_WCSR(TX_FIFO_P0, TX_FIFO_LEN0(NTXDESCS));
    400 	PIF_WCSR(TX_FIFO_P1, 0ULL);
    401 	PIF_WCSR(TX_FIFO_P2, 0ULL);
    402 	PIF_WCSR(TX_FIFO_P3, 0ULL);
    403 
    404 	/* 11, XXX set round-robin prio? */
    405 
    406 	/* 12, enable transmit FIFO */
    407 	val = PIF_RCSR(TX_FIFO_P0);
    408 	val |= TX_FIFO_ENABLE;
    409 	PIF_WCSR(TX_FIFO_P0, val);
    410 
    411 	/* 13, disable some error checks */
    412 	PIF_WCSR(TX_PA_CFG,
    413 	    TX_PA_CFG_IFR | TX_PA_CFG_ISO | TX_PA_CFG_ILC | TX_PA_CFG_ILE);
    414 
    415 	/*
    416 	 * Create transmit DMA maps.
    417 	 * Make them large for TSO.
    418 	 */
    419 	for (i = 0; i < NTXDESCS; i++) {
    420 		if (bus_dmamap_create(sc->sc_dmat, XGE_IP_MAXPACKET,
    421 		    NTXFRAGS, MCLBYTES, 0, 0, &sc->sc_txm[i])) {
    422 			aprint_error("%s: cannot create TX DMA maps\n", XNAME);
    423 			return;
    424 		}
    425 	}
    426 
    427 	sc->sc_lasttx = NTXDESCS-1;
    428 
    429 	/*
    430 	 * RxDMA initialization.
    431 	 * Only use one out of 8 possible receive queues.
    432 	 */
    433 	if (xge_alloc_rxmem(sc)) {	/* allocate rx descriptor memory */
    434 		aprint_error("%s: failed allocating rxmem\n", XNAME);
    435 		return;
    436 	}
    437 
    438 	/* Create receive buffer DMA maps */
    439 	for (i = 0; i < NRXREAL; i++) {
    440 		if (bus_dmamap_create(sc->sc_dmat, XGE_MAX_MTU,
    441 		    NRXFRAGS, MCLBYTES, 0, 0, &sc->sc_rxm[i])) {
    442 			aprint_error("%s: cannot create RX DMA maps\n", XNAME);
    443 			return;
    444 		}
    445 	}
    446 
    447 	/* allocate mbufs to receive descriptors */
    448 	for (i = 0; i < NRXREAL; i++)
    449 		if (xge_add_rxbuf(sc, i))
    450 			panic("out of mbufs too early");
    451 
    452 	/* 14, setup receive ring priority */
    453 	PIF_WCSR(RX_QUEUE_PRIORITY, 0ULL); /* only use one ring */
    454 
    455 	/* 15, setup receive ring round-robin calendar */
    456 	PIF_WCSR(RX_W_ROUND_ROBIN_0, 0ULL); /* only use one ring */
    457 	PIF_WCSR(RX_W_ROUND_ROBIN_1, 0ULL);
    458 	PIF_WCSR(RX_W_ROUND_ROBIN_2, 0ULL);
    459 	PIF_WCSR(RX_W_ROUND_ROBIN_3, 0ULL);
    460 	PIF_WCSR(RX_W_ROUND_ROBIN_4, 0ULL);
    461 
    462 	/* 16, write receive ring start address */
    463 	PIF_WCSR(PRC_RXD0_0, (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr);
    464 	/* PRC_RXD0_[1-7] are not used */
    465 
    466 	/* 17, Setup alarm registers */
    467 	PIF_WCSR(PRC_ALARM_ACTION, 0ULL); /* Default everything to retry */
    468 
    469 	/* 18, init receive ring controller */
    470 #if RX_MODE == RX_MODE_1
    471 	val = RING_MODE_1;
    472 #elif RX_MODE == RX_MODE_3
    473 	val = RING_MODE_3;
    474 #else /* RX_MODE == RX_MODE_5 */
    475 	val = RING_MODE_5;
    476 #endif
    477 	PIF_WCSR(PRC_CTRL_0, RC_IN_SVC | val);
    478 	/* leave 1-7 disabled */
    479 	/* XXXX snoop configuration? */
    480 
    481 	/* 19, set chip memory assigned to the queue */
    482 	PIF_WCSR(RX_QUEUE_CFG, MC_QUEUE(0, 64)); /* all 64M to queue 0 */
    483 
    484 	/* 20, setup RLDRAM parameters */
    485 	/* do not touch it for now */
    486 
    487 	/* 21, setup pause frame thresholds */
	/* do not touch the defaults */
    489 	/* XXX - must 0xff be written as stated in the manual? */
    490 
    491 	/* 22, configure RED */
    492 	/* we do not want to drop packets, so ignore */
    493 
    494 	/* 23, initiate RLDRAM */
    495 	val = PIF_RCSR(MC_RLDRAM_MRS);
    496 	val |= MC_QUEUE_SIZE_ENABLE | MC_RLDRAM_MRS_ENABLE;
    497 	PIF_WCSR(MC_RLDRAM_MRS, val);
    498 	DELAY(1000);
    499 
    500 	/*
    501 	 * Setup interrupt policies.
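	 * The TTI/RTI timer values and utilization ranges below control
	 * how transmit and receive interrupts are moderated.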
    502 	 */
    503 	/* 40, Transmit interrupts */
    504 	PIF_WCSR(TTI_DATA1_MEM, TX_TIMER_VAL(0x1ff) | TX_TIMER_AC |
    505 	    TX_URNG_A(5) | TX_URNG_B(20) | TX_URNG_C(48));
    506 	PIF_WCSR(TTI_DATA2_MEM,
    507 	    TX_UFC_A(25) | TX_UFC_B(64) | TX_UFC_C(128) | TX_UFC_D(512));
    508 	PIF_WCSR(TTI_COMMAND_MEM, TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE);
    509 	while (PIF_RCSR(TTI_COMMAND_MEM) & TTI_CMD_MEM_STROBE)
    510 		;
    511 
    512 	/* 41, Receive interrupts */
    513 	PIF_WCSR(RTI_DATA1_MEM, RX_TIMER_VAL(0x800) | RX_TIMER_AC |
    514 	    RX_URNG_A(5) | RX_URNG_B(20) | RX_URNG_C(50));
    515 	PIF_WCSR(RTI_DATA2_MEM,
    516 	    RX_UFC_A(64) | RX_UFC_B(128) | RX_UFC_C(256) | RX_UFC_D(512));
    517 	PIF_WCSR(RTI_COMMAND_MEM, RTI_CMD_MEM_WE | RTI_CMD_MEM_STROBE);
    518 	while (PIF_RCSR(RTI_COMMAND_MEM) & RTI_CMD_MEM_STROBE)
    519 		;
    520 
    521 	/*
    522 	 * Setup media stuff.
    523 	 */
    524 	sc->sc_ethercom.ec_ifmedia = &sc->xena_media;
    525 	ifmedia_init(&sc->xena_media, IFM_IMASK, xge_xgmii_mediachange,
    526 	    xge_ifmedia_status);
    527 	ifmedia_add(&sc->xena_media, IFM_ETHER | IFM_10G_LR, 0, NULL);
    528 	ifmedia_set(&sc->xena_media, IFM_ETHER | IFM_10G_LR);
    529 
    530 	aprint_normal("%s: Ethernet address %s\n", XNAME,
    531 	    ether_sprintf(enaddr));
    532 
    533 	ifp = &sc->sc_ethercom.ec_if;
    534 	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), IFNAMSIZ);
    535 	ifp->if_baudrate = 10000000000LL;
    536 	ifp->if_init = xge_init;
    537 	ifp->if_stop = xge_stop;
    538 	ifp->if_softc = sc;
    539 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    540 	ifp->if_ioctl = xge_ioctl;
    541 	ifp->if_start = xge_start;
    542 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(NTXDESCS - 1, IFQ_MAXLEN));
    543 	IFQ_SET_READY(&ifp->if_snd);
    544 
    545 	/*
    546 	 * Offloading capabilities.
    547 	 */
    548 	sc->sc_ethercom.ec_capabilities |=
    549 	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
    550 	ifp->if_capabilities |=
    551 	    IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx |
    552 	    IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx |
    553 	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | IFCAP_TSOv4;
    554 
    555 	/*
    556 	 * Attach the interface.
    557 	 */
    558 	if_attach(ifp);
    559 	if_deferred_start_init(ifp, NULL);
    560 	ether_ifattach(ifp, enaddr);
    561 
    562 	/*
    563 	 * Setup interrupt vector before initializing.
    564 	 */
    565 	if (pci_intr_map(pa, &ih)) {
    566 		aprint_error_dev(sc->sc_dev, "unable to map interrupt\n");
    567 		return;
    568 	}
    569 	intrstr = pci_intr_string(pc, ih, intrbuf, sizeof(intrbuf));
    570 	sc->sc_ih = pci_intr_establish_xname(pc, ih, IPL_NET, xge_intr, sc,
    571 	    device_xname(self));
    572 	if (sc->sc_ih == NULL) {
    573 		aprint_error_dev(sc->sc_dev,
    574 		    "unable to establish interrupt at %s\n",
    575 		    intrstr ? intrstr : "<unknown>");
    576 		return;
    577 	}
    578 	aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
    579 
    580 #ifdef XGE_EVENT_COUNTERS
    581 	evcnt_attach_dynamic(&sc->sc_intr, EVCNT_TYPE_MISC,
    582 	    NULL, XNAME, "intr");
    583 	evcnt_attach_dynamic(&sc->sc_txintr, EVCNT_TYPE_MISC,
    584 	    NULL, XNAME, "txintr");
    585 	evcnt_attach_dynamic(&sc->sc_rxintr, EVCNT_TYPE_MISC,
    586 	    NULL, XNAME, "rxintr");
    587 	evcnt_attach_dynamic(&sc->sc_txqe, EVCNT_TYPE_MISC,
    588 	    NULL, XNAME, "txqe");
    589 #endif
    590 }
    591 
    592 void
    593 xge_ifmedia_status(struct ifnet *ifp, struct ifmediareq *ifmr)
    594 {
    595 	struct xge_softc *sc = ifp->if_softc;
    596 	uint64_t reg;
    597 
    598 	ifmr->ifm_status = IFM_AVALID;
    599 	ifmr->ifm_active = IFM_ETHER | IFM_10G_LR;
    600 
    601 	reg = PIF_RCSR(ADAPTER_STATUS);
    602 	if ((reg & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
    603 		ifmr->ifm_status |= IFM_ACTIVE;
    604 }
    605 
    606 int
    607 xge_xgmii_mediachange(struct ifnet *ifp)
    608 {
    609 	return 0;
    610 }
    611 
    612 static void
    613 xge_enable(struct xge_softc *sc)
    614 {
    615 	uint64_t val;
    616 
    617 	/* 2, enable adapter */
    618 	val = PIF_RCSR(ADAPTER_CONTROL);
    619 	val |= ADAPTER_EN;
    620 	PIF_WCSR(ADAPTER_CONTROL, val);
    621 
    622 	/* 3, light the card enable led */
    623 	val = PIF_RCSR(ADAPTER_CONTROL);
    624 	val |= LED_ON;
    625 	PIF_WCSR(ADAPTER_CONTROL, val);
    626 	printf("%s: link up\n", XNAME);
}
    629 
    630 int
    631 xge_init(struct ifnet *ifp)
    632 {
    633 	struct xge_softc *sc = ifp->if_softc;
    634 	uint64_t val;
    635 
    636 	if (ifp->if_flags & IFF_RUNNING)
    637 		return 0;
    638 
    639 	/* 31+32, setup MAC config */
    640 	PIF_WKEY(MAC_CFG, TMAC_EN | RMAC_EN | TMAC_APPEND_PAD |
    641 	    RMAC_STRIP_FCS | RMAC_BCAST_EN | RMAC_DISCARD_PFRM | RMAC_PROM_EN);
    642 
    643 	DELAY(1000);
    644 
    645 	/* 54, ensure that the adapter is 'quiescent' */
    646 	val = PIF_RCSR(ADAPTER_STATUS);
    647 	if ((val & QUIESCENT) != QUIESCENT) {
    648 		char buf[200];
    649 		printf("%s: adapter not quiescent, aborting\n", XNAME);
    650 		val = (val & QUIESCENT) ^ QUIESCENT;
    651 		snprintb(buf, sizeof buf, QUIESCENT_BMSK, val);
    652 		printf("%s: ADAPTER_STATUS missing bits %s\n", XNAME, buf);
    653 		return 1;
    654 	}
    655 
    656 	/* 56, enable the transmit laser */
    657 	val = PIF_RCSR(ADAPTER_CONTROL);
    658 	val |= EOI_TX_ON;
    659 	PIF_WCSR(ADAPTER_CONTROL, val);
    660 
    661 	xge_enable(sc);
    662 	/*
    663 	 * Enable all interrupts
    664 	 */
    665 	PIF_WCSR(TX_TRAFFIC_MASK, 0);
    666 	PIF_WCSR(RX_TRAFFIC_MASK, 0);
    667 	PIF_WCSR(GENERAL_INT_MASK, 0);
    668 	PIF_WCSR(TXPIC_INT_MASK, 0);
    669 	PIF_WCSR(RXPIC_INT_MASK, 0);
    670 	PIF_WCSR(MAC_INT_MASK, MAC_TMAC_INT); /* only from RMAC */
    671 	PIF_WCSR(MAC_RMAC_ERR_MASK, ~RMAC_LINK_STATE_CHANGE_INT);
    672 
    673 
    674 	/* Done... */
    675 	ifp->if_flags |= IFF_RUNNING;
    676 	ifp->if_flags &= ~IFF_OACTIVE;
    677 
    678 	return 0;
    679 }
    680 
    681 static void
    682 xge_stop(struct ifnet *ifp, int disable)
    683 {
    684 	struct xge_softc *sc = ifp->if_softc;
    685 	uint64_t val;
    686 
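	/* Disable the adapter and wait until it reports quiescent. */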
    687 	val = PIF_RCSR(ADAPTER_CONTROL);
    688 	val &= ~ADAPTER_EN;
    689 	PIF_WCSR(ADAPTER_CONTROL, val);
    690 
    691 	while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
    692 		;
    693 }
    694 
    695 int
    696 xge_intr(void *pv)
    697 {
    698 	struct xge_softc *sc = pv;
    699 	struct txd *txd;
    700 	struct ifnet *ifp = &sc->sc_if;
    701 	bus_dmamap_t dmp;
    702 	uint64_t val;
    703 	int i, lasttx, plen;
    704 
    705 	val = PIF_RCSR(GENERAL_INT_STATUS);
    706 	if (val == 0)
    707 		return 0; /* no interrupt here */
    708 
    709 	XGE_EVCNT_INCR(&sc->sc_intr);
    710 
    711 	PIF_WCSR(GENERAL_INT_STATUS, val);
    712 
    713 	if ((val = PIF_RCSR(MAC_RMAC_ERR_REG)) & RMAC_LINK_STATE_CHANGE_INT) {
    714 		/* Wait for quiescence */
    715 		printf("%s: link down\n", XNAME);
    716 		while ((PIF_RCSR(ADAPTER_STATUS) & QUIESCENT) != QUIESCENT)
    717 			;
    718 		PIF_WCSR(MAC_RMAC_ERR_REG, RMAC_LINK_STATE_CHANGE_INT);
    719 
    720 		val = PIF_RCSR(ADAPTER_STATUS);
    721 		if ((val & (RMAC_REMOTE_FAULT | RMAC_LOCAL_FAULT)) == 0)
    722 			xge_enable(sc); /* Only if link restored */
    723 	}
    724 
    725 	if ((val = PIF_RCSR(TX_TRAFFIC_INT))) {
    726 		XGE_EVCNT_INCR(&sc->sc_txintr);
    727 		PIF_WCSR(TX_TRAFFIC_INT, val); /* clear interrupt bits */
    728 	}
    729 	/*
    730 	 * Collect sent packets.
    731 	 */
    732 	lasttx = sc->sc_lasttx;
    733 	while ((i = NEXTTX(sc->sc_lasttx)) != sc->sc_nexttx) {
    734 		txd = sc->sc_txd[i];
    735 		dmp = sc->sc_txm[i];
    736 
    737 		bus_dmamap_sync(sc->sc_dmat, dmp, 0,
    738 		    dmp->dm_mapsize,
    739 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
    740 
    741 		if (txd->txd_control1 & TXD_CTL1_OWN) {
    742 			bus_dmamap_sync(sc->sc_dmat, dmp, 0,
    743 			    dmp->dm_mapsize, BUS_DMASYNC_PREREAD);
    744 			break;
    745 		}
    746 		bus_dmamap_unload(sc->sc_dmat, dmp);
    747 		m_freem(sc->sc_txb[i]);
    748 		if_statinc(ifp, if_opackets);
    749 		sc->sc_lasttx = i;
    750 	}
    751 	if (i == sc->sc_nexttx) {
    752 		XGE_EVCNT_INCR(&sc->sc_txqe);
    753 	}
    754 
    755 	if (sc->sc_lasttx != lasttx)
    756 		ifp->if_flags &= ~IFF_OACTIVE;
    757 
    758 	/* Try to get more packets on the wire */
    759 	if_schedule_deferred_start(ifp);
    760 
    761 	if ((val = PIF_RCSR(RX_TRAFFIC_INT))) {
    762 		XGE_EVCNT_INCR(&sc->sc_rxintr);
    763 		PIF_WCSR(RX_TRAFFIC_INT, val); /* Clear interrupt bits */
    764 	}
    765 
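	/*
	 * Process received packets until we find a descriptor that is
	 * still owned by the chip.
	 */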
    766 	for (;;) {
    767 		struct rxdesc *rxd;
    768 		struct mbuf *m;
    769 
    770 		XGE_RXSYNC(sc->sc_nextrx,
    771 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
    772 
    773 		rxd = XGE_RXD(sc->sc_nextrx);
    774 		if (rxd->rxd_control1 & RXD_CTL1_OWN) {
    775 			XGE_RXSYNC(sc->sc_nextrx, BUS_DMASYNC_PREREAD);
    776 			break;
    777 		}
    778 
    779 		/* Got a packet */
    780 		m = sc->sc_rxb[sc->sc_nextrx];
    781 #if RX_MODE == RX_MODE_1
    782 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
    783 #elif RX_MODE == RX_MODE_3
    784 #error Fix rxmodes in xge_intr
    785 #elif RX_MODE == RX_MODE_5
    786 		plen = m->m_len = RXD_CTL2_BUF0SIZ(rxd->rxd_control2);
    787 		plen += m->m_next->m_len = RXD_CTL2_BUF1SIZ(rxd->rxd_control2);
    788 		plen += m->m_next->m_next->m_len =
    789 		    RXD_CTL2_BUF2SIZ(rxd->rxd_control2);
    790 		plen += m->m_next->m_next->m_next->m_len =
    791 		    RXD_CTL3_BUF3SIZ(rxd->rxd_control3);
    792 		plen += m->m_next->m_next->m_next->m_next->m_len =
    793 		    RXD_CTL3_BUF4SIZ(rxd->rxd_control3);
    794 #endif
    795 		m_set_rcvif(m, ifp);
    796 		m->m_pkthdr.len = plen;
    797 
    798 		val = rxd->rxd_control1;
    799 
    800 		if (xge_add_rxbuf(sc, sc->sc_nextrx)) {
    801 			/* Failed, recycle this mbuf */
    802 #if RX_MODE == RX_MODE_1
    803 			rxd->rxd_control2 = RXD_MKCTL2(MCLBYTES, 0, 0);
    804 			rxd->rxd_control1 = RXD_CTL1_OWN;
    805 #elif RX_MODE == RX_MODE_3
    806 #elif RX_MODE == RX_MODE_5
    807 #endif
    808 			XGE_RXSYNC(sc->sc_nextrx,
    809 			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    810 			if_statinc(ifp, if_ierrors);
    811 			break;
    812 		}
    813 
    814 		if (RXD_CTL1_PROTOS(val) & (RXD_CTL1_P_IPv4|RXD_CTL1_P_IPv6)) {
    815 			m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
    816 			if (RXD_CTL1_L3CSUM(val) != 0xffff)
    817 				m->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
    818 		}
    819 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_TCP) {
    820 			m->m_pkthdr.csum_flags |= M_CSUM_TCPv4 | M_CSUM_TCPv6;
    821 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
    822 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
    823 		}
    824 		if (RXD_CTL1_PROTOS(val) & RXD_CTL1_P_UDP) {
    825 			m->m_pkthdr.csum_flags |= M_CSUM_UDPv4 | M_CSUM_UDPv6;
    826 			if (RXD_CTL1_L4CSUM(val) != 0xffff)
    827 				m->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
    828 		}
    829 
    830 		if_percpuq_enqueue(ifp->if_percpuq, m);
    831 
    832 		if (++sc->sc_nextrx == NRXREAL)
    833 			sc->sc_nextrx = 0;
    834 
    835 	}
    836 
    837 	return 0;
    838 }
    839 
    840 int
    841 xge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
    842 {
    843 	struct xge_softc *sc = ifp->if_softc;
    844 	struct ifreq *ifr = (struct ifreq *) data;
    845 	int s, error = 0;
    846 
    847 	s = splnet();
    848 
    849 	switch (cmd) {
    850 	case SIOCSIFMTU:
    851 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > XGE_MAX_MTU)
    852 			error = EINVAL;
    853 		else if ((error = ifioctl_common(ifp, cmd, data))
    854 		    == ENETRESET) {
    855 			PIF_WCSR(RMAC_MAX_PYLD_LEN,
    856 			    RMAC_PYLD_LEN(ifr->ifr_mtu));
    857 			error = 0;
    858 		}
    859 		break;
    860 
    861 	default:
    862 		if ((error = ether_ioctl(ifp, cmd, data)) != ENETRESET)
    863 			break;
    864 
    865 		error = 0;
    866 
		if ((cmd == SIOCADDMULTI || cmd == SIOCDELMULTI) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/* Change multicast list */
			xge_mcast_filter(sc);
		}
    873 		break;
    874 	}
    875 
    876 	splx(s);
    877 	return error;
    878 }
    879 
    880 void
    881 xge_mcast_filter(struct xge_softc *sc)
    882 {
    883 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
    884 	struct ethercom *ec = &sc->sc_ethercom;
    885 	struct ether_multi *enm;
    886 	struct ether_multistep step;
    887 	int i, numaddr = 1; /* first slot used for card unicast address */
    888 	uint64_t val;
    889 
    890 	ETHER_LOCK(ec);
    891 	ETHER_FIRST_MULTI(step, ec, enm);
    892 	while (enm != NULL) {
    893 		if (memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN)) {
    894 			/* Skip ranges */
    895 			ETHER_UNLOCK(ec);
    896 			goto allmulti;
    897 		}
    898 		if (numaddr == MAX_MCAST_ADDR) {
    899 			ETHER_UNLOCK(ec);
    900 			goto allmulti;
    901 		}
    902 		for (val = 0, i = 0; i < ETHER_ADDR_LEN; i++) {
    903 			val <<= 8;
    904 			val |= enm->enm_addrlo[i];
    905 		}
    906 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, val << 16);
    907 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
    908 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
    909 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(numaddr));
    910 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
    911 			;
    912 		numaddr++;
    913 		ETHER_NEXT_MULTI(step, enm);
    914 	}
    915 	ETHER_UNLOCK(ec);
    916 	/* set the remaining entries to the broadcast address */
    917 	for (i = numaddr; i < MAX_MCAST_ADDR; i++) {
    918 		PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0xffffffffffff0000ULL);
    919 		PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xFFFFFFFFFFFFFFFFULL);
    920 		PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
    921 		    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(i));
    922 		while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
    923 			;
    924 	}
    925 	ifp->if_flags &= ~IFF_ALLMULTI;
    926 	return;
    927 
    928 allmulti:
    929 	/* Just receive everything with the multicast bit set */
    930 	ifp->if_flags |= IFF_ALLMULTI;
    931 	PIF_WCSR(RMAC_ADDR_DATA0_MEM, 0x8000000000000000ULL);
    932 	PIF_WCSR(RMAC_ADDR_DATA1_MEM, 0xF000000000000000ULL);
    933 	PIF_WCSR(RMAC_ADDR_CMD_MEM, RMAC_ADDR_CMD_MEM_WE |
    934 	    RMAC_ADDR_CMD_MEM_STR | RMAC_ADDR_CMD_MEM_OFF(1));
    935 	while (PIF_RCSR(RMAC_ADDR_CMD_MEM) & RMAC_ADDR_CMD_MEM_STR)
    936 		;
    937 }
    938 
    939 void
    940 xge_start(struct ifnet *ifp)
    941 {
    942 	struct xge_softc *sc = ifp->if_softc;
    943 	struct txd *txd = NULL; /* XXX - gcc */
    944 	bus_dmamap_t dmp;
    945 	struct	mbuf *m;
    946 	uint64_t par, lcr;
    947 	int nexttx = 0, ntxd, error, i;
    948 
    949 	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
    950 		return;
    951 
    952 	par = lcr = 0;
    953 	for (;;) {
    954 		IFQ_POLL(&ifp->if_snd, m);
    955 		if (m == NULL)
    956 			break;	/* out of packets */
    957 
    958 		if (sc->sc_nexttx == sc->sc_lasttx)
    959 			break;	/* No more space */
    960 
    961 		nexttx = sc->sc_nexttx;
    962 		dmp = sc->sc_txm[nexttx];
    963 
    964 		if ((error = bus_dmamap_load_mbuf(sc->sc_dmat, dmp, m,
    965 		    BUS_DMA_WRITE | BUS_DMA_NOWAIT)) != 0) {
    966 			printf("%s: bus_dmamap_load_mbuf error %d\n",
    967 			    XNAME, error);
    968 			break;
    969 		}
    970 		IFQ_DEQUEUE(&ifp->if_snd, m);
    971 
    972 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
    973 		    BUS_DMASYNC_PREWRITE);
    974 
    975 		txd = sc->sc_txd[nexttx];
    976 		sc->sc_txb[nexttx] = m;
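		/*
		 * One txd per DMA segment; the control bits for the whole
		 * list are set on the first and last descriptor below.
		 */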
    977 		for (i = 0; i < dmp->dm_nsegs; i++) {
    978 			if (dmp->dm_segs[i].ds_len == 0)
    979 				continue;
    980 			txd->txd_control1 = dmp->dm_segs[i].ds_len;
    981 			txd->txd_control2 = 0;
    982 			txd->txd_bufaddr = dmp->dm_segs[i].ds_addr;
    983 			txd++;
    984 		}
		ntxd = txd - sc->sc_txd[nexttx] - 1; /* index of last txd used */
    986 		txd = sc->sc_txd[nexttx];
    987 		txd->txd_control1 |= TXD_CTL1_OWN | TXD_CTL1_GCF;
    988 		txd->txd_control2 = TXD_CTL2_UTIL;
    989 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) {
    990 			txd->txd_control1 |= TXD_CTL1_MSS(m->m_pkthdr.segsz);
    991 			txd->txd_control1 |= TXD_CTL1_LSO;
    992 		}
    993 
    994 		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4)
    995 			txd->txd_control2 |= TXD_CTL2_CIPv4;
    996 		if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4)
    997 			txd->txd_control2 |= TXD_CTL2_CTCP;
    998 		if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4)
    999 			txd->txd_control2 |= TXD_CTL2_CUDP;
   1000 		txd[ntxd].txd_control1 |= TXD_CTL1_GCL;
   1001 
   1002 		bus_dmamap_sync(sc->sc_dmat, dmp, 0, dmp->dm_mapsize,
   1003 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1004 
   1005 		par = sc->sc_txdp[nexttx];
   1006 		lcr = TXDL_NUMTXD(ntxd) | TXDL_LGC_FIRST | TXDL_LGC_LAST;
   1007 		if (m->m_pkthdr.csum_flags & M_CSUM_TSOv4)
   1008 			lcr |= TXDL_SFF;
   1009 		TXP_WCSR(TXDL_PAR, par);
   1010 		TXP_WCSR(TXDL_LCR, lcr);
   1011 
   1012 		bpf_mtap(ifp, m, BPF_D_OUT);
   1013 
   1014 		sc->sc_nexttx = NEXTTX(nexttx);
   1015 	}
   1016 }
   1017 
   1018 /*
   1019  * Allocate DMA memory for transmit descriptor fragments.
   1020  * Only one map is used for all descriptors.
   1021  */
   1022 int
   1023 xge_alloc_txmem(struct xge_softc *sc)
   1024 {
   1025 	struct txd *txp;
   1026 	bus_dma_segment_t seg;
   1027 	bus_addr_t txdp;
   1028 	void *kva;
   1029 	int i, rseg, state;
   1030 
   1031 #define TXMAPSZ (NTXDESCS*NTXFRAGS*sizeof(struct txd))
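	/* `state' records how far we got, for unwinding on error. */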
   1032 	state = 0;
   1033 	if (bus_dmamem_alloc(sc->sc_dmat, TXMAPSZ, PAGE_SIZE, 0,
   1034 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
   1035 		goto err;
   1036 	state++;
   1037 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, TXMAPSZ, &kva,
   1038 	    BUS_DMA_NOWAIT))
   1039 		goto err;
   1040 
   1041 	state++;
   1042 	if (bus_dmamap_create(sc->sc_dmat, TXMAPSZ, 1, TXMAPSZ, 0,
   1043 	    BUS_DMA_NOWAIT, &sc->sc_txmap))
   1044 		goto err;
   1045 	state++;
   1046 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_txmap,
   1047 	    kva, TXMAPSZ, NULL, BUS_DMA_NOWAIT))
   1048 		goto err;
   1049 
   1050 	/* setup transmit array pointers */
   1051 	txp = (struct txd *)kva;
   1052 	txdp = seg.ds_addr;
	for (i = 0; i < NTXDESCS; i++) {
   1054 		sc->sc_txd[i] = txp;
   1055 		sc->sc_txdp[i] = txdp;
   1056 		txp += NTXFRAGS;
   1057 		txdp += (NTXFRAGS * sizeof(struct txd));
   1058 	}
   1059 
   1060 	return 0;
   1061 
   1062 err:
   1063 	if (state > 2)
   1064 		bus_dmamap_destroy(sc->sc_dmat, sc->sc_txmap);
   1065 	if (state > 1)
   1066 		bus_dmamem_unmap(sc->sc_dmat, kva, TXMAPSZ);
   1067 	if (state > 0)
   1068 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
   1069 	return ENOBUFS;
   1070 }
   1071 
   1072 /*
 * Allocate DMA memory for the receive descriptors;
 * only one map is used for all descriptors.
 * Link the receive descriptor pages together.
   1076  */
   1077 int
   1078 xge_alloc_rxmem(struct xge_softc *sc)
   1079 {
   1080 	struct rxd_4k *rxpp;
   1081 	bus_dma_segment_t seg;
   1082 	void *kva;
   1083 	int i, rseg, state;
   1084 
   1085 	/* sanity check */
   1086 	if (sizeof(struct rxd_4k) != XGE_PAGE) {
   1087 		printf("bad compiler struct alignment, %d != %d\n",
   1088 		    (int)sizeof(struct rxd_4k), XGE_PAGE);
   1089 		return EINVAL;
   1090 	}
   1091 
   1092 	state = 0;
   1093 	if (bus_dmamem_alloc(sc->sc_dmat, RXMAPSZ, PAGE_SIZE, 0,
   1094 	    &seg, 1, &rseg, BUS_DMA_NOWAIT))
   1095 		goto err;
   1096 	state++;
   1097 	if (bus_dmamem_map(sc->sc_dmat, &seg, rseg, RXMAPSZ, &kva,
   1098 	    BUS_DMA_NOWAIT))
   1099 		goto err;
   1100 
   1101 	state++;
   1102 	if (bus_dmamap_create(sc->sc_dmat, RXMAPSZ, 1, RXMAPSZ, 0,
   1103 	    BUS_DMA_NOWAIT, &sc->sc_rxmap))
   1104 		goto err;
   1105 	state++;
   1106 	if (bus_dmamap_load(sc->sc_dmat, sc->sc_rxmap,
   1107 	    kva, RXMAPSZ, NULL, BUS_DMA_NOWAIT))
   1108 		goto err;
   1109 
   1110 	/* setup receive page link pointers */
   1111 	for (rxpp = (struct rxd_4k *)kva, i = 0; i < NRXPAGES; i++, rxpp++) {
   1112 		sc->sc_rxd_4k[i] = rxpp;
   1113 		rxpp->r4_next = (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr +
   1114 		    (i*sizeof(struct rxd_4k)) + sizeof(struct rxd_4k);
   1115 	}
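	/* Close the ring: the last page points back to the first. */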
   1116 	sc->sc_rxd_4k[NRXPAGES-1]->r4_next =
   1117 	    (uint64_t)sc->sc_rxmap->dm_segs[0].ds_addr;
   1118 
   1119 	return 0;
   1120 
   1121 err:
	if (state > 2)
		bus_dmamap_destroy(sc->sc_dmat, sc->sc_rxmap);
	if (state > 1)
		bus_dmamem_unmap(sc->sc_dmat, kva, RXMAPSZ);
   1126 	if (state > 0)
   1127 		bus_dmamem_free(sc->sc_dmat, &seg, rseg);
   1128 	return ENOBUFS;
   1129 }
   1130 
   1131 
   1132 /*
   1133  * Add a new mbuf chain to descriptor id.
   1134  */
   1135 int
   1136 xge_add_rxbuf(struct xge_softc *sc, int id)
   1137 {
   1138 	struct rxdesc *rxd;
   1139 	struct mbuf *m[5];
   1140 	int page, desc, error;
   1141 #if RX_MODE == RX_MODE_5
   1142 	int i;
   1143 #endif
   1144 
   1145 	page = id/NDESC_BUFMODE;
   1146 	desc = id%NDESC_BUFMODE;
   1147 
   1148 	rxd = &sc->sc_rxd_4k[page]->r4_rxd[desc];
   1149 
   1150 	/*
	 * Allocate mbufs.
	 * In 1-buffer mode a single cluster mbuf holds the whole frame.
	 * In 5-buffer mode five mbufs and two clusters are used: the
	 * hardware puts the (ethernet, ip, tcp/udp) headers in their own
	 * buffers and the clusters are used only for data.
   1155 	 */
   1156 #if RX_MODE == RX_MODE_1
   1157 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
   1158 	if (m[0] == NULL)
   1159 		return ENOBUFS;
   1160 	MCLGET(m[0], M_DONTWAIT);
   1161 	if ((m[0]->m_flags & M_EXT) == 0) {
   1162 		m_freem(m[0]);
   1163 		return ENOBUFS;
   1164 	}
   1165 	m[0]->m_len = m[0]->m_pkthdr.len = m[0]->m_ext.ext_size;
   1166 #elif RX_MODE == RX_MODE_3
   1167 #error missing rxmode 3.
   1168 #elif RX_MODE == RX_MODE_5
   1169 	MGETHDR(m[0], M_DONTWAIT, MT_DATA);
   1170 	for (i = 1; i < 5; i++) {
   1171 		MGET(m[i], M_DONTWAIT, MT_DATA);
   1172 	}
   1173 	if (m[3])
   1174 		MCLGET(m[3], M_DONTWAIT);
   1175 	if (m[4])
   1176 		MCLGET(m[4], M_DONTWAIT);
   1177 	if (!m[0] || !m[1] || !m[2] || !m[3] || !m[4] ||
   1178 	    ((m[3]->m_flags & M_EXT) == 0) || ((m[4]->m_flags & M_EXT) == 0)) {
   1179 		/* Out of something */
   1180 		for (i = 0; i < 5; i++)
   1181 			if (m[i] != NULL)
   1182 				m_free(m[i]);
   1183 		return ENOBUFS;
   1184 	}
   1185 	/* Link'em together */
   1186 	m[0]->m_next = m[1];
   1187 	m[1]->m_next = m[2];
   1188 	m[2]->m_next = m[3];
   1189 	m[3]->m_next = m[4];
   1190 #else
   1191 #error bad mode RX_MODE
   1192 #endif
   1193 
   1194 	if (sc->sc_rxb[id])
   1195 		bus_dmamap_unload(sc->sc_dmat, sc->sc_rxm[id]);
   1196 	sc->sc_rxb[id] = m[0];
   1197 
   1198 	error = bus_dmamap_load_mbuf(sc->sc_dmat, sc->sc_rxm[id], m[0],
   1199 	    BUS_DMA_READ | BUS_DMA_NOWAIT);
   1200 	if (error)
   1201 		return error;
   1202 	bus_dmamap_sync(sc->sc_dmat, sc->sc_rxm[id], 0,
   1203 	    sc->sc_rxm[id]->dm_mapsize, BUS_DMASYNC_PREREAD);
   1204 
   1205 #if RX_MODE == RX_MODE_1
   1206 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, 0, 0);
   1207 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
   1208 	rxd->rxd_control1 = RXD_CTL1_OWN;
   1209 #elif RX_MODE == RX_MODE_3
   1210 #elif RX_MODE == RX_MODE_5
   1211 	rxd->rxd_control3 = RXD_MKCTL3(0, m[3]->m_len, m[4]->m_len);
   1212 	rxd->rxd_control2 = RXD_MKCTL2(m[0]->m_len, m[1]->m_len, m[2]->m_len);
   1213 	rxd->rxd_buf0 = (uint64_t)sc->sc_rxm[id]->dm_segs[0].ds_addr;
   1214 	rxd->rxd_buf1 = (uint64_t)sc->sc_rxm[id]->dm_segs[1].ds_addr;
   1215 	rxd->rxd_buf2 = (uint64_t)sc->sc_rxm[id]->dm_segs[2].ds_addr;
   1216 	rxd->rxd_buf3 = (uint64_t)sc->sc_rxm[id]->dm_segs[3].ds_addr;
   1217 	rxd->rxd_buf4 = (uint64_t)sc->sc_rxm[id]->dm_segs[4].ds_addr;
   1218 	rxd->rxd_control1 = RXD_CTL1_OWN;
   1219 #endif
   1220 
   1221 	XGE_RXSYNC(id, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
   1222 	return 0;
   1223 }
   1224 
   1225 /*
 * These magic values come from the FreeBSD driver.
   1227  */
   1228 int
   1229 xge_setup_xgxs(struct xge_softc *sc)
   1230 {
	/* The magic numbers are described in the user's guide. */
   1232 
   1233 	/* Writing to MDIO 0x8000 (Global Config 0) */
   1234 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
   1235 	PIF_WCSR(DTX_CONTROL, 0x80000515000000E0ULL); DELAY(50);
   1236 	PIF_WCSR(DTX_CONTROL, 0x80000515D93500E4ULL); DELAY(50);
   1237 
   1238 	/* Writing to MDIO 0x8000 (Global Config 1) */
   1239 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
   1240 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
   1241 	PIF_WCSR(DTX_CONTROL, 0x80010515001e00e4ULL); DELAY(50);
   1242 
   1243 	/* Reset the Gigablaze */
   1244 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
   1245 	PIF_WCSR(DTX_CONTROL, 0x80020515000000E0ULL); DELAY(50);
   1246 	PIF_WCSR(DTX_CONTROL, 0x80020515F21000E4ULL); DELAY(50);
   1247 
   1248 	/* read the pole settings */
   1249 	PIF_WCSR(DTX_CONTROL, 0x8000051500000000ULL); DELAY(50);
   1250 	PIF_WCSR(DTX_CONTROL, 0x80000515000000e0ULL); DELAY(50);
   1251 	PIF_WCSR(DTX_CONTROL, 0x80000515000000ecULL); DELAY(50);
   1252 
   1253 	PIF_WCSR(DTX_CONTROL, 0x8001051500000000ULL); DELAY(50);
   1254 	PIF_WCSR(DTX_CONTROL, 0x80010515000000e0ULL); DELAY(50);
   1255 	PIF_WCSR(DTX_CONTROL, 0x80010515000000ecULL); DELAY(50);
   1256 
   1257 	PIF_WCSR(DTX_CONTROL, 0x8002051500000000ULL); DELAY(50);
   1258 	PIF_WCSR(DTX_CONTROL, 0x80020515000000e0ULL); DELAY(50);
   1259 	PIF_WCSR(DTX_CONTROL, 0x80020515000000ecULL); DELAY(50);
   1260 
   1261 	/* Workaround for TX Lane XAUI initialization error.
   1262 	   Read Xpak PHY register 24 for XAUI lane status */
   1263 	PIF_WCSR(DTX_CONTROL, 0x0018040000000000ULL); DELAY(50);
   1264 	PIF_WCSR(DTX_CONTROL, 0x00180400000000e0ULL); DELAY(50);
   1265 	PIF_WCSR(DTX_CONTROL, 0x00180400000000ecULL); DELAY(50);
   1266 
   1267 	/*
	 * Reading the MDIO control with value 0x1804001c0F001c
	 * means the TxLanes were already in sync.
	 * Reading the MDIO control with value 0x1804000c0x001c
	 * means some TxLanes are not in sync, where x is a 4-bit
	 * value with one bit per lane.
   1273 	 */
   1274 #if 0
   1275 	val = PIF_RCSR(MDIO_CONTROL);
   1276 	if (val != 0x1804001c0F001cULL) {
   1277 		printf("%s: MDIO_CONTROL: %llx != %llx\n",
   1278 		    XNAME, val, 0x1804001c0F001cULL);
   1279 		return 1;
   1280 	}
   1281 #endif
   1282 
   1283 	/* Set and remove the DTE XS INTLoopBackN */
   1284 	PIF_WCSR(DTX_CONTROL, 0x0000051500000000ULL); DELAY(50);
   1285 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e0ULL); DELAY(50);
   1286 	PIF_WCSR(DTX_CONTROL, 0x00000515604000e4ULL); DELAY(50);
   1287 	PIF_WCSR(DTX_CONTROL, 0x00000515204000e4ULL); DELAY(50);
   1288 	PIF_WCSR(DTX_CONTROL, 0x00000515204000ecULL); DELAY(50);
   1289 
   1290 #if 0
   1291 	/* Reading the DTX control register Should be 0x5152040001c */
   1292 	val = PIF_RCSR(DTX_CONTROL);
   1293 	if (val != 0x5152040001cULL) {
   1294 		printf("%s: DTX_CONTROL: %llx != %llx\n",
   1295 		    XNAME, val, 0x5152040001cULL);
   1296 		return 1;
   1297 	}
   1298 #endif
   1299 
   1300 	PIF_WCSR(MDIO_CONTROL, 0x0018040000000000ULL); DELAY(50);
   1301 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000e0ULL); DELAY(50);
   1302 	PIF_WCSR(MDIO_CONTROL, 0x00180400000000ecULL); DELAY(50);
   1303 
   1304 #if 0
	/* Reading the MDIO control should be 0x1804001c0f001c */
   1306 	val = PIF_RCSR(MDIO_CONTROL);
   1307 	if (val != 0x1804001c0f001cULL) {
   1308 		printf("%s: MDIO_CONTROL2: %llx != %llx\n",
   1309 		    XNAME, val, 0x1804001c0f001cULL);
   1310 		return 1;
   1311 	}
   1312 #endif
   1313 	return 0;
   1314 }
   1315