Home | History | Annotate | Line # | Download | only in cxgb
cxgb_sge.c revision 1.1.6.2
      1 /**************************************************************************
      2 
      3 Copyright (c) 2007, Chelsio Inc.
      4 All rights reserved.
      5 
      6 Redistribution and use in source and binary forms, with or without
      7 modification, are permitted provided that the following conditions are met:
      8 
      9  1. Redistributions of source code must retain the above copyright notice,
     10     this list of conditions and the following disclaimer.
     11 
     12  2. Neither the name of the Chelsio Corporation nor the names of its
     13     contributors may be used to endorse or promote products derived from
     14     this software without specific prior written permission.
     15 
     16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26 POSSIBILITY OF SUCH DAMAGE.
     27 
     28 ***************************************************************************/
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.1.6.2 2010/08/11 22:54:06 yamt Exp $");
     32 
     33 #include <sys/param.h>
     34 #include <sys/systm.h>
     35 #include <sys/kernel.h>
     36 #include <sys/conf.h>
     37 #include <machine/bus.h>
     38 #include <sys/queue.h>
     39 #include <sys/sysctl.h>
     40 
     41 #include <sys/proc.h>
     42 #include <sys/sched.h>
     43 #include <sys/systm.h>
     44 
     45 #include <netinet/in_systm.h>
     46 #include <netinet/in.h>
     47 #include <netinet/ip.h>
     48 #include <netinet/tcp.h>
     49 
     50 #include <dev/pci/pcireg.h>
     51 #include <dev/pci/pcivar.h>
     52 
     53 #ifdef CONFIG_DEFINED
     54 #include <cxgb_include.h>
     55 #else
     56 #include <dev/pci/cxgb/cxgb_include.h>
     57 #endif
     58 
/*
 * File-scope statistics and knobs.  The non-static ones have external
 * linkage and may be referenced from other translation units.
 */
uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
int      txq_fills = 0;         /* bumped each time a stalled Tx queue is restarted */
int      collapse_mbufs = 0;
static int bogus_imm = 0;       /* immediate-data responses rejected by get_imm_packet() */
#ifndef DISABLE_MBUF_IOVEC
static int recycle_enable = 1;
#endif
     67 
#define USE_GTS 0

/* Rx buffer sizing / threshold constants. */
#define SGE_RX_SM_BUF_SIZE  1536
#define SGE_RX_DROP_THRES   16
#define SGE_RX_COPY_THRES   128

/*
 * Interval, in ticks, between runs of the Tx reclaim timer (hz / 2).
 * The timer can be slow because Tx buffers are normally reclaimed as a
 * side effect of queueing new Tx packets.
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/*
 * Size of a work request in bytes (a flit is 8 bytes).
 */
#define WR_LEN (WR_FLITS * 8)
     84 
/*
 * Bit flags stored in sge_txq.flags.
 */
enum {
    TXQ_RUNNING     = 0x1,  /* fetch engine is running */
    TXQ_LAST_PKT_DB = 0x2,  /* last packet rang the doorbell */
};
     92 
     93 struct tx_desc {
     94     uint64_t    flit[TX_DESC_FLITS];
     95 } __packed;
     96 
     97 struct rx_desc {
     98     uint32_t    addr_lo;
     99     uint32_t    len_gen;
    100     uint32_t    gen2;
    101     uint32_t    addr_hi;
    102 } __packed;
    103 
    104 struct rsp_desc {               /* response queue descriptor */
    105     struct rss_header   rss_hdr;
    106     uint32_t        flags;
    107     uint32_t        len_cq;
    108     uint8_t         imm_data[47];
    109     uint8_t         intr_gen;
    110 } __packed;
    111 
/* Bits for rx_sw_desc.flags / tx_sw_desc.flags (bit 2 is not used). */
#define RX_SW_DESC_MAP_CREATED  (1 << 0)
#define TX_SW_DESC_MAP_CREATED  (1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

/*
 * The four possible values of the SOP/EOP field extracted from a
 * response descriptor's flags with G_RSPD_SOP_EOP().
 */
#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
    121 
    122 struct tx_sw_desc {                /* SW state per Tx descriptor */
    123     struct mbuf *m;
    124     bus_dma_segment_t segs[1];
    125     bus_dmamap_t    map;
    126     int     flags;
    127 };
    128 
    129 struct rx_sw_desc {                /* SW state per Rx descriptor */
    130     void            *cl;
    131     bus_dmamap_t    map;
    132     int     flags;
    133 };
    134 
    135 struct txq_state {
    136     unsigned int compl;
    137     unsigned int gen;
    138     unsigned int pidx;
    139 };
    140 
    141 /*
    142  * Maps a number of flits to the number of Tx descriptors that can hold them.
    143  * The formula is
    144  *
    145  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
    146  *
    147  * HW allows up to 4 descriptors to be combined into a WR.
    148  */
    149 static uint8_t flit_desc_map[] = {
    150     0,
    151 #if SGE_NUM_GENBITS == 1
    152     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    153     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    154     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    155     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
    156 #elif SGE_NUM_GENBITS == 2
    157     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    158     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    159     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    160     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    161 #else
    162 # error "SGE_NUM_GENBITS must be 1 or 2"
    163 #endif
    164 };
    165 
    166 
    167 static int lro_default = 0;
    168 int cxgb_debug = 0;
    169 
    170 static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
    171 static void sge_timer_cb(void *arg);
    172 static void sge_timer_reclaim(struct work *wk, void *arg);
    173 static void sge_txq_reclaim_handler(struct work *wk, void *arg);
    174 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);
    175 
    176 /**
    177  *  reclaim_completed_tx - reclaims completed Tx descriptors
    178  *  @adapter: the adapter
    179  *  @q: the Tx queue to reclaim completed descriptors from
    180  *
    181  *  Reclaims Tx descriptors that the SGE has indicated it has processed,
    182  *  and frees the associated buffers if possible.  Called with the Tx
    183  *  queue's lock held.
    184  */
    185 static __inline int
    186 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
    187 {
    188     int reclaimed, reclaim = desc_reclaimable(q);
    189     int n = 0;
    190 
    191     mtx_assert(&q->lock, MA_OWNED);
    192     if (reclaim > 0) {
    193         n = free_tx_desc(q, min(reclaim, nbufs), mvec);
    194         reclaimed = min(reclaim, nbufs);
    195         q->cleaned += reclaimed;
    196         q->in_use -= reclaimed;
    197     }
    198     return (n);
    199 }
    200 
    201 /**
    202  *  should_restart_tx - are there enough resources to restart a Tx queue?
    203  *  @q: the Tx queue
    204  *
    205  *  Checks if there are enough descriptors to restart a suspended Tx queue.
    206  */
    207 static __inline int
    208 should_restart_tx(const struct sge_txq *q)
    209 {
    210     unsigned int r = q->processed - q->cleaned;
    211 
    212     return q->in_use - r < (q->size >> 1);
    213 }
    214 
    215 /**
    216  *  t3_sge_init - initialize SGE
    217  *  @adap: the adapter
    218  *  @p: the SGE parameters
    219  *
    220  *  Performs SGE initialization needed every time after a chip reset.
    221  *  We do not initialize any of the queue sets here, instead the driver
    222  *  top-level must request those individually.  We also do not enable DMA
    223  *  here, that should be done after the queues have been set up.
    224  */
    225 void
    226 t3_sge_init(adapter_t *adap, struct sge_params *p)
    227 {
    228     u_int ctrl, ups;
    229 
    230     ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
    231 
    232     ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
    233            F_CQCRDTCTRL |
    234            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
    235            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
    236 #if SGE_NUM_GENBITS == 1
    237     ctrl |= F_EGRGENCTRL;
    238 #endif
    239     if (adap->params.rev > 0) {
    240         if (!(adap->flags & (USING_MSIX | USING_MSI)))
    241             ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
    242         ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
    243     }
    244     t3_write_reg(adap, A_SG_CONTROL, ctrl);
    245     t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
    246              V_LORCQDRBTHRSH(512));
    247     t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
    248     t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
    249              V_TIMEOUT(200 * core_ticks_per_usec(adap)));
    250     t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
    251     t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
    252     t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
    253     t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
    254     t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
    255     t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
    256 }
    257 
    258 
/**
 *  sgl_len - calculates the size of an SGL of the given capacity
 *  @n: the number of SGL entries
 *
 *  Returns the number of flits needed for a scatter/gather list with @n
 *  entries: three flits for every pair of entries plus two flits for an
 *  odd entry left over (i.e. 3n/2 rounded up).
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
    unsigned int flits = (3 * n) / 2;

    /* An odd trailing entry costs one more flit than the floor accounts for. */
    if (n & 1)
        flits++;

    return (flits);
}
    271 
    272 /**
    273  *  get_imm_packet - return the next ingress packet buffer from a response
    274  *  @resp: the response descriptor containing the packet data
    275  *
    276  *  Return a packet containing the immediate data of the given response.
    277  */
    278 #ifdef DISABLE_MBUF_IOVEC
    279 static __inline int
    280 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
    281 {
    282     struct mbuf *m;
    283     int len;
    284     uint32_t flags = ntohl(resp->flags);
    285     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
    286 
    287     /*
    288      * would be a firmware bug
    289      */
    290     if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
    291         return (0);
    292 
    293     m = m_gethdr(M_NOWAIT, MT_DATA);
    294     len = G_RSPD_LEN(ntohl(resp->len_cq));
    295 
    296     if (m) {
    297         MH_ALIGN(m, IMMED_PKT_SIZE);
    298         memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
    299         m->m_len = len;
    300 
    301         switch (sopeop) {
    302         case RSPQ_SOP_EOP:
    303             mh->mh_head = mh->mh_tail = m;
    304             m->m_pkthdr.len = len;
    305             m->m_flags |= M_PKTHDR;
    306             break;
    307         case RSPQ_EOP:
    308             m->m_flags &= ~M_PKTHDR;
    309             mh->mh_head->m_pkthdr.len += len;
    310             mh->mh_tail->m_next = m;
    311             mh->mh_tail = m;
    312             break;
    313         }
    314     }
    315     return (m != NULL);
    316 }
    317 
    318 #else
    319 static int
    320 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
    321 {
    322     int len, error;
    323     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
    324 
    325     /*
    326      * would be a firmware bug
    327      */
    328     len = G_RSPD_LEN(ntohl(resp->len_cq));
    329     if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
    330         if (cxgb_debug)
    331             device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len);
    332         bogus_imm++;
    333         return (EINVAL);
    334     }
    335     error = 0;
    336     switch (sopeop) {
    337     case RSPQ_SOP_EOP:
    338         m->m_len = m->m_pkthdr.len = len;
    339         memcpy(mtod(m, uint8_t *), resp->imm_data, len);
    340         break;
    341     case RSPQ_EOP:
    342         memcpy(cl, resp->imm_data, len);
    343         m_iovappend(m, cl, MSIZE, len, 0);
    344         break;
    345     default:
    346         bogus_imm++;
    347         error = EINVAL;
    348     }
    349 
    350     return (error);
    351 }
    352 #endif
    353 
    354 static __inline u_int
    355 flits_to_desc(u_int n)
    356 {
    357     return (flit_desc_map[n]);
    358 }
    359 
    360 void
    361 t3_sge_err_intr_handler(adapter_t *adapter)
    362 {
    363     unsigned int v, status;
    364 
    365 
    366     status = t3_read_reg(adapter, A_SG_INT_CAUSE);
    367 
    368     if (status & F_RSPQCREDITOVERFOW)
    369         CH_ALERT(adapter, "SGE response queue credit overflow\n");
    370 
    371     if (status & F_RSPQDISABLED) {
    372         v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
    373 
    374         CH_ALERT(adapter,
    375              "packet delivered to disabled response queue (0x%x)\n",
    376              (v >> S_RSPQ0DISABLED) & 0xff);
    377     }
    378 
    379     t3_write_reg(adapter, A_SG_INT_CAUSE, status);
    380     if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
    381         t3_fatal_err(adapter);
    382 }
    383 
    384 void
    385 t3_sge_prep(adapter_t *adap, struct sge_params *p)
    386 {
    387     int i;
    388 
    389     /* XXX Does ETHER_ALIGN need to be accounted for here? */
    390     p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);
    391 
    392     for (i = 0; i < SGE_QSETS; ++i) {
    393         struct qset_params *q = p->qset + i;
    394 
    395         q->polling = adap->params.rev > 0;
    396 
    397         if (adap->params.nports > 2)
    398             q->coalesce_nsecs = 50000;
    399         else
    400             q->coalesce_nsecs = 5000;
    401 
    402         q->rspq_size = RSPQ_Q_SIZE;
    403         q->fl_size = FL_Q_SIZE;
    404         q->jumbo_size = JUMBO_Q_SIZE;
    405         q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
    406         q->txq_size[TXQ_OFLD] = 1024;
    407         q->txq_size[TXQ_CTRL] = 256;
    408         q->cong_thres = 0;
    409     }
    410 }
    411 
    412 int
    413 t3_sge_alloc(adapter_t *sc)
    414 {
    415     /* The parent tag. */
    416     sc->parent_dmat = sc->pa.pa_dmat;
    417 
    418     /*
    419      * DMA tag for normal sized RX frames
    420      */
    421     sc->rx_dmat = sc->pa.pa_dmat;
    422 
    423     /*
    424      * DMA tag for jumbo sized RX frames.
    425      */
    426     sc->rx_jumbo_dmat = sc->pa.pa_dmat;
    427 
    428     /*
    429      * DMA tag for TX frames.
    430      */
    431     sc->tx_dmat = sc->pa.pa_dmat;
    432 
    433     return (0);
    434 }
    435 
/*
 * Counterpart of t3_sge_alloc().  Nothing needs releasing because
 * t3_sge_alloc() only copies an existing tag; always returns 0.
 */
int
t3_sge_free(struct adapter * sc)
{
    (void)sc;

    return (0);
}
    441 
    442 void
    443 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
    444 {
    445 
    446     qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
    447     qs->rspq.polling = 0 /* p->polling */;
    448 }
    449 
    450 /**
    451  *  refill_fl - refill an SGE free-buffer list
    452  *  @sc: the controller softc
    453  *  @q: the free-list to refill
    454  *  @n: the number of new buffers to allocate
    455  *
    456  *  (Re)populate an SGE free-buffer list with up to @n new packet buffers.
    457  *  The caller must assure that @n does not exceed the queue's capacity.
    458  */
    459 static void
    460 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
    461 {
    462     struct rx_sw_desc *sd = &q->sdesc[q->pidx];
    463     struct rx_desc *d = &q->desc[q->pidx];
    464     void *cl;
    465     int err;
    466 
    467     while (n--) {
    468         /*
    469          * We only allocate a cluster, mbuf allocation happens after rx
    470          */
    471         if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0)
    472         {
    473             err = bus_dmamap_create(sc->pa.pa_dmat,
    474                         q->buf_size, 1, q->buf_size, 0,
    475                         BUS_DMA_ALLOCNOW, &sd->map);
    476             if (err != 0)
    477             {
    478                 log(LOG_WARNING, "failure in refill_fl\n");
    479                 return;
    480             }
    481             sd->flags |= RX_SW_DESC_MAP_CREATED;
    482         }
    483         cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT);
    484         if (cl == NULL)
    485         {
    486             log(LOG_WARNING, "Failed to allocate cluster\n");
    487             break;
    488         }
    489         err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT);
    490         if (err)
    491         {
    492             log(LOG_WARNING, "failure in refill_fl\n");
    493             free(cl, M_DEVBUF);
    494             return;
    495         }
    496 
    497         sd->flags |= RX_SW_DESC_INUSE;
    498         sd->cl = cl;
    499         d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff);
    500         d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff);
    501         d->len_gen = htobe32(V_FLD_GEN1(q->gen));
    502         d->gen2 = htobe32(V_FLD_GEN2(q->gen));
    503 
    504         d++;
    505         sd++;
    506 
    507         if (++q->pidx == q->size) {
    508             q->pidx = 0;
    509             q->gen ^= 1;
    510             sd = q->sdesc;
    511             d = q->desc;
    512         }
    513         q->credits++;
    514     }
    515 
    516     t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
    517 }
    518 
    519 
    520 /**
    521  *  free_rx_bufs - free the Rx buffers on an SGE free list
    522  *  @sc: the controle softc
    523  *  @q: the SGE free list to clean up
    524  *
    525  *  Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
    526  *  this queue should be stopped before calling this function.
    527  */
    528 static void
    529 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
    530 {
    531     u_int cidx = q->cidx;
    532 
    533     while (q->credits--) {
    534         struct rx_sw_desc *d = &q->sdesc[cidx];
    535 
    536         if (d->flags & RX_SW_DESC_INUSE) {
    537 	    bus_dmamap_unload(q->entry_tag, d->map);
    538 	    bus_dmamap_destroy(q->entry_tag, d->map);
    539 	    d->map = NULL;
    540             free(d->cl, M_DEVBUF);
    541 	    d->cl = NULL;
    542         }
    543         d->cl = NULL;
    544         if (++cidx == q->size)
    545             cidx = 0;
    546     }
    547 }
    548 
    549 static __inline void
    550 __refill_fl(adapter_t *adap, struct sge_fl *fl)
    551 {
    552     refill_fl(adap, fl, min(16U, fl->size - fl->credits));
    553 }
    554 
    555 #ifndef DISABLE_MBUF_IOVEC
    556 /**
    557  *  recycle_rx_buf - recycle a receive buffer
    558  *  @adapter: the adapter
    559  *  @q: the SGE free list
    560  *  @idx: index of buffer to recycle
    561  *
    562  *  Recycles the specified buffer on the given free list by adding it at
    563  *  the next available slot on the list.
    564  */
    565 static void
    566 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
    567 {
    568     struct rx_desc *from = &q->desc[idx];
    569     struct rx_desc *to   = &q->desc[q->pidx];
    570 
    571     q->sdesc[q->pidx] = q->sdesc[idx];
    572     to->addr_lo = from->addr_lo;        // already big endian
    573     to->addr_hi = from->addr_hi;        // likewise
    574     wmb();
    575     to->len_gen = htobe32(V_FLD_GEN1(q->gen));
    576     to->gen2 = htobe32(V_FLD_GEN2(q->gen));
    577     q->credits++;
    578 
    579     if (++q->pidx == q->size) {
    580         q->pidx = 0;
    581         q->gen ^= 1;
    582     }
    583     t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
    584 }
    585 #endif
    586 
    587 static int
    588 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    589     bus_addr_t *phys,
    590     void *desc, void *sdesc, bus_dma_tag_t *tag,
    591     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
    592 {
    593     size_t len = nelem * elem_size;
    594     void *s = NULL;
    595     void *p = NULL;
    596     int err;
    597     bus_dma_segment_t phys_seg;
    598 
    599     int nsegs;
    600 
    601     *tag = sc->pa.pa_dmat;
    602 
    603     /* allocate wired physical memory for DMA descriptor array */
    604     err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1,
    605                 &nsegs, BUS_DMA_NOWAIT);
    606     if (err != 0)
    607     {
    608         device_printf(sc->dev, "Cannot allocate descriptor memory\n");
    609         return (ENOMEM);
    610     }
    611     *phys = phys_seg.ds_addr;
    612 
    613     /* map physical address to kernel virtual address */
    614     err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p,
    615                 BUS_DMA_NOWAIT|BUS_DMA_COHERENT);
    616     if (err != 0)
    617     {
    618         device_printf(sc->dev, "Cannot map descriptor memory\n");
    619         return (ENOMEM);
    620     }
    621 
    622     memset(p, 0, len);
    623     *(void **)desc = p;
    624 
    625     if (sw_size)
    626     {
    627         len = nelem * sw_size;
    628         s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
    629         *(void **)sdesc = s;
    630     }
    631 
    632     if (parent_entry_tag == NULL)
    633         return (0);
    634     *entry_tag = sc->pa.pa_dmat;
    635 
    636     return (0);
    637 }
    638 
    639 static void
    640 sge_slow_intr_handler(struct work *wk, void *arg)
    641 {
    642     adapter_t *sc = arg;
    643 
    644     t3_slow_intr_handler(sc);
    645 }
    646 
    647 /**
    648  *  sge_timer_cb - perform periodic maintenance of an SGE qset
    649  *  @data: the SGE queue set to maintain
    650  *
    651  *  Runs periodically from a timer to perform maintenance of an SGE queue
    652  *  set.  It performs two tasks:
    653  *
    654  *  a) Cleans up any completed Tx descriptors that may still be pending.
    655  *  Normal descriptor cleanup happens when new packets are added to a Tx
    656  *  queue so this timer is relatively infrequent and does any cleanup only
    657  *  if the Tx queue has not seen any new packets in a while.  We make a
    658  *  best effort attempt to reclaim descriptors, in that we don't wait
    659  *  around if we cannot get a queue's lock (which most likely is because
    660  *  someone else is queueing new packets and so will also handle the clean
    661  *  up).  Since control queues use immediate data exclusively we don't
    662  *  bother cleaning them up here.
    663  *
    664  *  b) Replenishes Rx queues that have run out due to memory shortage.
    665  *  Normally new Rx buffers are added when existing ones are consumed but
    666  *  when out of memory a queue can become empty.  We try to add only a few
    667  *  buffers here, the queue will be replenished fully as these new buffers
    668  *  are used up if memory shortage has subsided.
    669  *
    670  *  c) Return coalesced response queue credits in case a response queue is
    671  *  starved.
    672  *
    673  *  d) Ring doorbells for T304 tunnel queues since we have seen doorbell
    674  *  fifo overflows and the FW doesn't implement any recovery scheme yet.
    675  */
    676 
    677 static void
    678 sge_timer_cb(void *arg)
    679 {
    680     adapter_t *sc = arg;
    681     struct port_info *p;
    682     struct sge_qset *qs;
    683     struct sge_txq  *txq;
    684     int i, j;
    685     int reclaim_eth, reclaim_ofl, refill_rx;
    686 
    687     for (i = 0; i < sc->params.nports; i++)
    688         for (j = 0; j < sc->port[i].nqsets; j++) {
    689             qs = &sc->sge.qs[i + j];
    690             txq = &qs->txq[0];
    691             reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
    692             reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
    693             refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
    694                 (qs->fl[1].credits < qs->fl[1].size));
    695             if (reclaim_eth || reclaim_ofl || refill_rx) {
    696                 p = &sc->port[i];
    697                 workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL);
    698                 break;
    699             }
    700         }
    701     if (sc->params.nports > 2) {
    702         int k;
    703 
    704         for_each_port(sc, k) {
    705             struct port_info *pi = &sc->port[k];
    706 
    707             t3_write_reg(sc, A_SG_KDOORBELL,
    708                      F_SELEGRCNTX |
    709                      (FW_TUNNEL_SGEEC_START + pi->first_qset));
    710         }
    711     }
    712     if (sc->open_device_map != 0)
    713         callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
    714 }
    715 
    716 /*
    717  * This is meant to be a catch-all function to keep sge state private
    718  * to sge.c
    719  *
    720  */
    721 int
    722 t3_sge_init_adapter(adapter_t *sc)
    723 {
    724     callout_init(&sc->sge_timer_ch, 0);
    725     callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
    726     sc->slow_intr_task.name = "sge_slow_intr";
    727     sc->slow_intr_task.func = sge_slow_intr_handler;
    728     sc->slow_intr_task.context = sc;
    729     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task");
    730     return (0);
    731 }
    732 
    733 int
    734 t3_sge_init_port(struct port_info *p)
    735 {
    736     p->timer_reclaim_task.name = "sge_timer_reclaim";
    737     p->timer_reclaim_task.func = sge_timer_reclaim;
    738     p->timer_reclaim_task.context = p;
    739     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task");
    740 
    741     return (0);
    742 }
    743 
    744 void
    745 t3_sge_deinit_sw(adapter_t *sc)
    746 {
    747     callout_drain(&sc->sge_timer_ch);
    748 }
    749 
    750 /**
    751  *  refill_rspq - replenish an SGE response queue
    752  *  @adapter: the adapter
    753  *  @q: the response queue to replenish
    754  *  @credits: how many new responses to make available
    755  *
    756  *  Replenishes a response queue by making the supplied number of responses
    757  *  available to HW.
    758  */
    759 static __inline void
    760 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
    761 {
    762 
    763     /* mbufs are allocated on demand when a rspq entry is processed. */
    764     t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
    765              V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
    766 }
    767 
    768 static __inline void
    769 sge_txq_reclaim_(struct sge_txq *txq)
    770 {
    771     int reclaimable, i, n;
    772     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    773     struct port_info *p;
    774 
    775     p = txq->port;
    776 reclaim_more:
    777     n = 0;
    778     reclaimable = desc_reclaimable(txq);
    779     if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
    780         n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
    781         mtx_unlock(&txq->lock);
    782     }
    783     if (n == 0)
    784         return;
    785 
    786     for (i = 0; i < n; i++) {
    787         m_freem_vec(m_vec[i]);
    788     }
    789     if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
    790         txq->size - txq->in_use >= TX_START_MAX_DESC) {
    791         txq_fills++;
    792         p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
    793         workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL);
    794     }
    795 
    796     if (n)
    797         goto reclaim_more;
    798 }
    799 
/*
 * Work handler wrapper around sge_txq_reclaim_().  @arg is the Tx queue
 * to reclaim from; @wk is unused.
 */
static void
sge_txq_reclaim_handler(struct work *wk, void *arg)
{
    sge_txq_reclaim_((struct sge_txq *)arg);
}
    807 
    808 static void
    809 sge_timer_reclaim(struct work *wk, void *arg)
    810 {
    811     struct port_info *p = arg;
    812     int i, nqsets = p->nqsets;
    813     adapter_t *sc = p->adapter;
    814     struct sge_qset *qs;
    815     struct sge_txq *txq;
    816     struct mtx *lock;
    817 
    818     for (i = 0; i < nqsets; i++) {
    819         qs = &sc->sge.qs[i];
    820         txq = &qs->txq[TXQ_ETH];
    821         sge_txq_reclaim_(txq);
    822 
    823         txq = &qs->txq[TXQ_OFLD];
    824         sge_txq_reclaim_(txq);
    825 
    826         lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
    827                 &sc->sge.qs[0].rspq.lock;
    828 
    829         if (mtx_trylock(lock)) {
    830             /* XXX currently assume that we are *NOT* polling */
    831             uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
    832 
    833             if (qs->fl[0].credits < qs->fl[0].size - 16)
    834                 __refill_fl(sc, &qs->fl[0]);
    835             if (qs->fl[1].credits < qs->fl[1].size - 16)
    836                 __refill_fl(sc, &qs->fl[1]);
    837 
    838             if (status & (1 << qs->rspq.cntxt_id)) {
    839                 if (qs->rspq.credits) {
    840                     refill_rspq(sc, &qs->rspq, 1);
    841                     qs->rspq.credits--;
    842                     t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
    843                         1 << qs->rspq.cntxt_id);
    844                 }
    845             }
    846             mtx_unlock(lock);
    847         }
    848     }
    849 }
    850 
    851 /**
    852  *  init_qset_cntxt - initialize an SGE queue set context info
    853  *  @qs: the queue set
    854  *  @id: the queue set id
    855  *
    856  *  Initializes the TIDs and context ids for the queues of a queue set.
    857  */
    858 static void
    859 init_qset_cntxt(struct sge_qset *qs, u_int id)
    860 {
    861 
    862     qs->rspq.cntxt_id = id;
    863     qs->fl[0].cntxt_id = 2 * id;
    864     qs->fl[1].cntxt_id = 2 * id + 1;
    865     qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
    866     qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
    867     qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
    868     qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
    869     qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
    870 }
    871 
    872 
    873 static void
    874 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
    875 {
    876     txq->in_use += ndesc;
    877     /*
    878      * XXX we don't handle stopping of queue
    879      * presumably start handles this when we bump against the end
    880      */
    881     txqs->gen = txq->gen;
    882     txq->unacked += ndesc;
    883     txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
    884     txq->unacked &= 7;
    885     txqs->pidx = txq->pidx;
    886     txq->pidx += ndesc;
    887 
    888     if (txq->pidx >= txq->size) {
    889         txq->pidx -= txq->size;
    890         txq->gen ^= 1;
    891     }
    892 
    893 }
    894 
    895 /**
    896  *  calc_tx_descs - calculate the number of Tx descriptors for a packet
    897  *  @m: the packet mbufs
    898  *      @nsegs: the number of segments
    899  *
    900  *  Returns the number of Tx descriptors needed for the given Ethernet
    901  *  packet.  Ethernet packets require addition of WR and CPL headers.
    902  */
    903 static __inline unsigned int
    904 calc_tx_descs(const struct mbuf *m, int nsegs)
    905 {
    906     unsigned int flits;
    907 
    908     if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
    909         return 1;
    910 
    911     flits = sgl_len(nsegs) + 2;
    912 #ifdef TSO_SUPPORTED
    913     if  (m->m_pkthdr.csum_flags & (CSUM_TSO))
    914         flits++;
    915 #endif
    916     return flits_to_desc(flits);
    917 }
    918 
    919 static unsigned int
    920 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    921     struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
    922 {
    923     struct mbuf *m0;
    924     int err, pktlen;
    925     int i, total_len;
    926 
    927     m0 = *m;
    928     pktlen = m0->m_pkthdr.len;
    929 
    930     m0 = *m;
    931     i = 0;
    932     total_len = 0;
    933     while (m0)
    934     {
    935         i++;
    936         total_len += m0->m_len;
    937         m0 = m0->m_next;
    938     }
    939     err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map);
    940     if (err)
    941         return (err);
    942     err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0);
    943     if (err)
    944         return (err);
    945     // feed out the physical mappings
    946     *nsegs = stx->map->dm_nsegs;
    947     for (i=0; i<*nsegs; i++)
    948     {
    949         segs[i] = stx->map->dm_segs[i];
    950     }
    951 #ifdef DEBUG
    952     if (err) {
    953         int n = 0;
    954         struct mbuf *mtmp = m0;
    955         while(mtmp) {
    956             n++;
    957             mtmp = mtmp->m_next;
    958         }
    959         printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
    960             err, m0->m_pkthdr.len, n);
    961     }
    962 #endif
    963     if (err == EFBIG) {
    964         /* Too many segments, try to defrag */
    965         m0 = m_defrag(m0, M_DONTWAIT);
    966         if (m0 == NULL) {
    967             m_freem(*m);
    968             *m = NULL;
    969             return (ENOBUFS);
    970         }
    971         *m = m0;
    972         INT3; // XXXXXXXXXXXXXXXXXX like above!
    973     }
    974 
    975     if (err == ENOMEM) {
    976         return (err);
    977     }
    978 
    979     if (err) {
    980         if (cxgb_debug)
    981             printf("map failure err=%d pktlen=%d\n", err, pktlen);
    982         m_freem_vec(m0);
    983         *m = NULL;
    984         return (err);
    985     }
    986 
    987     bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE);
    988     stx->flags |= TX_SW_DESC_MAPPED;
    989 
    990     return (0);
    991 }
    992 
    993 /**
    994  *  make_sgl - populate a scatter/gather list for a packet
    995  *  @sgp: the SGL to populate
    996  *  @segs: the packet dma segments
    997  *  @nsegs: the number of segments
    998  *
    999  *  Generates a scatter/gather list for the buffers that make up a packet
   1000  *  and returns the SGL size in 8-byte words.  The caller must size the SGL
   1001  *  appropriately.
   1002  */
   1003 static __inline void
   1004 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
   1005 {
   1006     int i, idx;
   1007 
   1008     for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
   1009         if (i && idx == 0)
   1010             ++sgp;
   1011 
   1012         sgp->len[idx] = htobe32(segs[i].ds_len);
   1013         sgp->addr[idx] = htobe64(segs[i].ds_addr);
   1014     }
   1015 
   1016     if (idx)
   1017         sgp->len[idx] = 0;
   1018 }
   1019 
   1020 /**
   1021  *  check_ring_tx_db - check and potentially ring a Tx queue's doorbell
   1022  *  @adap: the adapter
   1023  *  @q: the Tx queue
   1024  *
   1025  *  Ring the doorbel if a Tx queue is asleep.  There is a natural race,
   1026  *  where the HW is going to sleep just after we checked, however,
   1027  *  then the interrupt handler will detect the outstanding TX packet
   1028  *  and ring the doorbell for us.
   1029  *
   1030  *  When GTS is disabled we unconditionally ring the doorbell.
   1031  */
   1032 static __inline void
   1033 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
   1034 {
   1035 #if USE_GTS
   1036     clear_bit(TXQ_LAST_PKT_DB, &q->flags);
   1037     if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
   1038         set_bit(TXQ_LAST_PKT_DB, &q->flags);
   1039 #ifdef T3_TRACE
   1040         T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
   1041               q->cntxt_id);
   1042 #endif
   1043         t3_write_reg(adap, A_SG_KDOORBELL,
   1044                  F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
   1045     }
   1046 #else
   1047     wmb();            /* write descriptors before telling HW */
   1048     t3_write_reg(adap, A_SG_KDOORBELL,
   1049              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
   1050 #endif
   1051 }
   1052 
/*
 * Store the generation value, big-endian, in the last flit of a Tx
 * descriptor.  This only happens when the driver is built with
 * SGE_NUM_GENBITS == 2; otherwise the function does nothing.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
    d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#else
    (void)d;
    (void)gen;
#endif
}
   1060 
   1061 
   1062 
   1063 /**
   1064  *  write_wr_hdr_sgl - write a WR header and, optionally, SGL
   1065  *  @ndesc: number of Tx descriptors spanned by the SGL
   1066  *  @txd: first Tx descriptor to be written
   1067  *  @txqs: txq state (generation and producer index)
   1068  *  @txq: the SGE Tx queue
   1069  *  @sgl: the SGL
   1070  *  @flits: number of flits to the start of the SGL in the first descriptor
   1071  *  @sgl_flits: the SGL size in flits
   1072  *  @wr_hi: top 32 bits of WR header based on WR type (big endian)
   1073  *  @wr_lo: low 32 bits of WR header based on WR type (big endian)
   1074  *
   1075  *  Write a work request header and an associated SGL.  If the SGL is
   1076  *  small enough to fit into one Tx descriptor it has already been written
   1077  *  and we just need to write the WR header.  Otherwise we distribute the
   1078  *  SGL across the number of descriptors it spans.
   1079  */
   1080 
   1081 static void
   1082 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
   1083     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
   1084     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
   1085 {
   1086 
   1087     struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
   1088     struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
   1089 
   1090     if (__predict_true(ndesc == 1)) {
   1091         wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
   1092             V_WR_SGLSFLT(flits)) | wr_hi;
   1093         wmb();
   1094         wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
   1095             V_WR_GEN(txqs->gen)) | wr_lo;
   1096         /* XXX gen? */
   1097         wr_gen2(txd, txqs->gen);
   1098     } else {
   1099         unsigned int ogen = txqs->gen;
   1100         const uint64_t *fp = (const uint64_t *)sgl;
   1101         struct work_request_hdr *wp = wrp;
   1102 
   1103         wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
   1104             V_WR_SGLSFLT(flits)) | wr_hi;
   1105 
   1106         while (sgl_flits) {
   1107             unsigned int avail = WR_FLITS - flits;
   1108 
   1109             if (avail > sgl_flits)
   1110                 avail = sgl_flits;
   1111             memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
   1112             sgl_flits -= avail;
   1113             ndesc--;
   1114             if (!sgl_flits)
   1115                 break;
   1116 
   1117             fp += avail;
   1118             txd++;
   1119             txsd++;
   1120             if (++txqs->pidx == txq->size) {
   1121                 txqs->pidx = 0;
   1122                 txqs->gen ^= 1;
   1123                 txd = txq->desc;
   1124                 txsd = txq->sdesc;
   1125             }
   1126 
   1127             /*
   1128              * when the head of the mbuf chain
   1129              * is freed all clusters will be freed
   1130              * with it
   1131              */
   1132             txsd->m = NULL;
   1133             wrp = (struct work_request_hdr *)txd;
   1134             wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
   1135                 V_WR_SGLSFLT(1)) | wr_hi;
   1136             wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
   1137                     sgl_flits + 1)) |
   1138                 V_WR_GEN(txqs->gen)) | wr_lo;
   1139             wr_gen2(txd, txqs->gen);
   1140             flits = 1;
   1141         }
   1142         wrp->wr_hi |= htonl(F_WR_EOP);
   1143         wmb();
   1144         wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
   1145         wr_gen2((struct tx_desc *)wp, ogen);
   1146     }
   1147 }
   1148 
   1149 
   1150 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
   1151 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
   1152 
   1153 int
   1154 t3_encap(struct port_info *p, struct mbuf **m, int *free_it)
   1155 {
   1156     adapter_t *sc;
   1157     struct mbuf *m0;
   1158     struct sge_qset *qs;
   1159     struct sge_txq *txq;
   1160     struct tx_sw_desc *stx;
   1161     struct txq_state txqs;
   1162     unsigned int ndesc, flits, cntrl, mlen;
   1163     int err, nsegs, tso_info = 0;
   1164 
   1165     struct work_request_hdr *wrp;
   1166     struct tx_sw_desc *txsd;
   1167     struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
   1168     bus_dma_segment_t segs[TX_MAX_SEGS];
   1169     uint32_t wr_hi, wr_lo, sgl_flits;
   1170 
   1171     struct tx_desc *txd;
   1172     struct cpl_tx_pkt *cpl;
   1173 
   1174     m0 = *m;
   1175     sc = p->adapter;
   1176 
   1177     DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);
   1178 
   1179     /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */
   1180 
   1181     qs = &sc->sge.qs[p->first_qset];
   1182 
   1183     txq = &qs->txq[TXQ_ETH];
   1184     stx = &txq->sdesc[txq->pidx];
   1185     txd = &txq->desc[txq->pidx];
   1186     cpl = (struct cpl_tx_pkt *)txd;
   1187     mlen = m0->m_pkthdr.len;
   1188     cpl->len = htonl(mlen | 0x80000000);
   1189 
   1190     DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
   1191     /*
   1192      * XXX handle checksum, TSO, and VLAN here
   1193      *
   1194      */
   1195     cntrl = V_TXPKT_INTF(p->txpkt_intf);
   1196 
   1197     /*
   1198      * XXX need to add VLAN support for 6.x
   1199      */
   1200 #ifdef VLAN_SUPPORTED
   1201     if (m0->m_flags & M_VLANTAG)
   1202         cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
   1203     if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
   1204         tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
   1205 #endif
   1206     if (tso_info) {
   1207         int eth_type;
   1208         struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
   1209         struct ip *ip;
   1210         struct tcphdr *tcp;
   1211         char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */
   1212 
   1213         txd->flit[2] = 0;
   1214         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
   1215         hdr->cntrl = htonl(cntrl);
   1216 
   1217         if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
   1218             pkthdr = &tmp[0];
   1219             m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
   1220         } else {
   1221             pkthdr = mtod(m0, char *);
   1222         }
   1223 
   1224 #ifdef VLAN_SUPPORTED
   1225         if (__predict_false(m0->m_flags & M_VLANTAG)) {
   1226             eth_type = CPL_ETH_II_VLAN;
   1227             ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
   1228                 ETHER_VLAN_ENCAP_LEN);
   1229         } else {
   1230             eth_type = CPL_ETH_II;
   1231             ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
   1232         }
   1233 #else
   1234         eth_type = CPL_ETH_II;
   1235         ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
   1236 #endif
   1237         tcp = (struct tcphdr *)((uint8_t *)ip +
   1238             sizeof(*ip));
   1239 
   1240         tso_info |= V_LSO_ETH_TYPE(eth_type) |
   1241                 V_LSO_IPHDR_WORDS(ip->ip_hl) |
   1242                 V_LSO_TCPHDR_WORDS(tcp->th_off);
   1243         hdr->lso_info = htonl(tso_info);
   1244         flits = 3;
   1245     } else {
   1246         cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
   1247         cpl->cntrl = htonl(cntrl);
   1248 
   1249         if (mlen <= WR_LEN - sizeof(*cpl)) {
   1250             txq_prod(txq, 1, &txqs);
   1251             txq->sdesc[txqs.pidx].m = NULL;
   1252 
   1253             if (m0->m_len == m0->m_pkthdr.len)
   1254                 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
   1255             else
   1256                 m_copydata(m0, 0, mlen, (void *)&txd->flit[2]);
   1257 
   1258             *free_it = 1;
   1259             flits = (mlen + 7) / 8 + 2;
   1260             cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
   1261                       V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
   1262                       F_WR_SOP | F_WR_EOP | txqs.compl);
   1263             wmb();
   1264             cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
   1265                 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
   1266 
   1267             wr_gen2(txd, txqs.gen);
   1268             check_ring_tx_db(sc, txq);
   1269             return (0);
   1270         }
   1271         flits = 2;
   1272     }
   1273 
   1274     wrp = (struct work_request_hdr *)txd;
   1275 
   1276     if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
   1277         return (err);
   1278     }
   1279     m0 = *m;
   1280     ndesc = calc_tx_descs(m0, nsegs);
   1281 
   1282     sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
   1283     make_sgl(sgp, segs, nsegs);
   1284 
   1285     sgl_flits = sgl_len(nsegs);
   1286 
   1287     DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
   1288     txq_prod(txq, ndesc, &txqs);
   1289     txsd = &txq->sdesc[txqs.pidx];
   1290     wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
   1291     wr_lo = htonl(V_WR_TID(txq->token));
   1292     txsd->m = m0;
   1293     m_set_priority(m0, txqs.pidx);
   1294 
   1295     write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
   1296     check_ring_tx_db(p->adapter, txq);
   1297 
   1298     return (0);
   1299 }
   1300 
   1301 
   1302 /**
   1303  *  write_imm - write a packet into a Tx descriptor as immediate data
   1304  *  @d: the Tx descriptor to write
   1305  *  @m: the packet
   1306  *  @len: the length of packet data to write as immediate data
   1307  *  @gen: the generation bit value to write
   1308  *
   1309  *  Writes a packet as immediate data into a Tx descriptor.  The packet
   1310  *  contains a work request at its beginning.  We must write the packet
   1311  *  carefully so the SGE doesn't read accidentally before it's written in
   1312  *  its entirety.
   1313  */
   1314 static __inline void
   1315 write_imm(struct tx_desc *d, struct mbuf *m,
   1316       unsigned int len, unsigned int gen)
   1317 {
   1318     struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
   1319     struct work_request_hdr *to = (struct work_request_hdr *)d;
   1320 
   1321     memcpy(&to[1], &from[1], len - sizeof(*from));
   1322     to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
   1323                     V_WR_BCNTLFLT(len & 7));
   1324     wmb();
   1325     to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
   1326                     V_WR_LEN((len + 7) / 8));
   1327     wr_gen2(d, gen);
   1328     m_freem(m);
   1329 }
   1330 
   1331 /**
   1332  *  check_desc_avail - check descriptor availability on a send queue
   1333  *  @adap: the adapter
   1334  *  @q: the TX queue
   1335  *  @m: the packet needing the descriptors
   1336  *  @ndesc: the number of Tx descriptors needed
   1337  *  @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
   1338  *
   1339  *  Checks if the requested number of Tx descriptors is available on an
   1340  *  SGE send queue.  If the queue is already suspended or not enough
   1341  *  descriptors are available the packet is queued for later transmission.
   1342  *  Must be called with the Tx queue locked.
   1343  *
   1344  *  Returns 0 if enough descriptors are available, 1 if there aren't
   1345  *  enough descriptors and the packet has been queued, and 2 if the caller
   1346  *  needs to retry because there weren't enough descriptors at the
   1347  *  beginning of the call but some freed up in the mean time.
   1348  */
   1349 static __inline int
   1350 check_desc_avail(adapter_t *adap, struct sge_txq *q,
   1351          struct mbuf *m, unsigned int ndesc,
   1352          unsigned int qid)
   1353 {
   1354     /*
   1355      * XXX We currently only use this for checking the control queue
   1356      * the control queue is only used for binding qsets which happens
   1357      * at init time so we are guaranteed enough descriptors
   1358      */
   1359     if (__predict_false(!mbufq_empty(&q->sendq))) {
   1360 addq_exit:  mbufq_tail(&q->sendq, m);
   1361         return 1;
   1362     }
   1363     if (__predict_false(q->size - q->in_use < ndesc)) {
   1364 
   1365         struct sge_qset *qs = txq_to_qset(q, qid);
   1366 
   1367         setbit(&qs->txq_stopped, qid);
   1368         smp_mb();
   1369 
   1370         if (should_restart_tx(q) &&
   1371             test_and_clear_bit(qid, &qs->txq_stopped))
   1372             return 2;
   1373 
   1374         q->stops++;
   1375         goto addq_exit;
   1376     }
   1377     return 0;
   1378 }
   1379 
   1380 
   1381 /**
   1382  *  reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
   1383  *  @q: the SGE control Tx queue
   1384  *
   1385  *  This is a variant of reclaim_completed_tx() that is used for Tx queues
   1386  *  that send only immediate data (presently just the control queues) and
   1387  *  thus do not have any mbufs
   1388  */
   1389 static __inline void
   1390 reclaim_completed_tx_imm(struct sge_txq *q)
   1391 {
   1392     unsigned int reclaim = q->processed - q->cleaned;
   1393 
   1394     mtx_assert(&q->lock, MA_OWNED);
   1395 
   1396     q->in_use -= reclaim;
   1397     q->cleaned += reclaim;
   1398 }
   1399 
   1400 static __inline int
   1401 immediate(const struct mbuf *m)
   1402 {
   1403     return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
   1404 }
   1405 
   1406 /**
   1407  *  ctrl_xmit - send a packet through an SGE control Tx queue
   1408  *  @adap: the adapter
   1409  *  @q: the control queue
   1410  *  @m: the packet
   1411  *
   1412  *  Send a packet through an SGE control Tx queue.  Packets sent through
   1413  *  a control queue must fit entirely as immediate data in a single Tx
   1414  *  descriptor and have no page fragments.
   1415  */
   1416 static int
   1417 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
   1418 {
   1419     int ret;
   1420     struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
   1421 
   1422     if (__predict_false(!immediate(m))) {
   1423         m_freem(m);
   1424         return 0;
   1425     }
   1426 
   1427     wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
   1428     wrp->wr_lo = htonl(V_WR_TID(q->token));
   1429 
   1430     mtx_lock(&q->lock);
   1431 again:  reclaim_completed_tx_imm(q);
   1432 
   1433     ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
   1434     if (__predict_false(ret)) {
   1435         if (ret == 1) {
   1436             mtx_unlock(&q->lock);
   1437             return (-1);
   1438         }
   1439         goto again;
   1440     }
   1441 
   1442     write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
   1443 
   1444     q->in_use++;
   1445     if (++q->pidx >= q->size) {
   1446         q->pidx = 0;
   1447         q->gen ^= 1;
   1448     }
   1449     mtx_unlock(&q->lock);
   1450     wmb();
   1451     t3_write_reg(adap, A_SG_KDOORBELL,
   1452              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
   1453     return (0);
   1454 }
   1455 
   1456 
   1457 /**
   1458  *  restart_ctrlq - restart a suspended control queue
   1459  *  @qs: the queue set cotaining the control queue
   1460  *
   1461  *  Resumes transmission on a suspended Tx control queue.
   1462  */
   1463 static void
   1464 restart_ctrlq(struct work *wk, void *data)
   1465 {
   1466     struct mbuf *m;
   1467     struct sge_qset *qs = (struct sge_qset *)data;
   1468     struct sge_txq *q = &qs->txq[TXQ_CTRL];
   1469     adapter_t *adap = qs->port->adapter;
   1470 
   1471     mtx_lock(&q->lock);
   1472 again:  reclaim_completed_tx_imm(q);
   1473 
   1474     while (q->in_use < q->size &&
   1475            (m = mbufq_dequeue(&q->sendq)) != NULL) {
   1476 
   1477         write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
   1478 
   1479         if (++q->pidx >= q->size) {
   1480             q->pidx = 0;
   1481             q->gen ^= 1;
   1482         }
   1483         q->in_use++;
   1484     }
   1485     if (!mbufq_empty(&q->sendq)) {
   1486         setbit(&qs->txq_stopped, TXQ_CTRL);
   1487         smp_mb();
   1488 
   1489         if (should_restart_tx(q) &&
   1490             test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
   1491             goto again;
   1492         q->stops++;
   1493     }
   1494     mtx_unlock(&q->lock);
   1495     t3_write_reg(adap, A_SG_KDOORBELL,
   1496              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
   1497 }
   1498 
   1499 
   1500 /*
   1501  * Send a management message through control queue 0
   1502  */
   1503 int
   1504 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
   1505 {
   1506     return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
   1507 }
   1508 
   1509 /**
   1510  *  free_qset - free the resources of an SGE queue set
   1511  *  @sc: the controller owning the queue set
   1512  *  @q: the queue set
   1513  *
   1514  *  Release the HW and SW resources associated with an SGE queue set, such
   1515  *  as HW contexts, packet buffers, and descriptor rings.  Traffic to the
   1516  *  queue set must be quiesced prior to calling this.
   1517  */
   1518 static void
   1519 t3_free_qset(adapter_t *sc, struct sge_qset *q)
   1520 {
   1521     int i;
   1522 
   1523     for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
   1524         if (q->fl[i].desc) {
   1525             mtx_lock(&sc->sge.reg_lock);
   1526             t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
   1527             mtx_unlock(&sc->sge.reg_lock);
   1528             bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
   1529 		INT3;
   1530 //            bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1);
   1531             // XXXXXXXXXXX destroy DMA tags????
   1532         }
   1533         if (q->fl[i].sdesc) {
   1534             free_rx_bufs(sc, &q->fl[i]);
   1535             free(q->fl[i].sdesc, M_DEVBUF);
   1536         }
   1537     }
   1538 
   1539     for (i = 0; i < SGE_TXQ_PER_SET; i++) {
   1540         if (q->txq[i].desc) {
   1541             mtx_lock(&sc->sge.reg_lock);
   1542             t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
   1543             mtx_unlock(&sc->sge.reg_lock);
   1544             bus_dmamap_unload(q->txq[i].desc_tag,
   1545                     q->txq[i].desc_map);
   1546 		INT3;
   1547 //            bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1);
   1548             // XXXXXXXXXXX destroy DMA tags????  And the lock?!??!
   1549 
   1550         }
   1551         if (q->txq[i].sdesc) {
   1552             free(q->txq[i].sdesc, M_DEVBUF);
   1553         }
   1554     }
   1555 
   1556     if (q->rspq.desc) {
   1557         mtx_lock(&sc->sge.reg_lock);
   1558         t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
   1559         mtx_unlock(&sc->sge.reg_lock);
   1560 
   1561         bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
   1562 	INT3;
   1563 //        bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1);
   1564         // XXXXXXXXXXX destroy DMA tags???? and the LOCK ?!?!?
   1565     }
   1566 
   1567     memset(q, 0, sizeof(*q));
   1568 }
   1569 
   1570 /**
   1571  *  t3_free_sge_resources - free SGE resources
   1572  *  @sc: the adapter softc
   1573  *
   1574  *  Frees resources used by the SGE queue sets.
   1575  */
   1576 void
   1577 t3_free_sge_resources(adapter_t *sc)
   1578 {
   1579     int i, nqsets;
   1580 
   1581     for (nqsets = i = 0; i < (sc)->params.nports; i++)
   1582         nqsets += sc->port[i].nqsets;
   1583 
   1584     for (i = 0; i < nqsets; ++i)
   1585         t3_free_qset(sc, &sc->sge.qs[i]);
   1586 }
   1587 
   1588 /**
   1589  *  t3_sge_start - enable SGE
   1590  *  @sc: the controller softc
   1591  *
   1592  *  Enables the SGE for DMAs.  This is the last step in starting packet
   1593  *  transfers.
   1594  */
   1595 void
   1596 t3_sge_start(adapter_t *sc)
   1597 {
   1598     t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
   1599 }
   1600 
   1601 /**
   1602  *  t3_sge_stop - disable SGE operation
   1603  *  @sc: the adapter
   1604  *
   1605  *  Disables the DMA engine.  This can be called in emeregencies (e.g.,
   1606  *  from error interrupts) or from normal process context.  In the latter
   1607  *  case it also disables any pending queue restart tasklets.  Note that
   1608  *  if it is called in interrupt context it cannot disable the restart
   1609  *  tasklets as it cannot wait, however the tasklets will have no effect
   1610  *  since the doorbells are disabled and the driver will call this again
   1611  *  later from process context, at which time the tasklets will be stopped
   1612  *  if they are still running.
   1613  */
   1614 void
   1615 t3_sge_stop(adapter_t *sc)
   1616 {
   1617     int i, nqsets;
   1618 
   1619     t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
   1620 
   1621     for (nqsets = i = 0; i < (sc)->params.nports; i++)
   1622         nqsets += sc->port[i].nqsets;
   1623 
   1624     for (i = 0; i < nqsets; ++i) {
   1625     }
   1626 }
   1627 
   1628 
   1629 /**
   1630  *  free_tx_desc - reclaims Tx descriptors and their buffers
   1631  *  @adapter: the adapter
   1632  *  @q: the Tx queue to reclaim descriptors from
   1633  *  @n: the number of descriptors to reclaim
   1634  *
   1635  *  Reclaims Tx descriptors from an SGE Tx queue and frees the associated
   1636  *  Tx buffers.  Called with the Tx queue lock held.
   1637  */
   1638 int
   1639 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
   1640 {
   1641     struct tx_sw_desc *d;
   1642     unsigned int cidx = q->cidx;
   1643     int nbufs = 0;
   1644 
   1645 #ifdef T3_TRACE
   1646     T3_TRACE2(sc->tb[q->cntxt_id & 7],
   1647           "reclaiming %u Tx descriptors at cidx %u", n, cidx);
   1648 #endif
   1649     d = &q->sdesc[cidx];
   1650 
   1651     while (n-- > 0) {
   1652         DPRINTF("cidx=%d d=%p\n", cidx, d);
   1653         if (d->m) {
   1654             if (d->flags & TX_SW_DESC_MAPPED) {
   1655                 bus_dmamap_unload(q->entry_tag, d->map);
   1656                 bus_dmamap_destroy(q->entry_tag, d->map);
   1657                 d->flags &= ~TX_SW_DESC_MAPPED;
   1658             }
   1659             if (m_get_priority(d->m) == cidx) {
   1660                 m_vec[nbufs] = d->m;
   1661                 d->m = NULL;
   1662                 nbufs++;
   1663             } else {
   1664                 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
   1665             }
   1666         }
   1667         ++d;
   1668         if (++cidx == q->size) {
   1669             cidx = 0;
   1670             d = q->sdesc;
   1671         }
   1672     }
   1673     q->cidx = cidx;
   1674 
   1675     return (nbufs);
   1676 }
   1677 
   1678 /**
   1679  *  is_new_response - check if a response is newly written
   1680  *  @r: the response descriptor
   1681  *  @q: the response queue
   1682  *
   1683  *  Returns true if a response descriptor contains a yet unprocessed
   1684  *  response.
   1685  */
   1686 static __inline int
   1687 is_new_response(const struct rsp_desc *r,
   1688     const struct sge_rspq *q)
   1689 {
   1690     return (r->intr_gen & F_RSPD_GEN2) == q->gen;
   1691 }
   1692 
/* GTS flags of Tx queues 0 and 1 in a response descriptor. */
#define RSPD_GTS_MASK   (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)

/* The GTS flags plus the full TXQ0/TXQ1/TXQ2 CR fields. */
#define RSPD_CTRL_MASK \
    (RSPD_GTS_MASK | V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
     V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* Delay before the next interrupt on memory shortage, in 0.1us units. */
#define NOMEM_INTR_DELAY 2500
   1701 
   1702 /**
   1703  *  write_ofld_wr - write an offload work request
   1704  *  @adap: the adapter
   1705  *  @m: the packet to send
   1706  *  @q: the Tx queue
   1707  *  @pidx: index of the first Tx descriptor to write
   1708  *  @gen: the generation value to use
   1709  *  @ndesc: number of descriptors the packet will occupy
   1710  *
   1711  *  Write an offload work request to send the supplied packet.  The packet
   1712  *  data already carry the work request with most fields populated.
   1713  */
   1714 static void
   1715 write_ofld_wr(adapter_t *adap, struct mbuf *m,
   1716     struct sge_txq *q, unsigned int pidx,
   1717     unsigned int gen, unsigned int ndesc,
   1718     bus_dma_segment_t *segs, unsigned int nsegs)
   1719 {
   1720     unsigned int sgl_flits, flits;
   1721     struct work_request_hdr *from;
   1722     struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
   1723     struct tx_desc *d = &q->desc[pidx];
   1724     struct txq_state txqs;
   1725 
   1726     if (immediate(m)) {
   1727         q->sdesc[pidx].m = NULL;
   1728         write_imm(d, m, m->m_len, gen);
   1729         return;
   1730     }
   1731 
   1732     /* Only TX_DATA builds SGLs */
   1733 
   1734     from = mtod(m, struct work_request_hdr *);
   1735     INT3; ///  DEBUG this???
   1736     flits = 3; // XXXXXXXXXXXXXX
   1737 
   1738     sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
   1739 
   1740     make_sgl(sgp, segs, nsegs);
   1741     sgl_flits = sgl_len(nsegs);
   1742 
   1743     txqs.gen = q->gen;
   1744     txqs.pidx = q->pidx;
   1745     txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
   1746     write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
   1747         from->wr_hi, from->wr_lo);
   1748 }
   1749 
   1750 /**
   1751  *  calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
   1752  *  @m: the packet
   1753  *
   1754  *  Returns the number of Tx descriptors needed for the given offload
   1755  *  packet.  These packets are already fully constructed.
   1756  */
   1757 static __inline unsigned int
   1758 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
   1759 {
   1760     unsigned int flits, cnt = 0;
   1761 
   1762 
   1763     if (m->m_len <= WR_LEN)
   1764         return 1;                 /* packet fits as immediate data */
   1765 
   1766     if (m->m_flags & M_IOVEC)
   1767         cnt = mtomv(m)->mv_count;
   1768 
   1769     INT3; // Debug this????
   1770     flits = 3; // XXXXXXXXX
   1771 
   1772     return flits_to_desc(flits + sgl_len(cnt));
   1773 }
   1774 
   1775 /**
   1776  *  ofld_xmit - send a packet through an offload queue
   1777  *  @adap: the adapter
   1778  *  @q: the Tx offload queue
   1779  *  @m: the packet
   1780  *
   1781  *  Send an offload packet through an SGE offload queue.
   1782  */
   1783 static int
   1784 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
   1785 {
   1786     int ret, nsegs;
   1787     unsigned int ndesc;
   1788     unsigned int pidx, gen;
   1789     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
   1790     bus_dma_segment_t segs[TX_MAX_SEGS];
   1791     int i, cleaned;
   1792     struct tx_sw_desc *stx = &q->sdesc[q->pidx];
   1793 
   1794     mtx_lock(&q->lock);
   1795     if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
   1796         mtx_unlock(&q->lock);
   1797         return (ret);
   1798     }
   1799     ndesc = calc_tx_descs_ofld(m, nsegs);
   1800 again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
   1801 
   1802     ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
   1803     if (__predict_false(ret)) {
   1804         if (ret == 1) {
   1805             m_set_priority(m, ndesc);     /* save for restart */
   1806             mtx_unlock(&q->lock);
   1807             return EINTR;
   1808         }
   1809         goto again;
   1810     }
   1811 
   1812     gen = q->gen;
   1813     q->in_use += ndesc;
   1814     pidx = q->pidx;
   1815     q->pidx += ndesc;
   1816     if (q->pidx >= q->size) {
   1817         q->pidx -= q->size;
   1818         q->gen ^= 1;
   1819     }
   1820 #ifdef T3_TRACE
   1821     T3_TRACE5(adap->tb[q->cntxt_id & 7],
   1822           "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
   1823           ndesc, pidx, skb->len, skb->len - skb->data_len,
   1824           skb_shinfo(skb)->nr_frags);
   1825 #endif
   1826     mtx_unlock(&q->lock);
   1827 
   1828     write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
   1829     check_ring_tx_db(adap, q);
   1830 
   1831     for (i = 0; i < cleaned; i++) {
   1832         m_freem_vec(m_vec[i]);
   1833     }
   1834     return (0);
   1835 }
   1836 
   1837 /**
   1838  *  restart_offloadq - restart a suspended offload queue
   1839  *  @qs: the queue set cotaining the offload queue
   1840  *
   1841  *  Resumes transmission on a suspended Tx offload queue.
   1842  */
   1843 static void
   1844 restart_offloadq(struct work *wk, void *data)
   1845 {
   1846 
   1847     struct mbuf *m;
   1848     struct sge_qset *qs = data;
   1849     struct sge_txq *q = &qs->txq[TXQ_OFLD];
   1850     adapter_t *adap = qs->port->adapter;
   1851     struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
   1852     bus_dma_segment_t segs[TX_MAX_SEGS];
   1853     int nsegs, i, cleaned;
   1854     struct tx_sw_desc *stx = &q->sdesc[q->pidx];
   1855 
   1856     mtx_lock(&q->lock);
   1857 again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);
   1858 
   1859     while ((m = mbufq_peek(&q->sendq)) != NULL) {
   1860         unsigned int gen, pidx;
   1861         unsigned int ndesc = m_get_priority(m);
   1862 
   1863         if (__predict_false(q->size - q->in_use < ndesc)) {
   1864             setbit(&qs->txq_stopped, TXQ_OFLD);
   1865             smp_mb();
   1866 
   1867             if (should_restart_tx(q) &&
   1868                 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
   1869                 goto again;
   1870             q->stops++;
   1871             break;
   1872         }
   1873 
   1874         gen = q->gen;
   1875         q->in_use += ndesc;
   1876         pidx = q->pidx;
   1877         q->pidx += ndesc;
   1878         if (q->pidx >= q->size) {
   1879             q->pidx -= q->size;
   1880             q->gen ^= 1;
   1881         }
   1882 
   1883         (void)mbufq_dequeue(&q->sendq);
   1884         busdma_map_mbufs(&m, q, stx, segs, &nsegs);
   1885         mtx_unlock(&q->lock);
   1886         write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
   1887         mtx_lock(&q->lock);
   1888     }
   1889     mtx_unlock(&q->lock);
   1890 
   1891 #if USE_GTS
   1892     set_bit(TXQ_RUNNING, &q->flags);
   1893     set_bit(TXQ_LAST_PKT_DB, &q->flags);
   1894 #endif
   1895     t3_write_reg(adap, A_SG_KDOORBELL,
   1896              F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
   1897 
   1898     for (i = 0; i < cleaned; i++) {
   1899         m_freem_vec(m_vec[i]);
   1900     }
   1901 }
   1902 
/**
 *  queue_set - return the queue set a packet should use
 *  @m: the packet
 *
 *  Maps a packet to the index of the SGE queue set it should use.  The
 *  index is the packet's priority with bit 0 shifted out (the header of
 *  t3_offload_tx() describes it as living in bits 1-3).
 */
static __inline int
queue_set(const struct mbuf *m)
{
    return (m_get_priority(m) >> 1);
}
   1915 
/**
 *  is_ctrl_pkt - return whether an offload packet is a control packet
 *  @m: the packet
 *
 *  Tells the caller whether an offload packet belongs on the CTRL Tx
 *  queue (non-zero) or on the OFLD Tx queue (zero).  The answer is bit 0
 *  of the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
    return (m_get_priority(m) & 1);
}
   1928 
   1929 /**
   1930  *  t3_offload_tx - send an offload packet
   1931  *  @tdev: the offload device to send to
   1932  *  @m: the packet
   1933  *
   1934  *  Sends an offload packet.  We use the packet priority to select the
   1935  *  appropriate Tx queue as follows: bit 0 indicates whether the packet
   1936  *  should be sent as regular or control, bits 1-3 select the queue set.
   1937  */
   1938 int
   1939 t3_offload_tx(struct toedev *tdev, struct mbuf *m)
   1940 {
   1941     adapter_t *adap = tdev2adap(tdev);
   1942     struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
   1943 
   1944     if (__predict_false(is_ctrl_pkt(m)))
   1945         return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
   1946 
   1947     return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
   1948 }
   1949 
   1950 static void
   1951 restart_tx(struct sge_qset *qs)
   1952 {
   1953     if (isset(&qs->txq_stopped, TXQ_OFLD) &&
   1954         should_restart_tx(&qs->txq[TXQ_OFLD]) &&
   1955         test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
   1956         qs->txq[TXQ_OFLD].restarts++;
   1957         workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq, &qs->txq[TXQ_OFLD].qresume_task.w, NULL);
   1958     }
   1959     if (isset(&qs->txq_stopped, TXQ_CTRL) &&
   1960         should_restart_tx(&qs->txq[TXQ_CTRL]) &&
   1961         test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
   1962         qs->txq[TXQ_CTRL].restarts++;
   1963         workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq, &qs->txq[TXQ_CTRL].qresume_task.w, NULL);
   1964     }
   1965 }
   1966 
   1967 /**
   1968  *  t3_sge_alloc_qset - initialize an SGE queue set
   1969  *  @sc: the controller softc
   1970  *  @id: the queue set id
   1971  *  @nports: how many Ethernet ports will be using this queue set
   1972  *  @irq_vec_idx: the IRQ vector index for response queue interrupts
   1973  *  @p: configuration parameters for this queue set
   1974  *  @ntxq: number of Tx queues for the queue set
   1975  *  @pi: port info for queue set
   1976  *
   1977  *  Allocate resources and initialize an SGE queue set.  A queue set
   1978  *  comprises a response queue, two Rx free-buffer queues, and up to 3
   1979  *  Tx queues.  The Tx queues are assigned roles in the order Ethernet
   1980  *  queue, offload queue, and control queue.
   1981  */
   1982 int
   1983 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
   1984           const struct qset_params *p, int ntxq, struct port_info *pi)
   1985 {
   1986     struct sge_qset *q = &sc->sge.qs[id];
   1987     int i, ret = 0;
   1988 
   1989     init_qset_cntxt(q, id);
   1990 
   1991     if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
   1992             sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
   1993             &q->fl[0].desc, &q->fl[0].sdesc,
   1994             &q->fl[0].desc_tag, &q->fl[0].desc_map,
   1995             sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
   1996         goto err;
   1997     }
   1998 
   1999     if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
   2000             sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
   2001             &q->fl[1].desc, &q->fl[1].sdesc,
   2002             &q->fl[1].desc_tag, &q->fl[1].desc_map,
   2003             sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
   2004         goto err;
   2005     }
   2006 
   2007     if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
   2008             &q->rspq.phys_addr, &q->rspq.desc, NULL,
   2009             &q->rspq.desc_tag, &q->rspq.desc_map,
   2010             NULL, NULL)) != 0) {
   2011         goto err;
   2012     }
   2013 
   2014     for (i = 0; i < ntxq; ++i) {
   2015         /*
   2016          * The control queue always uses immediate data so does not
   2017          * need to keep track of any mbufs.
   2018          * XXX Placeholder for future TOE support.
   2019          */
   2020         size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
   2021 
   2022         if ((ret = alloc_ring(sc, p->txq_size[i],
   2023                 sizeof(struct tx_desc), sz,
   2024                 &q->txq[i].phys_addr, &q->txq[i].desc,
   2025                 &q->txq[i].sdesc, &q->txq[i].desc_tag,
   2026                 &q->txq[i].desc_map,
   2027                 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
   2028             goto err;
   2029         }
   2030         mbufq_init(&q->txq[i].sendq);
   2031         q->txq[i].gen = 1;
   2032         q->txq[i].size = p->txq_size[i];
   2033         snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
   2034             0, irq_vec_idx, i);
   2035         MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
   2036     }
   2037 
   2038     q->txq[TXQ_ETH].port = pi;
   2039 
   2040     q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
   2041     q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
   2042     q->txq[TXQ_OFLD].qresume_task.context = q;
   2043     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");
   2044 
   2045     q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
   2046     q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
   2047     q->txq[TXQ_CTRL].qresume_task.context = q;
   2048     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");
   2049 
   2050     q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
   2051     q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
   2052     q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
   2053     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");
   2054 
   2055     q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
   2056     q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
   2057     q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
   2058     kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");
   2059 
   2060     q->fl[0].gen = q->fl[1].gen = 1;
   2061     q->fl[0].size = p->fl_size;
   2062     q->fl[1].size = p->jumbo_size;
   2063 
   2064     q->rspq.gen = 1;
   2065     q->rspq.cidx = 0;
   2066     q->rspq.size = p->rspq_size;
   2067 
   2068     q->txq[TXQ_ETH].stop_thres = nports *
   2069         flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
   2070 
   2071     q->fl[0].buf_size = MCLBYTES;
   2072     q->fl[1].buf_size = MJUMPAGESIZE;
   2073 
   2074     q->lro.enabled = lro_default;
   2075 
   2076     mtx_lock(&sc->sge.reg_lock);
   2077     ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
   2078                    q->rspq.phys_addr, q->rspq.size,
   2079                    q->fl[0].buf_size, 1, 0);
   2080     if (ret) {
   2081         printf("error %d from t3_sge_init_rspcntxt\n", ret);
   2082         goto err_unlock;
   2083     }
   2084 
   2085     for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
   2086         ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
   2087                       q->fl[i].phys_addr, q->fl[i].size,
   2088                       q->fl[i].buf_size, p->cong_thres, 1,
   2089                       0);
   2090         if (ret) {
   2091             printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
   2092             goto err_unlock;
   2093         }
   2094     }
   2095 
   2096     ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
   2097                  SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
   2098                  q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
   2099                  1, 0);
   2100     if (ret) {
   2101         printf("error %d from t3_sge_init_ecntxt\n", ret);
   2102         goto err_unlock;
   2103     }
   2104 
   2105     if (ntxq > 1) {
   2106         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
   2107                      USE_GTS, SGE_CNTXT_OFLD, id,
   2108                      q->txq[TXQ_OFLD].phys_addr,
   2109                      q->txq[TXQ_OFLD].size, 0, 1, 0);
   2110         if (ret) {
   2111             printf("error %d from t3_sge_init_ecntxt\n", ret);
   2112             goto err_unlock;
   2113         }
   2114     }
   2115 
   2116     if (ntxq > 2) {
   2117         ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
   2118                      SGE_CNTXT_CTRL, id,
   2119                      q->txq[TXQ_CTRL].phys_addr,
   2120                      q->txq[TXQ_CTRL].size,
   2121                      q->txq[TXQ_CTRL].token, 1, 0);
   2122         if (ret) {
   2123             printf("error %d from t3_sge_init_ecntxt\n", ret);
   2124             goto err_unlock;
   2125         }
   2126     }
   2127 
   2128     snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
   2129         0, irq_vec_idx);
   2130     MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
   2131 
   2132     mtx_unlock(&sc->sge.reg_lock);
   2133     t3_update_qset_coalesce(q, p);
   2134     q->port = pi;
   2135 
   2136     refill_fl(sc, &q->fl[0], q->fl[0].size);
   2137     refill_fl(sc, &q->fl[1], q->fl[1].size);
   2138     refill_rspq(sc, &q->rspq, q->rspq.size - 1);
   2139 
   2140     t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
   2141              V_NEWTIMER(q->rspq.holdoff_tmr));
   2142 
   2143     return (0);
   2144 
   2145 err_unlock:
   2146     mtx_unlock(&sc->sge.reg_lock);
   2147 err:
   2148     t3_free_qset(sc, q);
   2149 
   2150     return (ret);
   2151 }
   2152 
   2153 void
   2154 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
   2155 {
   2156     struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
   2157     struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
   2158     struct ifnet *ifp = pi->ifp;
   2159 
   2160     DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
   2161 
   2162     /*
   2163      * XXX need to add VLAN support for 6.x
   2164      */
   2165 #ifdef VLAN_SUPPORTED
   2166     if (__predict_false(cpl->vlan_valid)) {
   2167         m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
   2168         m->m_flags |= M_VLANTAG;
   2169     }
   2170 #endif
   2171 
   2172     m->m_pkthdr.rcvif = ifp;
   2173     m_explode(m);
   2174     /*
   2175      * adjust after conversion to mbuf chain
   2176      */
   2177     m_adj(m, sizeof(*cpl) + ethpad);
   2178 
   2179     (*ifp->if_input)(ifp, m);
   2180 }
   2181 
   2182 /**
   2183  *  get_packet - return the next ingress packet buffer from a free list
   2184  *  @adap: the adapter that received the packet
   2185  *  @drop_thres: # of remaining buffers before we start dropping packets
   2186  *  @qs: the qset that the SGE free list holding the packet belongs to
   2187  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
   2188  *      @r: response descriptor
   2189  *
   2190  *  Get the next packet from a free list and complete setup of the
   2191  *  sk_buff.  If the packet is small we make a copy and recycle the
   2192  *  original buffer, otherwise we use the original buffer itself.  If a
   2193  *  positive drop threshold is supplied packets are dropped and their
   2194  *  buffers recycled if (a) the number of remaining buffers is under the
   2195  *  threshold and the packet is too big to copy, or (b) the packet should
   2196  *  be copied but there is no memory for the copy.
   2197  */
   2198 #ifdef DISABLE_MBUF_IOVEC
   2199 
   2200 static int
   2201 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
   2202     struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
   2203 {
   2204 
   2205     unsigned int len_cq =  ntohl(r->len_cq);
   2206     struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
   2207     struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
   2208     uint32_t len = G_RSPD_LEN(len_cq);
   2209     uint32_t flags = ntohl(r->flags);
   2210     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
   2211     int ret = 0;
   2212 
   2213     prefetch(sd->cl);
   2214 
   2215     fl->credits--;
   2216     bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
   2217     bus_dmamap_unload(fl->entry_tag, sd->map);
   2218 
   2219     m->m_len = len;
   2220     m_cljset(m, sd->cl, fl->type);
   2221 
   2222     switch(sopeop) {
   2223     case RSPQ_SOP_EOP:
   2224         DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
   2225         mh->mh_head = mh->mh_tail = m;
   2226         m->m_pkthdr.len = len;
   2227         m->m_flags |= M_PKTHDR;
   2228         ret = 1;
   2229         break;
   2230     case RSPQ_NSOP_NEOP:
   2231         DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
   2232         m->m_flags &= ~M_PKTHDR;
   2233         if (mh->mh_tail == NULL) {
   2234             if (cxgb_debug)
   2235                 printf("discarding intermediate descriptor entry\n");
   2236             m_freem(m);
   2237             break;
   2238         }
   2239         mh->mh_tail->m_next = m;
   2240         mh->mh_tail = m;
   2241         mh->mh_head->m_pkthdr.len += len;
   2242         ret = 0;
   2243         break;
   2244     case RSPQ_SOP:
   2245         DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
   2246         m->m_pkthdr.len = len;
   2247         mh->mh_head = mh->mh_tail = m;
   2248         m->m_flags |= M_PKTHDR;
   2249         ret = 0;
   2250         break;
   2251     case RSPQ_EOP:
   2252         DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
   2253         m->m_flags &= ~M_PKTHDR;
   2254         mh->mh_head->m_pkthdr.len += len;
   2255         mh->mh_tail->m_next = m;
   2256         mh->mh_tail = m;
   2257         ret = 1;
   2258         break;
   2259     }
   2260     if (++fl->cidx == fl->size)
   2261         fl->cidx = 0;
   2262 
   2263     return (ret);
   2264 }
   2265 
   2266 #else
   2267 static int
   2268 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
   2269     struct mbuf *m, struct rsp_desc *r)
   2270 {
   2271 
   2272     unsigned int len_cq =  ntohl(r->len_cq);
   2273     struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
   2274     struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
   2275     uint32_t len = G_RSPD_LEN(len_cq);
   2276     uint32_t flags = ntohl(r->flags);
   2277     uint8_t sopeop = G_RSPD_SOP_EOP(flags);
   2278     void *cl;
   2279     int ret = 0;
   2280 
   2281     prefetch(sd->cl);
   2282 
   2283     fl->credits--;
   2284     bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
   2285 
   2286     if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
   2287         cl = mtod(m, void *);
   2288         memcpy(cl, sd->cl, len);
   2289         recycle_rx_buf(adap, fl, fl->cidx);
   2290     } else {
   2291         cl = sd->cl;
   2292         bus_dmamap_unload(fl->entry_tag, sd->map);
   2293     }
   2294     switch(sopeop) {
   2295     case RSPQ_SOP_EOP:
   2296         DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
   2297         m->m_len = m->m_pkthdr.len = len;
   2298         if (cl == sd->cl)
   2299             m_cljset(m, cl, fl->type);
   2300         ret = 1;
   2301         goto done;
   2302         break;
   2303     case RSPQ_NSOP_NEOP:
   2304         DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
   2305         ret = 0;
   2306         break;
   2307     case RSPQ_SOP:
   2308         DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
   2309         m_iovinit(m);
   2310         ret = 0;
   2311         break;
   2312     case RSPQ_EOP:
   2313         DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
   2314         ret = 1;
   2315         break;
   2316     }
   2317     m_iovappend(m, cl, fl->buf_size, len, 0);
   2318 
   2319 done:
   2320     if (++fl->cidx == fl->size)
   2321         fl->cidx = 0;
   2322 
   2323     return (ret);
   2324 }
   2325 #endif
   2326 /**
   2327  *  handle_rsp_cntrl_info - handles control information in a response
   2328  *  @qs: the queue set corresponding to the response
   2329  *  @flags: the response control flags
   2330  *
   2331  *  Handles the control information of an SGE response, such as GTS
   2332  *  indications and completion credits for the queue set's Tx queues.
   2333  *  HW coalesces credits, we don't do any extra SW coalescing.
   2334  */
   2335 static __inline void
   2336 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
   2337 {
   2338     unsigned int credits;
   2339 
   2340 #if USE_GTS
   2341     if (flags & F_RSPD_TXQ0_GTS)
   2342         clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
   2343 #endif
   2344     credits = G_RSPD_TXQ0_CR(flags);
   2345     if (credits) {
   2346         qs->txq[TXQ_ETH].processed += credits;
   2347         if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
   2348             workqueue_enqueue(qs->port->timer_reclaim_task.wq,
   2349                              &qs->port->timer_reclaim_task.w, NULL);
   2350     }
   2351 
   2352     credits = G_RSPD_TXQ2_CR(flags);
   2353     if (credits)
   2354         qs->txq[TXQ_CTRL].processed += credits;
   2355 
   2356 # if USE_GTS
   2357     if (flags & F_RSPD_TXQ1_GTS)
   2358         clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
   2359 # endif
   2360     credits = G_RSPD_TXQ1_CR(flags);
   2361     if (credits)
   2362         qs->txq[TXQ_OFLD].processed += credits;
   2363 }
   2364 
   2365 static void
   2366 check_ring_db(adapter_t *adap, struct sge_qset *qs,
   2367     unsigned int sleeping)
   2368 {
   2369     ;
   2370 }
   2371 
   2372 /**
   2373  *  process_responses - process responses from an SGE response queue
   2374  *  @adap: the adapter
   2375  *  @qs: the queue set to which the response queue belongs
   2376  *  @budget: how many responses can be processed in this round
   2377  *
   2378  *  Process responses from an SGE response queue up to the supplied budget.
   2379  *  Responses include received packets as well as credits and other events
   2380  *  for the queues that belong to the response queue's queue set.
   2381  *  A negative budget is effectively unlimited.
   2382  *
   2383  *  Additionally choose the interrupt holdoff time for the next interrupt
   2384  *  on this queue.  If the system is under memory shortage use a fairly
   2385  *  long delay to help recovery.
   2386  */
   2387 static int
   2388 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
   2389 {
   2390     struct sge_rspq *rspq = &qs->rspq;
   2391     struct rsp_desc *r = &rspq->desc[rspq->cidx];
   2392     int budget_left = budget;
   2393     unsigned int sleeping = 0;
   2394     int lro = qs->lro.enabled;
   2395 #ifdef DEBUG
   2396     static int last_holdoff = 0;
   2397     if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
   2398         printf("next_holdoff=%d\n", rspq->holdoff_tmr);
   2399         last_holdoff = rspq->holdoff_tmr;
   2400     }
   2401 #endif
   2402     rspq->next_holdoff = rspq->holdoff_tmr;
   2403 
   2404     while (__predict_true(budget_left && is_new_response(r, rspq))) {
   2405         int eth, eop = 0, ethpad = 0;
   2406         uint32_t flags = ntohl(r->flags);
   2407         uint32_t rss_csum = *(const uint32_t *)r;
   2408         uint32_t rss_hash = r->rss_hdr.rss_hash_val;
   2409 
   2410         eth = (r->rss_hdr.opcode == CPL_RX_PKT);
   2411 
   2412         if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
   2413             /* XXX */
   2414         } else if  (flags & F_RSPD_IMM_DATA_VALID) {
   2415 #ifdef DISABLE_MBUF_IOVEC
   2416             if (cxgb_debug)
   2417                 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx);
   2418 
   2419             if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
   2420                 rspq->next_holdoff = NOMEM_INTR_DELAY;
   2421                 budget_left--;
   2422                 break;
   2423             } else {
   2424                 eop = 1;
   2425             }
   2426 #else
   2427             struct mbuf *m = NULL;
   2428 
   2429             if (rspq->rspq_mbuf == NULL)
   2430                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
   2431                         else
   2432                 m = m_gethdr(M_DONTWAIT, MT_DATA);
   2433 
   2434             /*
   2435              * XXX revisit me
   2436              */
   2437             if (rspq->rspq_mbuf == NULL &&  m == NULL) {
   2438                 rspq->next_holdoff = NOMEM_INTR_DELAY;
   2439                 budget_left--;
   2440                 break;
   2441             }
   2442             if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
   2443                 goto skip;
   2444             eop = 1;
   2445 #endif
   2446             rspq->imm_data++;
   2447         } else if (r->len_cq) {
   2448             int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
   2449 
   2450 #ifdef DISABLE_MBUF_IOVEC
   2451             struct mbuf *m;
   2452             m = m_gethdr(M_NOWAIT, MT_DATA);
   2453 
   2454             if (m == NULL) {
   2455                 log(LOG_WARNING, "failed to get mbuf for packet\n");
   2456                 break;
   2457             }
   2458 
   2459             eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
   2460 #else
   2461             if (rspq->rspq_mbuf == NULL)
   2462                 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
   2463             if (rspq->rspq_mbuf == NULL) {
   2464                 log(LOG_WARNING, "failed to get mbuf for packet\n");
   2465                 break;
   2466             }
   2467             eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
   2468 #endif
   2469             ethpad = 2;
   2470         } else {
   2471             DPRINTF("pure response\n");
   2472             rspq->pure_rsps++;
   2473         }
   2474 
   2475         if (flags & RSPD_CTRL_MASK) {
   2476             sleeping |= flags & RSPD_GTS_MASK;
   2477             handle_rsp_cntrl_info(qs, flags);
   2478         }
   2479 #ifndef DISABLE_MBUF_IOVEC
   2480     skip:
   2481 #endif
   2482         r++;
   2483         if (__predict_false(++rspq->cidx == rspq->size)) {
   2484             rspq->cidx = 0;
   2485             rspq->gen ^= 1;
   2486             r = rspq->desc;
   2487         }
   2488 
   2489         prefetch(r);
   2490         if (++rspq->credits >= (rspq->size / 4)) {
   2491             refill_rspq(adap, rspq, rspq->credits);
   2492             rspq->credits = 0;
   2493         }
   2494 
   2495         if (eop) {
   2496             prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
   2497             prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
   2498 
   2499             if (eth) {
   2500                 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
   2501                     rss_hash, rss_csum, lro);
   2502 
   2503                 rspq->rspq_mh.mh_head = NULL;
   2504             } else {
   2505                 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
   2506                 /*
   2507                  * XXX size mismatch
   2508                  */
   2509                 m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
   2510             }
   2511             __refill_fl(adap, &qs->fl[0]);
   2512             __refill_fl(adap, &qs->fl[1]);
   2513 
   2514         }
   2515         --budget_left;
   2516     }
   2517 
   2518     t3_lro_flush(adap, qs, &qs->lro);
   2519 
   2520     if (sleeping)
   2521         check_ring_db(adap, qs, sleeping);
   2522 
   2523     smp_mb();  /* commit Tx queue processed updates */
   2524     if (__predict_false(qs->txq_stopped != 0))
   2525         restart_tx(qs);
   2526 
   2527     budget -= budget_left;
   2528     return (budget);
   2529 }
   2530 
   2531 /*
   2532  * A helper function that processes responses and issues GTS.
   2533  */
   2534 static __inline int
   2535 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
   2536 {
   2537     int work;
   2538     static int last_holdoff = 0;
   2539 
   2540     work = process_responses(adap, rspq_to_qset(rq), -1);
   2541 
   2542     if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
   2543         printf("next_holdoff=%d\n", rq->next_holdoff);
   2544         last_holdoff = rq->next_holdoff;
   2545     }
   2546     if (work)
   2547         t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
   2548             V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
   2549     return work;
   2550 }
   2551 
   2552 
   2553 /*
   2554  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
   2555  * Handles data events from SGE response queues as well as error and other
   2556  * async events as they all use the same interrupt pin.  We use one SGE
   2557  * response queue per port in this mode and protect all response queues with
   2558  * queue 0's lock.
   2559  */
   2560 int
   2561 t3b_intr(void *data)
   2562 {
   2563     uint32_t i, map;
   2564     adapter_t *adap = data;
   2565     struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
   2566 
   2567     t3_write_reg(adap, A_PL_CLI, 0);
   2568     map = t3_read_reg(adap, A_SG_DATA_INTR);
   2569 
   2570     if (!map)
   2571         return (FALSE);
   2572 
   2573     if (__predict_false(map & F_ERRINTR))
   2574         workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
   2575 
   2576     mtx_lock(&q0->lock);
   2577     for_each_port(adap, i)
   2578         if (map & (1 << i))
   2579             process_responses_gts(adap, &adap->sge.qs[i].rspq);
   2580     mtx_unlock(&q0->lock);
   2581 
   2582     return (TRUE);
   2583 }
   2584 
   2585 /*
   2586  * The MSI interrupt handler.  This needs to handle data events from SGE
   2587  * response queues as well as error and other async events as they all use
   2588  * the same MSI vector.  We use one SGE response queue per port in this mode
   2589  * and protect all response queues with queue 0's lock.
   2590  */
   2591 int
   2592 t3_intr_msi(void *data)
   2593 {
   2594     adapter_t *adap = data;
   2595     struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
   2596     int i, new_packets = 0;
   2597 
   2598     mtx_lock(&q0->lock);
   2599 
   2600     for_each_port(adap, i)
   2601         if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
   2602             new_packets = 1;
   2603     mtx_unlock(&q0->lock);
   2604     if (new_packets == 0)
   2605         workqueue_enqueue(adap->slow_intr_task.wq, &adap->slow_intr_task.w, NULL);
   2606 
   2607     return (TRUE);
   2608 }
   2609 
   2610 int
   2611 t3_intr_msix(void *data)
   2612 {
   2613     struct sge_qset *qs = data;
   2614     adapter_t *adap = qs->port->adapter;
   2615     struct sge_rspq *rspq = &qs->rspq;
   2616 
   2617     mtx_lock(&rspq->lock);
   2618     if (process_responses_gts(adap, rspq) == 0)
   2619         rspq->unhandled_irqs++;
   2620     mtx_unlock(&rspq->lock);
   2621 
   2622     return (TRUE);
   2623 }
   2624 
   2625 /**
   2626  *  t3_get_desc - dump an SGE descriptor for debugging purposes
   2627  *  @qs: the queue set
   2628  *  @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
   2629  *  @idx: the descriptor index in the queue
   2630  *  @data: where to dump the descriptor contents
   2631  *
   2632  *  Dumps the contents of a HW descriptor of an SGE queue.  Returns the
   2633  *  size of the descriptor.
   2634  */
   2635 int
   2636 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
   2637         unsigned char *data)
   2638 {
   2639     if (qnum >= 6)
   2640         return (EINVAL);
   2641 
   2642     if (qnum < 3) {
   2643         if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
   2644             return -EINVAL;
   2645         memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
   2646         return sizeof(struct tx_desc);
   2647     }
   2648 
   2649     if (qnum == 3) {
   2650         if (!qs->rspq.desc || idx >= qs->rspq.size)
   2651             return (EINVAL);
   2652         memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
   2653         return sizeof(struct rsp_desc);
   2654     }
   2655 
   2656     qnum -= 4;
   2657     if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
   2658         return (EINVAL);
   2659     memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
   2660     return sizeof(struct rx_desc);
   2661 }
   2662