1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: cxgb_sge.c,v 1.8 2025/05/28 06:06:53 andvar Exp $"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/conf.h> 37 #include <sys/bus.h> 38 #include <sys/queue.h> 39 #include <sys/sysctl.h> 40 41 #include <sys/proc.h> 42 #include <sys/sched.h> 43 #include <sys/systm.h> 44 45 #include <netinet/in_systm.h> 46 #include <netinet/in.h> 47 #include <netinet/ip.h> 48 #include <netinet/tcp.h> 49 50 #include <dev/pci/pcireg.h> 51 #include <dev/pci/pcivar.h> 52 53 #ifdef CONFIG_DEFINED 54 #include <cxgb_include.h> 55 #else 56 #include <dev/pci/cxgb/cxgb_include.h> 57 #endif 58 59 uint32_t collapse_free = 0; 60 uint32_t mb_free_vec_free = 0; 61 int txq_fills = 0; 62 int collapse_mbufs = 0; 63 static int bogus_imm = 0; 64 #ifndef DISABLE_MBUF_IOVEC 65 static int recycle_enable = 1; 66 #endif 67 68 #define USE_GTS 0 69 70 #define SGE_RX_SM_BUF_SIZE 1536 71 #define SGE_RX_DROP_THRES 16 72 #define SGE_RX_COPY_THRES 128 73 74 /* 75 * Period of the Tx buffer reclaim timer. This timer does not need to run 76 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
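 * With the usual hz of 100, (hz >> 1) works out to 50 ticks, i.e. the timer
 * fires roughly twice a second.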
77 */ 78 #define TX_RECLAIM_PERIOD (hz >> 1) 79 80 /* 81 * work request size in bytes 82 */ 83 #define WR_LEN (WR_FLITS * 8) 84 85 /* 86 * Values for sge_txq.flags 87 */ 88 enum { 89 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 90 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 91 }; 92 93 struct tx_desc { 94 uint64_t flit[TX_DESC_FLITS]; 95 } __packed; 96 97 struct rx_desc { 98 uint32_t addr_lo; 99 uint32_t len_gen; 100 uint32_t gen2; 101 uint32_t addr_hi; 102 } __packed; 103 104 struct rsp_desc { /* response queue descriptor */ 105 struct rss_header rss_hdr; 106 uint32_t flags; 107 uint32_t len_cq; 108 uint8_t imm_data[47]; 109 uint8_t intr_gen; 110 } __packed; 111 112 #define RX_SW_DESC_MAP_CREATED (1 << 0) 113 #define TX_SW_DESC_MAP_CREATED (1 << 1) 114 #define RX_SW_DESC_INUSE (1 << 3) 115 #define TX_SW_DESC_MAPPED (1 << 4) 116 117 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 118 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 119 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 120 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 121 122 struct tx_sw_desc { /* SW state per Tx descriptor */ 123 struct mbuf *m; 124 bus_dma_segment_t segs[1]; 125 bus_dmamap_t map; 126 int flags; 127 }; 128 129 struct rx_sw_desc { /* SW state per Rx descriptor */ 130 void *cl; 131 bus_dmamap_t map; 132 int flags; 133 }; 134 135 struct txq_state { 136 unsigned int compl; 137 unsigned int gen; 138 unsigned int pidx; 139 }; 140 141 /* 142 * Maps a number of flits to the number of Tx descriptors that can hold them. 143 * The formula is 144 * 145 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 146 * 147 * HW allows up to 4 descriptors to be combined into a WR. 148 */ 149 static uint8_t flit_desc_map[] = { 150 0, 151 #if SGE_NUM_GENBITS == 1 152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 153 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 155 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 156 #elif SGE_NUM_GENBITS == 2 157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 160 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 161 #else 162 # error "SGE_NUM_GENBITS must be 1 or 2" 163 #endif 164 }; 165 166 167 static int lro_default = 0; 168 int cxgb_debug = 0; 169 170 static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 171 static void sge_timer_cb(void *arg); 172 static void sge_timer_reclaim(struct work *wk, void *arg); 173 static void sge_txq_reclaim_handler(struct work *wk, void *arg); 174 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec); 175 176 /** 177 * reclaim_completed_tx - reclaims completed Tx descriptors 178 * @adapter: the adapter 179 * @q: the Tx queue to reclaim completed descriptors from 180 * 181 * Reclaims Tx descriptors that the SGE has indicated it has processed, 182 * and frees the associated buffers if possible. Called with the Tx 183 * queue's lock held. 184 */ 185 static __inline int 186 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec) 187 { 188 int reclaimed, reclaim = desc_reclaimable(q); 189 int n = 0; 190 191 mtx_assert(&q->lock, MA_OWNED); 192 if (reclaim > 0) { 193 n = free_tx_desc(q, uimin(reclaim, nbufs), mvec); 194 reclaimed = uimin(reclaim, nbufs); 195 q->cleaned += reclaimed; 196 q->in_use -= reclaimed; 197 } 198 return (n); 199 } 200 201 /** 202 * should_restart_tx - are there enough resources to restart a Tx queue? 
203 * @q: the Tx queue 204 * 205 * Checks if there are enough descriptors to restart a suspended Tx queue. 206 */ 207 static __inline int 208 should_restart_tx(const struct sge_txq *q) 209 { 210 unsigned int r = q->processed - q->cleaned; 211 212 return q->in_use - r < (q->size >> 1); 213 } 214 215 /** 216 * t3_sge_init - initialize SGE 217 * @adap: the adapter 218 * @p: the SGE parameters 219 * 220 * Performs SGE initialization needed every time after a chip reset. 221 * We do not initialize any of the queue sets here, instead the driver 222 * top-level must request those individually. We also do not enable DMA 223 * here, that should be done after the queues have been set up. 224 */ 225 void 226 t3_sge_init(adapter_t *adap, struct sge_params *p) 227 { 228 u_int ctrl, ups; 229 230 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 231 232 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 233 F_CQCRDTCTRL | 234 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 235 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 236 #if SGE_NUM_GENBITS == 1 237 ctrl |= F_EGRGENCTRL; 238 #endif 239 if (adap->params.rev > 0) { 240 if (!(adap->flags & (USING_MSIX | USING_MSI))) 241 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 242 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 243 } 244 t3_write_reg(adap, A_SG_CONTROL, ctrl); 245 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 246 V_LORCQDRBTHRSH(512)); 247 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 248 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 249 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 250 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 251 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 252 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 253 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 254 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 255 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 256 } 257 258 259 /** 260 * sgl_len - calculates the size of an SGL of the given capacity 261 * @n: the number of SGL entries 262 * 263 * Calculates the number of flits needed for a scatter/gather list that 264 * can hold the given number of entries. 265 */ 266 static __inline unsigned int 267 sgl_len(unsigned int n) 268 { 269 return ((3 * n) / 2 + (n & 1)); 270 } 271 272 /** 273 * get_imm_packet - return the next ingress packet buffer from a response 274 * @resp: the response descriptor containing the packet data 275 * 276 * Return a packet containing the immediate data of the given response. 
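 * Immediate responses carry their payload inline in the response descriptor
 * (the imm_data[] bytes), so no free-list buffer is consumed for them.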
277 */ 278 #ifdef DISABLE_MBUF_IOVEC 279 static __inline int 280 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh) 281 { 282 struct mbuf *m; 283 int len; 284 uint32_t flags = ntohl(resp->flags); 285 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 286 287 /* 288 * would be a firmware bug 289 */ 290 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 291 return (0); 292 293 m = m_gethdr(M_NOWAIT, MT_DATA); 294 len = G_RSPD_LEN(ntohl(resp->len_cq)); 295 296 if (m) { 297 m_align(m, IMMED_PKT_SIZE); 298 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 299 m->m_len = len; 300 301 switch (sopeop) { 302 case RSPQ_SOP_EOP: 303 mh->mh_head = mh->mh_tail = m; 304 m->m_pkthdr.len = len; 305 m->m_flags |= M_PKTHDR; 306 break; 307 case RSPQ_EOP: 308 m->m_flags &= ~M_PKTHDR; 309 mh->mh_head->m_pkthdr.len += len; 310 mh->mh_tail->m_next = m; 311 mh->mh_tail = m; 312 break; 313 } 314 } 315 return (m != NULL); 316 } 317 318 #else 319 static int 320 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags) 321 { 322 int len, error; 323 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 324 325 /* 326 * would be a firmware bug 327 */ 328 len = G_RSPD_LEN(ntohl(resp->len_cq)); 329 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) { 330 if (cxgb_debug) 331 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len); 332 bogus_imm++; 333 return (EINVAL); 334 } 335 error = 0; 336 switch (sopeop) { 337 case RSPQ_SOP_EOP: 338 m->m_len = m->m_pkthdr.len = len; 339 memcpy(mtod(m, uint8_t *), resp->imm_data, len); 340 break; 341 case RSPQ_EOP: 342 memcpy(cl, resp->imm_data, len); 343 m_iovappend(m, cl, MSIZE, len, 0); 344 break; 345 default: 346 bogus_imm++; 347 error = EINVAL; 348 } 349 350 return (error); 351 } 352 #endif 353 354 static __inline u_int 355 flits_to_desc(u_int n) 356 { 357 return (flit_desc_map[n]); 358 } 359 360 void 361 t3_sge_err_intr_handler(adapter_t *adapter) 362 { 363 unsigned int v, status; 364 365 366 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 367 368 if (status & F_RSPQCREDITOVERFOW) 369 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 370 371 if (status & F_RSPQDISABLED) { 372 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 373 374 CH_ALERT(adapter, 375 "packet delivered to disabled response queue (0x%x)\n", 376 (v >> S_RSPQ0DISABLED) & 0xff); 377 } 378 379 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 380 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 381 t3_fatal_err(adapter); 382 } 383 384 void 385 t3_sge_prep(adapter_t *adap, struct sge_params *p) 386 { 387 int i; 388 389 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 390 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 391 392 for (i = 0; i < SGE_QSETS; ++i) { 393 struct qset_params *q = p->qset + i; 394 395 q->polling = adap->params.rev > 0; 396 397 if (adap->params.nports > 2) 398 q->coalesce_nsecs = 50000; 399 else 400 q->coalesce_nsecs = 5000; 401 402 q->rspq_size = RSPQ_Q_SIZE; 403 q->fl_size = FL_Q_SIZE; 404 q->jumbo_size = JUMBO_Q_SIZE; 405 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 406 q->txq_size[TXQ_OFLD] = 1024; 407 q->txq_size[TXQ_CTRL] = 256; 408 q->cong_thres = 0; 409 } 410 } 411 412 int 413 t3_sge_alloc(adapter_t *sc) 414 { 415 /* The parent tag. */ 416 sc->parent_dmat = sc->pa.pa_dmat; 417 418 /* 419 * DMA tag for normal sized RX frames 420 */ 421 sc->rx_dmat = sc->pa.pa_dmat; 422 423 /* 424 * DMA tag for jumbo sized RX frames. 
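 * (On this NetBSD port all of these "tags" are simply aliases of the
 * parent PCI DMA tag, sc->pa.pa_dmat.)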
425 */ 426 sc->rx_jumbo_dmat = sc->pa.pa_dmat; 427 428 /* 429 * DMA tag for TX frames. 430 */ 431 sc->tx_dmat = sc->pa.pa_dmat; 432 433 return (0); 434 } 435 436 int 437 t3_sge_free(struct adapter * sc) 438 { 439 return (0); 440 } 441 442 void 443 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 444 { 445 446 qs->rspq.holdoff_tmr = uimax(p->coalesce_nsecs/100, 1U); 447 qs->rspq.polling = 0 /* p->polling */; 448 } 449 450 /** 451 * refill_fl - refill an SGE free-buffer list 452 * @sc: the controller softc 453 * @q: the free-list to refill 454 * @n: the number of new buffers to allocate 455 * 456 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 457 * The caller must assure that @n does not exceed the queue's capacity. 458 */ 459 static void 460 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 461 { 462 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 463 struct rx_desc *d = &q->desc[q->pidx]; 464 void *cl; 465 int err; 466 467 while (n--) { 468 /* 469 * We only allocate a cluster, mbuf allocation happens after rx 470 */ 471 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) 472 { 473 err = bus_dmamap_create(sc->pa.pa_dmat, 474 q->buf_size, 1, q->buf_size, 0, 475 BUS_DMA_ALLOCNOW, &sd->map); 476 if (err != 0) 477 { 478 log(LOG_WARNING, "failure in refill_fl\n"); 479 return; 480 } 481 sd->flags |= RX_SW_DESC_MAP_CREATED; 482 } 483 cl = malloc(q->buf_size, M_DEVBUF, M_NOWAIT); 484 if (cl == NULL) 485 { 486 log(LOG_WARNING, "Failed to allocate cluster\n"); 487 break; 488 } 489 err = bus_dmamap_load(sc->pa.pa_dmat, sd->map, cl, q->buf_size, NULL, BUS_DMA_NOWAIT); 490 if (err) 491 { 492 log(LOG_WARNING, "failure in refill_fl\n"); 493 free(cl, M_DEVBUF); 494 return; 495 } 496 497 sd->flags |= RX_SW_DESC_INUSE; 498 sd->cl = cl; 499 d->addr_lo = htobe32(sd->map->dm_segs[0].ds_addr & 0xffffffff); 500 d->addr_hi = htobe32(((uint64_t)sd->map->dm_segs[0].ds_addr>>32) & 0xffffffff); 501 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 502 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 503 504 d++; 505 sd++; 506 507 if (++q->pidx == q->size) { 508 q->pidx = 0; 509 q->gen ^= 1; 510 sd = q->sdesc; 511 d = q->desc; 512 } 513 q->credits++; 514 } 515 516 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 517 } 518 519 520 /** 521 * free_rx_bufs - free the Rx buffers on an SGE free list 522 * @sc: the controle softc 523 * @q: the SGE free list to clean up 524 * 525 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 526 * this queue should be stopped before calling this function. 527 */ 528 static void 529 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 530 { 531 u_int cidx = q->cidx; 532 533 while (q->credits--) { 534 struct rx_sw_desc *d = &q->sdesc[cidx]; 535 536 if (d->flags & RX_SW_DESC_INUSE) { 537 bus_dmamap_unload(q->entry_tag, d->map); 538 bus_dmamap_destroy(q->entry_tag, d->map); 539 d->map = NULL; 540 free(d->cl, M_DEVBUF); 541 d->cl = NULL; 542 } 543 d->cl = NULL; 544 if (++cidx == q->size) 545 cidx = 0; 546 } 547 } 548 549 static __inline void 550 __refill_fl(adapter_t *adap, struct sge_fl *fl) 551 { 552 refill_fl(adap, fl, uimin(16U, fl->size - fl->credits)); 553 } 554 555 #ifndef DISABLE_MBUF_IOVEC 556 /** 557 * recycle_rx_buf - recycle a receive buffer 558 * @adapter: the adapter 559 * @q: the SGE free list 560 * @idx: index of buffer to recycle 561 * 562 * Recycles the specified buffer on the given free list by adding it at 563 * the next available slot on the list. 
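 * The buffer's DMA address is reused as-is (it is already big-endian); only
 * the generation fields are rewritten for the new slot.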
564 */ 565 static void 566 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 567 { 568 struct rx_desc *from = &q->desc[idx]; 569 struct rx_desc *to = &q->desc[q->pidx]; 570 571 q->sdesc[q->pidx] = q->sdesc[idx]; 572 to->addr_lo = from->addr_lo; // already big endian 573 to->addr_hi = from->addr_hi; // likewise 574 wmb(); 575 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 576 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 577 q->credits++; 578 579 if (++q->pidx == q->size) { 580 q->pidx = 0; 581 q->gen ^= 1; 582 } 583 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 584 } 585 #endif 586 587 static int 588 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 589 bus_addr_t *phys, 590 void *desc, void *sdesc, bus_dma_tag_t *tag, 591 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 592 { 593 size_t len = nelem * elem_size; 594 void *s = NULL; 595 void *p = NULL; 596 int err; 597 bus_dma_segment_t phys_seg; 598 599 int nsegs; 600 601 *tag = sc->pa.pa_dmat; 602 603 /* allocate wired physical memory for DMA descriptor array */ 604 err = bus_dmamem_alloc(*tag, len, PAGE_SIZE, 0, &phys_seg, 1, 605 &nsegs, BUS_DMA_NOWAIT); 606 if (err != 0) 607 { 608 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 609 return (ENOMEM); 610 } 611 *phys = phys_seg.ds_addr; 612 613 /* map physical address to kernel virtual address */ 614 err = bus_dmamem_map(*tag, &phys_seg, 1, len, &p, 615 BUS_DMA_NOWAIT|BUS_DMA_COHERENT); 616 if (err != 0) 617 { 618 device_printf(sc->dev, "Cannot map descriptor memory\n"); 619 return (ENOMEM); 620 } 621 622 memset(p, 0, len); 623 *(void **)desc = p; 624 625 if (sw_size) 626 { 627 len = nelem * sw_size; 628 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 629 *(void **)sdesc = s; 630 } 631 632 if (parent_entry_tag == NULL) 633 return (0); 634 *entry_tag = sc->pa.pa_dmat; 635 636 return (0); 637 } 638 639 static void 640 sge_slow_intr_handler(struct work *wk, void *arg) 641 { 642 adapter_t *sc = arg; 643 644 t3_slow_intr_handler(sc); 645 } 646 647 /** 648 * sge_timer_cb - perform periodic maintenance of an SGE qset 649 * @data: the SGE queue set to maintain 650 * 651 * Runs periodically from a timer to perform maintenance of an SGE queue 652 * set. It performs two tasks: 653 * 654 * a) Cleans up any completed Tx descriptors that may still be pending. 655 * Normal descriptor cleanup happens when new packets are added to a Tx 656 * queue so this timer is relatively infrequent and does any cleanup only 657 * if the Tx queue has not seen any new packets in a while. We make a 658 * best effort attempt to reclaim descriptors, in that we don't wait 659 * around if we cannot get a queue's lock (which most likely is because 660 * someone else is queueing new packets and so will also handle the clean 661 * up). Since control queues use immediate data exclusively we don't 662 * bother cleaning them up here. 663 * 664 * b) Replenishes Rx queues that have run out due to memory shortage. 665 * Normally new Rx buffers are added when existing ones are consumed but 666 * when out of memory a queue can become empty. We try to add only a few 667 * buffers here, the queue will be replenished fully as these new buffers 668 * are used up if memory shortage has subsided. 669 * 670 * c) Return coalesced response queue credits in case a response queue is 671 * starved. 672 * 673 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 674 * fifo overflows and the FW doesn't implement any recovery scheme yet. 
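 *
 * The timer reschedules itself every TX_RECLAIM_PERIOD ticks for as long as
 * the adapter has any device open (sc->open_device_map != 0).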
675 */ 676 677 static void 678 sge_timer_cb(void *arg) 679 { 680 adapter_t *sc = arg; 681 struct port_info *p; 682 struct sge_qset *qs; 683 struct sge_txq *txq; 684 int i, j; 685 int reclaim_eth, reclaim_ofl, refill_rx; 686 687 for (i = 0; i < sc->params.nports; i++) 688 for (j = 0; j < sc->port[i].nqsets; j++) { 689 qs = &sc->sge.qs[i + j]; 690 txq = &qs->txq[0]; 691 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 692 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 693 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 694 (qs->fl[1].credits < qs->fl[1].size)); 695 if (reclaim_eth || reclaim_ofl || refill_rx) { 696 p = &sc->port[i]; 697 workqueue_enqueue(p->timer_reclaim_task.wq, &p->timer_reclaim_task.w, NULL); 698 break; 699 } 700 } 701 if (sc->params.nports > 2) { 702 int k; 703 704 for_each_port(sc, k) { 705 struct port_info *pi = &sc->port[k]; 706 707 t3_write_reg(sc, A_SG_KDOORBELL, 708 F_SELEGRCNTX | 709 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 710 } 711 } 712 if (sc->open_device_map != 0) 713 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 714 } 715 716 /* 717 * This is meant to be a catch-all function to keep sge state private 718 * to sge.c 719 * 720 */ 721 int 722 t3_sge_init_adapter(adapter_t *sc) 723 { 724 callout_init(&sc->sge_timer_ch, 0); 725 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 726 sc->slow_intr_task.name = "sge_slow_intr"; 727 sc->slow_intr_task.func = sge_slow_intr_handler; 728 sc->slow_intr_task.context = sc; 729 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &sc->slow_intr_task, NULL, "cxgb_make_task"); 730 return (0); 731 } 732 733 int 734 t3_sge_init_port(struct port_info *p) 735 { 736 p->timer_reclaim_task.name = "sge_timer_reclaim"; 737 p->timer_reclaim_task.func = sge_timer_reclaim; 738 p->timer_reclaim_task.context = p; 739 kthread_create(PRI_NONE, 0, NULL, cxgb_make_task, &p->timer_reclaim_task, NULL, "cxgb_make_task"); 740 741 return (0); 742 } 743 744 void 745 t3_sge_deinit_sw(adapter_t *sc) 746 { 747 callout_drain(&sc->sge_timer_ch); 748 } 749 750 /** 751 * refill_rspq - replenish an SGE response queue 752 * @adapter: the adapter 753 * @q: the response queue to replenish 754 * @credits: how many new responses to make available 755 * 756 * Replenishes a response queue by making the supplied number of responses 757 * available to HW. 758 */ 759 static __inline void 760 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 761 { 762 763 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 764 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 765 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 766 } 767 768 static __inline void 769 sge_txq_reclaim_(struct sge_txq *txq) 770 { 771 int reclaimable, i, n; 772 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 773 struct port_info *p; 774 775 p = txq->port; 776 reclaim_more: 777 n = 0; 778 reclaimable = desc_reclaimable(txq); 779 if (reclaimable > 0 && mtx_trylock(&txq->lock)) { 780 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec); 781 mtx_unlock(&txq->lock); 782 } 783 if (n == 0) 784 return; 785 786 for (i = 0; i < n; i++) { 787 m_freem_vec(m_vec[i]); 788 } 789 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 790 txq->size - txq->in_use >= TX_START_MAX_DESC) { 791 txq_fills++; 792 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 793 workqueue_enqueue(p->start_task.wq, &p->start_task.w, NULL); 794 } 795 796 if (n) 797 goto reclaim_more; 798 } 799 800 static void 801 sge_txq_reclaim_handler(struct work *wk, void *arg) 802 { 803 struct sge_txq *q = arg; 804 805 sge_txq_reclaim_(q); 806 } 807 808 static void 809 sge_timer_reclaim(struct work *wk, void *arg) 810 { 811 struct port_info *p = arg; 812 int i, nqsets = p->nqsets; 813 adapter_t *sc = p->adapter; 814 struct sge_qset *qs; 815 struct sge_txq *txq; 816 struct mtx *lock; 817 818 for (i = 0; i < nqsets; i++) { 819 qs = &sc->sge.qs[i]; 820 txq = &qs->txq[TXQ_ETH]; 821 sge_txq_reclaim_(txq); 822 823 txq = &qs->txq[TXQ_OFLD]; 824 sge_txq_reclaim_(txq); 825 826 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 827 &sc->sge.qs[0].rspq.lock; 828 829 if (mtx_trylock(lock)) { 830 /* XXX currently assume that we are *NOT* polling */ 831 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 832 833 if (qs->fl[0].credits < qs->fl[0].size - 16) 834 __refill_fl(sc, &qs->fl[0]); 835 if (qs->fl[1].credits < qs->fl[1].size - 16) 836 __refill_fl(sc, &qs->fl[1]); 837 838 if (status & (1 << qs->rspq.cntxt_id)) { 839 if (qs->rspq.credits) { 840 refill_rspq(sc, &qs->rspq, 1); 841 qs->rspq.credits--; 842 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 843 1 << qs->rspq.cntxt_id); 844 } 845 } 846 mtx_unlock(lock); 847 } 848 } 849 } 850 851 /** 852 * init_qset_cntxt - initialize an SGE queue set context info 853 * @qs: the queue set 854 * @id: the queue set id 855 * 856 * Initializes the TIDs and context ids for the queues of a queue set. 
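 * The mapping is fixed: queue set @id gets response-queue context @id,
 * free-list contexts 2*@id and 2*@id + 1, and egress contexts at the
 * corresponding FW_*_SGEEC_START offsets (e.g. queue set 1 uses free-list
 * contexts 2 and 3).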
857 */ 858 static void 859 init_qset_cntxt(struct sge_qset *qs, u_int id) 860 { 861 862 qs->rspq.cntxt_id = id; 863 qs->fl[0].cntxt_id = 2 * id; 864 qs->fl[1].cntxt_id = 2 * id + 1; 865 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 866 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 867 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 868 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 869 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 870 } 871 872 873 static void 874 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 875 { 876 txq->in_use += ndesc; 877 /* 878 * XXX we don't handle stopping of queue 879 * presumably start handles this when we bump against the end 880 */ 881 txqs->gen = txq->gen; 882 txq->unacked += ndesc; 883 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 884 txq->unacked &= 7; 885 txqs->pidx = txq->pidx; 886 txq->pidx += ndesc; 887 888 if (txq->pidx >= txq->size) { 889 txq->pidx -= txq->size; 890 txq->gen ^= 1; 891 } 892 893 } 894 895 /** 896 * calc_tx_descs - calculate the number of Tx descriptors for a packet 897 * @m: the packet mbufs 898 * @nsegs: the number of segments 899 * 900 * Returns the number of Tx descriptors needed for the given Ethernet 901 * packet. Ethernet packets require addition of WR and CPL headers. 902 */ 903 static __inline unsigned int 904 calc_tx_descs(const struct mbuf *m, int nsegs) 905 { 906 unsigned int flits; 907 908 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 909 return 1; 910 911 flits = sgl_len(nsegs) + 2; 912 #ifdef TSO_SUPPORTED 913 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 914 flits++; 915 #endif 916 return flits_to_desc(flits); 917 } 918 919 static unsigned int 920 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 921 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 922 { 923 struct mbuf *m0; 924 int err, pktlen; 925 int i, total_len; 926 927 m0 = *m; 928 pktlen = m0->m_pkthdr.len; 929 930 m0 = *m; 931 i = 0; 932 total_len = 0; 933 while (m0) 934 { 935 i++; 936 total_len += m0->m_len; 937 m0 = m0->m_next; 938 } 939 err = bus_dmamap_create(txq->entry_tag, total_len, TX_MAX_SEGS, total_len, 0, BUS_DMA_NOWAIT, &stx->map); 940 if (err) 941 return (err); 942 err = bus_dmamap_load_mbuf(txq->entry_tag, stx->map, *m, 0); 943 if (err) 944 return (err); 945 // feed out the physical mappings 946 *nsegs = stx->map->dm_nsegs; 947 for (i=0; i<*nsegs; i++) 948 { 949 segs[i] = stx->map->dm_segs[i]; 950 } 951 #ifdef DEBUG 952 if (err) { 953 int n = 0; 954 struct mbuf *mtmp = m0; 955 while(mtmp) { 956 n++; 957 mtmp = mtmp->m_next; 958 } 959 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 960 err, m0->m_pkthdr.len, n); 961 } 962 #endif 963 if (err == EFBIG) { 964 /* Too many segments, try to defrag */ 965 m0 = m_defrag(m0, M_DONTWAIT); 966 if (m0 == NULL) { 967 m_freem(*m); 968 *m = NULL; 969 return (ENOBUFS); 970 } 971 *m = m0; 972 INT3; // XXXXXXXXXXXXXXXXXX like above! 
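                /*
                 * XXX the defragmented chain is never re-loaded here; a
                 * second bus_dmamap_load_mbuf() on *m would be needed for
                 * this path to actually work, which is what the INT3 marker
                 * above flags.
                 */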
        }

        if (err == ENOMEM) {
                return (err);
        }

        if (err) {
                if (cxgb_debug)
                        printf("map failure err=%d pktlen=%d\n", err, pktlen);
                m_freem_vec(m0);
                *m = NULL;
                return (err);
        }

        bus_dmamap_sync(txq->entry_tag, stx->map, 0, pktlen, BUS_DMASYNC_PREWRITE);
        stx->flags |= TX_SW_DESC_MAPPED;

        return (0);
}

/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet.
 * The SGL size in 8-byte words is sgl_len(@nsegs); the caller must size the
 * SGL appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
        int i, idx;

        for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
                if (i && idx == 0)
                        ++sgp;

                sgp->len[idx] = htobe32(segs[i].ds_len);
                sgp->addr[idx] = htobe64(segs[i].ds_addr);
        }

        if (idx)
                sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep. There is a natural race here:
 * the HW may go to sleep just after we checked, but in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
                set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
                T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
                    q->cntxt_id);
#endif
                t3_write_reg(adap, A_SG_KDOORBELL,
                    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
        }
#else
        wmb();            /* write descriptors before telling HW */
        t3_write_reg(adap, A_SG_KDOORBELL,
            F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
        d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}


/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL. If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header. Otherwise we distribute the
 * SGL across the number of descriptors it spans.
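 * In the multi-descriptor case every descriptor after the first carries its
 * own small header (SGL offset of one flit), and the first descriptor's
 * wr_lo, which holds the length and original generation, is written last,
 * after a write barrier, so the SGE never sees a partially built WR.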
1079 */ 1080 1081 static void 1082 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1083 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1084 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1085 { 1086 1087 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1088 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1089 1090 if (__predict_true(ndesc == 1)) { 1091 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1092 V_WR_SGLSFLT(flits)) | wr_hi; 1093 wmb(); 1094 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1095 V_WR_GEN(txqs->gen)) | wr_lo; 1096 /* XXX gen? */ 1097 wr_gen2(txd, txqs->gen); 1098 } else { 1099 unsigned int ogen = txqs->gen; 1100 const uint64_t *fp = (const uint64_t *)sgl; 1101 struct work_request_hdr *wp = wrp; 1102 1103 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1104 V_WR_SGLSFLT(flits)) | wr_hi; 1105 1106 while (sgl_flits) { 1107 unsigned int avail = WR_FLITS - flits; 1108 1109 if (avail > sgl_flits) 1110 avail = sgl_flits; 1111 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1112 sgl_flits -= avail; 1113 ndesc--; 1114 if (!sgl_flits) 1115 break; 1116 1117 fp += avail; 1118 txd++; 1119 txsd++; 1120 if (++txqs->pidx == txq->size) { 1121 txqs->pidx = 0; 1122 txqs->gen ^= 1; 1123 txd = txq->desc; 1124 txsd = txq->sdesc; 1125 } 1126 1127 /* 1128 * when the head of the mbuf chain 1129 * is freed all clusters will be freed 1130 * with it 1131 */ 1132 txsd->m = NULL; 1133 wrp = (struct work_request_hdr *)txd; 1134 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1135 V_WR_SGLSFLT(1)) | wr_hi; 1136 wrp->wr_lo = htonl(V_WR_LEN(uimin(WR_FLITS, 1137 sgl_flits + 1)) | 1138 V_WR_GEN(txqs->gen)) | wr_lo; 1139 wr_gen2(txd, txqs->gen); 1140 flits = 1; 1141 } 1142 wrp->wr_hi |= htonl(F_WR_EOP); 1143 wmb(); 1144 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1145 wr_gen2((struct tx_desc *)wp, ogen); 1146 } 1147 } 1148 1149 1150 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1151 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1152 1153 int 1154 t3_encap(struct port_info *p, struct mbuf **m, int *free_it) 1155 { 1156 adapter_t *sc; 1157 struct mbuf *m0; 1158 struct sge_qset *qs; 1159 struct sge_txq *txq; 1160 struct tx_sw_desc *stx; 1161 struct txq_state txqs; 1162 unsigned int ndesc, flits, cntrl, mlen; 1163 int err, nsegs, tso_info = 0; 1164 1165 struct work_request_hdr *wrp; 1166 struct tx_sw_desc *txsd; 1167 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1168 bus_dma_segment_t segs[TX_MAX_SEGS]; 1169 uint32_t wr_hi, wr_lo, sgl_flits; 1170 1171 struct tx_desc *txd; 1172 struct cpl_tx_pkt *cpl; 1173 1174 m0 = *m; 1175 sc = p->adapter; 1176 1177 DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset); 1178 1179 /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */ 1180 1181 qs = &sc->sge.qs[p->first_qset]; 1182 1183 txq = &qs->txq[TXQ_ETH]; 1184 stx = &txq->sdesc[txq->pidx]; 1185 txd = &txq->desc[txq->pidx]; 1186 cpl = (struct cpl_tx_pkt *)txd; 1187 mlen = m0->m_pkthdr.len; 1188 cpl->len = htonl(mlen | 0x80000000); 1189 1190 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan); 1191 /* 1192 * XXX handle checksum, TSO, and VLAN here 1193 * 1194 */ 1195 cntrl = V_TXPKT_INTF(p->txpkt_intf); 1196 1197 /* 1198 * XXX need to add VLAN support for 6.x 1199 */ 1200 #ifdef VLAN_SUPPORTED 1201 if (vlan_has_tag(m0)) 1202 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_get_tag(m0)); 1203 if (m0->m_pkthdr.csum_flags & 
(CSUM_TSO)) 1204 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1205 #endif 1206 if (tso_info) { 1207 int eth_type; 1208 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1209 struct ip *ip; 1210 struct tcphdr *tcp; 1211 char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */ 1212 1213 txd->flit[2] = 0; 1214 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1215 hdr->cntrl = htonl(cntrl); 1216 1217 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1218 pkthdr = &tmp[0]; 1219 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1220 } else { 1221 pkthdr = mtod(m0, char *); 1222 } 1223 1224 #ifdef VLAN_SUPPORTED 1225 if (vlan_has_tag(m0)) { 1226 eth_type = CPL_ETH_II_VLAN; 1227 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1228 ETHER_VLAN_ENCAP_LEN); 1229 } else { 1230 eth_type = CPL_ETH_II; 1231 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1232 } 1233 #else 1234 eth_type = CPL_ETH_II; 1235 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1236 #endif 1237 tcp = (struct tcphdr *)((uint8_t *)ip + 1238 sizeof(*ip)); 1239 1240 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1241 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1242 V_LSO_TCPHDR_WORDS(tcp->th_off); 1243 hdr->lso_info = htonl(tso_info); 1244 flits = 3; 1245 } else { 1246 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1247 cpl->cntrl = htonl(cntrl); 1248 1249 if (mlen <= WR_LEN - sizeof(*cpl)) { 1250 txq_prod(txq, 1, &txqs); 1251 txq->sdesc[txqs.pidx].m = NULL; 1252 1253 if (m0->m_len == m0->m_pkthdr.len) 1254 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen); 1255 else 1256 m_copydata(m0, 0, mlen, (void *)&txd->flit[2]); 1257 1258 *free_it = 1; 1259 flits = (mlen + 7) / 8 + 2; 1260 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1261 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1262 F_WR_SOP | F_WR_EOP | txqs.compl); 1263 wmb(); 1264 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1265 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1266 1267 wr_gen2(txd, txqs.gen); 1268 check_ring_tx_db(sc, txq); 1269 return (0); 1270 } 1271 flits = 2; 1272 } 1273 1274 wrp = (struct work_request_hdr *)txd; 1275 1276 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { 1277 return (err); 1278 } 1279 m0 = *m; 1280 ndesc = calc_tx_descs(m0, nsegs); 1281 1282 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1283 make_sgl(sgp, segs, nsegs); 1284 1285 sgl_flits = sgl_len(nsegs); 1286 1287 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1288 txq_prod(txq, ndesc, &txqs); 1289 txsd = &txq->sdesc[txqs.pidx]; 1290 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1291 wr_lo = htonl(V_WR_TID(txq->token)); 1292 txsd->m = m0; 1293 m_set_priority(m0, txqs.pidx); 1294 1295 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1296 check_ring_tx_db(p->adapter, txq); 1297 1298 return (0); 1299 } 1300 1301 1302 /** 1303 * write_imm - write a packet into a Tx descriptor as immediate data 1304 * @d: the Tx descriptor to write 1305 * @m: the packet 1306 * @len: the length of packet data to write as immediate data 1307 * @gen: the generation bit value to write 1308 * 1309 * Writes a packet as immediate data into a Tx descriptor. The packet 1310 * contains a work request at its beginning. We must write the packet 1311 * carefully so the SGE doesn't read accidentally before it's written in 1312 * its entirety. 
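 * The payload and wr_hi are stored first; a write barrier is then issued
 * before wr_lo, which carries the generation bit, is written, so a
 * half-built descriptor is never exposed to the hardware.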
1313 */ 1314 static __inline void 1315 write_imm(struct tx_desc *d, struct mbuf *m, 1316 unsigned int len, unsigned int gen) 1317 { 1318 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1319 struct work_request_hdr *to = (struct work_request_hdr *)d; 1320 1321 memcpy(&to[1], &from[1], len - sizeof(*from)); 1322 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1323 V_WR_BCNTLFLT(len & 7)); 1324 wmb(); 1325 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1326 V_WR_LEN((len + 7) / 8)); 1327 wr_gen2(d, gen); 1328 m_freem(m); 1329 } 1330 1331 /** 1332 * check_desc_avail - check descriptor availability on a send queue 1333 * @adap: the adapter 1334 * @q: the TX queue 1335 * @m: the packet needing the descriptors 1336 * @ndesc: the number of Tx descriptors needed 1337 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1338 * 1339 * Checks if the requested number of Tx descriptors is available on an 1340 * SGE send queue. If the queue is already suspended or not enough 1341 * descriptors are available the packet is queued for later transmission. 1342 * Must be called with the Tx queue locked. 1343 * 1344 * Returns 0 if enough descriptors are available, 1 if there aren't 1345 * enough descriptors and the packet has been queued, and 2 if the caller 1346 * needs to retry because there weren't enough descriptors at the 1347 * beginning of the call but some freed up in the mean time. 1348 */ 1349 static __inline int 1350 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1351 struct mbuf *m, unsigned int ndesc, 1352 unsigned int qid) 1353 { 1354 /* 1355 * XXX We currently only use this for checking the control queue 1356 * the control queue is only used for binding qsets which happens 1357 * at init time so we are guaranteed enough descriptors 1358 */ 1359 if (__predict_false(!mbufq_empty(&q->sendq))) { 1360 addq_exit: mbufq_tail(&q->sendq, m); 1361 return 1; 1362 } 1363 if (__predict_false(q->size - q->in_use < ndesc)) { 1364 1365 struct sge_qset *qs = txq_to_qset(q, qid); 1366 1367 setbit(&qs->txq_stopped, qid); 1368 smp_mb(); 1369 1370 if (should_restart_tx(q) && 1371 test_and_clear_bit(qid, &qs->txq_stopped)) 1372 return 2; 1373 1374 q->stops++; 1375 goto addq_exit; 1376 } 1377 return 0; 1378 } 1379 1380 1381 /** 1382 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1383 * @q: the SGE control Tx queue 1384 * 1385 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1386 * that send only immediate data (presently just the control queues) and 1387 * thus do not have any mbufs 1388 */ 1389 static __inline void 1390 reclaim_completed_tx_imm(struct sge_txq *q) 1391 { 1392 unsigned int reclaim = q->processed - q->cleaned; 1393 1394 mtx_assert(&q->lock, MA_OWNED); 1395 1396 q->in_use -= reclaim; 1397 q->cleaned += reclaim; 1398 } 1399 1400 static __inline int 1401 immediate(const struct mbuf *m) 1402 { 1403 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1404 } 1405 1406 /** 1407 * ctrl_xmit - send a packet through an SGE control Tx queue 1408 * @adap: the adapter 1409 * @q: the control queue 1410 * @m: the packet 1411 * 1412 * Send a packet through an SGE control Tx queue. Packets sent through 1413 * a control queue must fit entirely as immediate data in a single Tx 1414 * descriptor and have no page fragments. 
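 * Packets that do not satisfy immediate() are freed and silently discarded.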
1415 */ 1416 static int 1417 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1418 { 1419 int ret; 1420 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1421 1422 if (__predict_false(!immediate(m))) { 1423 m_freem(m); 1424 return 0; 1425 } 1426 1427 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1428 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1429 1430 mtx_lock(&q->lock); 1431 again: reclaim_completed_tx_imm(q); 1432 1433 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1434 if (__predict_false(ret)) { 1435 if (ret == 1) { 1436 mtx_unlock(&q->lock); 1437 return (-1); 1438 } 1439 goto again; 1440 } 1441 1442 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1443 1444 q->in_use++; 1445 if (++q->pidx >= q->size) { 1446 q->pidx = 0; 1447 q->gen ^= 1; 1448 } 1449 mtx_unlock(&q->lock); 1450 wmb(); 1451 t3_write_reg(adap, A_SG_KDOORBELL, 1452 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1453 return (0); 1454 } 1455 1456 1457 /** 1458 * restart_ctrlq - restart a suspended control queue 1459 * @qs: the queue set containing the control queue 1460 * 1461 * Resumes transmission on a suspended Tx control queue. 1462 */ 1463 static void 1464 restart_ctrlq(struct work *wk, void *data) 1465 { 1466 struct mbuf *m; 1467 struct sge_qset *qs = (struct sge_qset *)data; 1468 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1469 adapter_t *adap = qs->port->adapter; 1470 1471 mtx_lock(&q->lock); 1472 again: reclaim_completed_tx_imm(q); 1473 1474 while (q->in_use < q->size && 1475 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1476 1477 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1478 1479 if (++q->pidx >= q->size) { 1480 q->pidx = 0; 1481 q->gen ^= 1; 1482 } 1483 q->in_use++; 1484 } 1485 if (!mbufq_empty(&q->sendq)) { 1486 setbit(&qs->txq_stopped, TXQ_CTRL); 1487 smp_mb(); 1488 1489 if (should_restart_tx(q) && 1490 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1491 goto again; 1492 q->stops++; 1493 } 1494 mtx_unlock(&q->lock); 1495 t3_write_reg(adap, A_SG_KDOORBELL, 1496 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1497 } 1498 1499 1500 /* 1501 * Send a management message through control queue 0 1502 */ 1503 int 1504 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1505 { 1506 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1507 } 1508 1509 /** 1510 * free_qset - free the resources of an SGE queue set 1511 * @sc: the controller owning the queue set 1512 * @q: the queue set 1513 * 1514 * Release the HW and SW resources associated with an SGE queue set, such 1515 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1516 * queue set must be quiesced prior to calling this. 1517 */ 1518 static void 1519 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1520 { 1521 int i; 1522 1523 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1524 if (q->fl[i].desc) { 1525 mtx_lock(&sc->sge.reg_lock); 1526 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1527 mtx_unlock(&sc->sge.reg_lock); 1528 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1529 INT3; 1530 // bus_dmamem_free(q->fl[i].desc_tag, &q->fl[i].phys_addr, 1); 1531 // XXXXXXXXXXX destroy DMA tags???? 
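                        /*
                         * Note that only bus_dmamap_unload() is done above;
                         * the matching bus_dmamem_unmap()/bus_dmamem_free()
                         * and dmamap destruction are still missing (see the
                         * XXX comment and the INT3 trap).
                         */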
                }
                if (q->fl[i].sdesc) {
                        free_rx_bufs(sc, &q->fl[i]);
                        free(q->fl[i].sdesc, M_DEVBUF);
                }
        }

        for (i = 0; i < SGE_TXQ_PER_SET; i++) {
                if (q->txq[i].desc) {
                        mtx_lock(&sc->sge.reg_lock);
                        t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
                        mtx_unlock(&sc->sge.reg_lock);
                        bus_dmamap_unload(q->txq[i].desc_tag,
                            q->txq[i].desc_map);
                        INT3;
                        // bus_dmamem_free(q->txq[i].desc_tag, &q->txq[i].phys_addr, 1);
                        // XXXXXXXXXXX destroy DMA tags???? And the lock?!??!

                }
                if (q->txq[i].sdesc) {
                        free(q->txq[i].sdesc, M_DEVBUF);
                }
        }

        if (q->rspq.desc) {
                mtx_lock(&sc->sge.reg_lock);
                t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
                mtx_unlock(&sc->sge.reg_lock);

                bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
                INT3;
                // bus_dmamem_free(q->rspq.desc_tag, &q->rspq.phys_addr, 1);
                // XXXXXXXXXXX destroy DMA tags???? and the LOCK ?!?!?
        }

        memset(q, 0, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
        int i, nqsets;

        for (nqsets = i = 0; i < (sc)->params.nports; i++)
                nqsets += sc->port[i].nqsets;

        for (i = 0; i < nqsets; ++i)
                t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs. This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
        t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine. This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context. In the latter
 * case it also disables any pending queue restart tasklets. Note that
 * if it is called from interrupt context it cannot disable the restart
 * tasklets because it cannot wait; however, the tasklets will have no
 * effect since the doorbells are disabled, and the driver will call this
 * again later from process context, at which time the tasklets will be
 * stopped if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
        int i, nqsets;

        t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

        for (nqsets = i = 0; i < (sc)->params.nports; i++)
                nqsets += sc->port[i].nqsets;

        for (i = 0; i < nqsets; ++i) {
                /* XXX empty: the queue restart/reclaim tasks are not
                 * actually stopped here yet. */
        }
}


/**
 * free_tx_desc - reclaims Tx descriptors and their buffers
 * @q: the Tx queue to reclaim descriptors from
 * @n: the number of descriptors to reclaim
 * @m_vec: array that receives the reclaimed mbufs
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers. Called with the Tx queue lock held.
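 * Reclaimed mbufs are handed back through @m_vec rather than freed here so
 * that callers can free them after dropping the queue lock.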
1637 */ 1638 int 1639 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec) 1640 { 1641 struct tx_sw_desc *d; 1642 unsigned int cidx = q->cidx; 1643 int nbufs = 0; 1644 1645 #ifdef T3_TRACE 1646 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1647 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1648 #endif 1649 d = &q->sdesc[cidx]; 1650 1651 while (n-- > 0) { 1652 DPRINTF("cidx=%d d=%p\n", cidx, d); 1653 if (d->m) { 1654 if (d->flags & TX_SW_DESC_MAPPED) { 1655 bus_dmamap_unload(q->entry_tag, d->map); 1656 bus_dmamap_destroy(q->entry_tag, d->map); 1657 d->flags &= ~TX_SW_DESC_MAPPED; 1658 } 1659 if (m_get_priority(d->m) == cidx) { 1660 m_vec[nbufs] = d->m; 1661 d->m = NULL; 1662 nbufs++; 1663 } else { 1664 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); 1665 } 1666 } 1667 ++d; 1668 if (++cidx == q->size) { 1669 cidx = 0; 1670 d = q->sdesc; 1671 } 1672 } 1673 q->cidx = cidx; 1674 1675 return (nbufs); 1676 } 1677 1678 /** 1679 * is_new_response - check if a response is newly written 1680 * @r: the response descriptor 1681 * @q: the response queue 1682 * 1683 * Returns true if a response descriptor contains a yet unprocessed 1684 * response. 1685 */ 1686 static __inline int 1687 is_new_response(const struct rsp_desc *r, 1688 const struct sge_rspq *q) 1689 { 1690 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1691 } 1692 1693 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1694 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1695 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1696 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1697 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1698 1699 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1700 #define NOMEM_INTR_DELAY 2500 1701 1702 /** 1703 * write_ofld_wr - write an offload work request 1704 * @adap: the adapter 1705 * @m: the packet to send 1706 * @q: the Tx queue 1707 * @pidx: index of the first Tx descriptor to write 1708 * @gen: the generation value to use 1709 * @ndesc: number of descriptors the packet will occupy 1710 * 1711 * Write an offload work request to send the supplied packet. The packet 1712 * data already carry the work request with most fields populated. 1713 */ 1714 static void 1715 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1716 struct sge_txq *q, unsigned int pidx, 1717 unsigned int gen, unsigned int ndesc, 1718 bus_dma_segment_t *segs, unsigned int nsegs) 1719 { 1720 unsigned int sgl_flits, flits; 1721 struct work_request_hdr *from; 1722 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1723 struct tx_desc *d = &q->desc[pidx]; 1724 struct txq_state txqs; 1725 1726 if (immediate(m)) { 1727 q->sdesc[pidx].m = NULL; 1728 write_imm(d, m, m->m_len, gen); 1729 return; 1730 } 1731 1732 /* Only TX_DATA builds SGLs */ 1733 1734 from = mtod(m, struct work_request_hdr *); 1735 INT3; /// DEBUG this??? 1736 flits = 3; // XXXXXXXXXXXXXX 1737 1738 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1739 1740 make_sgl(sgp, segs, nsegs); 1741 sgl_flits = sgl_len(nsegs); 1742 1743 txqs.gen = q->gen; 1744 txqs.pidx = q->pidx; 1745 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1746 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1747 from->wr_hi, from->wr_lo); 1748 } 1749 1750 /** 1751 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1752 * @m: the packet 1753 * 1754 * Returns the number of Tx descriptors needed for the given offload 1755 * packet. These packets are already fully constructed. 
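 * XXX like write_ofld_wr() this still hard-codes flits = 3 and hits an INT3
 * marker; the offload transmit path is unfinished in this port.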
1756 */ 1757 static __inline unsigned int 1758 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1759 { 1760 unsigned int flits, cnt = 0; 1761 1762 1763 if (m->m_len <= WR_LEN) 1764 return 1; /* packet fits as immediate data */ 1765 1766 if (m->m_flags & M_IOVEC) 1767 cnt = mtomv(m)->mv_count; 1768 1769 INT3; // Debug this???? 1770 flits = 3; // XXXXXXXXX 1771 1772 return flits_to_desc(flits + sgl_len(cnt)); 1773 } 1774 1775 /** 1776 * ofld_xmit - send a packet through an offload queue 1777 * @adap: the adapter 1778 * @q: the Tx offload queue 1779 * @m: the packet 1780 * 1781 * Send an offload packet through an SGE offload queue. 1782 */ 1783 static int 1784 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1785 { 1786 int ret, nsegs; 1787 unsigned int ndesc; 1788 unsigned int pidx, gen; 1789 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1790 bus_dma_segment_t segs[TX_MAX_SEGS]; 1791 int i, cleaned; 1792 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1793 1794 mtx_lock(&q->lock); 1795 if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) { 1796 mtx_unlock(&q->lock); 1797 return (ret); 1798 } 1799 ndesc = calc_tx_descs_ofld(m, nsegs); 1800 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); 1801 1802 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1803 if (__predict_false(ret)) { 1804 if (ret == 1) { 1805 m_set_priority(m, ndesc); /* save for restart */ 1806 mtx_unlock(&q->lock); 1807 return EINTR; 1808 } 1809 goto again; 1810 } 1811 1812 gen = q->gen; 1813 q->in_use += ndesc; 1814 pidx = q->pidx; 1815 q->pidx += ndesc; 1816 if (q->pidx >= q->size) { 1817 q->pidx -= q->size; 1818 q->gen ^= 1; 1819 } 1820 #ifdef T3_TRACE 1821 T3_TRACE5(adap->tb[q->cntxt_id & 7], 1822 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 1823 ndesc, pidx, skb->len, skb->len - skb->data_len, 1824 skb_shinfo(skb)->nr_frags); 1825 #endif 1826 mtx_unlock(&q->lock); 1827 1828 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1829 check_ring_tx_db(adap, q); 1830 1831 for (i = 0; i < cleaned; i++) { 1832 m_freem_vec(m_vec[i]); 1833 } 1834 return (0); 1835 } 1836 1837 /** 1838 * restart_offloadq - restart a suspended offload queue 1839 * @qs: the queue set containing the offload queue 1840 * 1841 * Resumes transmission on a suspended Tx offload queue. 
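 * Runs from the queue's qresume workqueue task, which restart_tx() schedules
 * once enough descriptors have been freed up.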
1842 */ 1843 static void 1844 restart_offloadq(struct work *wk, void *data) 1845 { 1846 1847 struct mbuf *m; 1848 struct sge_qset *qs = data; 1849 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 1850 adapter_t *adap = qs->port->adapter; 1851 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 1852 bus_dma_segment_t segs[TX_MAX_SEGS]; 1853 int nsegs, i, cleaned; 1854 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 1855 1856 mtx_lock(&q->lock); 1857 again: cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec); 1858 1859 while ((m = mbufq_peek(&q->sendq)) != NULL) { 1860 unsigned int gen, pidx; 1861 unsigned int ndesc = m_get_priority(m); 1862 1863 if (__predict_false(q->size - q->in_use < ndesc)) { 1864 setbit(&qs->txq_stopped, TXQ_OFLD); 1865 smp_mb(); 1866 1867 if (should_restart_tx(q) && 1868 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 1869 goto again; 1870 q->stops++; 1871 break; 1872 } 1873 1874 gen = q->gen; 1875 q->in_use += ndesc; 1876 pidx = q->pidx; 1877 q->pidx += ndesc; 1878 if (q->pidx >= q->size) { 1879 q->pidx -= q->size; 1880 q->gen ^= 1; 1881 } 1882 1883 (void)mbufq_dequeue(&q->sendq); 1884 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 1885 mtx_unlock(&q->lock); 1886 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 1887 mtx_lock(&q->lock); 1888 } 1889 mtx_unlock(&q->lock); 1890 1891 #if USE_GTS 1892 set_bit(TXQ_RUNNING, &q->flags); 1893 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1894 #endif 1895 t3_write_reg(adap, A_SG_KDOORBELL, 1896 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1897 1898 for (i = 0; i < cleaned; i++) { 1899 m_freem_vec(m_vec[i]); 1900 } 1901 } 1902 1903 /** 1904 * queue_set - return the queue set a packet should use 1905 * @m: the packet 1906 * 1907 * Maps a packet to the SGE queue set it should use. The desired queue 1908 * set is carried in bits 1-3 in the packet's priority. 1909 */ 1910 static __inline int 1911 queue_set(const struct mbuf *m) 1912 { 1913 return m_get_priority(m) >> 1; 1914 } 1915 1916 /** 1917 * is_ctrl_pkt - return whether an offload packet is a control packet 1918 * @m: the packet 1919 * 1920 * Determines whether an offload packet should use an OFLD or a CTRL 1921 * Tx queue. This is indicated by bit 0 in the packet's priority. 1922 */ 1923 static __inline int 1924 is_ctrl_pkt(const struct mbuf *m) 1925 { 1926 return m_get_priority(m) & 1; 1927 } 1928 1929 /** 1930 * t3_offload_tx - send an offload packet 1931 * @tdev: the offload device to send to 1932 * @m: the packet 1933 * 1934 * Sends an offload packet. We use the packet priority to select the 1935 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1936 * should be sent as regular or control, bits 1-3 select the queue set. 
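 * For example, a priority value of 5 (binary 101) selects queue set 2 and
 * sends the packet through that set's control queue.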
1937 */ 1938 int 1939 t3_offload_tx(struct toedev *tdev, struct mbuf *m) 1940 { 1941 adapter_t *adap = tdev2adap(tdev); 1942 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 1943 1944 if (__predict_false(is_ctrl_pkt(m))) 1945 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 1946 1947 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 1948 } 1949 1950 static void 1951 restart_tx(struct sge_qset *qs) 1952 { 1953 if (isset(&qs->txq_stopped, TXQ_OFLD) && 1954 should_restart_tx(&qs->txq[TXQ_OFLD]) && 1955 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 1956 qs->txq[TXQ_OFLD].restarts++; 1957 workqueue_enqueue(qs->txq[TXQ_OFLD].qresume_task.wq, &qs->txq[TXQ_OFLD].qresume_task.w, NULL); 1958 } 1959 if (isset(&qs->txq_stopped, TXQ_CTRL) && 1960 should_restart_tx(&qs->txq[TXQ_CTRL]) && 1961 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 1962 qs->txq[TXQ_CTRL].restarts++; 1963 workqueue_enqueue(qs->txq[TXQ_CTRL].qresume_task.wq, &qs->txq[TXQ_CTRL].qresume_task.w, NULL); 1964 } 1965 } 1966 1967 /** 1968 * t3_sge_alloc_qset - initialize an SGE queue set 1969 * @sc: the controller softc 1970 * @id: the queue set id 1971 * @nports: how many Ethernet ports will be using this queue set 1972 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1973 * @p: configuration parameters for this queue set 1974 * @ntxq: number of Tx queues for the queue set 1975 * @pi: port info for queue set 1976 * 1977 * Allocate resources and initialize an SGE queue set. A queue set 1978 * comprises a response queue, two Rx free-buffer queues, and up to 3 1979 * Tx queues. The Tx queues are assigned roles in the order Ethernet 1980 * queue, offload queue, and control queue. 1981 */ 1982 int 1983 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1984 const struct qset_params *p, int ntxq, struct port_info *pi) 1985 { 1986 struct sge_qset *q = &sc->sge.qs[id]; 1987 int i, ret = 0; 1988 1989 init_qset_cntxt(q, id); 1990 1991 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1992 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1993 &q->fl[0].desc, &q->fl[0].sdesc, 1994 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1995 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1996 goto err; 1997 } 1998 1999 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2000 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2001 &q->fl[1].desc, &q->fl[1].sdesc, 2002 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2003 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2004 goto err; 2005 } 2006 2007 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2008 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2009 &q->rspq.desc_tag, &q->rspq.desc_map, 2010 NULL, NULL)) != 0) { 2011 goto err; 2012 } 2013 2014 for (i = 0; i < ntxq; ++i) { 2015 /* 2016 * The control queue always uses immediate data so does not 2017 * need to keep track of any mbufs. 2018 * XXX Placeholder for future TOE support. 2019 */ 2020 size_t sz = i == TXQ_CTRL ? 

/**
 * t3_sge_alloc_qset - initialize an SGE queue set
 * @sc: the controller softc
 * @id: the queue set id
 * @nports: how many Ethernet ports will be using this queue set
 * @irq_vec_idx: the IRQ vector index for response queue interrupts
 * @p: configuration parameters for this queue set
 * @ntxq: number of Tx queues for the queue set
 * @pi: port info for queue set
 *
 * Allocate resources and initialize an SGE queue set.  A queue set
 * comprises a response queue, two Rx free-buffer queues, and up to 3
 * Tx queues.  The Tx queues are assigned roles in the order Ethernet
 * queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, ret = 0;

	init_qset_cntxt(q, id);

	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
		    &q->fl[0].desc, &q->fl[0].sdesc,
		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
		    &q->fl[1].desc, &q->fl[1].sdesc,
		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
		    &q->rspq.desc_tag, &q->rspq.desc_map,
		    NULL, NULL)) != 0) {
		goto err;
	}

	for (i = 0; i < ntxq; ++i) {
		/*
		 * The control queue always uses immediate data so does not
		 * need to keep track of any mbufs.
		 * XXX Placeholder for future TOE support.
		 */
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		if ((ret = alloc_ring(sc, p->txq_size[i],
			    sizeof(struct tx_desc), sz,
			    &q->txq[i].phys_addr, &q->txq[i].desc,
			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
			    &q->txq[i].desc_map,
			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			goto err;
		}
		mbufq_init(&q->txq[i].sendq);
		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
		    0, irq_vec_idx, i);
		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
	}

	q->txq[TXQ_ETH].port = pi;

	q->txq[TXQ_OFLD].qresume_task.name = "restart_offloadq";
	q->txq[TXQ_OFLD].qresume_task.func = restart_offloadq;
	q->txq[TXQ_OFLD].qresume_task.context = q;
	kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
	    &q->txq[TXQ_OFLD].qresume_task, NULL, "cxgb_make_task");

	q->txq[TXQ_CTRL].qresume_task.name = "restart_ctrlq";
	q->txq[TXQ_CTRL].qresume_task.func = restart_ctrlq;
	q->txq[TXQ_CTRL].qresume_task.context = q;
	kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
	    &q->txq[TXQ_CTRL].qresume_task, NULL, "cxgb_make_task");

	q->txq[TXQ_ETH].qreclaim_task.name = "sge_txq_reclaim_handler";
	q->txq[TXQ_ETH].qreclaim_task.func = sge_txq_reclaim_handler;
	q->txq[TXQ_ETH].qreclaim_task.context = &q->txq[TXQ_ETH];
	kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
	    &q->txq[TXQ_ETH].qreclaim_task, NULL, "cxgb_make_task");

	q->txq[TXQ_OFLD].qreclaim_task.name = "sge_txq_reclaim_handler";
	q->txq[TXQ_OFLD].qreclaim_task.func = sge_txq_reclaim_handler;
	q->txq[TXQ_OFLD].qreclaim_task.context = &q->txq[TXQ_OFLD];
	kthread_create(PRI_NONE, 0, NULL, cxgb_make_task,
	    &q->txq[TXQ_OFLD].qreclaim_task, NULL, "cxgb_make_task");

	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.cidx = 0;
	q->rspq.size = p->rspq_size;

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

	q->fl[0].buf_size = MCLBYTES;
	q->fl[1].buf_size = MJUMPAGESIZE;

	q->lro.enabled = lro_default;

	mtx_lock(&sc->sge.reg_lock);
	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
	    q->rspq.phys_addr, q->rspq.size,
	    q->fl[0].buf_size, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_rspcntxt\n", ret);
		goto err_unlock;
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
		    q->fl[i].phys_addr, q->fl[i].size,
		    q->fl[i].buf_size, p->cong_thres, 1,
		    0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n",
			    ret, i);
			goto err_unlock;
		}
	}

	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
	    SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
	    q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
	    1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
		    USE_GTS, SGE_CNTXT_OFLD, id,
		    q->txq[TXQ_OFLD].phys_addr,
		    q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
		    SGE_CNTXT_CTRL, id,
		    q->txq[TXQ_CTRL].phys_addr,
		    q->txq[TXQ_CTRL].size,
		    q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
	    0, irq_vec_idx);
	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

	mtx_unlock(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
	    V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock(&sc->sge.reg_lock);
err:
	t3_free_qset(sc, q);

	return (ret);
}
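
/*
 * Illustrative sketch (assumptions flagged): a typical caller allocates its
 * queue sets at attach time, roughly as below.  The wrapper name and the
 * choice of parameters are hypothetical; only the t3_sge_alloc_qset()
 * signature comes from this file.  On failure the queue set has already
 * been torn down via t3_free_qset(), so the caller just propagates the
 * error.
 */
#if 0
static int
example_alloc_qset0(adapter_t *sc, struct port_info *pi, int irq_vec_idx,
    const struct qset_params *qp)
{
	/* one response queue, two free lists and all three Tx queues */
	return (t3_sge_alloc_qset(sc, 0 /* id */, 1 /* nports */, irq_vec_idx,
	    qp, SGE_TXQ_PER_SET, pi));
}
#endif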

void
t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
	struct ifnet *ifp = pi->ifp;

	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *),
	    cpl->iff);

	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (cpl->vlan_valid) {
		vlan_set_tag(m, ntohs(cpl->vlan));
	}
#endif

	m_set_rcvif(m, ifp);
	m_explode(m);
	/*
	 * adjust after conversion to mbuf chain
	 */
	m_adj(m, sizeof(*cpl) + ethpad);

	if_percpuq_enqueue(ifp->if_percpuq, m);
}
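
/*
 * Note: when t3_rx_eth() runs, the CPL_RX_PKT header and the 2-byte
 * alignment pad (ethpad) still precede the Ethernet header in the mbuf; the
 * m_adj() above strips them so the frame handed to if_percpuq_enqueue()
 * starts at the Ethernet header.
 */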

/**
 * get_packet - return the next ingress packet buffer from a free list
 * @adap: the adapter that received the packet
 * @drop_thres: # of remaining buffers before we start dropping packets
 * @qs: the qset that the SGE free list holding the packet belongs to
 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 * @r: response descriptor
 *
 * Get the next packet from a free list and complete setup of the
 * mbuf.  If the packet is small we make a copy and recycle the
 * original buffer, otherwise we use the original buffer itself.  If a
 * positive drop threshold is supplied packets are dropped and their
 * buffers recycled if (a) the number of remaining buffers is under the
 * threshold and the packet is too big to copy, or (b) the packet should
 * be copied but there is no memory for the copy.
 */
#ifdef DISABLE_MBUF_IOVEC

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(fl->entry_tag, sd->map);

	m->m_len = len;
	m_cljset(m, sd->cl, fl->type);

	switch (sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		m->m_flags |= M_PKTHDR;
		ret = 1;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		m->m_flags &= ~M_PKTHDR;
		if (mh->mh_tail == NULL) {
			if (cxgb_debug)
				printf("discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m->m_pkthdr.len = len;
		mh->mh_head = mh->mh_tail = m;
		m->m_flags |= M_PKTHDR;
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		m->m_flags &= ~M_PKTHDR;
		mh->mh_head->m_pkthdr.len += len;
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		ret = 1;
		break;
	}
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

#else
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	void *cl;
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, 0, len, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		cl = mtod(m, void *);
		memcpy(cl, sd->cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
	} else {
		cl = sd->cl;
		bus_dmamap_unload(fl->entry_tag, sd->map);
	}
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		m->m_len = m->m_pkthdr.len = len;
		if (cl == sd->cl)
			m_cljset(m, cl, fl->type);
		ret = 1;
		goto done;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m_iovinit(m);
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		ret = 1;
		break;
	}
	m_iovappend(m, cl, fl->buf_size, len, 0);

done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
#endif
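
/*
 * Note on the small-packet path above (the non-DISABLE_MBUF_IOVEC variant):
 * a frame that fits in SGE_RX_COPY_THRES bytes and arrives as a single
 * SOP+EOP descriptor is copied into the caller's mbuf and the original
 * cluster is recycled back onto the free list, saving a DMA unload per
 * small packet; larger or multi-descriptor frames keep the cluster itself
 * and the DMA map is unloaded instead.
 */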

/**
 * handle_rsp_cntrl_info - handles control information in a response
 * @qs: the queue set corresponding to the response
 * @flags: the response control flags
 *
 * Handles the control information of an SGE response, such as GTS
 * indications and completion credits for the queue set's Tx queues.
 * HW coalesces credits, so we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits) {
		qs->txq[TXQ_ETH].processed += credits;
		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
			workqueue_enqueue(qs->port->timer_reclaim_task.wq,
			    &qs->port->timer_reclaim_task.w, NULL);
	}

	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;
}

static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;
}
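
/*
 * Note: handle_rsp_cntrl_info() above credits TXQ0/TXQ1/TXQ2 completions to
 * the Ethernet, offload and control Tx queues respectively.  check_ring_db()
 * is currently a no-op in this port; the GTS doorbell state it would act on
 * is only maintained when USE_GTS is enabled.
 */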

/**
 * process_responses - process responses from an SGE response queue
 * @adap: the adapter
 * @qs: the queue set to which the response queue belongs
 * @budget: how many responses can be processed in this round
 *
 * Process responses from an SGE response queue up to the supplied budget.
 * Responses include received packets as well as credits and other events
 * for the queues that belong to the response queue's queue set.
 * A negative budget is effectively unlimited.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue.  If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
#ifdef DISABLE_MBUF_IOVEC
			if (cxgb_debug)
				printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
				    r->rss_hdr.opcode, rspq->cidx);

			if (get_imm_packet(adap, r, &rspq->rspq_mh) == 0) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			} else {
				eop = 1;
			}
#else
			struct mbuf *m = NULL;

			if (rspq->rspq_mbuf == NULL)
				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
			else
				m = m_gethdr(M_DONTWAIT, MT_DATA);

			/*
			 * XXX revisit me
			 */
			if (rspq->rspq_mbuf == NULL && m == NULL) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags))
				goto skip;
			eop = 1;
#endif
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

#ifdef DISABLE_MBUF_IOVEC
			struct mbuf *m;
			m = m_gethdr(M_NOWAIT, MT_DATA);

			if (m == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}

			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m);
#else
			if (rspq->rspq_mbuf == NULL)
				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
			if (rspq->rspq_mbuf == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}
			eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r);
#endif
			ethpad = 2;
		} else {
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}
#ifndef DISABLE_MBUF_IOVEC
	skip:
#endif
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) +
			    L1_CACHE_BYTES);

			if (eth) {
				t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head,
				    ethpad, rss_hash, rss_csum, lro);

				rspq->rspq_mh.mh_head = NULL;
			} else {
				rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
				/*
				 * XXX size mismatch
				 */
				m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
			}
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);

		}
		--budget_left;
	}

	t3_lro_flush(adap, qs, &qs->lro);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}
	if (work)
		t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
		    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return work;
}
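
/*
 * Note: the A_SG_GTS write above does double duty: V_NEWINDEX() tells the
 * hardware how far the driver has consumed the response queue and
 * V_NEWTIMER() programs the holdoff delay for the next interrupt, using the
 * value process_responses() selected (a longer NOMEM_INTR_DELAY is chosen
 * when mbuf allocation failed).
 */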

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
int
t3b_intr(void *data)
{
	uint32_t i, map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return (FALSE);

	if (__predict_false(map & F_ERRINTR))
		workqueue_enqueue(adap->slow_intr_task.wq,
		    &adap->slow_intr_task.w, NULL);

	mtx_lock(&q0->lock);
	for_each_port(adap, i)
		if (map & (1 << i))
			process_responses_gts(adap, &adap->sge.qs[i].rspq);
	mtx_unlock(&q0->lock);

	return (TRUE);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
int
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	int i, new_packets = 0;

	mtx_lock(&q0->lock);

	for_each_port(adap, i)
		if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
			new_packets = 1;
	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		workqueue_enqueue(adap->slow_intr_task.wq,
		    &adap->slow_intr_task.w, NULL);

	return (TRUE);
}

int
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	mtx_lock(&rspq->lock);
	if (process_responses_gts(adap, rspq) == 0)
		rspq->unhandled_irqs++;
	mtx_unlock(&rspq->lock);

	return (TRUE);
}

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return sizeof(struct tx_desc);
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return sizeof(struct rsp_desc);
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return sizeof(struct rx_desc);
}
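
/*
 * Illustrative sketch only: t3_get_desc() copies one raw hardware descriptor
 * into a caller-supplied buffer for debugging.  The helper below is
 * hypothetical and merely shows the calling convention for dumping an
 * Ethernet Tx descriptor; the buffer must be at least as large as the
 * descriptor being dumped.
 */
#if 0
static int
example_dump_eth_txd(const struct sge_qset *qs, unsigned int idx,
    unsigned char *buf /* >= sizeof(struct tx_desc) bytes */)
{
	/* queue numbers 0..2 are Tx, 3 is the response queue, 4..5 are Rx */
	int len = t3_get_desc(qs, 0 /* TXQ_ETH */, idx, buf);

	return (len == sizeof(struct tx_desc) ? 0 : EINVAL);
}
#endif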