lp_setup.c revision 4a49301e
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*
 **************************************************************************/

/**
 * \brief  Primitive rasterization/rendering (points, lines, triangles)
 *
 * \author  Keith Whitwell <keith@tungstengraphics.com>
 * \author  Brian Paul
 */

#include "lp_context.h"
#include "lp_quad.h"
#include "lp_setup.h"
#include "lp_state.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_vertex.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "lp_bld_debug.h"
#include "lp_tile_cache.h"
#include "lp_tile_soa.h"


/* Set to 1 to compile in the per-vertex / per-fragment debug output. */
#define DEBUG_VERTS 0
#define DEBUG_FRAGS 0

/**
 * Triangle edge info
 */
struct edge {
   float dx;      /**< X(v1) - X(v0), used only during setup */
   float dy;      /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;    /**< dx/dy */
   float sx, sy;  /**< first sample point coord */
   int lines;     /**< number of lines on this edge */
};


#define MAX_QUADS 16


/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 */
struct setup_context {
   struct llvmpipe_context *llvmpipe;

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    *
    * For triangles setup_sort_vertices() orders these by window Y
    * (vmin <= vmid <= vmax).  For lines setup_line_coefficients() sets
    * vmin = v0 and vmax = v1 without sorting.
    */
   const float (*vmax)[4];
   const float (*vmid)[4];
   const float (*vmin)[4];
   const float (*vprovoke)[4];   /* vertex supplying INTERP_CONSTANT values */

   struct edge ebot;   /* vmin -> vmid */
   struct edge etop;   /* vmid -> vmax */
   struct edge emaj;   /* vmin -> vmax */

   float oneoverarea;  /* signed 1/area for triangles, 1/(dx^2+dy^2) for lines */
   int facing;         /* written by setup_sort_vertices() from det and front_winding */

   float pixel_offset; /* 0.5 with gl_rasterization_rules, else 0.0 */

   struct quad_header quad[MAX_QUADS];
   struct quad_header *quad_ptrs[MAX_QUADS];
   unsigned count;

   struct quad_interp_coef coef;

   /* Pending spans for the pair of scanlines starting at span.y;
    * filled in by subtriangle() and consumed by flush_spans().
    */
   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];
      int y;
   } span;

#if DEBUG_FRAGS
   uint numFragsEmitted;  /**< per primitive */
   uint numFragsWritten;  /**< per primitive */
#endif

   unsigned winding;   /* which winding to cull */
};



/**
 * Run the current JIT-compiled fragment shader variant over a row of quads.
 *
 * The asserts below require that the quads form exactly one
 * TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT block: nr * QUAD_SIZE pixels,
 * consecutive in x (two pixels per quad), all on the same y, with the first
 * quad aligned to the block size.
 *
 * \param llvmpipe  context providing the shader, framebuffer and JIT state
 * \param quads     array of nr quad pointers; quads[0] supplies the block
 *                  origin and the interpolation coefficients
 * \param nr        number of quads (the mask expansion below always reads
 *                  the first four)
 */
ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
            struct quad_header *quads[],
            unsigned nr)
{
   struct lp_fragment_shader *fs = llvmpipe->fs;
   struct quad_header *quad = quads[0];
   const unsigned x = quad->input.x0;
   const unsigned y = quad->input.y0;
   uint8_t *tile;
   uint8_t *color;
   void *depth;
   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
   unsigned chan_index;
   unsigned q;

   /* Nothing to run if no shader variant has been selected. */
   assert(fs->current);
   if(!fs->current)
      return;

   /* Sanity checks */
   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
   assert(x % TILE_VECTOR_WIDTH == 0);
   assert(y % TILE_VECTOR_HEIGHT == 0);
   for (q = 0; q < nr; ++q) {
      assert(quads[q]->input.x0 == x + q*2);
      assert(quads[q]->input.y0 == y);
   }

   /* mask: expand each bit of the quad's coverage mask into a full
    * 32-bit all-ones / all-zeros word (bit chan_index -> mask[q][chan_index]).
    */
   for (q = 0; q < 4; ++q)
      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;

   /* color buffer: pointer into the cached tile of color buffer 0, or NULL
    * when no color buffer is bound.
    */
   if(llvmpipe->framebuffer.nr_cbufs >= 1 &&
      llvmpipe->framebuffer.cbufs[0]) {
      tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
      color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
   }
   else
      color = NULL;

   /* depth buffer: address computed directly in the mapped zs transfer,
    * or NULL when none is mapped.
    */
   if(llvmpipe->zsbuf_map) {
      assert((x % 2) == 0);
      assert((y % 2) == 0);
      depth = llvmpipe->zsbuf_map +
              y*llvmpipe->zsbuf_transfer->stride +
              2*x*llvmpipe->zsbuf_transfer->block.size;
   }
   else
      depth = NULL;

   /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
   assert(lp_check_alignment(mask, 16));

   assert(lp_check_alignment(depth, 16));
   assert(lp_check_alignment(color, 16));
   assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));

   /* run shader */
   fs->current->jit_function( &llvmpipe->jit_context,
                              x, y,
                              quad->coef->a0,
                              quad->coef->dadx,
                              quad->coef->dady,
                              &mask[0][0],
                              color,
                              depth);
}




/**
 * Do triangle cull test using tri determinant (sign indicates orientation)
 * A zero determinant (degenerate triangle) is always culled.
 * \param setup  supplies the winding(s) to cull in setup->winding
 * \param det    signed determinant of the triangle, see calc_det()
 * \return true if triangle is to be culled.
 */
static INLINE boolean
cull_tri(const struct setup_context *setup, float det)
{
   if (det != 0) {
      /* if (det < 0 then Z points toward camera and triangle is
       * counter-clockwise winding.
       */
      unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;

      /* keep the triangle if its winding is not one of those being culled */
      if ((winding & setup->winding) == 0)
         return FALSE;
   }

   /* Culled:
    */
   return TRUE;
}



/**
 * Clip setup->quad against the scissor/surface bounds.
* A quad lying entirely outside the clip rectangle gets an empty mask;
 * otherwise the mask bits of the column/row that falls outside are cleared.
 * \param setup  supplies the clip rectangle (setup->llvmpipe->cliprect)
 * \param quad   2x2 quad whose inout.mask is updated in place
 */
static INLINE void
quad_clip( struct setup_context *setup, struct quad_header *quad )
{
   const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;

   if (quad->input.x0 >= maxx ||
       quad->input.y0 >= maxy ||
       quad->input.x0 + 1 < minx ||
       quad->input.y0 + 1 < miny) {
      /* totally clipped */
      quad->inout.mask = 0x0;
      return;
   }
   /* first column / row is outside: keep only the second one */
   if (quad->input.x0 < minx)
      quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
   if (quad->input.y0 < miny)
      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
   /* second column / row is outside: keep only the first one */
   if (quad->input.x0 == maxx - 1)
      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
   if (quad->input.y0 == maxy - 1)
      quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}



/**
 * Given an X or Y coordinate, return the block/quad coordinate that it
 * belongs to.
 * This rounds down to a multiple of 2 (the quad size in each direction).
 */
static INLINE int block( int x )
{
   return x & ~(2-1);
}

/**
 * Round an X coordinate down to a multiple of TILE_VECTOR_WIDTH.
 * The mask trick assumes TILE_VECTOR_WIDTH is a power of two.
 */
static INLINE int block_x( int x )
{
   return x & ~(TILE_VECTOR_WIDTH - 1);
}


/**
 * Emit a quad (pass to next stage) with clipping.
 *
 * The quad is clipped with quad_clip() and, if any pixel survives, shaded.
 * Because shade_quads() wants a full row of four quads, the quad is placed
 * in its slot of a TILE_VECTOR_WIDTH-aligned row and the other three slots
 * are filled with zeroed quads (empty mask) sharing the same y and coef.
 */
static INLINE void
clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
{
   quad_clip( setup, quad );

   if (quad->inout.mask) {
      struct llvmpipe_context *lp = setup->llvmpipe;

#if 1
      /* XXX: The blender expects 4 quads. This is far from efficient, but
       * until we codegenerate single-quad variants of the fragment pipeline
       * we need this hack. */
      const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
      struct quad_header quads[4];
      struct quad_header *quad_ptrs[4];
      int x0 = block_x(quad->input.x0);
      unsigned i;

      assert(nr_quads == 4);

      for(i = 0; i < nr_quads; ++i) {
         int x = x0 + 2*i;
         if(x == quad->input.x0)
            memcpy(&quads[i], quad, sizeof quads[i]);
         else {
            /* padding quad: mask stays 0 from the memset */
            memset(&quads[i], 0, sizeof quads[i]);
            quads[i].input.x0 = x;
            quads[i].input.y0 = quad->input.y0;
            quads[i].coef = quad->coef;
         }
         quad_ptrs[i] = &quads[i];
      }

      shade_quads( lp, quad_ptrs, nr_quads );
#else
      shade_quads( lp, &quad, 1 );
#endif
   }
}


/**
 * Render a horizontal span of quads
 *
 * Consumes the pending spans of the two scanlines recorded in setup->span
 * (left[]/right[] for row 0 and row 1 of the pair at span.y), shading them
 * in TILE_VECTOR_WIDTH-wide steps, then resets setup->span to empty.
 */
static void flush_spans( struct setup_context *setup )
{
   const int step = TILE_VECTOR_WIDTH;
   const int xleft0 = setup->span.left[0];
   const int xleft1 = setup->span.left[1];
   const int xright0 = setup->span.right[0];
   const int xright1 = setup->span.right[1];


   int minleft = block_x(MIN2(xleft0, xleft1));
   int maxright = MAX2(xright0, xright1);
   int x;

   for (x = minleft; x < maxright; x += step) {
      /* number of pixels to drop at each end of this step, per row */
      unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
      unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
      unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
      unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
      unsigned lx = x;
      const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
      unsigned q = 0;

      /* low skip_left bits set */
      unsigned skipmask_left0 = (1U << skip_left0) - 1U;
      unsigned skipmask_left1 = (1U << skip_left1) - 1U;

      /* These calculations fail when step == 32 and skip_right == 0.
       */
      unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
      unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);

      /* one bit per pixel of the step: set where the row is covered */
      unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
      unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;

      if (mask0 | mask1) {
         for(q = 0; q < nr_quads; ++q) {
            /* two pixels of row 0 in bits 0-1, two of row 1 in bits 2-3 */
            unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
            setup->quad[q].input.x0 = lx;
            setup->quad[q].input.y0 = setup->span.y;
            setup->quad[q].inout.mask = quadmask;
            setup->quad_ptrs[q] = &setup->quad[q];
            mask0 >>= 2;
            mask1 >>= 2;
            lx += 2;
         }
         /* all coverage bits must have been consumed */
         assert(!(mask0 | mask1));

         shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
      }
   }


   /* reset to "no span pending" */
   setup->span.y = 0;
   setup->span.right[0] = 0;
   setup->span.right[1] = 0;
   setup->span.left[0] = 1000000;     /* greater than right[0] */
   setup->span.left[1] = 1000000;     /* greater than right[1] */
}


#if DEBUG_VERTS
/**
 * Debug helper: print every attribute of a vertex.
 * Only the first component of each attribute is tested for inf/NaN.
 */
static void print_vertex(const struct setup_context *setup,
                         const float (*v)[4])
{
   int i;
   debug_printf(" Vertex: (%p)\n", v);
   for (i = 0; i < setup->quad[0].nr_attrs; i++) {
      debug_printf(" %d: %f %f %f %f\n", i,
              v[i][0], v[i][1], v[i][2], v[i][3]);
      if (util_is_inf_or_nan(v[i][0])) {
         debug_printf(" NaN!\n");
      }
   }
}
#endif

/**
 * Sort the vertices from top to bottom order, setting up the triangle
 * edge fields (ebot, emaj, etop).
* Also computes setup->oneoverarea, setup->facing and setup->pixel_offset.
 * \param det  triangle determinant as returned by calc_det(); only its sign
 *             is used here, to derive the facing
 * \return FALSE if 1/area is inf or NaN (the caller drops the tri),
 *         TRUE otherwise
 */
static boolean setup_sort_vertices( struct setup_context *setup,
                                    float det,
                                    const float (*v0)[4],
                                    const float (*v1)[4],
                                    const float (*v2)[4] )
{
   /* NOTE(review): always the last vertex here, whereas
    * setup_line_coefficients() honours rasterizer->flatshade_first.
    * Confirm whether triangles rely on the caller to reorder vertices.
    */
   setup->vprovoke = v2;

   /* determine bottom to top order of vertices */
   {
      float y0 = v0[0][1];
      float y1 = v1[0][1];
      float y2 = v2[0][1];
      if (y0 <= y1) {
         if (y1 <= y2) {
            /* y0<=y1<=y2 */
            setup->vmin = v0;
            setup->vmid = v1;
            setup->vmax = v2;
         }
         else if (y2 <= y0) {
            /* y2<=y0<=y1 */
            setup->vmin = v2;
            setup->vmid = v0;
            setup->vmax = v1;
         }
         else {
            /* y0<=y2<=y1 */
            setup->vmin = v0;
            setup->vmid = v2;
            setup->vmax = v1;
         }
      }
      else {
         if (y0 <= y2) {
            /* y1<=y0<=y2 */
            setup->vmin = v1;
            setup->vmid = v0;
            setup->vmax = v2;
         }
         else if (y2 <= y1) {
            /* y2<=y1<=y0 */
            setup->vmin = v2;
            setup->vmid = v1;
            setup->vmax = v0;
         }
         else {
            /* y1<=y2<=y0 */
            setup->vmin = v1;
            setup->vmid = v2;
            setup->vmax = v0;
         }
      }
   }

   /* edge deltas: ebot = vmin->vmid, emaj = vmin->vmax, etop = vmid->vmax */
   setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
   setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
   setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
   setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];

   /*
    * Compute triangle's area.  Use 1/area to compute partial
    * derivatives of attributes later.
    *
    * The area will be the same as prim->det, but the sign may be
    * different depending on how the vertices get sorted above.
    *
    * To determine whether the primitive is front or back facing we
    * use the prim->det value because its sign is correct.
    */
   {
      const float area = (setup->emaj.dx * setup->ebot.dy -
                          setup->ebot.dx * setup->emaj.dy);

      setup->oneoverarea = 1.0f / area;

      /*
      debug_printf("%s one-over-area %f  area %f  det %f\n",
                   __FUNCTION__, setup->oneoverarea, area, det );
      */
      /* a zero area yields inf here and rejects the triangle */
      if (util_is_inf_or_nan(setup->oneoverarea))
         return FALSE;
   }

   /* We need to know if this is a front or back-facing triangle for:
    *  - the GLSL gl_FrontFacing fragment attribute (bool)
    *  - two-sided stencil test
    */
   setup->facing =
      ((det > 0.0) ^
       (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));

   /* Prepare pixel offset for rasterisation:
    *  - pixel center (0.5, 0.5) for GL, or
    *  - assume (0.0, 0.0) for other APIs.
    */
   if (setup->llvmpipe->rasterizer->gl_rasterization_rules) {
      setup->pixel_offset = 0.5f;
   } else {
      setup->pixel_offset = 0.0f;
   }

   return TRUE;
}


/**
 * Compute a0, dadx and dady for a linearly interpolated coefficient,
 * for a triangle.
 * The result is written to coefficient slot 0 (the position slot),
 * component i.
 * \param vertSlot  which vertex attribute to read
 * \param i         which component of the attribute (0..3)
 */
static void tri_pos_coeff( struct setup_context *setup,
                           uint vertSlot, unsigned i)
{
   float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
   float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
   float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
   float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
   float dadx = a * setup->oneoverarea;
   float dady = b * setup->oneoverarea;

   assert(i <= 3);

   setup->coef.dadx[0][i] = dadx;
   setup->coef.dady[0][i] = dady;

   /* calculate a0 as the value which would be sampled for the
    * fragment at (0,0), taking into account that we want to sample at
    * pixel centers, in other words (pixel_offset, pixel_offset).
    *
    * this is neat but unfortunately not a good way to do things for
    * triangles with very large values of dadx or dady as it will
    * result in the subtraction and re-addition from a0 of a very
    * large number, which means we'll end up losing a lot of the
    * fractional bits and precision from a0.  the way to fix this is
    * to define a0 as the sample at a pixel center somewhere near vmin
    * instead - i'll switch to this later.
    */
   setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
                            dady * (setup->vmin[0][1] - setup->pixel_offset)));

   /*
   debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
                slot, "xyzw"[i],
                setup->coef[slot].a0[i],
                setup->coef[slot].dadx[i],
                setup->coef[slot].dady[i]);
   */
}


/**
 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 * The value comes from the provoking vertex, setup->vprovoke[vertSlot][i].
 * The result is put into coefficient slot 0, component i, with zero
 * dadx/dady.
 * \param vertSlot  which vertex attribute to read
 * \param i         which component of the attribute (0..3)
 */
static void const_pos_coeff( struct setup_context *setup,
                             uint vertSlot, unsigned i)
{
   setup->coef.dadx[0][i] = 0;
   setup->coef.dady[0][i] = 0;

   /* need provoking vertex info!
    */
   setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
}


/**
 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
 * All NUM_CHANNELS components are taken from the provoking vertex,
 * setup->vprovoke[vertSlot], and stored in coefficient slot 1 + attrib
 * with zero dadx/dady.
 * \param attrib    which fragment shader input (slot 0 is the position)
 * \param vertSlot  which vertex attribute to read
 */
static void const_coeff( struct setup_context *setup,
                         unsigned attrib,
                         uint vertSlot)
{
   unsigned i;
   for (i = 0; i < NUM_CHANNELS; ++i) {
      setup->coef.dadx[1 + attrib][i] = 0;
      setup->coef.dady[1 + attrib][i] = 0;

      /* need provoking vertex info!
       */
      setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
   }
}


/**
 * Compute a0, dadx and dady for a linearly interpolated coefficient,
 * for a triangle.
 * All NUM_CHANNELS components are stored in coefficient slot 1 + attrib.
 * \param attrib    which fragment shader input (slot 0 is the position)
 * \param vertSlot  which vertex attribute to read
 */
static void tri_linear_coeff( struct setup_context *setup,
                              unsigned attrib,
                              uint vertSlot)
{
   unsigned i;
   for (i = 0; i < NUM_CHANNELS; ++i) {
      float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
      float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
      float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
      float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
      float dadx = a * setup->oneoverarea;
      float dady = b * setup->oneoverarea;

      assert(i <= 3);

      setup->coef.dadx[1 + attrib][i] = dadx;
      setup->coef.dady[1 + attrib][i] = dady;

      /* calculate a0 as the value which would be sampled for the
       * fragment at (0,0), taking into account that we want to sample at
       * pixel centers, in other words (pixel_offset, pixel_offset).
       *
       * this is neat but unfortunately not a good way to do things for
       * triangles with very large values of dadx or dady as it will
       * result in the subtraction and re-addition from a0 of a very
       * large number, which means we'll end up losing a lot of the
       * fractional bits and precision from a0.  the way to fix this is
       * to define a0 as the sample at a pixel center somewhere near vmin
       * instead - i'll switch to this later.
       */
      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
                      dady * (setup->vmin[0][1] - setup->pixel_offset)));

      /*
      debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
                   slot, "xyzw"[i],
                   setup->coef[slot].a0[i],
                   setup->coef[slot].dadx[i],
                   setup->coef[slot].dady[i]);
      */
   }
}


/**
 * Compute a0, dadx and dady for a perspective-corrected interpolant,
 * for a triangle.
* We basically multiply the vertex value by 1/w before computing
 * the plane coefficients (a0, dadx, dady).
 * Later, when we compute the value at a particular fragment position we'll
 * divide the interpolated value by the interpolated W at that fragment.
 * All NUM_CHANNELS components are stored in coefficient slot 1 + attrib.
 * \param attrib    which fragment shader input (slot 0 is the position)
 * \param vertSlot  which vertex attribute to read
 */
static void tri_persp_coeff( struct setup_context *setup,
                             unsigned attrib,
                             uint vertSlot)
{
   unsigned i;
   for (i = 0; i < NUM_CHANNELS; ++i) {
      /* premultiply by 1/w  (v[0][3] is always W):
       */
      float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
      float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
      float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
      float botda = mida - mina;
      float majda = maxa - mina;
      float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
      float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
      float dadx = a * setup->oneoverarea;
      float dady = b * setup->oneoverarea;

      /*
      debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
                   setup->vmin[vertSlot][i],
                   setup->vmid[vertSlot][i],
                   setup->vmax[vertSlot][i]
             );
      */
      assert(i <= 3);

      setup->coef.dadx[1 + attrib][i] = dadx;
      setup->coef.dady[1 + attrib][i] = dady;
      /* a0 is extrapolated from the premultiplied value at vmin */
      setup->coef.a0[1 + attrib][i] = (mina -
                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
   }
}


/**
 * Special coefficient setup for gl_FragCoord.
 * X and Y are trivial, though Y has to be inverted for OpenGL.
 * Z and W are copied from posCoef which should have already been computed.
 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 *
 * NOTE(review): no Y inversion is performed here (a0 = 0, dady = 1);
 * presumably it is handled elsewhere - confirm.
 * \param slot  which fragment shader input; written to coefficient
 *              slot 1 + slot
 */
static void
setup_fragcoord_coeff(struct setup_context *setup, uint slot)
{
   /*X*/
   setup->coef.a0[1 + slot][0] = 0;
   setup->coef.dadx[1 + slot][0] = 1.0;
   setup->coef.dady[1 + slot][0] = 0.0;
   /*Y*/
   setup->coef.a0[1 + slot][1] = 0.0;
   setup->coef.dadx[1 + slot][1] = 0.0;
   setup->coef.dady[1 + slot][1] = 1.0;
   /*Z*/
   setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
   setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
   setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
   /*W*/
   setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
   setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
   setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
}



/**
 * Compute the setup->coef[] array dadx, dady, a0 values.
 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
 *
 * Position Z and W go to coefficient slot 0; fragment shader input N goes
 * to slot 1 + N, using the interpolation mode recorded in the vertex info.
 */
static void setup_tri_coefficients( struct setup_context *setup )
{
   struct llvmpipe_context *llvmpipe = setup->llvmpipe;
   const struct lp_fragment_shader *lpfs = llvmpipe->fs;
   const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
   uint fragSlot;

   /* z and w are done by linear interpolation:
    */
   tri_pos_coeff(setup, 0, 2);
   tri_pos_coeff(setup, 0, 3);

   /* setup interpolation for all the remaining attributes:
    */
   for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
      const uint vertSlot = vinfo->attrib[fragSlot].src_index;

      switch (vinfo->attrib[fragSlot].interp_mode) {
      case INTERP_CONSTANT:
         const_coeff(setup, fragSlot, vertSlot);
         break;
      case INTERP_LINEAR:
         tri_linear_coeff(setup, fragSlot, vertSlot);
         break;
      case INTERP_PERSPECTIVE:
         tri_persp_coeff(setup, fragSlot, vertSlot);
         break;
      case INTERP_POS:
         setup_fragcoord_coeff(setup, fragSlot);
         break;
      default:
         assert(0);
      }

      /* the face input overrides component 0 with a constant derived
       * from setup->facing
       */
      if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
         setup->coef.dadx[1 + fragSlot][0] = 0.0;
         setup->coef.dady[1 + fragSlot][0] = 0.0;
      }
   }
}



/**
 * Compute the scan-conversion parameters of the three triangle edges:
 * first sampled scanline (sy), number of scanlines (lines), slope (dxdy)
 * and the x position on the first scanline (sx).
 * Must be called after setup_sort_vertices().
 */
static void setup_tri_edges( struct setup_context *setup )
{
   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;

   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;

   /* major edge: vmin -> vmax */
   setup->emaj.sy = ceilf(vmin_y);
   setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
   setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
   setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;

   /* top edge: vmid -> vmax */
   setup->etop.sy = ceilf(vmid_y);
   setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
   setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
   setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;

   /* bottom edge: vmin -> vmid */
   setup->ebot.sy = ceilf(vmin_y);
   setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
   setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
   setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
}


/**
 * Render the upper or lower half of a triangle.
 * Scissoring/cliprect is applied here too.
786 */ 787static void subtriangle( struct setup_context *setup, 788 struct edge *eleft, 789 struct edge *eright, 790 unsigned lines ) 791{ 792 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; 793 const int minx = (int) cliprect->minx; 794 const int maxx = (int) cliprect->maxx; 795 const int miny = (int) cliprect->miny; 796 const int maxy = (int) cliprect->maxy; 797 int y, start_y, finish_y; 798 int sy = (int)eleft->sy; 799 800 assert((int)eleft->sy == (int) eright->sy); 801 802 /* clip top/bottom */ 803 start_y = sy; 804 if (start_y < miny) 805 start_y = miny; 806 807 finish_y = sy + lines; 808 if (finish_y > maxy) 809 finish_y = maxy; 810 811 start_y -= sy; 812 finish_y -= sy; 813 814 /* 815 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); 816 */ 817 818 for (y = start_y; y < finish_y; y++) { 819 820 /* avoid accumulating adds as floats don't have the precision to 821 * accurately iterate large triangle edges that way. luckily we 822 * can just multiply these days. 823 * 824 * this is all drowned out by the attribute interpolation anyway. 825 */ 826 int left = (int)(eleft->sx + y * eleft->dxdy); 827 int right = (int)(eright->sx + y * eright->dxdy); 828 829 /* clip left/right */ 830 if (left < minx) 831 left = minx; 832 if (right > maxx) 833 right = maxx; 834 835 if (left < right) { 836 int _y = sy + y; 837 if (block(_y) != setup->span.y) { 838 flush_spans(setup); 839 setup->span.y = block(_y); 840 } 841 842 setup->span.left[_y&1] = left; 843 setup->span.right[_y&1] = right; 844 } 845 } 846 847 848 /* save the values so that emaj can be restarted: 849 */ 850 eleft->sx += lines * eleft->dxdy; 851 eright->sx += lines * eright->dxdy; 852 eleft->sy += lines; 853 eright->sy += lines; 854} 855 856 857/** 858 * Recalculate prim's determinant. This is needed as we don't have 859 * get this information through the vbuf_render interface & we must 860 * calculate it here. 
861 */ 862static float 863calc_det( const float (*v0)[4], 864 const float (*v1)[4], 865 const float (*v2)[4] ) 866{ 867 /* edge vectors e = v0 - v2, f = v1 - v2 */ 868 const float ex = v0[0][0] - v2[0][0]; 869 const float ey = v0[0][1] - v2[0][1]; 870 const float fx = v1[0][0] - v2[0][0]; 871 const float fy = v1[0][1] - v2[0][1]; 872 873 /* det = cross(e,f).z */ 874 return ex * fy - ey * fx; 875} 876 877 878/** 879 * Do setup for triangle rasterization, then render the triangle. 880 */ 881void llvmpipe_setup_tri( struct setup_context *setup, 882 const float (*v0)[4], 883 const float (*v1)[4], 884 const float (*v2)[4] ) 885{ 886 float det; 887 888#if DEBUG_VERTS 889 debug_printf("Setup triangle:\n"); 890 print_vertex(setup, v0); 891 print_vertex(setup, v1); 892 print_vertex(setup, v2); 893#endif 894 895 if (setup->llvmpipe->no_rast) 896 return; 897 898 det = calc_det(v0, v1, v2); 899 /* 900 debug_printf("%s\n", __FUNCTION__ ); 901 */ 902 903#if DEBUG_FRAGS 904 setup->numFragsEmitted = 0; 905 setup->numFragsWritten = 0; 906#endif 907 908 if (cull_tri( setup, det )) 909 return; 910 911 if (!setup_sort_vertices( setup, det, v0, v1, v2 )) 912 return; 913 setup_tri_coefficients( setup ); 914 setup_tri_edges( setup ); 915 916 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); 917 918 setup->span.y = 0; 919 setup->span.right[0] = 0; 920 setup->span.right[1] = 0; 921 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ 922 923 /* init_constant_attribs( setup ); */ 924 925 if (setup->oneoverarea < 0.0) { 926 /* emaj on left: 927 */ 928 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); 929 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); 930 } 931 else { 932 /* emaj on right: 933 */ 934 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); 935 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); 936 } 937 938 flush_spans( setup ); 939 940#if DEBUG_FRAGS 941 printf("Tri: %u frags emitted, %u 
written\n", 942 setup->numFragsEmitted, 943 setup->numFragsWritten); 944#endif 945} 946 947 948 949/** 950 * Compute a0, dadx and dady for a linearly interpolated coefficient, 951 * for a line. 952 */ 953static void 954linear_pos_coeff(struct setup_context *setup, 955 uint vertSlot, uint i) 956{ 957 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 958 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 959 const float dady = da * setup->emaj.dy * setup->oneoverarea; 960 setup->coef.dadx[0][i] = dadx; 961 setup->coef.dady[0][i] = dady; 962 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - 963 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 964 dady * (setup->vmin[0][1] - setup->pixel_offset))); 965} 966 967 968/** 969 * Compute a0, dadx and dady for a linearly interpolated coefficient, 970 * for a line. 971 */ 972static void 973line_linear_coeff(struct setup_context *setup, 974 unsigned attrib, 975 uint vertSlot) 976{ 977 unsigned i; 978 for (i = 0; i < NUM_CHANNELS; ++i) { 979 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 980 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 981 const float dady = da * setup->emaj.dy * setup->oneoverarea; 982 setup->coef.dadx[1 + attrib][i] = dadx; 983 setup->coef.dady[1 + attrib][i] = dady; 984 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 985 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 986 dady * (setup->vmin[0][1] - setup->pixel_offset))); 987 } 988} 989 990 991/** 992 * Compute a0, dadx and dady for a perspective-corrected interpolant, 993 * for a line. 
994 */ 995static void 996line_persp_coeff(struct setup_context *setup, 997 unsigned attrib, 998 uint vertSlot) 999{ 1000 unsigned i; 1001 for (i = 0; i < NUM_CHANNELS; ++i) { 1002 /* XXX double-check/verify this arithmetic */ 1003 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; 1004 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; 1005 const float da = a1 - a0; 1006 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 1007 const float dady = da * setup->emaj.dy * setup->oneoverarea; 1008 setup->coef.dadx[1 + attrib][i] = dadx; 1009 setup->coef.dady[1 + attrib][i] = dady; 1010 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 1011 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 1012 dady * (setup->vmin[0][1] - setup->pixel_offset))); 1013 } 1014} 1015 1016 1017/** 1018 * Compute the setup->coef[] array dadx, dady, a0 values. 1019 * Must be called after setup->vmin,vmax are initialized. 1020 */ 1021static INLINE boolean 1022setup_line_coefficients(struct setup_context *setup, 1023 const float (*v0)[4], 1024 const float (*v1)[4]) 1025{ 1026 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 1027 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 1028 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 1029 uint fragSlot; 1030 float area; 1031 1032 /* use setup->vmin, vmax to point to vertices */ 1033 if (llvmpipe->rasterizer->flatshade_first) 1034 setup->vprovoke = v0; 1035 else 1036 setup->vprovoke = v1; 1037 setup->vmin = v0; 1038 setup->vmax = v1; 1039 1040 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; 1041 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; 1042 1043 /* NOTE: this is not really area but something proportional to it */ 1044 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; 1045 if (area == 0.0f || util_is_inf_or_nan(area)) 1046 return FALSE; 1047 setup->oneoverarea = 1.0f / area; 1048 1049 /* z and w are done by linear interpolation: 1050 
*/ 1051 linear_pos_coeff(setup, 0, 2); 1052 linear_pos_coeff(setup, 0, 3); 1053 1054 /* setup interpolation for all the remaining attributes: 1055 */ 1056 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 1057 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 1058 1059 switch (vinfo->attrib[fragSlot].interp_mode) { 1060 case INTERP_CONSTANT: 1061 const_coeff(setup, fragSlot, vertSlot); 1062 break; 1063 case INTERP_LINEAR: 1064 line_linear_coeff(setup, fragSlot, vertSlot); 1065 break; 1066 case INTERP_PERSPECTIVE: 1067 line_persp_coeff(setup, fragSlot, vertSlot); 1068 break; 1069 case INTERP_POS: 1070 setup_fragcoord_coeff(setup, fragSlot); 1071 break; 1072 default: 1073 assert(0); 1074 } 1075 1076 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 1077 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 1078 setup->coef.dadx[1 + fragSlot][0] = 0.0; 1079 setup->coef.dady[1 + fragSlot][0] = 0.0; 1080 } 1081 } 1082 return TRUE; 1083} 1084 1085 1086/** 1087 * Plot a pixel in a line segment. 1088 */ 1089static INLINE void 1090plot(struct setup_context *setup, int x, int y) 1091{ 1092 const int iy = y & 1; 1093 const int ix = x & 1; 1094 const int quadX = x - ix; 1095 const int quadY = y - iy; 1096 const int mask = (1 << ix) << (2 * iy); 1097 1098 if (quadX != setup->quad[0].input.x0 || 1099 quadY != setup->quad[0].input.y0) 1100 { 1101 /* flush prev quad, start new quad */ 1102 1103 if (setup->quad[0].input.x0 != -1) 1104 clip_emit_quad( setup, &setup->quad[0] ); 1105 1106 setup->quad[0].input.x0 = quadX; 1107 setup->quad[0].input.y0 = quadY; 1108 setup->quad[0].inout.mask = 0x0; 1109 } 1110 1111 setup->quad[0].inout.mask |= mask; 1112} 1113 1114 1115/** 1116 * Do setup for line rasterization, then render the line. 1117 * Single-pixel width, no stipple, etc. We rely on the 'draw' module 1118 * to handle stippling and wide lines. 
1119 */ 1120void 1121llvmpipe_setup_line(struct setup_context *setup, 1122 const float (*v0)[4], 1123 const float (*v1)[4]) 1124{ 1125 int x0 = (int) v0[0][0]; 1126 int x1 = (int) v1[0][0]; 1127 int y0 = (int) v0[0][1]; 1128 int y1 = (int) v1[0][1]; 1129 int dx = x1 - x0; 1130 int dy = y1 - y0; 1131 int xstep, ystep; 1132 1133#if DEBUG_VERTS 1134 debug_printf("Setup line:\n"); 1135 print_vertex(setup, v0); 1136 print_vertex(setup, v1); 1137#endif 1138 1139 if (setup->llvmpipe->no_rast) 1140 return; 1141 1142 if (dx == 0 && dy == 0) 1143 return; 1144 1145 if (!setup_line_coefficients(setup, v0, v1)) 1146 return; 1147 1148 assert(v0[0][0] < 1.0e9); 1149 assert(v0[0][1] < 1.0e9); 1150 assert(v1[0][0] < 1.0e9); 1151 assert(v1[0][1] < 1.0e9); 1152 1153 if (dx < 0) { 1154 dx = -dx; /* make positive */ 1155 xstep = -1; 1156 } 1157 else { 1158 xstep = 1; 1159 } 1160 1161 if (dy < 0) { 1162 dy = -dy; /* make positive */ 1163 ystep = -1; 1164 } 1165 else { 1166 ystep = 1; 1167 } 1168 1169 assert(dx >= 0); 1170 assert(dy >= 0); 1171 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); 1172 1173 setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; 1174 setup->quad[0].inout.mask = 0x0; 1175 1176 /* XXX temporary: set coverage to 1.0 so the line appears 1177 * if AA mode happens to be enabled. 
1178 */ 1179 setup->quad[0].input.coverage[0] = 1180 setup->quad[0].input.coverage[1] = 1181 setup->quad[0].input.coverage[2] = 1182 setup->quad[0].input.coverage[3] = 1.0; 1183 1184 if (dx > dy) { 1185 /*** X-major line ***/ 1186 int i; 1187 const int errorInc = dy + dy; 1188 int error = errorInc - dx; 1189 const int errorDec = error - dx; 1190 1191 for (i = 0; i < dx; i++) { 1192 plot(setup, x0, y0); 1193 1194 x0 += xstep; 1195 if (error < 0) { 1196 error += errorInc; 1197 } 1198 else { 1199 error += errorDec; 1200 y0 += ystep; 1201 } 1202 } 1203 } 1204 else { 1205 /*** Y-major line ***/ 1206 int i; 1207 const int errorInc = dx + dx; 1208 int error = errorInc - dy; 1209 const int errorDec = error - dy; 1210 1211 for (i = 0; i < dy; i++) { 1212 plot(setup, x0, y0); 1213 1214 y0 += ystep; 1215 if (error < 0) { 1216 error += errorInc; 1217 } 1218 else { 1219 error += errorDec; 1220 x0 += xstep; 1221 } 1222 } 1223 } 1224 1225 /* draw final quad */ 1226 if (setup->quad[0].inout.mask) { 1227 clip_emit_quad( setup, &setup->quad[0] ); 1228 } 1229} 1230 1231 1232static void 1233point_persp_coeff(struct setup_context *setup, 1234 const float (*vert)[4], 1235 unsigned attrib, 1236 uint vertSlot) 1237{ 1238 unsigned i; 1239 for(i = 0; i < NUM_CHANNELS; ++i) { 1240 setup->coef.dadx[1 + attrib][i] = 0.0F; 1241 setup->coef.dady[1 + attrib][i] = 0.0F; 1242 setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; 1243 } 1244} 1245 1246 1247/** 1248 * Do setup for point rasterization, then render the point. 1249 * Round or square points... 1250 * XXX could optimize a lot for 1-pixel points. 1251 */ 1252void 1253llvmpipe_setup_point( struct setup_context *setup, 1254 const float (*v0)[4] ) 1255{ 1256 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 1257 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 1258 const int sizeAttr = setup->llvmpipe->psize_slot; 1259 const float size 1260 = sizeAttr > 0 ? 
v0[sizeAttr][0] 1261 : setup->llvmpipe->rasterizer->point_size; 1262 const float halfSize = 0.5F * size; 1263 const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; 1264 const float x = v0[0][0]; /* Note: data[0] is always position */ 1265 const float y = v0[0][1]; 1266 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 1267 uint fragSlot; 1268 1269#if DEBUG_VERTS 1270 debug_printf("Setup point:\n"); 1271 print_vertex(setup, v0); 1272#endif 1273 1274 if (llvmpipe->no_rast) 1275 return; 1276 1277 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); 1278 1279 /* For points, all interpolants are constant-valued. 1280 * However, for point sprites, we'll need to setup texcoords appropriately. 1281 * XXX: which coefficients are the texcoords??? 1282 * We may do point sprites as textured quads... 1283 * 1284 * KW: We don't know which coefficients are texcoords - ultimately 1285 * the choice of what interpolation mode to use for each attribute 1286 * should be determined by the fragment program, using 1287 * per-attribute declaration statements that include interpolation 1288 * mode as a parameter. So either the fragment program will have 1289 * to be adjusted for pointsprite vs normal point behaviour, or 1290 * otherwise a special interpolation mode will have to be defined 1291 * which matches the required behaviour for point sprites. But - 1292 * the latter is not a feature of normal hardware, and as such 1293 * probably should be ruled out on that basis. 
1294 */ 1295 setup->vprovoke = v0; 1296 1297 /* setup Z, W */ 1298 const_pos_coeff(setup, 0, 2); 1299 const_pos_coeff(setup, 0, 3); 1300 1301 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 1302 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 1303 1304 switch (vinfo->attrib[fragSlot].interp_mode) { 1305 case INTERP_CONSTANT: 1306 /* fall-through */ 1307 case INTERP_LINEAR: 1308 const_coeff(setup, fragSlot, vertSlot); 1309 break; 1310 case INTERP_PERSPECTIVE: 1311 point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); 1312 break; 1313 case INTERP_POS: 1314 setup_fragcoord_coeff(setup, fragSlot); 1315 break; 1316 default: 1317 assert(0); 1318 } 1319 1320 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 1321 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 1322 setup->coef.dadx[1 + fragSlot][0] = 0.0; 1323 setup->coef.dady[1 + fragSlot][0] = 0.0; 1324 } 1325 } 1326 1327 1328 if (halfSize <= 0.5 && !round) { 1329 /* special case for 1-pixel points */ 1330 const int ix = ((int) x) & 1; 1331 const int iy = ((int) y) & 1; 1332 setup->quad[0].input.x0 = (int) x - ix; 1333 setup->quad[0].input.y0 = (int) y - iy; 1334 setup->quad[0].inout.mask = (1 << ix) << (2 * iy); 1335 clip_emit_quad( setup, &setup->quad[0] ); 1336 } 1337 else { 1338 if (round) { 1339 /* rounded points */ 1340 const int ixmin = block((int) (x - halfSize)); 1341 const int ixmax = block((int) (x + halfSize)); 1342 const int iymin = block((int) (y - halfSize)); 1343 const int iymax = block((int) (y + halfSize)); 1344 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ 1345 const float rmax = halfSize + 0.7071F; 1346 const float rmin2 = MAX2(0.0F, rmin * rmin); 1347 const float rmax2 = rmax * rmax; 1348 const float cscale = 1.0F / (rmax2 - rmin2); 1349 int ix, iy; 1350 1351 for (iy = iymin; iy <= iymax; iy += 2) { 1352 for (ix = ixmin; ix <= ixmax; ix += 2) { 1353 float dx, dy, dist2, cover; 1354 1355 setup->quad[0].inout.mask = 0x0; 
1356 1357 dx = (ix + 0.5f) - x; 1358 dy = (iy + 0.5f) - y; 1359 dist2 = dx * dx + dy * dy; 1360 if (dist2 <= rmax2) { 1361 cover = 1.0F - (dist2 - rmin2) * cscale; 1362 setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); 1363 setup->quad[0].inout.mask |= MASK_TOP_LEFT; 1364 } 1365 1366 dx = (ix + 1.5f) - x; 1367 dy = (iy + 0.5f) - y; 1368 dist2 = dx * dx + dy * dy; 1369 if (dist2 <= rmax2) { 1370 cover = 1.0F - (dist2 - rmin2) * cscale; 1371 setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); 1372 setup->quad[0].inout.mask |= MASK_TOP_RIGHT; 1373 } 1374 1375 dx = (ix + 0.5f) - x; 1376 dy = (iy + 1.5f) - y; 1377 dist2 = dx * dx + dy * dy; 1378 if (dist2 <= rmax2) { 1379 cover = 1.0F - (dist2 - rmin2) * cscale; 1380 setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); 1381 setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; 1382 } 1383 1384 dx = (ix + 1.5f) - x; 1385 dy = (iy + 1.5f) - y; 1386 dist2 = dx * dx + dy * dy; 1387 if (dist2 <= rmax2) { 1388 cover = 1.0F - (dist2 - rmin2) * cscale; 1389 setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); 1390 setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; 1391 } 1392 1393 if (setup->quad[0].inout.mask) { 1394 setup->quad[0].input.x0 = ix; 1395 setup->quad[0].input.y0 = iy; 1396 clip_emit_quad( setup, &setup->quad[0] ); 1397 } 1398 } 1399 } 1400 } 1401 else { 1402 /* square points */ 1403 const int xmin = (int) (x + 0.75 - halfSize); 1404 const int ymin = (int) (y + 0.25 - halfSize); 1405 const int xmax = xmin + (int) size; 1406 const int ymax = ymin + (int) size; 1407 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ 1408 const int ixmin = block(xmin); 1409 const int ixmax = block(xmax - 1); 1410 const int iymin = block(ymin); 1411 const int iymax = block(ymax - 1); 1412 int ix, iy; 1413 1414 /* 1415 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); 1416 */ 1417 for (iy = iymin; iy <= iymax; iy += 2) { 1418 uint rowMask = 0xf; 1419 
if (iy < ymin) { 1420 /* above the top edge */ 1421 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); 1422 } 1423 if (iy + 1 >= ymax) { 1424 /* below the bottom edge */ 1425 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); 1426 } 1427 1428 for (ix = ixmin; ix <= ixmax; ix += 2) { 1429 uint mask = rowMask; 1430 1431 if (ix < xmin) { 1432 /* fragment is past left edge of point, turn off left bits */ 1433 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); 1434 } 1435 if (ix + 1 >= xmax) { 1436 /* past the right edge */ 1437 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); 1438 } 1439 1440 setup->quad[0].inout.mask = mask; 1441 setup->quad[0].input.x0 = ix; 1442 setup->quad[0].input.y0 = iy; 1443 clip_emit_quad( setup, &setup->quad[0] ); 1444 } 1445 } 1446 } 1447 } 1448} 1449 1450void llvmpipe_setup_prepare( struct setup_context *setup ) 1451{ 1452 struct llvmpipe_context *lp = setup->llvmpipe; 1453 1454 if (lp->dirty) { 1455 llvmpipe_update_derived(lp); 1456 } 1457 1458 if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && 1459 lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && 1460 lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { 1461 /* we'll do culling */ 1462 setup->winding = lp->rasterizer->cull_mode; 1463 } 1464 else { 1465 /* 'draw' will do culling */ 1466 setup->winding = PIPE_WINDING_NONE; 1467 } 1468} 1469 1470 1471 1472void llvmpipe_setup_destroy_context( struct setup_context *setup ) 1473{ 1474 align_free( setup ); 1475} 1476 1477 1478/** 1479 * Create a new primitive setup/render stage. 
1480 */ 1481struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) 1482{ 1483 struct setup_context *setup; 1484 unsigned i; 1485 1486 setup = align_malloc(sizeof(struct setup_context), 16); 1487 if (!setup) 1488 return NULL; 1489 1490 memset(setup, 0, sizeof *setup); 1491 setup->llvmpipe = llvmpipe; 1492 1493 for (i = 0; i < MAX_QUADS; i++) { 1494 setup->quad[i].coef = &setup->coef; 1495 } 1496 1497 setup->span.left[0] = 1000000; /* greater than right[0] */ 1498 setup->span.left[1] = 1000000; /* greater than right[1] */ 1499 1500 return setup; 1501} 1502 1503