1/************************************************************************** 2 * 3 * Copyright 2010 VMware. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "util/u_math.h" 30#include "util/u_memory.h" 31#include "util/simple_list.h" 32#include "util/os_time.h" 33#include "gallivm/lp_bld_arit.h" 34#include "gallivm/lp_bld_bitarit.h" 35#include "gallivm/lp_bld_const.h" 36#include "gallivm/lp_bld_debug.h" 37#include "gallivm/lp_bld_init.h" 38#include "gallivm/lp_bld_logic.h" 39#include "gallivm/lp_bld_intr.h" 40#include "gallivm/lp_bld_flow.h" 41#include "gallivm/lp_bld_type.h" 42 43#include "lp_perf.h" 44#include "lp_debug.h" 45#include "lp_flush.h" 46#include "lp_screen.h" 47#include "lp_context.h" 48#include "lp_state.h" 49#include "lp_state_fs.h" 50#include "lp_state_setup.h" 51 52 53/** Setup shader number (for debugging) */ 54static unsigned setup_no = 0; 55 56 57/* currently organized to interpolate full float[4] attributes even 58 * when some elements are unused. Later, can pack vertex data more 59 * closely. 60 */ 61 62 63struct lp_setup_args 64{ 65 /* Function arguments: 66 */ 67 LLVMValueRef v0; 68 LLVMValueRef v1; 69 LLVMValueRef v2; 70 LLVMValueRef facing; /* boolean */ 71 LLVMValueRef a0; 72 LLVMValueRef dadx; 73 LLVMValueRef dady; 74 LLVMValueRef key; 75 76 /* Derived: 77 */ 78 LLVMValueRef x0_center; 79 LLVMValueRef y0_center; 80 LLVMValueRef dy20_ooa; 81 LLVMValueRef dy01_ooa; 82 LLVMValueRef dx20_ooa; 83 LLVMValueRef dx01_ooa; 84 struct lp_build_context bld; 85}; 86 87 88static void 89store_coef(struct gallivm_state *gallivm, 90 struct lp_setup_args *args, 91 unsigned slot, 92 LLVMValueRef a0, 93 LLVMValueRef dadx, 94 LLVMValueRef dady) 95{ 96 LLVMBuilderRef builder = gallivm->builder; 97 LLVMValueRef idx = lp_build_const_int32(gallivm, slot); 98 99 LLVMBuildStore(builder, 100 a0, 101 LLVMBuildGEP(builder, args->a0, &idx, 1, "")); 102 103 LLVMBuildStore(builder, 104 dadx, 105 LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); 106 107 LLVMBuildStore(builder, 108 dady, 109 LLVMBuildGEP(builder, args->dady, &idx, 1, "")); 110} 111 112 113 114static void 115emit_constant_coef4(struct gallivm_state *gallivm, 116 struct lp_setup_args *args, 117 unsigned slot, 118 LLVMValueRef vert) 119{ 120 store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero); 121} 122 123 124 125/** 126 * Setup the fragment input attribute with the front-facing value. 127 * \param frontface is the triangle front facing? 128 */ 129static void 130emit_facing_coef(struct gallivm_state *gallivm, 131 struct lp_setup_args *args, 132 unsigned slot ) 133{ 134 LLVMBuilderRef builder = gallivm->builder; 135 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 136 LLVMValueRef a0_0 = args->facing; 137 LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); 138 LLVMValueRef a0, face_val; 139 const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, 140 PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 }; 141 /* Our face val is either 1 or 0 so we do 142 * face = (val * 2) - 1 143 * to make it 1 or -1 144 */ 145 face_val = 146 LLVMBuildFAdd(builder, 147 LLVMBuildFMul(builder, a0_0f, 148 lp_build_const_float(gallivm, 2.0), 149 ""), 150 lp_build_const_float(gallivm, -1.0), 151 "facing"); 152 face_val = lp_build_broadcast_scalar(&args->bld, face_val); 153 a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles); 154 155 store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero); 156} 157 158 159static LLVMValueRef 160vert_attrib(struct gallivm_state *gallivm, 161 LLVMValueRef vert, 162 int attr, 163 int elem, 164 const char *name) 165{ 166 LLVMBuilderRef b = gallivm->builder; 167 LLVMValueRef idx[2]; 168 idx[0] = lp_build_const_int32(gallivm, attr); 169 idx[1] = lp_build_const_int32(gallivm, elem); 170 return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); 171} 172 173 174static void 175lp_twoside(struct gallivm_state *gallivm, 176 struct lp_setup_args *args, 177 const struct lp_setup_variant_key *key, 178 int bcolor_slot, 179 LLVMValueRef attribv[3]) 180{ 181 LLVMBuilderRef b = gallivm->builder; 182 LLVMValueRef a0_back, a1_back, a2_back; 183 LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); 184 185 LLVMValueRef facing = args->facing; 186 LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, 187 lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ 188 189 a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back"); 190 a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); 191 a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back"); 192 193 /* Possibly swap the front and back attrib values, 194 * 195 * Prefer select to if so we don't have to worry about phis or 196 * allocas. 197 */ 198 attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); 199 attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); 200 attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); 201 202} 203 204static LLVMValueRef 205lp_do_offset_tri(struct gallivm_state *gallivm, 206 struct lp_setup_args *args, 207 const struct lp_setup_variant_key *key, 208 LLVMValueRef inv_det, 209 LLVMValueRef dxyz01, 210 LLVMValueRef dxyz20, 211 LLVMValueRef attribv[3]) 212{ 213 LLVMBuilderRef b = gallivm->builder; 214 struct lp_build_context flt_scalar_bld; 215 struct lp_build_context int_scalar_bld; 216 struct lp_build_context *bld = &args->bld; 217 LLVMValueRef zoffset, mult; 218 LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; 219 LLVMValueRef max, max_value, res12; 220 LLVMValueRef shuffles[4]; 221 LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 222 LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 223 LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 224 LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); 225 LLVMValueRef threei = lp_build_const_int32(gallivm, 3); 226 227 /* (res12) = cross(e,f).xy */ 228 shuffles[0] = twoi; 229 shuffles[1] = zeroi; 230 shuffles[2] = onei; 231 shuffles[3] = twoi; 232 dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); 233 234 shuffles[0] = onei; 235 shuffles[1] = twoi; 236 shuffles[2] = twoi; 237 shuffles[3] = zeroi; 238 dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); 239 240 dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); 241 242 shuffles[0] = twoi; 243 shuffles[1] = threei; 244 shuffles[2] = LLVMGetUndef(shuf_type); 245 shuffles[3] = LLVMGetUndef(shuf_type); 246 dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, 247 LLVMConstVector(shuffles, 4), ""); 248 249 res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); 250 251 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ 252 dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); 253 dzdxdzdy = lp_build_abs(bld, dzdxdzdy); 254 255 dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); 256 dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); 257 258 /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */ 259 max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); 260 max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); 261 262 mult = LLVMBuildFMul(b, max_value, 263 lp_build_const_float(gallivm, key->pgon_offset_scale), ""); 264 265 lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32)); 266 267 if (key->floating_point_depth) { 268 /* 269 * bias = pgon_offset_units * 2^(exponent(max(abs(z0), abs(z1), abs(z2))) - 270 * mantissa_bits) + MAX2(dzdx, dzdy) * pgon_offset_scale 271 * 272 * NOTE: Assumes IEEE float32. 273 */ 274 LLVMValueRef c23_shifted, exp_mask, bias, exp; 275 LLVMValueRef maxz_value, maxz0z1_value; 276 277 lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32)); 278 279 c23_shifted = lp_build_const_int32(gallivm, 23 << 23); 280 exp_mask = lp_build_const_int32(gallivm, 0xff << 23); 281 282 maxz0z1_value = lp_build_max(&flt_scalar_bld, 283 lp_build_abs(&flt_scalar_bld, 284 LLVMBuildExtractElement(b, attribv[0], twoi, "")), 285 lp_build_abs(&flt_scalar_bld, 286 LLVMBuildExtractElement(b, attribv[1], twoi, ""))); 287 288 maxz_value = lp_build_max(&flt_scalar_bld, 289 lp_build_abs(&flt_scalar_bld, 290 LLVMBuildExtractElement(b, attribv[2], twoi, "")), 291 maxz0z1_value); 292 293 exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); 294 exp = lp_build_and(&int_scalar_bld, exp, exp_mask); 295 exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted); 296 /* Clamping to zero means mrd will be zero for very small numbers, 297 * but specs do not indicate this should be prevented by clamping 298 * mrd to smallest normal number instead. */ 299 exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero); 300 exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, ""); 301 302 bias = LLVMBuildFMul(b, exp, 303 lp_build_const_float(gallivm, key->pgon_offset_units), 304 "bias"); 305 306 zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset"); 307 } else { 308 /* 309 * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale 310 */ 311 zoffset = LLVMBuildFAdd(b, 312 lp_build_const_float(gallivm, key->pgon_offset_units), 313 mult, "zoffset"); 314 } 315 316 if (key->pgon_offset_clamp > 0) { 317 zoffset = lp_build_min(&flt_scalar_bld, 318 lp_build_const_float(gallivm, key->pgon_offset_clamp), 319 zoffset); 320 } 321 else if (key->pgon_offset_clamp < 0) { 322 zoffset = lp_build_max(&flt_scalar_bld, 323 lp_build_const_float(gallivm, key->pgon_offset_clamp), 324 zoffset); 325 } 326 327 return zoffset; 328} 329 330static void 331load_attribute(struct gallivm_state *gallivm, 332 struct lp_setup_args *args, 333 const struct lp_setup_variant_key *key, 334 unsigned vert_attr, 335 LLVMValueRef attribv[3]) 336{ 337 LLVMBuilderRef b = gallivm->builder; 338 LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); 339 340 /* Load the vertex data 341 */ 342 attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); 343 attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); 344 attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); 345 346 347 /* Potentially modify it according to twoside, etc: 348 */ 349 if (key->twoside) { 350 if (vert_attr == key->color_slot && key->bcolor_slot >= 0) 351 lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); 352 else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) 353 lp_twoside(gallivm, args, key, key->bspec_slot, attribv); 354 } 355} 356 357/* 358 * FIXME: interpolation is always done wrt fb origin (0/0). 359 * However, if some (small) tri is far away from the origin and gradients 360 * are large, this can lead to HUGE errors, since the a0 value calculated 361 * here can get very large (with the actual values inside the triangle way 362 * smaller), leading to complete loss of accuracy. This could be prevented 363 * by using some point inside (or at corner) of the tri as interpolation 364 * origin, or just use barycentric interpolation (which GL suggests and is 365 * what real hw does - you can get the barycentric coordinates from the 366 * edge functions in rasterization in principle (though we skip these 367 * sometimes completely in case of tris covering a block fully, 368 * which obviously wouldn't work)). 369 */ 370static void 371calc_coef4( struct gallivm_state *gallivm, 372 struct lp_setup_args *args, 373 LLVMValueRef a0, 374 LLVMValueRef a1, 375 LLVMValueRef a2, 376 LLVMValueRef out[3]) 377{ 378 LLVMBuilderRef b = gallivm->builder; 379 LLVMValueRef attr_0; 380 LLVMValueRef dy20_ooa = args->dy20_ooa; 381 LLVMValueRef dy01_ooa = args->dy01_ooa; 382 LLVMValueRef dx20_ooa = args->dx20_ooa; 383 LLVMValueRef dx01_ooa = args->dx01_ooa; 384 LLVMValueRef x0_center = args->x0_center; 385 LLVMValueRef y0_center = args->y0_center; 386 LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); 387 LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); 388 389 /* Calculate dadx (vec4f) 390 */ 391 LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); 392 LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); 393 LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); 394 395 /* Calculate dady (vec4f) 396 */ 397 LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); 398 LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); 399 LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); 400 401 /* Calculate a0 - the attribute value at the origin 402 */ 403 LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); 404 LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); 405 LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); 406 attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); 407 408 out[0] = attr_0; 409 out[1] = dadx; 410 out[2] = dady; 411} 412 413static void 414emit_coef4( struct gallivm_state *gallivm, 415 struct lp_setup_args *args, 416 unsigned slot, 417 LLVMValueRef a0, 418 LLVMValueRef a1, 419 LLVMValueRef a2) 420{ 421 LLVMValueRef coeffs[3]; 422 calc_coef4(gallivm, args, a0, a1, a2, coeffs); 423 store_coef(gallivm, args, slot, 424 coeffs[0], coeffs[1], coeffs[2]); 425} 426 427 428static void 429emit_linear_coef( struct gallivm_state *gallivm, 430 struct lp_setup_args *args, 431 unsigned slot, 432 LLVMValueRef attribv[3]) 433{ 434 /* nothing to do anymore */ 435 emit_coef4(gallivm, 436 args, slot, 437 attribv[0], 438 attribv[1], 439 attribv[2]); 440} 441 442 443/** 444 * Compute a0, dadx and dady for a perspective-corrected interpolant, 445 * for a triangle. 446 * We basically multiply the vertex value by 1/w before computing 447 * the plane coefficients (a0, dadx, dady). 448 * Later, when we compute the value at a particular fragment position we'll 449 * divide the interpolated value by the interpolated W at that fragment. 450 */ 451static void 452apply_perspective_corr( struct gallivm_state *gallivm, 453 struct lp_setup_args *args, 454 unsigned slot, 455 LLVMValueRef attribv[3]) 456{ 457 LLVMBuilderRef b = gallivm->builder; 458 459 /* premultiply by 1/w (v[0][3] is always 1/w): 460 */ 461 LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld, 462 vert_attrib(gallivm, args->v0, 0, 3, "v0_oow")); 463 LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld, 464 vert_attrib(gallivm, args->v1, 0, 3, "v1_oow")); 465 LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld, 466 vert_attrib(gallivm, args->v2, 0, 3, "v2_oow")); 467 468 attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); 469 attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); 470 attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); 471} 472 473 474/** 475 * Compute the inputs-> dadx, dady, a0 values. 476 */ 477static void 478emit_tri_coef( struct gallivm_state *gallivm, 479 const struct lp_setup_variant_key *key, 480 struct lp_setup_args *args) 481{ 482 unsigned slot; 483 484 LLVMValueRef attribs[3]; 485 486 /* setup interpolation for all the remaining attributes: 487 */ 488 for (slot = 0; slot < key->num_inputs; slot++) { 489 switch (key->inputs[slot].interp) { 490 case LP_INTERP_CONSTANT: 491 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 492 if (key->flatshade_first) { 493 emit_constant_coef4(gallivm, args, slot+1, attribs[0]); 494 } 495 else { 496 emit_constant_coef4(gallivm, args, slot+1, attribs[2]); 497 } 498 break; 499 500 case LP_INTERP_LINEAR: 501 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 502 emit_linear_coef(gallivm, args, slot+1, attribs); 503 break; 504 505 case LP_INTERP_PERSPECTIVE: 506 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 507 apply_perspective_corr(gallivm, args, slot+1, attribs); 508 emit_linear_coef(gallivm, args, slot+1, attribs); 509 break; 510 511 case LP_INTERP_POSITION: 512 /* 513 * The generated pixel interpolators will pick up the coeffs from 514 * slot 0. 515 */ 516 break; 517 518 case LP_INTERP_FACING: 519 emit_facing_coef(gallivm, args, slot+1); 520 break; 521 522 default: 523 assert(0); 524 } 525 } 526} 527 528 529/* XXX: generic code: 530 */ 531static void 532set_noalias(LLVMBuilderRef builder, 533 LLVMValueRef function, 534 const LLVMTypeRef *arg_types, 535 int nr_args) 536{ 537 int i; 538 for(i = 0; i < nr_args; ++i) 539 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 540 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 541} 542 543static void 544init_args(struct gallivm_state *gallivm, 545 const struct lp_setup_variant_key *key, 546 struct lp_setup_args *args) 547{ 548 LLVMBuilderRef b = gallivm->builder; 549 LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 550 LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); 551 LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 552 LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 553 LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; 554 LLVMValueRef e, f, ef, ooa; 555 LLVMValueRef shuffles[4], shuf10; 556 LLVMValueRef attr_pos[3]; 557 LLVMValueRef polygon_offset; 558 struct lp_type typef4 = lp_type_float_vec(32, 128); 559 struct lp_build_context bld; 560 561 lp_build_context_init(&bld, gallivm, typef4); 562 args->bld = bld; 563 564 /* The internal position input is in slot zero: 565 */ 566 load_attribute(gallivm, args, key, 0, attr_pos); 567 568 pixel_center = lp_build_const_vec(gallivm, typef4, 569 (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0); 570 571 /* 572 * xy are first two elems in v0a/v1a/v2a but just use vec4 arit 573 * also offset_tri uses actually xyz in them 574 */ 575 xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); 576 577 dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); 578 dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); 579 580 shuffles[0] = onei; 581 shuffles[1] = zeroi; 582 shuffles[2] = LLVMGetUndef(shuf_type); 583 shuffles[3] = LLVMGetUndef(shuf_type); 584 shuf10 = LLVMConstVector(shuffles, 4); 585 586 dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); 587 588 ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); 589 e = LLVMBuildExtractElement(b, ef, zeroi, ""); 590 f = LLVMBuildExtractElement(b, ef, onei, ""); 591 592 ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); 593 594 ooa = lp_build_broadcast_scalar(&bld, ooa); 595 596 /* tri offset calc shares a lot of arithmetic, do it here */ 597 if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { 598 polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); 599 } else { 600 polygon_offset = lp_build_const_float(gallivm, 0.0f); 601 } 602 603 dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); 604 dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); 605 606 args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); 607 args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); 608 609 args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); 610 args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); 611 612 args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); 613 args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); 614 615 LLVMValueRef coeffs[3]; 616 calc_coef4(gallivm, args, 617 attr_pos[0], attr_pos[1], attr_pos[2], 618 coeffs); 619 620 /* This is a bit sneaky: 621 * Because we observe that the X component of A0 is otherwise unused, 622 * we can overwrite it with the computed polygon-offset value, to make 623 * sure it's available in the fragment shader without having to change 624 * the interface (which is error-prone). 625 */ 626 coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset, 627 lp_build_const_int32(gallivm, 0), ""); 628 629 store_coef(gallivm, args, 0, 630 coeffs[0], coeffs[1], coeffs[2]); 631} 632 633/** 634 * Generate the runtime callable function for the coefficient calculation. 635 * 636 */ 637static struct lp_setup_variant * 638generate_setup_variant(struct lp_setup_variant_key *key, 639 struct llvmpipe_context *lp) 640{ 641 struct lp_setup_variant *variant = NULL; 642 struct gallivm_state *gallivm; 643 struct lp_setup_args args; 644 char func_name[64]; 645 LLVMTypeRef vec4f_type; 646 LLVMTypeRef func_type; 647 LLVMTypeRef arg_types[8]; 648 LLVMBasicBlockRef block; 649 LLVMBuilderRef builder; 650 int64_t t0 = 0, t1; 651 652 if (0) 653 goto fail; 654 655 variant = CALLOC_STRUCT(lp_setup_variant); 656 if (!variant) 657 goto fail; 658 659 variant->no = setup_no++; 660 661 snprintf(func_name, sizeof(func_name), "setup_variant_%u", 662 variant->no); 663 664 variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL); 665 if (!variant->gallivm) { 666 goto fail; 667 } 668 669 builder = gallivm->builder; 670 671 if (LP_DEBUG & DEBUG_COUNTERS) { 672 t0 = os_time_get(); 673 } 674 675 memcpy(&variant->key, key, key->size); 676 variant->list_item_global.base = variant; 677 678 /* Currently always deal with full 4-wide vertex attributes from 679 * the vertices. 680 */ 681 682 vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); 683 684 arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ 685 arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ 686 arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ 687 arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ 688 arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ 689 arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ 690 arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ 691 arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key (placeholder) */ 692 693 func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), 694 arg_types, ARRAY_SIZE(arg_types), 0); 695 696 variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); 697 if (!variant->function) 698 goto fail; 699 700 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 701 702 args.v0 = LLVMGetParam(variant->function, 0); 703 args.v1 = LLVMGetParam(variant->function, 1); 704 args.v2 = LLVMGetParam(variant->function, 2); 705 args.facing = LLVMGetParam(variant->function, 3); 706 args.a0 = LLVMGetParam(variant->function, 4); 707 args.dadx = LLVMGetParam(variant->function, 5); 708 args.dady = LLVMGetParam(variant->function, 6); 709 args.key = LLVMGetParam(variant->function, 7); 710 711 lp_build_name(args.v0, "in_v0"); 712 lp_build_name(args.v1, "in_v1"); 713 lp_build_name(args.v2, "in_v2"); 714 lp_build_name(args.facing, "in_facing"); 715 lp_build_name(args.a0, "out_a0"); 716 lp_build_name(args.dadx, "out_dadx"); 717 lp_build_name(args.dady, "out_dady"); 718 lp_build_name(args.key, "key"); 719 720 /* 721 * Function body 722 */ 723 block = LLVMAppendBasicBlockInContext(gallivm->context, 724 variant->function, "entry"); 725 LLVMPositionBuilderAtEnd(builder, block); 726 727 set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); 728 init_args(gallivm, &variant->key, &args); 729 emit_tri_coef(gallivm, &variant->key, &args); 730 731 LLVMBuildRetVoid(builder); 732 733 gallivm_verify_function(gallivm, variant->function); 734 735 gallivm_compile_module(gallivm); 736 737 variant->jit_function = (lp_jit_setup_triangle) 738 gallivm_jit_function(gallivm, variant->function); 739 if (!variant->jit_function) 740 goto fail; 741 742 gallivm_free_ir(variant->gallivm); 743 744 /* 745 * Update timing information: 746 */ 747 if (LP_DEBUG & DEBUG_COUNTERS) { 748 t1 = os_time_get(); 749 LP_COUNT_ADD(llvm_compile_time, t1 - t0); 750 LP_COUNT_ADD(nr_llvm_compiles, 1); 751 } 752 753 return variant; 754 755fail: 756 if (variant) { 757 if (variant->gallivm) { 758 gallivm_destroy(variant->gallivm); 759 } 760 FREE(variant); 761 } 762 763 return NULL; 764} 765 766 767 768static void 769lp_make_setup_variant_key(struct llvmpipe_context *lp, 770 struct lp_setup_variant_key *key) 771{ 772 struct lp_fragment_shader *fs = lp->fs; 773 unsigned i; 774 775 assert(sizeof key->inputs[0] == sizeof(uint)); 776 777 key->num_inputs = fs->info.base.num_inputs; 778 key->flatshade_first = lp->rasterizer->flatshade_first; 779 key->pixel_center_half = lp->rasterizer->half_pixel_center; 780 key->multisample = lp->rasterizer->multisample; 781 key->twoside = lp->rasterizer->light_twoside; 782 key->size = Offset(struct lp_setup_variant_key, 783 inputs[key->num_inputs]); 784 785 key->color_slot = lp->color_slot[0]; 786 key->bcolor_slot = lp->bcolor_slot[0]; 787 key->spec_slot = lp->color_slot[1]; 788 key->bspec_slot = lp->bcolor_slot[1]; 789 790 /* 791 * If depth is floating point, depth bias is calculated with respect 792 * to the primitive's maximum Z value. Retain the original depth bias 793 * value until that stage. 794 */ 795 key->floating_point_depth = lp->floating_point_depth; 796 797 if (key->floating_point_depth) { 798 key->pgon_offset_units = (float) lp->rasterizer->offset_units; 799 } else { 800 key->pgon_offset_units = 801 (float) (lp->rasterizer->offset_units * lp->mrd * 2); 802 } 803 804 key->pgon_offset_scale = lp->rasterizer->offset_scale; 805 key->pgon_offset_clamp = lp->rasterizer->offset_clamp; 806 key->uses_constant_interp = 0; 807 key->pad = 0; 808 memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); 809 for (i = 0; i < key->num_inputs; i++) { 810 if (key->inputs[i].interp == LP_INTERP_COLOR) { 811 if (lp->rasterizer->flatshade) 812 key->inputs[i].interp = LP_INTERP_CONSTANT; 813 else 814 key->inputs[i].interp = LP_INTERP_PERSPECTIVE; 815 } 816 if (key->inputs[i].interp == LP_INTERP_CONSTANT) { 817 key->uses_constant_interp = 1; 818 } 819 } 820} 821 822 823static void 824remove_setup_variant(struct llvmpipe_context *lp, 825 struct lp_setup_variant *variant) 826{ 827 if (gallivm_debug & GALLIVM_DEBUG_IR) { 828 debug_printf("llvmpipe: del setup_variant #%u total %u\n", 829 variant->no, lp->nr_setup_variants); 830 } 831 832 if (variant->gallivm) { 833 gallivm_destroy(variant->gallivm); 834 } 835 836 remove_from_list(&variant->list_item_global); 837 lp->nr_setup_variants--; 838 FREE(variant); 839} 840 841 842 843/* When the number of setup variants exceeds a threshold, cull a 844 * fraction (currently a quarter) of them. 845 */ 846static void 847cull_setup_variants(struct llvmpipe_context *lp) 848{ 849 struct pipe_context *pipe = &lp->pipe; 850 int i; 851 852 /* 853 * XXX: we need to flush the context until we have some sort of reference 854 * counting in fragment shaders as they may still be binned 855 * Flushing alone might not be sufficient we need to wait on it too. 856 */ 857 llvmpipe_finish(pipe, __FUNCTION__); 858 859 for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { 860 struct lp_setup_variant_list_item *item; 861 if (is_empty_list(&lp->setup_variants_list)) { 862 break; 863 } 864 item = last_elem(&lp->setup_variants_list); 865 assert(item); 866 assert(item->base); 867 remove_setup_variant(lp, item->base); 868 } 869} 870 871 872/** 873 * Update fragment/vertex shader linkage state. This is called just 874 * prior to drawing something when some fragment-related state has 875 * changed. 876 */ 877void 878llvmpipe_update_setup(struct llvmpipe_context *lp) 879{ 880 struct lp_setup_variant_key *key = &lp->setup_variant.key; 881 struct lp_setup_variant *variant = NULL; 882 struct lp_setup_variant_list_item *li; 883 884 lp_make_setup_variant_key(lp, key); 885 886 foreach(li, &lp->setup_variants_list) { 887 if(li->base->key.size == key->size && 888 memcmp(&li->base->key, key, key->size) == 0) { 889 variant = li->base; 890 break; 891 } 892 } 893 894 if (variant) { 895 move_to_head(&lp->setup_variants_list, &variant->list_item_global); 896 } 897 else { 898 if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { 899 cull_setup_variants(lp); 900 } 901 902 variant = generate_setup_variant(key, lp); 903 if (variant) { 904 insert_at_head(&lp->setup_variants_list, &variant->list_item_global); 905 lp->nr_setup_variants++; 906 } 907 } 908 909 lp_setup_set_setup_variant(lp->setup, variant); 910} 911 912void 913lp_delete_setup_variants(struct llvmpipe_context *lp) 914{ 915 struct lp_setup_variant_list_item *li; 916 li = first_elem(&lp->setup_variants_list); 917 while(!at_end(&lp->setup_variants_list, li)) { 918 struct lp_setup_variant_list_item *next = next_elem(li); 919 remove_setup_variant(lp, li->base); 920 li = next; 921 } 922} 923 924void 925lp_dump_setup_coef(const struct lp_setup_variant_key *key, 926 const float (*sa0)[4], 927 const float (*sdadx)[4], 928 const float (*sdady)[4]) 929{ 930 int i, slot; 931 932 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 933 float a0 = sa0 [0][i]; 934 float dadx = sdadx[0][i]; 935 float dady = sdady[0][i]; 936 937 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", 938 "xyzw"[i], a0, dadx, dady); 939 } 940 941 for (slot = 0; slot < key->num_inputs; slot++) { 942 unsigned usage_mask = key->inputs[slot].usage_mask; 943 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 944 if (usage_mask & (1 << i)) { 945 float a0 = sa0 [1 + slot][i]; 946 float dadx = sdadx[1 + slot][i]; 947 float dady = sdady[1 + slot][i]; 948 949 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", 950 slot, "xyzw"[i], a0, dadx, dady); 951 } 952 } 953 } 954} 955