/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_hierarchical_visitor.h"
#include "program/prog_instruction.h"
#include "program/prog_statevars.h"
#include "util/bitscan.h"
#include "builtin_functions.h"
#include "main/mtypes.h"

using namespace ir_builder;

/* Shorthand for float ir_constants built from <x>.  Both macros expand to a
 * placement-new on a variable named mem_ctx, so a local with that name must
 * be in scope wherever they are used.  The trailing 1 / 3 is the second
 * constructor argument; judging by how the macros are used below (float vs.
 * vec3 operands) it is the component count.
 */
#define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
#define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)

/* The blend_*() helpers below each build (but do not emit) the IR expression
 * for one blend function f(Cs,Cd).  They are called from calc_blend_result()
 * with the vec3 temporaries holding the source and destination colors.
 */

/* MULTIPLY blend function. */
static ir_rvalue *
blend_multiply(ir_variable *src, ir_variable *dst)
{
   /* f(Cs,Cd) = Cs*Cd */
   return mul(src, dst);
}

/* SCREEN blend function. */
static ir_rvalue *
blend_screen(ir_variable *src, ir_variable *dst)
{
   /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
   return sub(add(src, dst), mul(src, dst));
}

/* OVERLAY blend function.  Same two rules as blend_hardlight(), but the
 * rule is selected on the destination color.
 */
static ir_rvalue *
blend_overlay(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
    *            1-2*(1-Cs)*(1-Cd), otherwise
    */
   ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
   ir_rvalue *rule_2 =
      sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
   return csel(lequal(dst, imm3(0.5f)), rule_1, rule_2);
}

/* DARKEN blend function. */
static ir_rvalue *
blend_darken(ir_variable *src, ir_variable *dst)
{
   /* f(Cs,Cd) = min(Cs,Cd) */
   return min2(src, dst);
}

/* LIGHTEN blend function. */
static ir_rvalue *
blend_lighten(ir_variable *src, ir_variable *dst)
{
   /* f(Cs,Cd) = max(Cs,Cd) */
   return max2(src, dst);
}

/* COLORDODGE blend function. */
static ir_rvalue *
blend_colordodge(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) =
    *   0, if Cd <= 0
    *   min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
    *   1, if Cd > 0 and Cs >= 1
    */
   return csel(lequal(dst, imm3(0)), imm3(0),
               csel(gequal(src, imm3(1)), imm3(1),
                    min2(imm3(1), div(dst, sub(imm3(1), src)))));
}

/* COLORBURN blend function. */
static ir_rvalue *
blend_colorburn(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) =
    *   1, if Cd >= 1
    *   1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
    *   0, if Cd < 1 and Cs <= 0
    */
   return csel(gequal(dst, imm3(1)), imm3(1),
               csel(lequal(src, imm3(0)), imm3(0),
                    sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst), src)))));
}

/* HARDLIGHT blend function.  Same two rules as blend_overlay(), but the
 * rule is selected on the source color.
 */
static ir_rvalue *
blend_hardlight(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
    *            1-2*(1-Cs)*(1-Cd), otherwise
    */
   ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
   ir_rvalue *rule_2 =
      sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
   return csel(lequal(src, imm3(0.5f)), rule_1, rule_2);
}

/* SOFTLIGHT blend function. */
static ir_rvalue *
blend_softlight(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) =
    *   Cd-(1-2*Cs)*Cd*(1-Cd),
    *     if Cs <= 0.5
    *   Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
    *     if Cs > 0.5 and Cd <= 0.25
    *   Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
    *     if Cs > 0.5 and Cd > 0.25
    *
    * We can simplify this to
    *
    * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
    * g(Cs,Cd) = Cd*(1-Cd)            if Cs <= 0.5
    *            Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
    *            sqrt(Cd)-Cd,         otherwise
    */
   /* factor_1..factor_3 are the three candidates for g(Cs,Cd), in the
    * order listed above.
    */
   ir_rvalue *factor_1 = mul(dst, sub(imm3(1), dst));
   ir_rvalue *factor_2 =
      mul(dst, add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3)));
   ir_rvalue *factor_3 = sub(sqrt(dst), dst);
   ir_rvalue *factor = csel(lequal(src, imm3(0.5f)), factor_1,
                            csel(lequal(dst, imm3(0.25f)),
                                 factor_2, factor_3));
   return add(dst, mul(sub(mul(imm3(2), src), imm3(1)), factor));
}

/* DIFFERENCE blend function. */
static ir_rvalue *
blend_difference(ir_variable *src, ir_variable *dst)
{
   /* f(Cs,Cd) = abs(Cd-Cs) */
   return abs(sub(dst, src));
}

/* EXCLUSION blend function. */
static ir_rvalue *
blend_exclusion(ir_variable *src, ir_variable *dst)
{
   /* Allocation context for the constants created by imm3(). */
   void *mem_ctx = ralloc_parent(src);

   /* f(Cs,Cd) = Cs+Cd-2*Cs*Cd */
   return add(src, sub(dst, mul(imm3(2), mul(src, dst))));
}

/* Return the minimum of a vec3's components */
static ir_rvalue *
minv3(ir_variable *v)
{
   return min2(min2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
}

/* Return the maximum of a vec3's components */
static ir_rvalue *
maxv3(ir_variable *v)
{
   return max2(max2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
}

/* Return the luminosity of an RGB color: a weighted sum of its components
 * using the weights (0.30, 0.59, 0.11).
 */
static ir_rvalue *
lumv3(ir_variable *c)
{
   ir_constant_data data;
   data.f[0] = 0.30;
   data.f[1] = 0.59;
   data.f[2] = 0.11;

   void *mem_ctx = ralloc_parent(c);

   /* dot(c, vec3(0.30, 0.59, 0.11)) */
   return dot(c, new(mem_ctx) ir_constant(glsl_type::vec3_type, &data));
}

/* Return the saturation of an RGB color: its largest component minus its
 * smallest component.
 */
static ir_rvalue *
satv3(ir_variable *c)
{
   return sub(maxv3(c), minv3(c));
}

/* Take the base RGB color <cbase> and override its luminosity with that
 * of the RGB color <clum>.
 *
 * This follows the equations given in the ES 3.2 (June 15th, 2016)
 * specification. Revision 16 of GL_KHR_blend_equation_advanced and
 * revision 9 of GL_NV_blend_equation_advanced specify a different set
 * of equations. Older revisions match ES 3.2's text, and dEQP expects
 * the ES 3.2 rules implemented here.
 *
 * The instructions are emitted through <f> and the result is left in
 * <color>.  set_lum_sat() passes the same variable for <color> and <cbase>.
 */
static void
set_lum(ir_factory *f,
        ir_variable *color,
        ir_variable *cbase,
        ir_variable *clum)
{
   void *mem_ctx = f->mem_ctx;

   /* color = cbase + (lum(clum) - lum(cbase)) */
   f->emit(assign(color, add(cbase, sub(lumv3(clum), lumv3(cbase)))));

   ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
   ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
   ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");

   /* Luminosity, smallest and largest component of the shifted color. */
   f->emit(assign(llum, lumv3(color)));
   f->emit(assign(mincol, minv3(color)));
   f->emit(assign(maxcol, maxv3(color)));

   /* if (mincol < 0)
    *    color = llum + ((color - llum) * llum) / (llum - mincol)
    * else if (maxcol > 1)
    *    color = llum + ((color - llum) * (1 - llum)) / (maxcol - llum)
    */
   f->emit(if_tree(less(mincol, imm1(0)),
                   assign(color, add(llum, div(mul(sub(color, llum), llum),
                                               sub(llum, mincol)))),
                   if_tree(greater(maxcol, imm1(1)),
                           assign(color, add(llum, div(mul(sub(color, llum),
                                                           sub(imm3(1), llum)),
                                                       sub(maxcol, llum)))))));

}

/* Take the base RGB color <cbase> and override its saturation with
 * that of the RGB color <csat>. Then override the luminosity of the
 * result with that of the RGB color <clum>.
 *
 * The instructions are emitted through <f> and the result is left in
 * <color>.
 */
static void
set_lum_sat(ir_factory *f,
            ir_variable *color,
            ir_variable *cbase,
            ir_variable *csat,
            ir_variable *clum)
{
   void *mem_ctx = f->mem_ctx;

   ir_rvalue *minbase = minv3(cbase);
   ir_rvalue *ssat = satv3(csat);

   ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
   f->emit(assign(sbase, satv3(cbase)));

   /* Equivalent (modulo rounding errors) to setting the
    * smallest (R,G,B) component to 0, the largest to <ssat>,
    * and interpolating the "middle" component based on its
    * original value relative to the smallest/largest.
    */
   /* The sbase > 0 test keeps the division by sbase out of the path where
    * the base color has no saturation; color becomes (0, 0, 0) there.
    */
   f->emit(if_tree(greater(sbase, imm1(0)),
                   assign(color, div(mul(sub(cbase, minbase), ssat), sbase)),
                   assign(color, imm3(0))));

   /* color serves as both the destination and the base color here. */
   set_lum(f, color, color, clum);
}

/* Build the boolean expression "<mode> == <q>", with <q> converted to an
 * unsigned constant allocated in the same context as <mode>.
 */
static ir_rvalue *
is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
{
   return equal(mode, new(ralloc_parent(mode)) ir_constant(unsigned(q)));
}

/* Emit the IR that computes the blended color and return the vec4 temporary
 * that holds it.
 *
 * <f>                is the factory to emit through.  It is taken by value,
 *                    so retargeting f.instructions below only changes this
 *                    local copy.
 * <mode>             is the uint variable holding the active blend mode.
 * <fb>               is the vec4 variable holding the destination color.
 * <blend_src>        is the vec4 source color expression.
 * <blend_qualifiers> is a bitfield with one bit per blend mode to generate
 *                    code for.
 *
 * The emitted code has the shape
 *
 *    src = blend_src;
 *    if (mode == BLEND_NONE) {
 *       result = src;
 *    } else {
 *       <compute src_rgb/src_alpha and dst_rgb/dst_alpha>
 *       if (mode == A) { factor = ...; }
 *       else { if (mode == B) { factor = ...; } else { ... } }
 *       result.xyz = factor * p0 + src_rgb * p1 + dst_rgb * p2;
 *       result.w = p0 + p1 + p2;
 *    }
 */
static ir_variable *
calc_blend_result(ir_factory f,
                  ir_variable *mode,
                  ir_variable *fb,
                  ir_rvalue *blend_src,
                  GLbitfield blend_qualifiers)
{
   void *mem_ctx = f.mem_ctx;
   ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");

   /* Save blend_src to a temporary so we can reference it multiple times. */
   ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
   f.emit(assign(src, blend_src));

   /* If we're not doing advanced blending, just write the original value. */
   ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
   f.emit(if_blending);
   if_blending->then_instructions.push_tail(assign(result, src));

   /* Everything emitted through f from here on lands in the else branch. */
   f.instructions = &if_blending->else_instructions;

   /* (Rs', Gs', Bs') =
    *   (0, 0, 0),              if As == 0
    *   (Rs/As, Gs/As, Bs/As),  otherwise
    */
   ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
   ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");

   /* (Rd', Gd', Bd') =
    *   (0, 0, 0),              if Ad == 0
    *   (Rd/Ad, Gd/Ad, Bd/Ad),  otherwise
    */
   ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
   ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");

   /* In the non-zero-alpha branch, components that compare equal to alpha
    * are set to 1 directly instead of going through the division.
    */
   f.emit(assign(dst_alpha, swizzle_w(fb)));
   f.emit(if_tree(equal(dst_alpha, imm1(0)),
                  assign(dst_rgb, imm3(0)),
                  assign(dst_rgb, csel(equal(swizzle_xyz(fb),
                                             swizzle(fb, SWIZZLE_WWWW, 3)),
                                       imm3(1),
                                       div(swizzle_xyz(fb), dst_alpha)))));

   /* Same treatment for the source color. */
   f.emit(assign(src_alpha, swizzle_w(src)));
   f.emit(if_tree(equal(src_alpha, imm1(0)),
                  assign(src_rgb, imm3(0)),
                  assign(src_rgb, csel(equal(swizzle_xyz(src),
                                             swizzle(src, SWIZZLE_WWWW, 3)),
                                       imm3(1),
                                       div(swizzle_xyz(src), src_alpha)))));

   /* Receives f(Cs',Cd') for whichever mode is active. */
   ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");

   /* A second cursor used to build the per-mode if/else chain, so that f
    * itself keeps pointing at the else branch of if_blending.
    */
   ir_factory casefactory = f;

   /* One iteration per bit set in blend_qualifiers.  u_bit_scan() is handed
    * &choices and is expected to clear the bit whose index it returns; the
    * loop relies on that to terminate.
    */
   unsigned choices = blend_qualifiers;
   while (choices) {
      enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)
         (1u << u_bit_scan(&choices));

      /* if (mode == choice) { <case body goes here> } */
      ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
      casefactory.emit(iff);
      casefactory.instructions = &iff->then_instructions;

      /* Stays NULL for the HSL modes, which write <factor> themselves. */
      ir_rvalue *val = NULL;

      switch (choice) {
      case BLEND_MULTIPLY:
         val = blend_multiply(src_rgb, dst_rgb);
         break;
      case BLEND_SCREEN:
         val = blend_screen(src_rgb, dst_rgb);
         break;
      case BLEND_OVERLAY:
         val = blend_overlay(src_rgb, dst_rgb);
         break;
      case BLEND_DARKEN:
         val = blend_darken(src_rgb, dst_rgb);
         break;
      case BLEND_LIGHTEN:
         val = blend_lighten(src_rgb, dst_rgb);
         break;
      case BLEND_COLORDODGE:
         val = blend_colordodge(src_rgb, dst_rgb);
         break;
      case BLEND_COLORBURN:
         val = blend_colorburn(src_rgb, dst_rgb);
         break;
      case BLEND_HARDLIGHT:
         val = blend_hardlight(src_rgb, dst_rgb);
         break;
      case BLEND_SOFTLIGHT:
         val = blend_softlight(src_rgb, dst_rgb);
         break;
      case BLEND_DIFFERENCE:
         val = blend_difference(src_rgb, dst_rgb);
         break;
      case BLEND_EXCLUSION:
         val = blend_exclusion(src_rgb, dst_rgb);
         break;
      case BLEND_HSL_HUE:
         /* base = src, saturation from dst, luminosity from dst */
         set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
         break;
      case BLEND_HSL_SATURATION:
         /* base = dst, saturation from src, luminosity from dst */
         set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
         break;
      case BLEND_HSL_COLOR:
         /* base = src, luminosity from dst */
         set_lum(&casefactory, factor, src_rgb, dst_rgb);
         break;
      case BLEND_HSL_LUMINOSITY:
         /* base = dst, luminosity from src */
         set_lum(&casefactory, factor, dst_rgb, src_rgb);
         break;
      case BLEND_NONE:
      case BLEND_ALL:
         unreachable("not real cases");
      }

      if (val)
         casefactory.emit(assign(factor, val));

      /* The next mode's test is nested in this one's else branch. */
      casefactory.instructions = &iff->else_instructions;
   }

   /* p0(As,Ad) = As*Ad
    * p1(As,Ad) = As*(1-Ad)
    * p2(As,Ad) = Ad*(1-As)
    */
   ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
   ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
   ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");

   f.emit(assign(p0, mul(src_alpha, dst_alpha)));
   f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
   f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));

   /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
    * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
    * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
    * A =          X*p0(As,Ad) +     Y*p1(As,Ad) +     Z*p2(As,Ad)
    *
    * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
    *
    * In vector form, this is:
    * RGB = factor * p0 + Cs * p1 + Cd * p2
    *   A = p0 + p1 + p2
    */
   f.emit(assign(result,
                 add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
                 WRITEMASK_XYZ));
   f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));

   return result;
}

/**
 * Dereference var, or var[0] if it's an array.
 */
static ir_dereference *
deref_output(ir_variable *var)
{
   void *mem_ctx = ralloc_parent(var);

   ir_dereference *val = new(mem_ctx) ir_dereference_variable(var);
   if (val->type->is_array()) {
      ir_constant *index = new(mem_ctx) ir_constant(0);
      val = new(mem_ctx) ir_dereference_array(val, index);
   }

   return val;
}

/* Return the signature of the shader's main() that matches an empty
 * parameter list.  The search stops at the first function named "main";
 * a missing signature is caught only by the assert below.
 */
static ir_function_signature *
get_main(gl_linked_shader *sh)
{
   ir_function_signature *sig = NULL;
   /* We can't use _mesa_get_main_function_signature() because we don't
    * have a symbol table at this point. Just go find main() by hand.
    */
   foreach_in_list(ir_instruction, ir, sh->ir) {
      ir_function *f = ir->as_function();
      if (f && strcmp(f->name, "main") == 0) {
         exec_list void_parameters;
         sig = f->matching_signature(NULL, &void_parameters, false);
         break;
      }
   }
   assert(sig != NULL); /* main() must exist */
   return sig;
}

/* Rewrite the shader so that the advanced blend equation is evaluated in the
 * shader itself: the value written to render target 0 is combined with the
 * current framebuffer color according to the blend mode uniform, and the
 * combined color is written back to the original output variables.
 *
 * <coherent> is stored in the memory_coherent flag of the hidden
 * framebuffer-fetch variable.
 *
 * Returns false without touching the shader when it declares no blend
 * support (BlendSupport == 0), true otherwise.
 */
bool
lower_blend_equation_advanced(struct gl_linked_shader *sh, bool coherent)
{
   if (sh->Program->sh.fs.BlendSupport == 0)
      return false;

   /* Lower early returns in main() so there's a single exit point
    * where we can insert our lowering code.
    */
   do_lower_jumps(sh->ir, false, false, true, false, false);

   void *mem_ctx = ralloc_parent(sh->ir);

   /* Hidden, read-only output at FRAG_RESULT_DATA0 flagged as a framebuffer
    * fetch output; it supplies the destination color to the blend code.
    */
   ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
                                              "__blend_fb_fetch",
                                              ir_var_shader_out);
   fb->data.location = FRAG_RESULT_DATA0;
   fb->data.read_only = 1;
   fb->data.fb_fetch_output = 1;
   fb->data.memory_coherent = coherent;
   fb->data.how_declared = ir_var_hidden;

   /* Hidden uint uniform holding the active blend mode, bound to the
    * STATE_INTERNAL / STATE_ADVANCED_BLENDING_MODE state slot.
    */
   ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                "gl_AdvancedBlendModeMESA",
                                                ir_var_uniform);
   mode->data.how_declared = ir_var_hidden;
   mode->allocate_state_slots(1);
   ir_state_slot *slot0 = &mode->get_state_slots()[0];
   slot0->swizzle = SWIZZLE_XXXX;
   slot0->tokens[0] = STATE_INTERNAL;
   slot0->tokens[1] = STATE_ADVANCED_BLENDING_MODE;
   for (int i = 2; i < STATE_LENGTH; i++)
      slot0->tokens[i] = 0;

   sh->ir->push_head(fb);
   sh->ir->push_head(mode);

   /* Gather any output variables referring to render target 0.
    *
    * ARB_enhanced_layouts irritatingly allows the shader to specify
    * multiple output variables for the same render target, each of
    * which writes a subset of the components, starting at location_frac.
    * The variables can't overlap, thankfully.
    */
   /* NOTE(review): <fb> was pushed onto sh->ir above and itself satisfies
    * this filter (ir_var_shader_out at FRAG_RESULT_DATA0, four components),
    * so it occupies every slot until a later declaration overwrites it.
    * Confirm this is intended, given the <0, 0, 0, 1> fallback described
    * further down.
    */
   ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
   foreach_in_list(ir_instruction, ir, sh->ir) {
      ir_variable *var = ir->as_variable();
      if (!var || var->data.mode != ir_var_shader_out)
         continue;

      if (var->data.location == FRAG_RESULT_DATA0 ||
          var->data.location == FRAG_RESULT_COLOR) {
         const int components = var->type->without_array()->vector_elements;

         /* Record the variable for each component slot it covers. */
         for (int i = 0; i < components; i++) {
            outputs[var->data.location_frac + i] = var;
         }
      }
   }

   /* Combine values written to outputs into a single RGBA blend source.
    * We assign <0, 0, 0, 1> to any components with no corresponding output.
    */
   ir_rvalue *blend_source;
   if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
      /* A single four-component output: use it directly. */
      blend_source = deref_output(outputs[0]);
   } else {
      ir_rvalue *blend_comps[4];
      for (int i = 0; i < 4; i++) {
         ir_variable *var = outputs[i];
         if (var) {
            /* Select the variable's own component that maps to slot i. */
            blend_comps[i] = swizzle(deref_output(outputs[i]),
                                     i - outputs[i]->data.location_frac, 1);
         } else {
            blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
         }
      }

      blend_source =
         new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
                                    blend_comps[0], blend_comps[1],
                                    blend_comps[2], blend_comps[3]);
   }

   /* Emit at the current end of main()'s body. */
   ir_function_signature *main = get_main(sh);
   ir_factory f(&main->body, mem_ctx);

   ir_variable *result_dest =
      calc_blend_result(f, mode, fb, blend_source,
                        sh->Program->sh.fs.BlendSupport);

   /* Copy the result back to the original values. It would be simpler
    * to demote the program's output variables, and create a new vec4
    * output for our result, but this pass runs before we create the
    * ARB_program_interface_query resource list. So we have to leave
    * the original outputs in place and use them.
    */
   /* NOTE(review): the write mask is 1 << i (the render-target component
    * index), whereas the read side above swizzles with i - location_frac.
    * Confirm this is right for outputs declared with a nonzero
    * location_frac.
    */
   for (int i = 0; i < 4; i++) {
      if (!outputs[i])
         continue;

      f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
                    1 << i));
   }

   validate_ir_tree(sh->ir);
   return true;
}