1428d7b3dSmrg#include "brw.h" 2428d7b3dSmrg 3428d7b3dSmrg#define X16 8 4428d7b3dSmrg#define Y16 10 5428d7b3dSmrg 6428d7b3dSmrgstatic void brw_wm_xy(struct brw_compile *p, int dw) 7428d7b3dSmrg{ 8428d7b3dSmrg struct brw_reg r1 = brw_vec1_grf(1, 0); 9428d7b3dSmrg struct brw_reg r1_uw = __retype_uw(r1); 10428d7b3dSmrg struct brw_reg x_uw, y_uw; 11428d7b3dSmrg 12428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 13428d7b3dSmrg 14428d7b3dSmrg if (dw == 16) { 15428d7b3dSmrg x_uw = brw_uw16_grf(30, 0); 16428d7b3dSmrg y_uw = brw_uw16_grf(28, 0); 17428d7b3dSmrg } else { 18428d7b3dSmrg x_uw = brw_uw8_grf(30, 0); 19428d7b3dSmrg y_uw = brw_uw8_grf(28, 0); 20428d7b3dSmrg } 21428d7b3dSmrg 22428d7b3dSmrg brw_ADD(p, 23428d7b3dSmrg x_uw, 24428d7b3dSmrg __stride(__suboffset(r1_uw, 4), 2, 4, 0), 25428d7b3dSmrg brw_imm_v(0x10101010)); 26428d7b3dSmrg brw_ADD(p, 27428d7b3dSmrg y_uw, 28428d7b3dSmrg __stride(__suboffset(r1_uw, 5), 2, 4, 0), 29428d7b3dSmrg brw_imm_v(0x11001100)); 30428d7b3dSmrg 31428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 32428d7b3dSmrg 33428d7b3dSmrg brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); 34428d7b3dSmrg brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); 35428d7b3dSmrg} 36428d7b3dSmrg 37428d7b3dSmrgstatic void brw_wm_affine_st(struct brw_compile *p, int dw, 38428d7b3dSmrg int channel, int msg) 39428d7b3dSmrg{ 40428d7b3dSmrg int uv; 41428d7b3dSmrg 42428d7b3dSmrg if (dw == 16) { 43428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 44428d7b3dSmrg uv = p->gen >= 060 ? 6 : 3; 45428d7b3dSmrg } else { 46428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 47428d7b3dSmrg uv = p->gen >= 060 ? 4 : 3; 48428d7b3dSmrg } 49428d7b3dSmrg uv += 2*channel; 50428d7b3dSmrg 51428d7b3dSmrg msg++; 52428d7b3dSmrg if (p->gen >= 060) { 53428d7b3dSmrg brw_PLN(p, 54428d7b3dSmrg brw_message_reg(msg), 55428d7b3dSmrg brw_vec1_grf(uv, 0), 56428d7b3dSmrg brw_vec8_grf(2, 0)); 57428d7b3dSmrg msg += dw/8; 58428d7b3dSmrg 59428d7b3dSmrg brw_PLN(p, 60428d7b3dSmrg brw_message_reg(msg), 61428d7b3dSmrg brw_vec1_grf(uv, 4), 62428d7b3dSmrg brw_vec8_grf(2, 0)); 63428d7b3dSmrg } else { 64428d7b3dSmrg struct brw_reg r = brw_vec1_grf(uv, 0); 65428d7b3dSmrg 66428d7b3dSmrg brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); 67428d7b3dSmrg brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); 68428d7b3dSmrg msg += dw/8; 69428d7b3dSmrg 70428d7b3dSmrg brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); 71428d7b3dSmrg brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); 72428d7b3dSmrg } 73428d7b3dSmrg} 74428d7b3dSmrg 75428d7b3dSmrgstatic inline unsigned simd(int dw) 76428d7b3dSmrg{ 77428d7b3dSmrg return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; 78428d7b3dSmrg} 79428d7b3dSmrg 80428d7b3dSmrgstatic inline struct brw_reg sample_result(int dw, int result) 81428d7b3dSmrg{ 82428d7b3dSmrg return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, 83428d7b3dSmrg BRW_REGISTER_TYPE_UW, 84428d7b3dSmrg dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, 85428d7b3dSmrg dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, 86428d7b3dSmrg BRW_HORIZONTAL_STRIDE_1, 87428d7b3dSmrg BRW_SWIZZLE_XYZW, 88428d7b3dSmrg WRITEMASK_XYZW); 89428d7b3dSmrg} 90428d7b3dSmrg 91428d7b3dSmrgstatic int brw_wm_sample(struct brw_compile *p, int dw, 92428d7b3dSmrg int channel, int msg, int result) 93428d7b3dSmrg{ 94428d7b3dSmrg struct brw_reg src0; 95428d7b3dSmrg bool header; 96428d7b3dSmrg int len; 97428d7b3dSmrg 98428d7b3dSmrg len = dw == 16 ? 4 : 2; 99428d7b3dSmrg if (p->gen >= 060) { 100428d7b3dSmrg header = false; 101428d7b3dSmrg src0 = brw_message_reg(++msg); 102428d7b3dSmrg } else { 103428d7b3dSmrg header = true; 104428d7b3dSmrg src0 = brw_vec8_grf(0, 0); 105428d7b3dSmrg } 106428d7b3dSmrg 107428d7b3dSmrg brw_SAMPLE(p, sample_result(dw, result), msg, src0, 108428d7b3dSmrg channel+1, channel, WRITEMASK_XYZW, 0, 109428d7b3dSmrg 2*len, len+header, header, simd(dw)); 110428d7b3dSmrg return result; 111428d7b3dSmrg} 112428d7b3dSmrg 113428d7b3dSmrgstatic int brw_wm_sample__alpha(struct brw_compile *p, int dw, 114428d7b3dSmrg int channel, int msg, int result) 115428d7b3dSmrg{ 116428d7b3dSmrg struct brw_reg src0; 117428d7b3dSmrg int mlen, rlen; 118428d7b3dSmrg 119428d7b3dSmrg if (dw == 8) { 120428d7b3dSmrg /* SIMD8 sample return is not masked */ 121428d7b3dSmrg mlen = 3; 122428d7b3dSmrg rlen = 4; 123428d7b3dSmrg } else { 124428d7b3dSmrg mlen = 5; 125428d7b3dSmrg rlen = 2; 126428d7b3dSmrg } 127428d7b3dSmrg 128428d7b3dSmrg if (p->gen >= 060) 129428d7b3dSmrg src0 = brw_message_reg(msg); 130428d7b3dSmrg else 131428d7b3dSmrg src0 = brw_vec8_grf(0, 0); 132428d7b3dSmrg 133428d7b3dSmrg brw_SAMPLE(p, sample_result(dw, result), msg, src0, 134428d7b3dSmrg channel+1, channel, WRITEMASK_W, 0, 135428d7b3dSmrg rlen, mlen, true, simd(dw)); 136428d7b3dSmrg 137428d7b3dSmrg if (dw == 8) 138428d7b3dSmrg result += 3; 139428d7b3dSmrg 140428d7b3dSmrg return result; 141428d7b3dSmrg} 142428d7b3dSmrg 143428d7b3dSmrgstatic int brw_wm_affine(struct brw_compile *p, int dw, 144428d7b3dSmrg int channel, int msg, int result) 145428d7b3dSmrg{ 146428d7b3dSmrg brw_wm_affine_st(p, dw, channel, msg); 147428d7b3dSmrg return brw_wm_sample(p, dw, channel, msg, result); 148428d7b3dSmrg} 149428d7b3dSmrg 150428d7b3dSmrgstatic int brw_wm_affine__alpha(struct brw_compile *p, int dw, 151428d7b3dSmrg int channel, int msg, int result) 152428d7b3dSmrg{ 153428d7b3dSmrg brw_wm_affine_st(p, dw, channel, msg); 154428d7b3dSmrg return brw_wm_sample__alpha(p, dw, channel, msg, result); 155428d7b3dSmrg} 156428d7b3dSmrg 157428d7b3dSmrgstatic inline struct brw_reg null_result(int dw) 158428d7b3dSmrg{ 159428d7b3dSmrg return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, 160428d7b3dSmrg BRW_REGISTER_TYPE_UW, 161428d7b3dSmrg dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, 162428d7b3dSmrg dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, 163428d7b3dSmrg BRW_HORIZONTAL_STRIDE_1, 164428d7b3dSmrg BRW_SWIZZLE_XYZW, 165428d7b3dSmrg WRITEMASK_XYZW); 166428d7b3dSmrg} 167428d7b3dSmrg 168428d7b3dSmrgstatic void brw_fb_write(struct brw_compile *p, int dw) 169428d7b3dSmrg{ 170428d7b3dSmrg struct brw_instruction *insn; 171428d7b3dSmrg unsigned msg_control, msg_type, msg_len; 172428d7b3dSmrg struct brw_reg src0; 173428d7b3dSmrg bool header; 174428d7b3dSmrg 175428d7b3dSmrg if (dw == 16) { 176428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 177428d7b3dSmrg msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 178428d7b3dSmrg msg_len = 8; 179428d7b3dSmrg } else { 180428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 181428d7b3dSmrg msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 182428d7b3dSmrg msg_len = 4; 183428d7b3dSmrg } 184428d7b3dSmrg 185428d7b3dSmrg if (p->gen < 060) { 186428d7b3dSmrg brw_push_insn_state(p); 187428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 188428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 189428d7b3dSmrg brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); 190428d7b3dSmrg brw_pop_insn_state(p); 191428d7b3dSmrg 192428d7b3dSmrg msg_len += 2; 193428d7b3dSmrg } 194428d7b3dSmrg 195428d7b3dSmrg /* The execution mask is ignored for render target writes. */ 196428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 197428d7b3dSmrg insn->header.predicate_control = 0; 198428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 199428d7b3dSmrg 200428d7b3dSmrg if (p->gen >= 060) { 201428d7b3dSmrg msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 202428d7b3dSmrg src0 = brw_message_reg(2); 203428d7b3dSmrg header = false; 204428d7b3dSmrg } else { 205428d7b3dSmrg insn->header.destreg__conditionalmod = 0; 206428d7b3dSmrg msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 207428d7b3dSmrg src0 = __retype_uw(brw_vec8_grf(0, 0)); 208428d7b3dSmrg header = true; 209428d7b3dSmrg } 210428d7b3dSmrg 211428d7b3dSmrg brw_set_dest(p, insn, null_result(dw)); 212428d7b3dSmrg brw_set_src0(p, insn, src0); 213428d7b3dSmrg brw_set_dp_write_message(p, insn, 0, 214428d7b3dSmrg msg_control, msg_type, msg_len, 215428d7b3dSmrg header, true, 0, true, false); 216428d7b3dSmrg} 217428d7b3dSmrg 218428d7b3dSmrgstatic void brw_wm_write(struct brw_compile *p, int dw, int src) 219428d7b3dSmrg{ 220428d7b3dSmrg int n; 221428d7b3dSmrg 222428d7b3dSmrg if (dw == 8 && p->gen >= 060) { 223428d7b3dSmrg /* XXX pixel execution mask? */ 224428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 225428d7b3dSmrg 226428d7b3dSmrg brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); 227428d7b3dSmrg brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); 228428d7b3dSmrg brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); 229428d7b3dSmrg brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); 230428d7b3dSmrg goto done; 231428d7b3dSmrg } 232428d7b3dSmrg 233428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 234428d7b3dSmrg 235428d7b3dSmrg for (n = 0; n < 4; n++) { 236428d7b3dSmrg if (p->gen >= 060) { 237428d7b3dSmrg brw_MOV(p, 238428d7b3dSmrg brw_message_reg(2 + 2*n), 239428d7b3dSmrg brw_vec8_grf(src + 2*n, 0)); 240428d7b3dSmrg } else if (p->gen >= 045 && dw == 16) { 241428d7b3dSmrg brw_MOV(p, 242428d7b3dSmrg brw_message_reg(2 + n + BRW_MRF_COMPR4), 243428d7b3dSmrg brw_vec8_grf(src + 2*n, 0)); 244428d7b3dSmrg } else { 245428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 246428d7b3dSmrg brw_MOV(p, 247428d7b3dSmrg brw_message_reg(2 + n), 248428d7b3dSmrg brw_vec8_grf(src + 2*n, 0)); 249428d7b3dSmrg 250428d7b3dSmrg if (dw == 16) { 251428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); 252428d7b3dSmrg brw_MOV(p, 253428d7b3dSmrg brw_message_reg(2 + n + 4), 254428d7b3dSmrg brw_vec8_grf(src + 2*n+1, 0)); 255428d7b3dSmrg } 256428d7b3dSmrg } 257428d7b3dSmrg } 258428d7b3dSmrg 259428d7b3dSmrgdone: 260428d7b3dSmrg brw_fb_write(p, dw); 261428d7b3dSmrg} 262428d7b3dSmrg 263428d7b3dSmrgstatic void brw_wm_write__mask(struct brw_compile *p, int dw, 264428d7b3dSmrg int src, int mask) 265428d7b3dSmrg{ 266428d7b3dSmrg int n; 267428d7b3dSmrg 268428d7b3dSmrg if (dw == 8 && p->gen >= 060) { 269428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 270428d7b3dSmrg 271428d7b3dSmrg brw_MUL(p, 272428d7b3dSmrg brw_message_reg(2), 273428d7b3dSmrg brw_vec8_grf(src+0, 0), 274428d7b3dSmrg brw_vec8_grf(mask, 0)); 275428d7b3dSmrg brw_MUL(p, 276428d7b3dSmrg brw_message_reg(3), 277428d7b3dSmrg brw_vec8_grf(src+1, 0), 278428d7b3dSmrg brw_vec8_grf(mask, 0)); 279428d7b3dSmrg brw_MUL(p, 280428d7b3dSmrg brw_message_reg(4), 281428d7b3dSmrg brw_vec8_grf(src+2, 0), 282428d7b3dSmrg brw_vec8_grf(mask, 0)); 283428d7b3dSmrg brw_MUL(p, 284428d7b3dSmrg brw_message_reg(5), 285428d7b3dSmrg brw_vec8_grf(src+3, 0), 286428d7b3dSmrg brw_vec8_grf(mask, 0)); 287428d7b3dSmrg 288428d7b3dSmrg goto done; 289428d7b3dSmrg } 290428d7b3dSmrg 291428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 292428d7b3dSmrg 293428d7b3dSmrg for (n = 0; n < 4; n++) { 294428d7b3dSmrg if (p->gen >= 060) { 295428d7b3dSmrg brw_MUL(p, 296428d7b3dSmrg brw_message_reg(2 + 2*n), 297428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 298428d7b3dSmrg brw_vec8_grf(mask, 0)); 299428d7b3dSmrg } else if (p->gen >= 045 && dw == 16) { 300428d7b3dSmrg brw_MUL(p, 301428d7b3dSmrg brw_message_reg(2 + n + BRW_MRF_COMPR4), 302428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 303428d7b3dSmrg brw_vec8_grf(mask, 0)); 304428d7b3dSmrg } else { 305428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 306428d7b3dSmrg brw_MUL(p, 307428d7b3dSmrg brw_message_reg(2 + n), 308428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 309428d7b3dSmrg brw_vec8_grf(mask, 0)); 310428d7b3dSmrg 311428d7b3dSmrg if (dw == 16) { 312428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); 313428d7b3dSmrg brw_MUL(p, 314428d7b3dSmrg brw_message_reg(2 + n + 4), 315428d7b3dSmrg brw_vec8_grf(src + 2*n+1, 0), 316428d7b3dSmrg brw_vec8_grf(mask+1, 0)); 317428d7b3dSmrg } 318428d7b3dSmrg } 319428d7b3dSmrg } 320428d7b3dSmrg 321428d7b3dSmrgdone: 322428d7b3dSmrg brw_fb_write(p, dw); 323428d7b3dSmrg} 324428d7b3dSmrg 325428d7b3dSmrgstatic void brw_wm_write__opacity(struct brw_compile *p, int dw, 326428d7b3dSmrg int src, int mask) 327428d7b3dSmrg{ 328428d7b3dSmrg int n; 329428d7b3dSmrg 330428d7b3dSmrg if (dw == 8 && p->gen >= 060) { 331428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 332428d7b3dSmrg 333428d7b3dSmrg brw_MUL(p, 334428d7b3dSmrg brw_message_reg(2), 335428d7b3dSmrg brw_vec8_grf(src+0, 0), 336428d7b3dSmrg brw_vec1_grf(mask, 3)); 337428d7b3dSmrg brw_MUL(p, 338428d7b3dSmrg brw_message_reg(3), 339428d7b3dSmrg brw_vec8_grf(src+1, 0), 340428d7b3dSmrg brw_vec1_grf(mask, 3)); 341428d7b3dSmrg brw_MUL(p, 342428d7b3dSmrg brw_message_reg(4), 343428d7b3dSmrg brw_vec8_grf(src+2, 0), 344428d7b3dSmrg brw_vec1_grf(mask, 3)); 345428d7b3dSmrg brw_MUL(p, 346428d7b3dSmrg brw_message_reg(5), 347428d7b3dSmrg brw_vec8_grf(src+3, 0), 348428d7b3dSmrg brw_vec1_grf(mask, 3)); 349428d7b3dSmrg 350428d7b3dSmrg goto done; 351428d7b3dSmrg } 352428d7b3dSmrg 353428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 354428d7b3dSmrg 355428d7b3dSmrg for (n = 0; n < 4; n++) { 356428d7b3dSmrg if (p->gen >= 060) { 357428d7b3dSmrg brw_MUL(p, 358428d7b3dSmrg brw_message_reg(2 + 2*n), 359428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 360428d7b3dSmrg brw_vec1_grf(mask, 3)); 361428d7b3dSmrg } else if (p->gen >= 045 && dw == 16) { 362428d7b3dSmrg brw_MUL(p, 363428d7b3dSmrg brw_message_reg(2 + n + BRW_MRF_COMPR4), 364428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 365428d7b3dSmrg brw_vec1_grf(mask, 3)); 366428d7b3dSmrg } else { 367428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 368428d7b3dSmrg brw_MUL(p, 369428d7b3dSmrg brw_message_reg(2 + n), 370428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 371428d7b3dSmrg brw_vec1_grf(mask, 3)); 372428d7b3dSmrg 373428d7b3dSmrg if (dw == 16) { 374428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); 375428d7b3dSmrg brw_MUL(p, 376428d7b3dSmrg brw_message_reg(2 + n + 4), 377428d7b3dSmrg brw_vec8_grf(src + 2*n+1, 0), 378428d7b3dSmrg brw_vec1_grf(mask, 3)); 379428d7b3dSmrg } 380428d7b3dSmrg } 381428d7b3dSmrg } 382428d7b3dSmrg 383428d7b3dSmrgdone: 384428d7b3dSmrg brw_fb_write(p, dw); 385428d7b3dSmrg} 386428d7b3dSmrg 387428d7b3dSmrgstatic void brw_wm_write__mask_ca(struct brw_compile *p, int dw, 388428d7b3dSmrg int src, int mask) 389428d7b3dSmrg{ 390428d7b3dSmrg int n; 391428d7b3dSmrg 392428d7b3dSmrg if (dw == 8 && p->gen >= 060) { 393428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 394428d7b3dSmrg 395428d7b3dSmrg brw_MUL(p, 396428d7b3dSmrg brw_message_reg(2), 397428d7b3dSmrg brw_vec8_grf(src + 0, 0), 398428d7b3dSmrg brw_vec8_grf(mask + 0, 0)); 399428d7b3dSmrg brw_MUL(p, 400428d7b3dSmrg brw_message_reg(3), 401428d7b3dSmrg brw_vec8_grf(src + 1, 0), 402428d7b3dSmrg brw_vec8_grf(mask + 1, 0)); 403428d7b3dSmrg brw_MUL(p, 404428d7b3dSmrg brw_message_reg(4), 405428d7b3dSmrg brw_vec8_grf(src + 2, 0), 406428d7b3dSmrg brw_vec8_grf(mask + 2, 0)); 407428d7b3dSmrg brw_MUL(p, 408428d7b3dSmrg brw_message_reg(5), 409428d7b3dSmrg brw_vec8_grf(src + 3, 0), 410428d7b3dSmrg brw_vec8_grf(mask + 3, 0)); 411428d7b3dSmrg 412428d7b3dSmrg goto done; 413428d7b3dSmrg } 414428d7b3dSmrg 415428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 416428d7b3dSmrg 417428d7b3dSmrg for (n = 0; n < 4; n++) { 418428d7b3dSmrg if (p->gen >= 060) { 419428d7b3dSmrg brw_MUL(p, 420428d7b3dSmrg brw_message_reg(2 + 2*n), 421428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 422428d7b3dSmrg brw_vec8_grf(mask + 2*n, 0)); 423428d7b3dSmrg } else if (p->gen >= 045 && dw == 16) { 424428d7b3dSmrg brw_MUL(p, 425428d7b3dSmrg brw_message_reg(2 + n + BRW_MRF_COMPR4), 426428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 427428d7b3dSmrg brw_vec8_grf(mask + 2*n, 0)); 428428d7b3dSmrg } else { 429428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 430428d7b3dSmrg brw_MUL(p, 431428d7b3dSmrg brw_message_reg(2 + n), 432428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 433428d7b3dSmrg brw_vec8_grf(mask + 2*n, 0)); 434428d7b3dSmrg 435428d7b3dSmrg if (dw == 16) { 436428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); 437428d7b3dSmrg brw_MUL(p, 438428d7b3dSmrg brw_message_reg(2 + n + 4), 439428d7b3dSmrg brw_vec8_grf(src + 2*n + 1, 0), 440428d7b3dSmrg brw_vec8_grf(mask + 2*n + 1, 0)); 441428d7b3dSmrg } 442428d7b3dSmrg } 443428d7b3dSmrg } 444428d7b3dSmrg 445428d7b3dSmrgdone: 446428d7b3dSmrg brw_fb_write(p, dw); 447428d7b3dSmrg} 448428d7b3dSmrg 449428d7b3dSmrgbool 450428d7b3dSmrgbrw_wm_kernel__affine(struct brw_compile *p, int dispatch) 451428d7b3dSmrg{ 452428d7b3dSmrg if (p->gen < 060) 453428d7b3dSmrg brw_wm_xy(p, dispatch); 454428d7b3dSmrg brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); 455428d7b3dSmrg 456428d7b3dSmrg return true; 457428d7b3dSmrg} 458428d7b3dSmrg 459428d7b3dSmrgbool 460428d7b3dSmrgbrw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) 461428d7b3dSmrg{ 462428d7b3dSmrg int src, mask; 463428d7b3dSmrg 464428d7b3dSmrg if (p->gen < 060) 465428d7b3dSmrg brw_wm_xy(p, dispatch); 466428d7b3dSmrg 467428d7b3dSmrg src = brw_wm_affine(p, dispatch, 0, 1, 12); 468428d7b3dSmrg mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20); 469428d7b3dSmrg brw_wm_write__mask(p, dispatch, src, mask); 470428d7b3dSmrg 471428d7b3dSmrg return true; 472428d7b3dSmrg} 473428d7b3dSmrg 474428d7b3dSmrgbool 475428d7b3dSmrgbrw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) 476428d7b3dSmrg{ 477428d7b3dSmrg int src, mask; 478428d7b3dSmrg 479428d7b3dSmrg if (p->gen < 060) 480428d7b3dSmrg brw_wm_xy(p, dispatch); 481428d7b3dSmrg 482428d7b3dSmrg src = brw_wm_affine(p, dispatch, 0, 1, 12); 483428d7b3dSmrg mask = brw_wm_affine(p, dispatch, 1, 6, 20); 484428d7b3dSmrg brw_wm_write__mask_ca(p, dispatch, src, mask); 485428d7b3dSmrg 486428d7b3dSmrg return true; 487428d7b3dSmrg} 488428d7b3dSmrg 489428d7b3dSmrgbool 490428d7b3dSmrgbrw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) 491428d7b3dSmrg{ 492428d7b3dSmrg int src, mask; 493428d7b3dSmrg 494428d7b3dSmrg if (p->gen < 060) 495428d7b3dSmrg brw_wm_xy(p, dispatch); 496428d7b3dSmrg 497428d7b3dSmrg src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); 498428d7b3dSmrg mask = brw_wm_affine(p, dispatch, 1, 6, 16); 499428d7b3dSmrg brw_wm_write__mask(p, dispatch, mask, src); 500428d7b3dSmrg 501428d7b3dSmrg return true; 502428d7b3dSmrg} 503428d7b3dSmrg 504428d7b3dSmrg/* Projective variants */ 505428d7b3dSmrg 506428d7b3dSmrgstatic void brw_wm_projective_st(struct brw_compile *p, int dw, 507428d7b3dSmrg int channel, int msg) 508428d7b3dSmrg{ 509428d7b3dSmrg int uv; 510428d7b3dSmrg 511428d7b3dSmrg if (dw == 16) { 512428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 513428d7b3dSmrg uv = p->gen >= 060 ? 6 : 3; 514428d7b3dSmrg } else { 515428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 516428d7b3dSmrg uv = p->gen >= 060 ? 4 : 3; 517428d7b3dSmrg } 518428d7b3dSmrg uv += 2*channel; 519428d7b3dSmrg 520428d7b3dSmrg msg++; 521428d7b3dSmrg if (p->gen >= 060) { 522428d7b3dSmrg /* First compute 1/z */ 523428d7b3dSmrg brw_PLN(p, 524428d7b3dSmrg brw_vec8_grf(30, 0), 525428d7b3dSmrg brw_vec1_grf(uv+1, 0), 526428d7b3dSmrg brw_vec8_grf(2, 0)); 527428d7b3dSmrg 528428d7b3dSmrg if (dw == 16) { 529428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 530428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 531428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); 532428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 533428d7b3dSmrg } else 534428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 535428d7b3dSmrg 536428d7b3dSmrg brw_PLN(p, 537428d7b3dSmrg brw_vec8_grf(26, 0), 538428d7b3dSmrg brw_vec1_grf(uv, 0), 539428d7b3dSmrg brw_vec8_grf(2, 0)); 540428d7b3dSmrg brw_PLN(p, 541428d7b3dSmrg brw_vec8_grf(28, 0), 542428d7b3dSmrg brw_vec1_grf(uv, 4), 543428d7b3dSmrg brw_vec8_grf(2, 0)); 544428d7b3dSmrg 545428d7b3dSmrg brw_MUL(p, 546428d7b3dSmrg brw_message_reg(msg), 547428d7b3dSmrg brw_vec8_grf(26, 0), 548428d7b3dSmrg brw_vec8_grf(30, 0)); 549428d7b3dSmrg brw_MUL(p, 550428d7b3dSmrg brw_message_reg(msg + dw/8), 551428d7b3dSmrg brw_vec8_grf(28, 0), 552428d7b3dSmrg brw_vec8_grf(30, 0)); 553428d7b3dSmrg } else { 554428d7b3dSmrg struct brw_reg r = brw_vec1_grf(uv, 0); 555428d7b3dSmrg 556428d7b3dSmrg /* First compute 1/z */ 557428d7b3dSmrg brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); 558428d7b3dSmrg brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); 559428d7b3dSmrg 560428d7b3dSmrg if (dw == 16) { 561428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 562428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 563428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); 564428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 565428d7b3dSmrg } else 566428d7b3dSmrg brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 567428d7b3dSmrg 568428d7b3dSmrg /* Now compute the output s,t values */ 569428d7b3dSmrg brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); 570428d7b3dSmrg brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); 571428d7b3dSmrg brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); 572428d7b3dSmrg msg += dw/8; 573428d7b3dSmrg 574428d7b3dSmrg brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); 575428d7b3dSmrg brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); 576428d7b3dSmrg brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); 577428d7b3dSmrg } 578428d7b3dSmrg} 579428d7b3dSmrg 580428d7b3dSmrgstatic int brw_wm_projective(struct brw_compile *p, int dw, 581428d7b3dSmrg int channel, int msg, int result) 582428d7b3dSmrg{ 583428d7b3dSmrg brw_wm_projective_st(p, dw, channel, msg); 584428d7b3dSmrg return brw_wm_sample(p, dw, channel, msg, result); 585428d7b3dSmrg} 586428d7b3dSmrg 587428d7b3dSmrgstatic int brw_wm_projective__alpha(struct brw_compile *p, int dw, 588428d7b3dSmrg int channel, int msg, int result) 589428d7b3dSmrg{ 590428d7b3dSmrg brw_wm_projective_st(p, dw, channel, msg); 591428d7b3dSmrg return brw_wm_sample__alpha(p, dw, channel, msg, result); 592428d7b3dSmrg} 593428d7b3dSmrg 594428d7b3dSmrgbool 595428d7b3dSmrgbrw_wm_kernel__projective(struct brw_compile *p, int dispatch) 596428d7b3dSmrg{ 597428d7b3dSmrg if (p->gen < 060) 598428d7b3dSmrg brw_wm_xy(p, dispatch); 599428d7b3dSmrg brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); 600428d7b3dSmrg 601428d7b3dSmrg return true; 602428d7b3dSmrg} 603428d7b3dSmrg 604428d7b3dSmrgbool 605428d7b3dSmrgbrw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) 606428d7b3dSmrg{ 607428d7b3dSmrg int src, mask; 608428d7b3dSmrg 609428d7b3dSmrg if (p->gen < 060) 610428d7b3dSmrg brw_wm_xy(p, dispatch); 611428d7b3dSmrg 612428d7b3dSmrg src = brw_wm_projective(p, dispatch, 0, 1, 12); 613428d7b3dSmrg mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); 614428d7b3dSmrg brw_wm_write__mask(p, dispatch, src, mask); 615428d7b3dSmrg 616428d7b3dSmrg return true; 617428d7b3dSmrg} 618428d7b3dSmrg 619428d7b3dSmrgbool 620428d7b3dSmrgbrw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) 621428d7b3dSmrg{ 622428d7b3dSmrg int src, mask; 623428d7b3dSmrg 624428d7b3dSmrg if (p->gen < 060) 625428d7b3dSmrg brw_wm_xy(p, dispatch); 626428d7b3dSmrg 627428d7b3dSmrg src = brw_wm_projective(p, dispatch, 0, 1, 12); 628428d7b3dSmrg mask = brw_wm_projective(p, dispatch, 1, 6, 20); 629428d7b3dSmrg brw_wm_write__mask_ca(p, dispatch, src, mask); 630428d7b3dSmrg 631428d7b3dSmrg return true; 632428d7b3dSmrg} 633428d7b3dSmrg 634428d7b3dSmrgbool 635428d7b3dSmrgbrw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) 636428d7b3dSmrg{ 637428d7b3dSmrg int src, mask; 638428d7b3dSmrg 639428d7b3dSmrg if (p->gen < 060) 640428d7b3dSmrg brw_wm_xy(p, dispatch); 641428d7b3dSmrg 642428d7b3dSmrg src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); 643428d7b3dSmrg mask = brw_wm_projective(p, dispatch, 1, 6, 16); 644428d7b3dSmrg brw_wm_write__mask(p, dispatch, mask, src); 645428d7b3dSmrg 646428d7b3dSmrg return true; 647428d7b3dSmrg} 648428d7b3dSmrg 649428d7b3dSmrgbool 650428d7b3dSmrgbrw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) 651428d7b3dSmrg{ 652428d7b3dSmrg int src, mask; 653428d7b3dSmrg 654428d7b3dSmrg if (p->gen < 060) { 655428d7b3dSmrg brw_wm_xy(p, dispatch); 656428d7b3dSmrg mask = 5; 657428d7b3dSmrg } else 658428d7b3dSmrg mask = dispatch == 16 ? 8 : 6; 659428d7b3dSmrg 660428d7b3dSmrg src = brw_wm_affine(p, dispatch, 0, 1, 12); 661428d7b3dSmrg brw_wm_write__opacity(p, dispatch, src, mask); 662428d7b3dSmrg 663428d7b3dSmrg return true; 664428d7b3dSmrg} 665428d7b3dSmrg 666428d7b3dSmrgbool 667428d7b3dSmrgbrw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) 668428d7b3dSmrg{ 669428d7b3dSmrg int src, mask; 670428d7b3dSmrg 671428d7b3dSmrg if (p->gen < 060) { 672428d7b3dSmrg brw_wm_xy(p, dispatch); 673428d7b3dSmrg mask = 5; 674428d7b3dSmrg } else 675428d7b3dSmrg mask = dispatch == 16 ? 8 : 6; 676428d7b3dSmrg 677428d7b3dSmrg src = brw_wm_projective(p, dispatch, 0, 1, 12); 678428d7b3dSmrg brw_wm_write__opacity(p, dispatch, src, mask); 679428d7b3dSmrg 680428d7b3dSmrg return true; 681428d7b3dSmrg} 682