1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2013 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20428d7b3dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21428d7b3dSmrg * IN THE SOFTWARE. 22428d7b3dSmrg */ 23428d7b3dSmrg 24428d7b3dSmrg#ifdef HAVE_CONFIG_H 25428d7b3dSmrg#include "config.h" 26428d7b3dSmrg#endif 27428d7b3dSmrg 28428d7b3dSmrg#include <string.h> 29428d7b3dSmrg 30428d7b3dSmrg#include "compiler.h" 31428d7b3dSmrg#include "brw/brw.h" 32428d7b3dSmrg#include "gen8_eu.h" 33428d7b3dSmrg 34428d7b3dSmrg#ifndef ARRAY_SIZE 35428d7b3dSmrg#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) 36428d7b3dSmrg#endif 37428d7b3dSmrg 38428d7b3dSmrg/* EU ISA */ 39428d7b3dSmrg 40428d7b3dSmrg#define MRF_HACK_START 111 41428d7b3dSmrg 42428d7b3dSmrgstruct gen8_instruction { 43428d7b3dSmrg uint32_t data[4]; 44428d7b3dSmrg}; 45428d7b3dSmrg 46428d7b3dSmrgstatic inline unsigned 47428d7b3dSmrg__gen8_mask(unsigned high, unsigned low) 48428d7b3dSmrg{ 49428d7b3dSmrg assert(high >= low); 50428d7b3dSmrg return (1 << (high - low + 1)) - 1; 51428d7b3dSmrg} 52428d7b3dSmrg 53428d7b3dSmrg/** 54428d7b3dSmrg * Fetch a set of contiguous bits from the instruction. 55428d7b3dSmrg * 56428d7b3dSmrg * Bits indexes range from 0..127; fields may not cross 32-bit boundaries. 57428d7b3dSmrg */ 58428d7b3dSmrgstatic inline unsigned 59428d7b3dSmrg__gen8_bits(struct gen8_instruction *insn, unsigned high, unsigned low) 60428d7b3dSmrg{ 61428d7b3dSmrg /* We assume the field doesn't cross 32-bit boundaries. */ 62428d7b3dSmrg const unsigned word = high / 32; 63428d7b3dSmrg 64428d7b3dSmrg assert(word == low / 32); 65428d7b3dSmrg 66428d7b3dSmrg high %= 32; 67428d7b3dSmrg low %= 32; 68428d7b3dSmrg 69428d7b3dSmrg return (insn->data[word] >> low) & __gen8_mask(high, low); 70428d7b3dSmrg} 71428d7b3dSmrg 72428d7b3dSmrg/** 73428d7b3dSmrg * Set bits in the instruction, with proper shifting and masking. 74428d7b3dSmrg * 75428d7b3dSmrg * Bits indexes range from 0..127; fields may not cross 32-bit boundaries. 76428d7b3dSmrg */ 77428d7b3dSmrgstatic inline void 78428d7b3dSmrg__gen8_set_bits(struct gen8_instruction *insn, 79428d7b3dSmrg unsigned high, 80428d7b3dSmrg unsigned low, 81428d7b3dSmrg unsigned value) 82428d7b3dSmrg{ 83428d7b3dSmrg const unsigned word = high / 32; 84428d7b3dSmrg unsigned mask; 85428d7b3dSmrg 86428d7b3dSmrg assert(word == low / 32); 87428d7b3dSmrg 88428d7b3dSmrg high %= 32; 89428d7b3dSmrg low %= 32; 90428d7b3dSmrg assert(value < __gen8_mask(high, low) + 1); 91428d7b3dSmrg 92428d7b3dSmrg mask = __gen8_mask(high, low) << low; 93428d7b3dSmrg insn->data[word] &= ~mask; 94428d7b3dSmrg insn->data[word] |= (value << low) & mask; 95428d7b3dSmrg 96428d7b3dSmrg assert(__gen8_bits(insn, 32*word+high, 32*word+low) == value); 97428d7b3dSmrg} 98428d7b3dSmrg 99428d7b3dSmrg#define F(name, high, low) \ 100428d7b3dSmrgstatic inline void __gen8_set_##name(struct gen8_instruction *insn, unsigned v) \ 101428d7b3dSmrg{ \ 102428d7b3dSmrg __gen8_set_bits(insn, high, low, v); \ 103428d7b3dSmrg} \ 104428d7b3dSmrgstatic inline unsigned __gen8_##name(struct gen8_instruction *insn) \ 105428d7b3dSmrg{ \ 106428d7b3dSmrg return __gen8_bits(insn, high, low); \ 107428d7b3dSmrg} 108428d7b3dSmrg 109428d7b3dSmrg/** 110428d7b3dSmrg* Direct addressing only: 111428d7b3dSmrg* @{ 112428d7b3dSmrg*/ 113428d7b3dSmrgF(src1_da_reg_nr, 108, 101); 114428d7b3dSmrgF(src0_da_reg_nr, 76, 69); 115428d7b3dSmrgF(dst_da1_hstride, 62, 61); 116428d7b3dSmrgF(dst_da_reg_nr, 60, 53); 117428d7b3dSmrgF(dst_da16_subreg_nr, 52, 52); 118428d7b3dSmrgF(dst_da1_subreg_nr, 52, 48); 119428d7b3dSmrgF(da16_writemask, 51, 48); /* Dst.ChanEn */ 120428d7b3dSmrg/** @} */ 121428d7b3dSmrg 122428d7b3dSmrgF(src1_vert_stride, 120, 117) 123428d7b3dSmrgF(src1_da1_width, 116, 114) 124428d7b3dSmrgF(src1_da16_swiz_w, 115, 114) 125428d7b3dSmrgF(src1_da16_swiz_z, 113, 112) 126428d7b3dSmrgF(src1_da1_hstride, 113, 112) 127428d7b3dSmrgF(src1_address_mode, 111, 111) 128428d7b3dSmrg/** Src1.SrcMod @{ */ 129428d7b3dSmrgF(src1_negate, 110, 110) 130428d7b3dSmrgF(src1_abs, 109, 109) 131428d7b3dSmrg/** @} */ 132428d7b3dSmrgF(src1_da16_subreg_nr, 100, 100) 133428d7b3dSmrgF(src1_da1_subreg_nr, 100, 96) 134428d7b3dSmrgF(src1_da16_swiz_y, 99, 98) 135428d7b3dSmrgF(src1_da16_swiz_x, 97, 96) 136428d7b3dSmrgF(src1_reg_type, 94, 91) 137428d7b3dSmrgF(src1_reg_file, 90, 89) 138428d7b3dSmrgF(src0_vert_stride, 88, 85) 139428d7b3dSmrgF(src0_da1_width, 84, 82) 140428d7b3dSmrgF(src0_da16_swiz_w, 83, 82) 141428d7b3dSmrgF(src0_da16_swiz_z, 81, 80) 142428d7b3dSmrgF(src0_da1_hstride, 81, 80) 143428d7b3dSmrgF(src0_address_mode, 79, 79) 144428d7b3dSmrg/** Src0.SrcMod @{ */ 145428d7b3dSmrgF(src0_negate, 78, 78) 146428d7b3dSmrgF(src0_abs, 77, 77) 147428d7b3dSmrg/** @} */ 148428d7b3dSmrgF(src0_da16_subreg_nr, 68, 68) 149428d7b3dSmrgF(src0_da1_subreg_nr, 68, 64) 150428d7b3dSmrgF(src0_da16_swiz_y, 67, 66) 151428d7b3dSmrgF(src0_da16_swiz_x, 65, 64) 152428d7b3dSmrgF(dst_address_mode, 63, 63) 153428d7b3dSmrgF(src0_reg_type, 46, 43) 154428d7b3dSmrgF(src0_reg_file, 42, 41) 155428d7b3dSmrgF(dst_reg_type, 40, 37) 156428d7b3dSmrgF(dst_reg_file, 36, 35) 157428d7b3dSmrgF(mask_control, 34, 34) 158428d7b3dSmrgF(flag_reg_nr, 33, 33) 159428d7b3dSmrgF(flag_subreg_nr, 32, 32) 160428d7b3dSmrgF(saturate, 31, 31) 161428d7b3dSmrgF(branch_control, 30, 30) 162428d7b3dSmrgF(debug_control, 30, 30) 163428d7b3dSmrgF(cmpt_control, 29, 29) 164428d7b3dSmrgF(acc_wr_control, 28, 28) 165428d7b3dSmrgF(cond_modifier, 27, 24) 166428d7b3dSmrgF(exec_size, 23, 21) 167428d7b3dSmrgF(pred_inv, 20, 20) 168428d7b3dSmrgF(pred_control, 19, 16) 169428d7b3dSmrgF(thread_control, 15, 14) 170428d7b3dSmrgF(qtr_control, 13, 12) 171428d7b3dSmrgF(nib_control, 11, 11) 172428d7b3dSmrgF(dep_control, 10, 9) 173428d7b3dSmrgF(access_mode, 8, 8) 174428d7b3dSmrg/* Bit 7 is Reserved (for future Opcode expansion) */ 175428d7b3dSmrgF(opcode, 6, 0) 176428d7b3dSmrg 177428d7b3dSmrg/** 178428d7b3dSmrg* Three-source instructions: 179428d7b3dSmrg* @{ 180428d7b3dSmrg*/ 181428d7b3dSmrgF(src2_3src_reg_nr, 125, 118) 182428d7b3dSmrgF(src2_3src_subreg_nr, 117, 115) 183428d7b3dSmrgF(src2_3src_swizzle, 114, 107) 184428d7b3dSmrgF(src2_3src_rep_ctrl, 106, 106) 185428d7b3dSmrgF(src1_3src_reg_nr, 104, 97) 186428d7b3dSmrgF(src1_3src_subreg_hi, 96, 96) 187428d7b3dSmrgF(src1_3src_subreg_lo, 95, 94) 188428d7b3dSmrgF(src1_3src_swizzle, 93, 86) 189428d7b3dSmrgF(src1_3src_rep_ctrl, 85, 85) 190428d7b3dSmrgF(src0_3src_reg_nr, 83, 76) 191428d7b3dSmrgF(src0_3src_subreg_nr, 75, 73) 192428d7b3dSmrgF(src0_3src_swizzle, 72, 65) 193428d7b3dSmrgF(src0_3src_rep_ctrl, 64, 64) 194428d7b3dSmrgF(dst_3src_reg_nr, 63, 56) 195428d7b3dSmrgF(dst_3src_subreg_nr, 55, 53) 196428d7b3dSmrgF(dst_3src_writemask, 52, 49) 197428d7b3dSmrgF(dst_3src_type, 48, 46) 198428d7b3dSmrgF(src_3src_type, 45, 43) 199428d7b3dSmrgF(src2_3src_negate, 42, 42) 200428d7b3dSmrgF(src2_3src_abs, 41, 41) 201428d7b3dSmrgF(src1_3src_negate, 40, 40) 202428d7b3dSmrgF(src1_3src_abs, 39, 39) 203428d7b3dSmrgF(src0_3src_negate, 38, 38) 204428d7b3dSmrgF(src0_3src_abs, 37, 37) 205428d7b3dSmrg/** @} */ 206428d7b3dSmrg 207428d7b3dSmrg/** 208428d7b3dSmrg* Fields for SEND messages: 209428d7b3dSmrg* @{ 210428d7b3dSmrg*/ 211428d7b3dSmrgF(eot, 127, 127) 212428d7b3dSmrgF(mlen, 124, 121) 213428d7b3dSmrgF(rlen, 120, 116) 214428d7b3dSmrgF(header_present, 115, 115) 215428d7b3dSmrgF(function_control, 114, 96) 216428d7b3dSmrgF(sfid, 27, 24) 217428d7b3dSmrgF(math_function, 27, 24) 218428d7b3dSmrg/** @} */ 219428d7b3dSmrg 220428d7b3dSmrg/** 221428d7b3dSmrg* URB message function control bits: 222428d7b3dSmrg* @{ 223428d7b3dSmrg*/ 224428d7b3dSmrgF(urb_per_slot_offset, 113, 113) 225428d7b3dSmrgF(urb_interleave, 111, 111) 226428d7b3dSmrgF(urb_global_offset, 110, 100) 227428d7b3dSmrgF(urb_opcode, 99, 96) 228428d7b3dSmrg/** @} */ 229428d7b3dSmrg 230428d7b3dSmrg/** 231428d7b3dSmrg* Sampler message function control bits: 232428d7b3dSmrg* @{ 233428d7b3dSmrg*/ 234428d7b3dSmrgF(sampler_simd_mode, 114, 113) 235428d7b3dSmrgF(sampler_msg_type, 112, 108) 236428d7b3dSmrgF(sampler, 107, 104) 237428d7b3dSmrgF(binding_table_index, 103, 96) 238428d7b3dSmrg/** @} */ 239428d7b3dSmrg 240428d7b3dSmrg/** 241428d7b3dSmrg * Data port message function control bits: 242428d7b3dSmrg * @ { 243428d7b3dSmrg */ 244428d7b3dSmrgF(dp_category, 114, 114) 245428d7b3dSmrgF(dp_message_type, 113, 110) 246428d7b3dSmrgF(dp_message_control, 109, 104) 247428d7b3dSmrgF(dp_binding_table_index, 103, 96) 248428d7b3dSmrg/** @} */ 249428d7b3dSmrg 250428d7b3dSmrg/** 251428d7b3dSmrg * Thread Spawn message function control bits: 252428d7b3dSmrg * @ { 253428d7b3dSmrg */ 254428d7b3dSmrgF(ts_resource_select, 100, 100) 255428d7b3dSmrgF(ts_request_type, 97, 97) 256428d7b3dSmrgF(ts_opcode, 96, 96) 257428d7b3dSmrg/** @} */ 258428d7b3dSmrg 259428d7b3dSmrg/** 260428d7b3dSmrg * Video Motion Estimation message function control bits: 261428d7b3dSmrg * @ { 262428d7b3dSmrg */ 263428d7b3dSmrgF(vme_message_type, 110, 109) 264428d7b3dSmrgF(vme_binding_table_index, 103, 96) 265428d7b3dSmrg/** @} */ 266428d7b3dSmrg 267428d7b3dSmrg/** 268428d7b3dSmrg * Check & Refinement Engine message function control bits: 269428d7b3dSmrg * @ { 270428d7b3dSmrg */ 271428d7b3dSmrgF(cre_message_type, 110, 109) 272428d7b3dSmrgF(cre_binding_table_index, 103, 96) 273428d7b3dSmrg/** @} */ 274428d7b3dSmrg 275428d7b3dSmrg#undef F 276428d7b3dSmrg 277428d7b3dSmrg/** 278428d7b3dSmrg* Flow control instruction bits: 279428d7b3dSmrg* @{ 280428d7b3dSmrg*/ 281428d7b3dSmrgstatic inline unsigned __gen8_uip(struct gen8_instruction *insn) 282428d7b3dSmrg{ 283428d7b3dSmrg return insn->data[2]; 284428d7b3dSmrg} 285428d7b3dSmrg 286428d7b3dSmrgstatic inline void __gen8_set_uip(struct gen8_instruction *insn, unsigned uip) 287428d7b3dSmrg{ 288428d7b3dSmrg insn->data[2] = uip; 289428d7b3dSmrg} 290428d7b3dSmrg 291428d7b3dSmrgstatic inline unsigned __gen8_jip(struct gen8_instruction *insn) 292428d7b3dSmrg{ 293428d7b3dSmrg return insn->data[3]; 294428d7b3dSmrg} 295428d7b3dSmrg 296428d7b3dSmrgstatic inline void __gen8_set_jip(struct gen8_instruction *insn, unsigned jip) 297428d7b3dSmrg{ 298428d7b3dSmrg insn->data[3] = jip; 299428d7b3dSmrg} 300428d7b3dSmrg/** @} */ 301428d7b3dSmrg 302428d7b3dSmrgstatic inline int __gen8_src1_imm_d(struct gen8_instruction *insn) 303428d7b3dSmrg{ 304428d7b3dSmrg return insn->data[3]; 305428d7b3dSmrg} 306428d7b3dSmrg 307428d7b3dSmrgstatic inline unsigned __gen8_src1_imm_ud(struct gen8_instruction *insn) 308428d7b3dSmrg{ 309428d7b3dSmrg return insn->data[3]; 310428d7b3dSmrg} 311428d7b3dSmrg 312428d7b3dSmrgstatic inline float __gen8_src1_imm_f(struct gen8_instruction *insn) 313428d7b3dSmrg{ 314428d7b3dSmrg union { 315428d7b3dSmrg uint32_t u; 316428d7b3dSmrg float f; 317428d7b3dSmrg } ft = { insn->data[3] }; 318428d7b3dSmrg return ft.f; 319428d7b3dSmrg} 320428d7b3dSmrg 321428d7b3dSmrgstatic void 322428d7b3dSmrg__gen8_set_dst(struct brw_compile *p, 323428d7b3dSmrg struct gen8_instruction *inst, 324428d7b3dSmrg struct brw_reg reg) 325428d7b3dSmrg{ 326428d7b3dSmrg /* MRFs haven't existed since Gen7, so we better not be using them. */ 327428d7b3dSmrg if (reg.file == BRW_MESSAGE_REGISTER_FILE) { 328428d7b3dSmrg reg.file = BRW_GENERAL_REGISTER_FILE; 329428d7b3dSmrg reg.nr += MRF_HACK_START; 330428d7b3dSmrg } 331428d7b3dSmrg 332428d7b3dSmrg assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 333428d7b3dSmrg 334428d7b3dSmrg if (reg.file == BRW_GENERAL_REGISTER_FILE) 335428d7b3dSmrg assert(reg.nr < BRW_MAX_GRF); 336428d7b3dSmrg 337428d7b3dSmrg __gen8_set_dst_reg_file(inst, reg.file); 338428d7b3dSmrg __gen8_set_dst_reg_type(inst, reg.type); 339428d7b3dSmrg 340428d7b3dSmrg assert(reg.address_mode == BRW_ADDRESS_DIRECT); 341428d7b3dSmrg 342428d7b3dSmrg __gen8_set_dst_da_reg_nr(inst, reg.nr); 343428d7b3dSmrg 344428d7b3dSmrg if (__gen8_access_mode(inst) == BRW_ALIGN_1) { 345428d7b3dSmrg /* Set Dst.SubRegNum[4:0] */ 346428d7b3dSmrg __gen8_set_dst_da1_subreg_nr(inst, reg.subnr); 347428d7b3dSmrg 348428d7b3dSmrg /* Set Dst.HorzStride */ 349428d7b3dSmrg if (reg.hstride == BRW_HORIZONTAL_STRIDE_0) 350428d7b3dSmrg reg.hstride = BRW_HORIZONTAL_STRIDE_1; 351428d7b3dSmrg __gen8_set_dst_da1_hstride(inst, reg.hstride); 352428d7b3dSmrg } else { 353428d7b3dSmrg /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */ 354428d7b3dSmrg assert(reg.subnr == 0 || reg.subnr == 16); 355428d7b3dSmrg __gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4); 356428d7b3dSmrg __gen8_set_da16_writemask(inst, reg.dw1.bits.writemask); 357428d7b3dSmrg } 358428d7b3dSmrg 359428d7b3dSmrg#if 1 360428d7b3dSmrg if (reg.width == BRW_WIDTH_8 && p->compressed) 361428d7b3dSmrg __gen8_set_exec_size(inst, BRW_EXECUTE_16); 362428d7b3dSmrg else 363428d7b3dSmrg __gen8_set_exec_size(inst, reg.width); 364428d7b3dSmrg#else 365428d7b3dSmrg if (reg.width < BRW_EXECUTE_8) 366428d7b3dSmrg __gen8_set_exec_size(inst, reg.width); 367428d7b3dSmrg#endif 368428d7b3dSmrg} 369428d7b3dSmrg 370428d7b3dSmrgstatic void 371428d7b3dSmrg__gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg) 372428d7b3dSmrg{ 373428d7b3dSmrg int hstride_for_reg[] = {0, 1, 2, 4}; 374428d7b3dSmrg int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 375428d7b3dSmrg int width_for_reg[] = {1, 2, 4, 8, 16}; 376428d7b3dSmrg int execsize_for_reg[] = {1, 2, 4, 8, 16}; 377428d7b3dSmrg int width, hstride, vstride, execsize; 378428d7b3dSmrg 379428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 380428d7b3dSmrg /* TODO: check immediate vectors */ 381428d7b3dSmrg return; 382428d7b3dSmrg } 383428d7b3dSmrg 384428d7b3dSmrg if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE) 385428d7b3dSmrg return; 386428d7b3dSmrg 387428d7b3dSmrg assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); 388428d7b3dSmrg hstride = hstride_for_reg[reg.hstride]; 389428d7b3dSmrg 390428d7b3dSmrg if (reg.vstride == 0xf) { 391428d7b3dSmrg vstride = -1; 392428d7b3dSmrg } else { 393428d7b3dSmrg assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); 394428d7b3dSmrg vstride = vstride_for_reg[reg.vstride]; 395428d7b3dSmrg } 396428d7b3dSmrg 397428d7b3dSmrg assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); 398428d7b3dSmrg width = width_for_reg[reg.width]; 399428d7b3dSmrg 400428d7b3dSmrg assert(__gen8_exec_size(inst) >= 0 && 401428d7b3dSmrg __gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg)); 402428d7b3dSmrg execsize = execsize_for_reg[__gen8_exec_size(inst)]; 403428d7b3dSmrg 404428d7b3dSmrg /* Restrictions from 3.3.10: Register Region Restrictions. */ 405428d7b3dSmrg /* 3. */ 406428d7b3dSmrg assert(execsize >= width); 407428d7b3dSmrg 408428d7b3dSmrg /* 4. */ 409428d7b3dSmrg if (execsize == width && hstride != 0) { 410428d7b3dSmrg assert(vstride == -1 || vstride == width * hstride); 411428d7b3dSmrg } 412428d7b3dSmrg 413428d7b3dSmrg /* 5. */ 414428d7b3dSmrg if (execsize == width && hstride == 0) { 415428d7b3dSmrg /* no restriction on vstride. */ 416428d7b3dSmrg } 417428d7b3dSmrg 418428d7b3dSmrg /* 6. */ 419428d7b3dSmrg if (width == 1) { 420428d7b3dSmrg assert(hstride == 0); 421428d7b3dSmrg } 422428d7b3dSmrg 423428d7b3dSmrg /* 7. */ 424428d7b3dSmrg if (execsize == 1 && width == 1) { 425428d7b3dSmrg assert(hstride == 0); 426428d7b3dSmrg assert(vstride == 0); 427428d7b3dSmrg } 428428d7b3dSmrg 429428d7b3dSmrg /* 8. */ 430428d7b3dSmrg if (vstride == 0 && hstride == 0) { 431428d7b3dSmrg assert(width == 1); 432428d7b3dSmrg } 433428d7b3dSmrg 434428d7b3dSmrg /* 10. Check destination issues. */ 435428d7b3dSmrg} 436428d7b3dSmrg 437428d7b3dSmrgstatic void 438428d7b3dSmrg__gen8_set_src0(struct gen8_instruction *inst, struct brw_reg reg) 439428d7b3dSmrg{ 440428d7b3dSmrg /* MRFs haven't existed since Gen7, so we better not be using them. */ 441428d7b3dSmrg if (reg.file == BRW_MESSAGE_REGISTER_FILE) { 442428d7b3dSmrg reg.file = BRW_GENERAL_REGISTER_FILE; 443428d7b3dSmrg reg.nr += MRF_HACK_START; 444428d7b3dSmrg } 445428d7b3dSmrg 446428d7b3dSmrg if (reg.file == BRW_GENERAL_REGISTER_FILE) 447428d7b3dSmrg assert(reg.nr < BRW_MAX_GRF); 448428d7b3dSmrg 449428d7b3dSmrg __gen8_validate_reg(inst, reg); 450428d7b3dSmrg 451428d7b3dSmrg __gen8_set_src0_reg_file(inst, reg.file); 452428d7b3dSmrg __gen8_set_src0_reg_type(inst, reg.type); 453428d7b3dSmrg __gen8_set_src0_abs(inst, reg.abs); 454428d7b3dSmrg __gen8_set_src0_negate(inst, reg.negate); 455428d7b3dSmrg 456428d7b3dSmrg assert(reg.address_mode == BRW_ADDRESS_DIRECT); 457428d7b3dSmrg 458428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 459428d7b3dSmrg inst->data[3] = reg.dw1.ud; 460428d7b3dSmrg 461428d7b3dSmrg /* Required to set some fields in src1 as well: */ 462428d7b3dSmrg __gen8_set_src1_reg_file(inst, 0); /* arf */ 463428d7b3dSmrg __gen8_set_src1_reg_type(inst, reg.type); 464428d7b3dSmrg } else { 465428d7b3dSmrg __gen8_set_src0_da_reg_nr(inst, reg.nr); 466428d7b3dSmrg 467428d7b3dSmrg if (__gen8_access_mode(inst) == BRW_ALIGN_1) { 468428d7b3dSmrg /* Set Src0.SubRegNum[4:0] */ 469428d7b3dSmrg __gen8_set_src0_da1_subreg_nr(inst, reg.subnr); 470428d7b3dSmrg 471428d7b3dSmrg if (reg.width == BRW_WIDTH_1 && 472428d7b3dSmrg __gen8_exec_size(inst) == BRW_EXECUTE_1) { 473428d7b3dSmrg __gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); 474428d7b3dSmrg __gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0); 475428d7b3dSmrg } else { 476428d7b3dSmrg __gen8_set_src0_da1_hstride(inst, reg.hstride); 477428d7b3dSmrg __gen8_set_src0_vert_stride(inst, reg.vstride); 478428d7b3dSmrg } 479428d7b3dSmrg __gen8_set_src0_da1_width(inst, reg.width); 480428d7b3dSmrg } else { 481428d7b3dSmrg /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */ 482428d7b3dSmrg assert(reg.subnr == 0 || reg.subnr == 16); 483428d7b3dSmrg __gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4); 484428d7b3dSmrg 485428d7b3dSmrg __gen8_set_src0_da16_swiz_x(inst, 486428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 487428d7b3dSmrg BRW_CHANNEL_X)); 488428d7b3dSmrg __gen8_set_src0_da16_swiz_y(inst, 489428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 490428d7b3dSmrg BRW_CHANNEL_Y)); 491428d7b3dSmrg __gen8_set_src0_da16_swiz_z(inst, 492428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 493428d7b3dSmrg BRW_CHANNEL_Z)); 494428d7b3dSmrg __gen8_set_src0_da16_swiz_w(inst, 495428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 496428d7b3dSmrg BRW_CHANNEL_W)); 497428d7b3dSmrg 498428d7b3dSmrg /* This is an oddity of the fact that we're using the same 499428d7b3dSmrg * descriptions for registers in both Align16 and Align1 modes. 500428d7b3dSmrg */ 501428d7b3dSmrg if (reg.vstride == BRW_VERTICAL_STRIDE_8) 502428d7b3dSmrg __gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4); 503428d7b3dSmrg else 504428d7b3dSmrg __gen8_set_src0_vert_stride(inst, reg.vstride); 505428d7b3dSmrg } 506428d7b3dSmrg } 507428d7b3dSmrg} 508428d7b3dSmrg 509428d7b3dSmrgstatic void 510428d7b3dSmrg__gen8_set_src1(struct gen8_instruction *inst, struct brw_reg reg) 511428d7b3dSmrg{ 512428d7b3dSmrg /* MRFs haven't existed since Gen7, so we better not be using them. */ 513428d7b3dSmrg if (reg.file == BRW_MESSAGE_REGISTER_FILE) { 514428d7b3dSmrg reg.file = BRW_GENERAL_REGISTER_FILE; 515428d7b3dSmrg reg.nr += MRF_HACK_START; 516428d7b3dSmrg } 517428d7b3dSmrg 518428d7b3dSmrg if (reg.file == BRW_GENERAL_REGISTER_FILE) 519428d7b3dSmrg assert(reg.nr < BRW_MAX_GRF); 520428d7b3dSmrg 521428d7b3dSmrg __gen8_validate_reg(inst, reg); 522428d7b3dSmrg 523428d7b3dSmrg __gen8_set_src1_reg_file(inst, reg.file); 524428d7b3dSmrg __gen8_set_src1_reg_type(inst, reg.type); 525428d7b3dSmrg __gen8_set_src1_abs(inst, reg.abs); 526428d7b3dSmrg __gen8_set_src1_negate(inst, reg.negate); 527428d7b3dSmrg 528428d7b3dSmrg /* Only src1 can be an immediate in two-argument instructions. */ 529428d7b3dSmrg assert(__gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE); 530428d7b3dSmrg 531428d7b3dSmrg assert(reg.address_mode == BRW_ADDRESS_DIRECT); 532428d7b3dSmrg 533428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 534428d7b3dSmrg inst->data[3] = reg.dw1.ud; 535428d7b3dSmrg } else { 536428d7b3dSmrg __gen8_set_src1_da_reg_nr(inst, reg.nr); 537428d7b3dSmrg 538428d7b3dSmrg if (__gen8_access_mode(inst) == BRW_ALIGN_1) { 539428d7b3dSmrg /* Set Src0.SubRegNum[4:0] */ 540428d7b3dSmrg __gen8_set_src1_da1_subreg_nr(inst, reg.subnr); 541428d7b3dSmrg 542428d7b3dSmrg if (reg.width == BRW_WIDTH_1 && 543428d7b3dSmrg __gen8_exec_size(inst) == BRW_EXECUTE_1) { 544428d7b3dSmrg __gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0); 545428d7b3dSmrg __gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0); 546428d7b3dSmrg } else { 547428d7b3dSmrg __gen8_set_src1_da1_hstride(inst, reg.hstride); 548428d7b3dSmrg __gen8_set_src1_vert_stride(inst, reg.vstride); 549428d7b3dSmrg } 550428d7b3dSmrg __gen8_set_src1_da1_width(inst, reg.width); 551428d7b3dSmrg } else { 552428d7b3dSmrg /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */ 553428d7b3dSmrg assert(reg.subnr == 0 || reg.subnr == 16); 554428d7b3dSmrg __gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4); 555428d7b3dSmrg 556428d7b3dSmrg __gen8_set_src1_da16_swiz_x(inst, 557428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 558428d7b3dSmrg BRW_CHANNEL_X)); 559428d7b3dSmrg __gen8_set_src1_da16_swiz_y(inst, 560428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 561428d7b3dSmrg BRW_CHANNEL_Y)); 562428d7b3dSmrg __gen8_set_src1_da16_swiz_z(inst, 563428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 564428d7b3dSmrg BRW_CHANNEL_Z)); 565428d7b3dSmrg __gen8_set_src1_da16_swiz_w(inst, 566428d7b3dSmrg BRW_GET_SWZ(reg.dw1.bits.swizzle, 567428d7b3dSmrg BRW_CHANNEL_W)); 568428d7b3dSmrg 569428d7b3dSmrg /* This is an oddity of the fact that we're using the same 570428d7b3dSmrg * descriptions for registers in both Align16 and Align1 modes. 571428d7b3dSmrg */ 572428d7b3dSmrg if (reg.vstride == BRW_VERTICAL_STRIDE_8) 573428d7b3dSmrg __gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4); 574428d7b3dSmrg else 575428d7b3dSmrg __gen8_set_src1_vert_stride(inst, reg.vstride); 576428d7b3dSmrg } 577428d7b3dSmrg } 578428d7b3dSmrg} 579428d7b3dSmrg 580428d7b3dSmrg/** 581428d7b3dSmrg * Set the Message Descriptor and Extended Message Descriptor fields 582428d7b3dSmrg * for SEND messages. 583428d7b3dSmrg * 584428d7b3dSmrg * \note This zeroes out the Function Control bits, so it must be called 585428d7b3dSmrg * \b before filling out any message-specific data. Callers can 586428d7b3dSmrg * choose not to fill in irrelevant bits; they will be zero. 587428d7b3dSmrg */ 588428d7b3dSmrgstatic void 589428d7b3dSmrg__gen8_set_message_descriptor(struct gen8_instruction *inst, 590428d7b3dSmrg enum brw_message_target sfid, 591428d7b3dSmrg unsigned msg_length, 592428d7b3dSmrg unsigned response_length, 593428d7b3dSmrg bool header_present, 594428d7b3dSmrg bool end_of_thread) 595428d7b3dSmrg{ 596428d7b3dSmrg __gen8_set_src1(inst, brw_imm_d(0)); 597428d7b3dSmrg 598428d7b3dSmrg __gen8_set_sfid(inst, sfid); 599428d7b3dSmrg __gen8_set_mlen(inst, msg_length); 600428d7b3dSmrg __gen8_set_rlen(inst, response_length); 601428d7b3dSmrg __gen8_set_header_present(inst, header_present); 602428d7b3dSmrg __gen8_set_eot(inst, end_of_thread); 603428d7b3dSmrg} 604428d7b3dSmrg 605428d7b3dSmrg#if 0 606428d7b3dSmrgstatic void 607428d7b3dSmrg__gen8_set_urb_message(struct gen8_instruction *inst, 608428d7b3dSmrg unsigned opcode, 609428d7b3dSmrg unsigned msg_length, 610428d7b3dSmrg unsigned response_length, 611428d7b3dSmrg bool end_of_thread, 612428d7b3dSmrg unsigned offset, 613428d7b3dSmrg bool interleave) 614428d7b3dSmrg{ 615428d7b3dSmrg __gen8_set_message_descriptor(inst, BRW_SFID_URB, msg_length, response_length, 616428d7b3dSmrg true, end_of_thread); 617428d7b3dSmrg __gen8_set_src0(inst, brw_vec8_grf(MRF_HACK_START + 1, 0)); 618428d7b3dSmrg __gen8_set_urb_opcode(inst, 0); /* URB_WRITE_HWORD */ 619428d7b3dSmrg __gen8_set_urb_global_offset(inst, offset); 620428d7b3dSmrg __gen8_set_urb_interleave(inst, interleave); 621428d7b3dSmrg /* per_slot_offset = 0 makes it ignore offsets in message header */ 622428d7b3dSmrg __gen8_set_urb_per_slot_offset(inst, 0); 623428d7b3dSmrg} 624428d7b3dSmrg#endif 625428d7b3dSmrg 626428d7b3dSmrgstatic void 627428d7b3dSmrg__gen8_set_sampler_message(struct gen8_instruction *inst, 628428d7b3dSmrg unsigned binding_table_index, 629428d7b3dSmrg unsigned sampler, 630428d7b3dSmrg unsigned msg_type, 631428d7b3dSmrg unsigned response_length, 632428d7b3dSmrg unsigned msg_length, 633428d7b3dSmrg bool header_present, 634428d7b3dSmrg unsigned simd_mode) 635428d7b3dSmrg{ 636428d7b3dSmrg __gen8_set_message_descriptor(inst, BRW_SFID_SAMPLER, msg_length, 637428d7b3dSmrg response_length, header_present, false); 638428d7b3dSmrg 639428d7b3dSmrg __gen8_set_binding_table_index(inst, binding_table_index); 640428d7b3dSmrg __gen8_set_sampler(inst, sampler); 641428d7b3dSmrg __gen8_set_sampler_msg_type(inst, msg_type); 642428d7b3dSmrg __gen8_set_sampler_simd_mode(inst, simd_mode); 643428d7b3dSmrg} 644428d7b3dSmrg 645428d7b3dSmrgstatic void 646428d7b3dSmrg__gen8_set_dp_message(struct gen8_instruction *inst, 647428d7b3dSmrg enum brw_message_target sfid, 648428d7b3dSmrg unsigned binding_table_index, 649428d7b3dSmrg unsigned msg_type, 650428d7b3dSmrg unsigned msg_control, 651428d7b3dSmrg unsigned mlen, 652428d7b3dSmrg unsigned rlen, 653428d7b3dSmrg bool header_present, 654428d7b3dSmrg bool end_of_thread) 655428d7b3dSmrg{ 656428d7b3dSmrg /* Binding table index is from 0..255 */ 657428d7b3dSmrg assert((binding_table_index & 0xff) == binding_table_index); 658428d7b3dSmrg 659428d7b3dSmrg /* Message Type is only 5 bits */ 660428d7b3dSmrg assert((msg_type & 0x1f) == msg_type); 661428d7b3dSmrg 662428d7b3dSmrg /* Message Control is only 6 bits */ 663428d7b3dSmrg assert((msg_control & 0x3f) == msg_control); 664428d7b3dSmrg 665428d7b3dSmrg __gen8_set_message_descriptor(inst, sfid, mlen, rlen, header_present, 666428d7b3dSmrg end_of_thread); 667428d7b3dSmrg __gen8_set_function_control(inst, 668428d7b3dSmrg binding_table_index | msg_type << 14 | msg_control << 8); 669428d7b3dSmrg} 670428d7b3dSmrg 671428d7b3dSmrgstatic inline struct gen8_instruction * 672428d7b3dSmrggen8_next_insn(struct brw_compile *p, int opcode) 673428d7b3dSmrg{ 674428d7b3dSmrg struct gen8_instruction *insn; 675428d7b3dSmrg 676428d7b3dSmrg assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 677428d7b3dSmrg 678428d7b3dSmrg insn = memcpy(&p->store[p->nr_insn++], p->current, sizeof(*insn)); 679428d7b3dSmrg __gen8_set_opcode(insn, opcode); 680428d7b3dSmrg 681428d7b3dSmrg return insn; 682428d7b3dSmrg} 683428d7b3dSmrg 684428d7b3dSmrgstatic void gen8_math(struct brw_compile *p, 685428d7b3dSmrg struct brw_reg dst, 686428d7b3dSmrg unsigned function, 687428d7b3dSmrg unsigned saturate, 688428d7b3dSmrg unsigned msg_reg_nr, 689428d7b3dSmrg struct brw_reg src, 690428d7b3dSmrg unsigned data_type, 691428d7b3dSmrg unsigned precision) 692428d7b3dSmrg{ 693428d7b3dSmrg struct gen8_instruction *insn = gen8_next_insn(p, BRW_OPCODE_MATH); 694428d7b3dSmrg 695428d7b3dSmrg assert(dst.file == BRW_GENERAL_REGISTER_FILE); 696428d7b3dSmrg assert(src.file == BRW_GENERAL_REGISTER_FILE); 697428d7b3dSmrg 698428d7b3dSmrg assert(dst.hstride == BRW_HORIZONTAL_STRIDE_1); 699428d7b3dSmrg assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); 700428d7b3dSmrg 701428d7b3dSmrg /* Source modifiers are ignored for extended math instructions. */ 702428d7b3dSmrg assert(!src.negate); 703428d7b3dSmrg assert(!src.abs); 704428d7b3dSmrg 705428d7b3dSmrg if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && 706428d7b3dSmrg function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 707428d7b3dSmrg assert(src.type == BRW_REGISTER_TYPE_F); 708428d7b3dSmrg } 709428d7b3dSmrg 710428d7b3dSmrg /* Math is the same ISA format as other opcodes, except that CondModifier 711428d7b3dSmrg * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 712428d7b3dSmrg */ 713428d7b3dSmrg __gen8_set_cond_modifier(insn, function); 714428d7b3dSmrg __gen8_set_saturate(insn, saturate); 715428d7b3dSmrg 716428d7b3dSmrg __gen8_set_dst(p, insn, dst); 717428d7b3dSmrg __gen8_set_src0(insn, src); 718428d7b3dSmrg __gen8_set_src1(insn, brw_null_reg()); 719428d7b3dSmrg} 720428d7b3dSmrg 721428d7b3dSmrgstatic inline void gen8_math_invert(struct brw_compile *p, 722428d7b3dSmrg struct brw_reg dst, 723428d7b3dSmrg struct brw_reg src) 724428d7b3dSmrg{ 725428d7b3dSmrg gen8_math(p, 726428d7b3dSmrg dst, 727428d7b3dSmrg BRW_MATH_FUNCTION_INV, 728428d7b3dSmrg BRW_MATH_SATURATE_NONE, 729428d7b3dSmrg 0, 730428d7b3dSmrg src, 731428d7b3dSmrg BRW_MATH_PRECISION_FULL, 732428d7b3dSmrg BRW_MATH_DATA_VECTOR); 733428d7b3dSmrg 734428d7b3dSmrg} 735428d7b3dSmrg 736428d7b3dSmrg/* Helpers for regular instructions: */ 737428d7b3dSmrgstatic inline struct gen8_instruction *gen8_alu1(struct brw_compile *p, 738428d7b3dSmrg unsigned opcode, 739428d7b3dSmrg struct brw_reg dst, 740428d7b3dSmrg struct brw_reg src) 741428d7b3dSmrg{ 742428d7b3dSmrg struct gen8_instruction *insn = gen8_next_insn(p, opcode); 743428d7b3dSmrg __gen8_set_dst(p, insn, dst); 744428d7b3dSmrg __gen8_set_src0(insn, src); 745428d7b3dSmrg return insn; 746428d7b3dSmrg} 747428d7b3dSmrg 748428d7b3dSmrgstatic inline struct gen8_instruction *gen8_alu2(struct brw_compile *p, 749428d7b3dSmrg unsigned opcode, 750428d7b3dSmrg struct brw_reg dst, 751428d7b3dSmrg struct brw_reg src0, 752428d7b3dSmrg struct brw_reg src1) 753428d7b3dSmrg{ 754428d7b3dSmrg struct gen8_instruction *insn = gen8_next_insn(p, opcode); 755428d7b3dSmrg __gen8_set_dst(p, insn, dst); 756428d7b3dSmrg __gen8_set_src0(insn, src0); 757428d7b3dSmrg __gen8_set_src1(insn, src1); 758428d7b3dSmrg return insn; 759428d7b3dSmrg} 760428d7b3dSmrg 761428d7b3dSmrg#define ALU1(OP) \ 762428d7b3dSmrgstatic inline struct gen8_instruction *gen8_##OP(struct brw_compile *p, \ 763428d7b3dSmrg struct brw_reg dst, \ 764428d7b3dSmrg struct brw_reg src0) \ 765428d7b3dSmrg{ \ 766428d7b3dSmrg return gen8_alu1(p, BRW_OPCODE_##OP, dst, src0); \ 767428d7b3dSmrg} 768428d7b3dSmrg 769428d7b3dSmrg#define ALU2(OP) \ 770428d7b3dSmrgstatic inline struct gen8_instruction *gen8_##OP(struct brw_compile *p, \ 771428d7b3dSmrg struct brw_reg dst, \ 772428d7b3dSmrg struct brw_reg src0, \ 773428d7b3dSmrg struct brw_reg src1) \ 774428d7b3dSmrg{ \ 775428d7b3dSmrg return gen8_alu2(p, BRW_OPCODE_##OP, dst, src0, src1); \ 776428d7b3dSmrg} 777428d7b3dSmrg 778428d7b3dSmrgstatic inline struct gen8_instruction *gen8_ADD(struct brw_compile *p, 779428d7b3dSmrg struct brw_reg dst, 780428d7b3dSmrg struct brw_reg src0, 781428d7b3dSmrg struct brw_reg src1) 782428d7b3dSmrg{ 783428d7b3dSmrg /* 6.2.2: add */ 784428d7b3dSmrg if (src0.type == BRW_REGISTER_TYPE_F || 785428d7b3dSmrg (src0.file == BRW_IMMEDIATE_VALUE && 786428d7b3dSmrg src0.type == BRW_REGISTER_TYPE_VF)) { 787428d7b3dSmrg assert(src1.type != BRW_REGISTER_TYPE_UD); 788428d7b3dSmrg assert(src1.type != BRW_REGISTER_TYPE_D); 789428d7b3dSmrg } 790428d7b3dSmrg 791428d7b3dSmrg if (src1.type == BRW_REGISTER_TYPE_F || 792428d7b3dSmrg (src1.file == BRW_IMMEDIATE_VALUE && 793428d7b3dSmrg src1.type == BRW_REGISTER_TYPE_VF)) { 794428d7b3dSmrg assert(src0.type != BRW_REGISTER_TYPE_UD); 795428d7b3dSmrg assert(src0.type != BRW_REGISTER_TYPE_D); 796428d7b3dSmrg } 797428d7b3dSmrg 798428d7b3dSmrg return gen8_alu2(p, BRW_OPCODE_ADD, dst, src0, src1); 799428d7b3dSmrg} 800428d7b3dSmrg 801428d7b3dSmrgstatic inline struct gen8_instruction *gen8_MUL(struct brw_compile *p, 802428d7b3dSmrg struct brw_reg dst, 803428d7b3dSmrg struct brw_reg src0, 804428d7b3dSmrg struct brw_reg src1) 805428d7b3dSmrg{ 806428d7b3dSmrg /* 6.32.38: mul */ 807428d7b3dSmrg if (src0.type == BRW_REGISTER_TYPE_D || 808428d7b3dSmrg src0.type == BRW_REGISTER_TYPE_UD || 809428d7b3dSmrg src1.type == BRW_REGISTER_TYPE_D || 810428d7b3dSmrg src1.type == BRW_REGISTER_TYPE_UD) { 811428d7b3dSmrg assert(dst.type != BRW_REGISTER_TYPE_F); 812428d7b3dSmrg } 813428d7b3dSmrg 814428d7b3dSmrg if (src0.type == BRW_REGISTER_TYPE_F || 815428d7b3dSmrg (src0.file == BRW_IMMEDIATE_VALUE && 816428d7b3dSmrg src0.type == BRW_REGISTER_TYPE_VF)) { 817428d7b3dSmrg assert(src1.type != BRW_REGISTER_TYPE_UD); 818428d7b3dSmrg assert(src1.type != BRW_REGISTER_TYPE_D); 819428d7b3dSmrg } 820428d7b3dSmrg 821428d7b3dSmrg if (src1.type == BRW_REGISTER_TYPE_F || 822428d7b3dSmrg (src1.file == BRW_IMMEDIATE_VALUE && 823428d7b3dSmrg src1.type == BRW_REGISTER_TYPE_VF)) { 824428d7b3dSmrg assert(src0.type != BRW_REGISTER_TYPE_UD); 825428d7b3dSmrg assert(src0.type != BRW_REGISTER_TYPE_D); 826428d7b3dSmrg } 827428d7b3dSmrg 828428d7b3dSmrg assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 829428d7b3dSmrg src0.nr != BRW_ARF_ACCUMULATOR); 830428d7b3dSmrg assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 831428d7b3dSmrg src1.nr != BRW_ARF_ACCUMULATOR); 832428d7b3dSmrg 833428d7b3dSmrg return gen8_alu2(p, BRW_OPCODE_MUL, dst, src0, src1); 834428d7b3dSmrg} 835428d7b3dSmrg 836428d7b3dSmrgALU1(MOV); 837428d7b3dSmrgALU2(SEL); 838428d7b3dSmrgALU1(NOT); 839428d7b3dSmrgALU2(AND); 840428d7b3dSmrgALU2(OR); 841428d7b3dSmrgALU2(XOR); 842428d7b3dSmrgALU2(SHR); 843428d7b3dSmrgALU2(SHL); 844428d7b3dSmrgALU2(RSR); 845428d7b3dSmrgALU2(RSL); 846428d7b3dSmrgALU2(ASR); 847428d7b3dSmrgALU1(FRC); 848428d7b3dSmrgALU1(RNDD); 849428d7b3dSmrgALU2(MAC); 850428d7b3dSmrgALU2(MACH); 851428d7b3dSmrgALU1(LZD); 852428d7b3dSmrgALU2(DP4); 853428d7b3dSmrgALU2(DPH); 854428d7b3dSmrgALU2(DP3); 855428d7b3dSmrgALU2(DP2); 856428d7b3dSmrgALU2(LINE); 857428d7b3dSmrgALU2(PLN); 858428d7b3dSmrg 859428d7b3dSmrgALU1(RNDZ); 860428d7b3dSmrgALU1(RNDE); 861428d7b3dSmrg 862428d7b3dSmrg#undef ALU1 863428d7b3dSmrg#undef ALU2 864428d7b3dSmrg 865428d7b3dSmrgstatic void gen8_set_compression_control(struct brw_compile *p, 866428d7b3dSmrg enum brw_compression compression_control) 867428d7b3dSmrg{ 868428d7b3dSmrg unsigned v; 869428d7b3dSmrg 870428d7b3dSmrg p->compressed = compression_control == BRW_COMPRESSION_COMPRESSED; 871428d7b3dSmrg 872428d7b3dSmrg switch (compression_control) { 873428d7b3dSmrg default: assert(0); 874428d7b3dSmrg case BRW_COMPRESSION_NONE: v = GEN6_COMPRESSION_1Q; break; 875428d7b3dSmrg case BRW_COMPRESSION_2NDHALF: v = GEN6_COMPRESSION_2Q; break; 876428d7b3dSmrg case BRW_COMPRESSION_COMPRESSED: v = GEN6_COMPRESSION_1H; break; 877428d7b3dSmrg } 878428d7b3dSmrg __gen8_set_cmpt_control((struct gen8_instruction *)p->current, v); 879428d7b3dSmrg} 880428d7b3dSmrg 881428d7b3dSmrgstatic inline void gen8_set_mask_control(struct brw_compile *p, unsigned value) 882428d7b3dSmrg{ 883428d7b3dSmrg __gen8_set_mask_control((struct gen8_instruction *)p->current, value); 884428d7b3dSmrg} 885428d7b3dSmrg 886428d7b3dSmrgstatic inline void gen8_set_saturate(struct brw_compile *p, unsigned value) 887428d7b3dSmrg{ 888428d7b3dSmrg __gen8_set_saturate((struct gen8_instruction *)p->current, value); 889428d7b3dSmrg} 890428d7b3dSmrg 891428d7b3dSmrgstatic inline void gen8_set_acc_write_control(struct brw_compile *p, unsigned value) 892428d7b3dSmrg{ 893428d7b3dSmrg __gen8_set_acc_wr_control((struct gen8_instruction *)p->current, value); 894428d7b3dSmrg} 895428d7b3dSmrg 896428d7b3dSmrgstatic void gen8_SAMPLE(struct brw_compile *p, 897428d7b3dSmrg struct brw_reg dst, 898428d7b3dSmrg unsigned msg_reg_nr, 899428d7b3dSmrg unsigned binding_table_index, 900428d7b3dSmrg unsigned sampler, 901428d7b3dSmrg unsigned writemask, 902428d7b3dSmrg unsigned msg_type, 903428d7b3dSmrg unsigned response_length, 904428d7b3dSmrg unsigned msg_length, 905428d7b3dSmrg bool header_present, 906428d7b3dSmrg unsigned simd_mode) 907428d7b3dSmrg{ 908428d7b3dSmrg struct brw_reg src0 = brw_message_reg(msg_reg_nr); 909428d7b3dSmrg 910428d7b3dSmrg assert(writemask); 911428d7b3dSmrg 912428d7b3dSmrg if (writemask != WRITEMASK_XYZW) { 913428d7b3dSmrg writemask = ~writemask & WRITEMASK_XYZW; 914428d7b3dSmrg 915428d7b3dSmrg brw_push_insn_state(p); 916428d7b3dSmrg 917428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 918428d7b3dSmrg gen8_set_mask_control(p, BRW_MASK_DISABLE); 919428d7b3dSmrg 920428d7b3dSmrg gen8_MOV(p, __retype_ud(src0), __retype_ud(brw_vec8_grf(0,0))); 921428d7b3dSmrg gen8_MOV(p, get_element_ud(src0, 2), brw_imm_ud(writemask << 12)); 922428d7b3dSmrg 923428d7b3dSmrg brw_pop_insn_state(p); 924428d7b3dSmrg } 925428d7b3dSmrg 926428d7b3dSmrg { 927428d7b3dSmrg struct gen8_instruction *insn; 928428d7b3dSmrg 929428d7b3dSmrg insn = gen8_next_insn(p, BRW_OPCODE_SEND); 930428d7b3dSmrg __gen8_set_pred_control(insn, 0); /* XXX */ 931428d7b3dSmrg __gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q); 932428d7b3dSmrg 933428d7b3dSmrg __gen8_set_dst(p, insn, dst); 934428d7b3dSmrg __gen8_set_src0(insn, src0); 935428d7b3dSmrg __gen8_set_sampler_message(insn, 936428d7b3dSmrg binding_table_index, 937428d7b3dSmrg sampler, 938428d7b3dSmrg msg_type, 939428d7b3dSmrg response_length, 940428d7b3dSmrg msg_length, 941428d7b3dSmrg header_present, 942428d7b3dSmrg simd_mode); 943428d7b3dSmrg } 944428d7b3dSmrg} 945428d7b3dSmrg 946428d7b3dSmrg/* shader logic */ 947428d7b3dSmrg 948428d7b3dSmrgstatic void wm_affine_st(struct brw_compile *p, int dw, int channel, int msg) 949428d7b3dSmrg{ 950428d7b3dSmrg int uv; 951428d7b3dSmrg 952428d7b3dSmrg if (dw == 16) { 953428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 954428d7b3dSmrg uv = 6; 955428d7b3dSmrg } else { 956428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 957428d7b3dSmrg uv = 4; 958428d7b3dSmrg } 959428d7b3dSmrg uv += 2*channel; 960428d7b3dSmrg 961428d7b3dSmrg msg++; 962428d7b3dSmrg gen8_PLN(p, 963428d7b3dSmrg brw_message_reg(msg), 964428d7b3dSmrg brw_vec1_grf(uv, 0), 965428d7b3dSmrg brw_vec8_grf(2, 0)); 966428d7b3dSmrg msg += dw/8; 967428d7b3dSmrg 968428d7b3dSmrg gen8_PLN(p, 969428d7b3dSmrg brw_message_reg(msg), 970428d7b3dSmrg brw_vec1_grf(uv, 4), 971428d7b3dSmrg brw_vec8_grf(2, 0)); 972428d7b3dSmrg} 973428d7b3dSmrg 974428d7b3dSmrgstatic inline unsigned simd(int dw) 975428d7b3dSmrg{ 976428d7b3dSmrg return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; 977428d7b3dSmrg} 978428d7b3dSmrg 979428d7b3dSmrgstatic inline struct brw_reg sample_result(int dw, int result) 980428d7b3dSmrg{ 981428d7b3dSmrg return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, 982428d7b3dSmrg BRW_REGISTER_TYPE_UW, 983428d7b3dSmrg dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, 984428d7b3dSmrg dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, 985428d7b3dSmrg BRW_HORIZONTAL_STRIDE_1, 986428d7b3dSmrg BRW_SWIZZLE_XYZW, 987428d7b3dSmrg WRITEMASK_XYZW); 988428d7b3dSmrg} 989428d7b3dSmrg 990428d7b3dSmrgstatic int wm_sample(struct brw_compile *p, int dw, 991428d7b3dSmrg int channel, int msg, int result) 992428d7b3dSmrg{ 993428d7b3dSmrg int len = dw == 16 ? 4 : 2; 994428d7b3dSmrg gen8_SAMPLE(p, sample_result(dw, result), ++msg, 995428d7b3dSmrg channel+1, channel, WRITEMASK_XYZW, 0, 996428d7b3dSmrg 2*len, len, false, simd(dw)); 997428d7b3dSmrg return result; 998428d7b3dSmrg} 999428d7b3dSmrg 1000428d7b3dSmrgstatic int wm_sample__alpha(struct brw_compile *p, int dw, 1001428d7b3dSmrg int channel, int msg, int result) 1002428d7b3dSmrg{ 1003428d7b3dSmrg int mlen, rlen; 1004428d7b3dSmrg 1005428d7b3dSmrg if (dw == 8) { 1006428d7b3dSmrg mlen = 3; 1007428d7b3dSmrg rlen = 1; 1008428d7b3dSmrg } else { 1009428d7b3dSmrg mlen = 5; 1010428d7b3dSmrg rlen = 2; 1011428d7b3dSmrg } 1012428d7b3dSmrg 1013428d7b3dSmrg gen8_SAMPLE(p, sample_result(dw, result), msg, 1014428d7b3dSmrg channel+1, channel, WRITEMASK_W, 0, 1015428d7b3dSmrg rlen, mlen, true, simd(dw)); 1016428d7b3dSmrg 1017428d7b3dSmrg return result; 1018428d7b3dSmrg} 1019428d7b3dSmrg 1020428d7b3dSmrgstatic int wm_affine(struct brw_compile *p, int dw, 1021428d7b3dSmrg int channel, int msg, int result) 1022428d7b3dSmrg{ 1023428d7b3dSmrg wm_affine_st(p, dw, channel, msg); 1024428d7b3dSmrg return wm_sample(p, dw, channel, msg, result); 1025428d7b3dSmrg} 1026428d7b3dSmrg 1027428d7b3dSmrgstatic int wm_affine__alpha(struct brw_compile *p, int dw, 1028428d7b3dSmrg int channel, int msg, int result) 1029428d7b3dSmrg{ 1030428d7b3dSmrg wm_affine_st(p, dw, channel, msg); 1031428d7b3dSmrg return wm_sample__alpha(p, dw, channel, msg, result); 1032428d7b3dSmrg} 1033428d7b3dSmrg 1034428d7b3dSmrgstatic inline struct brw_reg null_result(int dw) 1035428d7b3dSmrg{ 1036428d7b3dSmrg return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, 1037428d7b3dSmrg BRW_REGISTER_TYPE_UW, 1038428d7b3dSmrg dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, 1039428d7b3dSmrg dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, 1040428d7b3dSmrg BRW_HORIZONTAL_STRIDE_1, 1041428d7b3dSmrg BRW_SWIZZLE_XYZW, 1042428d7b3dSmrg WRITEMASK_XYZW); 1043428d7b3dSmrg} 1044428d7b3dSmrg 1045428d7b3dSmrgstatic void fb_write(struct brw_compile *p, int dw) 1046428d7b3dSmrg{ 1047428d7b3dSmrg struct gen8_instruction *insn; 1048428d7b3dSmrg unsigned msg_control, msg_len; 1049428d7b3dSmrg struct brw_reg src0; 1050428d7b3dSmrg 1051428d7b3dSmrg if (dw == 16) { 1052428d7b3dSmrg msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 1053428d7b3dSmrg msg_len = 8; 1054428d7b3dSmrg } else { 1055428d7b3dSmrg msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 1056428d7b3dSmrg msg_len = 4; 1057428d7b3dSmrg } 1058428d7b3dSmrg msg_control |= 1 << 4; /* Last Render Target */ 1059428d7b3dSmrg 1060428d7b3dSmrg /* The execution mask is ignored for render target writes. */ 1061428d7b3dSmrg insn = gen8_next_insn(p, BRW_OPCODE_SEND); 1062428d7b3dSmrg __gen8_set_pred_control(insn, 0); 1063428d7b3dSmrg __gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q); 1064428d7b3dSmrg 1065428d7b3dSmrg src0 = brw_message_reg(2); 1066428d7b3dSmrg 1067428d7b3dSmrg __gen8_set_dst(p, insn, null_result(dw)); 1068428d7b3dSmrg __gen8_set_src0(insn, src0); 1069428d7b3dSmrg __gen8_set_dp_message(insn, 1070428d7b3dSmrg GEN6_SFID_DATAPORT_RENDER_CACHE, 1071428d7b3dSmrg 0, 1072428d7b3dSmrg GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, 1073428d7b3dSmrg msg_control, 1074428d7b3dSmrg msg_len, 0, 1075428d7b3dSmrg false, true); 1076428d7b3dSmrg} 1077428d7b3dSmrg 1078428d7b3dSmrgstatic void wm_write__mask(struct brw_compile *p, int dw, 1079428d7b3dSmrg int src, int mask) 1080428d7b3dSmrg{ 1081428d7b3dSmrg int n; 1082428d7b3dSmrg 1083428d7b3dSmrg if (dw == 8) { 1084428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 1085428d7b3dSmrg for (n = 0; n < 4; n++) 1086428d7b3dSmrg gen8_MUL(p, 1087428d7b3dSmrg brw_message_reg(2 + n), 1088428d7b3dSmrg brw_vec8_grf(src + n, 0), 1089428d7b3dSmrg brw_vec8_grf(mask, 0)); 1090428d7b3dSmrg } else { 1091428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1092428d7b3dSmrg for (n = 0; n < 4; n++) 1093428d7b3dSmrg gen8_MUL(p, 1094428d7b3dSmrg brw_message_reg(2 + 2*n), 1095428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 1096428d7b3dSmrg brw_vec8_grf(mask, 0)); 1097428d7b3dSmrg } 1098428d7b3dSmrg 1099428d7b3dSmrg fb_write(p, dw); 1100428d7b3dSmrg} 1101428d7b3dSmrg 1102428d7b3dSmrgstatic void wm_write__opacity(struct brw_compile *p, int dw, int src, int mask) 1103428d7b3dSmrg{ 1104428d7b3dSmrg int n; 1105428d7b3dSmrg 1106428d7b3dSmrg if (dw == 8) { 1107428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 1108428d7b3dSmrg for (n = 0; n < 4; n++) 1109428d7b3dSmrg gen8_MUL(p, 1110428d7b3dSmrg brw_message_reg(2 + n), 1111428d7b3dSmrg brw_vec8_grf(src + n, 0), 1112428d7b3dSmrg brw_vec1_grf(mask, 3)); 1113428d7b3dSmrg } else { 1114428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1115428d7b3dSmrg for (n = 0; n < 4; n++) 1116428d7b3dSmrg gen8_MUL(p, 1117428d7b3dSmrg brw_message_reg(2 + 2*n), 1118428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 1119428d7b3dSmrg brw_vec1_grf(mask, 3)); 1120428d7b3dSmrg } 1121428d7b3dSmrg 1122428d7b3dSmrg fb_write(p, dw); 1123428d7b3dSmrg} 1124428d7b3dSmrg 1125428d7b3dSmrgstatic void wm_write__mask_ca(struct brw_compile *p, int dw, 1126428d7b3dSmrg int src, int mask) 1127428d7b3dSmrg{ 1128428d7b3dSmrg int n; 1129428d7b3dSmrg 1130428d7b3dSmrg if (dw == 8) { 1131428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 1132428d7b3dSmrg for (n = 0; n < 4; n++) 1133428d7b3dSmrg gen8_MUL(p, 1134428d7b3dSmrg brw_message_reg(2 + n), 1135428d7b3dSmrg brw_vec8_grf(src + n, 0), 1136428d7b3dSmrg brw_vec8_grf(mask + n, 0)); 1137428d7b3dSmrg } else { 1138428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1139428d7b3dSmrg for (n = 0; n < 4; n++) 1140428d7b3dSmrg gen8_MUL(p, 1141428d7b3dSmrg brw_message_reg(2 + 2*n), 1142428d7b3dSmrg brw_vec8_grf(src + 2*n, 0), 1143428d7b3dSmrg brw_vec8_grf(mask + 2*n, 0)); 1144428d7b3dSmrg } 1145428d7b3dSmrg 1146428d7b3dSmrg fb_write(p, dw); 1147428d7b3dSmrg} 1148428d7b3dSmrg 1149428d7b3dSmrgstatic void gen8_compile_init(struct brw_compile *p) 1150428d7b3dSmrg{ 1151428d7b3dSmrg struct gen8_instruction *insn = memset(p->current, 0, sizeof(*insn)); 1152428d7b3dSmrg COMPILE_TIME_ASSERT(sizeof(*insn) == sizeof(*p->current)); 1153428d7b3dSmrg __gen8_set_mask_control(insn, BRW_MASK_ENABLE); 1154428d7b3dSmrg __gen8_set_saturate(insn, 0); 1155428d7b3dSmrg __gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q); 1156428d7b3dSmrg //__gen8_set_pred_control(insn, 0xf); 1157428d7b3dSmrg} 1158428d7b3dSmrg 1159428d7b3dSmrgbool 1160428d7b3dSmrggen8_wm_kernel__affine(struct brw_compile *p, int dispatch) 1161428d7b3dSmrg{ 1162428d7b3dSmrg gen8_compile_init(p); 1163428d7b3dSmrg 1164428d7b3dSmrg wm_affine(p, dispatch, 0, 10, MRF_HACK_START+2); 1165428d7b3dSmrg fb_write(p, dispatch); 1166428d7b3dSmrg return true; 1167428d7b3dSmrg} 1168428d7b3dSmrg 1169428d7b3dSmrgbool 1170428d7b3dSmrggen8_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) 1171428d7b3dSmrg{ 1172428d7b3dSmrg int src, mask; 1173428d7b3dSmrg 1174428d7b3dSmrg gen8_compile_init(p); 1175428d7b3dSmrg 1176428d7b3dSmrg src = wm_affine(p, dispatch, 0, 1, 12); 1177428d7b3dSmrg mask = wm_affine__alpha(p, dispatch, 1, 6, 20); 1178428d7b3dSmrg wm_write__mask(p, dispatch, src, mask); 1179428d7b3dSmrg 1180428d7b3dSmrg return true; 1181428d7b3dSmrg} 1182428d7b3dSmrg 1183428d7b3dSmrgbool 1184428d7b3dSmrggen8_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) 1185428d7b3dSmrg{ 1186428d7b3dSmrg int src, mask; 1187428d7b3dSmrg 1188428d7b3dSmrg gen8_compile_init(p); 1189428d7b3dSmrg 1190428d7b3dSmrg src = wm_affine(p, dispatch, 0, 1, 12); 1191428d7b3dSmrg mask = wm_affine(p, dispatch, 1, 6, 20); 1192428d7b3dSmrg wm_write__mask_ca(p, dispatch, src, mask); 1193428d7b3dSmrg 1194428d7b3dSmrg return true; 1195428d7b3dSmrg} 1196428d7b3dSmrg 1197428d7b3dSmrgbool 1198428d7b3dSmrggen8_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) 1199428d7b3dSmrg{ 1200428d7b3dSmrg int src, mask; 1201428d7b3dSmrg 1202428d7b3dSmrg gen8_compile_init(p); 1203428d7b3dSmrg 1204428d7b3dSmrg src = wm_affine__alpha(p, dispatch, 0, 1, 12); 1205428d7b3dSmrg mask = wm_affine(p, dispatch, 1, 6, 16); 1206428d7b3dSmrg wm_write__mask(p, dispatch, mask, src); 1207428d7b3dSmrg 1208428d7b3dSmrg return true; 1209428d7b3dSmrg} 1210428d7b3dSmrg 1211428d7b3dSmrg/* Projective variants */ 1212428d7b3dSmrg 1213428d7b3dSmrgstatic void wm_projective_st(struct brw_compile *p, int dw, 1214428d7b3dSmrg int channel, int msg) 1215428d7b3dSmrg{ 1216428d7b3dSmrg int uv; 1217428d7b3dSmrg 1218428d7b3dSmrg gen8_compile_init(p); 1219428d7b3dSmrg 1220428d7b3dSmrg if (dw == 16) { 1221428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1222428d7b3dSmrg uv = 6; 1223428d7b3dSmrg } else { 1224428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 1225428d7b3dSmrg uv = 4; 1226428d7b3dSmrg } 1227428d7b3dSmrg uv += 2*channel; 1228428d7b3dSmrg 1229428d7b3dSmrg msg++; 1230428d7b3dSmrg /* First compute 1/z */ 1231428d7b3dSmrg gen8_PLN(p, 1232428d7b3dSmrg brw_vec8_grf(30, 0), 1233428d7b3dSmrg brw_vec1_grf(uv+1, 0), 1234428d7b3dSmrg brw_vec8_grf(2, 0)); 1235428d7b3dSmrg 1236428d7b3dSmrg if (dw == 16) { 1237428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_NONE); 1238428d7b3dSmrg gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 1239428d7b3dSmrg gen8_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); 1240428d7b3dSmrg gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); 1241428d7b3dSmrg } else 1242428d7b3dSmrg gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); 1243428d7b3dSmrg 1244428d7b3dSmrg gen8_PLN(p, 1245428d7b3dSmrg brw_vec8_grf(26, 0), 1246428d7b3dSmrg brw_vec1_grf(uv, 0), 1247428d7b3dSmrg brw_vec8_grf(2, 0)); 1248428d7b3dSmrg gen8_PLN(p, 1249428d7b3dSmrg brw_vec8_grf(28, 0), 1250428d7b3dSmrg brw_vec1_grf(uv, 4), 1251428d7b3dSmrg brw_vec8_grf(2, 0)); 1252428d7b3dSmrg 1253428d7b3dSmrg gen8_MUL(p, 1254428d7b3dSmrg brw_message_reg(msg), 1255428d7b3dSmrg brw_vec8_grf(26, 0), 1256428d7b3dSmrg brw_vec8_grf(30, 0)); 1257428d7b3dSmrg gen8_MUL(p, 1258428d7b3dSmrg brw_message_reg(msg + dw/8), 1259428d7b3dSmrg brw_vec8_grf(28, 0), 1260428d7b3dSmrg brw_vec8_grf(30, 0)); 1261428d7b3dSmrg} 1262428d7b3dSmrg 1263428d7b3dSmrgstatic int wm_projective(struct brw_compile *p, int dw, 1264428d7b3dSmrg int channel, int msg, int result) 1265428d7b3dSmrg{ 1266428d7b3dSmrg gen8_compile_init(p); 1267428d7b3dSmrg 1268428d7b3dSmrg wm_projective_st(p, dw, channel, msg); 1269428d7b3dSmrg return wm_sample(p, dw, channel, msg, result); 1270428d7b3dSmrg} 1271428d7b3dSmrg 1272428d7b3dSmrgstatic int wm_projective__alpha(struct brw_compile *p, int dw, 1273428d7b3dSmrg int channel, int msg, int result) 1274428d7b3dSmrg{ 1275428d7b3dSmrg gen8_compile_init(p); 1276428d7b3dSmrg 1277428d7b3dSmrg wm_projective_st(p, dw, channel, msg); 1278428d7b3dSmrg return wm_sample__alpha(p, dw, channel, msg, result); 1279428d7b3dSmrg} 1280428d7b3dSmrg 1281428d7b3dSmrgbool 1282428d7b3dSmrggen8_wm_kernel__projective(struct brw_compile *p, int dispatch) 1283428d7b3dSmrg{ 1284428d7b3dSmrg gen8_compile_init(p); 1285428d7b3dSmrg 1286428d7b3dSmrg wm_projective(p, dispatch, 0, 10, MRF_HACK_START+2); 1287428d7b3dSmrg fb_write(p, dispatch); 1288428d7b3dSmrg return true; 1289428d7b3dSmrg} 1290428d7b3dSmrg 1291428d7b3dSmrgbool 1292428d7b3dSmrggen8_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) 1293428d7b3dSmrg{ 1294428d7b3dSmrg int src, mask; 1295428d7b3dSmrg 1296428d7b3dSmrg gen8_compile_init(p); 1297428d7b3dSmrg 1298428d7b3dSmrg src = wm_projective(p, dispatch, 0, 1, 12); 1299428d7b3dSmrg mask = wm_projective__alpha(p, dispatch, 1, 6, 20); 1300428d7b3dSmrg wm_write__mask(p, dispatch, src, mask); 1301428d7b3dSmrg 1302428d7b3dSmrg return true; 1303428d7b3dSmrg} 1304428d7b3dSmrg 1305428d7b3dSmrgbool 1306428d7b3dSmrggen8_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) 1307428d7b3dSmrg{ 1308428d7b3dSmrg int src, mask; 1309428d7b3dSmrg 1310428d7b3dSmrg gen8_compile_init(p); 1311428d7b3dSmrg 1312428d7b3dSmrg src = wm_projective(p, dispatch, 0, 1, 12); 1313428d7b3dSmrg mask = wm_projective(p, dispatch, 1, 6, 20); 1314428d7b3dSmrg wm_write__mask_ca(p, dispatch, src, mask); 1315428d7b3dSmrg 1316428d7b3dSmrg return true; 1317428d7b3dSmrg} 1318428d7b3dSmrg 1319428d7b3dSmrgbool 1320428d7b3dSmrggen8_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) 1321428d7b3dSmrg{ 1322428d7b3dSmrg int src, mask; 1323428d7b3dSmrg 1324428d7b3dSmrg gen8_compile_init(p); 1325428d7b3dSmrg 1326428d7b3dSmrg src = wm_projective__alpha(p, dispatch, 0, 1, 12); 1327428d7b3dSmrg mask = wm_projective(p, dispatch, 1, 6, 16); 1328428d7b3dSmrg wm_write__mask(p, dispatch, mask, src); 1329428d7b3dSmrg 1330428d7b3dSmrg return true; 1331428d7b3dSmrg} 1332428d7b3dSmrg 1333428d7b3dSmrgbool 1334428d7b3dSmrggen8_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) 1335428d7b3dSmrg{ 1336428d7b3dSmrg int src, mask; 1337428d7b3dSmrg 1338428d7b3dSmrg gen8_compile_init(p); 1339428d7b3dSmrg 1340428d7b3dSmrg src = wm_affine(p, dispatch, 0, 1, 12); 1341428d7b3dSmrg mask = dispatch == 16 ? 8 : 6; 1342428d7b3dSmrg wm_write__opacity(p, dispatch, src, mask); 1343428d7b3dSmrg 1344428d7b3dSmrg return true; 1345428d7b3dSmrg} 1346428d7b3dSmrg 1347428d7b3dSmrgbool 1348428d7b3dSmrggen8_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) 1349428d7b3dSmrg{ 1350428d7b3dSmrg int src, mask; 1351428d7b3dSmrg 1352428d7b3dSmrg gen8_compile_init(p); 1353428d7b3dSmrg 1354428d7b3dSmrg mask = dispatch == 16 ? 8 : 6; 1355428d7b3dSmrg src = wm_projective(p, dispatch, 0, 1, 12); 1356428d7b3dSmrg wm_write__opacity(p, dispatch, src, mask); 1357428d7b3dSmrg 1358428d7b3dSmrg return true; 1359428d7b3dSmrg} 1360