/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
28 */ 29 30#include "brw_vec4_tes.h" 31#include "brw_cfg.h" 32#include "dev/intel_debug.h" 33 34namespace brw { 35 36vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, 37 void *log_data, 38 const struct brw_tes_prog_key *key, 39 struct brw_tes_prog_data *prog_data, 40 const nir_shader *shader, 41 void *mem_ctx, 42 int shader_time_index, 43 bool debug_enabled) 44 : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base, 45 shader, mem_ctx, false, shader_time_index, debug_enabled) 46{ 47} 48 49void 50vec4_tes_visitor::setup_payload() 51{ 52 int reg = 0; 53 54 /* The payload always contains important data in r0 and r1, which contains 55 * the URB handles that are passed on to the URB write at the end 56 * of the thread. 57 */ 58 reg += 2; 59 60 reg = setup_uniforms(reg); 61 62 foreach_block_and_inst(block, vec4_instruction, inst, cfg) { 63 for (int i = 0; i < 3; i++) { 64 if (inst->src[i].file != ATTR) 65 continue; 66 67 unsigned slot = inst->src[i].nr + inst->src[i].offset / 16; 68 struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2)); 69 grf = stride(grf, 0, 4, 1); 70 grf.swizzle = inst->src[i].swizzle; 71 grf.type = inst->src[i].type; 72 grf.abs = inst->src[i].abs; 73 grf.negate = inst->src[i].negate; 74 inst->src[i] = grf; 75 } 76 } 77 78 reg += 8 * prog_data->urb_read_length; 79 80 this->first_non_payload_grf = reg; 81} 82 83 84void 85vec4_tes_visitor::emit_prolog() 86{ 87 input_read_header = src_reg(this, glsl_type::uvec4_type); 88 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); 89 90 this->current_annotation = NULL; 91} 92 93 94void 95vec4_tes_visitor::emit_urb_write_header(int mrf) 96{ 97 /* No need to do anything for DS; an implied write to this MRF will be 98 * performed by VS_OPCODE_URB_WRITE. 99 */ 100 (void) mrf; 101} 102 103 104vec4_instruction * 105vec4_tes_visitor::emit_urb_write_opcode(bool complete) 106{ 107 /* For DS, the URB writes end the thread. 
*/ 108 if (complete) { 109 if (INTEL_DEBUG(DEBUG_SHADER_TIME)) 110 emit_shader_time_end(); 111 } 112 113 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 114 inst->urb_write_flags = complete ? 115 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; 116 117 return inst; 118} 119 120void 121vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 122{ 123 const struct brw_tes_prog_data *tes_prog_data = 124 (const struct brw_tes_prog_data *) prog_data; 125 126 switch (instr->intrinsic) { 127 case nir_intrinsic_load_tess_coord: 128 /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ 129 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 130 src_reg(brw_vec8_grf(1, 0)))); 131 break; 132 case nir_intrinsic_load_tess_level_outer: 133 if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { 134 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 135 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 136 BRW_SWIZZLE_ZWZW))); 137 } else { 138 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 139 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 140 BRW_SWIZZLE_WZYX))); 141 } 142 break; 143 case nir_intrinsic_load_tess_level_inner: 144 if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { 145 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 146 swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), 147 BRW_SWIZZLE_WZYX))); 148 } else { 149 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 150 src_reg(ATTR, 1, glsl_type::float_type))); 151 } 152 break; 153 case nir_intrinsic_load_primitive_id: 154 emit(TES_OPCODE_GET_PRIMITIVE_ID, 155 get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); 156 break; 157 158 case nir_intrinsic_load_input: 159 case nir_intrinsic_load_per_vertex_input: { 160 assert(nir_dest_bit_size(instr->dest) == 32); 161 src_reg indirect_offset = get_indirect_offset(instr); 162 unsigned imm_offset = instr->const_index[0]; 163 src_reg header = input_read_header; 164 unsigned first_component = 
nir_intrinsic_component(instr); 165 166 if (indirect_offset.file != BAD_FILE) { 167 src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type); 168 169 /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the 170 * valid range of the offset is [0, 0FFFFFFFh]. 171 */ 172 emit_minmax(BRW_CONDITIONAL_L, 173 dst_reg(clamped_indirect_offset), 174 retype(indirect_offset, BRW_REGISTER_TYPE_UD), 175 brw_imm_ud(0x0fffffffu)); 176 177 header = src_reg(this, glsl_type::uvec4_type); 178 emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), 179 input_read_header, clamped_indirect_offset); 180 } else { 181 /* Arbitrarily only push up to 24 vec4 slots worth of data, 182 * which is 12 registers (since each holds 2 vec4 slots). 183 */ 184 const unsigned max_push_slots = 24; 185 if (imm_offset < max_push_slots) { 186 src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); 187 src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 188 189 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src)); 190 191 prog_data->urb_read_length = 192 MAX2(prog_data->urb_read_length, 193 DIV_ROUND_UP(imm_offset + 1, 2)); 194 break; 195 } 196 } 197 198 dst_reg temp(this, glsl_type::ivec4_type); 199 vec4_instruction *read = 200 emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); 201 read->offset = imm_offset; 202 read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 203 204 src_reg src = src_reg(temp); 205 src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 206 207 /* Copy to target. We might end up with some funky writemasks landing 208 * in here, but we really don't want them in the above pseudo-ops. 
209 */ 210 dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 211 dst.writemask = brw_writemask_for_size(instr->num_components); 212 emit(MOV(dst, src)); 213 break; 214 } 215 default: 216 vec4_visitor::nir_emit_intrinsic(instr); 217 } 218} 219 220 221void 222vec4_tes_visitor::emit_thread_end() 223{ 224 /* For DS, we always end the thread by emitting a single vertex. 225 * emit_urb_write_opcode() will take care of setting the eot flag on the 226 * SEND instruction. 227 */ 228 emit_vertex(); 229} 230 231} /* namespace brw */ 232