101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2013 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file brw_vec4_tes.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Tessellaton evaluation shader specific code derived from the vec4_visitor class. 2801e04c3fSmrg */ 2901e04c3fSmrg 3001e04c3fSmrg#include "brw_vec4_tes.h" 3101e04c3fSmrg#include "brw_cfg.h" 327ec681f3Smrg#include "dev/intel_debug.h" 3301e04c3fSmrg 3401e04c3fSmrgnamespace brw { 3501e04c3fSmrg 3601e04c3fSmrgvec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, 3701e04c3fSmrg void *log_data, 3801e04c3fSmrg const struct brw_tes_prog_key *key, 3901e04c3fSmrg struct brw_tes_prog_data *prog_data, 4001e04c3fSmrg const nir_shader *shader, 4101e04c3fSmrg void *mem_ctx, 427ec681f3Smrg int shader_time_index, 437ec681f3Smrg bool debug_enabled) 447ec681f3Smrg : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base, 457ec681f3Smrg shader, mem_ctx, false, shader_time_index, debug_enabled) 4601e04c3fSmrg{ 4701e04c3fSmrg} 4801e04c3fSmrg 4901e04c3fSmrgvoid 5001e04c3fSmrgvec4_tes_visitor::setup_payload() 5101e04c3fSmrg{ 5201e04c3fSmrg int reg = 0; 5301e04c3fSmrg 5401e04c3fSmrg /* The payload always contains important data in r0 and r1, which contains 5501e04c3fSmrg * the URB handles that are passed on to the URB write at the end 5601e04c3fSmrg * of the thread. 5701e04c3fSmrg */ 5801e04c3fSmrg reg += 2; 5901e04c3fSmrg 6001e04c3fSmrg reg = setup_uniforms(reg); 6101e04c3fSmrg 6201e04c3fSmrg foreach_block_and_inst(block, vec4_instruction, inst, cfg) { 6301e04c3fSmrg for (int i = 0; i < 3; i++) { 6401e04c3fSmrg if (inst->src[i].file != ATTR) 6501e04c3fSmrg continue; 6601e04c3fSmrg 6701e04c3fSmrg unsigned slot = inst->src[i].nr + inst->src[i].offset / 16; 6801e04c3fSmrg struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2)); 697ec681f3Smrg grf = stride(grf, 0, 4, 1); 7001e04c3fSmrg grf.swizzle = inst->src[i].swizzle; 7101e04c3fSmrg grf.type = inst->src[i].type; 7201e04c3fSmrg grf.abs = inst->src[i].abs; 7301e04c3fSmrg grf.negate = inst->src[i].negate; 7401e04c3fSmrg inst->src[i] = grf; 7501e04c3fSmrg } 7601e04c3fSmrg } 7701e04c3fSmrg 7801e04c3fSmrg reg += 8 * prog_data->urb_read_length; 7901e04c3fSmrg 8001e04c3fSmrg this->first_non_payload_grf = reg; 8101e04c3fSmrg} 8201e04c3fSmrg 8301e04c3fSmrg 8401e04c3fSmrgvoid 8501e04c3fSmrgvec4_tes_visitor::emit_prolog() 8601e04c3fSmrg{ 8701e04c3fSmrg input_read_header = src_reg(this, glsl_type::uvec4_type); 8801e04c3fSmrg emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); 8901e04c3fSmrg 9001e04c3fSmrg this->current_annotation = NULL; 9101e04c3fSmrg} 9201e04c3fSmrg 9301e04c3fSmrg 9401e04c3fSmrgvoid 9501e04c3fSmrgvec4_tes_visitor::emit_urb_write_header(int mrf) 9601e04c3fSmrg{ 9701e04c3fSmrg /* No need to do anything for DS; an implied write to this MRF will be 9801e04c3fSmrg * performed by VS_OPCODE_URB_WRITE. 9901e04c3fSmrg */ 10001e04c3fSmrg (void) mrf; 10101e04c3fSmrg} 10201e04c3fSmrg 10301e04c3fSmrg 10401e04c3fSmrgvec4_instruction * 10501e04c3fSmrgvec4_tes_visitor::emit_urb_write_opcode(bool complete) 10601e04c3fSmrg{ 10701e04c3fSmrg /* For DS, the URB writes end the thread. */ 10801e04c3fSmrg if (complete) { 1097ec681f3Smrg if (INTEL_DEBUG(DEBUG_SHADER_TIME)) 11001e04c3fSmrg emit_shader_time_end(); 11101e04c3fSmrg } 11201e04c3fSmrg 11301e04c3fSmrg vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 11401e04c3fSmrg inst->urb_write_flags = complete ? 11501e04c3fSmrg BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; 11601e04c3fSmrg 11701e04c3fSmrg return inst; 11801e04c3fSmrg} 11901e04c3fSmrg 12001e04c3fSmrgvoid 12101e04c3fSmrgvec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 12201e04c3fSmrg{ 12301e04c3fSmrg const struct brw_tes_prog_data *tes_prog_data = 12401e04c3fSmrg (const struct brw_tes_prog_data *) prog_data; 12501e04c3fSmrg 12601e04c3fSmrg switch (instr->intrinsic) { 12701e04c3fSmrg case nir_intrinsic_load_tess_coord: 12801e04c3fSmrg /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ 12901e04c3fSmrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 13001e04c3fSmrg src_reg(brw_vec8_grf(1, 0)))); 13101e04c3fSmrg break; 13201e04c3fSmrg case nir_intrinsic_load_tess_level_outer: 13301e04c3fSmrg if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { 13401e04c3fSmrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 13501e04c3fSmrg swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 13601e04c3fSmrg BRW_SWIZZLE_ZWZW))); 13701e04c3fSmrg } else { 13801e04c3fSmrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 13901e04c3fSmrg swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 14001e04c3fSmrg BRW_SWIZZLE_WZYX))); 14101e04c3fSmrg } 14201e04c3fSmrg break; 14301e04c3fSmrg case nir_intrinsic_load_tess_level_inner: 14401e04c3fSmrg if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { 14501e04c3fSmrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 14601e04c3fSmrg swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), 14701e04c3fSmrg BRW_SWIZZLE_WZYX))); 14801e04c3fSmrg } else { 14901e04c3fSmrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 15001e04c3fSmrg src_reg(ATTR, 1, glsl_type::float_type))); 15101e04c3fSmrg } 15201e04c3fSmrg break; 15301e04c3fSmrg case nir_intrinsic_load_primitive_id: 15401e04c3fSmrg emit(TES_OPCODE_GET_PRIMITIVE_ID, 15501e04c3fSmrg get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); 15601e04c3fSmrg break; 15701e04c3fSmrg 15801e04c3fSmrg case nir_intrinsic_load_input: 15901e04c3fSmrg case nir_intrinsic_load_per_vertex_input: { 1607ec681f3Smrg assert(nir_dest_bit_size(instr->dest) == 32); 16101e04c3fSmrg src_reg indirect_offset = get_indirect_offset(instr); 16201e04c3fSmrg unsigned imm_offset = instr->const_index[0]; 16301e04c3fSmrg src_reg header = input_read_header; 16401e04c3fSmrg unsigned first_component = nir_intrinsic_component(instr); 16501e04c3fSmrg 16601e04c3fSmrg if (indirect_offset.file != BAD_FILE) { 16701e04c3fSmrg src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type); 16801e04c3fSmrg 16901e04c3fSmrg /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the 17001e04c3fSmrg * valid range of the offset is [0, 0FFFFFFFh]. 17101e04c3fSmrg */ 17201e04c3fSmrg emit_minmax(BRW_CONDITIONAL_L, 17301e04c3fSmrg dst_reg(clamped_indirect_offset), 17401e04c3fSmrg retype(indirect_offset, BRW_REGISTER_TYPE_UD), 17501e04c3fSmrg brw_imm_ud(0x0fffffffu)); 17601e04c3fSmrg 17701e04c3fSmrg header = src_reg(this, glsl_type::uvec4_type); 17801e04c3fSmrg emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), 17901e04c3fSmrg input_read_header, clamped_indirect_offset); 18001e04c3fSmrg } else { 18101e04c3fSmrg /* Arbitrarily only push up to 24 vec4 slots worth of data, 18201e04c3fSmrg * which is 12 registers (since each holds 2 vec4 slots). 18301e04c3fSmrg */ 18401e04c3fSmrg const unsigned max_push_slots = 24; 18501e04c3fSmrg if (imm_offset < max_push_slots) { 1867ec681f3Smrg src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type); 18701e04c3fSmrg src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 18801e04c3fSmrg 1897ec681f3Smrg emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src)); 19001e04c3fSmrg 19101e04c3fSmrg prog_data->urb_read_length = 19201e04c3fSmrg MAX2(prog_data->urb_read_length, 1937ec681f3Smrg DIV_ROUND_UP(imm_offset + 1, 2)); 19401e04c3fSmrg break; 19501e04c3fSmrg } 19601e04c3fSmrg } 19701e04c3fSmrg 1987ec681f3Smrg dst_reg temp(this, glsl_type::ivec4_type); 1997ec681f3Smrg vec4_instruction *read = 2007ec681f3Smrg emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); 2017ec681f3Smrg read->offset = imm_offset; 2027ec681f3Smrg read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 2037ec681f3Smrg 2047ec681f3Smrg src_reg src = src_reg(temp); 2057ec681f3Smrg src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 2067ec681f3Smrg 2077ec681f3Smrg /* Copy to target. We might end up with some funky writemasks landing 2087ec681f3Smrg * in here, but we really don't want them in the above pseudo-ops. 2097ec681f3Smrg */ 2107ec681f3Smrg dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 2117ec681f3Smrg dst.writemask = brw_writemask_for_size(instr->num_components); 2127ec681f3Smrg emit(MOV(dst, src)); 21301e04c3fSmrg break; 21401e04c3fSmrg } 21501e04c3fSmrg default: 21601e04c3fSmrg vec4_visitor::nir_emit_intrinsic(instr); 21701e04c3fSmrg } 21801e04c3fSmrg} 21901e04c3fSmrg 22001e04c3fSmrg 22101e04c3fSmrgvoid 22201e04c3fSmrgvec4_tes_visitor::emit_thread_end() 22301e04c3fSmrg{ 22401e04c3fSmrg /* For DS, we always end the thread by emitting a single vertex. 22501e04c3fSmrg * emit_urb_write_opcode() will take care of setting the eot flag on the 22601e04c3fSmrg * SEND instruction. 22701e04c3fSmrg */ 22801e04c3fSmrg emit_vertex(); 22901e04c3fSmrg} 23001e04c3fSmrg 23101e04c3fSmrg} /* namespace brw */ 232