101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2013 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor
 * class.
 */
2901e04c3fSmrg
3001e04c3fSmrg#include "brw_vec4_tes.h"
3101e04c3fSmrg#include "brw_cfg.h"
327ec681f3Smrg#include "dev/intel_debug.h"
3301e04c3fSmrg
3401e04c3fSmrgnamespace brw {
3501e04c3fSmrg
3601e04c3fSmrgvec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
3701e04c3fSmrg                                  void *log_data,
3801e04c3fSmrg                                  const struct brw_tes_prog_key *key,
3901e04c3fSmrg                                  struct brw_tes_prog_data *prog_data,
4001e04c3fSmrg                                  const nir_shader *shader,
4101e04c3fSmrg                                  void *mem_ctx,
427ec681f3Smrg                                  int shader_time_index,
437ec681f3Smrg                                  bool debug_enabled)
447ec681f3Smrg   : vec4_visitor(compiler, log_data, &key->base.tex, &prog_data->base,
457ec681f3Smrg                  shader, mem_ctx, false, shader_time_index, debug_enabled)
4601e04c3fSmrg{
4701e04c3fSmrg}
4801e04c3fSmrg
4901e04c3fSmrgvoid
5001e04c3fSmrgvec4_tes_visitor::setup_payload()
5101e04c3fSmrg{
5201e04c3fSmrg   int reg = 0;
5301e04c3fSmrg
5401e04c3fSmrg   /* The payload always contains important data in r0 and r1, which contains
5501e04c3fSmrg    * the URB handles that are passed on to the URB write at the end
5601e04c3fSmrg    * of the thread.
5701e04c3fSmrg    */
5801e04c3fSmrg   reg += 2;
5901e04c3fSmrg
6001e04c3fSmrg   reg = setup_uniforms(reg);
6101e04c3fSmrg
6201e04c3fSmrg   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
6301e04c3fSmrg      for (int i = 0; i < 3; i++) {
6401e04c3fSmrg         if (inst->src[i].file != ATTR)
6501e04c3fSmrg            continue;
6601e04c3fSmrg
6701e04c3fSmrg         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
6801e04c3fSmrg         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
697ec681f3Smrg         grf = stride(grf, 0, 4, 1);
7001e04c3fSmrg         grf.swizzle = inst->src[i].swizzle;
7101e04c3fSmrg         grf.type = inst->src[i].type;
7201e04c3fSmrg         grf.abs = inst->src[i].abs;
7301e04c3fSmrg         grf.negate = inst->src[i].negate;
7401e04c3fSmrg         inst->src[i] = grf;
7501e04c3fSmrg      }
7601e04c3fSmrg   }
7701e04c3fSmrg
7801e04c3fSmrg   reg += 8 * prog_data->urb_read_length;
7901e04c3fSmrg
8001e04c3fSmrg   this->first_non_payload_grf = reg;
8101e04c3fSmrg}
8201e04c3fSmrg
8301e04c3fSmrg
8401e04c3fSmrgvoid
8501e04c3fSmrgvec4_tes_visitor::emit_prolog()
8601e04c3fSmrg{
8701e04c3fSmrg   input_read_header = src_reg(this, glsl_type::uvec4_type);
8801e04c3fSmrg   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
8901e04c3fSmrg
9001e04c3fSmrg   this->current_annotation = NULL;
9101e04c3fSmrg}
9201e04c3fSmrg
9301e04c3fSmrg
9401e04c3fSmrgvoid
9501e04c3fSmrgvec4_tes_visitor::emit_urb_write_header(int mrf)
9601e04c3fSmrg{
9701e04c3fSmrg   /* No need to do anything for DS; an implied write to this MRF will be
9801e04c3fSmrg    * performed by VS_OPCODE_URB_WRITE.
9901e04c3fSmrg    */
10001e04c3fSmrg   (void) mrf;
10101e04c3fSmrg}
10201e04c3fSmrg
10301e04c3fSmrg
10401e04c3fSmrgvec4_instruction *
10501e04c3fSmrgvec4_tes_visitor::emit_urb_write_opcode(bool complete)
10601e04c3fSmrg{
10701e04c3fSmrg   /* For DS, the URB writes end the thread. */
10801e04c3fSmrg   if (complete) {
1097ec681f3Smrg      if (INTEL_DEBUG(DEBUG_SHADER_TIME))
11001e04c3fSmrg         emit_shader_time_end();
11101e04c3fSmrg   }
11201e04c3fSmrg
11301e04c3fSmrg   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
11401e04c3fSmrg   inst->urb_write_flags = complete ?
11501e04c3fSmrg      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
11601e04c3fSmrg
11701e04c3fSmrg   return inst;
11801e04c3fSmrg}
11901e04c3fSmrg
12001e04c3fSmrgvoid
12101e04c3fSmrgvec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
12201e04c3fSmrg{
12301e04c3fSmrg   const struct brw_tes_prog_data *tes_prog_data =
12401e04c3fSmrg      (const struct brw_tes_prog_data *) prog_data;
12501e04c3fSmrg
12601e04c3fSmrg   switch (instr->intrinsic) {
12701e04c3fSmrg   case nir_intrinsic_load_tess_coord:
12801e04c3fSmrg      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
12901e04c3fSmrg      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
13001e04c3fSmrg               src_reg(brw_vec8_grf(1, 0))));
13101e04c3fSmrg      break;
13201e04c3fSmrg   case nir_intrinsic_load_tess_level_outer:
13301e04c3fSmrg      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
13401e04c3fSmrg         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
13501e04c3fSmrg                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
13601e04c3fSmrg                          BRW_SWIZZLE_ZWZW)));
13701e04c3fSmrg      } else {
13801e04c3fSmrg         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
13901e04c3fSmrg                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
14001e04c3fSmrg                          BRW_SWIZZLE_WZYX)));
14101e04c3fSmrg      }
14201e04c3fSmrg      break;
14301e04c3fSmrg   case nir_intrinsic_load_tess_level_inner:
14401e04c3fSmrg      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
14501e04c3fSmrg         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
14601e04c3fSmrg                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
14701e04c3fSmrg                          BRW_SWIZZLE_WZYX)));
14801e04c3fSmrg      } else {
14901e04c3fSmrg         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
15001e04c3fSmrg                  src_reg(ATTR, 1, glsl_type::float_type)));
15101e04c3fSmrg      }
15201e04c3fSmrg      break;
15301e04c3fSmrg   case nir_intrinsic_load_primitive_id:
15401e04c3fSmrg      emit(TES_OPCODE_GET_PRIMITIVE_ID,
15501e04c3fSmrg           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
15601e04c3fSmrg      break;
15701e04c3fSmrg
15801e04c3fSmrg   case nir_intrinsic_load_input:
15901e04c3fSmrg   case nir_intrinsic_load_per_vertex_input: {
1607ec681f3Smrg      assert(nir_dest_bit_size(instr->dest) == 32);
16101e04c3fSmrg      src_reg indirect_offset = get_indirect_offset(instr);
16201e04c3fSmrg      unsigned imm_offset = instr->const_index[0];
16301e04c3fSmrg      src_reg header = input_read_header;
16401e04c3fSmrg      unsigned first_component = nir_intrinsic_component(instr);
16501e04c3fSmrg
16601e04c3fSmrg      if (indirect_offset.file != BAD_FILE) {
16701e04c3fSmrg         src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
16801e04c3fSmrg
16901e04c3fSmrg         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
17001e04c3fSmrg          * valid range of the offset is [0, 0FFFFFFFh].
17101e04c3fSmrg          */
17201e04c3fSmrg         emit_minmax(BRW_CONDITIONAL_L,
17301e04c3fSmrg                     dst_reg(clamped_indirect_offset),
17401e04c3fSmrg                     retype(indirect_offset, BRW_REGISTER_TYPE_UD),
17501e04c3fSmrg                     brw_imm_ud(0x0fffffffu));
17601e04c3fSmrg
17701e04c3fSmrg         header = src_reg(this, glsl_type::uvec4_type);
17801e04c3fSmrg         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
17901e04c3fSmrg              input_read_header, clamped_indirect_offset);
18001e04c3fSmrg      } else {
18101e04c3fSmrg         /* Arbitrarily only push up to 24 vec4 slots worth of data,
18201e04c3fSmrg          * which is 12 registers (since each holds 2 vec4 slots).
18301e04c3fSmrg          */
18401e04c3fSmrg         const unsigned max_push_slots = 24;
18501e04c3fSmrg         if (imm_offset < max_push_slots) {
1867ec681f3Smrg            src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
18701e04c3fSmrg            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
18801e04c3fSmrg
1897ec681f3Smrg            emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
19001e04c3fSmrg
19101e04c3fSmrg            prog_data->urb_read_length =
19201e04c3fSmrg               MAX2(prog_data->urb_read_length,
1937ec681f3Smrg                    DIV_ROUND_UP(imm_offset + 1, 2));
19401e04c3fSmrg            break;
19501e04c3fSmrg         }
19601e04c3fSmrg      }
19701e04c3fSmrg
1987ec681f3Smrg      dst_reg temp(this, glsl_type::ivec4_type);
1997ec681f3Smrg      vec4_instruction *read =
2007ec681f3Smrg         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
2017ec681f3Smrg      read->offset = imm_offset;
2027ec681f3Smrg      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
2037ec681f3Smrg
2047ec681f3Smrg      src_reg src = src_reg(temp);
2057ec681f3Smrg      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
2067ec681f3Smrg
2077ec681f3Smrg      /* Copy to target.  We might end up with some funky writemasks landing
2087ec681f3Smrg       * in here, but we really don't want them in the above pseudo-ops.
2097ec681f3Smrg       */
2107ec681f3Smrg      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
2117ec681f3Smrg      dst.writemask = brw_writemask_for_size(instr->num_components);
2127ec681f3Smrg      emit(MOV(dst, src));
21301e04c3fSmrg      break;
21401e04c3fSmrg   }
21501e04c3fSmrg   default:
21601e04c3fSmrg      vec4_visitor::nir_emit_intrinsic(instr);
21701e04c3fSmrg   }
21801e04c3fSmrg}
21901e04c3fSmrg
22001e04c3fSmrg
22101e04c3fSmrgvoid
22201e04c3fSmrgvec4_tes_visitor::emit_thread_end()
22301e04c3fSmrg{
22401e04c3fSmrg   /* For DS, we always end the thread by emitting a single vertex.
22501e04c3fSmrg    * emit_urb_write_opcode() will take care of setting the eot flag on the
22601e04c3fSmrg    * SEND instruction.
22701e04c3fSmrg    */
22801e04c3fSmrg   emit_vertex();
22901e04c3fSmrg}
23001e04c3fSmrg
23101e04c3fSmrg} /* namespace brw */
232