brw_vec4.h revision b8e80941
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#ifndef BRW_VEC4_H
25#define BRW_VEC4_H
26
27#include "brw_shader.h"
28
29#ifdef __cplusplus
30#include "brw_ir_vec4.h"
31#include "brw_vec4_builder.h"
32#endif
33
34#include "compiler/glsl/ir.h"
35#include "compiler/nir/nir.h"
36
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
42const unsigned *
43brw_vec4_generate_assembly(const struct brw_compiler *compiler,
44                           void *log_data,
45                           void *mem_ctx,
46                           const nir_shader *nir,
47                           struct brw_vue_prog_data *prog_data,
48                           const struct cfg_t *cfg);
49
50#ifdef __cplusplus
51} /* extern "C" */
52
53namespace brw {
54
55class vec4_live_variables;
56
57/**
58 * The vertex shader front-end.
59 *
60 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
61 * fixed-function) into VS IR.
62 */
63class vec4_visitor : public backend_shader
64{
65public:
66   vec4_visitor(const struct brw_compiler *compiler,
67                void *log_data,
68                const struct brw_sampler_prog_key_data *key,
69                struct brw_vue_prog_data *prog_data,
70                const nir_shader *shader,
71		void *mem_ctx,
72                bool no_spills,
73                int shader_time_index);
74
75   dst_reg dst_null_f()
76   {
77      return dst_reg(brw_null_reg());
78   }
79
80   dst_reg dst_null_df()
81   {
82      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
83   }
84
85   dst_reg dst_null_d()
86   {
87      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
88   }
89
90   dst_reg dst_null_ud()
91   {
92      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
93   }
94
95   const struct brw_sampler_prog_key_data * const key_tex;
96   struct brw_vue_prog_data * const prog_data;
97   char *fail_msg;
98   bool failed;
99
100   /**
101    * GLSL IR currently being processed, which is associated with our
102    * driver IR instructions for debugging purposes.
103    */
104   const void *base_ir;
105   const char *current_annotation;
106
107   int first_non_payload_grf;
108   unsigned int max_grf;
109   int *virtual_grf_start;
110   int *virtual_grf_end;
111   brw::vec4_live_variables *live_intervals;
112   dst_reg userplane[MAX_CLIP_PLANES];
113
114   bool need_all_constants_in_pull_buffer;
115
116   /* Regs for vertex results.  Generated at ir_variable visiting time
117    * for the ir->location's used.
118    */
119   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
120   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
121   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
122   int uniforms;
123
124   src_reg shader_start_time;
125
126   bool run();
127   void fail(const char *msg, ...);
128
129   int setup_uniforms(int payload_reg);
130
131   bool reg_allocate_trivial();
132   bool reg_allocate();
133   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
134   int choose_spill_reg(struct ra_graph *g);
135   void spill_reg(unsigned spill_reg);
136   void move_grf_array_access_to_scratch();
137   void move_uniform_array_access_to_pull_constants();
138   void move_push_constants_to_pull_constants();
139   void split_uniform_registers();
140   void pack_uniform_registers();
141   void calculate_live_intervals();
142   void invalidate_live_intervals();
143   void split_virtual_grfs();
144   bool opt_vector_float();
145   bool opt_reduce_swizzle();
146   bool dead_code_eliminate();
147   int var_range_start(unsigned v, unsigned n) const;
148   int var_range_end(unsigned v, unsigned n) const;
149   bool virtual_grf_interferes(int a, int b);
150   bool opt_cmod_propagation();
151   bool opt_copy_propagation(bool do_constant_prop = true);
152   bool opt_cse_local(bblock_t *block);
153   bool opt_cse();
154   bool opt_algebraic();
155   bool opt_register_coalesce();
156   bool eliminate_find_live_channel();
157   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
158   void opt_set_dependency_control();
159   void opt_schedule_instructions();
160   void convert_to_hw_regs();
161   void fixup_3src_null_dest();
162
163   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
164   bool lower_simd_width();
165   bool scalarize_df();
166   bool lower_64bit_mad_to_mul_add();
167   void apply_logical_swizzle(struct brw_reg *hw_reg,
168                              vec4_instruction *inst, int arg);
169
170   vec4_instruction *emit(vec4_instruction *inst);
171
172   vec4_instruction *emit(enum opcode opcode);
173   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
174   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
175                          const src_reg &src0);
176   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
177                          const src_reg &src0, const src_reg &src1);
178   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
179                          const src_reg &src0, const src_reg &src1,
180                          const src_reg &src2);
181
182   vec4_instruction *emit_before(bblock_t *block,
183                                 vec4_instruction *inst,
184				 vec4_instruction *new_inst);
185
186#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
187#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
188#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
189   EMIT1(MOV)
190   EMIT1(NOT)
191   EMIT1(RNDD)
192   EMIT1(RNDE)
193   EMIT1(RNDZ)
194   EMIT1(FRC)
195   EMIT1(F32TO16)
196   EMIT1(F16TO32)
197   EMIT2(ADD)
198   EMIT2(MUL)
199   EMIT2(MACH)
200   EMIT2(MAC)
201   EMIT2(AND)
202   EMIT2(OR)
203   EMIT2(XOR)
204   EMIT2(DP3)
205   EMIT2(DP4)
206   EMIT2(DPH)
207   EMIT2(SHL)
208   EMIT2(SHR)
209   EMIT2(ASR)
210   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
211			 enum brw_conditional_mod condition);
212   vec4_instruction *IF(src_reg src0, src_reg src1,
213                        enum brw_conditional_mod condition);
214   vec4_instruction *IF(enum brw_predicate predicate);
215   EMIT1(SCRATCH_READ)
216   EMIT2(SCRATCH_WRITE)
217   EMIT3(LRP)
218   EMIT1(BFREV)
219   EMIT3(BFE)
220   EMIT2(BFI1)
221   EMIT3(BFI2)
222   EMIT1(FBH)
223   EMIT1(FBL)
224   EMIT1(CBIT)
225   EMIT3(MAD)
226   EMIT2(ADDC)
227   EMIT2(SUBB)
228   EMIT1(DIM)
229
230#undef EMIT1
231#undef EMIT2
232#undef EMIT3
233
234   int implied_mrf_writes(vec4_instruction *inst);
235
236   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
237                                 src_reg src0, src_reg src1);
238
239   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
240                              const src_reg &y, const src_reg &a);
241
242   /**
243    * Copy any live channel from \p src to the first channel of the
244    * result.
245    */
246   src_reg emit_uniformize(const src_reg &src);
247
248   src_reg fix_3src_operand(const src_reg &src);
249   src_reg resolve_source_modifiers(const src_reg &src);
250
251   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
252                               const src_reg &src1 = src_reg());
253
254   src_reg fix_math_operand(const src_reg &src);
255
256   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
257   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
258   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
259   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
260   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
261   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
262
263   void emit_texture(ir_texture_opcode op,
264                     dst_reg dest,
265                     const glsl_type *dest_type,
266                     src_reg coordinate,
267                     int coord_components,
268                     src_reg shadow_comparator,
269                     src_reg lod, src_reg lod2,
270                     src_reg sample_index,
271                     uint32_t constant_offset,
272                     src_reg offset_value,
273                     src_reg mcs,
274                     uint32_t surface, src_reg surface_reg,
275                     src_reg sampler_reg);
276
277   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
278                          src_reg surface);
279   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
280
281   void emit_ndc_computation();
282   void emit_psiz_and_flags(dst_reg reg);
283   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
284   virtual void emit_urb_slot(dst_reg reg, int varying);
285
286   void emit_shader_time_begin();
287   void emit_shader_time_end();
288   void emit_shader_time_write(int shader_time_subindex, src_reg value);
289
290   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
291			      src_reg *reladdr, int reg_offset);
292   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
293			  dst_reg dst,
294			  src_reg orig_src,
295			  int base_offset);
296   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
297			   int base_offset);
298   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
299				dst_reg dst,
300				src_reg orig_src,
301                                int base_offset,
302                                src_reg indirect);
303   void emit_pull_constant_load_reg(dst_reg dst,
304                                    src_reg surf_index,
305                                    src_reg offset,
306                                    bblock_t *before_block,
307                                    vec4_instruction *before_inst);
308   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
309                                vec4_instruction *inst, src_reg src);
310
311   void resolve_ud_negate(src_reg *reg);
312
313   bool lower_minmax();
314
315   src_reg get_timestamp();
316
317   void dump_instruction(backend_instruction *inst);
318   void dump_instruction(backend_instruction *inst, FILE *file);
319
320   bool is_high_sampler(src_reg sampler);
321
322   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
323
324   void emit_conversion_from_double(dst_reg dst, src_reg src, bool saturate);
325   void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate);
326
327   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
328                                        bool for_write,
329                                        bblock_t *block = NULL,
330                                        vec4_instruction *ref = NULL);
331
332   virtual void emit_nir_code();
333   virtual void nir_setup_uniforms();
334   virtual void nir_emit_impl(nir_function_impl *impl);
335   virtual void nir_emit_cf_list(exec_list *list);
336   virtual void nir_emit_if(nir_if *if_stmt);
337   virtual void nir_emit_loop(nir_loop *loop);
338   virtual void nir_emit_block(nir_block *block);
339   virtual void nir_emit_instr(nir_instr *instr);
340   virtual void nir_emit_load_const(nir_load_const_instr *instr);
341   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
342   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
343   virtual void nir_emit_alu(nir_alu_instr *instr);
344   virtual void nir_emit_jump(nir_jump_instr *instr);
345   virtual void nir_emit_texture(nir_tex_instr *instr);
346   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
347   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
348
349   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
350   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
351   dst_reg get_nir_dest(const nir_dest &dest);
352   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
353                       unsigned num_components = 4);
354   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
355                       unsigned num_components = 4);
356   src_reg get_nir_src(const nir_src &src,
357                       unsigned num_components = 4);
358   src_reg get_nir_src_imm(const nir_src &src);
359   src_reg get_indirect_offset(nir_intrinsic_instr *instr);
360
361   dst_reg *nir_locals;
362   dst_reg *nir_ssa_values;
363
364protected:
365   void emit_vertex();
366   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
367                                   int reg_node_count);
368   virtual void setup_payload() = 0;
369   virtual void emit_prolog() = 0;
370   virtual void emit_thread_end() = 0;
371   virtual void emit_urb_write_header(int mrf) = 0;
372   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
373   virtual void gs_emit_vertex(int stream_id);
374   virtual void gs_end_primitive();
375
376private:
377   /**
378    * If true, then register allocation should fail instead of spilling.
379    */
380   const bool no_spills;
381
382   int shader_time_index;
383
384   unsigned last_scratch; /**< measured in 32-byte (register size) units */
385};
386
387} /* namespace brw */
388#endif /* __cplusplus */
389
390#endif /* BRW_VEC4_H */
391