1b8e80941Smrg/* -*- c++ -*- */
2b8e80941Smrg/*
3b8e80941Smrg * Copyright © 2010-2015 Intel Corporation
4b8e80941Smrg *
5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
7b8e80941Smrg * to deal in the Software without restriction, including without limitation
8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
10b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
11b8e80941Smrg *
12b8e80941Smrg * The above copyright notice and this permission notice (including the next
13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
14b8e80941Smrg * Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22b8e80941Smrg * IN THE SOFTWARE.
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#ifndef BRW_IR_FS_H
26b8e80941Smrg#define BRW_IR_FS_H
27b8e80941Smrg
28b8e80941Smrg#include "brw_shader.h"
29b8e80941Smrg
30b8e80941Smrgclass fs_inst;
31b8e80941Smrg
32b8e80941Smrgclass fs_reg : public backend_reg {
33b8e80941Smrgpublic:
34b8e80941Smrg   DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
35b8e80941Smrg
36b8e80941Smrg   void init();
37b8e80941Smrg
38b8e80941Smrg   fs_reg();
39b8e80941Smrg   fs_reg(struct ::brw_reg reg);
40b8e80941Smrg   fs_reg(enum brw_reg_file file, int nr);
41b8e80941Smrg   fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
42b8e80941Smrg
43b8e80941Smrg   bool equals(const fs_reg &r) const;
44b8e80941Smrg   bool negative_equals(const fs_reg &r) const;
45b8e80941Smrg   bool is_contiguous() const;
46b8e80941Smrg
47b8e80941Smrg   /**
48b8e80941Smrg    * Return the size in bytes of a single logical component of the
49b8e80941Smrg    * register assuming the given execution width.
50b8e80941Smrg    */
51b8e80941Smrg   unsigned component_size(unsigned width) const;
52b8e80941Smrg
53b8e80941Smrg   /** Register region horizontal stride */
54b8e80941Smrg   uint8_t stride;
55b8e80941Smrg};
56b8e80941Smrg
57b8e80941Smrgstatic inline fs_reg
58b8e80941Smrgnegate(fs_reg reg)
59b8e80941Smrg{
60b8e80941Smrg   assert(reg.file != IMM);
61b8e80941Smrg   reg.negate = !reg.negate;
62b8e80941Smrg   return reg;
63b8e80941Smrg}
64b8e80941Smrg
65b8e80941Smrgstatic inline fs_reg
66b8e80941Smrgretype(fs_reg reg, enum brw_reg_type type)
67b8e80941Smrg{
68b8e80941Smrg   reg.type = type;
69b8e80941Smrg   return reg;
70b8e80941Smrg}
71b8e80941Smrg
72b8e80941Smrgstatic inline fs_reg
73b8e80941Smrgbyte_offset(fs_reg reg, unsigned delta)
74b8e80941Smrg{
75b8e80941Smrg   switch (reg.file) {
76b8e80941Smrg   case BAD_FILE:
77b8e80941Smrg      break;
78b8e80941Smrg   case VGRF:
79b8e80941Smrg   case ATTR:
80b8e80941Smrg   case UNIFORM:
81b8e80941Smrg      reg.offset += delta;
82b8e80941Smrg      break;
83b8e80941Smrg   case MRF: {
84b8e80941Smrg      const unsigned suboffset = reg.offset + delta;
85b8e80941Smrg      reg.nr += suboffset / REG_SIZE;
86b8e80941Smrg      reg.offset = suboffset % REG_SIZE;
87b8e80941Smrg      break;
88b8e80941Smrg   }
89b8e80941Smrg   case ARF:
90b8e80941Smrg   case FIXED_GRF: {
91b8e80941Smrg      const unsigned suboffset = reg.subnr + delta;
92b8e80941Smrg      reg.nr += suboffset / REG_SIZE;
93b8e80941Smrg      reg.subnr = suboffset % REG_SIZE;
94b8e80941Smrg      break;
95b8e80941Smrg   }
96b8e80941Smrg   case IMM:
97b8e80941Smrg   default:
98b8e80941Smrg      assert(delta == 0);
99b8e80941Smrg   }
100b8e80941Smrg   return reg;
101b8e80941Smrg}
102b8e80941Smrg
103b8e80941Smrgstatic inline fs_reg
104b8e80941Smrghoriz_offset(const fs_reg &reg, unsigned delta)
105b8e80941Smrg{
106b8e80941Smrg   switch (reg.file) {
107b8e80941Smrg   case BAD_FILE:
108b8e80941Smrg   case UNIFORM:
109b8e80941Smrg   case IMM:
110b8e80941Smrg      /* These only have a single component that is implicitly splatted.  A
111b8e80941Smrg       * horizontal offset should be a harmless no-op.
112b8e80941Smrg       * XXX - Handle vector immediates correctly.
113b8e80941Smrg       */
114b8e80941Smrg      return reg;
115b8e80941Smrg   case VGRF:
116b8e80941Smrg   case MRF:
117b8e80941Smrg   case ATTR:
118b8e80941Smrg      return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
119b8e80941Smrg   case ARF:
120b8e80941Smrg   case FIXED_GRF:
121b8e80941Smrg      if (reg.is_null()) {
122b8e80941Smrg         return reg;
123b8e80941Smrg      } else {
124b8e80941Smrg         const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
125b8e80941Smrg         return byte_offset(reg, delta * stride * type_sz(reg.type));
126b8e80941Smrg      }
127b8e80941Smrg   }
128b8e80941Smrg   unreachable("Invalid register file");
129b8e80941Smrg}
130b8e80941Smrg
131b8e80941Smrgstatic inline fs_reg
132b8e80941Smrgoffset(fs_reg reg, unsigned width, unsigned delta)
133b8e80941Smrg{
134b8e80941Smrg   switch (reg.file) {
135b8e80941Smrg   case BAD_FILE:
136b8e80941Smrg      break;
137b8e80941Smrg   case ARF:
138b8e80941Smrg   case FIXED_GRF:
139b8e80941Smrg   case MRF:
140b8e80941Smrg   case VGRF:
141b8e80941Smrg   case ATTR:
142b8e80941Smrg   case UNIFORM:
143b8e80941Smrg      return byte_offset(reg, delta * reg.component_size(width));
144b8e80941Smrg   case IMM:
145b8e80941Smrg      assert(delta == 0);
146b8e80941Smrg   }
147b8e80941Smrg   return reg;
148b8e80941Smrg}
149b8e80941Smrg
150b8e80941Smrg/**
151b8e80941Smrg * Get the scalar channel of \p reg given by \p idx and replicate it to all
152b8e80941Smrg * channels of the result.
153b8e80941Smrg */
154b8e80941Smrgstatic inline fs_reg
155b8e80941Smrgcomponent(fs_reg reg, unsigned idx)
156b8e80941Smrg{
157b8e80941Smrg   reg = horiz_offset(reg, idx);
158b8e80941Smrg   reg.stride = 0;
159b8e80941Smrg   return reg;
160b8e80941Smrg}
161b8e80941Smrg
162b8e80941Smrg/**
163b8e80941Smrg * Return an integer identifying the discrete address space a register is
164b8e80941Smrg * contained in.  A register is by definition fully contained in the single
165b8e80941Smrg * reg_space it belongs to, so two registers with different reg_space ids are
166b8e80941Smrg * guaranteed not to overlap.  Most register files are a single reg_space of
167b8e80941Smrg * its own, only the VGRF file is composed of multiple discrete address
168b8e80941Smrg * spaces, one for each VGRF allocation.
169b8e80941Smrg */
170b8e80941Smrgstatic inline uint32_t
171b8e80941Smrgreg_space(const fs_reg &r)
172b8e80941Smrg{
173b8e80941Smrg   return r.file << 16 | (r.file == VGRF ? r.nr : 0);
174b8e80941Smrg}
175b8e80941Smrg
176b8e80941Smrg/**
177b8e80941Smrg * Return the base offset in bytes of a register relative to the start of its
178b8e80941Smrg * reg_space().
179b8e80941Smrg */
180b8e80941Smrgstatic inline unsigned
181b8e80941Smrgreg_offset(const fs_reg &r)
182b8e80941Smrg{
183b8e80941Smrg   return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
184b8e80941Smrg          (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
185b8e80941Smrg          (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
186b8e80941Smrg}
187b8e80941Smrg
188b8e80941Smrg/**
189b8e80941Smrg * Return the amount of padding in bytes left unused between individual
190b8e80941Smrg * components of register \p r due to a (horizontal) stride value greater than
191b8e80941Smrg * one, or zero if components are tightly packed in the register file.
192b8e80941Smrg */
193b8e80941Smrgstatic inline unsigned
194b8e80941Smrgreg_padding(const fs_reg &r)
195b8e80941Smrg{
196b8e80941Smrg   const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
197b8e80941Smrg                            r.hstride == 0 ? 0 :
198b8e80941Smrg                            1 << (r.hstride - 1));
199b8e80941Smrg   return (MAX2(1, stride) - 1) * type_sz(r.type);
200b8e80941Smrg}
201b8e80941Smrg
202b8e80941Smrg/**
203b8e80941Smrg * Return whether the register region starting at \p r and spanning \p dr
204b8e80941Smrg * bytes could potentially overlap the register region starting at \p s and
205b8e80941Smrg * spanning \p ds bytes.
206b8e80941Smrg */
207b8e80941Smrgstatic inline bool
208b8e80941Smrgregions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
209b8e80941Smrg{
210b8e80941Smrg   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
211b8e80941Smrg      fs_reg t = r;
212b8e80941Smrg      t.nr &= ~BRW_MRF_COMPR4;
213b8e80941Smrg      /* COMPR4 regions are translated by the hardware during decompression
214b8e80941Smrg       * into two separate half-regions 4 MRFs apart from each other.
215b8e80941Smrg       */
216b8e80941Smrg      return regions_overlap(t, dr / 2, s, ds) ||
217b8e80941Smrg             regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
218b8e80941Smrg
219b8e80941Smrg   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
220b8e80941Smrg      return regions_overlap(s, ds, r, dr);
221b8e80941Smrg
222b8e80941Smrg   } else {
223b8e80941Smrg      return reg_space(r) == reg_space(s) &&
224b8e80941Smrg             !(reg_offset(r) + dr <= reg_offset(s) ||
225b8e80941Smrg               reg_offset(s) + ds <= reg_offset(r));
226b8e80941Smrg   }
227b8e80941Smrg}
228b8e80941Smrg
229b8e80941Smrg/**
230b8e80941Smrg * Check that the register region given by r [r.offset, r.offset + dr[
231b8e80941Smrg * is fully contained inside the register region given by s
232b8e80941Smrg * [s.offset, s.offset + ds[.
233b8e80941Smrg */
234b8e80941Smrgstatic inline bool
235b8e80941Smrgregion_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
236b8e80941Smrg{
237b8e80941Smrg   return reg_space(r) == reg_space(s) &&
238b8e80941Smrg          reg_offset(r) >= reg_offset(s) &&
239b8e80941Smrg          reg_offset(r) + dr <= reg_offset(s) + ds;
240b8e80941Smrg}
241b8e80941Smrg
242b8e80941Smrg/**
243b8e80941Smrg * Return whether the given register region is n-periodic, i.e. whether the
244b8e80941Smrg * original region remains invariant after shifting it by \p n scalar
245b8e80941Smrg * channels.
246b8e80941Smrg */
247b8e80941Smrgstatic inline bool
248b8e80941Smrgis_periodic(const fs_reg &reg, unsigned n)
249b8e80941Smrg{
250b8e80941Smrg   if (reg.file == BAD_FILE || reg.is_null()) {
251b8e80941Smrg      return true;
252b8e80941Smrg
253b8e80941Smrg   } else if (reg.file == IMM) {
254b8e80941Smrg      const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
255b8e80941Smrg                               reg.type == BRW_REGISTER_TYPE_V ? 8 :
256b8e80941Smrg                               reg.type == BRW_REGISTER_TYPE_VF ? 4 :
257b8e80941Smrg                               1);
258b8e80941Smrg      return n % period == 0;
259b8e80941Smrg
260b8e80941Smrg   } else if (reg.file == ARF || reg.file == FIXED_GRF) {
261b8e80941Smrg      const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
262b8e80941Smrg                               reg.vstride == 0 ? 1 << reg.width :
263b8e80941Smrg                               ~0);
264b8e80941Smrg      return n % period == 0;
265b8e80941Smrg
266b8e80941Smrg   } else {
267b8e80941Smrg      return reg.stride == 0;
268b8e80941Smrg   }
269b8e80941Smrg}
270b8e80941Smrg
271b8e80941Smrgstatic inline bool
272b8e80941Smrgis_uniform(const fs_reg &reg)
273b8e80941Smrg{
274b8e80941Smrg   return is_periodic(reg, 1);
275b8e80941Smrg}
276b8e80941Smrg
277b8e80941Smrg/**
278b8e80941Smrg * Get the specified 8-component quarter of a register.
279b8e80941Smrg * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
280b8e80941Smrg */
281b8e80941Smrgstatic inline fs_reg
282b8e80941Smrghalf(const fs_reg &reg, unsigned idx)
283b8e80941Smrg{
284b8e80941Smrg   assert(idx < 2);
285b8e80941Smrg   return horiz_offset(reg, 8 * idx);
286b8e80941Smrg}
287b8e80941Smrg
288b8e80941Smrg/**
289b8e80941Smrg * Reinterpret each channel of register \p reg as a vector of values of the
290b8e80941Smrg * given smaller type and take the i-th subcomponent from each.
291b8e80941Smrg */
292b8e80941Smrgstatic inline fs_reg
293b8e80941Smrgsubscript(fs_reg reg, brw_reg_type type, unsigned i)
294b8e80941Smrg{
295b8e80941Smrg   assert((i + 1) * type_sz(type) <= type_sz(reg.type));
296b8e80941Smrg
297b8e80941Smrg   if (reg.file == ARF || reg.file == FIXED_GRF) {
298b8e80941Smrg      /* The stride is encoded inconsistently for fixed GRF and ARF registers
299b8e80941Smrg       * as the log2 of the actual vertical and horizontal strides.
300b8e80941Smrg       */
301b8e80941Smrg      const int delta = _mesa_logbase2(type_sz(reg.type)) -
302b8e80941Smrg                        _mesa_logbase2(type_sz(type));
303b8e80941Smrg      reg.hstride += (reg.hstride ? delta : 0);
304b8e80941Smrg      reg.vstride += (reg.vstride ? delta : 0);
305b8e80941Smrg
306b8e80941Smrg   } else if (reg.file == IMM) {
307b8e80941Smrg      assert(reg.type == type);
308b8e80941Smrg
309b8e80941Smrg   } else {
310b8e80941Smrg      reg.stride *= type_sz(reg.type) / type_sz(type);
311b8e80941Smrg   }
312b8e80941Smrg
313b8e80941Smrg   return byte_offset(retype(reg, type), i * type_sz(type));
314b8e80941Smrg}
315b8e80941Smrg
316b8e80941Smrgstatic inline fs_reg
317b8e80941Smrghoriz_stride(fs_reg reg, unsigned s)
318b8e80941Smrg{
319b8e80941Smrg   reg.stride *= s;
320b8e80941Smrg   return reg;
321b8e80941Smrg}
322b8e80941Smrg
323b8e80941Smrgstatic const fs_reg reg_undef;
324b8e80941Smrg
325b8e80941Smrgclass fs_inst : public backend_instruction {
326b8e80941Smrg   fs_inst &operator=(const fs_inst &);
327b8e80941Smrg
328b8e80941Smrg   void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
329b8e80941Smrg             const fs_reg *src, unsigned sources);
330b8e80941Smrg
331b8e80941Smrgpublic:
332b8e80941Smrg   DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
333b8e80941Smrg
334b8e80941Smrg   fs_inst();
335b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size);
336b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
337b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
338b8e80941Smrg           const fs_reg &src0);
339b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
340b8e80941Smrg           const fs_reg &src0, const fs_reg &src1);
341b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
342b8e80941Smrg           const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
343b8e80941Smrg   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
344b8e80941Smrg           const fs_reg src[], unsigned sources);
345b8e80941Smrg   fs_inst(const fs_inst &that);
346b8e80941Smrg   ~fs_inst();
347b8e80941Smrg
348b8e80941Smrg   void resize_sources(uint8_t num_sources);
349b8e80941Smrg
350b8e80941Smrg   bool is_send_from_grf() const;
351b8e80941Smrg   bool is_partial_write() const;
352b8e80941Smrg   bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
353b8e80941Smrg   unsigned components_read(unsigned i) const;
354b8e80941Smrg   unsigned size_read(int arg) const;
355b8e80941Smrg   bool can_do_source_mods(const struct gen_device_info *devinfo) const;
356b8e80941Smrg   bool can_do_cmod();
357b8e80941Smrg   bool can_change_types() const;
358b8e80941Smrg   bool has_source_and_destination_hazard() const;
359b8e80941Smrg
360b8e80941Smrg   /**
361b8e80941Smrg    * Return whether \p arg is a control source of a virtual instruction which
362b8e80941Smrg    * shouldn't contribute to the execution type and usual regioning
363b8e80941Smrg    * restriction calculations of arithmetic instructions.
364b8e80941Smrg    */
365b8e80941Smrg   bool is_control_source(unsigned arg) const;
366b8e80941Smrg
367b8e80941Smrg   /**
368b8e80941Smrg    * Return the subset of flag registers read by the instruction as a bitset
369b8e80941Smrg    * with byte granularity.
370b8e80941Smrg    */
371b8e80941Smrg   unsigned flags_read(const gen_device_info *devinfo) const;
372b8e80941Smrg
373b8e80941Smrg   /**
374b8e80941Smrg    * Return the subset of flag registers updated by the instruction (either
375b8e80941Smrg    * partially or fully) as a bitset with byte granularity.
376b8e80941Smrg    */
377b8e80941Smrg   unsigned flags_written() const;
378b8e80941Smrg
379b8e80941Smrg   fs_reg dst;
380b8e80941Smrg   fs_reg *src;
381b8e80941Smrg
382b8e80941Smrg   uint8_t sources; /**< Number of fs_reg sources. */
383b8e80941Smrg
384b8e80941Smrg   bool last_rt:1;
385b8e80941Smrg   bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
386b8e80941Smrg};
387b8e80941Smrg
388b8e80941Smrg/**
389b8e80941Smrg * Make the execution of \p inst dependent on the evaluation of a possibly
390b8e80941Smrg * inverted predicate.
391b8e80941Smrg */
392b8e80941Smrgstatic inline fs_inst *
393b8e80941Smrgset_predicate_inv(enum brw_predicate pred, bool inverse,
394b8e80941Smrg                  fs_inst *inst)
395b8e80941Smrg{
396b8e80941Smrg   inst->predicate = pred;
397b8e80941Smrg   inst->predicate_inverse = inverse;
398b8e80941Smrg   return inst;
399b8e80941Smrg}
400b8e80941Smrg
401b8e80941Smrg/**
402b8e80941Smrg * Make the execution of \p inst dependent on the evaluation of a predicate.
403b8e80941Smrg */
404b8e80941Smrgstatic inline fs_inst *
405b8e80941Smrgset_predicate(enum brw_predicate pred, fs_inst *inst)
406b8e80941Smrg{
407b8e80941Smrg   return set_predicate_inv(pred, false, inst);
408b8e80941Smrg}
409b8e80941Smrg
410b8e80941Smrg/**
411b8e80941Smrg * Write the result of evaluating the condition given by \p mod to a flag
412b8e80941Smrg * register.
413b8e80941Smrg */
414b8e80941Smrgstatic inline fs_inst *
415b8e80941Smrgset_condmod(enum brw_conditional_mod mod, fs_inst *inst)
416b8e80941Smrg{
417b8e80941Smrg   inst->conditional_mod = mod;
418b8e80941Smrg   return inst;
419b8e80941Smrg}
420b8e80941Smrg
421b8e80941Smrg/**
422b8e80941Smrg * Clamp the result of \p inst to the saturation range of its destination
423b8e80941Smrg * datatype.
424b8e80941Smrg */
425b8e80941Smrgstatic inline fs_inst *
426b8e80941Smrgset_saturate(bool saturate, fs_inst *inst)
427b8e80941Smrg{
428b8e80941Smrg   inst->saturate = saturate;
429b8e80941Smrg   return inst;
430b8e80941Smrg}
431b8e80941Smrg
432b8e80941Smrg/**
433b8e80941Smrg * Return the number of dataflow registers written by the instruction (either
434b8e80941Smrg * fully or partially) counted from 'floor(reg_offset(inst->dst) /
435b8e80941Smrg * register_size)'.  The somewhat arbitrary register size unit is 4B for the
436b8e80941Smrg * UNIFORM and IMM files and 32B for all other files.
437b8e80941Smrg */
438b8e80941Smrginline unsigned
439b8e80941Smrgregs_written(const fs_inst *inst)
440b8e80941Smrg{
441b8e80941Smrg   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
442b8e80941Smrg   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
443b8e80941Smrg                       inst->size_written -
444b8e80941Smrg                       MIN2(inst->size_written, reg_padding(inst->dst)),
445b8e80941Smrg                       REG_SIZE);
446b8e80941Smrg}
447b8e80941Smrg
448b8e80941Smrg/**
449b8e80941Smrg * Return the number of dataflow registers read by the instruction (either
450b8e80941Smrg * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
451b8e80941Smrg * register_size)'.  The somewhat arbitrary register size unit is 4B for the
452b8e80941Smrg * UNIFORM and IMM files and 32B for all other files.
453b8e80941Smrg */
454b8e80941Smrginline unsigned
455b8e80941Smrgregs_read(const fs_inst *inst, unsigned i)
456b8e80941Smrg{
457b8e80941Smrg   const unsigned reg_size =
458b8e80941Smrg      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
459b8e80941Smrg   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
460b8e80941Smrg                       inst->size_read(i) -
461b8e80941Smrg                       MIN2(inst->size_read(i), reg_padding(inst->src[i])),
462b8e80941Smrg                       reg_size);
463b8e80941Smrg}
464b8e80941Smrg
465b8e80941Smrgstatic inline enum brw_reg_type
466b8e80941Smrgget_exec_type(const fs_inst *inst)
467b8e80941Smrg{
468b8e80941Smrg   brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
469b8e80941Smrg
470b8e80941Smrg   for (int i = 0; i < inst->sources; i++) {
471b8e80941Smrg      if (inst->src[i].file != BAD_FILE &&
472b8e80941Smrg          !inst->is_control_source(i)) {
473b8e80941Smrg         const brw_reg_type t = get_exec_type(inst->src[i].type);
474b8e80941Smrg         if (type_sz(t) > type_sz(exec_type))
475b8e80941Smrg            exec_type = t;
476b8e80941Smrg         else if (type_sz(t) == type_sz(exec_type) &&
477b8e80941Smrg                  brw_reg_type_is_floating_point(t))
478b8e80941Smrg            exec_type = t;
479b8e80941Smrg      }
480b8e80941Smrg   }
481b8e80941Smrg
482b8e80941Smrg   if (exec_type == BRW_REGISTER_TYPE_B)
483b8e80941Smrg      exec_type = inst->dst.type;
484b8e80941Smrg
485b8e80941Smrg   assert(exec_type != BRW_REGISTER_TYPE_B);
486b8e80941Smrg
487b8e80941Smrg   /* Promotion of the execution type to 32-bit for conversions from or to
488b8e80941Smrg    * half-float seems to be consistent with the following text from the
489b8e80941Smrg    * Cherryview PRM Vol. 7, "Execution Data Type":
490b8e80941Smrg    *
491b8e80941Smrg    * "When single precision and half precision floats are mixed between
492b8e80941Smrg    *  source operands or between source and destination operand [..] single
493b8e80941Smrg    *  precision float is the execution datatype."
494b8e80941Smrg    *
495b8e80941Smrg    * and from "Register Region Restrictions":
496b8e80941Smrg    *
497b8e80941Smrg    * "Conversion between Integer and HF (Half Float) must be DWord aligned
498b8e80941Smrg    *  and strided by a DWord on the destination."
499b8e80941Smrg    */
500b8e80941Smrg   if (type_sz(exec_type) == 2 &&
501b8e80941Smrg       inst->dst.type != exec_type) {
502b8e80941Smrg      if (exec_type == BRW_REGISTER_TYPE_HF)
503b8e80941Smrg         exec_type = BRW_REGISTER_TYPE_F;
504b8e80941Smrg      else if (inst->dst.type == BRW_REGISTER_TYPE_HF)
505b8e80941Smrg         exec_type = BRW_REGISTER_TYPE_D;
506b8e80941Smrg   }
507b8e80941Smrg
508b8e80941Smrg   return exec_type;
509b8e80941Smrg}
510b8e80941Smrg
511b8e80941Smrgstatic inline unsigned
512b8e80941Smrgget_exec_type_size(const fs_inst *inst)
513b8e80941Smrg{
514b8e80941Smrg   return type_sz(get_exec_type(inst));
515b8e80941Smrg}
516b8e80941Smrg
517b8e80941Smrg/**
518b8e80941Smrg * Return whether the instruction isn't an ALU instruction and cannot be
519b8e80941Smrg * assumed to complete in-order.
520b8e80941Smrg */
521b8e80941Smrgstatic inline bool
522b8e80941Smrgis_unordered(const fs_inst *inst)
523b8e80941Smrg{
524b8e80941Smrg   return inst->mlen || inst->is_send_from_grf() || inst->is_math();
525b8e80941Smrg}
526b8e80941Smrg
527b8e80941Smrg/**
528b8e80941Smrg * Return whether the following regioning restriction applies to the specified
529b8e80941Smrg * instruction.  From the Cherryview PRM Vol 7. "Register Region
530b8e80941Smrg * Restrictions":
531b8e80941Smrg *
532b8e80941Smrg * "When source or destination datatype is 64b or operation is integer DWord
533b8e80941Smrg *  multiply, regioning in Align1 must follow these rules:
534b8e80941Smrg *
535b8e80941Smrg *  1. Source and Destination horizontal stride must be aligned to the same qword.
536b8e80941Smrg *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
537b8e80941Smrg *  3. Source and Destination offset must be the same, except the case of
538b8e80941Smrg *     scalar source."
539b8e80941Smrg */
540b8e80941Smrgstatic inline bool
541b8e80941Smrghas_dst_aligned_region_restriction(const gen_device_info *devinfo,
542b8e80941Smrg                                   const fs_inst *inst)
543b8e80941Smrg{
544b8e80941Smrg   const brw_reg_type exec_type = get_exec_type(inst);
545b8e80941Smrg   /* Even though the hardware spec claims that "integer DWord multiply"
546b8e80941Smrg    * operations are restricted, empirical evidence and the behavior of the
547b8e80941Smrg    * simulator suggest that only 32x32-bit integer multiplication is
548b8e80941Smrg    * restricted.
549b8e80941Smrg    */
550b8e80941Smrg   const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) &&
551b8e80941Smrg      ((inst->opcode == BRW_OPCODE_MUL &&
552b8e80941Smrg        MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
553b8e80941Smrg       (inst->opcode == BRW_OPCODE_MAD &&
554b8e80941Smrg        MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));
555b8e80941Smrg
556b8e80941Smrg   if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
557b8e80941Smrg       (type_sz(exec_type) == 4 && is_dword_multiply))
558b8e80941Smrg      return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
559b8e80941Smrg   else
560b8e80941Smrg      return false;
561b8e80941Smrg}
562b8e80941Smrg
563b8e80941Smrg#endif
564