1b8e80941Smrg/* -*- c++ -*- */ 2b8e80941Smrg/* 3b8e80941Smrg * Copyright © 2010-2015 Intel Corporation 4b8e80941Smrg * 5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 7b8e80941Smrg * to deal in the Software without restriction, including without limitation 8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 10b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * The above copyright notice and this permission notice (including the next 13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 14b8e80941Smrg * Software. 15b8e80941Smrg * 16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22b8e80941Smrg * IN THE SOFTWARE. 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg#ifndef BRW_IR_FS_H 26b8e80941Smrg#define BRW_IR_FS_H 27b8e80941Smrg 28b8e80941Smrg#include "brw_shader.h" 29b8e80941Smrg 30b8e80941Smrgclass fs_inst; 31b8e80941Smrg 32b8e80941Smrgclass fs_reg : public backend_reg { 33b8e80941Smrgpublic: 34b8e80941Smrg DECLARE_RALLOC_CXX_OPERATORS(fs_reg) 35b8e80941Smrg 36b8e80941Smrg void init(); 37b8e80941Smrg 38b8e80941Smrg fs_reg(); 39b8e80941Smrg fs_reg(struct ::brw_reg reg); 40b8e80941Smrg fs_reg(enum brw_reg_file file, int nr); 41b8e80941Smrg fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type); 42b8e80941Smrg 43b8e80941Smrg bool equals(const fs_reg &r) const; 44b8e80941Smrg bool negative_equals(const fs_reg &r) const; 45b8e80941Smrg bool is_contiguous() const; 46b8e80941Smrg 47b8e80941Smrg /** 48b8e80941Smrg * Return the size in bytes of a single logical component of the 49b8e80941Smrg * register assuming the given execution width. 50b8e80941Smrg */ 51b8e80941Smrg unsigned component_size(unsigned width) const; 52b8e80941Smrg 53b8e80941Smrg /** Register region horizontal stride */ 54b8e80941Smrg uint8_t stride; 55b8e80941Smrg}; 56b8e80941Smrg 57b8e80941Smrgstatic inline fs_reg 58b8e80941Smrgnegate(fs_reg reg) 59b8e80941Smrg{ 60b8e80941Smrg assert(reg.file != IMM); 61b8e80941Smrg reg.negate = !reg.negate; 62b8e80941Smrg return reg; 63b8e80941Smrg} 64b8e80941Smrg 65b8e80941Smrgstatic inline fs_reg 66b8e80941Smrgretype(fs_reg reg, enum brw_reg_type type) 67b8e80941Smrg{ 68b8e80941Smrg reg.type = type; 69b8e80941Smrg return reg; 70b8e80941Smrg} 71b8e80941Smrg 72b8e80941Smrgstatic inline fs_reg 73b8e80941Smrgbyte_offset(fs_reg reg, unsigned delta) 74b8e80941Smrg{ 75b8e80941Smrg switch (reg.file) { 76b8e80941Smrg case BAD_FILE: 77b8e80941Smrg break; 78b8e80941Smrg case VGRF: 79b8e80941Smrg case ATTR: 80b8e80941Smrg case UNIFORM: 81b8e80941Smrg reg.offset += delta; 82b8e80941Smrg break; 83b8e80941Smrg case MRF: { 84b8e80941Smrg const unsigned suboffset = reg.offset + delta; 85b8e80941Smrg reg.nr += suboffset / REG_SIZE; 86b8e80941Smrg reg.offset = suboffset % REG_SIZE; 87b8e80941Smrg break; 88b8e80941Smrg } 89b8e80941Smrg case ARF: 90b8e80941Smrg case FIXED_GRF: { 91b8e80941Smrg const unsigned suboffset = reg.subnr + delta; 92b8e80941Smrg reg.nr += suboffset / REG_SIZE; 93b8e80941Smrg reg.subnr = suboffset % REG_SIZE; 94b8e80941Smrg break; 95b8e80941Smrg } 96b8e80941Smrg case IMM: 97b8e80941Smrg default: 98b8e80941Smrg assert(delta == 0); 99b8e80941Smrg } 100b8e80941Smrg return reg; 101b8e80941Smrg} 102b8e80941Smrg 103b8e80941Smrgstatic inline fs_reg 104b8e80941Smrghoriz_offset(const fs_reg ®, unsigned delta) 105b8e80941Smrg{ 106b8e80941Smrg switch (reg.file) { 107b8e80941Smrg case BAD_FILE: 108b8e80941Smrg case UNIFORM: 109b8e80941Smrg case IMM: 110b8e80941Smrg /* These only have a single component that is implicitly splatted. A 111b8e80941Smrg * horizontal offset should be a harmless no-op. 112b8e80941Smrg * XXX - Handle vector immediates correctly. 113b8e80941Smrg */ 114b8e80941Smrg return reg; 115b8e80941Smrg case VGRF: 116b8e80941Smrg case MRF: 117b8e80941Smrg case ATTR: 118b8e80941Smrg return byte_offset(reg, delta * reg.stride * type_sz(reg.type)); 119b8e80941Smrg case ARF: 120b8e80941Smrg case FIXED_GRF: 121b8e80941Smrg if (reg.is_null()) { 122b8e80941Smrg return reg; 123b8e80941Smrg } else { 124b8e80941Smrg const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0; 125b8e80941Smrg return byte_offset(reg, delta * stride * type_sz(reg.type)); 126b8e80941Smrg } 127b8e80941Smrg } 128b8e80941Smrg unreachable("Invalid register file"); 129b8e80941Smrg} 130b8e80941Smrg 131b8e80941Smrgstatic inline fs_reg 132b8e80941Smrgoffset(fs_reg reg, unsigned width, unsigned delta) 133b8e80941Smrg{ 134b8e80941Smrg switch (reg.file) { 135b8e80941Smrg case BAD_FILE: 136b8e80941Smrg break; 137b8e80941Smrg case ARF: 138b8e80941Smrg case FIXED_GRF: 139b8e80941Smrg case MRF: 140b8e80941Smrg case VGRF: 141b8e80941Smrg case ATTR: 142b8e80941Smrg case UNIFORM: 143b8e80941Smrg return byte_offset(reg, delta * reg.component_size(width)); 144b8e80941Smrg case IMM: 145b8e80941Smrg assert(delta == 0); 146b8e80941Smrg } 147b8e80941Smrg return reg; 148b8e80941Smrg} 149b8e80941Smrg 150b8e80941Smrg/** 151b8e80941Smrg * Get the scalar channel of \p reg given by \p idx and replicate it to all 152b8e80941Smrg * channels of the result. 153b8e80941Smrg */ 154b8e80941Smrgstatic inline fs_reg 155b8e80941Smrgcomponent(fs_reg reg, unsigned idx) 156b8e80941Smrg{ 157b8e80941Smrg reg = horiz_offset(reg, idx); 158b8e80941Smrg reg.stride = 0; 159b8e80941Smrg return reg; 160b8e80941Smrg} 161b8e80941Smrg 162b8e80941Smrg/** 163b8e80941Smrg * Return an integer identifying the discrete address space a register is 164b8e80941Smrg * contained in. A register is by definition fully contained in the single 165b8e80941Smrg * reg_space it belongs to, so two registers with different reg_space ids are 166b8e80941Smrg * guaranteed not to overlap. Most register files are a single reg_space of 167b8e80941Smrg * its own, only the VGRF file is composed of multiple discrete address 168b8e80941Smrg * spaces, one for each VGRF allocation. 169b8e80941Smrg */ 170b8e80941Smrgstatic inline uint32_t 171b8e80941Smrgreg_space(const fs_reg &r) 172b8e80941Smrg{ 173b8e80941Smrg return r.file << 16 | (r.file == VGRF ? r.nr : 0); 174b8e80941Smrg} 175b8e80941Smrg 176b8e80941Smrg/** 177b8e80941Smrg * Return the base offset in bytes of a register relative to the start of its 178b8e80941Smrg * reg_space(). 179b8e80941Smrg */ 180b8e80941Smrgstatic inline unsigned 181b8e80941Smrgreg_offset(const fs_reg &r) 182b8e80941Smrg{ 183b8e80941Smrg return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * 184b8e80941Smrg (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset + 185b8e80941Smrg (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0); 186b8e80941Smrg} 187b8e80941Smrg 188b8e80941Smrg/** 189b8e80941Smrg * Return the amount of padding in bytes left unused between individual 190b8e80941Smrg * components of register \p r due to a (horizontal) stride value greater than 191b8e80941Smrg * one, or zero if components are tightly packed in the register file. 192b8e80941Smrg */ 193b8e80941Smrgstatic inline unsigned 194b8e80941Smrgreg_padding(const fs_reg &r) 195b8e80941Smrg{ 196b8e80941Smrg const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride : 197b8e80941Smrg r.hstride == 0 ? 0 : 198b8e80941Smrg 1 << (r.hstride - 1)); 199b8e80941Smrg return (MAX2(1, stride) - 1) * type_sz(r.type); 200b8e80941Smrg} 201b8e80941Smrg 202b8e80941Smrg/** 203b8e80941Smrg * Return whether the register region starting at \p r and spanning \p dr 204b8e80941Smrg * bytes could potentially overlap the register region starting at \p s and 205b8e80941Smrg * spanning \p ds bytes. 206b8e80941Smrg */ 207b8e80941Smrgstatic inline bool 208b8e80941Smrgregions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) 209b8e80941Smrg{ 210b8e80941Smrg if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) { 211b8e80941Smrg fs_reg t = r; 212b8e80941Smrg t.nr &= ~BRW_MRF_COMPR4; 213b8e80941Smrg /* COMPR4 regions are translated by the hardware during decompression 214b8e80941Smrg * into two separate half-regions 4 MRFs apart from each other. 215b8e80941Smrg */ 216b8e80941Smrg return regions_overlap(t, dr / 2, s, ds) || 217b8e80941Smrg regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds); 218b8e80941Smrg 219b8e80941Smrg } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) { 220b8e80941Smrg return regions_overlap(s, ds, r, dr); 221b8e80941Smrg 222b8e80941Smrg } else { 223b8e80941Smrg return reg_space(r) == reg_space(s) && 224b8e80941Smrg !(reg_offset(r) + dr <= reg_offset(s) || 225b8e80941Smrg reg_offset(s) + ds <= reg_offset(r)); 226b8e80941Smrg } 227b8e80941Smrg} 228b8e80941Smrg 229b8e80941Smrg/** 230b8e80941Smrg * Check that the register region given by r [r.offset, r.offset + dr[ 231b8e80941Smrg * is fully contained inside the register region given by s 232b8e80941Smrg * [s.offset, s.offset + ds[. 233b8e80941Smrg */ 234b8e80941Smrgstatic inline bool 235b8e80941Smrgregion_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) 236b8e80941Smrg{ 237b8e80941Smrg return reg_space(r) == reg_space(s) && 238b8e80941Smrg reg_offset(r) >= reg_offset(s) && 239b8e80941Smrg reg_offset(r) + dr <= reg_offset(s) + ds; 240b8e80941Smrg} 241b8e80941Smrg 242b8e80941Smrg/** 243b8e80941Smrg * Return whether the given register region is n-periodic, i.e. whether the 244b8e80941Smrg * original region remains invariant after shifting it by \p n scalar 245b8e80941Smrg * channels. 246b8e80941Smrg */ 247b8e80941Smrgstatic inline bool 248b8e80941Smrgis_periodic(const fs_reg ®, unsigned n) 249b8e80941Smrg{ 250b8e80941Smrg if (reg.file == BAD_FILE || reg.is_null()) { 251b8e80941Smrg return true; 252b8e80941Smrg 253b8e80941Smrg } else if (reg.file == IMM) { 254b8e80941Smrg const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV || 255b8e80941Smrg reg.type == BRW_REGISTER_TYPE_V ? 8 : 256b8e80941Smrg reg.type == BRW_REGISTER_TYPE_VF ? 4 : 257b8e80941Smrg 1); 258b8e80941Smrg return n % period == 0; 259b8e80941Smrg 260b8e80941Smrg } else if (reg.file == ARF || reg.file == FIXED_GRF) { 261b8e80941Smrg const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 : 262b8e80941Smrg reg.vstride == 0 ? 1 << reg.width : 263b8e80941Smrg ~0); 264b8e80941Smrg return n % period == 0; 265b8e80941Smrg 266b8e80941Smrg } else { 267b8e80941Smrg return reg.stride == 0; 268b8e80941Smrg } 269b8e80941Smrg} 270b8e80941Smrg 271b8e80941Smrgstatic inline bool 272b8e80941Smrgis_uniform(const fs_reg ®) 273b8e80941Smrg{ 274b8e80941Smrg return is_periodic(reg, 1); 275b8e80941Smrg} 276b8e80941Smrg 277b8e80941Smrg/** 278b8e80941Smrg * Get the specified 8-component quarter of a register. 279b8e80941Smrg * XXX - Maybe come up with a less misleading name for this (e.g. quarter())? 280b8e80941Smrg */ 281b8e80941Smrgstatic inline fs_reg 282b8e80941Smrghalf(const fs_reg ®, unsigned idx) 283b8e80941Smrg{ 284b8e80941Smrg assert(idx < 2); 285b8e80941Smrg return horiz_offset(reg, 8 * idx); 286b8e80941Smrg} 287b8e80941Smrg 288b8e80941Smrg/** 289b8e80941Smrg * Reinterpret each channel of register \p reg as a vector of values of the 290b8e80941Smrg * given smaller type and take the i-th subcomponent from each. 291b8e80941Smrg */ 292b8e80941Smrgstatic inline fs_reg 293b8e80941Smrgsubscript(fs_reg reg, brw_reg_type type, unsigned i) 294b8e80941Smrg{ 295b8e80941Smrg assert((i + 1) * type_sz(type) <= type_sz(reg.type)); 296b8e80941Smrg 297b8e80941Smrg if (reg.file == ARF || reg.file == FIXED_GRF) { 298b8e80941Smrg /* The stride is encoded inconsistently for fixed GRF and ARF registers 299b8e80941Smrg * as the log2 of the actual vertical and horizontal strides. 300b8e80941Smrg */ 301b8e80941Smrg const int delta = _mesa_logbase2(type_sz(reg.type)) - 302b8e80941Smrg _mesa_logbase2(type_sz(type)); 303b8e80941Smrg reg.hstride += (reg.hstride ? delta : 0); 304b8e80941Smrg reg.vstride += (reg.vstride ? delta : 0); 305b8e80941Smrg 306b8e80941Smrg } else if (reg.file == IMM) { 307b8e80941Smrg assert(reg.type == type); 308b8e80941Smrg 309b8e80941Smrg } else { 310b8e80941Smrg reg.stride *= type_sz(reg.type) / type_sz(type); 311b8e80941Smrg } 312b8e80941Smrg 313b8e80941Smrg return byte_offset(retype(reg, type), i * type_sz(type)); 314b8e80941Smrg} 315b8e80941Smrg 316b8e80941Smrgstatic inline fs_reg 317b8e80941Smrghoriz_stride(fs_reg reg, unsigned s) 318b8e80941Smrg{ 319b8e80941Smrg reg.stride *= s; 320b8e80941Smrg return reg; 321b8e80941Smrg} 322b8e80941Smrg 323b8e80941Smrgstatic const fs_reg reg_undef; 324b8e80941Smrg 325b8e80941Smrgclass fs_inst : public backend_instruction { 326b8e80941Smrg fs_inst &operator=(const fs_inst &); 327b8e80941Smrg 328b8e80941Smrg void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, 329b8e80941Smrg const fs_reg *src, unsigned sources); 330b8e80941Smrg 331b8e80941Smrgpublic: 332b8e80941Smrg DECLARE_RALLOC_CXX_OPERATORS(fs_inst) 333b8e80941Smrg 334b8e80941Smrg fs_inst(); 335b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size); 336b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst); 337b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 338b8e80941Smrg const fs_reg &src0); 339b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 340b8e80941Smrg const fs_reg &src0, const fs_reg &src1); 341b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 342b8e80941Smrg const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); 343b8e80941Smrg fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, 344b8e80941Smrg const fs_reg src[], unsigned sources); 345b8e80941Smrg fs_inst(const fs_inst &that); 346b8e80941Smrg ~fs_inst(); 347b8e80941Smrg 348b8e80941Smrg void resize_sources(uint8_t num_sources); 349b8e80941Smrg 350b8e80941Smrg bool is_send_from_grf() const; 351b8e80941Smrg bool is_partial_write() const; 352b8e80941Smrg bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; 353b8e80941Smrg unsigned components_read(unsigned i) const; 354b8e80941Smrg unsigned size_read(int arg) const; 355b8e80941Smrg bool can_do_source_mods(const struct gen_device_info *devinfo) const; 356b8e80941Smrg bool can_do_cmod(); 357b8e80941Smrg bool can_change_types() const; 358b8e80941Smrg bool has_source_and_destination_hazard() const; 359b8e80941Smrg 360b8e80941Smrg /** 361b8e80941Smrg * Return whether \p arg is a control source of a virtual instruction which 362b8e80941Smrg * shouldn't contribute to the execution type and usual regioning 363b8e80941Smrg * restriction calculations of arithmetic instructions. 364b8e80941Smrg */ 365b8e80941Smrg bool is_control_source(unsigned arg) const; 366b8e80941Smrg 367b8e80941Smrg /** 368b8e80941Smrg * Return the subset of flag registers read by the instruction as a bitset 369b8e80941Smrg * with byte granularity. 370b8e80941Smrg */ 371b8e80941Smrg unsigned flags_read(const gen_device_info *devinfo) const; 372b8e80941Smrg 373b8e80941Smrg /** 374b8e80941Smrg * Return the subset of flag registers updated by the instruction (either 375b8e80941Smrg * partially or fully) as a bitset with byte granularity. 376b8e80941Smrg */ 377b8e80941Smrg unsigned flags_written() const; 378b8e80941Smrg 379b8e80941Smrg fs_reg dst; 380b8e80941Smrg fs_reg *src; 381b8e80941Smrg 382b8e80941Smrg uint8_t sources; /**< Number of fs_reg sources. */ 383b8e80941Smrg 384b8e80941Smrg bool last_rt:1; 385b8e80941Smrg bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ 386b8e80941Smrg}; 387b8e80941Smrg 388b8e80941Smrg/** 389b8e80941Smrg * Make the execution of \p inst dependent on the evaluation of a possibly 390b8e80941Smrg * inverted predicate. 391b8e80941Smrg */ 392b8e80941Smrgstatic inline fs_inst * 393b8e80941Smrgset_predicate_inv(enum brw_predicate pred, bool inverse, 394b8e80941Smrg fs_inst *inst) 395b8e80941Smrg{ 396b8e80941Smrg inst->predicate = pred; 397b8e80941Smrg inst->predicate_inverse = inverse; 398b8e80941Smrg return inst; 399b8e80941Smrg} 400b8e80941Smrg 401b8e80941Smrg/** 402b8e80941Smrg * Make the execution of \p inst dependent on the evaluation of a predicate. 403b8e80941Smrg */ 404b8e80941Smrgstatic inline fs_inst * 405b8e80941Smrgset_predicate(enum brw_predicate pred, fs_inst *inst) 406b8e80941Smrg{ 407b8e80941Smrg return set_predicate_inv(pred, false, inst); 408b8e80941Smrg} 409b8e80941Smrg 410b8e80941Smrg/** 411b8e80941Smrg * Write the result of evaluating the condition given by \p mod to a flag 412b8e80941Smrg * register. 413b8e80941Smrg */ 414b8e80941Smrgstatic inline fs_inst * 415b8e80941Smrgset_condmod(enum brw_conditional_mod mod, fs_inst *inst) 416b8e80941Smrg{ 417b8e80941Smrg inst->conditional_mod = mod; 418b8e80941Smrg return inst; 419b8e80941Smrg} 420b8e80941Smrg 421b8e80941Smrg/** 422b8e80941Smrg * Clamp the result of \p inst to the saturation range of its destination 423b8e80941Smrg * datatype. 424b8e80941Smrg */ 425b8e80941Smrgstatic inline fs_inst * 426b8e80941Smrgset_saturate(bool saturate, fs_inst *inst) 427b8e80941Smrg{ 428b8e80941Smrg inst->saturate = saturate; 429b8e80941Smrg return inst; 430b8e80941Smrg} 431b8e80941Smrg 432b8e80941Smrg/** 433b8e80941Smrg * Return the number of dataflow registers written by the instruction (either 434b8e80941Smrg * fully or partially) counted from 'floor(reg_offset(inst->dst) / 435b8e80941Smrg * register_size)'. The somewhat arbitrary register size unit is 4B for the 436b8e80941Smrg * UNIFORM and IMM files and 32B for all other files. 437b8e80941Smrg */ 438b8e80941Smrginline unsigned 439b8e80941Smrgregs_written(const fs_inst *inst) 440b8e80941Smrg{ 441b8e80941Smrg assert(inst->dst.file != UNIFORM && inst->dst.file != IMM); 442b8e80941Smrg return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + 443b8e80941Smrg inst->size_written - 444b8e80941Smrg MIN2(inst->size_written, reg_padding(inst->dst)), 445b8e80941Smrg REG_SIZE); 446b8e80941Smrg} 447b8e80941Smrg 448b8e80941Smrg/** 449b8e80941Smrg * Return the number of dataflow registers read by the instruction (either 450b8e80941Smrg * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / 451b8e80941Smrg * register_size)'. The somewhat arbitrary register size unit is 4B for the 452b8e80941Smrg * UNIFORM and IMM files and 32B for all other files. 453b8e80941Smrg */ 454b8e80941Smrginline unsigned 455b8e80941Smrgregs_read(const fs_inst *inst, unsigned i) 456b8e80941Smrg{ 457b8e80941Smrg const unsigned reg_size = 458b8e80941Smrg inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE; 459b8e80941Smrg return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + 460b8e80941Smrg inst->size_read(i) - 461b8e80941Smrg MIN2(inst->size_read(i), reg_padding(inst->src[i])), 462b8e80941Smrg reg_size); 463b8e80941Smrg} 464b8e80941Smrg 465b8e80941Smrgstatic inline enum brw_reg_type 466b8e80941Smrgget_exec_type(const fs_inst *inst) 467b8e80941Smrg{ 468b8e80941Smrg brw_reg_type exec_type = BRW_REGISTER_TYPE_B; 469b8e80941Smrg 470b8e80941Smrg for (int i = 0; i < inst->sources; i++) { 471b8e80941Smrg if (inst->src[i].file != BAD_FILE && 472b8e80941Smrg !inst->is_control_source(i)) { 473b8e80941Smrg const brw_reg_type t = get_exec_type(inst->src[i].type); 474b8e80941Smrg if (type_sz(t) > type_sz(exec_type)) 475b8e80941Smrg exec_type = t; 476b8e80941Smrg else if (type_sz(t) == type_sz(exec_type) && 477b8e80941Smrg brw_reg_type_is_floating_point(t)) 478b8e80941Smrg exec_type = t; 479b8e80941Smrg } 480b8e80941Smrg } 481b8e80941Smrg 482b8e80941Smrg if (exec_type == BRW_REGISTER_TYPE_B) 483b8e80941Smrg exec_type = inst->dst.type; 484b8e80941Smrg 485b8e80941Smrg assert(exec_type != BRW_REGISTER_TYPE_B); 486b8e80941Smrg 487b8e80941Smrg /* Promotion of the execution type to 32-bit for conversions from or to 488b8e80941Smrg * half-float seems to be consistent with the following text from the 489b8e80941Smrg * Cherryview PRM Vol. 7, "Execution Data Type": 490b8e80941Smrg * 491b8e80941Smrg * "When single precision and half precision floats are mixed between 492b8e80941Smrg * source operands or between source and destination operand [..] single 493b8e80941Smrg * precision float is the execution datatype." 494b8e80941Smrg * 495b8e80941Smrg * and from "Register Region Restrictions": 496b8e80941Smrg * 497b8e80941Smrg * "Conversion between Integer and HF (Half Float) must be DWord aligned 498b8e80941Smrg * and strided by a DWord on the destination." 499b8e80941Smrg */ 500b8e80941Smrg if (type_sz(exec_type) == 2 && 501b8e80941Smrg inst->dst.type != exec_type) { 502b8e80941Smrg if (exec_type == BRW_REGISTER_TYPE_HF) 503b8e80941Smrg exec_type = BRW_REGISTER_TYPE_F; 504b8e80941Smrg else if (inst->dst.type == BRW_REGISTER_TYPE_HF) 505b8e80941Smrg exec_type = BRW_REGISTER_TYPE_D; 506b8e80941Smrg } 507b8e80941Smrg 508b8e80941Smrg return exec_type; 509b8e80941Smrg} 510b8e80941Smrg 511b8e80941Smrgstatic inline unsigned 512b8e80941Smrgget_exec_type_size(const fs_inst *inst) 513b8e80941Smrg{ 514b8e80941Smrg return type_sz(get_exec_type(inst)); 515b8e80941Smrg} 516b8e80941Smrg 517b8e80941Smrg/** 518b8e80941Smrg * Return whether the instruction isn't an ALU instruction and cannot be 519b8e80941Smrg * assumed to complete in-order. 520b8e80941Smrg */ 521b8e80941Smrgstatic inline bool 522b8e80941Smrgis_unordered(const fs_inst *inst) 523b8e80941Smrg{ 524b8e80941Smrg return inst->mlen || inst->is_send_from_grf() || inst->is_math(); 525b8e80941Smrg} 526b8e80941Smrg 527b8e80941Smrg/** 528b8e80941Smrg * Return whether the following regioning restriction applies to the specified 529b8e80941Smrg * instruction. From the Cherryview PRM Vol 7. "Register Region 530b8e80941Smrg * Restrictions": 531b8e80941Smrg * 532b8e80941Smrg * "When source or destination datatype is 64b or operation is integer DWord 533b8e80941Smrg * multiply, regioning in Align1 must follow these rules: 534b8e80941Smrg * 535b8e80941Smrg * 1. Source and Destination horizontal stride must be aligned to the same qword. 536b8e80941Smrg * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 537b8e80941Smrg * 3. Source and Destination offset must be the same, except the case of 538b8e80941Smrg * scalar source." 539b8e80941Smrg */ 540b8e80941Smrgstatic inline bool 541b8e80941Smrghas_dst_aligned_region_restriction(const gen_device_info *devinfo, 542b8e80941Smrg const fs_inst *inst) 543b8e80941Smrg{ 544b8e80941Smrg const brw_reg_type exec_type = get_exec_type(inst); 545b8e80941Smrg /* Even though the hardware spec claims that "integer DWord multiply" 546b8e80941Smrg * operations are restricted, empirical evidence and the behavior of the 547b8e80941Smrg * simulator suggest that only 32x32-bit integer multiplication is 548b8e80941Smrg * restricted. 549b8e80941Smrg */ 550b8e80941Smrg const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) && 551b8e80941Smrg ((inst->opcode == BRW_OPCODE_MUL && 552b8e80941Smrg MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) || 553b8e80941Smrg (inst->opcode == BRW_OPCODE_MAD && 554b8e80941Smrg MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4)); 555b8e80941Smrg 556b8e80941Smrg if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || 557b8e80941Smrg (type_sz(exec_type) == 4 && is_dword_multiply)) 558b8e80941Smrg return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); 559b8e80941Smrg else 560b8e80941Smrg return false; 561b8e80941Smrg} 562b8e80941Smrg 563b8e80941Smrg#endif 564