1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2012 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** @file brw_eu_compact.c 25b8e80941Smrg * 26b8e80941Smrg * Instruction compaction is a feature of G45 and newer hardware that allows 27b8e80941Smrg * for a smaller instruction encoding. 28b8e80941Smrg * 29b8e80941Smrg * The instruction cache is on the order of 32KB, and many programs generate 30b8e80941Smrg * far more instructions than that. The instruction cache is built to barely 31b8e80941Smrg * keep up with instruction dispatch ability in cache hit cases -- L1 32b8e80941Smrg * instruction cache misses that still hit in the next level could limit 33b8e80941Smrg * throughput by around 50%. 34b8e80941Smrg * 35b8e80941Smrg * The idea of instruction compaction is that most instructions use a tiny 36b8e80941Smrg * subset of the GPU functionality, so we can encode what would be a 16 byte 37b8e80941Smrg * instruction in 8 bytes using some lookup tables for various fields. 38b8e80941Smrg * 39b8e80941Smrg * 40b8e80941Smrg * Instruction compaction capabilities vary subtly by generation. 41b8e80941Smrg * 42b8e80941Smrg * G45's support for instruction compaction is very limited. Jump counts on 43b8e80941Smrg * this generation are in units of 16-byte uncompacted instructions. As such, 44b8e80941Smrg * all jump targets must be 16-byte aligned. Also, all instructions must be 45b8e80941Smrg * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned. 46b8e80941Smrg * A G45-only instruction, NENOP, must be used to provide padding to align 47b8e80941Smrg * uncompacted instructions. 48b8e80941Smrg * 49b8e80941Smrg * Gen5 removes these restrictions and changes jump counts to be in units of 50b8e80941Smrg * 8-byte compacted instructions, allowing jump targets to be only 8-byte 51b8e80941Smrg * aligned. Uncompacted instructions can also be placed on 8-byte boundaries. 52b8e80941Smrg * 53b8e80941Smrg * Gen6 adds the ability to compact instructions with a limited range of 54b8e80941Smrg * immediate values. Compactable immediates have 12 unrestricted bits, and a 55b8e80941Smrg * 13th bit that's replicated through the high 20 bits, to create the 32-bit 56b8e80941Smrg * value of DW3 in the uncompacted instruction word. 57b8e80941Smrg * 58b8e80941Smrg * On Gen7 we can compact some control flow instructions with a small positive 59b8e80941Smrg * immediate in the low bits of DW3, like ENDIF with the JIP field. Other 60b8e80941Smrg * control flow instructions with UIP cannot be compacted, because of the 61b8e80941Smrg * replicated 13th bit. No control flow instructions can be compacted on Gen6 62b8e80941Smrg * since the jump count field is not in DW3. 63b8e80941Smrg * 64b8e80941Smrg * break JIP/UIP 65b8e80941Smrg * cont JIP/UIP 66b8e80941Smrg * halt JIP/UIP 67b8e80941Smrg * if JIP/UIP 68b8e80941Smrg * else JIP (plus UIP on BDW+) 69b8e80941Smrg * endif JIP 70b8e80941Smrg * while JIP (must be negative) 71b8e80941Smrg * 72b8e80941Smrg * Gen 8 adds support for compacting 3-src instructions. 73b8e80941Smrg */ 74b8e80941Smrg 75b8e80941Smrg#include "brw_eu.h" 76b8e80941Smrg#include "brw_shader.h" 77b8e80941Smrg#include "brw_disasm_info.h" 78b8e80941Smrg#include "dev/gen_debug.h" 79b8e80941Smrg 80b8e80941Smrgstatic const uint32_t g45_control_index_table[32] = { 81b8e80941Smrg 0b00000000000000000, 82b8e80941Smrg 0b01000000000000000, 83b8e80941Smrg 0b00110000000000000, 84b8e80941Smrg 0b00000000000000010, 85b8e80941Smrg 0b00100000000000000, 86b8e80941Smrg 0b00010000000000000, 87b8e80941Smrg 0b01000000000100000, 88b8e80941Smrg 0b01000000100000000, 89b8e80941Smrg 0b01010000000100000, 90b8e80941Smrg 0b00000000100000010, 91b8e80941Smrg 0b11000000000000000, 92b8e80941Smrg 0b00001000100000010, 93b8e80941Smrg 0b01001000100000000, 94b8e80941Smrg 0b00000000100000000, 95b8e80941Smrg 0b11000000000100000, 96b8e80941Smrg 0b00001000100000000, 97b8e80941Smrg 0b10110000000000000, 98b8e80941Smrg 0b11010000000100000, 99b8e80941Smrg 0b00110000100000000, 100b8e80941Smrg 0b00100000100000000, 101b8e80941Smrg 0b01000000000001000, 102b8e80941Smrg 0b01000000000000100, 103b8e80941Smrg 0b00111100000000000, 104b8e80941Smrg 0b00101011000000000, 105b8e80941Smrg 0b00110000000010000, 106b8e80941Smrg 0b00010000100000000, 107b8e80941Smrg 0b01000000000100100, 108b8e80941Smrg 0b01000000000101000, 109b8e80941Smrg 0b00110000000000110, 110b8e80941Smrg 0b00000000000001010, 111b8e80941Smrg 0b01010000000101000, 112b8e80941Smrg 0b01010000000100100, 113b8e80941Smrg}; 114b8e80941Smrg 115b8e80941Smrgstatic const uint32_t g45_datatype_table[32] = { 116b8e80941Smrg 0b001000000000100001, 117b8e80941Smrg 0b001011010110101101, 118b8e80941Smrg 0b001000001000110001, 119b8e80941Smrg 0b001111011110111101, 120b8e80941Smrg 0b001011010110101100, 121b8e80941Smrg 0b001000000110101101, 122b8e80941Smrg 0b001000000000100000, 123b8e80941Smrg 0b010100010110110001, 124b8e80941Smrg 0b001100011000101101, 125b8e80941Smrg 0b001000000000100010, 126b8e80941Smrg 0b001000001000110110, 127b8e80941Smrg 0b010000001000110001, 128b8e80941Smrg 0b001000001000110010, 129b8e80941Smrg 0b011000001000110010, 130b8e80941Smrg 0b001111011110111100, 131b8e80941Smrg 0b001000000100101000, 132b8e80941Smrg 0b010100011000110001, 133b8e80941Smrg 0b001010010100101001, 134b8e80941Smrg 0b001000001000101001, 135b8e80941Smrg 0b010000001000110110, 136b8e80941Smrg 0b101000001000110001, 137b8e80941Smrg 0b001011011000101101, 138b8e80941Smrg 0b001000000100001001, 139b8e80941Smrg 0b001011011000101100, 140b8e80941Smrg 0b110100011000110001, 141b8e80941Smrg 0b001000001110111101, 142b8e80941Smrg 0b110000001000110001, 143b8e80941Smrg 0b011000000100101010, 144b8e80941Smrg 0b101000001000101001, 145b8e80941Smrg 0b001011010110001100, 146b8e80941Smrg 0b001000000110100001, 147b8e80941Smrg 0b001010010100001000, 148b8e80941Smrg}; 149b8e80941Smrg 150b8e80941Smrgstatic const uint16_t g45_subreg_table[32] = { 151b8e80941Smrg 0b000000000000000, 152b8e80941Smrg 0b000000010000000, 153b8e80941Smrg 0b000001000000000, 154b8e80941Smrg 0b000100000000000, 155b8e80941Smrg 0b000000000100000, 156b8e80941Smrg 0b100000000000000, 157b8e80941Smrg 0b000000000010000, 158b8e80941Smrg 0b001100000000000, 159b8e80941Smrg 0b001010000000000, 160b8e80941Smrg 0b000000100000000, 161b8e80941Smrg 0b001000000000000, 162b8e80941Smrg 0b000000000001000, 163b8e80941Smrg 0b000000001000000, 164b8e80941Smrg 0b000000000000001, 165b8e80941Smrg 0b000010000000000, 166b8e80941Smrg 0b000000010100000, 167b8e80941Smrg 0b000000000000111, 168b8e80941Smrg 0b000001000100000, 169b8e80941Smrg 0b011000000000000, 170b8e80941Smrg 0b000000110000000, 171b8e80941Smrg 0b000000000000010, 172b8e80941Smrg 0b000000000000100, 173b8e80941Smrg 0b000000001100000, 174b8e80941Smrg 0b000100000000010, 175b8e80941Smrg 0b001110011000110, 176b8e80941Smrg 0b001110100001000, 177b8e80941Smrg 0b000110011000110, 178b8e80941Smrg 0b000001000011000, 179b8e80941Smrg 0b000110010000100, 180b8e80941Smrg 0b001100000000110, 181b8e80941Smrg 0b000000010000110, 182b8e80941Smrg 0b000001000110000, 183b8e80941Smrg}; 184b8e80941Smrg 185b8e80941Smrgstatic const uint16_t g45_src_index_table[32] = { 186b8e80941Smrg 0b000000000000, 187b8e80941Smrg 0b010001101000, 188b8e80941Smrg 0b010110001000, 189b8e80941Smrg 0b011010010000, 190b8e80941Smrg 0b001101001000, 191b8e80941Smrg 0b010110001010, 192b8e80941Smrg 0b010101110000, 193b8e80941Smrg 0b011001111000, 194b8e80941Smrg 0b001000101000, 195b8e80941Smrg 0b000000101000, 196b8e80941Smrg 0b010001010000, 197b8e80941Smrg 0b111101101100, 198b8e80941Smrg 0b010110001100, 199b8e80941Smrg 0b010001101100, 200b8e80941Smrg 0b011010010100, 201b8e80941Smrg 0b010001001100, 202b8e80941Smrg 0b001100101000, 203b8e80941Smrg 0b000000000010, 204b8e80941Smrg 0b111101001100, 205b8e80941Smrg 0b011001101000, 206b8e80941Smrg 0b010101001000, 207b8e80941Smrg 0b000000000100, 208b8e80941Smrg 0b000000101100, 209b8e80941Smrg 0b010001101010, 210b8e80941Smrg 0b000000111000, 211b8e80941Smrg 0b010101011000, 212b8e80941Smrg 0b000100100000, 213b8e80941Smrg 0b010110000000, 214b8e80941Smrg 0b010000000100, 215b8e80941Smrg 0b010000111000, 216b8e80941Smrg 0b000101100000, 217b8e80941Smrg 0b111101110100, 218b8e80941Smrg}; 219b8e80941Smrg 220b8e80941Smrgstatic const uint32_t gen6_control_index_table[32] = { 221b8e80941Smrg 0b00000000000000000, 222b8e80941Smrg 0b01000000000000000, 223b8e80941Smrg 0b00110000000000000, 224b8e80941Smrg 0b00000000100000000, 225b8e80941Smrg 0b00010000000000000, 226b8e80941Smrg 0b00001000100000000, 227b8e80941Smrg 0b00000000100000010, 228b8e80941Smrg 0b00000000000000010, 229b8e80941Smrg 0b01000000100000000, 230b8e80941Smrg 0b01010000000000000, 231b8e80941Smrg 0b10110000000000000, 232b8e80941Smrg 0b00100000000000000, 233b8e80941Smrg 0b11010000000000000, 234b8e80941Smrg 0b11000000000000000, 235b8e80941Smrg 0b01001000100000000, 236b8e80941Smrg 0b01000000000001000, 237b8e80941Smrg 0b01000000000000100, 238b8e80941Smrg 0b00000000000001000, 239b8e80941Smrg 0b00000000000000100, 240b8e80941Smrg 0b00111000100000000, 241b8e80941Smrg 0b00001000100000010, 242b8e80941Smrg 0b00110000100000000, 243b8e80941Smrg 0b00110000000000001, 244b8e80941Smrg 0b00100000000000001, 245b8e80941Smrg 0b00110000000000010, 246b8e80941Smrg 0b00110000000000101, 247b8e80941Smrg 0b00110000000001001, 248b8e80941Smrg 0b00110000000010000, 249b8e80941Smrg 0b00110000000000011, 250b8e80941Smrg 0b00110000000000100, 251b8e80941Smrg 0b00110000100001000, 252b8e80941Smrg 0b00100000000001001, 253b8e80941Smrg}; 254b8e80941Smrg 255b8e80941Smrgstatic const uint32_t gen6_datatype_table[32] = { 256b8e80941Smrg 0b001001110000000000, 257b8e80941Smrg 0b001000110000100000, 258b8e80941Smrg 0b001001110000000001, 259b8e80941Smrg 0b001000000001100000, 260b8e80941Smrg 0b001010110100101001, 261b8e80941Smrg 0b001000000110101101, 262b8e80941Smrg 0b001100011000101100, 263b8e80941Smrg 0b001011110110101101, 264b8e80941Smrg 0b001000000111101100, 265b8e80941Smrg 0b001000000001100001, 266b8e80941Smrg 0b001000110010100101, 267b8e80941Smrg 0b001000000001000001, 268b8e80941Smrg 0b001000001000110001, 269b8e80941Smrg 0b001000001000101001, 270b8e80941Smrg 0b001000000000100000, 271b8e80941Smrg 0b001000001000110010, 272b8e80941Smrg 0b001010010100101001, 273b8e80941Smrg 0b001011010010100101, 274b8e80941Smrg 0b001000000110100101, 275b8e80941Smrg 0b001100011000101001, 276b8e80941Smrg 0b001011011000101100, 277b8e80941Smrg 0b001011010110100101, 278b8e80941Smrg 0b001011110110100101, 279b8e80941Smrg 0b001111011110111101, 280b8e80941Smrg 0b001111011110111100, 281b8e80941Smrg 0b001111011110111101, 282b8e80941Smrg 0b001111011110011101, 283b8e80941Smrg 0b001111011110111110, 284b8e80941Smrg 0b001000000000100001, 285b8e80941Smrg 0b001000000000100010, 286b8e80941Smrg 0b001001111111011101, 287b8e80941Smrg 0b001000001110111110, 288b8e80941Smrg}; 289b8e80941Smrg 290b8e80941Smrgstatic const uint16_t gen6_subreg_table[32] = { 291b8e80941Smrg 0b000000000000000, 292b8e80941Smrg 0b000000000000100, 293b8e80941Smrg 0b000000110000000, 294b8e80941Smrg 0b111000000000000, 295b8e80941Smrg 0b011110000001000, 296b8e80941Smrg 0b000010000000000, 297b8e80941Smrg 0b000000000010000, 298b8e80941Smrg 0b000110000001100, 299b8e80941Smrg 0b001000000000000, 300b8e80941Smrg 0b000001000000000, 301b8e80941Smrg 0b000001010010100, 302b8e80941Smrg 0b000000001010110, 303b8e80941Smrg 0b010000000000000, 304b8e80941Smrg 0b110000000000000, 305b8e80941Smrg 0b000100000000000, 306b8e80941Smrg 0b000000010000000, 307b8e80941Smrg 0b000000000001000, 308b8e80941Smrg 0b100000000000000, 309b8e80941Smrg 0b000001010000000, 310b8e80941Smrg 0b001010000000000, 311b8e80941Smrg 0b001100000000000, 312b8e80941Smrg 0b000000001010100, 313b8e80941Smrg 0b101101010010100, 314b8e80941Smrg 0b010100000000000, 315b8e80941Smrg 0b000000010001111, 316b8e80941Smrg 0b011000000000000, 317b8e80941Smrg 0b111110000000000, 318b8e80941Smrg 0b101000000000000, 319b8e80941Smrg 0b000000000001111, 320b8e80941Smrg 0b000100010001111, 321b8e80941Smrg 0b001000010001111, 322b8e80941Smrg 0b000110000000000, 323b8e80941Smrg}; 324b8e80941Smrg 325b8e80941Smrgstatic const uint16_t gen6_src_index_table[32] = { 326b8e80941Smrg 0b000000000000, 327b8e80941Smrg 0b010110001000, 328b8e80941Smrg 0b010001101000, 329b8e80941Smrg 0b001000101000, 330b8e80941Smrg 0b011010010000, 331b8e80941Smrg 0b000100100000, 332b8e80941Smrg 0b010001101100, 333b8e80941Smrg 0b010101110000, 334b8e80941Smrg 0b011001111000, 335b8e80941Smrg 0b001100101000, 336b8e80941Smrg 0b010110001100, 337b8e80941Smrg 0b001000100000, 338b8e80941Smrg 0b010110001010, 339b8e80941Smrg 0b000000000010, 340b8e80941Smrg 0b010101010000, 341b8e80941Smrg 0b010101101000, 342b8e80941Smrg 0b111101001100, 343b8e80941Smrg 0b111100101100, 344b8e80941Smrg 0b011001110000, 345b8e80941Smrg 0b010110001001, 346b8e80941Smrg 0b010101011000, 347b8e80941Smrg 0b001101001000, 348b8e80941Smrg 0b010000101100, 349b8e80941Smrg 0b010000000000, 350b8e80941Smrg 0b001101110000, 351b8e80941Smrg 0b001100010000, 352b8e80941Smrg 0b001100000000, 353b8e80941Smrg 0b010001101010, 354b8e80941Smrg 0b001101111000, 355b8e80941Smrg 0b000001110000, 356b8e80941Smrg 0b001100100000, 357b8e80941Smrg 0b001101010000, 358b8e80941Smrg}; 359b8e80941Smrg 360b8e80941Smrgstatic const uint32_t gen7_control_index_table[32] = { 361b8e80941Smrg 0b0000000000000000010, 362b8e80941Smrg 0b0000100000000000000, 363b8e80941Smrg 0b0000100000000000001, 364b8e80941Smrg 0b0000100000000000010, 365b8e80941Smrg 0b0000100000000000011, 366b8e80941Smrg 0b0000100000000000100, 367b8e80941Smrg 0b0000100000000000101, 368b8e80941Smrg 0b0000100000000000111, 369b8e80941Smrg 0b0000100000000001000, 370b8e80941Smrg 0b0000100000000001001, 371b8e80941Smrg 0b0000100000000001101, 372b8e80941Smrg 0b0000110000000000000, 373b8e80941Smrg 0b0000110000000000001, 374b8e80941Smrg 0b0000110000000000010, 375b8e80941Smrg 0b0000110000000000011, 376b8e80941Smrg 0b0000110000000000100, 377b8e80941Smrg 0b0000110000000000101, 378b8e80941Smrg 0b0000110000000000111, 379b8e80941Smrg 0b0000110000000001001, 380b8e80941Smrg 0b0000110000000001101, 381b8e80941Smrg 0b0000110000000010000, 382b8e80941Smrg 0b0000110000100000000, 383b8e80941Smrg 0b0001000000000000000, 384b8e80941Smrg 0b0001000000000000010, 385b8e80941Smrg 0b0001000000000000100, 386b8e80941Smrg 0b0001000000100000000, 387b8e80941Smrg 0b0010110000000000000, 388b8e80941Smrg 0b0010110000000010000, 389b8e80941Smrg 0b0011000000000000000, 390b8e80941Smrg 0b0011000000100000000, 391b8e80941Smrg 0b0101000000000000000, 392b8e80941Smrg 0b0101000000100000000, 393b8e80941Smrg}; 394b8e80941Smrg 395b8e80941Smrgstatic const uint32_t gen7_datatype_table[32] = { 396b8e80941Smrg 0b001000000000000001, 397b8e80941Smrg 0b001000000000100000, 398b8e80941Smrg 0b001000000000100001, 399b8e80941Smrg 0b001000000001100001, 400b8e80941Smrg 0b001000000010111101, 401b8e80941Smrg 0b001000001011111101, 402b8e80941Smrg 0b001000001110100001, 403b8e80941Smrg 0b001000001110100101, 404b8e80941Smrg 0b001000001110111101, 405b8e80941Smrg 0b001000010000100001, 406b8e80941Smrg 0b001000110000100000, 407b8e80941Smrg 0b001000110000100001, 408b8e80941Smrg 0b001001010010100101, 409b8e80941Smrg 0b001001110010100100, 410b8e80941Smrg 0b001001110010100101, 411b8e80941Smrg 0b001111001110111101, 412b8e80941Smrg 0b001111011110011101, 413b8e80941Smrg 0b001111011110111100, 414b8e80941Smrg 0b001111011110111101, 415b8e80941Smrg 0b001111111110111100, 416b8e80941Smrg 0b000000001000001100, 417b8e80941Smrg 0b001000000000111101, 418b8e80941Smrg 0b001000000010100101, 419b8e80941Smrg 0b001000010000100000, 420b8e80941Smrg 0b001001010010100100, 421b8e80941Smrg 0b001001110010000100, 422b8e80941Smrg 0b001010010100001001, 423b8e80941Smrg 0b001101111110111101, 424b8e80941Smrg 0b001111111110111101, 425b8e80941Smrg 0b001011110110101100, 426b8e80941Smrg 0b001010010100101000, 427b8e80941Smrg 0b001010110100101000, 428b8e80941Smrg}; 429b8e80941Smrg 430b8e80941Smrgstatic const uint16_t gen7_subreg_table[32] = { 431b8e80941Smrg 0b000000000000000, 432b8e80941Smrg 0b000000000000001, 433b8e80941Smrg 0b000000000001000, 434b8e80941Smrg 0b000000000001111, 435b8e80941Smrg 0b000000000010000, 436b8e80941Smrg 0b000000010000000, 437b8e80941Smrg 0b000000100000000, 438b8e80941Smrg 0b000000110000000, 439b8e80941Smrg 0b000001000000000, 440b8e80941Smrg 0b000001000010000, 441b8e80941Smrg 0b000010100000000, 442b8e80941Smrg 0b001000000000000, 443b8e80941Smrg 0b001000000000001, 444b8e80941Smrg 0b001000010000001, 445b8e80941Smrg 0b001000010000010, 446b8e80941Smrg 0b001000010000011, 447b8e80941Smrg 0b001000010000100, 448b8e80941Smrg 0b001000010000111, 449b8e80941Smrg 0b001000010001000, 450b8e80941Smrg 0b001000010001110, 451b8e80941Smrg 0b001000010001111, 452b8e80941Smrg 0b001000110000000, 453b8e80941Smrg 0b001000111101000, 454b8e80941Smrg 0b010000000000000, 455b8e80941Smrg 0b010000110000000, 456b8e80941Smrg 0b011000000000000, 457b8e80941Smrg 0b011110010000111, 458b8e80941Smrg 0b100000000000000, 459b8e80941Smrg 0b101000000000000, 460b8e80941Smrg 0b110000000000000, 461b8e80941Smrg 0b111000000000000, 462b8e80941Smrg 0b111000000011100, 463b8e80941Smrg}; 464b8e80941Smrg 465b8e80941Smrgstatic const uint16_t gen7_src_index_table[32] = { 466b8e80941Smrg 0b000000000000, 467b8e80941Smrg 0b000000000010, 468b8e80941Smrg 0b000000010000, 469b8e80941Smrg 0b000000010010, 470b8e80941Smrg 0b000000011000, 471b8e80941Smrg 0b000000100000, 472b8e80941Smrg 0b000000101000, 473b8e80941Smrg 0b000001001000, 474b8e80941Smrg 0b000001010000, 475b8e80941Smrg 0b000001110000, 476b8e80941Smrg 0b000001111000, 477b8e80941Smrg 0b001100000000, 478b8e80941Smrg 0b001100000010, 479b8e80941Smrg 0b001100001000, 480b8e80941Smrg 0b001100010000, 481b8e80941Smrg 0b001100010010, 482b8e80941Smrg 0b001100100000, 483b8e80941Smrg 0b001100101000, 484b8e80941Smrg 0b001100111000, 485b8e80941Smrg 0b001101000000, 486b8e80941Smrg 0b001101000010, 487b8e80941Smrg 0b001101001000, 488b8e80941Smrg 0b001101010000, 489b8e80941Smrg 0b001101100000, 490b8e80941Smrg 0b001101101000, 491b8e80941Smrg 0b001101110000, 492b8e80941Smrg 0b001101110001, 493b8e80941Smrg 0b001101111000, 494b8e80941Smrg 0b010001101000, 495b8e80941Smrg 0b010001101001, 496b8e80941Smrg 0b010001101010, 497b8e80941Smrg 0b010110001000, 498b8e80941Smrg}; 499b8e80941Smrg 500b8e80941Smrgstatic const uint32_t gen8_control_index_table[32] = { 501b8e80941Smrg 0b0000000000000000010, 502b8e80941Smrg 0b0000100000000000000, 503b8e80941Smrg 0b0000100000000000001, 504b8e80941Smrg 0b0000100000000000010, 505b8e80941Smrg 0b0000100000000000011, 506b8e80941Smrg 0b0000100000000000100, 507b8e80941Smrg 0b0000100000000000101, 508b8e80941Smrg 0b0000100000000000111, 509b8e80941Smrg 0b0000100000000001000, 510b8e80941Smrg 0b0000100000000001001, 511b8e80941Smrg 0b0000100000000001101, 512b8e80941Smrg 0b0000110000000000000, 513b8e80941Smrg 0b0000110000000000001, 514b8e80941Smrg 0b0000110000000000010, 515b8e80941Smrg 0b0000110000000000011, 516b8e80941Smrg 0b0000110000000000100, 517b8e80941Smrg 0b0000110000000000101, 518b8e80941Smrg 0b0000110000000000111, 519b8e80941Smrg 0b0000110000000001001, 520b8e80941Smrg 0b0000110000000001101, 521b8e80941Smrg 0b0000110000000010000, 522b8e80941Smrg 0b0000110000100000000, 523b8e80941Smrg 0b0001000000000000000, 524b8e80941Smrg 0b0001000000000000010, 525b8e80941Smrg 0b0001000000000000100, 526b8e80941Smrg 0b0001000000100000000, 527b8e80941Smrg 0b0010110000000000000, 528b8e80941Smrg 0b0010110000000010000, 529b8e80941Smrg 0b0011000000000000000, 530b8e80941Smrg 0b0011000000100000000, 531b8e80941Smrg 0b0101000000000000000, 532b8e80941Smrg 0b0101000000100000000, 533b8e80941Smrg}; 534b8e80941Smrg 535b8e80941Smrgstatic const uint32_t gen8_datatype_table[32] = { 536b8e80941Smrg 0b001000000000000000001, 537b8e80941Smrg 0b001000000000001000000, 538b8e80941Smrg 0b001000000000001000001, 539b8e80941Smrg 0b001000000000011000001, 540b8e80941Smrg 0b001000000000101011101, 541b8e80941Smrg 0b001000000010111011101, 542b8e80941Smrg 0b001000000011101000001, 543b8e80941Smrg 0b001000000011101000101, 544b8e80941Smrg 0b001000000011101011101, 545b8e80941Smrg 0b001000001000001000001, 546b8e80941Smrg 0b001000011000001000000, 547b8e80941Smrg 0b001000011000001000001, 548b8e80941Smrg 0b001000101000101000101, 549b8e80941Smrg 0b001000111000101000100, 550b8e80941Smrg 0b001000111000101000101, 551b8e80941Smrg 0b001011100011101011101, 552b8e80941Smrg 0b001011101011100011101, 553b8e80941Smrg 0b001011101011101011100, 554b8e80941Smrg 0b001011101011101011101, 555b8e80941Smrg 0b001011111011101011100, 556b8e80941Smrg 0b000000000010000001100, 557b8e80941Smrg 0b001000000000001011101, 558b8e80941Smrg 0b001000000000101000101, 559b8e80941Smrg 0b001000001000001000000, 560b8e80941Smrg 0b001000101000101000100, 561b8e80941Smrg 0b001000111000100000100, 562b8e80941Smrg 0b001001001001000001001, 563b8e80941Smrg 0b001010111011101011101, 564b8e80941Smrg 0b001011111011101011101, 565b8e80941Smrg 0b001001111001101001100, 566b8e80941Smrg 0b001001001001001001000, 567b8e80941Smrg 0b001001011001001001000, 568b8e80941Smrg}; 569b8e80941Smrg 570b8e80941Smrgstatic const uint16_t gen8_subreg_table[32] = { 571b8e80941Smrg 0b000000000000000, 572b8e80941Smrg 0b000000000000001, 573b8e80941Smrg 0b000000000001000, 574b8e80941Smrg 0b000000000001111, 575b8e80941Smrg 0b000000000010000, 576b8e80941Smrg 0b000000010000000, 577b8e80941Smrg 0b000000100000000, 578b8e80941Smrg 0b000000110000000, 579b8e80941Smrg 0b000001000000000, 580b8e80941Smrg 0b000001000010000, 581b8e80941Smrg 0b000001010000000, 582b8e80941Smrg 0b001000000000000, 583b8e80941Smrg 0b001000000000001, 584b8e80941Smrg 0b001000010000001, 585b8e80941Smrg 0b001000010000010, 586b8e80941Smrg 0b001000010000011, 587b8e80941Smrg 0b001000010000100, 588b8e80941Smrg 0b001000010000111, 589b8e80941Smrg 0b001000010001000, 590b8e80941Smrg 0b001000010001110, 591b8e80941Smrg 0b001000010001111, 592b8e80941Smrg 0b001000110000000, 593b8e80941Smrg 0b001000111101000, 594b8e80941Smrg 0b010000000000000, 595b8e80941Smrg 0b010000110000000, 596b8e80941Smrg 0b011000000000000, 597b8e80941Smrg 0b011110010000111, 598b8e80941Smrg 0b100000000000000, 599b8e80941Smrg 0b101000000000000, 600b8e80941Smrg 0b110000000000000, 601b8e80941Smrg 0b111000000000000, 602b8e80941Smrg 0b111000000011100, 603b8e80941Smrg}; 604b8e80941Smrg 605b8e80941Smrgstatic const uint16_t gen8_src_index_table[32] = { 606b8e80941Smrg 0b000000000000, 607b8e80941Smrg 0b000000000010, 608b8e80941Smrg 0b000000010000, 609b8e80941Smrg 0b000000010010, 610b8e80941Smrg 0b000000011000, 611b8e80941Smrg 0b000000100000, 612b8e80941Smrg 0b000000101000, 613b8e80941Smrg 0b000001001000, 614b8e80941Smrg 0b000001010000, 615b8e80941Smrg 0b000001110000, 616b8e80941Smrg 0b000001111000, 617b8e80941Smrg 0b001100000000, 618b8e80941Smrg 0b001100000010, 619b8e80941Smrg 0b001100001000, 620b8e80941Smrg 0b001100010000, 621b8e80941Smrg 0b001100010010, 622b8e80941Smrg 0b001100100000, 623b8e80941Smrg 0b001100101000, 624b8e80941Smrg 0b001100111000, 625b8e80941Smrg 0b001101000000, 626b8e80941Smrg 0b001101000010, 627b8e80941Smrg 0b001101001000, 628b8e80941Smrg 0b001101010000, 629b8e80941Smrg 0b001101100000, 630b8e80941Smrg 0b001101101000, 631b8e80941Smrg 0b001101110000, 632b8e80941Smrg 0b001101110001, 633b8e80941Smrg 0b001101111000, 634b8e80941Smrg 0b010001101000, 635b8e80941Smrg 0b010001101001, 636b8e80941Smrg 0b010001101010, 637b8e80941Smrg 0b010110001000, 638b8e80941Smrg}; 639b8e80941Smrg 640b8e80941Smrgstatic const uint32_t gen11_datatype_table[32] = { 641b8e80941Smrg 0b001000000000000000001, 642b8e80941Smrg 0b001000000000001000000, 643b8e80941Smrg 0b001000000000001000001, 644b8e80941Smrg 0b001000000000011000001, 645b8e80941Smrg 0b001000000000101100101, 646b8e80941Smrg 0b001000000101111100101, 647b8e80941Smrg 0b001000000100101000001, 648b8e80941Smrg 0b001000000100101000101, 649b8e80941Smrg 0b001000000100101100101, 650b8e80941Smrg 0b001000001000001000001, 651b8e80941Smrg 0b001000011000001000000, 652b8e80941Smrg 0b001000011000001000001, 653b8e80941Smrg 0b001000101000101000101, 654b8e80941Smrg 0b001000111000101000100, 655b8e80941Smrg 0b001000111000101000101, 656b8e80941Smrg 0b001100100100101100101, 657b8e80941Smrg 0b001100101100100100101, 658b8e80941Smrg 0b001100101100101100100, 659b8e80941Smrg 0b001100101100101100101, 660b8e80941Smrg 0b001100111100101100100, 661b8e80941Smrg 0b000000000010000001100, 662b8e80941Smrg 0b001000000000001100101, 663b8e80941Smrg 0b001000000000101000101, 664b8e80941Smrg 0b001000001000001000000, 665b8e80941Smrg 0b001000101000101000100, 666b8e80941Smrg 0b001000111000100000100, 667b8e80941Smrg 0b001001001001000001001, 668b8e80941Smrg 0b001101111100101100101, 669b8e80941Smrg 0b001100111100101100101, 670b8e80941Smrg 0b001001111001101001100, 671b8e80941Smrg 0b001001001001001001000, 672b8e80941Smrg 0b001001011001001001000, 673b8e80941Smrg}; 674b8e80941Smrg 675b8e80941Smrg/* This is actually the control index table for Cherryview (26 bits), but the 676b8e80941Smrg * only difference from Broadwell (24 bits) is that it has two extra 0-bits at 677b8e80941Smrg * the start. 678b8e80941Smrg * 679b8e80941Smrg * The low 24 bits have the same mappings on both hardware. 680b8e80941Smrg */ 681b8e80941Smrgstatic const uint32_t gen8_3src_control_index_table[4] = { 682b8e80941Smrg 0b00100000000110000000000001, 683b8e80941Smrg 0b00000000000110000000000001, 684b8e80941Smrg 0b00000000001000000000000001, 685b8e80941Smrg 0b00000000001000000000100001, 686b8e80941Smrg}; 687b8e80941Smrg 688b8e80941Smrg/* This is actually the control index table for Cherryview (49 bits), but the 689b8e80941Smrg * only difference from Broadwell (46 bits) is that it has three extra 0-bits 690b8e80941Smrg * at the start. 691b8e80941Smrg * 692b8e80941Smrg * The low 44 bits have the same mappings on both hardware, and since the high 693b8e80941Smrg * three bits on Broadwell are zero, we can reuse Cherryview's table. 694b8e80941Smrg */ 695b8e80941Smrgstatic const uint64_t gen8_3src_source_index_table[4] = { 696b8e80941Smrg 0b0000001110010011100100111001000001111000000000000, 697b8e80941Smrg 0b0000001110010011100100111001000001111000000000010, 698b8e80941Smrg 0b0000001110010011100100111001000001111000000001000, 699b8e80941Smrg 0b0000001110010011100100111001000001111000000100000, 700b8e80941Smrg}; 701b8e80941Smrg 702b8e80941Smrgstatic const uint32_t *control_index_table; 703b8e80941Smrgstatic const uint32_t *datatype_table; 704b8e80941Smrgstatic const uint16_t *subreg_table; 705b8e80941Smrgstatic const uint16_t *src_index_table; 706b8e80941Smrg 707b8e80941Smrgstatic bool 708b8e80941Smrgset_control_index(const struct gen_device_info *devinfo, 709b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 710b8e80941Smrg{ 711b8e80941Smrg uint32_t uncompacted = devinfo->gen >= 8 /* 17b/G45; 19b/IVB+ */ 712b8e80941Smrg ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */ 713b8e80941Smrg (brw_inst_bits(src, 23, 12) << 4) | /* 12b */ 714b8e80941Smrg (brw_inst_bits(src, 10, 9) << 2) | /* 2b */ 715b8e80941Smrg (brw_inst_bits(src, 34, 34) << 1) | /* 1b */ 716b8e80941Smrg (brw_inst_bits(src, 8, 8)) /* 1b */ 717b8e80941Smrg : (brw_inst_bits(src, 31, 31) << 16) | /* 1b */ 718b8e80941Smrg (brw_inst_bits(src, 23, 8)); /* 16b */ 719b8e80941Smrg 720b8e80941Smrg /* On gen7, the flag register and subregister numbers are integrated into 721b8e80941Smrg * the control index. 722b8e80941Smrg */ 723b8e80941Smrg if (devinfo->gen == 7) 724b8e80941Smrg uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */ 725b8e80941Smrg 726b8e80941Smrg for (int i = 0; i < 32; i++) { 727b8e80941Smrg if (control_index_table[i] == uncompacted) { 728b8e80941Smrg brw_compact_inst_set_control_index(devinfo, dst, i); 729b8e80941Smrg return true; 730b8e80941Smrg } 731b8e80941Smrg } 732b8e80941Smrg 733b8e80941Smrg return false; 734b8e80941Smrg} 735b8e80941Smrg 736b8e80941Smrgstatic bool 737b8e80941Smrgset_datatype_index(const struct gen_device_info *devinfo, brw_compact_inst *dst, 738b8e80941Smrg const brw_inst *src) 739b8e80941Smrg{ 740b8e80941Smrg uint32_t uncompacted = devinfo->gen >= 8 /* 18b/G45+; 21b/BDW+ */ 741b8e80941Smrg ? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */ 742b8e80941Smrg (brw_inst_bits(src, 94, 89) << 12) | /* 6b */ 743b8e80941Smrg (brw_inst_bits(src, 46, 35)) /* 12b */ 744b8e80941Smrg : (brw_inst_bits(src, 63, 61) << 15) | /* 3b */ 745b8e80941Smrg (brw_inst_bits(src, 46, 32)); /* 15b */ 746b8e80941Smrg 747b8e80941Smrg for (int i = 0; i < 32; i++) { 748b8e80941Smrg if (datatype_table[i] == uncompacted) { 749b8e80941Smrg brw_compact_inst_set_datatype_index(devinfo, dst, i); 750b8e80941Smrg return true; 751b8e80941Smrg } 752b8e80941Smrg } 753b8e80941Smrg 754b8e80941Smrg return false; 755b8e80941Smrg} 756b8e80941Smrg 757b8e80941Smrgstatic bool 758b8e80941Smrgset_subreg_index(const struct gen_device_info *devinfo, brw_compact_inst *dst, 759b8e80941Smrg const brw_inst *src, bool is_immediate) 760b8e80941Smrg{ 761b8e80941Smrg uint16_t uncompacted = /* 15b */ 762b8e80941Smrg (brw_inst_bits(src, 52, 48) << 0) | /* 5b */ 763b8e80941Smrg (brw_inst_bits(src, 68, 64) << 5); /* 5b */ 764b8e80941Smrg 765b8e80941Smrg if (!is_immediate) 766b8e80941Smrg uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */ 767b8e80941Smrg 768b8e80941Smrg for (int i = 0; i < 32; i++) { 769b8e80941Smrg if (subreg_table[i] == uncompacted) { 770b8e80941Smrg brw_compact_inst_set_subreg_index(devinfo, dst, i); 771b8e80941Smrg return true; 772b8e80941Smrg } 773b8e80941Smrg } 774b8e80941Smrg 775b8e80941Smrg return false; 776b8e80941Smrg} 777b8e80941Smrg 778b8e80941Smrgstatic bool 779b8e80941Smrgget_src_index(uint16_t uncompacted, 780b8e80941Smrg uint16_t *compacted) 781b8e80941Smrg{ 782b8e80941Smrg for (int i = 0; i < 32; i++) { 783b8e80941Smrg if (src_index_table[i] == uncompacted) { 784b8e80941Smrg *compacted = i; 785b8e80941Smrg return true; 786b8e80941Smrg } 787b8e80941Smrg } 788b8e80941Smrg 789b8e80941Smrg return false; 790b8e80941Smrg} 791b8e80941Smrg 792b8e80941Smrgstatic bool 793b8e80941Smrgset_src0_index(const struct gen_device_info *devinfo, 794b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 795b8e80941Smrg{ 796b8e80941Smrg uint16_t compacted; 797b8e80941Smrg uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */ 798b8e80941Smrg 799b8e80941Smrg if (!get_src_index(uncompacted, &compacted)) 800b8e80941Smrg return false; 801b8e80941Smrg 802b8e80941Smrg brw_compact_inst_set_src0_index(devinfo, dst, compacted); 803b8e80941Smrg 804b8e80941Smrg return true; 805b8e80941Smrg} 806b8e80941Smrg 807b8e80941Smrgstatic bool 808b8e80941Smrgset_src1_index(const struct gen_device_info *devinfo, brw_compact_inst *dst, 809b8e80941Smrg const brw_inst *src, bool is_immediate) 810b8e80941Smrg{ 811b8e80941Smrg uint16_t compacted; 812b8e80941Smrg 813b8e80941Smrg if (is_immediate) { 814b8e80941Smrg compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f; 815b8e80941Smrg } else { 816b8e80941Smrg uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */ 817b8e80941Smrg 818b8e80941Smrg if (!get_src_index(uncompacted, &compacted)) 819b8e80941Smrg return false; 820b8e80941Smrg } 821b8e80941Smrg 822b8e80941Smrg brw_compact_inst_set_src1_index(devinfo, dst, compacted); 823b8e80941Smrg 824b8e80941Smrg return true; 825b8e80941Smrg} 826b8e80941Smrg 827b8e80941Smrgstatic bool 828b8e80941Smrgset_3src_control_index(const struct gen_device_info *devinfo, 829b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 830b8e80941Smrg{ 831b8e80941Smrg assert(devinfo->gen >= 8); 832b8e80941Smrg 833b8e80941Smrg uint32_t uncompacted = /* 24b/BDW; 26b/CHV */ 834b8e80941Smrg (brw_inst_bits(src, 34, 32) << 21) | /* 3b */ 835b8e80941Smrg (brw_inst_bits(src, 28, 8)); /* 21b */ 836b8e80941Smrg 837b8e80941Smrg if (devinfo->gen >= 9 || devinfo->is_cherryview) 838b8e80941Smrg uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */ 839b8e80941Smrg 840b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) { 841b8e80941Smrg if (gen8_3src_control_index_table[i] == uncompacted) { 842b8e80941Smrg brw_compact_inst_set_3src_control_index(devinfo, dst, i); 843b8e80941Smrg return true; 844b8e80941Smrg } 845b8e80941Smrg } 846b8e80941Smrg 847b8e80941Smrg return false; 848b8e80941Smrg} 849b8e80941Smrg 850b8e80941Smrgstatic bool 851b8e80941Smrgset_3src_source_index(const struct gen_device_info *devinfo, 852b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 853b8e80941Smrg{ 854b8e80941Smrg assert(devinfo->gen >= 8); 855b8e80941Smrg 856b8e80941Smrg uint64_t uncompacted = /* 46b/BDW; 49b/CHV */ 857b8e80941Smrg (brw_inst_bits(src, 83, 83) << 43) | /* 1b */ 858b8e80941Smrg (brw_inst_bits(src, 114, 107) << 35) | /* 8b */ 859b8e80941Smrg (brw_inst_bits(src, 93, 86) << 27) | /* 8b */ 860b8e80941Smrg (brw_inst_bits(src, 72, 65) << 19) | /* 8b */ 861b8e80941Smrg (brw_inst_bits(src, 55, 37)); /* 19b */ 862b8e80941Smrg 863b8e80941Smrg if (devinfo->gen >= 9 || devinfo->is_cherryview) { 864b8e80941Smrg uncompacted |= 865b8e80941Smrg (brw_inst_bits(src, 126, 125) << 47) | /* 2b */ 866b8e80941Smrg (brw_inst_bits(src, 105, 104) << 45) | /* 2b */ 867b8e80941Smrg (brw_inst_bits(src, 84, 84) << 44); /* 1b */ 868b8e80941Smrg } else { 869b8e80941Smrg uncompacted |= 870b8e80941Smrg (brw_inst_bits(src, 125, 125) << 45) | /* 1b */ 871b8e80941Smrg (brw_inst_bits(src, 104, 104) << 44); /* 1b */ 872b8e80941Smrg } 873b8e80941Smrg 874b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) { 875b8e80941Smrg if (gen8_3src_source_index_table[i] == uncompacted) { 876b8e80941Smrg brw_compact_inst_set_3src_source_index(devinfo, dst, i); 877b8e80941Smrg return true; 878b8e80941Smrg } 879b8e80941Smrg } 880b8e80941Smrg 881b8e80941Smrg return false; 882b8e80941Smrg} 883b8e80941Smrg 884b8e80941Smrgstatic bool 885b8e80941Smrghas_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src) 886b8e80941Smrg{ 887b8e80941Smrg /* EOT can only be mapped on a send if the src1 is an immediate */ 888b8e80941Smrg if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC || 889b8e80941Smrg brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) && 890b8e80941Smrg brw_inst_eot(devinfo, src)) 891b8e80941Smrg return true; 892b8e80941Smrg 893b8e80941Smrg /* Check for instruction bits that don't map to any of the fields of the 894b8e80941Smrg * compacted instruction. The instruction cannot be compacted if any of 895b8e80941Smrg * them are set. They overlap with: 896b8e80941Smrg * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) 897b8e80941Smrg * - Dst.AddrImm[9] (bit 47 on Gen8) 898b8e80941Smrg * - Src0.AddrImm[9] (bit 95 on Gen8) 899b8e80941Smrg * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8) 900b8e80941Smrg * - UIP[31] (bit 95 on Gen8) 901b8e80941Smrg */ 902b8e80941Smrg if (devinfo->gen >= 8) { 903b8e80941Smrg assert(!brw_inst_bits(src, 7, 7)); 904b8e80941Smrg return brw_inst_bits(src, 95, 95) || 905b8e80941Smrg brw_inst_bits(src, 47, 47) || 906b8e80941Smrg brw_inst_bits(src, 11, 11); 907b8e80941Smrg } else { 908b8e80941Smrg assert(!brw_inst_bits(src, 7, 7) && 909b8e80941Smrg !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90))); 910b8e80941Smrg return brw_inst_bits(src, 95, 91) || 911b8e80941Smrg brw_inst_bits(src, 47, 47); 912b8e80941Smrg } 913b8e80941Smrg} 914b8e80941Smrg 915b8e80941Smrgstatic bool 916b8e80941Smrghas_3src_unmapped_bits(const struct gen_device_info *devinfo, 917b8e80941Smrg const brw_inst *src) 918b8e80941Smrg{ 919b8e80941Smrg /* Check for three-source instruction bits that don't map to any of the 920b8e80941Smrg * fields of the compacted instruction. All of them seem to be reserved 921b8e80941Smrg * bits currently. 922b8e80941Smrg */ 923b8e80941Smrg if (devinfo->gen >= 9 || devinfo->is_cherryview) { 924b8e80941Smrg assert(!brw_inst_bits(src, 127, 127) && 925b8e80941Smrg !brw_inst_bits(src, 7, 7)); 926b8e80941Smrg } else { 927b8e80941Smrg assert(devinfo->gen >= 8); 928b8e80941Smrg assert(!brw_inst_bits(src, 127, 126) && 929b8e80941Smrg !brw_inst_bits(src, 105, 105) && 930b8e80941Smrg !brw_inst_bits(src, 84, 84) && 931b8e80941Smrg !brw_inst_bits(src, 7, 7)); 932b8e80941Smrg 933b8e80941Smrg /* Src1Type and Src2Type, used for mixed-precision floating point */ 934b8e80941Smrg if (brw_inst_bits(src, 36, 35)) 935b8e80941Smrg return true; 936b8e80941Smrg } 937b8e80941Smrg 938b8e80941Smrg return false; 939b8e80941Smrg} 940b8e80941Smrg 941b8e80941Smrgstatic bool 942b8e80941Smrgbrw_try_compact_3src_instruction(const struct gen_device_info *devinfo, 943b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 944b8e80941Smrg{ 945b8e80941Smrg assert(devinfo->gen >= 8); 946b8e80941Smrg 947b8e80941Smrg if (has_3src_unmapped_bits(devinfo, src)) 948b8e80941Smrg return false; 949b8e80941Smrg 950b8e80941Smrg#define compact(field) \ 951b8e80941Smrg brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src)) 952b8e80941Smrg#define compact_a16(field) \ 953b8e80941Smrg brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src)) 954b8e80941Smrg 955b8e80941Smrg compact(opcode); 956b8e80941Smrg 957b8e80941Smrg if (!set_3src_control_index(devinfo, dst, src)) 958b8e80941Smrg return false; 959b8e80941Smrg 960b8e80941Smrg if (!set_3src_source_index(devinfo, dst, src)) 961b8e80941Smrg return false; 962b8e80941Smrg 963b8e80941Smrg compact(dst_reg_nr); 964b8e80941Smrg compact_a16(src0_rep_ctrl); 965b8e80941Smrg brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true); 966b8e80941Smrg compact(debug_control); 967b8e80941Smrg compact(saturate); 968b8e80941Smrg compact_a16(src1_rep_ctrl); 969b8e80941Smrg compact_a16(src2_rep_ctrl); 970b8e80941Smrg compact(src0_reg_nr); 971b8e80941Smrg compact(src1_reg_nr); 972b8e80941Smrg compact(src2_reg_nr); 973b8e80941Smrg compact_a16(src0_subreg_nr); 974b8e80941Smrg compact_a16(src1_subreg_nr); 975b8e80941Smrg compact_a16(src2_subreg_nr); 976b8e80941Smrg 977b8e80941Smrg#undef compact 978b8e80941Smrg#undef compact_a16 979b8e80941Smrg 980b8e80941Smrg return true; 981b8e80941Smrg} 982b8e80941Smrg 983b8e80941Smrg/* Compacted instructions have 12-bits for immediate sources, and a 13th bit 984b8e80941Smrg * that's replicated through the high 20 bits. 985b8e80941Smrg * 986b8e80941Smrg * Effectively this means we get 12-bit integers, 0.0f, and some limited uses 987b8e80941Smrg * of packed vectors as compactable immediates. 988b8e80941Smrg */ 989b8e80941Smrgstatic bool 990b8e80941Smrgis_compactable_immediate(unsigned imm) 991b8e80941Smrg{ 992b8e80941Smrg /* We get the low 12 bits as-is. */ 993b8e80941Smrg imm &= ~0xfff; 994b8e80941Smrg 995b8e80941Smrg /* We get one bit replicated through the top 20 bits. */ 996b8e80941Smrg return imm == 0 || imm == 0xfffff000; 997b8e80941Smrg} 998b8e80941Smrg 999b8e80941Smrg/** 1000b8e80941Smrg * Applies some small changes to instruction types to increase chances of 1001b8e80941Smrg * compaction. 1002b8e80941Smrg */ 1003b8e80941Smrgstatic brw_inst 1004b8e80941Smrgprecompact(const struct gen_device_info *devinfo, brw_inst inst) 1005b8e80941Smrg{ 1006b8e80941Smrg if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE) 1007b8e80941Smrg return inst; 1008b8e80941Smrg 1009b8e80941Smrg /* The Bspec's section titled "Non-present Operands" claims that if src0 1010b8e80941Smrg * is an immediate that src1's type must be the same as that of src0. 1011b8e80941Smrg * 1012b8e80941Smrg * The SNB+ DataTypeIndex instruction compaction tables contain mappings 1013b8e80941Smrg * that do not follow this rule. E.g., from the IVB/HSW table: 1014b8e80941Smrg * 1015b8e80941Smrg * DataTypeIndex 18-Bit Mapping Mapped Meaning 1016b8e80941Smrg * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir | 1017b8e80941Smrg * 1018b8e80941Smrg * And from the SNB table: 1019b8e80941Smrg * 1020b8e80941Smrg * DataTypeIndex 18-Bit Mapping Mapped Meaning 1021b8e80941Smrg * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir | 1022b8e80941Smrg * 1023b8e80941Smrg * Neither of these cause warnings from the simulator when used, 1024b8e80941Smrg * compacted or otherwise. In fact, all compaction mappings that have an 1025b8e80941Smrg * immediate in src0 use a:ud for src1. 1026b8e80941Smrg * 1027b8e80941Smrg * The GM45 instruction compaction tables do not contain mapped meanings 1028b8e80941Smrg * so it's not clear whether it has the restriction. We'll assume it was 1029b8e80941Smrg * lifted on SNB. (FINISHME: decode the GM45 tables and check.) 1030b8e80941Smrg * 1031b8e80941Smrg * Don't do any of this for 64-bit immediates, since the src1 fields 1032b8e80941Smrg * overlap with the immediate and setting them would overwrite the 1033b8e80941Smrg * immediate we set. 1034b8e80941Smrg */ 1035b8e80941Smrg if (devinfo->gen >= 6 && 1036b8e80941Smrg !(devinfo->is_haswell && 1037b8e80941Smrg brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) && 1038b8e80941Smrg !(devinfo->gen >= 8 && 1039b8e80941Smrg (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF || 1040b8e80941Smrg brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ || 1041b8e80941Smrg brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) { 1042b8e80941Smrg enum brw_reg_file file = brw_inst_src1_reg_file(devinfo, &inst); 1043b8e80941Smrg brw_inst_set_src1_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_UD); 1044b8e80941Smrg } 1045b8e80941Smrg 1046b8e80941Smrg /* Compacted instructions only have 12-bits (plus 1 for the other 20) 1047b8e80941Smrg * for immediate values. Presumably the hardware engineers realized 1048b8e80941Smrg * that the only useful floating-point value that could be represented 1049b8e80941Smrg * in this format is 0.0, which can also be represented as a VF-typed 1050b8e80941Smrg * immediate, so they gave us the previously mentioned mapping on IVB+. 1051b8e80941Smrg * 1052b8e80941Smrg * Strangely, we do have a mapping for imm:f in src1, so we don't need 1053b8e80941Smrg * to do this there. 1054b8e80941Smrg * 1055b8e80941Smrg * If we see a 0.0:F, change the type to VF so that it can be compacted. 1056b8e80941Smrg */ 1057b8e80941Smrg if (brw_inst_imm_ud(devinfo, &inst) == 0x0 && 1058b8e80941Smrg brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F && 1059b8e80941Smrg brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F && 1060b8e80941Smrg brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) { 1061b8e80941Smrg enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst); 1062b8e80941Smrg brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF); 1063b8e80941Smrg } 1064b8e80941Smrg 1065b8e80941Smrg /* There are no mappings for dst:d | i:d, so if the immediate is suitable 1066b8e80941Smrg * set the types to :UD so the instruction can be compacted. 1067b8e80941Smrg */ 1068b8e80941Smrg if (is_compactable_immediate(brw_inst_imm_ud(devinfo, &inst)) && 1069b8e80941Smrg brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE && 1070b8e80941Smrg brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D && 1071b8e80941Smrg brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) { 1072b8e80941Smrg enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst); 1073b8e80941Smrg enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst); 1074b8e80941Smrg 1075b8e80941Smrg brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD); 1076b8e80941Smrg brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD); 1077b8e80941Smrg } 1078b8e80941Smrg 1079b8e80941Smrg return inst; 1080b8e80941Smrg} 1081b8e80941Smrg 1082b8e80941Smrg/** 1083b8e80941Smrg * Tries to compact instruction src into dst. 1084b8e80941Smrg * 1085b8e80941Smrg * It doesn't modify dst unless src is compactable, which is relied on by 1086b8e80941Smrg * brw_compact_instructions(). 1087b8e80941Smrg */ 1088b8e80941Smrgbool 1089b8e80941Smrgbrw_try_compact_instruction(const struct gen_device_info *devinfo, 1090b8e80941Smrg brw_compact_inst *dst, const brw_inst *src) 1091b8e80941Smrg{ 1092b8e80941Smrg brw_compact_inst temp; 1093b8e80941Smrg 1094b8e80941Smrg assert(brw_inst_cmpt_control(devinfo, src) == 0); 1095b8e80941Smrg 1096b8e80941Smrg if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) { 1097b8e80941Smrg if (devinfo->gen >= 8) { 1098b8e80941Smrg memset(&temp, 0, sizeof(temp)); 1099b8e80941Smrg if (brw_try_compact_3src_instruction(devinfo, &temp, src)) { 1100b8e80941Smrg *dst = temp; 1101b8e80941Smrg return true; 1102b8e80941Smrg } else { 1103b8e80941Smrg return false; 1104b8e80941Smrg } 1105b8e80941Smrg } else { 1106b8e80941Smrg return false; 1107b8e80941Smrg } 1108b8e80941Smrg } 1109b8e80941Smrg 1110b8e80941Smrg bool is_immediate = 1111b8e80941Smrg brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE || 1112b8e80941Smrg brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE; 1113b8e80941Smrg if (is_immediate && 1114b8e80941Smrg (devinfo->gen < 6 || 1115b8e80941Smrg !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) { 1116b8e80941Smrg return false; 1117b8e80941Smrg } 1118b8e80941Smrg 1119b8e80941Smrg if (has_unmapped_bits(devinfo, src)) 1120b8e80941Smrg return false; 1121b8e80941Smrg 1122b8e80941Smrg memset(&temp, 0, sizeof(temp)); 1123b8e80941Smrg 1124b8e80941Smrg#define compact(field) \ 1125b8e80941Smrg brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src)) 1126b8e80941Smrg 1127b8e80941Smrg compact(opcode); 1128b8e80941Smrg compact(debug_control); 1129b8e80941Smrg 1130b8e80941Smrg if (!set_control_index(devinfo, &temp, src)) 1131b8e80941Smrg return false; 1132b8e80941Smrg if (!set_datatype_index(devinfo, &temp, src)) 1133b8e80941Smrg return false; 1134b8e80941Smrg if (!set_subreg_index(devinfo, &temp, src, is_immediate)) 1135b8e80941Smrg return false; 1136b8e80941Smrg 1137b8e80941Smrg if (devinfo->gen >= 6) { 1138b8e80941Smrg compact(acc_wr_control); 1139b8e80941Smrg } else { 1140b8e80941Smrg compact(mask_control_ex); 1141b8e80941Smrg } 1142b8e80941Smrg 1143b8e80941Smrg compact(cond_modifier); 1144b8e80941Smrg 1145b8e80941Smrg if (devinfo->gen <= 6) 1146b8e80941Smrg compact(flag_subreg_nr); 1147b8e80941Smrg 1148b8e80941Smrg brw_compact_inst_set_cmpt_control(devinfo, &temp, true); 1149b8e80941Smrg 1150b8e80941Smrg if (!set_src0_index(devinfo, &temp, src)) 1151b8e80941Smrg return false; 1152b8e80941Smrg if (!set_src1_index(devinfo, &temp, src, is_immediate)) 1153b8e80941Smrg return false; 1154b8e80941Smrg 1155b8e80941Smrg brw_compact_inst_set_dst_reg_nr(devinfo, &temp, 1156b8e80941Smrg brw_inst_dst_da_reg_nr(devinfo, src)); 1157b8e80941Smrg brw_compact_inst_set_src0_reg_nr(devinfo, &temp, 1158b8e80941Smrg brw_inst_src0_da_reg_nr(devinfo, src)); 1159b8e80941Smrg 1160b8e80941Smrg if (is_immediate) { 1161b8e80941Smrg brw_compact_inst_set_src1_reg_nr(devinfo, &temp, 1162b8e80941Smrg brw_inst_imm_ud(devinfo, src) & 0xff); 1163b8e80941Smrg } else { 1164b8e80941Smrg brw_compact_inst_set_src1_reg_nr(devinfo, &temp, 1165b8e80941Smrg brw_inst_src1_da_reg_nr(devinfo, src)); 1166b8e80941Smrg } 1167b8e80941Smrg 1168b8e80941Smrg#undef compact 1169b8e80941Smrg 1170b8e80941Smrg *dst = temp; 1171b8e80941Smrg 1172b8e80941Smrg return true; 1173b8e80941Smrg} 1174b8e80941Smrg 1175b8e80941Smrgstatic void 1176b8e80941Smrgset_uncompacted_control(const struct gen_device_info *devinfo, brw_inst *dst, 1177b8e80941Smrg brw_compact_inst *src) 1178b8e80941Smrg{ 1179b8e80941Smrg uint32_t uncompacted = 1180b8e80941Smrg control_index_table[brw_compact_inst_control_index(devinfo, src)]; 1181b8e80941Smrg 1182b8e80941Smrg if (devinfo->gen >= 8) { 1183b8e80941Smrg brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16)); 1184b8e80941Smrg brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff); 1185b8e80941Smrg brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3); 1186b8e80941Smrg brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1); 1187b8e80941Smrg brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1); 1188b8e80941Smrg } else { 1189b8e80941Smrg brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1); 1190b8e80941Smrg brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff)); 1191b8e80941Smrg 1192b8e80941Smrg if (devinfo->gen == 7) 1193b8e80941Smrg brw_inst_set_bits(dst, 90, 89, uncompacted >> 17); 1194b8e80941Smrg } 1195b8e80941Smrg} 1196b8e80941Smrg 1197b8e80941Smrgstatic void 1198b8e80941Smrgset_uncompacted_datatype(const struct gen_device_info *devinfo, brw_inst *dst, 1199b8e80941Smrg brw_compact_inst *src) 1200b8e80941Smrg{ 1201b8e80941Smrg uint32_t uncompacted = 1202b8e80941Smrg datatype_table[brw_compact_inst_datatype_index(devinfo, src)]; 1203b8e80941Smrg 1204b8e80941Smrg if (devinfo->gen >= 8) { 1205b8e80941Smrg brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18)); 1206b8e80941Smrg brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f); 1207b8e80941Smrg brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff); 1208b8e80941Smrg } else { 1209b8e80941Smrg brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15)); 1210b8e80941Smrg brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff)); 1211b8e80941Smrg } 1212b8e80941Smrg} 1213b8e80941Smrg 1214b8e80941Smrgstatic void 1215b8e80941Smrgset_uncompacted_subreg(const struct gen_device_info *devinfo, brw_inst *dst, 1216b8e80941Smrg brw_compact_inst *src) 1217b8e80941Smrg{ 1218b8e80941Smrg uint16_t uncompacted = 1219b8e80941Smrg subreg_table[brw_compact_inst_subreg_index(devinfo, src)]; 1220b8e80941Smrg 1221b8e80941Smrg brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10)); 1222b8e80941Smrg brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f); 1223b8e80941Smrg brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f); 1224b8e80941Smrg} 1225b8e80941Smrg 1226b8e80941Smrgstatic void 1227b8e80941Smrgset_uncompacted_src0(const struct gen_device_info *devinfo, brw_inst *dst, 1228b8e80941Smrg brw_compact_inst *src) 1229b8e80941Smrg{ 1230b8e80941Smrg uint32_t compacted = brw_compact_inst_src0_index(devinfo, src); 1231b8e80941Smrg uint16_t uncompacted = src_index_table[compacted]; 1232b8e80941Smrg 1233b8e80941Smrg brw_inst_set_bits(dst, 88, 77, uncompacted); 1234b8e80941Smrg} 1235b8e80941Smrg 1236b8e80941Smrgstatic void 1237b8e80941Smrgset_uncompacted_src1(const struct gen_device_info *devinfo, brw_inst *dst, 1238b8e80941Smrg brw_compact_inst *src, bool is_immediate) 1239b8e80941Smrg{ 1240b8e80941Smrg if (is_immediate) { 1241b8e80941Smrg signed high5 = brw_compact_inst_src1_index(devinfo, src); 1242b8e80941Smrg /* Replicate top bit of src1_index into high 20 bits of the immediate. */ 1243b8e80941Smrg brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19); 1244b8e80941Smrg } else { 1245b8e80941Smrg uint16_t uncompacted = 1246b8e80941Smrg src_index_table[brw_compact_inst_src1_index(devinfo, src)]; 1247b8e80941Smrg 1248b8e80941Smrg brw_inst_set_bits(dst, 120, 109, uncompacted); 1249b8e80941Smrg } 1250b8e80941Smrg} 1251b8e80941Smrg 1252b8e80941Smrgstatic void 1253b8e80941Smrgset_uncompacted_3src_control_index(const struct gen_device_info *devinfo, 1254b8e80941Smrg brw_inst *dst, brw_compact_inst *src) 1255b8e80941Smrg{ 1256b8e80941Smrg assert(devinfo->gen >= 8); 1257b8e80941Smrg 1258b8e80941Smrg uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src); 1259b8e80941Smrg uint32_t uncompacted = gen8_3src_control_index_table[compacted]; 1260b8e80941Smrg 1261b8e80941Smrg brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7); 1262b8e80941Smrg brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff); 1263b8e80941Smrg 1264b8e80941Smrg if (devinfo->gen >= 9 || devinfo->is_cherryview) 1265b8e80941Smrg brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3); 1266b8e80941Smrg} 1267b8e80941Smrg 1268b8e80941Smrgstatic void 1269b8e80941Smrgset_uncompacted_3src_source_index(const struct gen_device_info *devinfo, 1270b8e80941Smrg brw_inst *dst, brw_compact_inst *src) 1271b8e80941Smrg{ 1272b8e80941Smrg assert(devinfo->gen >= 8); 1273b8e80941Smrg 1274b8e80941Smrg uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src); 1275b8e80941Smrg uint64_t uncompacted = gen8_3src_source_index_table[compacted]; 1276b8e80941Smrg 1277b8e80941Smrg brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1); 1278b8e80941Smrg brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff); 1279b8e80941Smrg brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff); 1280b8e80941Smrg brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff); 1281b8e80941Smrg brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff); 1282b8e80941Smrg 1283b8e80941Smrg if (devinfo->gen >= 9 || devinfo->is_cherryview) { 1284b8e80941Smrg brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3); 1285b8e80941Smrg brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3); 1286b8e80941Smrg brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1); 1287b8e80941Smrg } else { 1288b8e80941Smrg brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1); 1289b8e80941Smrg brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1); 1290b8e80941Smrg } 1291b8e80941Smrg} 1292b8e80941Smrg 1293b8e80941Smrgstatic void 1294b8e80941Smrgbrw_uncompact_3src_instruction(const struct gen_device_info *devinfo, 1295b8e80941Smrg brw_inst *dst, brw_compact_inst *src) 1296b8e80941Smrg{ 1297b8e80941Smrg assert(devinfo->gen >= 8); 1298b8e80941Smrg 1299b8e80941Smrg#define uncompact(field) \ 1300b8e80941Smrg brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src)) 1301b8e80941Smrg#define uncompact_a16(field) \ 1302b8e80941Smrg brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src)) 1303b8e80941Smrg 1304b8e80941Smrg uncompact(opcode); 1305b8e80941Smrg 1306b8e80941Smrg set_uncompacted_3src_control_index(devinfo, dst, src); 1307b8e80941Smrg set_uncompacted_3src_source_index(devinfo, dst, src); 1308b8e80941Smrg 1309b8e80941Smrg uncompact(dst_reg_nr); 1310b8e80941Smrg uncompact_a16(src0_rep_ctrl); 1311b8e80941Smrg brw_inst_set_3src_cmpt_control(devinfo, dst, false); 1312b8e80941Smrg uncompact(debug_control); 1313b8e80941Smrg uncompact(saturate); 1314b8e80941Smrg uncompact_a16(src1_rep_ctrl); 1315b8e80941Smrg uncompact_a16(src2_rep_ctrl); 1316b8e80941Smrg uncompact(src0_reg_nr); 1317b8e80941Smrg uncompact(src1_reg_nr); 1318b8e80941Smrg uncompact(src2_reg_nr); 1319b8e80941Smrg uncompact_a16(src0_subreg_nr); 1320b8e80941Smrg uncompact_a16(src1_subreg_nr); 1321b8e80941Smrg uncompact_a16(src2_subreg_nr); 1322b8e80941Smrg 1323b8e80941Smrg#undef uncompact 1324b8e80941Smrg#undef uncompact_a16 1325b8e80941Smrg} 1326b8e80941Smrg 1327b8e80941Smrgvoid 1328b8e80941Smrgbrw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst, 1329b8e80941Smrg brw_compact_inst *src) 1330b8e80941Smrg{ 1331b8e80941Smrg memset(dst, 0, sizeof(*dst)); 1332b8e80941Smrg 1333b8e80941Smrg if (devinfo->gen >= 8 && 1334b8e80941Smrg is_3src(devinfo, brw_compact_inst_3src_opcode(devinfo, src))) { 1335b8e80941Smrg brw_uncompact_3src_instruction(devinfo, dst, src); 1336b8e80941Smrg return; 1337b8e80941Smrg } 1338b8e80941Smrg 1339b8e80941Smrg#define uncompact(field) \ 1340b8e80941Smrg brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src)) 1341b8e80941Smrg 1342b8e80941Smrg uncompact(opcode); 1343b8e80941Smrg uncompact(debug_control); 1344b8e80941Smrg 1345b8e80941Smrg set_uncompacted_control(devinfo, dst, src); 1346b8e80941Smrg set_uncompacted_datatype(devinfo, dst, src); 1347b8e80941Smrg 1348b8e80941Smrg /* src0/1 register file fields are in the datatype table. */ 1349b8e80941Smrg bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE || 1350b8e80941Smrg brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE; 1351b8e80941Smrg 1352b8e80941Smrg set_uncompacted_subreg(devinfo, dst, src); 1353b8e80941Smrg 1354b8e80941Smrg if (devinfo->gen >= 6) { 1355b8e80941Smrg uncompact(acc_wr_control); 1356b8e80941Smrg } else { 1357b8e80941Smrg uncompact(mask_control_ex); 1358b8e80941Smrg } 1359b8e80941Smrg 1360b8e80941Smrg uncompact(cond_modifier); 1361b8e80941Smrg 1362b8e80941Smrg if (devinfo->gen <= 6) 1363b8e80941Smrg uncompact(flag_subreg_nr); 1364b8e80941Smrg 1365b8e80941Smrg set_uncompacted_src0(devinfo, dst, src); 1366b8e80941Smrg set_uncompacted_src1(devinfo, dst, src, is_immediate); 1367b8e80941Smrg 1368b8e80941Smrg brw_inst_set_dst_da_reg_nr(devinfo, dst, 1369b8e80941Smrg brw_compact_inst_dst_reg_nr(devinfo, src)); 1370b8e80941Smrg brw_inst_set_src0_da_reg_nr(devinfo, dst, 1371b8e80941Smrg brw_compact_inst_src0_reg_nr(devinfo, src)); 1372b8e80941Smrg 1373b8e80941Smrg if (is_immediate) { 1374b8e80941Smrg brw_inst_set_imm_ud(devinfo, dst, 1375b8e80941Smrg brw_inst_imm_ud(devinfo, dst) | 1376b8e80941Smrg brw_compact_inst_src1_reg_nr(devinfo, src)); 1377b8e80941Smrg } else { 1378b8e80941Smrg brw_inst_set_src1_da_reg_nr(devinfo, dst, 1379b8e80941Smrg brw_compact_inst_src1_reg_nr(devinfo, src)); 1380b8e80941Smrg } 1381b8e80941Smrg 1382b8e80941Smrg#undef uncompact 1383b8e80941Smrg} 1384b8e80941Smrg 1385b8e80941Smrgvoid brw_debug_compact_uncompact(const struct gen_device_info *devinfo, 1386b8e80941Smrg brw_inst *orig, 1387b8e80941Smrg brw_inst *uncompacted) 1388b8e80941Smrg{ 1389b8e80941Smrg fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", 1390b8e80941Smrg devinfo->gen); 1391b8e80941Smrg 1392b8e80941Smrg fprintf(stderr, " before: "); 1393b8e80941Smrg brw_disassemble_inst(stderr, devinfo, orig, true); 1394b8e80941Smrg 1395b8e80941Smrg fprintf(stderr, " after: "); 1396b8e80941Smrg brw_disassemble_inst(stderr, devinfo, uncompacted, false); 1397b8e80941Smrg 1398b8e80941Smrg uint32_t *before_bits = (uint32_t *)orig; 1399b8e80941Smrg uint32_t *after_bits = (uint32_t *)uncompacted; 1400b8e80941Smrg fprintf(stderr, " changed bits:\n"); 1401b8e80941Smrg for (int i = 0; i < 128; i++) { 1402b8e80941Smrg uint32_t before = before_bits[i / 32] & (1 << (i & 31)); 1403b8e80941Smrg uint32_t after = after_bits[i / 32] & (1 << (i & 31)); 1404b8e80941Smrg 1405b8e80941Smrg if (before != after) { 1406b8e80941Smrg fprintf(stderr, " bit %d, %s to %s\n", i, 1407b8e80941Smrg before ? "set" : "unset", 1408b8e80941Smrg after ? "set" : "unset"); 1409b8e80941Smrg } 1410b8e80941Smrg } 1411b8e80941Smrg} 1412b8e80941Smrg 1413b8e80941Smrgstatic int 1414b8e80941Smrgcompacted_between(int old_ip, int old_target_ip, int *compacted_counts) 1415b8e80941Smrg{ 1416b8e80941Smrg int this_compacted_count = compacted_counts[old_ip]; 1417b8e80941Smrg int target_compacted_count = compacted_counts[old_target_ip]; 1418b8e80941Smrg return target_compacted_count - this_compacted_count; 1419b8e80941Smrg} 1420b8e80941Smrg 1421b8e80941Smrgstatic void 1422b8e80941Smrgupdate_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn, 1423b8e80941Smrg int this_old_ip, int *compacted_counts) 1424b8e80941Smrg{ 1425b8e80941Smrg /* JIP and UIP are in units of: 1426b8e80941Smrg * - bytes on Gen8+; and 1427b8e80941Smrg * - compacted instructions on Gen6+. 1428b8e80941Smrg */ 1429b8e80941Smrg int shift = devinfo->gen >= 8 ? 3 : 0; 1430b8e80941Smrg 1431b8e80941Smrg int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift; 1432b8e80941Smrg jip_compacted -= compacted_between(this_old_ip, 1433b8e80941Smrg this_old_ip + (jip_compacted / 2), 1434b8e80941Smrg compacted_counts); 1435b8e80941Smrg brw_inst_set_jip(devinfo, insn, jip_compacted << shift); 1436b8e80941Smrg 1437b8e80941Smrg if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF || 1438b8e80941Smrg brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE || 1439b8e80941Smrg (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7)) 1440b8e80941Smrg return; 1441b8e80941Smrg 1442b8e80941Smrg int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift; 1443b8e80941Smrg uip_compacted -= compacted_between(this_old_ip, 1444b8e80941Smrg this_old_ip + (uip_compacted / 2), 1445b8e80941Smrg compacted_counts); 1446b8e80941Smrg brw_inst_set_uip(devinfo, insn, uip_compacted << shift); 1447b8e80941Smrg} 1448b8e80941Smrg 1449b8e80941Smrgstatic void 1450b8e80941Smrgupdate_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn, 1451b8e80941Smrg int this_old_ip, int *compacted_counts) 1452b8e80941Smrg{ 1453b8e80941Smrg assert(devinfo->gen == 5 || devinfo->is_g4x); 1454b8e80941Smrg 1455b8e80941Smrg /* Jump Count is in units of: 1456b8e80941Smrg * - uncompacted instructions on G45; and 1457b8e80941Smrg * - compacted instructions on Gen5. 1458b8e80941Smrg */ 1459b8e80941Smrg int shift = devinfo->is_g4x ? 1 : 0; 1460b8e80941Smrg 1461b8e80941Smrg int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift; 1462b8e80941Smrg 1463b8e80941Smrg int target_old_ip = this_old_ip + (jump_count_compacted / 2); 1464b8e80941Smrg 1465b8e80941Smrg int this_compacted_count = compacted_counts[this_old_ip]; 1466b8e80941Smrg int target_compacted_count = compacted_counts[target_old_ip]; 1467b8e80941Smrg 1468b8e80941Smrg jump_count_compacted -= (target_compacted_count - this_compacted_count); 1469b8e80941Smrg brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift); 1470b8e80941Smrg} 1471b8e80941Smrg 1472b8e80941Smrgvoid 1473b8e80941Smrgbrw_init_compaction_tables(const struct gen_device_info *devinfo) 1474b8e80941Smrg{ 1475b8e80941Smrg assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0); 1476b8e80941Smrg assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0); 1477b8e80941Smrg assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0); 1478b8e80941Smrg assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0); 1479b8e80941Smrg assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); 1480b8e80941Smrg assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); 1481b8e80941Smrg assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); 1482b8e80941Smrg assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); 1483b8e80941Smrg assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0); 1484b8e80941Smrg assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0); 1485b8e80941Smrg assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0); 1486b8e80941Smrg assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0); 1487b8e80941Smrg assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0); 1488b8e80941Smrg assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0); 1489b8e80941Smrg assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0); 1490b8e80941Smrg assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0); 1491b8e80941Smrg assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0); 1492b8e80941Smrg 1493b8e80941Smrg switch (devinfo->gen) { 1494b8e80941Smrg case 11: 1495b8e80941Smrg control_index_table = gen8_control_index_table; 1496b8e80941Smrg datatype_table = gen11_datatype_table; 1497b8e80941Smrg subreg_table = gen8_subreg_table; 1498b8e80941Smrg src_index_table = gen8_src_index_table; 1499b8e80941Smrg break; 1500b8e80941Smrg case 10: 1501b8e80941Smrg case 9: 1502b8e80941Smrg case 8: 1503b8e80941Smrg control_index_table = gen8_control_index_table; 1504b8e80941Smrg datatype_table = gen8_datatype_table; 1505b8e80941Smrg subreg_table = gen8_subreg_table; 1506b8e80941Smrg src_index_table = gen8_src_index_table; 1507b8e80941Smrg break; 1508b8e80941Smrg case 7: 1509b8e80941Smrg control_index_table = gen7_control_index_table; 1510b8e80941Smrg datatype_table = gen7_datatype_table; 1511b8e80941Smrg subreg_table = gen7_subreg_table; 1512b8e80941Smrg src_index_table = gen7_src_index_table; 1513b8e80941Smrg break; 1514b8e80941Smrg case 6: 1515b8e80941Smrg control_index_table = gen6_control_index_table; 1516b8e80941Smrg datatype_table = gen6_datatype_table; 1517b8e80941Smrg subreg_table = gen6_subreg_table; 1518b8e80941Smrg src_index_table = gen6_src_index_table; 1519b8e80941Smrg break; 1520b8e80941Smrg case 5: 1521b8e80941Smrg case 4: 1522b8e80941Smrg control_index_table = g45_control_index_table; 1523b8e80941Smrg datatype_table = g45_datatype_table; 1524b8e80941Smrg subreg_table = g45_subreg_table; 1525b8e80941Smrg src_index_table = g45_src_index_table; 1526b8e80941Smrg break; 1527b8e80941Smrg default: 1528b8e80941Smrg unreachable("unknown generation"); 1529b8e80941Smrg } 1530b8e80941Smrg} 1531b8e80941Smrg 1532b8e80941Smrgvoid 1533b8e80941Smrgbrw_compact_instructions(struct brw_codegen *p, int start_offset, 1534b8e80941Smrg struct disasm_info *disasm) 1535b8e80941Smrg{ 1536b8e80941Smrg if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION)) 1537b8e80941Smrg return; 1538b8e80941Smrg 1539b8e80941Smrg const struct gen_device_info *devinfo = p->devinfo; 1540b8e80941Smrg void *store = p->store + start_offset / 16; 1541b8e80941Smrg /* For an instruction at byte offset 16*i before compaction, this is the 1542b8e80941Smrg * number of compacted instructions minus the number of padding NOP/NENOPs 1543b8e80941Smrg * that preceded it. 1544b8e80941Smrg */ 1545b8e80941Smrg int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)]; 1546b8e80941Smrg /* For an instruction at byte offset 8*i after compaction, this was its IP 1547b8e80941Smrg * (in 16-byte units) before compaction. 1548b8e80941Smrg */ 1549b8e80941Smrg int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1]; 1550b8e80941Smrg 1551b8e80941Smrg if (devinfo->gen == 4 && !devinfo->is_g4x) 1552b8e80941Smrg return; 1553b8e80941Smrg 1554b8e80941Smrg int offset = 0; 1555b8e80941Smrg int compacted_count = 0; 1556b8e80941Smrg for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset; 1557b8e80941Smrg src_offset += sizeof(brw_inst)) { 1558b8e80941Smrg brw_inst *src = store + src_offset; 1559b8e80941Smrg void *dst = store + offset; 1560b8e80941Smrg 1561b8e80941Smrg old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst); 1562b8e80941Smrg compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count; 1563b8e80941Smrg 1564b8e80941Smrg brw_inst inst = precompact(devinfo, *src); 1565b8e80941Smrg brw_inst saved = inst; 1566b8e80941Smrg 1567b8e80941Smrg if (brw_try_compact_instruction(devinfo, dst, &inst)) { 1568b8e80941Smrg compacted_count++; 1569b8e80941Smrg 1570b8e80941Smrg if (INTEL_DEBUG) { 1571b8e80941Smrg brw_inst uncompacted; 1572b8e80941Smrg brw_uncompact_instruction(devinfo, &uncompacted, dst); 1573b8e80941Smrg if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { 1574b8e80941Smrg brw_debug_compact_uncompact(devinfo, &saved, &uncompacted); 1575b8e80941Smrg } 1576b8e80941Smrg } 1577b8e80941Smrg 1578b8e80941Smrg offset += sizeof(brw_compact_inst); 1579b8e80941Smrg } else { 1580b8e80941Smrg /* All uncompacted instructions need to be aligned on G45. */ 1581b8e80941Smrg if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){ 1582b8e80941Smrg brw_compact_inst *align = store + offset; 1583b8e80941Smrg memset(align, 0, sizeof(*align)); 1584b8e80941Smrg brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP); 1585b8e80941Smrg brw_compact_inst_set_cmpt_control(devinfo, align, true); 1586b8e80941Smrg offset += sizeof(brw_compact_inst); 1587b8e80941Smrg compacted_count--; 1588b8e80941Smrg compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count; 1589b8e80941Smrg old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst); 1590b8e80941Smrg 1591b8e80941Smrg dst = store + offset; 1592b8e80941Smrg } 1593b8e80941Smrg 1594b8e80941Smrg /* If we didn't compact this intruction, we need to move it down into 1595b8e80941Smrg * place. 1596b8e80941Smrg */ 1597b8e80941Smrg if (offset != src_offset) { 1598b8e80941Smrg memmove(dst, src, sizeof(brw_inst)); 1599b8e80941Smrg } 1600b8e80941Smrg offset += sizeof(brw_inst); 1601b8e80941Smrg } 1602b8e80941Smrg } 1603b8e80941Smrg 1604b8e80941Smrg /* Add an entry for the ending offset of the program. This greatly 1605b8e80941Smrg * simplifies the linked list walk at the end of the function. 1606b8e80941Smrg */ 1607b8e80941Smrg old_ip[offset / sizeof(brw_compact_inst)] = 1608b8e80941Smrg (p->next_insn_offset - start_offset) / sizeof(brw_inst); 1609b8e80941Smrg 1610b8e80941Smrg /* Fix up control flow offsets. */ 1611b8e80941Smrg p->next_insn_offset = start_offset + offset; 1612b8e80941Smrg for (offset = 0; offset < p->next_insn_offset - start_offset; 1613b8e80941Smrg offset = next_offset(devinfo, store, offset)) { 1614b8e80941Smrg brw_inst *insn = store + offset; 1615b8e80941Smrg int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)]; 1616b8e80941Smrg int this_compacted_count = compacted_counts[this_old_ip]; 1617b8e80941Smrg 1618b8e80941Smrg switch (brw_inst_opcode(devinfo, insn)) { 1619b8e80941Smrg case BRW_OPCODE_BREAK: 1620b8e80941Smrg case BRW_OPCODE_CONTINUE: 1621b8e80941Smrg case BRW_OPCODE_HALT: 1622b8e80941Smrg if (devinfo->gen >= 6) { 1623b8e80941Smrg update_uip_jip(devinfo, insn, this_old_ip, compacted_counts); 1624b8e80941Smrg } else { 1625b8e80941Smrg update_gen4_jump_count(devinfo, insn, this_old_ip, 1626b8e80941Smrg compacted_counts); 1627b8e80941Smrg } 1628b8e80941Smrg break; 1629b8e80941Smrg 1630b8e80941Smrg case BRW_OPCODE_IF: 1631b8e80941Smrg case BRW_OPCODE_IFF: 1632b8e80941Smrg case BRW_OPCODE_ELSE: 1633b8e80941Smrg case BRW_OPCODE_ENDIF: 1634b8e80941Smrg case BRW_OPCODE_WHILE: 1635b8e80941Smrg if (devinfo->gen >= 7) { 1636b8e80941Smrg if (brw_inst_cmpt_control(devinfo, insn)) { 1637b8e80941Smrg brw_inst uncompacted; 1638b8e80941Smrg brw_uncompact_instruction(devinfo, &uncompacted, 1639b8e80941Smrg (brw_compact_inst *)insn); 1640b8e80941Smrg 1641b8e80941Smrg update_uip_jip(devinfo, &uncompacted, this_old_ip, 1642b8e80941Smrg compacted_counts); 1643b8e80941Smrg 1644b8e80941Smrg bool ret = brw_try_compact_instruction(devinfo, 1645b8e80941Smrg (brw_compact_inst *)insn, 1646b8e80941Smrg &uncompacted); 1647b8e80941Smrg assert(ret); (void)ret; 1648b8e80941Smrg } else { 1649b8e80941Smrg update_uip_jip(devinfo, insn, this_old_ip, compacted_counts); 1650b8e80941Smrg } 1651b8e80941Smrg } else if (devinfo->gen == 6) { 1652b8e80941Smrg assert(!brw_inst_cmpt_control(devinfo, insn)); 1653b8e80941Smrg 1654b8e80941Smrg /* Jump Count is in units of compacted instructions on Gen6. */ 1655b8e80941Smrg int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn); 1656b8e80941Smrg 1657b8e80941Smrg int target_old_ip = this_old_ip + (jump_count_compacted / 2); 1658b8e80941Smrg int target_compacted_count = compacted_counts[target_old_ip]; 1659b8e80941Smrg jump_count_compacted -= (target_compacted_count - this_compacted_count); 1660b8e80941Smrg brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted); 1661b8e80941Smrg } else { 1662b8e80941Smrg update_gen4_jump_count(devinfo, insn, this_old_ip, 1663b8e80941Smrg compacted_counts); 1664b8e80941Smrg } 1665b8e80941Smrg break; 1666b8e80941Smrg 1667b8e80941Smrg case BRW_OPCODE_ADD: 1668b8e80941Smrg /* Add instructions modifying the IP register use an immediate src1, 1669b8e80941Smrg * and Gens that use this cannot compact instructions with immediate 1670b8e80941Smrg * operands. 1671b8e80941Smrg */ 1672b8e80941Smrg if (brw_inst_cmpt_control(devinfo, insn)) 1673b8e80941Smrg break; 1674b8e80941Smrg 1675b8e80941Smrg if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE && 1676b8e80941Smrg brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) { 1677b8e80941Smrg assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE); 1678b8e80941Smrg 1679b8e80941Smrg int shift = 3; 1680b8e80941Smrg int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift; 1681b8e80941Smrg 1682b8e80941Smrg int target_old_ip = this_old_ip + (jump_compacted / 2); 1683b8e80941Smrg int target_compacted_count = compacted_counts[target_old_ip]; 1684b8e80941Smrg jump_compacted -= (target_compacted_count - this_compacted_count); 1685b8e80941Smrg brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift); 1686b8e80941Smrg } 1687b8e80941Smrg break; 1688b8e80941Smrg } 1689b8e80941Smrg } 1690b8e80941Smrg 1691b8e80941Smrg /* p->nr_insn is counting the number of uncompacted instructions still, so 1692b8e80941Smrg * divide. We do want to be sure there's a valid instruction in any 1693b8e80941Smrg * alignment padding, so that the next compression pass (for the FS 8/16 1694b8e80941Smrg * compile passes) parses correctly. 1695b8e80941Smrg */ 1696b8e80941Smrg if (p->next_insn_offset & sizeof(brw_compact_inst)) { 1697b8e80941Smrg brw_compact_inst *align = store + offset; 1698b8e80941Smrg memset(align, 0, sizeof(*align)); 1699b8e80941Smrg brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP); 1700b8e80941Smrg brw_compact_inst_set_cmpt_control(devinfo, align, true); 1701b8e80941Smrg p->next_insn_offset += sizeof(brw_compact_inst); 1702b8e80941Smrg } 1703b8e80941Smrg p->nr_insn = p->next_insn_offset / sizeof(brw_inst); 1704b8e80941Smrg 1705b8e80941Smrg /* Update the instruction offsets for each group. */ 1706b8e80941Smrg if (disasm) { 1707b8e80941Smrg int offset = 0; 1708b8e80941Smrg 1709b8e80941Smrg foreach_list_typed(struct inst_group, group, link, &disasm->group_list) { 1710b8e80941Smrg while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] * 1711b8e80941Smrg sizeof(brw_inst) != group->offset) { 1712b8e80941Smrg assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] * 1713b8e80941Smrg sizeof(brw_inst) < group->offset); 1714b8e80941Smrg offset = next_offset(devinfo, store, offset); 1715b8e80941Smrg } 1716b8e80941Smrg 1717b8e80941Smrg group->offset = start_offset + offset; 1718b8e80941Smrg 1719b8e80941Smrg offset = next_offset(devinfo, store, offset); 1720b8e80941Smrg } 1721b8e80941Smrg } 1722b8e80941Smrg} 1723