1/*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that.  The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 *    break    JIP/UIP
65 *    cont     JIP/UIP
66 *    halt     JIP/UIP
67 *    if       JIP/UIP
68 *    else     JIP (plus UIP on BDW+)
69 *    endif    JIP
70 *    while    JIP (must be negative)
71 *
 * Gfx8 adds support for compacting 3-src instructions.
73 *
 * Gfx12 reduces the number of bits that are available to compacted immediates
 * from 13 to 12, but improves the compaction of floating-point immediates by
 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
 * three most significant bits of the mantissa), rather than the lowest bits of
 * the mantissa.
79 */
80
81#include "brw_eu.h"
82#include "brw_shader.h"
83#include "brw_disasm_info.h"
84#include "dev/intel_debug.h"
85
/* G45 control-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 17-bit pattern.  This is one of the lookup
 * tables the file comment describes for shrinking instruction fields; the
 * code that indexes it lives outside this table definition.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
120
/* G45 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as an 18-bit pattern.  The lookup itself is
 * performed by code outside this table definition.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
155
/* G45 sub-register lookup table for instruction compaction.
 *
 * 32 entries, each written as a 15-bit pattern, so every value fits in the
 * uint16_t element type.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
190
/* G45 source-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 12-bit pattern.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
225
/* Gfx6 control-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 17-bit pattern.
 */
static const uint32_t gfx6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
260
/* Gfx6 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as an 18-bit pattern.
 *
 * NOTE(review): entries 23 and 25 hold the same value.  They are kept
 * exactly as found; confirm against the hardware documentation before
 * changing either one.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
295
/* Gfx6 sub-register lookup table for instruction compaction.
 *
 * 32 entries, each written as a 15-bit pattern.
 */
static const uint16_t gfx6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
330
/* Gfx6 source-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 12-bit pattern.
 */
static const uint16_t gfx6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
365
/* Gfx7 control-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 19-bit pattern (two bits wider than the
 * G45/Gfx6 control-index entries in this file).
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
400
/* Gfx7 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as an 18-bit pattern.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
435
/* Gfx7 sub-register lookup table for instruction compaction.
 *
 * 32 entries, each written as a 15-bit pattern, listed in ascending
 * numeric order.
 */
static const uint16_t gfx7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
470
/* Gfx7 source-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 12-bit pattern, listed in ascending
 * numeric order.
 */
static const uint16_t gfx7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
505
/* Gfx8 control-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 19-bit pattern.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
540
/* Gfx8 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as a 21-bit pattern (three bits wider than the
 * Gfx7 datatype entries in this file).
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
575
/* Gfx8 sub-register lookup table for instruction compaction.
 *
 * 32 entries, each written as a 15-bit pattern.
 */
static const uint16_t gfx8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
610
/* Gfx8 source-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 12-bit pattern, listed in ascending
 * numeric order.
 */
static const uint16_t gfx8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
645
/* Gfx11 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as a 21-bit pattern.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
680
/* Gfx12 control-index lookup table for instruction compaction.
 *
 * 32 entries, each written as a 21-bit pattern.  The trailing comment on
 * each entry gives the disassembly-style controls that the pattern stands
 * for (write-enable/predicate, exec size and mask offset, conditional
 * modifier and flag, saturate, AccWrEn).
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /*        (16|M0)                            */
   0b000000000000000000011, /*        (8|M0)                             */
   0b000000010000000000000, /* (W)    (1|M0)                             */
   0b000000010000000000100, /* (W)    (16|M0)                            */
   0b000000010000000000011, /* (W)    (8|M0)                             */
   0b010000000000000000100, /*        (16|M0)  (ge)f0.0                  */
   0b000000000000000100100, /*        (16|M16)                           */
   0b010100000000000000100, /*        (16|M0)  (lt)f0.0                  */
   0b000000000000000000000, /*        (1|M0)                             */
   0b000010000000000000100, /*        (16|M0)           (sat)            */
   0b000000000000000010011, /*        (8|M8)                             */
   0b001100000000000000100, /*        (16|M0)  (gt)f0.0                  */
   0b000100000000000000100, /*        (16|M0)  (eq)f0.0                  */
   0b000100010000000000100, /* (W)    (16|M0)  (eq)f0.0                  */
   0b001000000000000000100, /*        (16|M0)  (ne)f0.0                  */
   0b000000000000100000100, /* (f0.0) (16|M0)                            */
   0b010100000000000000011, /*        (8|M0)   (lt)f0.0                  */
   0b000000000000110000100, /* (f1.0) (16|M0)                            */
   0b000000010000000000001, /* (W)    (2|M0)                             */
   0b000000000000101000100, /* (f0.1) (16|M0)                            */
   0b000000000000111000100, /* (f1.1) (16|M0)                            */
   0b010000010000000000100, /* (W)    (16|M0)  (ge)f0.0                  */
   0b000000000000000100011, /*        (8|M16)                            */
   0b000000000000000110011, /*        (8|M24)                            */
   0b010100010000000000100, /* (W)    (16|M0)  (lt)f0.0                  */
   0b010000000000000000011, /*        (8|M0)   (ge)f0.0                  */
   0b000100010000000000000, /* (W)    (1|M0)   (eq)f0.0                  */
   0b000010000000000000011, /*        (8|M0)            (sat)            */
   0b010100000000010000100, /*        (16|M0)  (lt)f1.0                  */
   0b000100000000000000011, /*        (8|M0)   (eq)f0.0                  */
   0b000001000000000000011, /*        (8|M0)                   {AccWrEn} */
   0b000000010000000100100, /* (W)    (16|M16)                           */
};
715
/* Gfx12 datatype lookup table for instruction compaction.
 *
 * 32 entries, each written as a 20-bit pattern.  The trailing comment on
 * each entry lists the destination, src0 and src1 register file and type
 * combination that the pattern stands for.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
   0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
   0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
   0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
   0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
   0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
   0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
   0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
   0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
   0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
   0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
   0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
   0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
   0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
   0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
};
750
/* Gfx12 sub-register lookup table for instruction compaction.
 *
 * 32 entries, each written as a 15-bit pattern.  The trailing comment on
 * each entry lists three sub-register offsets; judging by the bit
 * positions, the first column is held in the lowest bits and the last
 * column in the highest bits.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0  .0  .0  */
   0b100000000000000, /* .0  .0  .16 */
   0b001000000000000, /* .0  .0  .4  */
   0b011000000000000, /* .0  .0  .12 */
   0b000000010000000, /* .0  .4  .0  */
   0b010000000000000, /* .0  .0  .8  */
   0b101000000000000, /* .0  .0  .20 */
   0b000000000001000, /* .8  .0  .0  */
   0b000000100000000, /* .0  .8  .0  */
   0b110000000000000, /* .0  .0  .24 */
   0b111000000000000, /* .0  .0  .28 */
   0b000001000000000, /* .0  .16 .0  */
   0b000000000000100, /* .4  .0  .0  */
   0b000001100000000, /* .0  .24 .0  */
   0b000001010000000, /* .0  .20 .0  */
   0b000000110000000, /* .0  .12 .0  */
   0b000001110000000, /* .0  .28 .0  */
   0b000000000011100, /* .28 .0  .0  */
   0b000000000010000, /* .16 .0  .0  */
   0b000000000001100, /* .12 .0  .0  */
   0b000000000011000, /* .24 .0  .0  */
   0b000000000010100, /* .20 .0  .0  */
   0b000000000000010, /* .2  .0  .0  */
   0b000000101000000, /* .0  .10 .0  */
   0b000000001000000, /* .0  .2  .0  */
   0b000000010000100, /* .4  .4  .0  */
   0b000000001011100, /* .28 .2  .0  */
   0b000000001000010, /* .2  .2  .0  */
   0b000000110001100, /* .12 .12 .0  */
   0b000000000100000, /* .0  .1  .0  */
   0b000000001100000, /* .0  .3  .0  */
   0b110001100000000, /* .0  .24 .24 */
};
785
/* Gfx12 src0-index lookup table for instruction compaction.
 *
 * 16 entries, each written as a 12-bit pattern.  The trailing comment on
 * each entry shows the source modifier and region that the pattern stands
 * for.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /*       r<8;8,1>  */
   0b000000000000, /*       r<0;1,0>  */
   0b010001100110, /*      -r<8;8,1>  */
   0b010001100101, /*  (abs)r<8;8,1>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001000100100, /*       r<2;2,1>  */
   0b001100000000, /*       r<4;1,0>  */
   0b001000100110, /*      -r<2;2,1>  */
   0b001101000100, /*       r<4;4,1>  */
   0b010001100111, /* -(abs)r<8;8,1>  */
   0b000100000000, /*       r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
};
804
/* Gfx12 src1-index lookup table for instruction compaction.
 *
 * 16 entries, each written as a 12-bit pattern.  The trailing comment on
 * each entry shows the source modifier and region that the pattern stands
 * for.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /*       r<8;8,1> */
   0b000000000000, /*       r<0;1,0> */
   0b100100011001, /*      -r<8;8,1> */
   0b100000000000, /*      -r<0;1,0> */
   0b010100011001, /*  (abs)r<8;8,1> */
   0b100011010000, /*      -r<4;4,0> */
   0b000010000000, /*       r<2;1,0> */
   0b000010001001, /*       r<2;2,1> */
   0b100010001001, /*      -r<2;2,1> */
   0b000011010000, /*       r<4;4,0> */
   0b000011010001, /*       r<4;4,1> */
   0b000011000000, /*       r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /*  (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /*      -r<4;4,1> */
};
823
/* XeHP src0-index lookup table for instruction compaction.
 *
 * 16 entries, each written as a 12-bit pattern.  The trailing comment on
 * each entry shows the source modifier and region that the pattern stands
 * for.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /*       r<1;1,0>  */
   0b000000000000, /*       r<0;1,0>  */
   0b000100000010, /*      -r<1;1,0>  */
   0b000100000001, /*  (abs)r<1;1,0>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001100000000, /*       r<4;1,0>  */
   0b000100000011, /* -(abs)r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
   0b010001100000, /*       r<8;8,0>  */
   0b000101000000, /*       r<1;4,0>  */
   0b010001001000, /*       r<8;4,2>  */
   0b001000000010, /*      -r<2;1,0>  */
};
842
/* XeHP src1-index lookup table for instruction compaction.
 *
 * 16 entries, each written as a 12-bit pattern.  The trailing comment on
 * each entry shows the source modifier and region that the pattern stands
 * for.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /*       r<1;1,0>    */
   0b000000000000, /*       r<0;1,0>    */
   0b100001000000, /*      -r<1;1,0>    */
   0b100000000000, /*      -r<0;1,0>    */
   0b010001000000, /*  (abs)r<1;1,0>    */
   0b100011010000, /*      -r<4;4,0>    */
   0b000010000000, /*       r<2;1,0>    */
   0b000011010000, /*       r<4;4,0>    */
   0b000011000000, /*       r<4;1,0>    */
   0b110001000000, /* -(abs)r<1;1,0>    */
   0b010000000000, /*  (abs)r<0;1,0>    */
   0b110000000000, /* -(abs)r<0;1,0>    */
   0b000100011000, /*       r<8;8,0>    */
   0b100010000000, /*      -r<2;1,0>    */
   0b100000001001, /*      -r<0;2,1>    */
   0b100001000100, /*      -r[a]<1;1,0> */
};
861
/* Control-index lookup table for compacting Gfx8 3-src instructions.
 *
 * This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware, so one 4-entry
 * table serves both.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
874
/* Source-index lookup table for compacting Gfx8 3-src instructions.
 *
 * This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * Entries are wider than 32 bits, hence the uint64_t element type.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
888
/* Gfx12 control-index lookup table for compacting 3-src instructions.
 *
 * 32 entries, each written as a 36-bit pattern (hence uint64_t).  The
 * trailing comment on each entry shows the write-enable flag, exec size and
 * mask offset, saturate flag, destination register file, and the
 * destination/source types that the pattern stands for.
 */
static const uint64_t gfx12_3src_control_index_table[32] = {
   0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
   0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
   0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
};
923
/* Three-source control-index patterns searched by set_3src_control_index()
 * when devinfo->verx10 >= 125.
 *
 * Each entry is a 37-bit key built, from most to least significant, from
 * uncompacted instruction bits 95:92, 90:88, 82:80, 50, 49:48, 42:40, 39,
 * 38:36, 34, 33, 32, 31, 28, 27:24, 23, 22, 21:19 and 18:16.
 *
 * The position of the matching entry is what gets stored as the compacted
 * 3src control index, so the order of the entries is significant.  The
 * trailing comment on each entry annotates the instruction form that the
 * pattern describes.
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
};
958
/* Three-source source-index patterns searched by set_3src_source_index()
 * when devinfo->ver >= 12 and devinfo->verx10 < 125.
 *
 * Each entry is a 21-bit key built, from most to least significant, from
 * uncompacted instruction bits 114, 113:112, 98, 97:96, 91, 87:86, 85:84,
 * 83, 66, 65:64, 47, 46, 45:44, 43 and 35.
 *
 * The position of the matching entry is what gets stored as the compacted
 * 3src source index, so the order of the entries is significant.  The
 * trailing comment on each entry annotates the source-operand forms that
 * the pattern describes.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
993
/* Three-source source-index patterns searched by set_3src_source_index()
 * when devinfo->verx10 >= 125.
 *
 * Each entry is a 21-bit key built, from most to least significant, from
 * uncompacted instruction bits 114, 113:112, 98, 97:96, 91, 87:86, 85:84,
 * 83, 66, 65:64, 47, 46, 45:44, 43 and 35.
 *
 * The position of the matching entry is what gets stored as the compacted
 * 3src source index, so the order of the entries is significant.  The
 * trailing comment on each entry annotates the source-operand forms (or
 * dpas operand types) that the pattern describes.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
   0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
   0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
   0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
   0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
   0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
   0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
   0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
   0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
   0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
   0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
   0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
   0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
   0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
   0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
   0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
   0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
                             * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
                             * dpas.*x1  grf:f      grf:bf       grf:bf
                             */
   0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
   0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
   0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
   0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
   0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
   0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
   0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
   0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
};
1031
/* Three-source subregister patterns searched by set_3src_subreg_index(),
 * which asserts devinfo->ver >= 12.
 *
 * Each entry is a 20-bit key made of four 5-bit fields taken, from most to
 * least significant, from uncompacted instruction bits 119:115, 103:99,
 * 71:67 and 55:51.  The four values in each trailing comment are listed in
 * the opposite order, i.e. the last value corresponds to the most
 * significant field.
 *
 * The position of the matching entry is what gets stored as the compacted
 * 3src subreg index, so the order of the entries is significant.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
1066
/* Lookup tables consulted by the set_*_index() helpers while compacting a
 * one- or two-source instruction, together with the device they apply to.
 */
struct compaction_state {
   /* Device description; its ver field selects the key layout in each helper. */
   const struct intel_device_info *devinfo;
   /* Searched by set_control_index(), which scans 32 entries. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), which scans 32 entries. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(), which scans 32 entries. */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(); the number of entries scanned depends on
    * devinfo->ver.
    */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index() for non-immediate src1; the number of
    * entries scanned depends on devinfo->ver.
    */
   const uint16_t *src1_index_table;
};
1075
/* Forward declaration; the definition appears later in this file.
 * NOTE(review): presumably fills in c->devinfo and the table pointers that
 * match the given device -- confirm against the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct intel_device_info *devinfo);
1078
1079static bool
1080set_control_index(const struct compaction_state *c,
1081                  brw_compact_inst *dst, const brw_inst *src)
1082{
1083   const struct intel_device_info *devinfo = c->devinfo;
1084   uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1085
1086   if (devinfo->ver >= 12) {
1087      uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
1088                    (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1089                    (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1090                    (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1091                    (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1092                    (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1093                    (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1094                    (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
1095                    (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1096                    (brw_inst_bits(src, 18, 16));        /*  3b */
1097   } else if (devinfo->ver >= 8) {
1098      uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
1099                    (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
1100                    (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
1101                    (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
1102                    (brw_inst_bits(src,  8,  8));        /*  1b */
1103   } else {
1104      uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
1105                    (brw_inst_bits(src, 23,  8));        /* 16b */
1106
1107      /* On gfx7, the flag register and subregister numbers are integrated into
1108       * the control index.
1109       */
1110      if (devinfo->ver == 7)
1111         uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1112   }
1113
1114   for (int i = 0; i < 32; i++) {
1115      if (c->control_index_table[i] == uncompacted) {
1116         brw_compact_inst_set_control_index(devinfo, dst, i);
1117	 return true;
1118      }
1119   }
1120
1121   return false;
1122}
1123
1124static bool
1125set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126                   const brw_inst *src, bool is_immediate)
1127{
1128   const struct intel_device_info *devinfo = c->devinfo;
1129   uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1130
1131   if (devinfo->ver >= 12) {
1132      uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
1133                    (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
1134                    (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
1135                    (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
1136                    (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
1137                    (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
1138                    (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
1139                    (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
1140                    (brw_inst_bits(src, 35, 35));        /*  1b */
1141
1142      /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1143       * is present
1144       */
1145      if (!is_immediate) {
1146         uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1147      }
1148   } else if (devinfo->ver >= 8) {
1149      uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
1150                    (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1151                    (brw_inst_bits(src, 46, 35));        /* 12b */
1152   } else {
1153      uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
1154                    (brw_inst_bits(src, 46, 32));        /* 15b */
1155   }
1156
1157   for (int i = 0; i < 32; i++) {
1158      if (c->datatype_table[i] == uncompacted) {
1159         brw_compact_inst_set_datatype_index(devinfo, dst, i);
1160	 return true;
1161      }
1162   }
1163
1164   return false;
1165}
1166
1167static bool
1168set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169                 const brw_inst *src, bool is_immediate)
1170{
1171   const struct intel_device_info *devinfo = c->devinfo;
1172   uint16_t uncompacted; /* 15b */
1173
1174   if (devinfo->ver >= 12) {
1175      uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1176                    (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1177
1178      if (!is_immediate)
1179         uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1180   } else {
1181      uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1182                    (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1183
1184      if (!is_immediate)
1185         uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1186   }
1187
1188   for (int i = 0; i < 32; i++) {
1189      if (c->subreg_table[i] == uncompacted) {
1190         brw_compact_inst_set_subreg_index(devinfo, dst, i);
1191	 return true;
1192      }
1193   }
1194
1195   return false;
1196}
1197
1198static bool
1199set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200               const brw_inst *src)
1201{
1202   const struct intel_device_info *devinfo = c->devinfo;
1203   uint16_t uncompacted; /* 12b */
1204   int table_len;
1205
1206   if (devinfo->ver >= 12) {
1207      table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208      uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
1209                    (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1210                    (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1211                    (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1212                    (brw_inst_bits(src, 45, 44));       /*  2b */
1213   } else {
1214      table_len = ARRAY_SIZE(gfx8_src_index_table);
1215      uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1216   }
1217
1218   for (int i = 0; i < table_len; i++) {
1219      if (c->src0_index_table[i] == uncompacted) {
1220         brw_compact_inst_set_src0_index(devinfo, dst, i);
1221	 return true;
1222      }
1223   }
1224
1225   return false;
1226}
1227
1228static bool
1229set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230               const brw_inst *src, bool is_immediate, unsigned imm)
1231{
1232   const struct intel_device_info *devinfo = c->devinfo;
1233   if (is_immediate) {
1234      if (devinfo->ver >= 12) {
1235         /* src1 index takes the low 4 bits of the 12-bit compacted value */
1236         brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1237      } else {
1238         /* src1 index takes the high 5 bits of the 13-bit compacted value */
1239         brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1240      }
1241      return true;
1242   } else {
1243      uint16_t uncompacted; /* 12b */
1244      int table_len;
1245
1246      if (devinfo->ver >= 12) {
1247         table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248         uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1249                       (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1250                       (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1251                       (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1252                       (brw_inst_bits(src,  97,  96));        /*  2b */
1253      } else {
1254         table_len = ARRAY_SIZE(gfx8_src_index_table);
1255         uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1256      }
1257
1258      for (int i = 0; i < table_len; i++) {
1259         if (c->src1_index_table[i] == uncompacted) {
1260            brw_compact_inst_set_src1_index(devinfo, dst, i);
1261            return true;
1262         }
1263      }
1264   }
1265
1266   return false;
1267}
1268
1269static bool
1270set_3src_control_index(const struct intel_device_info *devinfo,
1271                       brw_compact_inst *dst, const brw_inst *src)
1272{
1273   assert(devinfo->ver >= 8);
1274
1275   if (devinfo->verx10 >= 125) {
1276      uint64_t uncompacted =             /* 37b/XeHP+ */
1277         (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
1278         (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
1279         (brw_inst_bits(src, 82, 80) << 27) | /*  3b */
1280         (brw_inst_bits(src, 50, 50) << 26) | /*  1b */
1281         (brw_inst_bits(src, 49, 48) << 24) | /*  2b */
1282         (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1283         (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1284         (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1285         (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1286         (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1287         (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1288         (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1289         (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1290         (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1291         (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1292         (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1293         (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1294         (brw_inst_bits(src, 18, 16));        /*  3b */
1295
1296      for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297         if (xehp_3src_control_index_table[i] == uncompacted) {
1298            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1299            return true;
1300         }
1301      }
1302   } else if (devinfo->ver >= 12) {
1303      uint64_t uncompacted =             /* 36b/TGL+ */
1304         (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1305         (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1306         (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1307         (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1308         (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1309         (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1310         (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1311         (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1312         (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1313         (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1314         (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1315         (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1316         (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1317         (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1318         (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1319         (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1320         (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1321         (brw_inst_bits(src, 18, 16));        /*  3b */
1322
1323      for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324         if (gfx12_3src_control_index_table[i] == uncompacted) {
1325            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1326            return true;
1327         }
1328      }
1329   } else {
1330      uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331         (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1332         (brw_inst_bits(src, 28,  8));         /* 21b */
1333
1334      if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1335         uncompacted |=
1336            brw_inst_bits(src, 36, 35) << 24;  /*  2b */
1337      }
1338
1339      for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340         if (gfx8_3src_control_index_table[i] == uncompacted) {
1341            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1342            return true;
1343         }
1344      }
1345   }
1346
1347   return false;
1348}
1349
1350static bool
1351set_3src_source_index(const struct intel_device_info *devinfo,
1352                      brw_compact_inst *dst, const brw_inst *src)
1353{
1354   assert(devinfo->ver >= 8);
1355
1356   if (devinfo->ver >= 12) {
1357      uint32_t uncompacted =               /* 21b/TGL+ */
1358         (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1359         (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1360         (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1361         (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1362         (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1363         (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1364         (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1365         (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1366         (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1367         (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1368         (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1369         (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1370         (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1371         (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1372         (brw_inst_bits(src,  35,  35));        /*  1b */
1373
1374      const uint32_t *three_src_source_index_table =
1375         devinfo->verx10 >= 125 ?
1376         xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377      const uint32_t three_src_source_index_table_len =
1378         devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379                                  ARRAY_SIZE(gfx12_3src_source_index_table);
1380
1381      for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382         if (three_src_source_index_table[i] == uncompacted) {
1383            brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1384            return true;
1385         }
1386      }
1387   } else {
1388      uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1389         (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1390         (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1391         (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1392         (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1393         (brw_inst_bits(src,  55,  37));          /* 19b */
1394
1395      if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1396         uncompacted |=
1397            (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398            (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399            (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
1400      } else {
1401         uncompacted |=
1402            (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403            (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
1404      }
1405
1406      for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407         if (gfx8_3src_source_index_table[i] == uncompacted) {
1408            brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1409            return true;
1410         }
1411      }
1412   }
1413
1414   return false;
1415}
1416
1417static bool
1418set_3src_subreg_index(const struct intel_device_info *devinfo,
1419                      brw_compact_inst *dst, const brw_inst *src)
1420{
1421   assert(devinfo->ver >= 12);
1422
1423   uint32_t uncompacted =               /* 20b/TGL+ */
1424      (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1425      (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1426      (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1427      (brw_inst_bits(src,  55,  51));        /*  5b */
1428
1429   for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430      if (gfx12_3src_subreg_table[i] == uncompacted) {
1431         brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1432	 return true;
1433      }
1434   }
1435
1436   return false;
1437}
1438
1439static bool
1440has_unmapped_bits(const struct intel_device_info *devinfo, const brw_inst *src)
1441{
1442   /* EOT can only be mapped on a send if the src1 is an immediate */
1443   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
1444        brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
1445       brw_inst_eot(devinfo, src))
1446      return true;
1447
1448   /* Check for instruction bits that don't map to any of the fields of the
1449    * compacted instruction.  The instruction cannot be compacted if any of
1450    * them are set.  They overlap with:
1451    *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1452    *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1453    *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1454    *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1455    *  - UIP[31] (bit 95 on Gfx8)
1456    */
1457   if (devinfo->ver >= 12) {
1458      assert(!brw_inst_bits(src, 7,  7));
1459      return false;
1460   } else if (devinfo->ver >= 8) {
1461      assert(!brw_inst_bits(src, 7,  7));
1462      return brw_inst_bits(src, 95, 95) ||
1463             brw_inst_bits(src, 47, 47) ||
1464             brw_inst_bits(src, 11, 11);
1465   } else {
1466      assert(!brw_inst_bits(src, 7,  7) &&
1467             !(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1468      return brw_inst_bits(src, 95, 91) ||
1469             brw_inst_bits(src, 47, 47);
1470   }
1471}
1472
1473static bool
1474has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1475                       const brw_inst *src)
1476{
1477   /* Check for three-source instruction bits that don't map to any of the
1478    * fields of the compacted instruction.  All of them seem to be reserved
1479    * bits currently.
1480    */
1481   if (devinfo->ver >= 12) {
1482      assert(!brw_inst_bits(src, 7, 7));
1483   } else if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1484      assert(!brw_inst_bits(src, 127, 127) &&
1485             !brw_inst_bits(src, 7,  7));
1486   } else {
1487      assert(devinfo->ver >= 8);
1488      assert(!brw_inst_bits(src, 127, 126) &&
1489             !brw_inst_bits(src, 105, 105) &&
1490             !brw_inst_bits(src, 84, 84) &&
1491             !brw_inst_bits(src, 7,  7));
1492
1493      /* Src1Type and Src2Type, used for mixed-precision floating point */
1494      if (brw_inst_bits(src, 36, 35))
1495         return true;
1496   }
1497
1498   return false;
1499}
1500
1501static bool
1502brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1503                                 brw_compact_inst *dst, const brw_inst *src)
1504{
1505   assert(devinfo->ver >= 8);
1506
1507   if (has_3src_unmapped_bits(devinfo, src))
1508      return false;
1509
1510#define compact(field) \
1511   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1512#define compact_a16(field) \
1513   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1514
1515   compact(hw_opcode);
1516
1517   if (!set_3src_control_index(devinfo, dst, src))
1518      return false;
1519
1520   if (!set_3src_source_index(devinfo, dst, src))
1521      return false;
1522
1523   if (devinfo->ver >= 12) {
1524      if (!set_3src_subreg_index(devinfo, dst, src))
1525         return false;
1526
1527      compact(swsb);
1528      compact(debug_control);
1529      compact(dst_reg_nr);
1530      compact(src0_reg_nr);
1531      compact(src1_reg_nr);
1532      compact(src2_reg_nr);
1533   } else {
1534      compact(dst_reg_nr);
1535      compact_a16(src0_rep_ctrl);
1536      compact(debug_control);
1537      compact(saturate);
1538      compact_a16(src1_rep_ctrl);
1539      compact_a16(src2_rep_ctrl);
1540      compact(src0_reg_nr);
1541      compact(src1_reg_nr);
1542      compact(src2_reg_nr);
1543      compact_a16(src0_subreg_nr);
1544      compact_a16(src1_subreg_nr);
1545      compact_a16(src2_subreg_nr);
1546   }
1547   brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1548
1549#undef compact
1550#undef compact_a16
1551
1552   return true;
1553}
1554
1555/* On SNB through ICL, compacted instructions have 12-bits for immediate
1556 * sources, and a 13th bit that's replicated through the high 20 bits.
1557 *
1558 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1559 * of packed vectors as compactable immediates.
1560 *
1561 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1562 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1563 * while for unsigned integers it is not.
1564 *
1565 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1566 */
1567static int
1568compact_immediate(const struct intel_device_info *devinfo,
1569                  enum brw_reg_type type, unsigned imm)
1570{
1571   if (devinfo->ver >= 12) {
1572      /* 16-bit immediates need to be replicated through the 32-bit immediate
1573       * field
1574       */
1575      switch (type) {
1576      case BRW_REGISTER_TYPE_W:
1577      case BRW_REGISTER_TYPE_UW:
1578      case BRW_REGISTER_TYPE_HF:
1579         if ((imm >> 16) != (imm & 0xffff))
1580            return -1;
1581         break;
1582      default:
1583         break;
1584      }
1585
1586      switch (type) {
1587      case BRW_REGISTER_TYPE_F:
1588         /* We get the high 12-bits as-is; rest must be zero */
1589         if ((imm & 0xfffff) == 0)
1590            return (imm >> 20) & 0xfff;
1591         break;
1592      case BRW_REGISTER_TYPE_HF:
1593         /* We get the high 12-bits as-is; rest must be zero */
1594         if ((imm & 0xf) == 0)
1595            return (imm >> 4) & 0xfff;
1596         break;
1597      case BRW_REGISTER_TYPE_UD:
1598      case BRW_REGISTER_TYPE_VF:
1599      case BRW_REGISTER_TYPE_UV:
1600      case BRW_REGISTER_TYPE_V:
1601         /* We get the low 12-bits as-is; rest must be zero */
1602         if ((imm & 0xfffff000) == 0)
1603            return imm & 0xfff;
1604         break;
1605      case BRW_REGISTER_TYPE_UW:
1606         /* We get the low 12-bits as-is; rest must be zero */
1607         if ((imm & 0xf000) == 0)
1608            return imm & 0xfff;
1609         break;
1610      case BRW_REGISTER_TYPE_D:
1611         /* We get the low 11-bits as-is; 12th is replicated */
1612         if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1613            return imm & 0xfff;
1614         break;
1615      case BRW_REGISTER_TYPE_W:
1616         /* We get the low 11-bits as-is; 12th is replicated */
1617         if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1618            return imm & 0xfff;
1619         break;
1620      case BRW_REGISTER_TYPE_NF:
1621      case BRW_REGISTER_TYPE_DF:
1622      case BRW_REGISTER_TYPE_Q:
1623      case BRW_REGISTER_TYPE_UQ:
1624      case BRW_REGISTER_TYPE_B:
1625      case BRW_REGISTER_TYPE_UB:
1626         return -1;
1627      }
1628   } else {
1629      /* We get the low 12 bits as-is; 13th is replicated */
1630      if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1631         return imm & 0x1fff;
1632      }
1633   }
1634
1635   return -1;
1636}
1637
1638static int
1639uncompact_immediate(const struct intel_device_info *devinfo,
1640                    enum brw_reg_type type, unsigned compact_imm)
1641{
1642   if (devinfo->ver >= 12) {
1643      switch (type) {
1644      case BRW_REGISTER_TYPE_F:
1645         return compact_imm << 20;
1646      case BRW_REGISTER_TYPE_HF:
1647         return (compact_imm << 20) | (compact_imm << 4);
1648      case BRW_REGISTER_TYPE_UD:
1649      case BRW_REGISTER_TYPE_VF:
1650      case BRW_REGISTER_TYPE_UV:
1651      case BRW_REGISTER_TYPE_V:
1652         return compact_imm;
1653      case BRW_REGISTER_TYPE_UW:
1654         /* Replicate */
1655         return compact_imm << 16 | compact_imm;
1656      case BRW_REGISTER_TYPE_D:
1657         /* Extend the 12th bit into the high 20 bits */
1658         return (int)(compact_imm << 20) >> 20;
1659      case BRW_REGISTER_TYPE_W:
1660         /* Extend the 12th bit into the high 4 bits and replicate */
1661         return (  (int)(compact_imm << 20) >> 4) |
1662                ((short)(compact_imm <<  4) >> 4);
1663      case BRW_REGISTER_TYPE_NF:
1664      case BRW_REGISTER_TYPE_DF:
1665      case BRW_REGISTER_TYPE_Q:
1666      case BRW_REGISTER_TYPE_UQ:
1667      case BRW_REGISTER_TYPE_B:
1668      case BRW_REGISTER_TYPE_UB:
1669         unreachable("not reached");
1670      }
1671   } else {
1672      /* Replicate the 13th bit into the high 19 bits */
1673      return (int)(compact_imm << 19) >> 19;
1674   }
1675
1676   unreachable("not reached");
1677}
1678
1679static bool
1680has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1681              enum brw_reg_type *type)
1682{
1683   if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1684      *type = brw_inst_src0_type(devinfo, inst);
1685      return *type != INVALID_REG_TYPE;
1686   } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1687      *type = brw_inst_src1_type(devinfo, inst);
1688      return *type != INVALID_REG_TYPE;
1689   }
1690
1691   return false;
1692}
1693
1694/**
1695 * Applies some small changes to instruction types to increase chances of
1696 * compaction.
1697 */
1698static brw_inst
1699precompact(const struct intel_device_info *devinfo, brw_inst inst)
1700{
1701   if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1702      return inst;
1703
1704   /* The Bspec's section titled "Non-present Operands" claims that if src0
1705    * is an immediate that src1's type must be the same as that of src0.
1706    *
1707    * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1708    * that do not follow this rule. E.g., from the IVB/HSW table:
1709    *
1710    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1711    *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1712    *
1713    * And from the SNB table:
1714    *
1715    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1716    *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1717    *
1718    * Neither of these cause warnings from the simulator when used,
1719    * compacted or otherwise. In fact, all compaction mappings that have an
1720    * immediate in src0 use a:ud for src1.
1721    *
1722    * The GM45 instruction compaction tables do not contain mapped meanings
1723    * so it's not clear whether it has the restriction. We'll assume it was
1724    * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1725    *
1726    * Don't do any of this for 64-bit immediates, since the src1 fields
1727    * overlap with the immediate and setting them would overwrite the
1728    * immediate we set.
1729    */
1730   if (devinfo->ver >= 6 &&
1731       !(devinfo->is_haswell &&
1732         brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1733       !(devinfo->ver >= 8 &&
1734         (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1735          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1736          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1737      brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1738   }
1739
1740   /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1741    * for immediate values. Presumably the hardware engineers realized
1742    * that the only useful floating-point value that could be represented
1743    * in this format is 0.0, which can also be represented as a VF-typed
1744    * immediate, so they gave us the previously mentioned mapping on IVB+.
1745    *
1746    * Strangely, we do have a mapping for imm:f in src1, so we don't need
1747    * to do this there.
1748    *
1749    * If we see a 0.0:F, change the type to VF so that it can be compacted.
1750    *
1751    * Compaction of floating-point immediates is improved on Gfx12, thus
1752    * removing the need for this.
1753    */
1754   if (devinfo->ver < 12 &&
1755       brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1756       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1757       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1758       brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1759      enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1760      brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1761   }
1762
1763   /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1764    * set the types to :UD so the instruction can be compacted.
1765    *
1766    * FINISHME: Use dst:f | imm:f on Gfx12
1767    */
1768   if (devinfo->ver < 12 &&
1769       compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1770                         brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1771       brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1772       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1773       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1774      enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1775      enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1776
1777      brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1778      brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1779   }
1780
1781   return inst;
1782}
1783
1784/**
1785 * Tries to compact instruction src into dst.
1786 *
1787 * It doesn't modify dst unless src is compactable, which is relied on by
1788 * brw_compact_instructions().
1789 */
1790static bool
1791try_compact_instruction(const struct compaction_state *c,
1792                        brw_compact_inst *dst, const brw_inst *src)
1793{
1794   const struct intel_device_info *devinfo = c->devinfo;
1795   brw_compact_inst temp;
1796
1797   assert(brw_inst_cmpt_control(devinfo, src) == 0);
1798
1799   if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1800      if (devinfo->ver >= 8) {
1801         memset(&temp, 0, sizeof(temp));
1802         if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1803            *dst = temp;
1804            return true;
1805         } else {
1806            return false;
1807         }
1808      } else {
1809         return false;
1810      }
1811   }
1812
1813   enum brw_reg_type type;
1814   bool is_immediate = has_immediate(devinfo, src, &type);
1815
1816   unsigned compacted_imm = 0;
1817
1818   if (is_immediate) {
1819      /* Instructions with immediates cannot be compacted on Gen < 6 */
1820      if (devinfo->ver < 6)
1821         return false;
1822
1823      compacted_imm = compact_immediate(devinfo, type,
1824                                        brw_inst_imm_ud(devinfo, src));
1825      if (compacted_imm == -1)
1826         return false;
1827   }
1828
1829   if (has_unmapped_bits(devinfo, src))
1830      return false;
1831
1832   memset(&temp, 0, sizeof(temp));
1833
1834#define compact(field) \
1835   brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1836#define compact_reg(field) \
1837   brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1838                                       brw_inst_##field##_da_reg_nr(devinfo, src))
1839
1840   compact(hw_opcode);
1841   compact(debug_control);
1842
1843   if (!set_control_index(c, &temp, src))
1844      return false;
1845   if (!set_datatype_index(c, &temp, src, is_immediate))
1846      return false;
1847   if (!set_subreg_index(c, &temp, src, is_immediate))
1848      return false;
1849   if (!set_src0_index(c, &temp, src))
1850      return false;
1851   if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1852      return false;
1853
1854   if (devinfo->ver >= 12) {
1855      compact(swsb);
1856      compact_reg(dst);
1857      compact_reg(src0);
1858
1859      if (is_immediate) {
1860         /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1861         brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1862      } else {
1863         compact_reg(src1);
1864      }
1865   } else {
1866      if (devinfo->ver >= 6) {
1867         compact(acc_wr_control);
1868      } else {
1869         compact(mask_control_ex);
1870      }
1871
1872      if (devinfo->ver <= 6)
1873         compact(flag_subreg_nr);
1874
1875      compact(cond_modifier);
1876
1877      compact_reg(dst);
1878      compact_reg(src0);
1879
1880      if (is_immediate) {
1881         /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1882         brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1883      } else {
1884         compact_reg(src1);
1885      }
1886   }
1887   brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1888
1889#undef compact
1890#undef compact_reg
1891
1892   *dst = temp;
1893
1894   return true;
1895}
1896
1897bool
1898brw_try_compact_instruction(const struct intel_device_info *devinfo,
1899                            brw_compact_inst *dst, const brw_inst *src)
1900{
1901   struct compaction_state c;
1902   compaction_state_init(&c, devinfo);
1903   return try_compact_instruction(&c, dst, src);
1904}
1905
1906static void
1907set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1908                        brw_compact_inst *src)
1909{
1910   const struct intel_device_info *devinfo = c->devinfo;
1911   uint32_t uncompacted =
1912      c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1913
1914   if (devinfo->ver >= 12) {
1915      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1916      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1917      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1918      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1919      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1920      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1921      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1922      brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1923      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1924      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1925   } else if (devinfo->ver >= 8) {
1926      brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1927      brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1928      brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1929      brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1930      brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1931   } else {
1932      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1933      brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1934
1935      if (devinfo->ver == 7)
1936         brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1937   }
1938}
1939
1940static void
1941set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1942                         brw_compact_inst *src)
1943{
1944   const struct intel_device_info *devinfo = c->devinfo;
1945   uint32_t uncompacted =
1946      c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1947
1948   if (devinfo->ver >= 12) {
1949      brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1950      brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1951      brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1952      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1953      brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1954      brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1955      brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1956      brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1957      brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1958      brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1959   } else if (devinfo->ver >= 8) {
1960      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1961      brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1962      brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1963   } else {
1964      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1965      brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1966   }
1967}
1968
1969static void
1970set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1971                       brw_compact_inst *src)
1972{
1973   const struct intel_device_info *devinfo = c->devinfo;
1974   uint16_t uncompacted =
1975      c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1976
1977   if (devinfo->ver >= 12) {
1978      brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1979      brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
1980      brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
1981   } else {
1982      brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1983      brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1984      brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1985   }
1986}
1987
1988static void
1989set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1990                     brw_compact_inst *src)
1991{
1992   const struct intel_device_info *devinfo = c->devinfo;
1993   uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1994   uint16_t uncompacted = c->src0_index_table[compacted];
1995
1996   if (devinfo->ver >= 12) {
1997      brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
1998      brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1999      brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2000      brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2001      brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2002   } else {
2003      brw_inst_set_bits(dst, 88, 77, uncompacted);
2004   }
2005}
2006
2007static void
2008set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2009                     brw_compact_inst *src)
2010{
2011   const struct intel_device_info *devinfo = c->devinfo;
2012   uint16_t uncompacted =
2013      c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2014
2015   if (devinfo->ver >= 12) {
2016      brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2017      brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
2018      brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
2019      brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
2020      brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2021   } else {
2022      brw_inst_set_bits(dst, 120, 109, uncompacted);
2023   }
2024}
2025
2026static void
2027set_uncompacted_3src_control_index(const struct compaction_state *c,
2028                                   brw_inst *dst, brw_compact_inst *src)
2029{
2030   const struct intel_device_info *devinfo = c->devinfo;
2031   assert(devinfo->ver >= 8);
2032
2033   if (devinfo->verx10 >= 125) {
2034      uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2035      uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2036
2037      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2038      brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2039      brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2040      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2041      brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2042      brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2043      brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2044      brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2045      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2046      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2047      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2048      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2049      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2050      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2051      brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2052      brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2053      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2054      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2055
2056   } else if (devinfo->ver >= 12) {
2057      uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2058      uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2059
2060      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2061      brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2062      brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2063      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2064      brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2065      brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2066      brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2067      brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2068      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2069      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2070      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2071      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2072      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2073      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2074      brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2075      brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2076      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2077      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2078   } else {
2079      uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2080      uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2081
2082      brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2083      brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2084
2085      if (devinfo->ver >= 9 || devinfo->is_cherryview)
2086         brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2087   }
2088}
2089
2090static void
2091set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2092                                  brw_inst *dst, brw_compact_inst *src)
2093{
2094   assert(devinfo->ver >= 8);
2095
2096   uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2097
2098   if (devinfo->ver >= 12) {
2099      const uint32_t *three_src_source_index_table =
2100         devinfo->verx10 >= 125 ?
2101         xehp_3src_source_index_table : gfx12_3src_source_index_table;
2102      uint32_t uncompacted = three_src_source_index_table[compacted];
2103
2104      brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2105      brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2106      brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2107      brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2108      brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2109      brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2110      brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2111      brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2112      brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2113      brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2114      brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2115      brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2116      brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2117      brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2118      brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2119   } else {
2120      uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2121
2122      brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2123      brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2124      brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2125      brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2126      brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2127
2128      if (devinfo->ver >= 9 || devinfo->is_cherryview) {
2129         brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2130         brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2131         brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2132      } else {
2133         brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2134         brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2135      }
2136   }
2137}
2138
2139static void
2140set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2141                                  brw_inst *dst, brw_compact_inst *src)
2142{
2143   assert(devinfo->ver >= 12);
2144
2145   uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2146   uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2147
2148   brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2149   brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2150   brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2151   brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2152}
2153
2154static void
2155brw_uncompact_3src_instruction(const struct compaction_state *c,
2156                               brw_inst *dst, brw_compact_inst *src)
2157{
2158   const struct intel_device_info *devinfo = c->devinfo;
2159   assert(devinfo->ver >= 8);
2160
2161#define uncompact(field) \
2162   brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2163#define uncompact_a16(field) \
2164   brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2165
2166   uncompact(hw_opcode);
2167
2168   if (devinfo->ver >= 12) {
2169      set_uncompacted_3src_control_index(c, dst, src);
2170      set_uncompacted_3src_source_index(devinfo, dst, src);
2171      set_uncompacted_3src_subreg_index(devinfo, dst, src);
2172
2173      uncompact(debug_control);
2174      uncompact(swsb);
2175      uncompact(dst_reg_nr);
2176      uncompact(src0_reg_nr);
2177      uncompact(src1_reg_nr);
2178      uncompact(src2_reg_nr);
2179   } else {
2180      set_uncompacted_3src_control_index(c, dst, src);
2181      set_uncompacted_3src_source_index(devinfo, dst, src);
2182
2183      uncompact(dst_reg_nr);
2184      uncompact_a16(src0_rep_ctrl);
2185      uncompact(debug_control);
2186      uncompact(saturate);
2187      uncompact_a16(src1_rep_ctrl);
2188      uncompact_a16(src2_rep_ctrl);
2189      uncompact(src0_reg_nr);
2190      uncompact(src1_reg_nr);
2191      uncompact(src2_reg_nr);
2192      uncompact_a16(src0_subreg_nr);
2193      uncompact_a16(src1_subreg_nr);
2194      uncompact_a16(src2_subreg_nr);
2195   }
2196   brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2197
2198#undef uncompact
2199#undef uncompact_a16
2200}
2201
2202static void
2203uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2204                      brw_compact_inst *src)
2205{
2206   const struct intel_device_info *devinfo = c->devinfo;
2207   memset(dst, 0, sizeof(*dst));
2208
2209   if (devinfo->ver >= 8 &&
2210       is_3src(devinfo, brw_opcode_decode(
2211                  devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2212      brw_uncompact_3src_instruction(c, dst, src);
2213      return;
2214   }
2215
2216#define uncompact(field) \
2217   brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2218#define uncompact_reg(field) \
2219   brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2220                                    brw_compact_inst_##field##_reg_nr(devinfo, src))
2221
2222   uncompact(hw_opcode);
2223   uncompact(debug_control);
2224
2225   set_uncompacted_control(c, dst, src);
2226   set_uncompacted_datatype(c, dst, src);
2227   set_uncompacted_subreg(c, dst, src);
2228   set_uncompacted_src0(c, dst, src);
2229
2230   enum brw_reg_type type;
2231   if (has_immediate(devinfo, dst, &type)) {
2232      unsigned imm = uncompact_immediate(devinfo, type,
2233                                         brw_compact_inst_imm(devinfo, src));
2234      brw_inst_set_imm_ud(devinfo, dst, imm);
2235   } else {
2236      set_uncompacted_src1(c, dst, src);
2237      uncompact_reg(src1);
2238   }
2239
2240   if (devinfo->ver >= 12) {
2241      uncompact(swsb);
2242      uncompact_reg(dst);
2243      uncompact_reg(src0);
2244   } else {
2245      if (devinfo->ver >= 6) {
2246         uncompact(acc_wr_control);
2247      } else {
2248         uncompact(mask_control_ex);
2249      }
2250
2251      uncompact(cond_modifier);
2252
2253      if (devinfo->ver <= 6)
2254         uncompact(flag_subreg_nr);
2255
2256      uncompact_reg(dst);
2257      uncompact_reg(src0);
2258   }
2259   brw_inst_set_cmpt_control(devinfo, dst, false);
2260
2261#undef uncompact
2262#undef uncompact_reg
2263}
2264
2265void
2266brw_uncompact_instruction(const struct intel_device_info *devinfo,
2267                          brw_inst *dst, brw_compact_inst *src)
2268{
2269   struct compaction_state c;
2270   compaction_state_init(&c, devinfo);
2271   uncompact_instruction(&c, dst, src);
2272}
2273
2274void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
2275                                 brw_inst *orig,
2276                                 brw_inst *uncompacted)
2277{
2278   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2279           devinfo->ver);
2280
2281   fprintf(stderr, "  before: ");
2282   brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);
2283
2284   fprintf(stderr, "  after:  ");
2285   brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);
2286
2287   uint32_t *before_bits = (uint32_t *)orig;
2288   uint32_t *after_bits = (uint32_t *)uncompacted;
2289   fprintf(stderr, "  changed bits:\n");
2290   for (int i = 0; i < 128; i++) {
2291      uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2292      uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2293
2294      if (before != after) {
2295         fprintf(stderr, "  bit %d, %s to %s\n", i,
2296                 before ? "set" : "unset",
2297                 after ? "set" : "unset");
2298      }
2299   }
2300}
2301
/**
 * Returns the difference between the entries of \p compacted_counts at
 * \p old_target_ip and at \p old_ip.
 *
 * The result is negative when the entry at the target is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2309
2310static void
2311update_uip_jip(const struct intel_device_info *devinfo, brw_inst *insn,
2312               int this_old_ip, int *compacted_counts)
2313{
2314   /* JIP and UIP are in units of:
2315    *    - bytes on Gfx8+; and
2316    *    - compacted instructions on Gfx6+.
2317    */
2318   int shift = devinfo->ver >= 8 ? 3 : 0;
2319
2320   int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2321   jip_compacted -= compacted_between(this_old_ip,
2322                                      this_old_ip + (jip_compacted / 2),
2323                                      compacted_counts);
2324   brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2325
2326   if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
2327       brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
2328       (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2329      return;
2330
2331   int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2332   uip_compacted -= compacted_between(this_old_ip,
2333                                      this_old_ip + (uip_compacted / 2),
2334                                      compacted_counts);
2335   brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2336}
2337
2338static void
2339update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2340                       int this_old_ip, int *compacted_counts)
2341{
2342   assert(devinfo->ver == 5 || devinfo->is_g4x);
2343
2344   /* Jump Count is in units of:
2345    *    - uncompacted instructions on G45; and
2346    *    - compacted instructions on Gfx5.
2347    */
2348   int shift = devinfo->is_g4x ? 1 : 0;
2349
2350   int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2351
2352   int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2353
2354   int this_compacted_count = compacted_counts[this_old_ip];
2355   int target_compacted_count = compacted_counts[target_old_ip];
2356
2357   jump_count_compacted -= (target_compacted_count - this_compacted_count);
2358   brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2359}
2360
2361static void
2362compaction_state_init(struct compaction_state *c,
2363                      const struct intel_device_info *devinfo)
2364{
2365   assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2366   assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2367   assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2368   assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2369   assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2370   assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2371   assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2372   assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2373   assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2374   assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2375   assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2376   assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2377   assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2378   assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2379   assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2380   assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2381   assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2382   assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2383   assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2384   assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2385   assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2386   assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2387   assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2388   assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2389
2390   c->devinfo = devinfo;
2391   switch (devinfo->ver) {
2392   case 12:
2393      c->control_index_table = gfx12_control_index_table;;
2394      c->datatype_table = gfx12_datatype_table;
2395      c->subreg_table = gfx12_subreg_table;
2396      if (devinfo->verx10 >= 125) {
2397         c->src0_index_table = xehp_src0_index_table;
2398         c->src1_index_table = xehp_src1_index_table;
2399      } else {
2400         c->src0_index_table = gfx12_src0_index_table;
2401         c->src1_index_table = gfx12_src1_index_table;
2402      }
2403      break;
2404   case 11:
2405      c->control_index_table = gfx8_control_index_table;
2406      c->datatype_table = gfx11_datatype_table;
2407      c->subreg_table = gfx8_subreg_table;
2408      c->src0_index_table = gfx8_src_index_table;
2409      c->src1_index_table = gfx8_src_index_table;
2410      break;
2411   case 9:
2412   case 8:
2413      c->control_index_table = gfx8_control_index_table;
2414      c->datatype_table = gfx8_datatype_table;
2415      c->subreg_table = gfx8_subreg_table;
2416      c->src0_index_table = gfx8_src_index_table;
2417      c->src1_index_table = gfx8_src_index_table;
2418      break;
2419   case 7:
2420      c->control_index_table = gfx7_control_index_table;
2421      c->datatype_table = gfx7_datatype_table;
2422      c->subreg_table = gfx7_subreg_table;
2423      c->src0_index_table = gfx7_src_index_table;
2424      c->src1_index_table = gfx7_src_index_table;
2425      break;
2426   case 6:
2427      c->control_index_table = gfx6_control_index_table;
2428      c->datatype_table = gfx6_datatype_table;
2429      c->subreg_table = gfx6_subreg_table;
2430      c->src0_index_table = gfx6_src_index_table;
2431      c->src1_index_table = gfx6_src_index_table;
2432      break;
2433   case 5:
2434   case 4:
2435      c->control_index_table = g45_control_index_table;
2436      c->datatype_table = g45_datatype_table;
2437      c->subreg_table = g45_subreg_table;
2438      c->src0_index_table = g45_src_index_table;
2439      c->src1_index_table = g45_src_index_table;
2440      break;
2441   default:
2442      unreachable("unknown generation");
2443   }
2444}
2445
2446void
2447brw_compact_instructions(struct brw_codegen *p, int start_offset,
2448                         struct disasm_info *disasm)
2449{
2450   if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2451      return;
2452
2453   const struct intel_device_info *devinfo = p->devinfo;
2454   void *store = p->store + start_offset / 16;
2455   /* For an instruction at byte offset 16*i before compaction, this is the
2456    * number of compacted instructions minus the number of padding NOP/NENOPs
2457    * that preceded it.
2458    */
2459   int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
2460   /* For an instruction at byte offset 8*i after compaction, this was its IP
2461    * (in 16-byte units) before compaction.
2462    */
2463   int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
2464
2465   if (devinfo->ver == 4 && !devinfo->is_g4x)
2466      return;
2467
2468   struct compaction_state c;
2469   compaction_state_init(&c, devinfo);
2470
2471   int offset = 0;
2472   int compacted_count = 0;
2473   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2474        src_offset += sizeof(brw_inst)) {
2475      brw_inst *src = store + src_offset;
2476      void *dst = store + offset;
2477
2478      old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2479      compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2480
2481      brw_inst inst = precompact(devinfo, *src);
2482      brw_inst saved = inst;
2483
2484      if (try_compact_instruction(&c, dst, &inst)) {
2485         compacted_count++;
2486
2487         if (INTEL_DEBUG(DEBUG_ANY)) {
2488            brw_inst uncompacted;
2489            uncompact_instruction(&c, &uncompacted, dst);
2490            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2491               brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
2492            }
2493         }
2494
2495         offset += sizeof(brw_compact_inst);
2496      } else {
2497         /* All uncompacted instructions need to be aligned on G45. */
2498         if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
2499            brw_compact_inst *align = store + offset;
2500            memset(align, 0, sizeof(*align));
2501            brw_compact_inst_set_hw_opcode(
2502               devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
2503            brw_compact_inst_set_cmpt_control(devinfo, align, true);
2504            offset += sizeof(brw_compact_inst);
2505            compacted_count--;
2506            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2507            old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2508
2509            dst = store + offset;
2510         }
2511
2512         /* If we didn't compact this intruction, we need to move it down into
2513          * place.
2514          */
2515         if (offset != src_offset) {
2516            memmove(dst, src, sizeof(brw_inst));
2517         }
2518         offset += sizeof(brw_inst);
2519      }
2520   }
2521
2522   /* Add an entry for the ending offset of the program. This greatly
2523    * simplifies the linked list walk at the end of the function.
2524    */
2525   old_ip[offset / sizeof(brw_compact_inst)] =
2526      (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2527
2528   /* Fix up control flow offsets. */
2529   p->next_insn_offset = start_offset + offset;
2530   for (offset = 0; offset < p->next_insn_offset - start_offset;
2531        offset = next_offset(devinfo, store, offset)) {
2532      brw_inst *insn = store + offset;
2533      int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2534      int this_compacted_count = compacted_counts[this_old_ip];
2535
2536      switch (brw_inst_opcode(devinfo, insn)) {
2537      case BRW_OPCODE_BREAK:
2538      case BRW_OPCODE_CONTINUE:
2539      case BRW_OPCODE_HALT:
2540         if (devinfo->ver >= 6) {
2541            update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2542         } else {
2543            update_gfx4_jump_count(devinfo, insn, this_old_ip,
2544                                   compacted_counts);
2545         }
2546         break;
2547
2548      case BRW_OPCODE_IF:
2549      case BRW_OPCODE_IFF:
2550      case BRW_OPCODE_ELSE:
2551      case BRW_OPCODE_ENDIF:
2552      case BRW_OPCODE_WHILE:
2553         if (devinfo->ver >= 7) {
2554            if (brw_inst_cmpt_control(devinfo, insn)) {
2555               brw_inst uncompacted;
2556               uncompact_instruction(&c, &uncompacted,
2557                                     (brw_compact_inst *)insn);
2558
2559               update_uip_jip(devinfo, &uncompacted, this_old_ip,
2560                              compacted_counts);
2561
2562               bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2563                                                  &uncompacted);
2564               assert(ret); (void)ret;
2565            } else {
2566               update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2567            }
2568         } else if (devinfo->ver == 6) {
2569            assert(!brw_inst_cmpt_control(devinfo, insn));
2570
2571            /* Jump Count is in units of compacted instructions on Gfx6. */
2572            int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2573
2574            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2575            int target_compacted_count = compacted_counts[target_old_ip];
2576            jump_count_compacted -= (target_compacted_count - this_compacted_count);
2577            brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2578         } else {
2579            update_gfx4_jump_count(devinfo, insn, this_old_ip,
2580                                   compacted_counts);
2581         }
2582         break;
2583
2584      case BRW_OPCODE_ADD:
2585         /* Add instructions modifying the IP register use an immediate src1,
2586          * and Gens that use this cannot compact instructions with immediate
2587          * operands.
2588          */
2589         if (brw_inst_cmpt_control(devinfo, insn))
2590            break;
2591
2592         if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2593             brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2594            assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2595
2596            int shift = 3;
2597            int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2598
2599            int target_old_ip = this_old_ip + (jump_compacted / 2);
2600            int target_compacted_count = compacted_counts[target_old_ip];
2601            jump_compacted -= (target_compacted_count - this_compacted_count);
2602            brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2603         }
2604         break;
2605
2606      default:
2607         break;
2608      }
2609   }
2610
2611   /* p->nr_insn is counting the number of uncompacted instructions still, so
2612    * divide.  We do want to be sure there's a valid instruction in any
2613    * alignment padding, so that the next compression pass (for the FS 8/16
2614    * compile passes) parses correctly.
2615    */
2616   if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2617      brw_compact_inst *align = store + offset;
2618      memset(align, 0, sizeof(*align));
2619      brw_compact_inst_set_hw_opcode(
2620         devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
2621      brw_compact_inst_set_cmpt_control(devinfo, align, true);
2622      p->next_insn_offset += sizeof(brw_compact_inst);
2623   }
2624   p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2625
2626   for (int i = 0; i < p->num_relocs; i++) {
2627      if (p->relocs[i].offset < (uint32_t)start_offset)
2628         continue;
2629
2630      assert(p->relocs[i].offset % 16 == 0);
2631      unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2632      p->relocs[i].offset -= compacted_counts[idx] * 8;
2633   }
2634
2635   /* Update the instruction offsets for each group. */
2636   if (disasm) {
2637      int offset = 0;
2638
2639      foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2640         while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2641                sizeof(brw_inst) != group->offset) {
2642            assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2643                   sizeof(brw_inst) < group->offset);
2644            offset = next_offset(devinfo, store, offset);
2645         }
2646
2647         group->offset = start_offset + offset;
2648
2649         offset = next_offset(devinfo, store, offset);
2650      }
2651   }
2652}
2653