1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that.  The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gen5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gen6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gen7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62 * since the jump count field is not in DW3.
63 *
64 *    break    JIP/UIP
65 *    cont     JIP/UIP
66 *    halt     JIP/UIP
67 *    if       JIP/UIP
68 *    else     JIP (plus UIP on BDW+)
69 *    endif    JIP
70 *    while    JIP (must be negative)
71 *
 * Gen8 adds support for compacting 3-src instructions.
73 */
74
75#include "brw_eu.h"
76#include "brw_shader.h"
77#include "brw_disasm_info.h"
78#include "dev/gen_debug.h"
79
/* G45 control index table: 32 entries, each written as a 17-bit pattern.
 *
 * The position of an entry in the array is the value that ends up in the
 * compacted instruction's control index field, so entry order matters.
 *
 * NOTE(review): presumably installed as control_index_table on G45/Gen5 by
 * code outside this view and then searched by set_control_index() -- confirm.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
114
/* G45 datatype index table: 32 entries, each written as an 18-bit pattern.
 *
 * The array position of a matching entry is the compacted datatype index.
 *
 * NOTE(review): presumably installed as datatype_table on G45/Gen5 by code
 * outside this view and then searched by set_datatype_index() -- confirm.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
149
/* G45 subregister index table: 32 entries, each written as a 15-bit pattern.
 *
 * The array position of a matching entry is the compacted subreg index.
 *
 * NOTE(review): presumably installed as subreg_table on G45/Gen5 by code
 * outside this view and then searched by set_subreg_index() -- confirm.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
184
/* G45 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * The array position of a matching entry is the compacted source index.
 *
 * NOTE(review): presumably installed as src_index_table on G45/Gen5 by code
 * outside this view and then searched by get_src_index() -- confirm.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
219
/* Gen6 control index table: 32 entries, each written as a 17-bit pattern.
 *
 * The array position of a matching entry is the compacted control index.
 *
 * NOTE(review): presumably installed as control_index_table on Gen6 by code
 * outside this view and then searched by set_control_index() -- confirm.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
254
/* Gen6 datatype index table: 32 entries, each written as an 18-bit pattern.
 *
 * The array position of a matching entry is the compacted datatype index.
 *
 * NOTE(review): presumably installed as datatype_table on Gen6 by code
 * outside this view and then searched by set_datatype_index() -- confirm.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
289
/* Gen6 subregister index table: 32 entries, each written as a 15-bit pattern.
 *
 * The array position of a matching entry is the compacted subreg index.
 *
 * NOTE(review): presumably installed as subreg_table on Gen6 by code outside
 * this view and then searched by set_subreg_index() -- confirm.
 */
static const uint16_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
324
/* Gen6 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * The array position of a matching entry is the compacted source index.
 *
 * NOTE(review): presumably installed as src_index_table on Gen6 by code
 * outside this view and then searched by get_src_index() -- confirm.
 */
static const uint16_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
359
/* Gen7 control index table: 32 entries, each written as a 19-bit pattern.
 *
 * The two extra high bits relative to the 17-bit G45/Gen6 patterns line up
 * with the two flag bits that set_control_index() ORs in at bit 17 on Gen7.
 *
 * NOTE(review): presumably installed as control_index_table on Gen7 by code
 * outside this view -- confirm.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
394
/* Gen7 datatype index table: 32 entries, each written as an 18-bit pattern.
 *
 * The array position of a matching entry is the compacted datatype index.
 *
 * NOTE(review): presumably installed as datatype_table on Gen7 by code
 * outside this view and then searched by set_datatype_index() -- confirm.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
429
/* Gen7 subregister index table: 32 entries, each written as a 15-bit pattern.
 *
 * The array position of a matching entry is the compacted subreg index.
 *
 * NOTE(review): presumably installed as subreg_table on Gen7 by code outside
 * this view and then searched by set_subreg_index() -- confirm.
 */
static const uint16_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
464
/* Gen7 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * The array position of a matching entry is the compacted source index.
 *
 * NOTE(review): presumably installed as src_index_table on Gen7 by code
 * outside this view and then searched by get_src_index() -- confirm.
 */
static const uint16_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
499
/* Gen8 control index table: 32 entries, each written as a 19-bit pattern.
 *
 * The array position of a matching entry is the compacted control index.
 *
 * NOTE(review): presumably installed as control_index_table on Gen8+ by code
 * outside this view and then searched by set_control_index() -- confirm.
 */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
534
/* Gen8 datatype index table: 32 entries, each written as a 21-bit pattern,
 * matching the 21-bit key set_datatype_index() builds when gen >= 8.
 *
 * The array position of a matching entry is the compacted datatype index.
 *
 * NOTE(review): presumably installed as datatype_table on Gen8..Gen10 by
 * code outside this view -- confirm.
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
569
/* Gen8 subregister index table: 32 entries, each written as a 15-bit pattern.
 *
 * The array position of a matching entry is the compacted subreg index.
 *
 * NOTE(review): presumably installed as subreg_table on Gen8+ by code outside
 * this view and then searched by set_subreg_index() -- confirm.
 */
static const uint16_t gen8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
604
/* Gen8 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * The array position of a matching entry is the compacted source index.
 *
 * NOTE(review): presumably installed as src_index_table on Gen8+ by code
 * outside this view and then searched by get_src_index() -- confirm.
 */
static const uint16_t gen8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
639
/* Gen11 datatype index table: 32 entries, each written as a 21-bit pattern
 * (the same width as the Gen8 datatype table).
 *
 * The array position of a matching entry is the compacted datatype index.
 *
 * NOTE(review): presumably installed as datatype_table on Gen11 by code
 * outside this view; no other Gen11-specific table is visible here, so the
 * remaining tables are presumably shared with Gen8 -- confirm.
 */
static const uint32_t gen11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
674
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 *
 * Searched linearly by set_3src_control_index(); the array position of the
 * matching entry (0..3) is stored as the compacted 3-src control index.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
687
/* This is actually the control index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * Searched linearly by set_3src_source_index(); the array position of the
 * matching entry (0..3) is stored as the compacted 3-src source index.
 * Entries need more than 32 bits, hence the uint64_t element type.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
701
/* Currently selected lookup tables.  Each pointer is indexed 0..31 by the
 * matching lookup routine: set_control_index(), set_datatype_index(),
 * set_subreg_index() and get_src_index() respectively.
 *
 * NOTE(review): nothing in the visible part of the file assigns these; they
 * are presumably pointed at the per-generation tables above by an init
 * routine before any lookup runs -- confirm.  Being file-scope mutable state,
 * they are shared by every caller in the process.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
706
707static bool
708set_control_index(const struct gen_device_info *devinfo,
709                  brw_compact_inst *dst, const brw_inst *src)
710{
711   uint32_t uncompacted = devinfo->gen >= 8  /* 17b/G45; 19b/IVB+ */
712      ? (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
713        (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
714        (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
715        (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
716        (brw_inst_bits(src,  8,  8))         /*  1b */
717      : (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
718        (brw_inst_bits(src, 23,  8));        /* 16b */
719
720   /* On gen7, the flag register and subregister numbers are integrated into
721    * the control index.
722    */
723   if (devinfo->gen == 7)
724      uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
725
726   for (int i = 0; i < 32; i++) {
727      if (control_index_table[i] == uncompacted) {
728         brw_compact_inst_set_control_index(devinfo, dst, i);
729	 return true;
730      }
731   }
732
733   return false;
734}
735
736static bool
737set_datatype_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
738                   const brw_inst *src)
739{
740   uint32_t uncompacted = devinfo->gen >= 8  /* 18b/G45+; 21b/BDW+ */
741      ? (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
742        (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
743        (brw_inst_bits(src, 46, 35))         /* 12b */
744      : (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
745        (brw_inst_bits(src, 46, 32));        /* 15b */
746
747   for (int i = 0; i < 32; i++) {
748      if (datatype_table[i] == uncompacted) {
749         brw_compact_inst_set_datatype_index(devinfo, dst, i);
750	 return true;
751      }
752   }
753
754   return false;
755}
756
757static bool
758set_subreg_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
759                 const brw_inst *src, bool is_immediate)
760{
761   uint16_t uncompacted =                 /* 15b */
762      (brw_inst_bits(src, 52, 48) << 0) | /*  5b */
763      (brw_inst_bits(src, 68, 64) << 5);  /*  5b */
764
765   if (!is_immediate)
766      uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
767
768   for (int i = 0; i < 32; i++) {
769      if (subreg_table[i] == uncompacted) {
770         brw_compact_inst_set_subreg_index(devinfo, dst, i);
771	 return true;
772      }
773   }
774
775   return false;
776}
777
778static bool
779get_src_index(uint16_t uncompacted,
780              uint16_t *compacted)
781{
782   for (int i = 0; i < 32; i++) {
783      if (src_index_table[i] == uncompacted) {
784	 *compacted = i;
785	 return true;
786      }
787   }
788
789   return false;
790}
791
792static bool
793set_src0_index(const struct gen_device_info *devinfo,
794               brw_compact_inst *dst, const brw_inst *src)
795{
796   uint16_t compacted;
797   uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
798
799   if (!get_src_index(uncompacted, &compacted))
800      return false;
801
802   brw_compact_inst_set_src0_index(devinfo, dst, compacted);
803
804   return true;
805}
806
807static bool
808set_src1_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
809               const brw_inst *src, bool is_immediate)
810{
811   uint16_t compacted;
812
813   if (is_immediate) {
814      compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f;
815   } else {
816      uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
817
818      if (!get_src_index(uncompacted, &compacted))
819         return false;
820   }
821
822   brw_compact_inst_set_src1_index(devinfo, dst, compacted);
823
824   return true;
825}
826
827static bool
828set_3src_control_index(const struct gen_device_info *devinfo,
829                       brw_compact_inst *dst, const brw_inst *src)
830{
831   assert(devinfo->gen >= 8);
832
833   uint32_t uncompacted =                  /* 24b/BDW; 26b/CHV */
834      (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
835      (brw_inst_bits(src, 28,  8));        /* 21b */
836
837   if (devinfo->gen >= 9 || devinfo->is_cherryview)
838      uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
839
840   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
841      if (gen8_3src_control_index_table[i] == uncompacted) {
842         brw_compact_inst_set_3src_control_index(devinfo, dst, i);
843	 return true;
844      }
845   }
846
847   return false;
848}
849
850static bool
851set_3src_source_index(const struct gen_device_info *devinfo,
852                      brw_compact_inst *dst, const brw_inst *src)
853{
854   assert(devinfo->gen >= 8);
855
856   uint64_t uncompacted =                    /* 46b/BDW; 49b/CHV */
857      (brw_inst_bits(src,  83,  83) << 43) | /*  1b */
858      (brw_inst_bits(src, 114, 107) << 35) | /*  8b */
859      (brw_inst_bits(src,  93,  86) << 27) | /*  8b */
860      (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
861      (brw_inst_bits(src,  55,  37));        /* 19b */
862
863   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
864      uncompacted |=
865         (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
866         (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
867         (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
868   } else {
869      uncompacted |=
870         (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
871         (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
872   }
873
874   for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
875      if (gen8_3src_source_index_table[i] == uncompacted) {
876         brw_compact_inst_set_3src_source_index(devinfo, dst, i);
877	 return true;
878      }
879   }
880
881   return false;
882}
883
884static bool
885has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
886{
887   /* EOT can only be mapped on a send if the src1 is an immediate */
888   if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
889        brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
890       brw_inst_eot(devinfo, src))
891      return true;
892
893   /* Check for instruction bits that don't map to any of the fields of the
894    * compacted instruction.  The instruction cannot be compacted if any of
895    * them are set.  They overlap with:
896    *  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
897    *  - Dst.AddrImm[9] (bit 47 on Gen8)
898    *  - Src0.AddrImm[9] (bit 95 on Gen8)
899    *  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
900    *  - UIP[31] (bit 95 on Gen8)
901    */
902   if (devinfo->gen >= 8) {
903      assert(!brw_inst_bits(src, 7,  7));
904      return brw_inst_bits(src, 95, 95) ||
905             brw_inst_bits(src, 47, 47) ||
906             brw_inst_bits(src, 11, 11);
907   } else {
908      assert(!brw_inst_bits(src, 7,  7) &&
909             !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
910      return brw_inst_bits(src, 95, 91) ||
911             brw_inst_bits(src, 47, 47);
912   }
913}
914
915static bool
916has_3src_unmapped_bits(const struct gen_device_info *devinfo,
917                       const brw_inst *src)
918{
919   /* Check for three-source instruction bits that don't map to any of the
920    * fields of the compacted instruction.  All of them seem to be reserved
921    * bits currently.
922    */
923   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
924      assert(!brw_inst_bits(src, 127, 127) &&
925             !brw_inst_bits(src, 7,  7));
926   } else {
927      assert(devinfo->gen >= 8);
928      assert(!brw_inst_bits(src, 127, 126) &&
929             !brw_inst_bits(src, 105, 105) &&
930             !brw_inst_bits(src, 84, 84) &&
931             !brw_inst_bits(src, 7,  7));
932
933      /* Src1Type and Src2Type, used for mixed-precision floating point */
934      if (brw_inst_bits(src, 36, 35))
935         return true;
936   }
937
938   return false;
939}
940
941static bool
942brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
943                                 brw_compact_inst *dst, const brw_inst *src)
944{
945   assert(devinfo->gen >= 8);
946
947   if (has_3src_unmapped_bits(devinfo, src))
948      return false;
949
950#define compact(field) \
951   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
952#define compact_a16(field) \
953   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
954
955   compact(opcode);
956
957   if (!set_3src_control_index(devinfo, dst, src))
958      return false;
959
960   if (!set_3src_source_index(devinfo, dst, src))
961      return false;
962
963   compact(dst_reg_nr);
964   compact_a16(src0_rep_ctrl);
965   brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
966   compact(debug_control);
967   compact(saturate);
968   compact_a16(src1_rep_ctrl);
969   compact_a16(src2_rep_ctrl);
970   compact(src0_reg_nr);
971   compact(src1_reg_nr);
972   compact(src2_reg_nr);
973   compact_a16(src0_subreg_nr);
974   compact_a16(src1_subreg_nr);
975   compact_a16(src2_subreg_nr);
976
977#undef compact
978#undef compact_a16
979
980   return true;
981}
982
/* A compacted instruction stores the low 12 bits of an immediate verbatim
 * plus a single extra bit that is replicated through the upper 20 bits.
 *
 * In practice that covers 12-bit integers, 0.0f, and a limited set of packed
 * vector immediates.
 *
 * Returns true when imm fits that encoding, i.e. when everything above the
 * low 12 bits is either all zeros or all ones.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* Discard the 12 bits that are stored as-is; what is left must be the
    * replicated bit repeated 20 times.
    */
   const unsigned upper = imm >> 12;

   return upper == 0 || upper == 0xfffff;
}
998
999/**
1000 * Applies some small changes to instruction types to increase chances of
1001 * compaction.
1002 */
1003static brw_inst
1004precompact(const struct gen_device_info *devinfo, brw_inst inst)
1005{
1006   if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1007      return inst;
1008
1009   /* The Bspec's section titled "Non-present Operands" claims that if src0
1010    * is an immediate that src1's type must be the same as that of src0.
1011    *
1012    * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1013    * that do not follow this rule. E.g., from the IVB/HSW table:
1014    *
1015    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1016    *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1017    *
1018    * And from the SNB table:
1019    *
1020    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1021    *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1022    *
1023    * Neither of these cause warnings from the simulator when used,
1024    * compacted or otherwise. In fact, all compaction mappings that have an
1025    * immediate in src0 use a:ud for src1.
1026    *
1027    * The GM45 instruction compaction tables do not contain mapped meanings
1028    * so it's not clear whether it has the restriction. We'll assume it was
1029    * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1030    *
1031    * Don't do any of this for 64-bit immediates, since the src1 fields
1032    * overlap with the immediate and setting them would overwrite the
1033    * immediate we set.
1034    */
1035   if (devinfo->gen >= 6 &&
1036       !(devinfo->is_haswell &&
1037         brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1038       !(devinfo->gen >= 8 &&
1039         (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1040          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1041          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1042      enum brw_reg_file file = brw_inst_src1_reg_file(devinfo, &inst);
1043      brw_inst_set_src1_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_UD);
1044   }
1045
1046   /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1047    * for immediate values. Presumably the hardware engineers realized
1048    * that the only useful floating-point value that could be represented
1049    * in this format is 0.0, which can also be represented as a VF-typed
1050    * immediate, so they gave us the previously mentioned mapping on IVB+.
1051    *
1052    * Strangely, we do have a mapping for imm:f in src1, so we don't need
1053    * to do this there.
1054    *
1055    * If we see a 0.0:F, change the type to VF so that it can be compacted.
1056    */
1057   if (brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1058       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1059       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1060       brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1061      enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1062      brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1063   }
1064
1065   /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1066    * set the types to :UD so the instruction can be compacted.
1067    */
1068   if (is_compactable_immediate(brw_inst_imm_ud(devinfo, &inst)) &&
1069       brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1070       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1071       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1072      enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1073      enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1074
1075      brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1076      brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1077   }
1078
1079   return inst;
1080}
1081
1082/**
1083 * Tries to compact instruction src into dst.
1084 *
1085 * It doesn't modify dst unless src is compactable, which is relied on by
1086 * brw_compact_instructions().
1087 */
1088bool
1089brw_try_compact_instruction(const struct gen_device_info *devinfo,
1090                            brw_compact_inst *dst, const brw_inst *src)
1091{
1092   brw_compact_inst temp;
1093
1094   assert(brw_inst_cmpt_control(devinfo, src) == 0);
1095
1096   if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1097      if (devinfo->gen >= 8) {
1098         memset(&temp, 0, sizeof(temp));
1099         if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1100            *dst = temp;
1101            return true;
1102         } else {
1103            return false;
1104         }
1105      } else {
1106         return false;
1107      }
1108   }
1109
1110   bool is_immediate =
1111      brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
1112      brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
1113   if (is_immediate &&
1114       (devinfo->gen < 6 ||
1115        !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
1116      return false;
1117   }
1118
1119   if (has_unmapped_bits(devinfo, src))
1120      return false;
1121
1122   memset(&temp, 0, sizeof(temp));
1123
1124#define compact(field) \
1125   brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1126
1127   compact(opcode);
1128   compact(debug_control);
1129
1130   if (!set_control_index(devinfo, &temp, src))
1131      return false;
1132   if (!set_datatype_index(devinfo, &temp, src))
1133      return false;
1134   if (!set_subreg_index(devinfo, &temp, src, is_immediate))
1135      return false;
1136
1137   if (devinfo->gen >= 6) {
1138      compact(acc_wr_control);
1139   } else {
1140      compact(mask_control_ex);
1141   }
1142
1143   compact(cond_modifier);
1144
1145   if (devinfo->gen <= 6)
1146      compact(flag_subreg_nr);
1147
1148   brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1149
1150   if (!set_src0_index(devinfo, &temp, src))
1151      return false;
1152   if (!set_src1_index(devinfo, &temp, src, is_immediate))
1153      return false;
1154
1155   brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
1156                                   brw_inst_dst_da_reg_nr(devinfo, src));
1157   brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
1158                                    brw_inst_src0_da_reg_nr(devinfo, src));
1159
1160   if (is_immediate) {
1161      brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1162                                       brw_inst_imm_ud(devinfo, src) & 0xff);
1163   } else {
1164      brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1165                                       brw_inst_src1_da_reg_nr(devinfo, src));
1166   }
1167
1168#undef compact
1169
1170   *dst = temp;
1171
1172   return true;
1173}
1174
1175static void
1176set_uncompacted_control(const struct gen_device_info *devinfo, brw_inst *dst,
1177                        brw_compact_inst *src)
1178{
1179   uint32_t uncompacted =
1180      control_index_table[brw_compact_inst_control_index(devinfo, src)];
1181
1182   if (devinfo->gen >= 8) {
1183      brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1184      brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1185      brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1186      brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1187      brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1188   } else {
1189      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1190      brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1191
1192      if (devinfo->gen == 7)
1193         brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1194   }
1195}
1196
1197static void
1198set_uncompacted_datatype(const struct gen_device_info *devinfo, brw_inst *dst,
1199                         brw_compact_inst *src)
1200{
1201   uint32_t uncompacted =
1202      datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1203
1204   if (devinfo->gen >= 8) {
1205      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1206      brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1207      brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1208   } else {
1209      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1210      brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1211   }
1212}
1213
1214static void
1215set_uncompacted_subreg(const struct gen_device_info *devinfo, brw_inst *dst,
1216                       brw_compact_inst *src)
1217{
1218   uint16_t uncompacted =
1219      subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1220
1221   brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1222   brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1223   brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1224}
1225
1226static void
1227set_uncompacted_src0(const struct gen_device_info *devinfo, brw_inst *dst,
1228                     brw_compact_inst *src)
1229{
1230   uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1231   uint16_t uncompacted = src_index_table[compacted];
1232
1233   brw_inst_set_bits(dst, 88, 77, uncompacted);
1234}
1235
1236static void
1237set_uncompacted_src1(const struct gen_device_info *devinfo, brw_inst *dst,
1238                     brw_compact_inst *src, bool is_immediate)
1239{
1240   if (is_immediate) {
1241      signed high5 = brw_compact_inst_src1_index(devinfo, src);
1242      /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1243      brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
1244   } else {
1245      uint16_t uncompacted =
1246         src_index_table[brw_compact_inst_src1_index(devinfo, src)];
1247
1248      brw_inst_set_bits(dst, 120, 109, uncompacted);
1249   }
1250}
1251
1252static void
1253set_uncompacted_3src_control_index(const struct gen_device_info *devinfo,
1254                                   brw_inst *dst, brw_compact_inst *src)
1255{
1256   assert(devinfo->gen >= 8);
1257
1258   uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1259   uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1260
1261   brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1262   brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
1263
1264   if (devinfo->gen >= 9 || devinfo->is_cherryview)
1265      brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1266}
1267
1268static void
1269set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
1270                                  brw_inst *dst, brw_compact_inst *src)
1271{
1272   assert(devinfo->gen >= 8);
1273
1274   uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
1275   uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1276
1277   brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
1278   brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1279   brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
1280   brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
1281   brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
1282
1283   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1284      brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1285      brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1286      brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
1287   } else {
1288      brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1289      brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1290   }
1291}
1292
1293static void
1294brw_uncompact_3src_instruction(const struct gen_device_info *devinfo,
1295                               brw_inst *dst, brw_compact_inst *src)
1296{
1297   assert(devinfo->gen >= 8);
1298
1299#define uncompact(field) \
1300   brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1301#define uncompact_a16(field) \
1302   brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1303
1304   uncompact(opcode);
1305
1306   set_uncompacted_3src_control_index(devinfo, dst, src);
1307   set_uncompacted_3src_source_index(devinfo, dst, src);
1308
1309   uncompact(dst_reg_nr);
1310   uncompact_a16(src0_rep_ctrl);
1311   brw_inst_set_3src_cmpt_control(devinfo, dst, false);
1312   uncompact(debug_control);
1313   uncompact(saturate);
1314   uncompact_a16(src1_rep_ctrl);
1315   uncompact_a16(src2_rep_ctrl);
1316   uncompact(src0_reg_nr);
1317   uncompact(src1_reg_nr);
1318   uncompact(src2_reg_nr);
1319   uncompact_a16(src0_subreg_nr);
1320   uncompact_a16(src1_subreg_nr);
1321   uncompact_a16(src2_subreg_nr);
1322
1323#undef uncompact
1324#undef uncompact_a16
1325}
1326
1327void
1328brw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst,
1329                          brw_compact_inst *src)
1330{
1331   memset(dst, 0, sizeof(*dst));
1332
1333   if (devinfo->gen >= 8 &&
1334       is_3src(devinfo, brw_compact_inst_3src_opcode(devinfo, src))) {
1335      brw_uncompact_3src_instruction(devinfo, dst, src);
1336      return;
1337   }
1338
1339#define uncompact(field) \
1340   brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
1341
1342   uncompact(opcode);
1343   uncompact(debug_control);
1344
1345   set_uncompacted_control(devinfo, dst, src);
1346   set_uncompacted_datatype(devinfo, dst, src);
1347
1348   /* src0/1 register file fields are in the datatype table. */
1349   bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
1350                       brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
1351
1352   set_uncompacted_subreg(devinfo, dst, src);
1353
1354   if (devinfo->gen >= 6) {
1355      uncompact(acc_wr_control);
1356   } else {
1357      uncompact(mask_control_ex);
1358   }
1359
1360   uncompact(cond_modifier);
1361
1362   if (devinfo->gen <= 6)
1363      uncompact(flag_subreg_nr);
1364
1365   set_uncompacted_src0(devinfo, dst, src);
1366   set_uncompacted_src1(devinfo, dst, src, is_immediate);
1367
1368   brw_inst_set_dst_da_reg_nr(devinfo, dst,
1369                              brw_compact_inst_dst_reg_nr(devinfo, src));
1370   brw_inst_set_src0_da_reg_nr(devinfo, dst,
1371                               brw_compact_inst_src0_reg_nr(devinfo, src));
1372
1373   if (is_immediate) {
1374      brw_inst_set_imm_ud(devinfo, dst,
1375                          brw_inst_imm_ud(devinfo, dst) |
1376                          brw_compact_inst_src1_reg_nr(devinfo, src));
1377   } else {
1378      brw_inst_set_src1_da_reg_nr(devinfo, dst,
1379                                  brw_compact_inst_src1_reg_nr(devinfo, src));
1380   }
1381
1382#undef uncompact
1383}
1384
1385void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
1386                                 brw_inst *orig,
1387                                 brw_inst *uncompacted)
1388{
1389   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1390           devinfo->gen);
1391
1392   fprintf(stderr, "  before: ");
1393   brw_disassemble_inst(stderr, devinfo, orig, true);
1394
1395   fprintf(stderr, "  after:  ");
1396   brw_disassemble_inst(stderr, devinfo, uncompacted, false);
1397
1398   uint32_t *before_bits = (uint32_t *)orig;
1399   uint32_t *after_bits = (uint32_t *)uncompacted;
1400   fprintf(stderr, "  changed bits:\n");
1401   for (int i = 0; i < 128; i++) {
1402      uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1403      uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1404
1405      if (before != after) {
1406         fprintf(stderr, "  bit %d, %s to %s\n", i,
1407                 before ? "set" : "unset",
1408                 after ? "set" : "unset");
1409      }
1410   }
1411}
1412
/* Returns compacted_counts[old_target_ip] - compacted_counts[old_ip]: the
 * change in the running compaction count between the two (pre-compaction)
 * instruction indices.  The result is negative when the target's count is
 * the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1420
1421static void
1422update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
1423               int this_old_ip, int *compacted_counts)
1424{
1425   /* JIP and UIP are in units of:
1426    *    - bytes on Gen8+; and
1427    *    - compacted instructions on Gen6+.
1428    */
1429   int shift = devinfo->gen >= 8 ? 3 : 0;
1430
1431   int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
1432   jip_compacted -= compacted_between(this_old_ip,
1433                                      this_old_ip + (jip_compacted / 2),
1434                                      compacted_counts);
1435   brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
1436
1437   if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
1438       brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
1439       (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
1440      return;
1441
1442   int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
1443   uip_compacted -= compacted_between(this_old_ip,
1444                                      this_old_ip + (uip_compacted / 2),
1445                                      compacted_counts);
1446   brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
1447}
1448
1449static void
1450update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
1451                       int this_old_ip, int *compacted_counts)
1452{
1453   assert(devinfo->gen == 5 || devinfo->is_g4x);
1454
1455   /* Jump Count is in units of:
1456    *    - uncompacted instructions on G45; and
1457    *    - compacted instructions on Gen5.
1458    */
1459   int shift = devinfo->is_g4x ? 1 : 0;
1460
1461   int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
1462
1463   int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1464
1465   int this_compacted_count = compacted_counts[this_old_ip];
1466   int target_compacted_count = compacted_counts[target_old_ip];
1467
1468   jump_count_compacted -= (target_compacted_count - this_compacted_count);
1469   brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1470}
1471
1472void
1473brw_init_compaction_tables(const struct gen_device_info *devinfo)
1474{
1475   assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1476   assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1477   assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1478   assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1479   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1480   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1481   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1482   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1483   assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1484   assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1485   assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1486   assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1487   assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1488   assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1489   assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1490   assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1491   assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
1492
1493   switch (devinfo->gen) {
1494   case 11:
1495      control_index_table = gen8_control_index_table;
1496      datatype_table = gen11_datatype_table;
1497      subreg_table = gen8_subreg_table;
1498      src_index_table = gen8_src_index_table;
1499      break;
1500   case 10:
1501   case 9:
1502   case 8:
1503      control_index_table = gen8_control_index_table;
1504      datatype_table = gen8_datatype_table;
1505      subreg_table = gen8_subreg_table;
1506      src_index_table = gen8_src_index_table;
1507      break;
1508   case 7:
1509      control_index_table = gen7_control_index_table;
1510      datatype_table = gen7_datatype_table;
1511      subreg_table = gen7_subreg_table;
1512      src_index_table = gen7_src_index_table;
1513      break;
1514   case 6:
1515      control_index_table = gen6_control_index_table;
1516      datatype_table = gen6_datatype_table;
1517      subreg_table = gen6_subreg_table;
1518      src_index_table = gen6_src_index_table;
1519      break;
1520   case 5:
1521   case 4:
1522      control_index_table = g45_control_index_table;
1523      datatype_table = g45_datatype_table;
1524      subreg_table = g45_subreg_table;
1525      src_index_table = g45_src_index_table;
1526      break;
1527   default:
1528      unreachable("unknown generation");
1529   }
1530}
1531
1532void
1533brw_compact_instructions(struct brw_codegen *p, int start_offset,
1534                         struct disasm_info *disasm)
1535{
1536   if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
1537      return;
1538
1539   const struct gen_device_info *devinfo = p->devinfo;
1540   void *store = p->store + start_offset / 16;
1541   /* For an instruction at byte offset 16*i before compaction, this is the
1542    * number of compacted instructions minus the number of padding NOP/NENOPs
1543    * that preceded it.
1544    */
1545   int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1546   /* For an instruction at byte offset 8*i after compaction, this was its IP
1547    * (in 16-byte units) before compaction.
1548    */
1549   int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
1550
1551   if (devinfo->gen == 4 && !devinfo->is_g4x)
1552      return;
1553
1554   int offset = 0;
1555   int compacted_count = 0;
1556   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1557        src_offset += sizeof(brw_inst)) {
1558      brw_inst *src = store + src_offset;
1559      void *dst = store + offset;
1560
1561      old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1562      compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1563
1564      brw_inst inst = precompact(devinfo, *src);
1565      brw_inst saved = inst;
1566
1567      if (brw_try_compact_instruction(devinfo, dst, &inst)) {
1568         compacted_count++;
1569
1570         if (INTEL_DEBUG) {
1571            brw_inst uncompacted;
1572            brw_uncompact_instruction(devinfo, &uncompacted, dst);
1573            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1574               brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
1575            }
1576         }
1577
1578         offset += sizeof(brw_compact_inst);
1579      } else {
1580         /* All uncompacted instructions need to be aligned on G45. */
1581         if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
1582            brw_compact_inst *align = store + offset;
1583            memset(align, 0, sizeof(*align));
1584            brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
1585            brw_compact_inst_set_cmpt_control(devinfo, align, true);
1586            offset += sizeof(brw_compact_inst);
1587            compacted_count--;
1588            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1589            old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1590
1591            dst = store + offset;
1592         }
1593
1594         /* If we didn't compact this intruction, we need to move it down into
1595          * place.
1596          */
1597         if (offset != src_offset) {
1598            memmove(dst, src, sizeof(brw_inst));
1599         }
1600         offset += sizeof(brw_inst);
1601      }
1602   }
1603
1604   /* Add an entry for the ending offset of the program. This greatly
1605    * simplifies the linked list walk at the end of the function.
1606    */
1607   old_ip[offset / sizeof(brw_compact_inst)] =
1608      (p->next_insn_offset - start_offset) / sizeof(brw_inst);
1609
1610   /* Fix up control flow offsets. */
1611   p->next_insn_offset = start_offset + offset;
1612   for (offset = 0; offset < p->next_insn_offset - start_offset;
1613        offset = next_offset(devinfo, store, offset)) {
1614      brw_inst *insn = store + offset;
1615      int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1616      int this_compacted_count = compacted_counts[this_old_ip];
1617
1618      switch (brw_inst_opcode(devinfo, insn)) {
1619      case BRW_OPCODE_BREAK:
1620      case BRW_OPCODE_CONTINUE:
1621      case BRW_OPCODE_HALT:
1622         if (devinfo->gen >= 6) {
1623            update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1624         } else {
1625            update_gen4_jump_count(devinfo, insn, this_old_ip,
1626                                   compacted_counts);
1627         }
1628         break;
1629
1630      case BRW_OPCODE_IF:
1631      case BRW_OPCODE_IFF:
1632      case BRW_OPCODE_ELSE:
1633      case BRW_OPCODE_ENDIF:
1634      case BRW_OPCODE_WHILE:
1635         if (devinfo->gen >= 7) {
1636            if (brw_inst_cmpt_control(devinfo, insn)) {
1637               brw_inst uncompacted;
1638               brw_uncompact_instruction(devinfo, &uncompacted,
1639                                         (brw_compact_inst *)insn);
1640
1641               update_uip_jip(devinfo, &uncompacted, this_old_ip,
1642                              compacted_counts);
1643
1644               bool ret = brw_try_compact_instruction(devinfo,
1645                                                      (brw_compact_inst *)insn,
1646                                                      &uncompacted);
1647               assert(ret); (void)ret;
1648            } else {
1649               update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1650            }
1651         } else if (devinfo->gen == 6) {
1652            assert(!brw_inst_cmpt_control(devinfo, insn));
1653
1654            /* Jump Count is in units of compacted instructions on Gen6. */
1655            int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
1656
1657            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1658            int target_compacted_count = compacted_counts[target_old_ip];
1659            jump_count_compacted -= (target_compacted_count - this_compacted_count);
1660            brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
1661         } else {
1662            update_gen4_jump_count(devinfo, insn, this_old_ip,
1663                                   compacted_counts);
1664         }
1665         break;
1666
1667      case BRW_OPCODE_ADD:
1668         /* Add instructions modifying the IP register use an immediate src1,
1669          * and Gens that use this cannot compact instructions with immediate
1670          * operands.
1671          */
1672         if (brw_inst_cmpt_control(devinfo, insn))
1673            break;
1674
1675         if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1676             brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
1677            assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
1678
1679            int shift = 3;
1680            int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
1681
1682            int target_old_ip = this_old_ip + (jump_compacted / 2);
1683            int target_compacted_count = compacted_counts[target_old_ip];
1684            jump_compacted -= (target_compacted_count - this_compacted_count);
1685            brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
1686         }
1687         break;
1688      }
1689   }
1690
1691   /* p->nr_insn is counting the number of uncompacted instructions still, so
1692    * divide.  We do want to be sure there's a valid instruction in any
1693    * alignment padding, so that the next compression pass (for the FS 8/16
1694    * compile passes) parses correctly.
1695    */
1696   if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1697      brw_compact_inst *align = store + offset;
1698      memset(align, 0, sizeof(*align));
1699      brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
1700      brw_compact_inst_set_cmpt_control(devinfo, align, true);
1701      p->next_insn_offset += sizeof(brw_compact_inst);
1702   }
1703   p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1704
1705   /* Update the instruction offsets for each group. */
1706   if (disasm) {
1707      int offset = 0;
1708
1709      foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
1710         while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1711                sizeof(brw_inst) != group->offset) {
1712            assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1713                   sizeof(brw_inst) < group->offset);
1714            offset = next_offset(devinfo, store, offset);
1715         }
1716
1717         group->offset = start_offset + offset;
1718
1719         offset = next_offset(devinfo, store, offset);
1720      }
1721   }
1722}
1723