1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32/** @file brw_reg.h
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers.  They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42#ifndef BRW_REG_H
43#define BRW_REG_H
44
45#include <stdbool.h>
46#include "main/compiler.h"
47#include "main/macros.h"
48#include "program/prog_instruction.h"
49#include "brw_eu_defines.h"
50#include "brw_reg_type.h"
51
52#ifdef __cplusplus
53extern "C" {
54#endif
55
56struct gen_device_info;
57
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/**
 * First GRF used for the MRF hack.
 *
 * On gen7, MRFs are no longer used, and contiguous GRFs are used instead.  We
 * haven't converted our compiler to be aware of this, so it asks for MRFs and
 * brw_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
 * register allocators have to be careful of this to avoid corrupting the "MRF"s
 * with actual GRF allocations.
 */
#define GEN7_MRF_HACK_START 112

/**
 * Number of message register file registers: 24 when \p gen is 6, 16 for
 * every other value.
 *
 * The argument is parenthesized so that operators inside it (e.g. `a & b` or
 * `c ? x : y`) are evaluated before the comparison against 6.
 */
#define BRW_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
74
/* Pack four 2-bit channel selectors into one swizzle byte; selector \p a
 * occupies bits 1:0 and selector \p d bits 7:6.
 */
#define BRW_SWIZZLE4(a,b,c,d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))

/* Extract the 2-bit selector stored for channel \p idx of swizzle \p swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> (2 * (idx))) & 0x3)

/* Named swizzles; each letter is the source component read by X, Y, Z, W. */
#define BRW_SWIZZLE_NOOP   BRW_SWIZZLE4(0, 1, 2, 3)
#define BRW_SWIZZLE_XYZW   BRW_SWIZZLE4(0, 1, 2, 3)
#define BRW_SWIZZLE_XXXX   BRW_SWIZZLE4(0, 0, 0, 0)
#define BRW_SWIZZLE_YYYY   BRW_SWIZZLE4(1, 1, 1, 1)
#define BRW_SWIZZLE_ZZZZ   BRW_SWIZZLE4(2, 2, 2, 2)
#define BRW_SWIZZLE_WWWW   BRW_SWIZZLE4(3, 3, 3, 3)
#define BRW_SWIZZLE_XYXY   BRW_SWIZZLE4(0, 1, 0, 1)
#define BRW_SWIZZLE_YXYX   BRW_SWIZZLE4(1, 0, 1, 0)
#define BRW_SWIZZLE_XZXZ   BRW_SWIZZLE4(0, 2, 0, 2)
#define BRW_SWIZZLE_YZXW   BRW_SWIZZLE4(1, 2, 0, 3)
#define BRW_SWIZZLE_YWYW   BRW_SWIZZLE4(1, 3, 1, 3)
#define BRW_SWIZZLE_ZXYW   BRW_SWIZZLE4(2, 0, 1, 3)
#define BRW_SWIZZLE_ZWZW   BRW_SWIZZLE4(2, 3, 2, 3)
#define BRW_SWIZZLE_WZWZ   BRW_SWIZZLE4(3, 2, 3, 2)
#define BRW_SWIZZLE_WZYX   BRW_SWIZZLE4(3, 2, 1, 0)
#define BRW_SWIZZLE_XXZZ   BRW_SWIZZLE4(0, 0, 2, 2)
#define BRW_SWIZZLE_YYWW   BRW_SWIZZLE4(1, 1, 3, 3)
#define BRW_SWIZZLE_YXWZ   BRW_SWIZZLE4(1, 0, 3, 2)

/* Identity swizzle shifted right / left by \p comp selector slots.  The
 * result is not masked to 8 bits.
 */
#define BRW_SWZ_COMP_INPUT(comp)  (BRW_SWIZZLE_XYZW >> (2 * (comp)))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << (2 * (comp)))
99
100static inline bool
101brw_is_single_value_swizzle(unsigned swiz)
102{
103   return (swiz == BRW_SWIZZLE_XXXX ||
104           swiz == BRW_SWIZZLE_YYYY ||
105           swiz == BRW_SWIZZLE_ZZZZ ||
106           swiz == BRW_SWIZZLE_WWWW);
107}
108
/**
 * Compute the swizzle obtained from the application of \p swz0 on the result
 * of \p swz1.  The argument ordering is expected to match function
 * composition.
 *
 * For each output channel, the selector taken from \p swz0 is used as the
 * channel index into \p swz1.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned sel = BRW_GET_SWZ(swz0, chan);

      composed |= BRW_GET_SWZ(swz1, sel) << (2 * chan);
   }

   return composed;
}
123
/**
 * Return the result of applying swizzle \p swz to shuffle the bits of \p mask
 * (AKA image).
 *
 * Bit i of the result is set when the component selected by channel i of
 * \p swz is set in \p mask.
 */
static inline unsigned
brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned image = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned src = BRW_GET_SWZ(swz, chan);

      if ((mask >> src) & 1)
         image |= 1u << chan;
   }

   return image;
}
140
/**
 * Return the result of applying the inverse of swizzle \p swz to shuffle the
 * bits of \p mask (AKA preimage).  Useful to find out which components are
 * read from a swizzled source given the instruction writemask.
 *
 * For every channel i enabled in \p mask, the bit of the component selected
 * by channel i of \p swz is set in the result.
 */
static inline unsigned
brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned preimage = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      if ((mask >> chan) & 1)
         preimage |= 1u << BRW_GET_SWZ(swz, chan);
   }

   return preimage;
}
158
/**
 * Construct an identity swizzle for the set of enabled channels given by \p
 * mask.  The result will only reference channels enabled in the provided \p
 * mask, assuming that \p mask is non-zero.  The constructed swizzle will
 * satisfy the property that for any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * will be equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 *
 * Enabled channels select themselves; a disabled channel repeats the most
 * recent enabled channel below it, or the lowest enabled channel when there
 * is none below.
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Selector used for disabled channels; starts at the lowest set bit. */
   unsigned fallback = 0;
   if (mask)
      fallback = ffs(mask) - 1;

   unsigned packed = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         fallback = chan;

      packed |= fallback << (2 * chan);
   }

   return packed;
}
183
/**
 * Construct an identity swizzle for the first \p n components of a vector.
 * When only a subset of channels of a vec4 are used we don't want to
 * reference the other channels, as that will tell optimization passes that
 * those other channels are used.
 */
static inline unsigned
brw_swizzle_for_size(unsigned n)
{
   /* Mask with the low n bits set. */
   const unsigned low_mask = (1 << n) - 1;

   return brw_swizzle_for_mask(low_mask);
}
195
/**
 * Converse of brw_swizzle_for_mask().  Returns the mask of components
 * accessed by the specified swizzle \p swz, computed as the preimage of a
 * mask with every channel enabled.
 */
static inline unsigned
brw_mask_for_swizzle(unsigned swz)
{
   const unsigned all_channels = ~0;

   return brw_apply_inv_swizzle_to_mask(swz, all_channels);
}
205
/* Defined outside this header.  brw_swizzle() calls it to apply swizzle
 * \p swz to the 32-bit immediate payload \p x of the given \p type.
 */
uint32_t
brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);
207
/* Size of one register in bytes (8 * 4 = 32); byte_offset() uses it to split
 * a linear byte address into register number and sub-register offset.
 */
#define REG_SIZE (8 * 4)
209
210/* These aren't hardware structs, just something useful for us to pass around:
211 *
212 * Align1 operation has a lot of control over input ranges.  Used in
213 * WM programs to implement shaders decomposed into "channel serial"
214 * or "structure of array" form:
215 */
216struct brw_reg {
217   union {
218      struct {
219         enum brw_reg_type type:4;
220         enum brw_reg_file file:3;      /* :2 hardware format */
221         unsigned negate:1;             /* source only */
222         unsigned abs:1;                /* source only */
223         unsigned address_mode:1;       /* relative addressing, hopefully! */
224         unsigned pad0:17;
225         unsigned subnr:5;              /* :1 in align16 */
226      };
227      uint32_t bits;
228   };
229
230   union {
231      struct {
232         unsigned nr;
233         unsigned swizzle:8;      /* src only, align16 only */
234         unsigned writemask:4;    /* dest only, align16 only */
235         int  indirect_offset:10; /* relative addressing offset */
236         unsigned vstride:4;      /* source only */
237         unsigned width:3;        /* src only, align1 only */
238         unsigned hstride:2;      /* align1 only */
239         unsigned pad1:1;
240      };
241
242      double df;
243      uint64_t u64;
244      int64_t d64;
245      float f;
246      int   d;
247      unsigned ud;
248   };
249};
250
251static inline bool
252brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
253{
254   return a->bits == b->bits && a->u64 == b->u64;
255}
256
257static inline bool
258brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
259{
260   if (a->file == IMM) {
261      if (a->bits != b->bits)
262         return false;
263
264      switch ((enum brw_reg_type) a->type) {
265      case BRW_REGISTER_TYPE_UQ:
266      case BRW_REGISTER_TYPE_Q:
267         return a->d64 == -b->d64;
268      case BRW_REGISTER_TYPE_DF:
269         return a->df == -b->df;
270      case BRW_REGISTER_TYPE_UD:
271      case BRW_REGISTER_TYPE_D:
272         return a->d == -b->d;
273      case BRW_REGISTER_TYPE_F:
274         return a->f == -b->f;
275      case BRW_REGISTER_TYPE_VF:
276         /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
277          * of -0).  There are occasions where 0 or -0 is used and the exact
278          * bit pattern is desired.  At the very least, changing this to allow
279          * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
280          */
281         return a->ud == (b->ud ^ 0x80808080);
282      case BRW_REGISTER_TYPE_UW:
283      case BRW_REGISTER_TYPE_W:
284      case BRW_REGISTER_TYPE_UV:
285      case BRW_REGISTER_TYPE_V:
286      case BRW_REGISTER_TYPE_HF:
287         /* FINISHME: Implement support for these types once there is
288          * something in the compiler that can generate them.  Until then,
289          * they cannot be tested.
290          */
291         return false;
292      case BRW_REGISTER_TYPE_UB:
293      case BRW_REGISTER_TYPE_B:
294      case BRW_REGISTER_TYPE_NF:
295      default:
296         unreachable("not reached");
297      }
298   } else {
299      struct brw_reg tmp = *a;
300
301      tmp.negate = !tmp.negate;
302
303      return brw_regs_equal(&tmp, b);
304   }
305}
306
/* Indirect-address descriptor consumed by deref_4f(): an address
 * sub-register number plus a signed offset.
 */
struct brw_indirect {
   unsigned addr_subnr:4;
   /* Explicitly `signed`: whether a plain `int` bit-field is signed is
    * implementation-defined, and the offset may be negative.
    */
   signed int addr_offset:10;
   unsigned pad:18;
};
312
313
314static inline unsigned
315type_sz(unsigned type)
316{
317   switch(type) {
318   case BRW_REGISTER_TYPE_UQ:
319   case BRW_REGISTER_TYPE_Q:
320   case BRW_REGISTER_TYPE_DF:
321      return 8;
322   case BRW_REGISTER_TYPE_UD:
323   case BRW_REGISTER_TYPE_D:
324   case BRW_REGISTER_TYPE_F:
325   case BRW_REGISTER_TYPE_VF:
326      return 4;
327   case BRW_REGISTER_TYPE_UW:
328   case BRW_REGISTER_TYPE_W:
329   case BRW_REGISTER_TYPE_UV:
330   case BRW_REGISTER_TYPE_V:
331   case BRW_REGISTER_TYPE_HF:
332      return 2;
333   case BRW_REGISTER_TYPE_UB:
334   case BRW_REGISTER_TYPE_B:
335      return 1;
336   default:
337      unreachable("not reached");
338   }
339}
340
341static inline enum brw_reg_type
342get_exec_type(const enum brw_reg_type type)
343{
344   switch (type) {
345   case BRW_REGISTER_TYPE_B:
346   case BRW_REGISTER_TYPE_V:
347      return BRW_REGISTER_TYPE_W;
348   case BRW_REGISTER_TYPE_UB:
349   case BRW_REGISTER_TYPE_UV:
350      return BRW_REGISTER_TYPE_UW;
351   case BRW_REGISTER_TYPE_VF:
352      return BRW_REGISTER_TYPE_F;
353   default:
354      return type;
355   }
356}
357
358/**
359 * Return an integer type of the requested size and signedness.
360 */
361static inline enum brw_reg_type
362brw_int_type(unsigned sz, bool is_signed)
363{
364   switch (sz) {
365   case 1:
366      return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB);
367   case 2:
368      return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
369   case 4:
370      return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
371   case 8:
372      return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ);
373   default:
374      unreachable("Not reached.");
375   }
376}
377
378static inline bool
379type_is_unsigned_int(enum brw_reg_type tp)
380{
381   return tp == BRW_REGISTER_TYPE_UB ||
382          tp == BRW_REGISTER_TYPE_UW ||
383          tp == BRW_REGISTER_TYPE_UD ||
384          tp == BRW_REGISTER_TYPE_UQ;
385}
386
387/**
388 * Construct a brw_reg.
389 * \param file      one of the BRW_x_REGISTER_FILE values
390 * \param nr        register number/index
391 * \param subnr     register sub number
392 * \param negate    register negate modifier
393 * \param abs       register abs modifier
394 * \param type      one of BRW_REGISTER_TYPE_x
395 * \param vstride   one of BRW_VERTICAL_STRIDE_x
396 * \param width     one of BRW_WIDTH_x
397 * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
398 * \param swizzle   one of BRW_SWIZZLE_x
399 * \param writemask WRITEMASK_X/Y/Z/W bitfield
400 */
401static inline struct brw_reg
402brw_reg(enum brw_reg_file file,
403        unsigned nr,
404        unsigned subnr,
405        unsigned negate,
406        unsigned abs,
407        enum brw_reg_type type,
408        unsigned vstride,
409        unsigned width,
410        unsigned hstride,
411        unsigned swizzle,
412        unsigned writemask)
413{
414   struct brw_reg reg;
415   if (file == BRW_GENERAL_REGISTER_FILE)
416      assert(nr < BRW_MAX_GRF);
417   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
418      assert(nr <= BRW_ARF_TIMESTAMP);
419   /* Asserting on the MRF register number requires to know the hardware gen
420    * (gen6 has 24 MRF registers), which we don't know here, so we assert
421    * for that in the generators and in brw_eu_emit.c
422    */
423
424   reg.type = type;
425   reg.file = file;
426   reg.negate = negate;
427   reg.abs = abs;
428   reg.address_mode = BRW_ADDRESS_DIRECT;
429   reg.pad0 = 0;
430   reg.subnr = subnr * type_sz(type);
431   reg.nr = nr;
432
433   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
434    * set swizzle and writemask to W, as the lower bits of subnr will
435    * be lost when converted to align16.  This is probably too much to
436    * keep track of as you'd want it adjusted by suboffset(), etc.
437    * Perhaps fix up when converting to align16?
438    */
439   reg.swizzle = swizzle;
440   reg.writemask = writemask;
441   reg.indirect_offset = 0;
442   reg.vstride = vstride;
443   reg.width = width;
444   reg.hstride = hstride;
445   reg.pad1 = 0;
446   return reg;
447}
448
449/** Construct float[16] register */
450static inline struct brw_reg
451brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
452{
453   return brw_reg(file,
454                  nr,
455                  subnr,
456                  0,
457                  0,
458                  BRW_REGISTER_TYPE_F,
459                  BRW_VERTICAL_STRIDE_16,
460                  BRW_WIDTH_16,
461                  BRW_HORIZONTAL_STRIDE_1,
462                  BRW_SWIZZLE_XYZW,
463                  WRITEMASK_XYZW);
464}
465
466/** Construct float[8] register */
467static inline struct brw_reg
468brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
469{
470   return brw_reg(file,
471                  nr,
472                  subnr,
473                  0,
474                  0,
475                  BRW_REGISTER_TYPE_F,
476                  BRW_VERTICAL_STRIDE_8,
477                  BRW_WIDTH_8,
478                  BRW_HORIZONTAL_STRIDE_1,
479                  BRW_SWIZZLE_XYZW,
480                  WRITEMASK_XYZW);
481}
482
483/** Construct float[4] register */
484static inline struct brw_reg
485brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
486{
487   return brw_reg(file,
488                  nr,
489                  subnr,
490                  0,
491                  0,
492                  BRW_REGISTER_TYPE_F,
493                  BRW_VERTICAL_STRIDE_4,
494                  BRW_WIDTH_4,
495                  BRW_HORIZONTAL_STRIDE_1,
496                  BRW_SWIZZLE_XYZW,
497                  WRITEMASK_XYZW);
498}
499
500/** Construct float[2] register */
501static inline struct brw_reg
502brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
503{
504   return brw_reg(file,
505                  nr,
506                  subnr,
507                  0,
508                  0,
509                  BRW_REGISTER_TYPE_F,
510                  BRW_VERTICAL_STRIDE_2,
511                  BRW_WIDTH_2,
512                  BRW_HORIZONTAL_STRIDE_1,
513                  BRW_SWIZZLE_XYXY,
514                  WRITEMASK_XY);
515}
516
517/** Construct float[1] register */
518static inline struct brw_reg
519brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
520{
521   return brw_reg(file,
522                  nr,
523                  subnr,
524                  0,
525                  0,
526                  BRW_REGISTER_TYPE_F,
527                  BRW_VERTICAL_STRIDE_0,
528                  BRW_WIDTH_1,
529                  BRW_HORIZONTAL_STRIDE_0,
530                  BRW_SWIZZLE_XXXX,
531                  WRITEMASK_X);
532}
533
534static inline struct brw_reg
535brw_vecn_reg(unsigned width, enum brw_reg_file file,
536             unsigned nr, unsigned subnr)
537{
538   switch (width) {
539   case 1:
540      return brw_vec1_reg(file, nr, subnr);
541   case 2:
542      return brw_vec2_reg(file, nr, subnr);
543   case 4:
544      return brw_vec4_reg(file, nr, subnr);
545   case 8:
546      return brw_vec8_reg(file, nr, subnr);
547   case 16:
548      return brw_vec16_reg(file, nr, subnr);
549   default:
550      unreachable("Invalid register width");
551   }
552}
553
554static inline struct brw_reg
555retype(struct brw_reg reg, enum brw_reg_type type)
556{
557   reg.type = type;
558   return reg;
559}
560
561static inline struct brw_reg
562firsthalf(struct brw_reg reg)
563{
564   return reg;
565}
566
567static inline struct brw_reg
568sechalf(struct brw_reg reg)
569{
570   if (reg.vstride)
571      reg.nr++;
572   return reg;
573}
574
575static inline struct brw_reg
576offset(struct brw_reg reg, unsigned delta)
577{
578   reg.nr += delta;
579   return reg;
580}
581
582
583static inline struct brw_reg
584byte_offset(struct brw_reg reg, unsigned bytes)
585{
586   unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
587   reg.nr = newoffset / REG_SIZE;
588   reg.subnr = newoffset % REG_SIZE;
589   return reg;
590}
591
592static inline struct brw_reg
593suboffset(struct brw_reg reg, unsigned delta)
594{
595   return byte_offset(reg, delta * type_sz(reg.type));
596}
597
598/** Construct unsigned word[16] register */
599static inline struct brw_reg
600brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
601{
602   return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
603}
604
605/** Construct unsigned word[8] register */
606static inline struct brw_reg
607brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
608{
609   return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
610}
611
612/** Construct unsigned word[1] register */
613static inline struct brw_reg
614brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
615{
616   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
617}
618
619static inline struct brw_reg
620brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
621{
622   return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD);
623}
624
625static inline struct brw_reg
626brw_imm_reg(enum brw_reg_type type)
627{
628   return brw_reg(BRW_IMMEDIATE_VALUE,
629                  0,
630                  0,
631                  0,
632                  0,
633                  type,
634                  BRW_VERTICAL_STRIDE_0,
635                  BRW_WIDTH_1,
636                  BRW_HORIZONTAL_STRIDE_0,
637                  0,
638                  0);
639}
640
641/** Construct float immediate register */
642static inline struct brw_reg
643brw_imm_df(double df)
644{
645   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF);
646   imm.df = df;
647   return imm;
648}
649
650static inline struct brw_reg
651brw_imm_u64(uint64_t u64)
652{
653   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
654   imm.u64 = u64;
655   return imm;
656}
657
658static inline struct brw_reg
659brw_imm_f(float f)
660{
661   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
662   imm.f = f;
663   return imm;
664}
665
666/** Construct int64_t immediate register */
667static inline struct brw_reg
668brw_imm_q(int64_t q)
669{
670   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q);
671   imm.d64 = q;
672   return imm;
673}
674
675/** Construct int64_t immediate register */
676static inline struct brw_reg
677brw_imm_uq(uint64_t uq)
678{
679   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
680   imm.u64 = uq;
681   return imm;
682}
683
684/** Construct integer immediate register */
685static inline struct brw_reg
686brw_imm_d(int d)
687{
688   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
689   imm.d = d;
690   return imm;
691}
692
693/** Construct uint immediate register */
694static inline struct brw_reg
695brw_imm_ud(unsigned ud)
696{
697   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
698   imm.ud = ud;
699   return imm;
700}
701
702/** Construct ushort immediate register */
703static inline struct brw_reg
704brw_imm_uw(uint16_t uw)
705{
706   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
707   imm.ud = uw | (uw << 16);
708   return imm;
709}
710
711/** Construct short immediate register */
712static inline struct brw_reg
713brw_imm_w(int16_t w)
714{
715   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
716   imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
717   return imm;
718}
719
720/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
721 * numbers alias with _V and _VF below:
722 */
723
724/** Construct vector of eight signed half-byte values */
725static inline struct brw_reg
726brw_imm_v(unsigned v)
727{
728   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
729   imm.ud = v;
730   return imm;
731}
732
733/** Construct vector of eight unsigned half-byte values */
734static inline struct brw_reg
735brw_imm_uv(unsigned uv)
736{
737   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
738   imm.ud = uv;
739   return imm;
740}
741
742/** Construct vector of four 8-bit float values */
743static inline struct brw_reg
744brw_imm_vf(unsigned v)
745{
746   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
747   imm.ud = v;
748   return imm;
749}
750
751static inline struct brw_reg
752brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
753{
754   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
755   imm.vstride = BRW_VERTICAL_STRIDE_0;
756   imm.width = BRW_WIDTH_4;
757   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
758   imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
759   return imm;
760}
761
762
763static inline struct brw_reg
764brw_address(struct brw_reg reg)
765{
766   return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
767}
768
769/** Construct float[1] general-purpose register */
770static inline struct brw_reg
771brw_vec1_grf(unsigned nr, unsigned subnr)
772{
773   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
774}
775
776/** Construct float[2] general-purpose register */
777static inline struct brw_reg
778brw_vec2_grf(unsigned nr, unsigned subnr)
779{
780   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
781}
782
783/** Construct float[4] general-purpose register */
784static inline struct brw_reg
785brw_vec4_grf(unsigned nr, unsigned subnr)
786{
787   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
788}
789
790/** Construct float[8] general-purpose register */
791static inline struct brw_reg
792brw_vec8_grf(unsigned nr, unsigned subnr)
793{
794   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
795}
796
797/** Construct float[16] general-purpose register */
798static inline struct brw_reg
799brw_vec16_grf(unsigned nr, unsigned subnr)
800{
801   return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
802}
803
804static inline struct brw_reg
805brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
806{
807   return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr);
808}
809
810
811static inline struct brw_reg
812brw_uw8_grf(unsigned nr, unsigned subnr)
813{
814   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
815}
816
817static inline struct brw_reg
818brw_uw16_grf(unsigned nr, unsigned subnr)
819{
820   return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
821}
822
823
824/** Construct null register (usually used for setting condition codes) */
825static inline struct brw_reg
826brw_null_reg(void)
827{
828   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
829}
830
831static inline struct brw_reg
832brw_null_vec(unsigned width)
833{
834   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
835}
836
837static inline struct brw_reg
838brw_address_reg(unsigned subnr)
839{
840   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
841}
842
843static inline struct brw_reg
844brw_tdr_reg(void)
845{
846   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0);
847}
848
849/* If/else instructions break in align16 mode if writemask & swizzle
850 * aren't xyzw.  This goes against the convention for other scalar
851 * regs:
852 */
853static inline struct brw_reg
854brw_ip_reg(void)
855{
856   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
857                  BRW_ARF_IP,
858                  0,
859                  0,
860                  0,
861                  BRW_REGISTER_TYPE_UD,
862                  BRW_VERTICAL_STRIDE_4, /* ? */
863                  BRW_WIDTH_1,
864                  BRW_HORIZONTAL_STRIDE_0,
865                  BRW_SWIZZLE_XYZW, /* NOTE! */
866                  WRITEMASK_XYZW); /* NOTE! */
867}
868
869static inline struct brw_reg
870brw_notification_reg(void)
871{
872   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
873                  BRW_ARF_NOTIFICATION_COUNT,
874                  0,
875                  0,
876                  0,
877                  BRW_REGISTER_TYPE_UD,
878                  BRW_VERTICAL_STRIDE_0,
879                  BRW_WIDTH_1,
880                  BRW_HORIZONTAL_STRIDE_0,
881                  BRW_SWIZZLE_XXXX,
882                  WRITEMASK_X);
883}
884
885static inline struct brw_reg
886brw_cr0_reg(unsigned subnr)
887{
888   return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr);
889}
890
891static inline struct brw_reg
892brw_sr0_reg(unsigned subnr)
893{
894   return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr);
895}
896
897static inline struct brw_reg
898brw_acc_reg(unsigned width)
899{
900   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
901                       BRW_ARF_ACCUMULATOR, 0);
902}
903
904static inline struct brw_reg
905brw_flag_reg(int reg, int subreg)
906{
907   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
908                      BRW_ARF_FLAG + reg, subreg);
909}
910
911static inline struct brw_reg
912brw_flag_subreg(unsigned subreg)
913{
914   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
915                      BRW_ARF_FLAG + subreg / 2, subreg % 2);
916}
917
918/**
919 * Return the mask register present in Gen4-5, or the related register present
920 * in Gen7.5 and later hardware referred to as "channel enable" register in
921 * the documentation.
922 */
923static inline struct brw_reg
924brw_mask_reg(unsigned subnr)
925{
926   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
927}
928
929static inline struct brw_reg
930brw_vmask_reg()
931{
932   return brw_sr0_reg(3);
933}
934
935static inline struct brw_reg
936brw_dmask_reg()
937{
938   return brw_sr0_reg(2);
939}
940
941static inline struct brw_reg
942brw_message_reg(unsigned nr)
943{
944   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
945}
946
947static inline struct brw_reg
948brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
949{
950   return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
951                 BRW_REGISTER_TYPE_UD);
952}
953
/* Encode a stride/width count as a field value: 0 -> 0 and 1, 2, 4, 8, 16,
 * 32 -> 1..6 (log2 + 1).  Any other input yields 0.
 *
 * This is almost always called with a numeric constant argument, so
 * make things easy to evaluate at compile time:
 */
static inline unsigned cvt(unsigned val)
{
   if (val == 1)
      return 1;
   if (val == 2)
      return 2;
   if (val == 4)
      return 3;
   if (val == 8)
      return 4;
   if (val == 16)
      return 5;
   if (val == 32)
      return 6;

   /* Zero and every unsupported value. */
   return 0;
}
970
971static inline struct brw_reg
972stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
973{
974   reg.vstride = cvt(vstride);
975   reg.width = cvt(width) - 1;
976   reg.hstride = cvt(hstride);
977   return reg;
978}
979
980/**
981 * Multiply the vertical and horizontal stride of a register by the given
982 * factor \a s.
983 */
984static inline struct brw_reg
985spread(struct brw_reg reg, unsigned s)
986{
987   if (s) {
988      assert(_mesa_is_pow_two(s));
989
990      if (reg.hstride)
991         reg.hstride += cvt(s) - 1;
992
993      if (reg.vstride)
994         reg.vstride += cvt(s) - 1;
995
996      return reg;
997   } else {
998      return stride(reg, 0, 1, 0);
999   }
1000}
1001
1002/**
1003 * Reinterpret each channel of register \p reg as a vector of values of the
1004 * given smaller type and take the i-th subcomponent from each.
1005 */
1006static inline struct brw_reg
1007subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1008{
1009   if (reg.file == IMM)
1010      return reg;
1011
1012   unsigned scale = type_sz(reg.type) / type_sz(type);
1013   assert(scale >= 1 && i < scale);
1014
1015   return suboffset(retype(spread(reg, scale), type), i);
1016}
1017
1018static inline struct brw_reg
1019vec16(struct brw_reg reg)
1020{
1021   return stride(reg, 16,16,1);
1022}
1023
1024static inline struct brw_reg
1025vec8(struct brw_reg reg)
1026{
1027   return stride(reg, 8,8,1);
1028}
1029
1030static inline struct brw_reg
1031vec4(struct brw_reg reg)
1032{
1033   return stride(reg, 4,4,1);
1034}
1035
1036static inline struct brw_reg
1037vec2(struct brw_reg reg)
1038{
1039   return stride(reg, 2,2,1);
1040}
1041
1042static inline struct brw_reg
1043vec1(struct brw_reg reg)
1044{
1045   return stride(reg, 0,1,0);
1046}
1047
1048
1049static inline struct brw_reg
1050get_element(struct brw_reg reg, unsigned elt)
1051{
1052   return vec1(suboffset(reg, elt));
1053}
1054
1055static inline struct brw_reg
1056get_element_ud(struct brw_reg reg, unsigned elt)
1057{
1058   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
1059}
1060
1061static inline struct brw_reg
1062get_element_d(struct brw_reg reg, unsigned elt)
1063{
1064   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
1065}
1066
1067static inline struct brw_reg
1068brw_swizzle(struct brw_reg reg, unsigned swz)
1069{
1070   if (reg.file == BRW_IMMEDIATE_VALUE)
1071      reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1072   else
1073      reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1074
1075   return reg;
1076}
1077
1078static inline struct brw_reg
1079brw_writemask(struct brw_reg reg, unsigned mask)
1080{
1081   assert(reg.file != BRW_IMMEDIATE_VALUE);
1082   reg.writemask &= mask;
1083   return reg;
1084}
1085
1086static inline struct brw_reg
1087brw_set_writemask(struct brw_reg reg, unsigned mask)
1088{
1089   assert(reg.file != BRW_IMMEDIATE_VALUE);
1090   reg.writemask = mask;
1091   return reg;
1092}
1093
/**
 * Build a mask with the \p n lowest bits set (n == 0 gives 0, n == 4 gives
 * 0xf).
 *
 * \param n  number of low bits to set; must be smaller than the bit width of
 *           unsigned.
 * \return   (2^n) - 1 as an unsigned value.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   /* A shift count equal to or above the type width is undefined. */
   assert(n < sizeof(unsigned) * 8);

   /* Shift an unsigned one: shifting a signed 1 into the sign bit (n == 31)
    * would be undefined behaviour.
    */
   return (1u << n) - 1u;
}
1099
/**
 * Build a mask of \p n consecutive bits whose lowest set bit is bit
 * \p first_component.
 *
 * The selected range must fit in four bits (first_component + n <= 4).
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   /* n low bits set, then moved up to start at first_component. */
   const unsigned low_bits = (1 << n) - 1;
   return low_bits << first_component;
}
1106
1107static inline struct brw_reg
1108negate(struct brw_reg reg)
1109{
1110   reg.negate ^= 1;
1111   return reg;
1112}
1113
1114static inline struct brw_reg
1115brw_abs(struct brw_reg reg)
1116{
1117   reg.abs = 1;
1118   reg.negate = 0;
1119   return reg;
1120}
1121
1122/************************************************************************/
1123
1124static inline struct brw_reg
1125brw_vec4_indirect(unsigned subnr, int offset)
1126{
1127   struct brw_reg reg =  brw_vec4_grf(0, 0);
1128   reg.subnr = subnr;
1129   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1130   reg.indirect_offset = offset;
1131   return reg;
1132}
1133
1134static inline struct brw_reg
1135brw_vec1_indirect(unsigned subnr, int offset)
1136{
1137   struct brw_reg reg =  brw_vec1_grf(0, 0);
1138   reg.subnr = subnr;
1139   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1140   reg.indirect_offset = offset;
1141   return reg;
1142}
1143
1144static inline struct brw_reg
1145brw_VxH_indirect(unsigned subnr, int offset)
1146{
1147   struct brw_reg reg = brw_vec1_grf(0, 0);
1148   reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1149   reg.subnr = subnr;
1150   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1151   reg.indirect_offset = offset;
1152   return reg;
1153}
1154
1155static inline struct brw_reg
1156deref_4f(struct brw_indirect ptr, int offset)
1157{
1158   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1159}
1160
1161static inline struct brw_reg
1162deref_1f(struct brw_indirect ptr, int offset)
1163{
1164   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1165}
1166
1167static inline struct brw_reg
1168deref_4b(struct brw_indirect ptr, int offset)
1169{
1170   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1171}
1172
1173static inline struct brw_reg
1174deref_1uw(struct brw_indirect ptr, int offset)
1175{
1176   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1177}
1178
1179static inline struct brw_reg
1180deref_1d(struct brw_indirect ptr, int offset)
1181{
1182   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1183}
1184
1185static inline struct brw_reg
1186deref_1ud(struct brw_indirect ptr, int offset)
1187{
1188   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1189}
1190
1191static inline struct brw_reg
1192get_addr_reg(struct brw_indirect ptr)
1193{
1194   return brw_address_reg(ptr.addr_subnr);
1195}
1196
1197static inline struct brw_indirect
1198brw_indirect_offset(struct brw_indirect ptr, int offset)
1199{
1200   ptr.addr_offset += offset;
1201   return ptr;
1202}
1203
1204static inline struct brw_indirect
1205brw_indirect(unsigned addr_subnr, int offset)
1206{
1207   struct brw_indirect ptr;
1208   ptr.addr_subnr = addr_subnr;
1209   ptr.addr_offset = offset;
1210   ptr.pad = 0;
1211   return ptr;
1212}
1213
1214static inline bool
1215region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1216               enum brw_width w, enum brw_horizontal_stride h)
1217{
1218   return reg.vstride == v &&
1219          reg.width == w &&
1220          reg.hstride == h;
1221}
1222
/* True when reg's region is exactly BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1,
 * BRW_HORIZONTAL_STRIDE_0, as checked by region_matches().
 */
#define has_scalar_region(reg)                              \
   region_matches((reg), BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \
                  BRW_HORIZONTAL_STRIDE_0)
1226
/* brw_packed_float.c */

/* Declarations only; both functions are implemented in brw_packed_float.c.
 *
 * NOTE(review): judging by the names and the unsigned char parameter, these
 * convert between a 32-bit float and a one-byte "vf" encoding -- confirm
 * against the implementation.  The int return type of brw_float_to_vf()
 * (wider than the unsigned char taken by brw_vf_to_float()) suggests an
 * out-of-band failure value; verify before relying on it.
 */
int brw_float_to_vf(float f);
float brw_vf_to_float(unsigned char vf);
1230
1231#ifdef __cplusplus
1232}
1233#endif
1234
1235#endif
1236