1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw@vmware.com> 30 */ 31 32/** @file brw_reg.h 33 * 34 * This file defines struct brw_reg, which is our representation for EU 35 * registers. They're not a hardware specific format, just an abstraction 36 * that intends to capture the full flexibility of the hardware registers. 37 * 38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode 39 * the abstract brw_reg type into the actual hardware instruction encoding. 40 */ 41 42#ifndef BRW_REG_H 43#define BRW_REG_H 44 45#include <stdbool.h> 46#include "main/compiler.h" 47#include "main/macros.h" 48#include "program/prog_instruction.h" 49#include "brw_eu_defines.h" 50#include "brw_reg_type.h" 51 52#ifdef __cplusplus 53extern "C" { 54#endif 55 56struct gen_device_info; 57 58/** Number of general purpose registers (VS, WM, etc) */ 59#define BRW_MAX_GRF 128 60 61/** 62 * First GRF used for the MRF hack. 63 * 64 * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We 65 * haven't converted our compiler to be aware of this, so it asks for MRFs and 66 * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The 67 * register allocators have to be careful of this to avoid corrupting the "MRF"s 68 * with actual GRF allocations. 69 */ 70#define GEN7_MRF_HACK_START 112 71 72/** Number of message register file registers */ 73#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16) 74 75#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) 76#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 77 78#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) 79#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) 80#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) 81#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) 82#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) 83#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) 84#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) 85#define BRW_SWIZZLE_YXYX BRW_SWIZZLE4(1,0,1,0) 86#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2) 87#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3) 88#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3) 89#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3) 90#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3) 91#define BRW_SWIZZLE_WZWZ BRW_SWIZZLE4(3,2,3,2) 92#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0) 93#define BRW_SWIZZLE_XXZZ BRW_SWIZZLE4(0,0,2,2) 94#define BRW_SWIZZLE_YYWW BRW_SWIZZLE4(1,1,3,3) 95#define BRW_SWIZZLE_YXWZ BRW_SWIZZLE4(1,0,3,2) 96 97#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2)) 98#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2)) 99 100static inline bool 101brw_is_single_value_swizzle(unsigned swiz) 102{ 103 return (swiz == BRW_SWIZZLE_XXXX || 104 swiz == BRW_SWIZZLE_YYYY || 105 swiz == BRW_SWIZZLE_ZZZZ || 106 swiz == BRW_SWIZZLE_WWWW); 107} 108 109/** 110 * Compute the swizzle obtained from the application of \p swz0 on the result 111 * of \p swz1. The argument ordering is expected to match function 112 * composition. 113 */ 114static inline unsigned 115brw_compose_swizzle(unsigned swz0, unsigned swz1) 116{ 117 return BRW_SWIZZLE4( 118 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 0)), 119 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 1)), 120 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 2)), 121 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 3))); 122} 123 124/** 125 * Return the result of applying swizzle \p swz to shuffle the bits of \p mask 126 * (AKA image). 127 */ 128static inline unsigned 129brw_apply_swizzle_to_mask(unsigned swz, unsigned mask) 130{ 131 unsigned result = 0; 132 133 for (unsigned i = 0; i < 4; i++) { 134 if (mask & (1 << BRW_GET_SWZ(swz, i))) 135 result |= 1 << i; 136 } 137 138 return result; 139} 140 141/** 142 * Return the result of applying the inverse of swizzle \p swz to shuffle the 143 * bits of \p mask (AKA preimage). Useful to find out which components are 144 * read from a swizzled source given the instruction writemask. 145 */ 146static inline unsigned 147brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask) 148{ 149 unsigned result = 0; 150 151 for (unsigned i = 0; i < 4; i++) { 152 if (mask & (1 << i)) 153 result |= 1 << BRW_GET_SWZ(swz, i); 154 } 155 156 return result; 157} 158 159/** 160 * Construct an identity swizzle for the set of enabled channels given by \p 161 * mask. The result will only reference channels enabled in the provided \p 162 * mask, assuming that \p mask is non-zero. The constructed swizzle will 163 * satisfy the property that for any instruction OP and any mask: 164 * 165 * brw_OP(p, brw_writemask(dst, mask), 166 * brw_swizzle(src, brw_swizzle_for_mask(mask))); 167 * 168 * will be equivalent to the same instruction without swizzle: 169 * 170 * brw_OP(p, brw_writemask(dst, mask), src); 171 */ 172static inline unsigned 173brw_swizzle_for_mask(unsigned mask) 174{ 175 unsigned last = (mask ? ffs(mask) - 1 : 0); 176 unsigned swz[4]; 177 178 for (unsigned i = 0; i < 4; i++) 179 last = swz[i] = (mask & (1 << i) ? i : last); 180 181 return BRW_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]); 182} 183 184/** 185 * Construct an identity swizzle for the first \p n components of a vector. 186 * When only a subset of channels of a vec4 are used we don't want to 187 * reference the other channels, as that will tell optimization passes that 188 * those other channels are used. 189 */ 190static inline unsigned 191brw_swizzle_for_size(unsigned n) 192{ 193 return brw_swizzle_for_mask((1 << n) - 1); 194} 195 196/** 197 * Converse of brw_swizzle_for_mask(). Returns the mask of components 198 * accessed by the specified swizzle \p swz. 199 */ 200static inline unsigned 201brw_mask_for_swizzle(unsigned swz) 202{ 203 return brw_apply_inv_swizzle_to_mask(swz, ~0); 204} 205 206uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz); 207 208#define REG_SIZE (8*4) 209 210/* These aren't hardware structs, just something useful for us to pass around: 211 * 212 * Align1 operation has a lot of control over input ranges. Used in 213 * WM programs to implement shaders decomposed into "channel serial" 214 * or "structure of array" form: 215 */ 216struct brw_reg { 217 union { 218 struct { 219 enum brw_reg_type type:4; 220 enum brw_reg_file file:3; /* :2 hardware format */ 221 unsigned negate:1; /* source only */ 222 unsigned abs:1; /* source only */ 223 unsigned address_mode:1; /* relative addressing, hopefully! */ 224 unsigned pad0:17; 225 unsigned subnr:5; /* :1 in align16 */ 226 }; 227 uint32_t bits; 228 }; 229 230 union { 231 struct { 232 unsigned nr; 233 unsigned swizzle:8; /* src only, align16 only */ 234 unsigned writemask:4; /* dest only, align16 only */ 235 int indirect_offset:10; /* relative addressing offset */ 236 unsigned vstride:4; /* source only */ 237 unsigned width:3; /* src only, align1 only */ 238 unsigned hstride:2; /* align1 only */ 239 unsigned pad1:1; 240 }; 241 242 double df; 243 uint64_t u64; 244 int64_t d64; 245 float f; 246 int d; 247 unsigned ud; 248 }; 249}; 250 251static inline bool 252brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b) 253{ 254 return a->bits == b->bits && a->u64 == b->u64; 255} 256 257static inline bool 258brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b) 259{ 260 if (a->file == IMM) { 261 if (a->bits != b->bits) 262 return false; 263 264 switch ((enum brw_reg_type) a->type) { 265 case BRW_REGISTER_TYPE_UQ: 266 case BRW_REGISTER_TYPE_Q: 267 return a->d64 == -b->d64; 268 case BRW_REGISTER_TYPE_DF: 269 return a->df == -b->df; 270 case BRW_REGISTER_TYPE_UD: 271 case BRW_REGISTER_TYPE_D: 272 return a->d == -b->d; 273 case BRW_REGISTER_TYPE_F: 274 return a->f == -b->f; 275 case BRW_REGISTER_TYPE_VF: 276 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation 277 * of -0). There are occasions where 0 or -0 is used and the exact 278 * bit pattern is desired. At the very least, changing this to allow 279 * 0 as a negation of 0 causes some fp64 tests to fail on IVB. 280 */ 281 return a->ud == (b->ud ^ 0x80808080); 282 case BRW_REGISTER_TYPE_UW: 283 case BRW_REGISTER_TYPE_W: 284 case BRW_REGISTER_TYPE_UV: 285 case BRW_REGISTER_TYPE_V: 286 case BRW_REGISTER_TYPE_HF: 287 /* FINISHME: Implement support for these types once there is 288 * something in the compiler that can generate them. Until then, 289 * they cannot be tested. 290 */ 291 return false; 292 case BRW_REGISTER_TYPE_UB: 293 case BRW_REGISTER_TYPE_B: 294 case BRW_REGISTER_TYPE_NF: 295 default: 296 unreachable("not reached"); 297 } 298 } else { 299 struct brw_reg tmp = *a; 300 301 tmp.negate = !tmp.negate; 302 303 return brw_regs_equal(&tmp, b); 304 } 305} 306 307struct brw_indirect { 308 unsigned addr_subnr:4; 309 int addr_offset:10; 310 unsigned pad:18; 311}; 312 313 314static inline unsigned 315type_sz(unsigned type) 316{ 317 switch(type) { 318 case BRW_REGISTER_TYPE_UQ: 319 case BRW_REGISTER_TYPE_Q: 320 case BRW_REGISTER_TYPE_DF: 321 return 8; 322 case BRW_REGISTER_TYPE_UD: 323 case BRW_REGISTER_TYPE_D: 324 case BRW_REGISTER_TYPE_F: 325 case BRW_REGISTER_TYPE_VF: 326 return 4; 327 case BRW_REGISTER_TYPE_UW: 328 case BRW_REGISTER_TYPE_W: 329 case BRW_REGISTER_TYPE_UV: 330 case BRW_REGISTER_TYPE_V: 331 case BRW_REGISTER_TYPE_HF: 332 return 2; 333 case BRW_REGISTER_TYPE_UB: 334 case BRW_REGISTER_TYPE_B: 335 return 1; 336 default: 337 unreachable("not reached"); 338 } 339} 340 341static inline enum brw_reg_type 342get_exec_type(const enum brw_reg_type type) 343{ 344 switch (type) { 345 case BRW_REGISTER_TYPE_B: 346 case BRW_REGISTER_TYPE_V: 347 return BRW_REGISTER_TYPE_W; 348 case BRW_REGISTER_TYPE_UB: 349 case BRW_REGISTER_TYPE_UV: 350 return BRW_REGISTER_TYPE_UW; 351 case BRW_REGISTER_TYPE_VF: 352 return BRW_REGISTER_TYPE_F; 353 default: 354 return type; 355 } 356} 357 358/** 359 * Return an integer type of the requested size and signedness. 360 */ 361static inline enum brw_reg_type 362brw_int_type(unsigned sz, bool is_signed) 363{ 364 switch (sz) { 365 case 1: 366 return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB); 367 case 2: 368 return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW); 369 case 4: 370 return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD); 371 case 8: 372 return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ); 373 default: 374 unreachable("Not reached."); 375 } 376} 377 378static inline bool 379type_is_unsigned_int(enum brw_reg_type tp) 380{ 381 return tp == BRW_REGISTER_TYPE_UB || 382 tp == BRW_REGISTER_TYPE_UW || 383 tp == BRW_REGISTER_TYPE_UD || 384 tp == BRW_REGISTER_TYPE_UQ; 385} 386 387/** 388 * Construct a brw_reg. 389 * \param file one of the BRW_x_REGISTER_FILE values 390 * \param nr register number/index 391 * \param subnr register sub number 392 * \param negate register negate modifier 393 * \param abs register abs modifier 394 * \param type one of BRW_REGISTER_TYPE_x 395 * \param vstride one of BRW_VERTICAL_STRIDE_x 396 * \param width one of BRW_WIDTH_x 397 * \param hstride one of BRW_HORIZONTAL_STRIDE_x 398 * \param swizzle one of BRW_SWIZZLE_x 399 * \param writemask WRITEMASK_X/Y/Z/W bitfield 400 */ 401static inline struct brw_reg 402brw_reg(enum brw_reg_file file, 403 unsigned nr, 404 unsigned subnr, 405 unsigned negate, 406 unsigned abs, 407 enum brw_reg_type type, 408 unsigned vstride, 409 unsigned width, 410 unsigned hstride, 411 unsigned swizzle, 412 unsigned writemask) 413{ 414 struct brw_reg reg; 415 if (file == BRW_GENERAL_REGISTER_FILE) 416 assert(nr < BRW_MAX_GRF); 417 else if (file == BRW_ARCHITECTURE_REGISTER_FILE) 418 assert(nr <= BRW_ARF_TIMESTAMP); 419 /* Asserting on the MRF register number requires to know the hardware gen 420 * (gen6 has 24 MRF registers), which we don't know here, so we assert 421 * for that in the generators and in brw_eu_emit.c 422 */ 423 424 reg.type = type; 425 reg.file = file; 426 reg.negate = negate; 427 reg.abs = abs; 428 reg.address_mode = BRW_ADDRESS_DIRECT; 429 reg.pad0 = 0; 430 reg.subnr = subnr * type_sz(type); 431 reg.nr = nr; 432 433 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to 434 * set swizzle and writemask to W, as the lower bits of subnr will 435 * be lost when converted to align16. This is probably too much to 436 * keep track of as you'd want it adjusted by suboffset(), etc. 437 * Perhaps fix up when converting to align16? 438 */ 439 reg.swizzle = swizzle; 440 reg.writemask = writemask; 441 reg.indirect_offset = 0; 442 reg.vstride = vstride; 443 reg.width = width; 444 reg.hstride = hstride; 445 reg.pad1 = 0; 446 return reg; 447} 448 449/** Construct float[16] register */ 450static inline struct brw_reg 451brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 452{ 453 return brw_reg(file, 454 nr, 455 subnr, 456 0, 457 0, 458 BRW_REGISTER_TYPE_F, 459 BRW_VERTICAL_STRIDE_16, 460 BRW_WIDTH_16, 461 BRW_HORIZONTAL_STRIDE_1, 462 BRW_SWIZZLE_XYZW, 463 WRITEMASK_XYZW); 464} 465 466/** Construct float[8] register */ 467static inline struct brw_reg 468brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 469{ 470 return brw_reg(file, 471 nr, 472 subnr, 473 0, 474 0, 475 BRW_REGISTER_TYPE_F, 476 BRW_VERTICAL_STRIDE_8, 477 BRW_WIDTH_8, 478 BRW_HORIZONTAL_STRIDE_1, 479 BRW_SWIZZLE_XYZW, 480 WRITEMASK_XYZW); 481} 482 483/** Construct float[4] register */ 484static inline struct brw_reg 485brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 486{ 487 return brw_reg(file, 488 nr, 489 subnr, 490 0, 491 0, 492 BRW_REGISTER_TYPE_F, 493 BRW_VERTICAL_STRIDE_4, 494 BRW_WIDTH_4, 495 BRW_HORIZONTAL_STRIDE_1, 496 BRW_SWIZZLE_XYZW, 497 WRITEMASK_XYZW); 498} 499 500/** Construct float[2] register */ 501static inline struct brw_reg 502brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 503{ 504 return brw_reg(file, 505 nr, 506 subnr, 507 0, 508 0, 509 BRW_REGISTER_TYPE_F, 510 BRW_VERTICAL_STRIDE_2, 511 BRW_WIDTH_2, 512 BRW_HORIZONTAL_STRIDE_1, 513 BRW_SWIZZLE_XYXY, 514 WRITEMASK_XY); 515} 516 517/** Construct float[1] register */ 518static inline struct brw_reg 519brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 520{ 521 return brw_reg(file, 522 nr, 523 subnr, 524 0, 525 0, 526 BRW_REGISTER_TYPE_F, 527 BRW_VERTICAL_STRIDE_0, 528 BRW_WIDTH_1, 529 BRW_HORIZONTAL_STRIDE_0, 530 BRW_SWIZZLE_XXXX, 531 WRITEMASK_X); 532} 533 534static inline struct brw_reg 535brw_vecn_reg(unsigned width, enum brw_reg_file file, 536 unsigned nr, unsigned subnr) 537{ 538 switch (width) { 539 case 1: 540 return brw_vec1_reg(file, nr, subnr); 541 case 2: 542 return brw_vec2_reg(file, nr, subnr); 543 case 4: 544 return brw_vec4_reg(file, nr, subnr); 545 case 8: 546 return brw_vec8_reg(file, nr, subnr); 547 case 16: 548 return brw_vec16_reg(file, nr, subnr); 549 default: 550 unreachable("Invalid register width"); 551 } 552} 553 554static inline struct brw_reg 555retype(struct brw_reg reg, enum brw_reg_type type) 556{ 557 reg.type = type; 558 return reg; 559} 560 561static inline struct brw_reg 562firsthalf(struct brw_reg reg) 563{ 564 return reg; 565} 566 567static inline struct brw_reg 568sechalf(struct brw_reg reg) 569{ 570 if (reg.vstride) 571 reg.nr++; 572 return reg; 573} 574 575static inline struct brw_reg 576offset(struct brw_reg reg, unsigned delta) 577{ 578 reg.nr += delta; 579 return reg; 580} 581 582 583static inline struct brw_reg 584byte_offset(struct brw_reg reg, unsigned bytes) 585{ 586 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; 587 reg.nr = newoffset / REG_SIZE; 588 reg.subnr = newoffset % REG_SIZE; 589 return reg; 590} 591 592static inline struct brw_reg 593suboffset(struct brw_reg reg, unsigned delta) 594{ 595 return byte_offset(reg, delta * type_sz(reg.type)); 596} 597 598/** Construct unsigned word[16] register */ 599static inline struct brw_reg 600brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 601{ 602 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 603} 604 605/** Construct unsigned word[8] register */ 606static inline struct brw_reg 607brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 608{ 609 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 610} 611 612/** Construct unsigned word[1] register */ 613static inline struct brw_reg 614brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 615{ 616 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 617} 618 619static inline struct brw_reg 620brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 621{ 622 return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD); 623} 624 625static inline struct brw_reg 626brw_imm_reg(enum brw_reg_type type) 627{ 628 return brw_reg(BRW_IMMEDIATE_VALUE, 629 0, 630 0, 631 0, 632 0, 633 type, 634 BRW_VERTICAL_STRIDE_0, 635 BRW_WIDTH_1, 636 BRW_HORIZONTAL_STRIDE_0, 637 0, 638 0); 639} 640 641/** Construct float immediate register */ 642static inline struct brw_reg 643brw_imm_df(double df) 644{ 645 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF); 646 imm.df = df; 647 return imm; 648} 649 650static inline struct brw_reg 651brw_imm_u64(uint64_t u64) 652{ 653 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ); 654 imm.u64 = u64; 655 return imm; 656} 657 658static inline struct brw_reg 659brw_imm_f(float f) 660{ 661 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); 662 imm.f = f; 663 return imm; 664} 665 666/** Construct int64_t immediate register */ 667static inline struct brw_reg 668brw_imm_q(int64_t q) 669{ 670 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q); 671 imm.d64 = q; 672 return imm; 673} 674 675/** Construct int64_t immediate register */ 676static inline struct brw_reg 677brw_imm_uq(uint64_t uq) 678{ 679 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ); 680 imm.u64 = uq; 681 return imm; 682} 683 684/** Construct integer immediate register */ 685static inline struct brw_reg 686brw_imm_d(int d) 687{ 688 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); 689 imm.d = d; 690 return imm; 691} 692 693/** Construct uint immediate register */ 694static inline struct brw_reg 695brw_imm_ud(unsigned ud) 696{ 697 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); 698 imm.ud = ud; 699 return imm; 700} 701 702/** Construct ushort immediate register */ 703static inline struct brw_reg 704brw_imm_uw(uint16_t uw) 705{ 706 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); 707 imm.ud = uw | (uw << 16); 708 return imm; 709} 710 711/** Construct short immediate register */ 712static inline struct brw_reg 713brw_imm_w(int16_t w) 714{ 715 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); 716 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16; 717 return imm; 718} 719 720/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type 721 * numbers alias with _V and _VF below: 722 */ 723 724/** Construct vector of eight signed half-byte values */ 725static inline struct brw_reg 726brw_imm_v(unsigned v) 727{ 728 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); 729 imm.ud = v; 730 return imm; 731} 732 733/** Construct vector of eight unsigned half-byte values */ 734static inline struct brw_reg 735brw_imm_uv(unsigned uv) 736{ 737 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV); 738 imm.ud = uv; 739 return imm; 740} 741 742/** Construct vector of four 8-bit float values */ 743static inline struct brw_reg 744brw_imm_vf(unsigned v) 745{ 746 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 747 imm.ud = v; 748 return imm; 749} 750 751static inline struct brw_reg 752brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) 753{ 754 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 755 imm.vstride = BRW_VERTICAL_STRIDE_0; 756 imm.width = BRW_WIDTH_4; 757 imm.hstride = BRW_HORIZONTAL_STRIDE_1; 758 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); 759 return imm; 760} 761 762 763static inline struct brw_reg 764brw_address(struct brw_reg reg) 765{ 766 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); 767} 768 769/** Construct float[1] general-purpose register */ 770static inline struct brw_reg 771brw_vec1_grf(unsigned nr, unsigned subnr) 772{ 773 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 774} 775 776/** Construct float[2] general-purpose register */ 777static inline struct brw_reg 778brw_vec2_grf(unsigned nr, unsigned subnr) 779{ 780 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 781} 782 783/** Construct float[4] general-purpose register */ 784static inline struct brw_reg 785brw_vec4_grf(unsigned nr, unsigned subnr) 786{ 787 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 788} 789 790/** Construct float[8] general-purpose register */ 791static inline struct brw_reg 792brw_vec8_grf(unsigned nr, unsigned subnr) 793{ 794 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 795} 796 797/** Construct float[16] general-purpose register */ 798static inline struct brw_reg 799brw_vec16_grf(unsigned nr, unsigned subnr) 800{ 801 return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 802} 803 804static inline struct brw_reg 805brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr) 806{ 807 return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr); 808} 809 810 811static inline struct brw_reg 812brw_uw8_grf(unsigned nr, unsigned subnr) 813{ 814 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 815} 816 817static inline struct brw_reg 818brw_uw16_grf(unsigned nr, unsigned subnr) 819{ 820 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 821} 822 823 824/** Construct null register (usually used for setting condition codes) */ 825static inline struct brw_reg 826brw_null_reg(void) 827{ 828 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); 829} 830 831static inline struct brw_reg 832brw_null_vec(unsigned width) 833{ 834 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); 835} 836 837static inline struct brw_reg 838brw_address_reg(unsigned subnr) 839{ 840 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr); 841} 842 843static inline struct brw_reg 844brw_tdr_reg(void) 845{ 846 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0); 847} 848 849/* If/else instructions break in align16 mode if writemask & swizzle 850 * aren't xyzw. This goes against the convention for other scalar 851 * regs: 852 */ 853static inline struct brw_reg 854brw_ip_reg(void) 855{ 856 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 857 BRW_ARF_IP, 858 0, 859 0, 860 0, 861 BRW_REGISTER_TYPE_UD, 862 BRW_VERTICAL_STRIDE_4, /* ? */ 863 BRW_WIDTH_1, 864 BRW_HORIZONTAL_STRIDE_0, 865 BRW_SWIZZLE_XYZW, /* NOTE! */ 866 WRITEMASK_XYZW); /* NOTE! */ 867} 868 869static inline struct brw_reg 870brw_notification_reg(void) 871{ 872 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 873 BRW_ARF_NOTIFICATION_COUNT, 874 0, 875 0, 876 0, 877 BRW_REGISTER_TYPE_UD, 878 BRW_VERTICAL_STRIDE_0, 879 BRW_WIDTH_1, 880 BRW_HORIZONTAL_STRIDE_0, 881 BRW_SWIZZLE_XXXX, 882 WRITEMASK_X); 883} 884 885static inline struct brw_reg 886brw_cr0_reg(unsigned subnr) 887{ 888 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr); 889} 890 891static inline struct brw_reg 892brw_sr0_reg(unsigned subnr) 893{ 894 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr); 895} 896 897static inline struct brw_reg 898brw_acc_reg(unsigned width) 899{ 900 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, 901 BRW_ARF_ACCUMULATOR, 0); 902} 903 904static inline struct brw_reg 905brw_flag_reg(int reg, int subreg) 906{ 907 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 908 BRW_ARF_FLAG + reg, subreg); 909} 910 911static inline struct brw_reg 912brw_flag_subreg(unsigned subreg) 913{ 914 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 915 BRW_ARF_FLAG + subreg / 2, subreg % 2); 916} 917 918/** 919 * Return the mask register present in Gen4-5, or the related register present 920 * in Gen7.5 and later hardware referred to as "channel enable" register in 921 * the documentation. 922 */ 923static inline struct brw_reg 924brw_mask_reg(unsigned subnr) 925{ 926 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); 927} 928 929static inline struct brw_reg 930brw_vmask_reg() 931{ 932 return brw_sr0_reg(3); 933} 934 935static inline struct brw_reg 936brw_dmask_reg() 937{ 938 return brw_sr0_reg(2); 939} 940 941static inline struct brw_reg 942brw_message_reg(unsigned nr) 943{ 944 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); 945} 946 947static inline struct brw_reg 948brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr) 949{ 950 return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr), 951 BRW_REGISTER_TYPE_UD); 952} 953 954/* This is almost always called with a numeric constant argument, so 955 * make things easy to evaluate at compile time: 956 */ 957static inline unsigned cvt(unsigned val) 958{ 959 switch (val) { 960 case 0: return 0; 961 case 1: return 1; 962 case 2: return 2; 963 case 4: return 3; 964 case 8: return 4; 965 case 16: return 5; 966 case 32: return 6; 967 } 968 return 0; 969} 970 971static inline struct brw_reg 972stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride) 973{ 974 reg.vstride = cvt(vstride); 975 reg.width = cvt(width) - 1; 976 reg.hstride = cvt(hstride); 977 return reg; 978} 979 980/** 981 * Multiply the vertical and horizontal stride of a register by the given 982 * factor \a s. 983 */ 984static inline struct brw_reg 985spread(struct brw_reg reg, unsigned s) 986{ 987 if (s) { 988 assert(_mesa_is_pow_two(s)); 989 990 if (reg.hstride) 991 reg.hstride += cvt(s) - 1; 992 993 if (reg.vstride) 994 reg.vstride += cvt(s) - 1; 995 996 return reg; 997 } else { 998 return stride(reg, 0, 1, 0); 999 } 1000} 1001 1002/** 1003 * Reinterpret each channel of register \p reg as a vector of values of the 1004 * given smaller type and take the i-th subcomponent from each. 1005 */ 1006static inline struct brw_reg 1007subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i) 1008{ 1009 if (reg.file == IMM) 1010 return reg; 1011 1012 unsigned scale = type_sz(reg.type) / type_sz(type); 1013 assert(scale >= 1 && i < scale); 1014 1015 return suboffset(retype(spread(reg, scale), type), i); 1016} 1017 1018static inline struct brw_reg 1019vec16(struct brw_reg reg) 1020{ 1021 return stride(reg, 16,16,1); 1022} 1023 1024static inline struct brw_reg 1025vec8(struct brw_reg reg) 1026{ 1027 return stride(reg, 8,8,1); 1028} 1029 1030static inline struct brw_reg 1031vec4(struct brw_reg reg) 1032{ 1033 return stride(reg, 4,4,1); 1034} 1035 1036static inline struct brw_reg 1037vec2(struct brw_reg reg) 1038{ 1039 return stride(reg, 2,2,1); 1040} 1041 1042static inline struct brw_reg 1043vec1(struct brw_reg reg) 1044{ 1045 return stride(reg, 0,1,0); 1046} 1047 1048 1049static inline struct brw_reg 1050get_element(struct brw_reg reg, unsigned elt) 1051{ 1052 return vec1(suboffset(reg, elt)); 1053} 1054 1055static inline struct brw_reg 1056get_element_ud(struct brw_reg reg, unsigned elt) 1057{ 1058 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); 1059} 1060 1061static inline struct brw_reg 1062get_element_d(struct brw_reg reg, unsigned elt) 1063{ 1064 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); 1065} 1066 1067static inline struct brw_reg 1068brw_swizzle(struct brw_reg reg, unsigned swz) 1069{ 1070 if (reg.file == BRW_IMMEDIATE_VALUE) 1071 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz); 1072 else 1073 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle); 1074 1075 return reg; 1076} 1077 1078static inline struct brw_reg 1079brw_writemask(struct brw_reg reg, unsigned mask) 1080{ 1081 assert(reg.file != BRW_IMMEDIATE_VALUE); 1082 reg.writemask &= mask; 1083 return reg; 1084} 1085 1086static inline struct brw_reg 1087brw_set_writemask(struct brw_reg reg, unsigned mask) 1088{ 1089 assert(reg.file != BRW_IMMEDIATE_VALUE); 1090 reg.writemask = mask; 1091 return reg; 1092} 1093 1094static inline unsigned 1095brw_writemask_for_size(unsigned n) 1096{ 1097 return (1 << n) - 1; 1098} 1099 1100static inline unsigned 1101brw_writemask_for_component_packing(unsigned n, unsigned first_component) 1102{ 1103 assert(first_component + n <= 4); 1104 return (((1 << n) - 1) << first_component); 1105} 1106 1107static inline struct brw_reg 1108negate(struct brw_reg reg) 1109{ 1110 reg.negate ^= 1; 1111 return reg; 1112} 1113 1114static inline struct brw_reg 1115brw_abs(struct brw_reg reg) 1116{ 1117 reg.abs = 1; 1118 reg.negate = 0; 1119 return reg; 1120} 1121 1122/************************************************************************/ 1123 1124static inline struct brw_reg 1125brw_vec4_indirect(unsigned subnr, int offset) 1126{ 1127 struct brw_reg reg = brw_vec4_grf(0, 0); 1128 reg.subnr = subnr; 1129 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1130 reg.indirect_offset = offset; 1131 return reg; 1132} 1133 1134static inline struct brw_reg 1135brw_vec1_indirect(unsigned subnr, int offset) 1136{ 1137 struct brw_reg reg = brw_vec1_grf(0, 0); 1138 reg.subnr = subnr; 1139 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1140 reg.indirect_offset = offset; 1141 return reg; 1142} 1143 1144static inline struct brw_reg 1145brw_VxH_indirect(unsigned subnr, int offset) 1146{ 1147 struct brw_reg reg = brw_vec1_grf(0, 0); 1148 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL; 1149 reg.subnr = subnr; 1150 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1151 reg.indirect_offset = offset; 1152 return reg; 1153} 1154 1155static inline struct brw_reg 1156deref_4f(struct brw_indirect ptr, int offset) 1157{ 1158 return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1159} 1160 1161static inline struct brw_reg 1162deref_1f(struct brw_indirect ptr, int offset) 1163{ 1164 return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1165} 1166 1167static inline struct brw_reg 1168deref_4b(struct brw_indirect ptr, int offset) 1169{ 1170 return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); 1171} 1172 1173static inline struct brw_reg 1174deref_1uw(struct brw_indirect ptr, int offset) 1175{ 1176 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); 1177} 1178 1179static inline struct brw_reg 1180deref_1d(struct brw_indirect ptr, int offset) 1181{ 1182 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); 1183} 1184 1185static inline struct brw_reg 1186deref_1ud(struct brw_indirect ptr, int offset) 1187{ 1188 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); 1189} 1190 1191static inline struct brw_reg 1192get_addr_reg(struct brw_indirect ptr) 1193{ 1194 return brw_address_reg(ptr.addr_subnr); 1195} 1196 1197static inline struct brw_indirect 1198brw_indirect_offset(struct brw_indirect ptr, int offset) 1199{ 1200 ptr.addr_offset += offset; 1201 return ptr; 1202} 1203 1204static inline struct brw_indirect 1205brw_indirect(unsigned addr_subnr, int offset) 1206{ 1207 struct brw_indirect ptr; 1208 ptr.addr_subnr = addr_subnr; 1209 ptr.addr_offset = offset; 1210 ptr.pad = 0; 1211 return ptr; 1212} 1213 1214static inline bool 1215region_matches(struct brw_reg reg, enum brw_vertical_stride v, 1216 enum brw_width w, enum brw_horizontal_stride h) 1217{ 1218 return reg.vstride == v && 1219 reg.width == w && 1220 reg.hstride == h; 1221} 1222 1223#define has_scalar_region(reg) \ 1224 region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \ 1225 BRW_HORIZONTAL_STRIDE_0) 1226 1227/* brw_packed_float.c */ 1228int brw_float_to_vf(float f); 1229float brw_vf_to_float(unsigned char vf); 1230 1231#ifdef __cplusplus 1232} 1233#endif 1234 1235#endif 1236