17ec681f3Smrg/* 27ec681f3Smrg * Copyright (c) 2017 Rob Clark <robdclark@gmail.com> 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#ifndef _AFUC_H_ 257ec681f3Smrg#define _AFUC_H_ 267ec681f3Smrg 277ec681f3Smrg#include <stdbool.h> 287ec681f3Smrg 297ec681f3Smrg#include "util/macros.h" 307ec681f3Smrg 317ec681f3Smrg/* 327ec681f3SmrgTODO kernel debugfs to inject packet into rb for easier experimentation. It 337ec681f3Smrgshould trigger reloading pfp/me and resetting gpu.. 347ec681f3Smrg 357ec681f3SmrgActually maybe it should be flag on submit ioctl to be able to deal w/ relocs, 367ec681f3Smrgshould be restricted to CAP_ADMIN and probably compile option too (default=n). 377ec681f3Smrgif flag set, copy cmdstream bo contents into RB instead of IB'ing to it from 387ec681f3SmrgRB. 397ec681f3Smrg */ 407ec681f3Smrg 417ec681f3Smrg/* The opcode is encoded variable length. Opcodes less than 0x30 427ec681f3Smrg * are encoded as 5 bits followed by (rep) flag. Opcodes >= 0x30 437ec681f3Smrg * (ie. top two bits are '11' are encoded as 6 bits. See get_opc() 447ec681f3Smrg */ 457ec681f3Smrgtypedef enum { 467ec681f3Smrg OPC_NOP = 0x00, 477ec681f3Smrg 487ec681f3Smrg OPC_ADD = 0x01, /* add immediate */ 497ec681f3Smrg OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */ 507ec681f3Smrg OPC_SUB = 0x03, /* subtract immediate */ 517ec681f3Smrg OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */ 527ec681f3Smrg OPC_AND = 0x05, /* AND immediate */ 537ec681f3Smrg OPC_OR = 0x06, /* OR immediate */ 547ec681f3Smrg OPC_XOR = 0x07, /* XOR immediate */ 557ec681f3Smrg OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */ 567ec681f3Smrg OPC_SHL = 0x09, /* shift-left immediate */ 577ec681f3Smrg OPC_USHR = 0x0a, /* unsigned shift right by immediate */ 587ec681f3Smrg OPC_ISHR = 0x0b, /* signed shift right by immediate */ 597ec681f3Smrg OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */ 607ec681f3Smrg OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */ 617ec681f3Smrg OPC_MIN = 0x0e, 627ec681f3Smrg OPC_MAX = 0x0f, 637ec681f3Smrg OPC_CMP = 0x10, /* compare src to immed */ 647ec681f3Smrg OPC_MOVI = 0x11, /* move immediate */ 657ec681f3Smrg 667ec681f3Smrg /* Return the most-significant bit of src2, or 0 if src2 == 0 (the 677ec681f3Smrg * same as if src2 == 1). src1 is ignored. Note that this overlaps 687ec681f3Smrg * with STORE6, so it can only be used with the two-source encoding. 697ec681f3Smrg */ 707ec681f3Smrg OPC_MSB = 0x14, 717ec681f3Smrg 727ec681f3Smrg OPC_ALU = 0x13, /* ALU instruction with two src registers */ 737ec681f3Smrg 747ec681f3Smrg /* These seem something to do with setting some external state.. 757ec681f3Smrg * doesn't seem to map *directly* to registers, but I guess that 767ec681f3Smrg * is where things end up. For example, this sequence in the 777ec681f3Smrg * CP_INDIRECT_BUFFER handler: 787ec681f3Smrg * 797ec681f3Smrg * mov $02, $data ; low 32b of IB target address 807ec681f3Smrg * mov $03, $data ; high 32b of IB target 817ec681f3Smrg * mov $04, $data ; IB size in dwords 827ec681f3Smrg * breq $04, 0x0, #l23 (#69, 04a2) 837ec681f3Smrg * and $05, $18, 0x0003 847ec681f3Smrg * shl $05, $05, 0x0002 857ec681f3Smrg * cwrite $02, [$05 + 0x0b0], 0x8 867ec681f3Smrg * cwrite $03, [$05 + 0x0b1], 0x8 877ec681f3Smrg * cwrite $04, [$05 + 0x0b2], 0x8 887ec681f3Smrg * 897ec681f3Smrg * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and 907ec681f3Smrg * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value 917ec681f3Smrg * for RB->IB1 vs IB1->IB2. 927ec681f3Smrg */ 937ec681f3Smrg OPC_CWRITE5 = 0x15, 947ec681f3Smrg OPC_CREAD5 = 0x16, 957ec681f3Smrg 967ec681f3Smrg /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes 977ec681f3Smrg * that let you read/write directly to memory (and bypass the IOMMU?). 987ec681f3Smrg */ 997ec681f3Smrg OPC_STORE6 = 0x14, 1007ec681f3Smrg OPC_CWRITE6 = 0x15, 1017ec681f3Smrg OPC_LOAD6 = 0x16, 1027ec681f3Smrg OPC_CREAD6 = 0x17, 1037ec681f3Smrg 1047ec681f3Smrg OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */ 1057ec681f3Smrg OPC_BREQI = 0x31, /* relative branch (if $src == immed) */ 1067ec681f3Smrg OPC_BRNEB = 0x32, /* relative branch (if bit not set) */ 1077ec681f3Smrg OPC_BREQB = 0x33, /* relative branch (if bit is set) */ 1087ec681f3Smrg OPC_RET = 0x34, /* return */ 1097ec681f3Smrg OPC_CALL = 0x35, /* "function" call */ 1107ec681f3Smrg OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */ 1117ec681f3Smrg OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */ 1127ec681f3Smrg OPC_SETSECURE = 0x3b, /* switch secure mode on/off */ 1137ec681f3Smrg} afuc_opc; 1147ec681f3Smrg 1157ec681f3Smrg/** 1167ec681f3Smrg * Special GPR registers: 1177ec681f3Smrg * 1187ec681f3Smrg * Notes: (applicable to a6xx, double check a5xx) 1197ec681f3Smrg * 1207ec681f3Smrg * 0x1d: 1217ec681f3Smrg * $addr: writes configure GPU reg address to read/write 1227ec681f3Smrg * (does not respect CP_PROTECT) 1237ec681f3Smrg * $memdata: reads from FIFO filled based on MEM_READ_DWORDS/ 1247ec681f3Smrg * MEM_READ_ADDR 1257ec681f3Smrg * 0x1e: (note different mnemonic for src vs dst) 1267ec681f3Smrg * $usraddr: writes configure GPU reg address to read/write, 1277ec681f3Smrg * respecting CP_PROTECT 1287ec681f3Smrg * $regdata: reads from FIFO filled based on REG_READ_DWORDS/ 1297ec681f3Smrg * REG_READ_ADDR 1307ec681f3Smrg * 0x1f: 1317ec681f3Smrg * $data: reads from from pm4 input stream 1327ec681f3Smrg * $data: writes to stream configured by write to $addr 1337ec681f3Smrg * or $usraddr 1347ec681f3Smrg */ 1357ec681f3Smrgtypedef enum { 1367ec681f3Smrg REG_REM = 0x1c, 1377ec681f3Smrg REG_MEMDATA = 0x1d, /* when used as src */ 1387ec681f3Smrg REG_ADDR = 0x1d, /* when used as dst */ 1397ec681f3Smrg REG_REGDATA = 0x1e, /* when used as src */ 1407ec681f3Smrg REG_USRADDR = 0x1e, /* when used as dst */ 1417ec681f3Smrg REG_DATA = 0x1f, 1427ec681f3Smrg} afuc_reg; 1437ec681f3Smrg 1447ec681f3Smrgtypedef union PACKED { 1457ec681f3Smrg /* addi, subi, andi, ori, xori, etc: */ 1467ec681f3Smrg struct PACKED { 1477ec681f3Smrg uint32_t uimm : 16; 1487ec681f3Smrg uint32_t dst : 5; 1497ec681f3Smrg uint32_t src : 5; 1507ec681f3Smrg uint32_t hdr : 6; 1517ec681f3Smrg } alui; 1527ec681f3Smrg struct PACKED { 1537ec681f3Smrg uint32_t uimm : 16; 1547ec681f3Smrg uint32_t dst : 5; 1557ec681f3Smrg uint32_t shift : 5; 1567ec681f3Smrg uint32_t hdr : 6; 1577ec681f3Smrg } movi; 1587ec681f3Smrg struct PACKED { 1597ec681f3Smrg uint32_t alu : 5; 1607ec681f3Smrg uint32_t pad : 4; 1617ec681f3Smrg uint32_t xmov : 2; /* execute eXtra mov's based on $rem */ 1627ec681f3Smrg uint32_t dst : 5; 1637ec681f3Smrg uint32_t src2 : 5; 1647ec681f3Smrg uint32_t src1 : 5; 1657ec681f3Smrg uint32_t hdr : 6; 1667ec681f3Smrg } alu; 1677ec681f3Smrg struct PACKED { 1687ec681f3Smrg uint32_t uimm : 12; 1697ec681f3Smrg /* TODO this needs to be confirmed: 1707ec681f3Smrg * 1717ec681f3Smrg * flags: 1727ec681f3Smrg * 0x4 - post-increment src2 by uimm (need to confirm this is also 1737ec681f3Smrg * true for load/cread). TBD whether, when used in conjunction 1747ec681f3Smrg * with @LOAD_STORE_HI, 32b rollover works properly. 1757ec681f3Smrg * 1767ec681f3Smrg * other values tbd, also need to confirm if different bits can be 1777ec681f3Smrg * set together (I don't see examples of this in existing fw) 1787ec681f3Smrg */ 1797ec681f3Smrg uint32_t flags : 4; 1807ec681f3Smrg uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */ 1817ec681f3Smrg uint32_t src2 : 5; /* read or write address is src2+uimm */ 1827ec681f3Smrg uint32_t hdr : 6; 1837ec681f3Smrg } control; 1847ec681f3Smrg struct PACKED { 1857ec681f3Smrg int32_t ioff : 16; /* relative offset */ 1867ec681f3Smrg uint32_t bit_or_imm : 5; 1877ec681f3Smrg uint32_t src : 5; 1887ec681f3Smrg uint32_t hdr : 6; 1897ec681f3Smrg } br; 1907ec681f3Smrg struct PACKED { 1917ec681f3Smrg uint32_t uoff : 26; /* absolute (unsigned) offset */ 1927ec681f3Smrg uint32_t hdr : 6; 1937ec681f3Smrg } call; 1947ec681f3Smrg struct PACKED { 1957ec681f3Smrg uint32_t pad : 25; 1967ec681f3Smrg uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */ 1977ec681f3Smrg uint32_t hdr : 6; 1987ec681f3Smrg } ret; 1997ec681f3Smrg struct PACKED { 2007ec681f3Smrg uint32_t pad : 26; 2017ec681f3Smrg uint32_t hdr : 6; 2027ec681f3Smrg } waitin; 2037ec681f3Smrg struct PACKED { 2047ec681f3Smrg uint32_t pad : 26; 2057ec681f3Smrg uint32_t opc_r : 6; 2067ec681f3Smrg }; 2077ec681f3Smrg 2087ec681f3Smrg} afuc_instr; 2097ec681f3Smrg 2107ec681f3Smrgstatic inline void 2117ec681f3Smrgafuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep) 2127ec681f3Smrg{ 2137ec681f3Smrg if (ai->opc_r < 0x30) { 2147ec681f3Smrg *opc = ai->opc_r >> 1; 2157ec681f3Smrg *rep = ai->opc_r & 0x1; 2167ec681f3Smrg } else { 2177ec681f3Smrg *opc = ai->opc_r; 2187ec681f3Smrg *rep = false; 2197ec681f3Smrg } 2207ec681f3Smrg} 2217ec681f3Smrg 2227ec681f3Smrgstatic inline void 2237ec681f3Smrgafuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep) 2247ec681f3Smrg{ 2257ec681f3Smrg if (opc < 0x30) { 2267ec681f3Smrg ai->opc_r = opc << 1; 2277ec681f3Smrg ai->opc_r |= !!rep; 2287ec681f3Smrg } else { 2297ec681f3Smrg ai->opc_r = opc; 2307ec681f3Smrg } 2317ec681f3Smrg} 2327ec681f3Smrg 2337ec681f3Smrgvoid print_src(unsigned reg); 2347ec681f3Smrgvoid print_dst(unsigned reg); 2357ec681f3Smrgvoid print_control_reg(uint32_t id); 2367ec681f3Smrgvoid print_pipe_reg(uint32_t id); 2377ec681f3Smrg 2387ec681f3Smrg#endif /* _AFUC_H_ */ 239