/*
 * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
237ec681f3Smrg
247ec681f3Smrg#ifndef _AFUC_H_
257ec681f3Smrg#define _AFUC_H_
267ec681f3Smrg
277ec681f3Smrg#include <stdbool.h>
287ec681f3Smrg
297ec681f3Smrg#include "util/macros.h"
307ec681f3Smrg
/*
TODO kernel debugfs to inject a packet into the RB for easier experimentation.
It should trigger reloading pfp/me and resetting the GPU.

Actually, maybe it should be a flag on the submit ioctl, to be able to deal w/
relocs.  It should be restricted to CAP_ADMIN and probably be a compile option
too (default=n).  If the flag is set, copy the cmdstream bo contents into the
RB instead of IB'ing to it from the RB.
 */
407ec681f3Smrg
/* The opcode is encoded variable length.  Opcodes less than 0x30
 * are encoded as 5 bits followed by the (rep) flag.  Opcodes >= 0x30
 * (ie. top two bits are '11') are encoded as 6 bits.  See afuc_get_opc().
 *
 * Several enumerators intentionally share a value: the a5xx and a6xx
 * control/memory opcodes overlap, and OPC_MSB overlaps OPC_STORE6.
 */
typedef enum {
   OPC_NOP = 0x00,

   OPC_ADD = 0x01,   /* add immediate */
   OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
   OPC_SUB = 0x03,   /* subtract immediate */
   OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
   OPC_AND = 0x05,   /* AND immediate */
   OPC_OR = 0x06,    /* OR immediate */
   OPC_XOR = 0x07,   /* XOR immediate */
   OPC_NOT = 0x08,   /* bitwise not of immed (src1 ignored) */
   OPC_SHL = 0x09,   /* shift-left immediate */
   OPC_USHR = 0x0a,  /* unsigned shift right by immediate */
   OPC_ISHR = 0x0b,  /* signed shift right by immediate */
   OPC_ROT = 0x0c,   /* rotate left (left shift with wrap-around) */
   OPC_MUL8 = 0x0d,  /* 8bit multiply by immediate */
   OPC_MIN = 0x0e,
   OPC_MAX = 0x0f,
   OPC_CMP = 0x10,   /* compare src to immed */
   OPC_MOVI = 0x11,  /* move immediate */

   OPC_ALU = 0x13,   /* ALU instruction with two src registers */

   /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
    * same as if src2 == 1). src1 is ignored. Note that this overlaps
    * with STORE6, so it can only be used with the two-source encoding.
    */
   OPC_MSB = 0x14,

   /* These seem something to do with setting some external state..
    * doesn't seem to map *directly* to registers, but I guess that
    * is where things end up.  For example, this sequence in the
    * CP_INDIRECT_BUFFER handler:
    *
    *     mov $02, $data   ; low 32b of IB target address
    *     mov $03, $data   ; high 32b of IB target
    *     mov $04, $data   ; IB size in dwords
    *     breq $04, 0x0, #l23 (#69, 04a2)
    *     and $05, $18, 0x0003
    *     shl $05, $05, 0x0002
    *     cwrite $02, [$05 + 0x0b0], 0x8
    *     cwrite $03, [$05 + 0x0b1], 0x8
    *     cwrite $04, [$05 + 0x0b2], 0x8
    *
    * Note that CP_IB1/2_BASE_LO/HI/BUFSZ are in 0x0b1f->0x0b21 (IB1) and
    * 0x0b22->0x0b24 (IB2).  Presumably $05 ends up w/ different value
    * for RB->IB1 vs IB1->IB2.
    */
   OPC_CWRITE5 = 0x15,
   OPC_CREAD5 = 0x16,

   /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
    * that let you read/write directly to memory (and bypass the IOMMU?).
    */
   OPC_STORE6 = 0x14,
   OPC_CWRITE6 = 0x15,
   OPC_LOAD6 = 0x16,
   OPC_CREAD6 = 0x17,

   /* 6-bit opcodes (no rep flag): */
   OPC_BRNEI = 0x30,         /* relative branch (if $src != immed) */
   OPC_BREQI = 0x31,         /* relative branch (if $src == immed) */
   OPC_BRNEB = 0x32,         /* relative branch (if bit not set) */
   OPC_BREQB = 0x33,         /* relative branch (if bit is set) */
   OPC_RET = 0x34,           /* return */
   OPC_CALL = 0x35,          /* "function" call */
   OPC_WIN = 0x36,           /* wait for input (ie. wait for WPTR to advance) */
   OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
   OPC_SETSECURE = 0x3b,     /* switch secure mode on/off */
} afuc_opc;
1147ec681f3Smrg
/**
 * Special GPR registers:
 *
 * Notes:  (applicable to a6xx, double check a5xx)
 *
 *   0x1d: (note different mnemonic for src vs dst)
 *      $addr:    writes configure GPU reg address to read/write
 *                (does not respect CP_PROTECT)
 *      $memdata: reads from FIFO filled based on MEM_READ_DWORDS/
 *                MEM_READ_ADDR
 *   0x1e: (note different mnemonic for src vs dst)
 *      $usraddr: writes configure GPU reg address to read/write,
 *                respecting CP_PROTECT
 *      $regdata: reads from FIFO filled based on REG_READ_DWORDS/
 *                REG_READ_ADDR
 *   0x1f:
 *      $data:    reads from pm4 input stream
 *      $data:    writes to stream configured by write to $addr
 *                or $usraddr
 *
 * Registers 0x1d and 0x1e each have two enumerators with the same value,
 * one per direction of use.
 */
typedef enum {
   REG_REM     = 0x1c,
   REG_MEMDATA = 0x1d,  /* when used as src */
   REG_ADDR    = 0x1d,  /* when used as dst */
   REG_REGDATA = 0x1e,  /* when used as src */
   REG_USRADDR = 0x1e,  /* when used as dst */
   REG_DATA    = 0x1f,
} afuc_reg;
1437ec681f3Smrg
1447ec681f3Smrgtypedef union PACKED {
1457ec681f3Smrg   /* addi, subi, andi, ori, xori, etc: */
1467ec681f3Smrg   struct PACKED {
1477ec681f3Smrg      uint32_t uimm : 16;
1487ec681f3Smrg      uint32_t dst : 5;
1497ec681f3Smrg      uint32_t src : 5;
1507ec681f3Smrg      uint32_t hdr : 6;
1517ec681f3Smrg   } alui;
1527ec681f3Smrg   struct PACKED {
1537ec681f3Smrg      uint32_t uimm : 16;
1547ec681f3Smrg      uint32_t dst : 5;
1557ec681f3Smrg      uint32_t shift : 5;
1567ec681f3Smrg      uint32_t hdr : 6;
1577ec681f3Smrg   } movi;
1587ec681f3Smrg   struct PACKED {
1597ec681f3Smrg      uint32_t alu : 5;
1607ec681f3Smrg      uint32_t pad : 4;
1617ec681f3Smrg      uint32_t xmov : 2; /* execute eXtra mov's based on $rem */
1627ec681f3Smrg      uint32_t dst : 5;
1637ec681f3Smrg      uint32_t src2 : 5;
1647ec681f3Smrg      uint32_t src1 : 5;
1657ec681f3Smrg      uint32_t hdr : 6;
1667ec681f3Smrg   } alu;
1677ec681f3Smrg   struct PACKED {
1687ec681f3Smrg      uint32_t uimm : 12;
1697ec681f3Smrg      /* TODO this needs to be confirmed:
1707ec681f3Smrg       *
1717ec681f3Smrg       * flags:
1727ec681f3Smrg       *   0x4 - post-increment src2 by uimm (need to confirm this is also
1737ec681f3Smrg       *         true for load/cread).  TBD whether, when used in conjunction
1747ec681f3Smrg       *         with @LOAD_STORE_HI, 32b rollover works properly.
1757ec681f3Smrg       *
1767ec681f3Smrg       * other values tbd, also need to confirm if different bits can be
1777ec681f3Smrg       * set together (I don't see examples of this in existing fw)
1787ec681f3Smrg       */
1797ec681f3Smrg      uint32_t flags : 4;
1807ec681f3Smrg      uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
1817ec681f3Smrg      uint32_t src2 : 5; /* read or write address is src2+uimm */
1827ec681f3Smrg      uint32_t hdr : 6;
1837ec681f3Smrg   } control;
1847ec681f3Smrg   struct PACKED {
1857ec681f3Smrg      int32_t ioff : 16; /* relative offset */
1867ec681f3Smrg      uint32_t bit_or_imm : 5;
1877ec681f3Smrg      uint32_t src : 5;
1887ec681f3Smrg      uint32_t hdr : 6;
1897ec681f3Smrg   } br;
1907ec681f3Smrg   struct PACKED {
1917ec681f3Smrg      uint32_t uoff : 26; /* absolute (unsigned) offset */
1927ec681f3Smrg      uint32_t hdr : 6;
1937ec681f3Smrg   } call;
1947ec681f3Smrg   struct PACKED {
1957ec681f3Smrg      uint32_t pad : 25;
1967ec681f3Smrg      uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
1977ec681f3Smrg      uint32_t hdr : 6;
1987ec681f3Smrg   } ret;
1997ec681f3Smrg   struct PACKED {
2007ec681f3Smrg      uint32_t pad : 26;
2017ec681f3Smrg      uint32_t hdr : 6;
2027ec681f3Smrg   } waitin;
2037ec681f3Smrg   struct PACKED {
2047ec681f3Smrg      uint32_t pad : 26;
2057ec681f3Smrg      uint32_t opc_r : 6;
2067ec681f3Smrg   };
2077ec681f3Smrg
2087ec681f3Smrg} afuc_instr;
2097ec681f3Smrg
2107ec681f3Smrgstatic inline void
2117ec681f3Smrgafuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
2127ec681f3Smrg{
2137ec681f3Smrg   if (ai->opc_r < 0x30) {
2147ec681f3Smrg      *opc = ai->opc_r >> 1;
2157ec681f3Smrg      *rep = ai->opc_r & 0x1;
2167ec681f3Smrg   } else {
2177ec681f3Smrg      *opc = ai->opc_r;
2187ec681f3Smrg      *rep = false;
2197ec681f3Smrg   }
2207ec681f3Smrg}
2217ec681f3Smrg
2227ec681f3Smrgstatic inline void
2237ec681f3Smrgafuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
2247ec681f3Smrg{
2257ec681f3Smrg   if (opc < 0x30) {
2267ec681f3Smrg      ai->opc_r = opc << 1;
2277ec681f3Smrg      ai->opc_r |= !!rep;
2287ec681f3Smrg   } else {
2297ec681f3Smrg      ai->opc_r = opc;
2307ec681f3Smrg   }
2317ec681f3Smrg}
2327ec681f3Smrg
/*
 * Printing helpers; only declared here, defined elsewhere.
 *
 * NOTE(review): judging by the names, print_src()/print_dst() presumably
 * print a GPR as it reads in source vs. destination position (see afuc_reg
 * for registers whose mnemonic differs by direction), while
 * print_control_reg()/print_pipe_reg() presumably print the name of a
 * control or pipe register given its id - confirm against the
 * implementation.
 */
void print_src(unsigned reg);
void print_dst(unsigned reg);
void print_control_reg(uint32_t id);
void print_pipe_reg(uint32_t id);
2377ec681f3Smrg
2387ec681f3Smrg#endif /* _AFUC_H_ */
239