17ec681f3Smrg; Copyright (c) 2020 Valve Corporation
27ec681f3Smrg;
37ec681f3Smrg; Permission is hereby granted, free of charge, to any person obtaining a
47ec681f3Smrg; copy of this software and associated documentation files (the "Software"),
57ec681f3Smrg; to deal in the Software without restriction, including without limitation
67ec681f3Smrg; the rights to use, copy, modify, merge, publish, distribute, sublicense,
77ec681f3Smrg; and/or sell copies of the Software, and to permit persons to whom the
87ec681f3Smrg; Software is furnished to do so, subject to the following conditions:
97ec681f3Smrg;
107ec681f3Smrg; The above copyright notice and this permission notice (including the next
117ec681f3Smrg; paragraph) shall be included in all copies or substantial portions of the
127ec681f3Smrg; Software.
137ec681f3Smrg;
147ec681f3Smrg; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
157ec681f3Smrg; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
167ec681f3Smrg; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
177ec681f3Smrg; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
187ec681f3Smrg; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
197ec681f3Smrg; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
207ec681f3Smrg; SOFTWARE.
217ec681f3Smrg;
227ec681f3Smrg;
237ec681f3Smrg; This file is the source for a simple mock firmware used to regression test
247ec681f3Smrg; the afuc assembler/disassembler. Note, it won't actually work if you try to
257ec681f3Smrg; load it on the GPU! First this is assembled, compared to the reference
267ec681f3Smrg; binary, then disassembled and compared to the reference disassembly. We do
277ec681f3Smrg; this to avoid having to host the actual firmware, especially the disassembled
287ec681f3Smrg; version, in Mesa.
297ec681f3Smrg[01000001]
307ec681f3Smrg[01000000]
317ec681f3Smrgloc02: ; two raw words precede this label, so it sits at offset 2 (the `mov $02, #loc02` test in CP_ME_INIT expects 0x0002)
327ec681f3Smrg; packet table loading:
337ec681f3Smrgmov $01, 0x0830 ; CP_SQE_INSTR_BASE
347ec681f3Smrgmov $02, 0x0002 ; dword count for the register read; two $regdata reads follow
357ec681f3Smrgcwrite $01, [$00 + @REG_READ_ADDR], 0x0
367ec681f3Smrgcwrite $02, [$00 + @REG_READ_DWORDS], 0x0
377ec681f3Smrg; move hi/lo of SQE fw addrs to registers:
387ec681f3Smrgmov $01, $regdata ; $01 is treated as the low half (it is the `add` operand below)
397ec681f3Smrgmov $02, $regdata ; $02 is treated as the high half (it is the `addhi` operand below)
407ec681f3Smrg; skip first dword
417ec681f3Smrgadd $01, $01, 0x0004
427ec681f3Smrgaddhi $02, $02, 0x0000 ; presumably propagates the carry of the add above into the high half
437ec681f3Smrgmov $03, 0x0001 ; dword count for the memory read: a single dword
447ec681f3Smrgcwrite $01, [$00 + @MEM_READ_ADDR], 0x0
457ec681f3Smrgcwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
467ec681f3Smrgcwrite $03, [$00 + @MEM_READ_DWORDS], 0x0
477ec681f3Smrg; read 2nd dword of fw, and add offset (minus 4 because we skipped first dword)
487ec681f3Smrg; to base address of sqe fw
497ec681f3Smrgrot $04, $memdata, 0x0008
507ec681f3Smrgushr $04, $04, 0x0006
517ec681f3Smrgsub $04, $04, 0x0004
527ec681f3Smrgadd $01, $01, $04
537ec681f3Smrgaddhi $02, $02, 0x0000
547ec681f3Smrg
557ec681f3Smrg; load packet table:
567ec681f3Smrgmov $rem, 0x0080 ; 0x80 dwords; also written to MEM_READ_DWORDS below
577ec681f3Smrgcwrite $01, [$00 + @MEM_READ_ADDR], 0x0
587ec681f3Smrgcwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
597ec681f3Smrgcwrite $02, [$00 + @LOAD_STORE_HI], 0x0
607ec681f3Smrgcwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0
617ec681f3Smrgcwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0 ; NOTE(review): assumes $00 reads as zero, i.e. start at table entry 0 - confirm
627ec681f3Smrg(rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0 ; presumably repeated $rem times - confirm against afuc docs
637ec681f3Smrg
647ec681f3Smrgmov $02, 0x883 ; value written to REG_WRITE_ADDR below
657ec681f3Smrgmov $03, 0xbeef
667ec681f3Smrgmov $04, 0xdead << 16
677ec681f3Smrgor $03, $03, $04 ; $03 = 0xdeadbeef
687ec681f3Smrgcwrite $02, [$00 + @REG_WRITE_ADDR], 0x0
697ec681f3Smrgcwrite $03, [$00 + @REG_WRITE], 0x0
707ec681f3Smrgwaitin ; every handler in this file ends with this waitin/mov pair
717ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
727ec681f3Smrg
737ec681f3SmrgCP_ME_INIT: ; handler that only exercises using a label as an immediate operand
747ec681f3Smrg; test label-as-immediate feature
757ec681f3Smrgmov $02, #loc02 ; should be 0x0002
767ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
777ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
787ec681f3Smrg
797ec681f3SmrgCP_MEM_WRITE: ; handler exercising $addr writes combined with the (rep) and (xmovN) modifiers
807ec681f3Smrg; test $addr + (rep) + (xmovN) with ALU
817ec681f3Smrgmov $addr, 0xa0 << 24 ; immediate evaluates to 0xa0000000
827ec681f3Smrgmov $02, 4
837ec681f3Smrg(xmov1)add $data, $02, $data ; ALU op carrying the (xmov1) modifier
847ec681f3Smrgmov $addr, 0xa204 << 16 ; immediate evaluates to 0xa2040000
857ec681f3Smrg(rep)(xmov3)mov $data, $data ; both modifiers stacked on a single instruction
867ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
877ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
887ec681f3Smrg
897ec681f3SmrgCP_SCRATCH_WRITE: ; handler exercising cwrite with a non-$00 base, a non-zero offset and a non-zero flag
907ec681f3Smrg; test (rep) + flags + non-zero offset with cwrite
917ec681f3Smrg; TODO: 0x4 flag is actually pre-increment addressing, handle it as such
927ec681f3Smrgmov $02, 0xff ; base register for the cwrite below
937ec681f3Smrg(rep)cwrite $data, [$02 + 0x001], 0x4 ; base $02, offset 0x001, flag 0x4 (see TODO above)
947ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
957ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
967ec681f3Smrg
977ec681f3SmrgCP_SET_SECURE_MODE: ; handler exercising setsecure with a branch target operand
987ec681f3Smrg; test setsecure
997ec681f3Smrgmov $02, $data
1007ec681f3Smrgsetsecure $02, #setsecure_success ; presumably continues at setsecure_success on success and falls through to err otherwise - confirm
1017ec681f3Smrgerr: ; also the target of `preemptleave #err` in UNKN15
1027ec681f3Smrgjump #err ; jumps to itself: execution never leaves this loop
1037ec681f3Smrgnop ; NOTE(review): presumably fills the jump's delay slot - confirm
1047ec681f3Smrgsetsecure_success:
1057ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
1067ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
1077ec681f3Smrg
1087ec681f3Smrgeuclid:
1097ec681f3Smrg; Euclid's algorithm in afuc: https://en.wikipedia.org/wiki/Euclidean_algorithm
1107ec681f3Smrg; Since afuc doesn't do modulo, we implement the subtraction-based version.
1117ec681f3Smrg;
1127ec681f3Smrg; Demonstrates/tests comparisons and conditional branches. This also
1137ec681f3Smrg; demonstrates the common trick of branching in a delay slot. Note that if a
1147ec681f3Smrg; branch is taken and its delay slot includes another branch, the second
1157ec681f3Smrg; branch cannot also be taken, which is why the last branch in the sequence
1167ec681f3Smrg; cannot be unconditional.
1177ec681f3Smrg;
1187ec681f3Smrg; Inputs are in $02 and $03, and output is in $02.
1197ec681f3Smrgcmp $04, $02, $03 ; $04 = comparison result, tested one bit at a time below
1207ec681f3Smrgbreq $04, b0, #euclid_exit ; presumably the $02 == $03 case: $02 already holds the result
1217ec681f3Smrgbrne $04, b1, #euclid_gt ; presumably the $02 > $03 case, given what euclid_gt subtracts
1227ec681f3Smrgbreq $04, b2, #euclid ; deliberately conditional (see header); loops back to the top
1237ec681f3Smrgsub $03, $03, $02 ; $03 -= $02; presumably the delay slot of the branch above
1247ec681f3Smrgeuclid_gt:
1257ec681f3Smrgjump #euclid ; loop back to the top
1267ec681f3Smrgsub $02, $02, $03 ; $02 -= $03; presumably the delay slot of the jump above
1277ec681f3Smrgeuclid_exit:
1287ec681f3Smrgret ; return to the caller (CP_REG_RMW reaches this routine with `call #euclid`)
1297ec681f3Smrgnop ; NOTE(review): presumably fills the ret's delay slot - confirm
1307ec681f3Smrg
1317ec681f3SmrgCP_REG_RMW: ; handler running one of each ALU instruction, mostly with $regdata as a source
1327ec681f3Smrg; Test various ALU instructions, and read/write $regdata
1337ec681f3Smrgcwrite $data, [$00 + @REG_READ_ADDR], 0x0 ; next $data value is written to REG_READ_ADDR
1347ec681f3Smrgadd $02, $regdata, 0x42
1357ec681f3Smrgaddhi $03, $00, $regdata
1367ec681f3Smrgsub $02, $02, $regdata
1377ec681f3Smrgcall #euclid ; euclid takes $02/$03 as inputs and leaves its result in $02
1387ec681f3Smrgsubhi $03, $03, $regdata ; NOTE(review): presumably executes in the call's delay slot, i.e. before euclid runs - confirm
1397ec681f3Smrgand $02, $02, $regdata
1407ec681f3Smrgor $02, $02, 0x1
1417ec681f3Smrgxor $02, $02, 0x1
1427ec681f3Smrgnot $02, $02
1437ec681f3Smrgshl $02, $02, $regdata
1447ec681f3Smrgushr $02, $02, $regdata
1457ec681f3Smrgishr $02, $02, $regdata
1467ec681f3Smrgrot $02, $02, $regdata
1477ec681f3Smrgmin $02, $02, $regdata
1487ec681f3Smrgmax $02, $02, $regdata
1497ec681f3Smrgmul8 $02, $02, $regdata
1507ec681f3Smrgmsb $02, $02
1517ec681f3Smrgmov $usraddr, $data ; next $data value is written to $usraddr
1527ec681f3Smrgmov $data, $02 ; then the accumulated $02 is written out through $data
1537ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
1547ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
1557ec681f3Smrg
1567ec681f3SmrgCP_MEMCPY: ; handler exercising load/store together with LOAD_STORE_HI
1577ec681f3Smrg; implement CP_MEMCPY using load/store instructions
1587ec681f3Smrgmov $02, $data ; $02 = base register of the load below
1597ec681f3Smrgmov $03, $data ; $03 = value written to LOAD_STORE_HI before the load
1607ec681f3Smrgmov $04, $data ; $04 = base register of the store below
1617ec681f3Smrgmov $05, $data ; $05 = value written to LOAD_STORE_HI before the store
1627ec681f3Smrgmov $06, $data ; $06 = value tested against 0 at the top of the loop
1637ec681f3Smrgcpy_header:
1647ec681f3Smrgbreq $06, 0, #cpy_exit ; NOTE(review): $06 is never modified inside the loop, so this test cannot change between iterations; left as-is because the assembled output is compared to a reference binary
1657ec681f3Smrgcwrite $03, [$00 + @LOAD_STORE_HI], 0x0
1667ec681f3Smrgload $07, [$02 + 0x004], 0x4 ; $07 carries the loaded dword over to the store
1677ec681f3Smrgcwrite $05, [$00 + @LOAD_STORE_HI], 0x0
1687ec681f3Smrgjump #cpy_header
1697ec681f3Smrgstore $07, [$04 + 0x004], 0x4 ; NOTE(review): presumably executes in the jump's delay slot - confirm
1707ec681f3Smrgcpy_exit:
1717ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
1727ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
1737ec681f3Smrg
1747ec681f3SmrgCP_MEM_TO_MEM: ; handler feeding $memdata (filled via the MEM_READ_* control regs) into a repeated store
1757ec681f3Smrg; implement CP_MEMCPY using mem read control regs
1767ec681f3Smrg; tests @FOO+0x1 for 64-bit control regs, and reading/writing $rem
1777ec681f3Smrgcwrite $data, [$00 + @MEM_READ_ADDR], 0x0
1787ec681f3Smrgcwrite $data, [$00 + @MEM_READ_ADDR+1], 0x0 ; offset spelled `+1` here and `+0x1` in the boot code; both forms are exercised
1797ec681f3Smrgmov $02, $data ; $02 = base register of the store below
1807ec681f3Smrgcwrite $data, [$00 + @LOAD_STORE_HI], 0x0
1817ec681f3Smrgmov $rem, $data ; write $rem ...
1827ec681f3Smrgcwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 ; ... and read it back as a cwrite source
1837ec681f3Smrg(rep)store $memdata, [$02 + 0x004], 0x4 ; presumably repeated $rem times - confirm against afuc docs
1847ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
1857ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
1867ec681f3Smrg
1877ec681f3SmrgUNKN15: ; handler exercising cread, a branch comparing against an immediate, preemptleave and iret
1887ec681f3Smrg; test preemptleave + iret + conditional branch w/ immed
1897ec681f3Smrgcread $02, [$00 + 0x101], 0x0 ; control register given by raw number (0x101) rather than an @NAME
1907ec681f3Smrgbrne $02, 0x0001, #exit_iret ; go to exit_iret when $02 != 1
1917ec681f3Smrgnop ; NOTE(review): presumably fills the branch's delay slot - confirm
1927ec681f3Smrgpreemptleave #err ; err is the self-jumping label defined in CP_SET_SECURE_MODE
1937ec681f3Smrgnop
1947ec681f3Smrgnop
1957ec681f3Smrgnop
1967ec681f3Smrgwaitin ; same waitin/mov ending as every other handler in this file
1977ec681f3Smrgmov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
1987ec681f3Smrgexit_iret:
1997ec681f3Smrgiret
2007ec681f3Smrgnop ; NOTE(review): presumably fills the iret's delay slot - confirm
2017ec681f3Smrg
2027ec681f3SmrgUNKN0: ; every label from here through UNKN127 names the same two-instruction handler at the end of this list
2037ec681f3SmrgUNKN1:
2047ec681f3SmrgUNKN2:
2057ec681f3SmrgUNKN3:
2067ec681f3SmrgPKT4:
2077ec681f3SmrgUNKN5:
2087ec681f3SmrgUNKN6:
2097ec681f3SmrgUNKN7:
2107ec681f3SmrgUNKN8:
2117ec681f3SmrgUNKN9:
2127ec681f3SmrgUNKN10:
2137ec681f3SmrgUNKN11:
2147ec681f3SmrgUNKN12:
2157ec681f3SmrgUNKN13:
2167ec681f3SmrgUNKN14:
2177ec681f3SmrgCP_NOP:
2187ec681f3SmrgCP_RECORD_PFP_TIMESTAMP:
2197ec681f3SmrgCP_WAIT_MEM_WRITES:
2207ec681f3SmrgCP_WAIT_FOR_ME:
2217ec681f3SmrgCP_WAIT_MEM_GTE:
2227ec681f3SmrgUNKN21:
2237ec681f3SmrgUNKN22:
2247ec681f3SmrgUNKN23:
2257ec681f3SmrgUNKN24:
2267ec681f3SmrgCP_DRAW_PRED_ENABLE_GLOBAL:
2277ec681f3SmrgCP_DRAW_PRED_ENABLE_LOCAL:
2287ec681f3SmrgUNKN27:
2297ec681f3SmrgCP_PREEMPT_ENABLE:
2307ec681f3SmrgCP_SKIP_IB2_ENABLE_GLOBAL:
2317ec681f3SmrgCP_PREEMPT_TOKEN:
2327ec681f3SmrgUNKN31:
2337ec681f3SmrgUNKN32:
2347ec681f3SmrgCP_DRAW_INDX:
2357ec681f3SmrgCP_SKIP_IB2_ENABLE_LOCAL:
2367ec681f3SmrgCP_DRAW_AUTO:
2377ec681f3SmrgCP_SET_STATE:
2387ec681f3SmrgCP_WAIT_FOR_IDLE:
2397ec681f3SmrgCP_IM_LOAD:
2407ec681f3SmrgCP_DRAW_INDIRECT:
2417ec681f3SmrgCP_DRAW_INDX_INDIRECT:
2427ec681f3SmrgCP_DRAW_INDIRECT_MULTI:
2437ec681f3SmrgCP_IM_LOAD_IMMEDIATE:
2447ec681f3SmrgCP_BLIT:
2457ec681f3SmrgCP_SET_CONSTANT:
2467ec681f3SmrgCP_SET_BIN_DATA5_OFFSET:
2477ec681f3SmrgCP_SET_BIN_DATA5:
2487ec681f3SmrgUNKN48:
2497ec681f3SmrgCP_RUN_OPENCL:
2507ec681f3SmrgCP_LOAD_STATE6_GEOM:
2517ec681f3SmrgCP_EXEC_CS:
2527ec681f3SmrgCP_LOAD_STATE6_FRAG:
2537ec681f3SmrgCP_SET_SUBDRAW_SIZE:
2547ec681f3SmrgCP_LOAD_STATE6:
2557ec681f3SmrgCP_INDIRECT_BUFFER_PFD:
2567ec681f3SmrgCP_DRAW_INDX_OFFSET:
2577ec681f3SmrgCP_REG_TEST:
2587ec681f3SmrgCP_COND_INDIRECT_BUFFER_PFE:
2597ec681f3SmrgCP_INVALIDATE_STATE:
2607ec681f3SmrgCP_WAIT_REG_MEM:
2617ec681f3SmrgCP_REG_TO_MEM:
2627ec681f3SmrgCP_INDIRECT_BUFFER:
2637ec681f3SmrgCP_INTERRUPT:
2647ec681f3SmrgCP_EXEC_CS_INDIRECT:
2657ec681f3SmrgCP_MEM_TO_REG:
2667ec681f3SmrgCP_SET_DRAW_STATE:
2677ec681f3SmrgCP_COND_EXEC:
2687ec681f3SmrgCP_COND_WRITE5:
2697ec681f3SmrgCP_EVENT_WRITE:
2707ec681f3SmrgCP_COND_REG_EXEC:
2717ec681f3SmrgUNKN73:
2727ec681f3SmrgCP_REG_TO_SCRATCH:
2737ec681f3SmrgCP_SET_DRAW_INIT_FLAGS:
2747ec681f3SmrgCP_SCRATCH_TO_REG:
2757ec681f3SmrgCP_DRAW_PRED_SET:
2767ec681f3SmrgCP_MEM_WRITE_CNTR:
2777ec681f3SmrgCP_START_BIN:
2787ec681f3SmrgCP_END_BIN:
2797ec681f3SmrgCP_WAIT_REG_EQ:
2807ec681f3SmrgCP_SMMU_TABLE_UPDATE:
2817ec681f3SmrgUNKN84:
2827ec681f3SmrgCP_SET_CTXSWITCH_IB:
2837ec681f3SmrgCP_SET_PSEUDO_REG:
2847ec681f3SmrgCP_INDIRECT_BUFFER_CHAIN:
2857ec681f3SmrgCP_EVENT_WRITE_SHD:
2867ec681f3SmrgCP_EVENT_WRITE_CFL:
2877ec681f3SmrgUNKN90:
2887ec681f3SmrgCP_EVENT_WRITE_ZPD:
2897ec681f3SmrgCP_CONTEXT_REG_BUNCH:
2907ec681f3SmrgCP_WAIT_IB_PFD_COMPLETE:
2917ec681f3SmrgCP_CONTEXT_UPDATE:
2927ec681f3SmrgCP_SET_PROTECTED_MODE:
2937ec681f3SmrgUNKN96:
2947ec681f3SmrgUNKN97:
2957ec681f3SmrgUNKN98:
2967ec681f3SmrgCP_SET_MODE:
2977ec681f3SmrgCP_SET_VISIBILITY_OVERRIDE:
2987ec681f3SmrgCP_SET_MARKER:
2997ec681f3SmrgUNKN103:
3007ec681f3SmrgUNKN104:
3017ec681f3SmrgUNKN105:
3027ec681f3SmrgUNKN106:
3037ec681f3SmrgUNKN107:
3047ec681f3SmrgUNKN108:
3057ec681f3SmrgCP_REG_WRITE:
3067ec681f3SmrgUNKN110:
3077ec681f3SmrgCP_BOOTSTRAP_UCODE:
3087ec681f3SmrgCP_WAIT_TWO_REGS:
3097ec681f3SmrgCP_TEST_TWO_MEMS:
3107ec681f3SmrgCP_REG_TO_MEM_OFFSET_REG:
3117ec681f3SmrgCP_REG_TO_MEM_OFFSET_MEM:
3127ec681f3SmrgUNKN118:
3137ec681f3SmrgUNKN119:
3147ec681f3SmrgCP_REG_WR_NO_CTXT:
3157ec681f3SmrgUNKN121:
3167ec681f3SmrgUNKN122:
3177ec681f3SmrgUNKN123:
3187ec681f3SmrgUNKN124:
3197ec681f3SmrgUNKN125:
3207ec681f3SmrgUNKN126:
3217ec681f3SmrgUNKN127:
3227ec681f3Smrg        waitin ; shared body for all labels above: no work, just the usual waitin/mov ending
3237ec681f3Smrg        mov $01, $data ; NOTE(review): presumably executes in waitin's delay slot - confirm against afuc docs
324