17ec681f3Smrg; Copyright (c) 2020 Valve Corporation 27ec681f3Smrg; 37ec681f3Smrg; Permission is hereby granted, free of charge, to any person obtaining a 47ec681f3Smrg; copy of this software and associated documentation files (the "Software"), 57ec681f3Smrg; to deal in the Software without restriction, including without limitation 67ec681f3Smrg; the rights to use, copy, modify, merge, publish, distribute, sublicense, 77ec681f3Smrg; and/or sell copies of the Software, and to permit persons to whom the 87ec681f3Smrg; Software is furnished to do so, subject to the following conditions: 97ec681f3Smrg; 107ec681f3Smrg; The above copyright notice and this permission notice (including the next 117ec681f3Smrg; paragraph) shall be included in all copies or substantial portions of the 127ec681f3Smrg; Software. 137ec681f3Smrg; 147ec681f3Smrg; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 157ec681f3Smrg; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 167ec681f3Smrg; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 177ec681f3Smrg; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 187ec681f3Smrg; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 197ec681f3Smrg; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 207ec681f3Smrg; SOFTWARE. 217ec681f3Smrg; 227ec681f3Smrg; 237ec681f3Smrg; This file is the source for a simple mock firmware used to regression test 247ec681f3Smrg; the afuc assembler/disassembler. Note, it won't actually work if you try to 257ec681f3Smrg; load it on the GPU! First this is assembled, compared to the reference 267ec681f3Smrg; binary, then disassambled and compared to the reference disassembly. We do 277ec681f3Smrg; this to avoid having to host the actual firmware, especially the disassembled 287ec681f3Smrg; version, in Mesa. 
[01000001]
[01000000]
; The two raw dwords above occupy offsets 0 and 1, so the first instruction
; (and therefore loc02) lands at offset 2.  CP_ME_INIT below depends on that.
;
; NOTE: this file is assembled and compared against a reference binary, so
; the instruction stream must not be reordered or altered; only comments and
; whitespace are free to change.
loc02:
; packet table loading:
        mov $01, 0x0830                 ; CP_SQE_INSTR_BASE
        mov $02, 0x0002                 ; number of dwords to read back
        cwrite $01, [$00 + @REG_READ_ADDR], 0x0
        cwrite $02, [$00 + @REG_READ_DWORDS], 0x0
; move hi/lo of SQE fw addrs to registers:
; ($01 receives the first dword read and $02 the second; the add/addhi pairs
; below treat $01 as the low half and $02 as the high half)
        mov $01, $regdata
        mov $02, $regdata
; skip first dword
        add $01, $01, 0x0004
        addhi $02, $02, 0x0000
        mov $03, 0x0001                 ; read a single dword
        cwrite $01, [$00 + @MEM_READ_ADDR], 0x0
        cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
        cwrite $03, [$00 + @MEM_READ_DWORDS], 0x0
; read 2nd dword of fw, and add offset (minus 4 because we skipped first dword)
; to base address of sqe fw
        rot $04, $memdata, 0x0008
        ushr $04, $04, 0x0006
        sub $04, $04, 0x0004
        add $01, $01, $04
        addhi $02, $02, 0x0000

; load packet table:
        mov $rem, 0x0080                ; $rem is also written to MEM_READ_DWORDS below
        cwrite $01, [$00 + @MEM_READ_ADDR], 0x0
        cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
        cwrite $02, [$00 + @LOAD_STORE_HI], 0x0
        cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0
        cwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0
        (rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0

; build 0xdeadbeef from two immediates (exercises the "imm << shift" mov form)
; and write it to register 0x883
        mov $02, 0x883
        mov $03, 0xbeef
        mov $04, 0xdead << 16
        or $03, $03, $04
        cwrite $02, [$00 + @REG_WRITE_ADDR], 0x0
        cwrite $03, [$00 + @REG_WRITE], 0x0
; Every handler in this file ends with this waitin / mov $01, $data pair.
; NOTE(review): presumably the mov sits in waitin's delay slot -- confirm
; against the afuc documentation.
        waitin
        mov $01, $data

CP_ME_INIT:
; test label-as-immediate feature
        mov $02, #loc02                 ; should be 0x0002
        waitin
        mov $01, $data

CP_MEM_WRITE:
; test $addr + (rep) + (xmovN) with ALU
;
; NOTE: this file is assembled and compared against a reference binary, so
; the instruction stream must not be reordered or altered; only comments and
; whitespace are free to change.
        mov $addr, 0xa0 << 24
        mov $02, 4
        (xmov1)add $data, $02, $data
        mov $addr, 0xa204 << 16
        (rep)(xmov3)mov $data, $data
        waitin
        mov $01, $data

CP_SCRATCH_WRITE:
; test (rep) + flags + non-zero offset with cwrite
; TODO: 0x4 flag is actually pre-increment addressing, handle it as such
        mov $02, 0xff
        (rep)cwrite $data, [$02 + 0x001], 0x4
        waitin
        mov $01, $data

CP_SET_SECURE_MODE:
; test setsecure
        mov $02, $data
        setsecure $02, #setsecure_success
; "err" is a jump-to-self; it is also the target of the preemptleave in UNKN15.
err:
        jump #err
        nop
setsecure_success:
        waitin
        mov $01, $data

euclid:
; Euclid's algorithm in afuc: https://en.wikipedia.org/wiki/Euclidean_algorithm
; Since afuc doesn't do modulo, we implement the subtraction-based version.
;
; Demonstrates/tests comparisons and conditional branches. This also
; demonstrates the common trick of branching in a delay slot. Note that if a
; branch is taken and its delay slot includes another branch, the second
; branch cannot also be taken, which is why the last branch in the sequence
; cannot be unconditional.
;
; Inputs are in $02 and $03, and output is in $02.
; $04 is used as scratch for the comparison result.
;
; NOTE(review): judging from the control flow, the three tests on $04 pick
; out the "equal", "$02 greater" and "$02 less" cases respectively -- confirm
; the exact bit meanings against the afuc cmp documentation.
        cmp $04, $02, $03
        breq $04, b0, #euclid_exit      ; done
        brne $04, b1, #euclid_gt        ; -> $02 -= $03
        breq $04, b2, #euclid           ; -> loop, with the sub below in its delay slot
        sub $03, $03, $02
euclid_gt:
        jump #euclid
        sub $02, $02, $03               ; delay slot of the jump
euclid_exit:
        ret
        nop

CP_REG_RMW:
; Test various ALU instructions, and read/write $regdata
        cwrite $data, [$00 + @REG_READ_ADDR], 0x0
        add $02, $regdata, 0x42
        addhi $03, $00, $regdata
        sub $02, $02, $regdata
        call #euclid
        subhi $03, $03, $regdata
        and $02, $02, $regdata
        or $02, $02, 0x1
        xor $02, $02, 0x1
        not $02, $02
        shl $02, $02, $regdata
        ushr $02, $02, $regdata
        ishr $02, $02, $regdata
        rot $02, $02, $regdata
        min $02, $02, $regdata
        max $02, $02, $regdata
        mul8 $02, $02, $regdata
        msb $02, $02
        mov $usraddr, $data
        mov $data, $02
        waitin
        mov $01, $data

CP_MEMCPY:
; implement CP_MEMCPY using load/store instructions
;
; Five packet dwords are consumed into $02..$06.  $03 and $05 are written to
; LOAD_STORE_HI before the load from [$02 ...] and the store to [$04 ...]
; respectively, and $06 is the value tested for loop exit.
;
; NOTE(review): nothing in this loop modifies $06, so as written it only
; terminates when $06 is already zero.  This is a mock firmware that is only
; assembled, never run, and changing it would invalidate the reference
; binary -- left as-is on purpose.
        mov $02, $data
        mov $03, $data
        mov $04, $data
        mov $05, $data
        mov $06, $data
cpy_header:
        breq $06, 0, #cpy_exit
        cwrite $03, [$00 + @LOAD_STORE_HI], 0x0
        load $07, [$02 + 0x004], 0x4
        cwrite $05, [$00 + @LOAD_STORE_HI], 0x0
        jump #cpy_header
        store $07, [$04 + 0x004], 0x4   ; delay slot of the jump
cpy_exit:
        waitin
        mov $01, $data

CP_MEM_TO_MEM:
; implement CP_MEMCPY using mem read control regs
; tests @FOO+0x1 for 64-bit control regs, and reading/writing $rem
        cwrite $data, [$00 + @MEM_READ_ADDR], 0x0
        cwrite $data, [$00 + @MEM_READ_ADDR+1], 0x0
        mov $02, $data
        cwrite $data, [$00 + @LOAD_STORE_HI], 0x0
        mov $rem, $data
        cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0
        (rep)store $memdata, [$02 + 0x004], 0x4
        waitin
        mov $01, $data

UNKN15:
; test preemptleave + iret + conditional branch w/ immed
        cread $02, [$00 + 0x101], 0x0
        brne $02, 0x0001, #exit_iret
        nop
        preemptleave #err
        nop
        nop
        nop
        waitin
        mov $01, $data
exit_iret:
        iret
        nop

; All remaining packet labels are stacked with no instructions in between, so
; they all resolve to the same address: the minimal handler at the very end.
UNKN0:
UNKN1:
UNKN2:
UNKN3:
PKT4:
UNKN5:
UNKN6:
UNKN7:
UNKN8:
UNKN9:
UNKN10:
UNKN11:
UNKN12:
UNKN13:
UNKN14:
CP_NOP:
CP_RECORD_PFP_TIMESTAMP:
CP_WAIT_MEM_WRITES:
CP_WAIT_FOR_ME:
CP_WAIT_MEM_GTE:
UNKN21:
UNKN22:
UNKN23:
UNKN24:
CP_DRAW_PRED_ENABLE_GLOBAL:
CP_DRAW_PRED_ENABLE_LOCAL:
UNKN27:
CP_PREEMPT_ENABLE:
CP_SKIP_IB2_ENABLE_GLOBAL:
CP_PREEMPT_TOKEN:
UNKN31:
UNKN32:
CP_DRAW_INDX:
CP_SKIP_IB2_ENABLE_LOCAL:
CP_DRAW_AUTO:
CP_SET_STATE:
CP_WAIT_FOR_IDLE:
CP_IM_LOAD:
CP_DRAW_INDIRECT:
CP_DRAW_INDX_INDIRECT:
CP_DRAW_INDIRECT_MULTI:
CP_IM_LOAD_IMMEDIATE:
CP_BLIT:
CP_SET_CONSTANT:
CP_SET_BIN_DATA5_OFFSET:
CP_SET_BIN_DATA5:
UNKN48:
CP_RUN_OPENCL:
CP_LOAD_STATE6_GEOM:
CP_EXEC_CS:
CP_LOAD_STATE6_FRAG:
CP_SET_SUBDRAW_SIZE:
CP_LOAD_STATE6:
CP_INDIRECT_BUFFER_PFD:
CP_DRAW_INDX_OFFSET:
CP_REG_TEST:
CP_COND_INDIRECT_BUFFER_PFE:
CP_INVALIDATE_STATE:
CP_WAIT_REG_MEM:
CP_REG_TO_MEM:
CP_INDIRECT_BUFFER:
CP_INTERRUPT:
CP_EXEC_CS_INDIRECT:
CP_MEM_TO_REG:
CP_SET_DRAW_STATE:
CP_COND_EXEC:
CP_COND_WRITE5:
CP_EVENT_WRITE:
CP_COND_REG_EXEC:
UNKN73:
CP_REG_TO_SCRATCH:
CP_SET_DRAW_INIT_FLAGS:
CP_SCRATCH_TO_REG:
CP_DRAW_PRED_SET:
CP_MEM_WRITE_CNTR:
CP_START_BIN:
CP_END_BIN:
CP_WAIT_REG_EQ:
CP_SMMU_TABLE_UPDATE:
UNKN84:
CP_SET_CTXSWITCH_IB:
CP_SET_PSEUDO_REG:
CP_INDIRECT_BUFFER_CHAIN:
CP_EVENT_WRITE_SHD:
CP_EVENT_WRITE_CFL:
UNKN90:
CP_EVENT_WRITE_ZPD:
CP_CONTEXT_REG_BUNCH:
CP_WAIT_IB_PFD_COMPLETE:
CP_CONTEXT_UPDATE:
CP_SET_PROTECTED_MODE:
UNKN96:
UNKN97:
UNKN98:
CP_SET_MODE:
CP_SET_VISIBILITY_OVERRIDE:
CP_SET_MARKER:
UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN107:
UNKN108:
CP_REG_WRITE:
UNKN110:
CP_BOOTSTRAP_UCODE:
CP_WAIT_TWO_REGS:
CP_TEST_TWO_MEMS:
CP_REG_TO_MEM_OFFSET_REG:
CP_REG_TO_MEM_OFFSET_MEM:
UNKN118:
UNKN119:
CP_REG_WR_NO_CTXT:
UNKN121:
UNKN122:
UNKN123:
UNKN124:
UNKN125:
UNKN126:
UNKN127:
        waitin
        mov $01, $data