1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014-2017 Broadcom 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#ifndef VC5_CL_H 25b8e80941Smrg#define VC5_CL_H 26b8e80941Smrg 27b8e80941Smrg#include <stdint.h> 28b8e80941Smrg 29b8e80941Smrg#include "util/u_math.h" 30b8e80941Smrg#include "util/macros.h" 31b8e80941Smrg 32b8e80941Smrgstruct v3d_bo; 33b8e80941Smrgstruct v3d_job; 34b8e80941Smrgstruct v3d_cl; 35b8e80941Smrg 36b8e80941Smrg/** 37b8e80941Smrg * Undefined structure, used for typechecking that you're passing the pointers 38b8e80941Smrg * to these functions correctly. 39b8e80941Smrg */ 40b8e80941Smrgstruct v3d_cl_out; 41b8e80941Smrg 42b8e80941Smrg/** A reference to a BO used in the CL packing functions */ 43b8e80941Smrgstruct v3d_cl_reloc { 44b8e80941Smrg struct v3d_bo *bo; 45b8e80941Smrg uint32_t offset; 46b8e80941Smrg}; 47b8e80941Smrg 48b8e80941Smrgstatic inline void cl_pack_emit_reloc(struct v3d_cl *cl, const struct v3d_cl_reloc *); 49b8e80941Smrg 50b8e80941Smrg#define __gen_user_data struct v3d_cl 51b8e80941Smrg#define __gen_address_type struct v3d_cl_reloc 52b8e80941Smrg#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ 53b8e80941Smrg (reloc)->offset) 54b8e80941Smrg#define __gen_emit_reloc cl_pack_emit_reloc 55b8e80941Smrg 56b8e80941Smrgstruct v3d_cl { 57b8e80941Smrg void *base; 58b8e80941Smrg struct v3d_job *job; 59b8e80941Smrg struct v3d_cl_out *next; 60b8e80941Smrg struct v3d_bo *bo; 61b8e80941Smrg uint32_t size; 62b8e80941Smrg}; 63b8e80941Smrg 64b8e80941Smrgvoid v3d_init_cl(struct v3d_job *job, struct v3d_cl *cl); 65b8e80941Smrgvoid v3d_destroy_cl(struct v3d_cl *cl); 66b8e80941Smrgvoid v3d_dump_cl(void *cl, uint32_t size, bool is_render); 67b8e80941Smrguint32_t v3d_gem_hindex(struct v3d_job *job, struct v3d_bo *bo); 68b8e80941Smrg 69b8e80941Smrgstruct PACKED unaligned_16 { uint16_t x; }; 70b8e80941Smrgstruct PACKED unaligned_32 { uint32_t x; }; 71b8e80941Smrg 72b8e80941Smrgstatic inline uint32_t cl_offset(struct v3d_cl *cl) 73b8e80941Smrg{ 74b8e80941Smrg return (char *)cl->next - (char *)cl->base; 75b8e80941Smrg} 76b8e80941Smrg 77b8e80941Smrgstatic inline struct v3d_cl_reloc cl_get_address(struct v3d_cl *cl) 78b8e80941Smrg{ 79b8e80941Smrg return (struct v3d_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) }; 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic inline void 83b8e80941Smrgcl_advance(struct v3d_cl_out **cl, uint32_t n) 84b8e80941Smrg{ 85b8e80941Smrg (*cl) = (struct v3d_cl_out *)((char *)(*cl) + n); 86b8e80941Smrg} 87b8e80941Smrg 88b8e80941Smrgstatic inline struct v3d_cl_out * 89b8e80941Smrgcl_start(struct v3d_cl *cl) 90b8e80941Smrg{ 91b8e80941Smrg return cl->next; 92b8e80941Smrg} 93b8e80941Smrg 94b8e80941Smrgstatic inline void 95b8e80941Smrgcl_end(struct v3d_cl *cl, struct v3d_cl_out *next) 96b8e80941Smrg{ 97b8e80941Smrg cl->next = next; 98b8e80941Smrg assert(cl_offset(cl) <= cl->size); 99b8e80941Smrg} 100b8e80941Smrg 101b8e80941Smrg 102b8e80941Smrgstatic inline void 103b8e80941Smrgput_unaligned_32(struct v3d_cl_out *ptr, uint32_t val) 104b8e80941Smrg{ 105b8e80941Smrg struct unaligned_32 *p = (void *)ptr; 106b8e80941Smrg p->x = val; 107b8e80941Smrg} 108b8e80941Smrg 109b8e80941Smrgstatic inline void 110b8e80941Smrgput_unaligned_16(struct v3d_cl_out *ptr, uint16_t val) 111b8e80941Smrg{ 112b8e80941Smrg struct unaligned_16 *p = (void *)ptr; 113b8e80941Smrg p->x = val; 114b8e80941Smrg} 115b8e80941Smrg 116b8e80941Smrgstatic inline void 117b8e80941Smrgcl_u8(struct v3d_cl_out **cl, uint8_t n) 118b8e80941Smrg{ 119b8e80941Smrg *(uint8_t *)(*cl) = n; 120b8e80941Smrg cl_advance(cl, 1); 121b8e80941Smrg} 122b8e80941Smrg 123b8e80941Smrgstatic inline void 124b8e80941Smrgcl_u16(struct v3d_cl_out **cl, uint16_t n) 125b8e80941Smrg{ 126b8e80941Smrg put_unaligned_16(*cl, n); 127b8e80941Smrg cl_advance(cl, 2); 128b8e80941Smrg} 129b8e80941Smrg 130b8e80941Smrgstatic inline void 131b8e80941Smrgcl_u32(struct v3d_cl_out **cl, uint32_t n) 132b8e80941Smrg{ 133b8e80941Smrg put_unaligned_32(*cl, n); 134b8e80941Smrg cl_advance(cl, 4); 135b8e80941Smrg} 136b8e80941Smrg 137b8e80941Smrgstatic inline void 138b8e80941Smrgcl_aligned_u32(struct v3d_cl_out **cl, uint32_t n) 139b8e80941Smrg{ 140b8e80941Smrg *(uint32_t *)(*cl) = n; 141b8e80941Smrg cl_advance(cl, 4); 142b8e80941Smrg} 143b8e80941Smrg 144b8e80941Smrgstatic inline void 145b8e80941Smrgcl_aligned_reloc(struct v3d_cl *cl, 146b8e80941Smrg struct v3d_cl_out **cl_out, 147b8e80941Smrg struct v3d_bo *bo, uint32_t offset) 148b8e80941Smrg{ 149b8e80941Smrg cl_aligned_u32(cl_out, bo->offset + offset); 150b8e80941Smrg v3d_job_add_bo(cl->job, bo); 151b8e80941Smrg} 152b8e80941Smrg 153b8e80941Smrgstatic inline void 154b8e80941Smrgcl_ptr(struct v3d_cl_out **cl, void *ptr) 155b8e80941Smrg{ 156b8e80941Smrg *(struct v3d_cl_out **)(*cl) = ptr; 157b8e80941Smrg cl_advance(cl, sizeof(void *)); 158b8e80941Smrg} 159b8e80941Smrg 160b8e80941Smrgstatic inline void 161b8e80941Smrgcl_f(struct v3d_cl_out **cl, float f) 162b8e80941Smrg{ 163b8e80941Smrg cl_u32(cl, fui(f)); 164b8e80941Smrg} 165b8e80941Smrg 166b8e80941Smrgstatic inline void 167b8e80941Smrgcl_aligned_f(struct v3d_cl_out **cl, float f) 168b8e80941Smrg{ 169b8e80941Smrg cl_aligned_u32(cl, fui(f)); 170b8e80941Smrg} 171b8e80941Smrg 172b8e80941Smrg/** 173b8e80941Smrg * Reference to a BO with its associated offset, used in the pack process. 174b8e80941Smrg */ 175b8e80941Smrgstatic inline struct v3d_cl_reloc 176b8e80941Smrgcl_address(struct v3d_bo *bo, uint32_t offset) 177b8e80941Smrg{ 178b8e80941Smrg struct v3d_cl_reloc reloc = { 179b8e80941Smrg .bo = bo, 180b8e80941Smrg .offset = offset, 181b8e80941Smrg }; 182b8e80941Smrg return reloc; 183b8e80941Smrg} 184b8e80941Smrg 185b8e80941Smrguint32_t v3d_cl_ensure_space(struct v3d_cl *cl, uint32_t size, uint32_t align); 186b8e80941Smrgvoid v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t size); 187b8e80941Smrg 188b8e80941Smrg#define cl_packet_header(packet) V3DX(packet ## _header) 189b8e80941Smrg#define cl_packet_length(packet) V3DX(packet ## _length) 190b8e80941Smrg#define cl_packet_pack(packet) V3DX(packet ## _pack) 191b8e80941Smrg#define cl_packet_struct(packet) V3DX(packet) 192b8e80941Smrg 193b8e80941Smrgstatic inline void * 194b8e80941Smrgcl_get_emit_space(struct v3d_cl_out **cl, size_t size) 195b8e80941Smrg{ 196b8e80941Smrg void *addr = *cl; 197b8e80941Smrg cl_advance(cl, size); 198b8e80941Smrg return addr; 199b8e80941Smrg} 200b8e80941Smrg 201b8e80941Smrg/* Macro for setting up an emit of a CL struct. A temporary unpacked struct 202b8e80941Smrg * is created, which you get to set fields in of the form: 203b8e80941Smrg * 204b8e80941Smrg * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) { 205b8e80941Smrg * .flags.flat_shade_flags = 1 << 2, 206b8e80941Smrg * } 207b8e80941Smrg * 208b8e80941Smrg * or default values only can be emitted with just: 209b8e80941Smrg * 210b8e80941Smrg * cl_emit(bcl, FLAT_SHADE_FLAGS, flags); 211b8e80941Smrg * 212b8e80941Smrg * The trick here is that we make a for loop that will execute the body 213b8e80941Smrg * (either the block or the ';' after the macro invocation) exactly once. 214b8e80941Smrg */ 215b8e80941Smrg#define cl_emit(cl, packet, name) \ 216b8e80941Smrg for (struct cl_packet_struct(packet) name = { \ 217b8e80941Smrg cl_packet_header(packet) \ 218b8e80941Smrg }, \ 219b8e80941Smrg *_loop_terminate = &name; \ 220b8e80941Smrg __builtin_expect(_loop_terminate != NULL, 1); \ 221b8e80941Smrg ({ \ 222b8e80941Smrg struct v3d_cl_out *cl_out = cl_start(cl); \ 223b8e80941Smrg cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name); \ 224b8e80941Smrg cl_advance(&cl_out, cl_packet_length(packet)); \ 225b8e80941Smrg cl_end(cl, cl_out); \ 226b8e80941Smrg _loop_terminate = NULL; \ 227b8e80941Smrg })) \ 228b8e80941Smrg 229b8e80941Smrg#define cl_emit_with_prepacked(cl, packet, prepacked, name) \ 230b8e80941Smrg for (struct cl_packet_struct(packet) name = { \ 231b8e80941Smrg cl_packet_header(packet) \ 232b8e80941Smrg }, \ 233b8e80941Smrg *_loop_terminate = &name; \ 234b8e80941Smrg __builtin_expect(_loop_terminate != NULL, 1); \ 235b8e80941Smrg ({ \ 236b8e80941Smrg struct v3d_cl_out *cl_out = cl_start(cl); \ 237b8e80941Smrg uint8_t packed[cl_packet_length(packet)]; \ 238b8e80941Smrg cl_packet_pack(packet)(cl, packed, &name); \ 239b8e80941Smrg for (int _i = 0; _i < cl_packet_length(packet); _i++) \ 240b8e80941Smrg ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \ 241b8e80941Smrg cl_advance(&cl_out, cl_packet_length(packet)); \ 242b8e80941Smrg cl_end(cl, cl_out); \ 243b8e80941Smrg _loop_terminate = NULL; \ 244b8e80941Smrg })) \ 245b8e80941Smrg 246b8e80941Smrg#define cl_emit_prepacked_sized(cl, packet, size) do { \ 247b8e80941Smrg memcpy((cl)->next, packet, size); \ 248b8e80941Smrg cl_advance(&(cl)->next, size); \ 249b8e80941Smrg} while (0) 250b8e80941Smrg 251b8e80941Smrg#define cl_emit_prepacked(cl, packet) \ 252b8e80941Smrg cl_emit_prepacked_sized(cl, packet, sizeof(*(packet))) 253b8e80941Smrg 254b8e80941Smrg#define v3dx_pack(packed, packet, name) \ 255b8e80941Smrg for (struct cl_packet_struct(packet) name = { \ 256b8e80941Smrg cl_packet_header(packet) \ 257b8e80941Smrg }, \ 258b8e80941Smrg *_loop_terminate = &name; \ 259b8e80941Smrg __builtin_expect(_loop_terminate != NULL, 1); \ 260b8e80941Smrg ({ \ 261b8e80941Smrg cl_packet_pack(packet)(NULL, (uint8_t *)packed, &name); \ 262b8e80941Smrg VG(VALGRIND_CHECK_MEM_IS_DEFINED((uint8_t *)packed, \ 263b8e80941Smrg cl_packet_length(packet))); \ 264b8e80941Smrg _loop_terminate = NULL; \ 265b8e80941Smrg })) \ 266b8e80941Smrg 267b8e80941Smrg/** 268b8e80941Smrg * Helper function called by the XML-generated pack functions for filling in 269b8e80941Smrg * an address field in shader records. 270b8e80941Smrg * 271b8e80941Smrg * Since we have a private address space as of VC5, our BOs can have lifelong 272b8e80941Smrg * offsets, and all the kernel needs to know is which BOs need to be paged in 273b8e80941Smrg * for this exec. 274b8e80941Smrg */ 275b8e80941Smrgstatic inline void 276b8e80941Smrgcl_pack_emit_reloc(struct v3d_cl *cl, const struct v3d_cl_reloc *reloc) 277b8e80941Smrg{ 278b8e80941Smrg if (reloc->bo) 279b8e80941Smrg v3d_job_add_bo(cl->job, reloc->bo); 280b8e80941Smrg} 281b8e80941Smrg 282b8e80941Smrg#endif /* VC5_CL_H */ 283