v3d_tiling.c revision b8e80941
1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file v3d_tiling.c 25 * 26 * Handles information about the VC5 tiling formats, and loading and storing 27 * from them. 28 */ 29 30#include <stdint.h> 31#include "v3d_screen.h" 32#include "v3d_context.h" 33#include "v3d_tiling.h" 34#include "broadcom/common/v3d_cpu_tiling.h" 35 36/** Return the width in pixels of a 64-byte microtile. */ 37uint32_t 38v3d_utile_width(int cpp) 39{ 40 switch (cpp) { 41 case 1: 42 case 2: 43 return 8; 44 case 4: 45 case 8: 46 return 4; 47 case 16: 48 return 2; 49 default: 50 unreachable("unknown cpp"); 51 } 52} 53 54/** Return the height in pixels of a 64-byte microtile. */ 55uint32_t 56v3d_utile_height(int cpp) 57{ 58 switch (cpp) { 59 case 1: 60 return 8; 61 case 2: 62 case 4: 63 return 4; 64 case 8: 65 case 16: 66 return 2; 67 default: 68 unreachable("unknown cpp"); 69 } 70} 71 72/** 73 * Returns the byte address for a given pixel within a utile. 74 * 75 * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 76 * arrangement. 77 */ 78static inline uint32_t 79v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) 80{ 81 uint32_t utile_w = v3d_utile_width(cpp); 82 83 assert(x < utile_w && y < v3d_utile_height(cpp)); 84 85 return x * cpp + y * utile_w * cpp; 86} 87 88/** 89 * Returns the byte offset for a given pixel in a LINEARTILE layout. 90 * 91 * LINEARTILE is a single line of utiles in either the X or Y direction. 92 */ 93static inline uint32_t 94v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) 95{ 96 uint32_t utile_w = v3d_utile_width(cpp); 97 uint32_t utile_h = v3d_utile_height(cpp); 98 uint32_t utile_index_x = x / utile_w; 99 uint32_t utile_index_y = y / utile_h; 100 101 assert(utile_index_x == 0 || utile_index_y == 0); 102 103 return (64 * (utile_index_x + utile_index_y) + 104 v3d_get_utile_pixel_offset(cpp, 105 x & (utile_w - 1), 106 y & (utile_h - 1))); 107} 108 109/** 110 * Returns the byte offset for a given pixel in a UBLINEAR layout. 111 * 112 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 113 * utiles), and the UIF blocks are in 1 or 2 columns in raster order. 114 */ 115static inline uint32_t 116v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, 117 int ublinear_number) 118{ 119 uint32_t utile_w = v3d_utile_width(cpp); 120 uint32_t utile_h = v3d_utile_height(cpp); 121 uint32_t ub_w = utile_w * 2; 122 uint32_t ub_h = utile_h * 2; 123 uint32_t ub_x = x / ub_w; 124 uint32_t ub_y = y / ub_h; 125 126 return (256 * (ub_y * ublinear_number + 127 ub_x) + 128 ((x & utile_w) ? 64 : 0) + 129 ((y & utile_h) ? 128 : 0) + 130 + v3d_get_utile_pixel_offset(cpp, 131 x & (utile_w - 1), 132 y & (utile_h - 1))); 133} 134 135static inline uint32_t 136v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, 137 uint32_t x, uint32_t y) 138{ 139 return v3d_get_ublinear_pixel_offset(cpp, x, y, 2); 140} 141 142static inline uint32_t 143v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, 144 uint32_t x, uint32_t y) 145{ 146 return v3d_get_ublinear_pixel_offset(cpp, x, y, 1); 147} 148 149/** 150 * Returns the byte offset for a given pixel in a UIF layout. 151 * 152 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout. 153 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in 154 * 4x4 groups, and those 4x4 groups are then stored in raster order. 155 */ 156static inline uint32_t 157v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, 158 bool do_xor) 159{ 160 uint32_t utile_w = v3d_utile_width(cpp); 161 uint32_t utile_h = v3d_utile_height(cpp); 162 uint32_t mb_width = utile_w * 2; 163 uint32_t mb_height = utile_h * 2; 164 uint32_t log2_mb_width = ffs(mb_width) - 1; 165 uint32_t log2_mb_height = ffs(mb_height) - 1; 166 167 /* Macroblock X, y */ 168 uint32_t mb_x = x >> log2_mb_width; 169 uint32_t mb_y = y >> log2_mb_height; 170 /* X, y within the macroblock */ 171 uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); 172 uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); 173 174 if (do_xor && (mb_x / 4) & 1) 175 mb_y ^= 0x10; 176 177 uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; 178 uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; 179 180 uint32_t mb_base_addr = mb_id * 256; 181 182 bool top = mb_pixel_y < utile_h; 183 bool left = mb_pixel_x < utile_w; 184 185 /* Docs have this in pixels, we do bytes here. */ 186 uint32_t mb_tile_offset = (!top * 128 + !left * 64); 187 188 uint32_t utile_x = mb_pixel_x & (utile_w - 1); 189 uint32_t utile_y = mb_pixel_y & (utile_h - 1); 190 191 uint32_t mb_pixel_address = (mb_base_addr + 192 mb_tile_offset + 193 v3d_get_utile_pixel_offset(cpp, 194 utile_x, 195 utile_y)); 196 197 return mb_pixel_address; 198} 199 200static inline uint32_t 201v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, 202 uint32_t x, uint32_t y) 203{ 204 return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true); 205} 206 207static inline uint32_t 208v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, 209 uint32_t x, uint32_t y) 210{ 211 return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); 212} 213 214/* Loads/stores non-utile-aligned boxes by walking over the destination 215 * rectangle, computing the address on the GPU, and storing/loading a pixel at 216 * a time. 217 */ 218static inline void 219v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, 220 void *cpu, uint32_t cpu_stride, 221 int cpp, uint32_t image_h, 222 const struct pipe_box *box, 223 uint32_t (*get_pixel_offset)(uint32_t cpp, 224 uint32_t image_h, 225 uint32_t x, uint32_t y), 226 bool is_load) 227{ 228 for (uint32_t y = 0; y < box->height; y++) { 229 void *cpu_row = cpu + y * cpu_stride; 230 231 for (int x = 0; x < box->width; x++) { 232 uint32_t pixel_offset = get_pixel_offset(cpp, image_h, 233 box->x + x, 234 box->y + y); 235 236 if (false) { 237 fprintf(stderr, "%3d,%3d -> %d\n", 238 box->x + x, box->y + y, 239 pixel_offset); 240 } 241 242 if (is_load) { 243 memcpy(cpu_row + x * cpp, 244 gpu + pixel_offset, 245 cpp); 246 } else { 247 memcpy(gpu + pixel_offset, 248 cpu_row + x * cpp, 249 cpp); 250 } 251 } 252 } 253} 254 255/* Breaks the image down into utiles and calls either the fast whole-utile 256 * load/store functions, or the unaligned fallback case. 257 */ 258static inline void 259v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, 260 void *cpu, uint32_t cpu_stride, 261 int cpp, uint32_t image_h, 262 const struct pipe_box *box, 263 uint32_t (*get_pixel_offset)(uint32_t cpp, 264 uint32_t image_h, 265 uint32_t x, uint32_t y), 266 bool is_load) 267{ 268 uint32_t utile_w = v3d_utile_width(cpp); 269 uint32_t utile_h = v3d_utile_height(cpp); 270 uint32_t utile_gpu_stride = utile_w * cpp; 271 uint32_t x1 = box->x; 272 uint32_t y1 = box->y; 273 uint32_t x2 = box->x + box->width; 274 uint32_t y2 = box->y + box->height; 275 uint32_t align_x1 = align(x1, utile_w); 276 uint32_t align_y1 = align(y1, utile_h); 277 uint32_t align_x2 = x2 & ~(utile_w - 1); 278 uint32_t align_y2 = y2 & ~(utile_h - 1); 279 280 /* Load/store all the whole utiles first. */ 281 for (uint32_t y = align_y1; y < align_y2; y += utile_h) { 282 void *cpu_row = cpu + (y - box->y) * cpu_stride; 283 284 for (uint32_t x = align_x1; x < align_x2; x += utile_w) { 285 void *utile_gpu = (gpu + 286 get_pixel_offset(cpp, image_h, x, y)); 287 void *utile_cpu = cpu_row + (x - box->x) * cpp; 288 289 if (is_load) { 290 v3d_load_utile(utile_cpu, cpu_stride, 291 utile_gpu, utile_gpu_stride); 292 } else { 293 v3d_store_utile(utile_gpu, utile_gpu_stride, 294 utile_cpu, cpu_stride); 295 } 296 } 297 } 298 299 /* If there were no aligned utiles in the middle, load/store the whole 300 * thing unaligned. 301 */ 302 if (align_y2 <= align_y1 || 303 align_x2 <= align_x1) { 304 v3d_move_pixels_unaligned(gpu, gpu_stride, 305 cpu, cpu_stride, 306 cpp, image_h, 307 box, 308 get_pixel_offset, is_load); 309 return; 310 } 311 312 /* Load/store the partial utiles. */ 313 struct pipe_box partial_boxes[4] = { 314 /* Top */ 315 { 316 .x = x1, 317 .width = x2 - x1, 318 .y = y1, 319 .height = align_y1 - y1, 320 }, 321 /* Bottom */ 322 { 323 .x = x1, 324 .width = x2 - x1, 325 .y = align_y2, 326 .height = y2 - align_y2, 327 }, 328 /* Left */ 329 { 330 .x = x1, 331 .width = align_x1 - x1, 332 .y = align_y1, 333 .height = align_y2 - align_y1, 334 }, 335 /* Right */ 336 { 337 .x = align_x2, 338 .width = x2 - align_x2, 339 .y = align_y1, 340 .height = align_y2 - align_y1, 341 }, 342 }; 343 for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { 344 void *partial_cpu = (cpu + 345 (partial_boxes[i].y - y1) * cpu_stride + 346 (partial_boxes[i].x - x1) * cpp); 347 348 v3d_move_pixels_unaligned(gpu, gpu_stride, 349 partial_cpu, cpu_stride, 350 cpp, image_h, 351 &partial_boxes[i], 352 get_pixel_offset, is_load); 353 } 354} 355 356static inline void 357v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, 358 void *cpu, uint32_t cpu_stride, 359 int cpp, uint32_t image_h, 360 const struct pipe_box *box, 361 uint32_t (*get_pixel_offset)(uint32_t cpp, 362 uint32_t image_h, 363 uint32_t x, uint32_t y), 364 bool is_load) 365{ 366 switch (cpp) { 367 case 1: 368 v3d_move_pixels_general_percpp(gpu, gpu_stride, 369 cpu, cpu_stride, 370 1, image_h, box, 371 get_pixel_offset, 372 is_load); 373 break; 374 case 2: 375 v3d_move_pixels_general_percpp(gpu, gpu_stride, 376 cpu, cpu_stride, 377 2, image_h, box, 378 get_pixel_offset, 379 is_load); 380 break; 381 case 4: 382 v3d_move_pixels_general_percpp(gpu, gpu_stride, 383 cpu, cpu_stride, 384 4, image_h, box, 385 get_pixel_offset, 386 is_load); 387 break; 388 case 8: 389 v3d_move_pixels_general_percpp(gpu, gpu_stride, 390 cpu, cpu_stride, 391 8, image_h, box, 392 get_pixel_offset, 393 is_load); 394 break; 395 case 16: 396 v3d_move_pixels_general_percpp(gpu, gpu_stride, 397 cpu, cpu_stride, 398 16, image_h, box, 399 get_pixel_offset, 400 is_load); 401 break; 402 } 403} 404 405static inline void 406v3d_move_tiled_image(void *gpu, uint32_t gpu_stride, 407 void *cpu, uint32_t cpu_stride, 408 enum v3d_tiling_mode tiling_format, 409 int cpp, 410 uint32_t image_h, 411 const struct pipe_box *box, 412 bool is_load) 413{ 414 switch (tiling_format) { 415 case VC5_TILING_UIF_XOR: 416 v3d_move_pixels_general(gpu, gpu_stride, 417 cpu, cpu_stride, 418 cpp, image_h, box, 419 v3d_get_uif_xor_pixel_offset, 420 is_load); 421 break; 422 case VC5_TILING_UIF_NO_XOR: 423 v3d_move_pixels_general(gpu, gpu_stride, 424 cpu, cpu_stride, 425 cpp, image_h, box, 426 v3d_get_uif_no_xor_pixel_offset, 427 is_load); 428 break; 429 case VC5_TILING_UBLINEAR_2_COLUMN: 430 v3d_move_pixels_general(gpu, gpu_stride, 431 cpu, cpu_stride, 432 cpp, image_h, box, 433 v3d_get_ublinear_2_column_pixel_offset, 434 is_load); 435 break; 436 case VC5_TILING_UBLINEAR_1_COLUMN: 437 v3d_move_pixels_general(gpu, gpu_stride, 438 cpu, cpu_stride, 439 cpp, image_h, box, 440 v3d_get_ublinear_1_column_pixel_offset, 441 is_load); 442 break; 443 case VC5_TILING_LINEARTILE: 444 v3d_move_pixels_general(gpu, gpu_stride, 445 cpu, cpu_stride, 446 cpp, image_h, box, 447 v3d_get_lt_pixel_offset, 448 is_load); 449 break; 450 default: 451 unreachable("Unsupported tiling format"); 452 break; 453 } 454} 455 456/** 457 * Loads pixel data from the start (microtile-aligned) box in \p src to the 458 * start of \p dst according to the given tiling format. 459 */ 460void 461v3d_load_tiled_image(void *dst, uint32_t dst_stride, 462 void *src, uint32_t src_stride, 463 enum v3d_tiling_mode tiling_format, int cpp, 464 uint32_t image_h, 465 const struct pipe_box *box) 466{ 467 v3d_move_tiled_image(src, src_stride, 468 dst, dst_stride, 469 tiling_format, 470 cpp, 471 image_h, 472 box, 473 true); 474} 475 476/** 477 * Stores pixel data from the start of \p src into a (microtile-aligned) box in 478 * \p dst according to the given tiling format. 479 */ 480void 481v3d_store_tiled_image(void *dst, uint32_t dst_stride, 482 void *src, uint32_t src_stride, 483 enum v3d_tiling_mode tiling_format, int cpp, 484 uint32_t image_h, 485 const struct pipe_box *box) 486{ 487 v3d_move_tiled_image(dst, dst_stride, 488 src, src_stride, 489 tiling_format, 490 cpp, 491 image_h, 492 box, 493 false); 494} 495