1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2012 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21428d7b3dSmrg * SOFTWARE. 22428d7b3dSmrg * 23428d7b3dSmrg * Authors: 24428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 25428d7b3dSmrg * 26428d7b3dSmrg */ 27428d7b3dSmrg 28428d7b3dSmrg#ifdef HAVE_CONFIG_H 29428d7b3dSmrg#include "config.h" 30428d7b3dSmrg#endif 31428d7b3dSmrg 32428d7b3dSmrg#include "sna.h" 33428d7b3dSmrg#include "sna_render.h" 34428d7b3dSmrg#include "sna_render_inline.h" 35428d7b3dSmrg#include "gen4_vertex.h" 36428d7b3dSmrg 37428d7b3dSmrg#ifndef sse2 38428d7b3dSmrg#define sse2 39428d7b3dSmrg#endif 40428d7b3dSmrg 41428d7b3dSmrgvoid gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op) 42428d7b3dSmrg{ 43428d7b3dSmrg int vertex_index; 44428d7b3dSmrg 45428d7b3dSmrg assert(op->floats_per_vertex); 46428d7b3dSmrg assert(op->floats_per_rect == 3*op->floats_per_vertex); 47428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 48428d7b3dSmrg 49428d7b3dSmrg vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 50428d7b3dSmrg if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { 51428d7b3dSmrg DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", 52428d7b3dSmrg __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); 53428d7b3dSmrg if (gen4_vertex_finish(sna) < 2*op->floats_per_rect) { 54428d7b3dSmrg kgem_submit(&sna->kgem); 55428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 56428d7b3dSmrg } 57428d7b3dSmrg assert(sna->render.vertex_used < sna->render.vertex_size); 58428d7b3dSmrg 59428d7b3dSmrg vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; 60428d7b3dSmrg assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size); 61428d7b3dSmrg } 62428d7b3dSmrg 63428d7b3dSmrg sna->render.vertex_index = vertex_index; 64428d7b3dSmrg sna->render.vertex_used = vertex_index * op->floats_per_vertex; 65428d7b3dSmrg} 66428d7b3dSmrg 67428d7b3dSmrgvoid gen4_vertex_flush(struct sna *sna) 68428d7b3dSmrg{ 69428d7b3dSmrg DBG(("%s[%x] = %d\n", __FUNCTION__, 70428d7b3dSmrg 4*sna->render.vertex_offset, 71428d7b3dSmrg sna->render.vertex_index - sna->render.vertex_start)); 72428d7b3dSmrg 73428d7b3dSmrg assert(sna->render.vertex_offset); 74428d7b3dSmrg assert(sna->render.vertex_offset <= sna->kgem.nbatch); 75428d7b3dSmrg assert(sna->render.vertex_index > sna->render.vertex_start); 76428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 77428d7b3dSmrg 78428d7b3dSmrg sna->kgem.batch[sna->render.vertex_offset] = 79428d7b3dSmrg sna->render.vertex_index - sna->render.vertex_start; 80428d7b3dSmrg sna->render.vertex_offset = 0; 81428d7b3dSmrg} 82428d7b3dSmrg 83428d7b3dSmrgint gen4_vertex_finish(struct sna *sna) 84428d7b3dSmrg{ 85428d7b3dSmrg struct kgem_bo *bo; 86428d7b3dSmrg unsigned int i; 87428d7b3dSmrg unsigned hint, size; 88428d7b3dSmrg 89428d7b3dSmrg DBG(("%s: used=%d / %d\n", __FUNCTION__, 90428d7b3dSmrg sna->render.vertex_used, sna->render.vertex_size)); 91428d7b3dSmrg assert(sna->render.vertex_offset == 0); 92428d7b3dSmrg assert(sna->render.vertex_used); 93428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 94428d7b3dSmrg 95428d7b3dSmrg sna_vertex_wait__locked(&sna->render); 96428d7b3dSmrg 97428d7b3dSmrg /* Note: we only need dword alignment (currently) */ 98428d7b3dSmrg 99428d7b3dSmrg hint = CREATE_GTT_MAP; 100428d7b3dSmrg 101428d7b3dSmrg bo = sna->render.vbo; 102428d7b3dSmrg if (bo) { 103428d7b3dSmrg for (i = 0; i < sna->render.nvertex_reloc; i++) { 104428d7b3dSmrg DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, 105428d7b3dSmrg i, sna->render.vertex_reloc[i])); 106428d7b3dSmrg 107428d7b3dSmrg sna->kgem.batch[sna->render.vertex_reloc[i]] = 108428d7b3dSmrg kgem_add_reloc(&sna->kgem, 109428d7b3dSmrg sna->render.vertex_reloc[i], bo, 110428d7b3dSmrg I915_GEM_DOMAIN_VERTEX << 16, 111428d7b3dSmrg 0); 112428d7b3dSmrg } 113428d7b3dSmrg 114428d7b3dSmrg assert(!sna->render.active); 115428d7b3dSmrg sna->render.nvertex_reloc = 0; 116428d7b3dSmrg sna->render.vertex_used = 0; 117428d7b3dSmrg sna->render.vertex_index = 0; 118428d7b3dSmrg sna->render.vbo = NULL; 119428d7b3dSmrg sna->render.vb_id = 0; 120428d7b3dSmrg 121428d7b3dSmrg kgem_bo_destroy(&sna->kgem, bo); 122428d7b3dSmrg hint |= CREATE_CACHED | CREATE_NO_THROTTLE; 123428d7b3dSmrg } else { 124428d7b3dSmrg assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); 125428d7b3dSmrg assert(sna->render.vertices == sna->render.vertex_data); 126428d7b3dSmrg if (kgem_is_idle(&sna->kgem)) 127428d7b3dSmrg return 0; 128428d7b3dSmrg } 129428d7b3dSmrg 130428d7b3dSmrg size = 256*1024; 131428d7b3dSmrg assert(!sna->render.active); 132428d7b3dSmrg sna->render.vertices = NULL; 133428d7b3dSmrg sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); 134428d7b3dSmrg while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) { 135428d7b3dSmrg size /= 2; 136428d7b3dSmrg sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); 137428d7b3dSmrg } 138428d7b3dSmrg if (sna->render.vbo == NULL) 139428d7b3dSmrg sna->render.vbo = kgem_create_linear(&sna->kgem, 140428d7b3dSmrg 256*1024, CREATE_GTT_MAP); 141428d7b3dSmrg if (sna->render.vbo && 142428d7b3dSmrg kgem_check_bo(&sna->kgem, sna->render.vbo, NULL)) 143428d7b3dSmrg sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); 144428d7b3dSmrg if (sna->render.vertices == NULL) { 145428d7b3dSmrg if (sna->render.vbo) { 146428d7b3dSmrg kgem_bo_destroy(&sna->kgem, sna->render.vbo); 147428d7b3dSmrg sna->render.vbo = NULL; 148428d7b3dSmrg } 149428d7b3dSmrg sna->render.vertices = sna->render.vertex_data; 150428d7b3dSmrg sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 151428d7b3dSmrg return 0; 152428d7b3dSmrg } 153428d7b3dSmrg 154428d7b3dSmrg if (sna->render.vertex_used) { 155428d7b3dSmrg DBG(("%s: copying initial buffer x %d to handle=%d\n", 156428d7b3dSmrg __FUNCTION__, 157428d7b3dSmrg sna->render.vertex_used, 158428d7b3dSmrg sna->render.vbo->handle)); 159428d7b3dSmrg assert(sizeof(float)*sna->render.vertex_used <= 160428d7b3dSmrg __kgem_bo_size(sna->render.vbo)); 161428d7b3dSmrg memcpy(sna->render.vertices, 162428d7b3dSmrg sna->render.vertex_data, 163428d7b3dSmrg sizeof(float)*sna->render.vertex_used); 164428d7b3dSmrg } 165428d7b3dSmrg 166428d7b3dSmrg size = __kgem_bo_size(sna->render.vbo)/4; 167428d7b3dSmrg if (size >= UINT16_MAX) 168428d7b3dSmrg size = UINT16_MAX - 1; 169428d7b3dSmrg 170428d7b3dSmrg DBG(("%s: create vbo handle=%d, size=%d floats [%d bytes]\n", 171428d7b3dSmrg __FUNCTION__, sna->render.vbo->handle, size, __kgem_bo_size(sna->render.vbo))); 172428d7b3dSmrg assert(size > sna->render.vertex_used); 173428d7b3dSmrg 174428d7b3dSmrg sna->render.vertex_size = size; 175428d7b3dSmrg return size - sna->render.vertex_used; 176428d7b3dSmrg} 177428d7b3dSmrg 178428d7b3dSmrgvoid gen4_vertex_close(struct sna *sna) 179428d7b3dSmrg{ 180428d7b3dSmrg struct kgem_bo *bo, *free_bo = NULL; 181428d7b3dSmrg unsigned int i, delta = 0; 182428d7b3dSmrg 183428d7b3dSmrg assert(sna->render.vertex_offset == 0); 184428d7b3dSmrg if (!sna->render.vb_id) 185428d7b3dSmrg return; 186428d7b3dSmrg 187428d7b3dSmrg DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", 188428d7b3dSmrg __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0, 189428d7b3dSmrg sna->render.vb_id, sna->render.nvertex_reloc)); 190428d7b3dSmrg 191428d7b3dSmrg assert(!sna->render.active); 192428d7b3dSmrg 193428d7b3dSmrg bo = sna->render.vbo; 194428d7b3dSmrg if (bo) { 195428d7b3dSmrg if (sna->render.vertex_size - sna->render.vertex_used < 64) { 196428d7b3dSmrg DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); 197428d7b3dSmrg sna->render.vbo = NULL; 198428d7b3dSmrg sna->render.vertices = sna->render.vertex_data; 199428d7b3dSmrg sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 200428d7b3dSmrg free_bo = bo; 201428d7b3dSmrg } else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) { 202428d7b3dSmrg DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); 203428d7b3dSmrg sna->render.vertices = 204428d7b3dSmrg kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); 205428d7b3dSmrg if (sna->render.vertices == NULL) { 206428d7b3dSmrg sna->render.vbo = NULL; 207428d7b3dSmrg sna->render.vertices = sna->render.vertex_data; 208428d7b3dSmrg sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 209428d7b3dSmrg free_bo = bo; 210428d7b3dSmrg } 211428d7b3dSmrg 212428d7b3dSmrg } 213428d7b3dSmrg } else { 214428d7b3dSmrg int size; 215428d7b3dSmrg 216428d7b3dSmrg size = sna->kgem.nbatch; 217428d7b3dSmrg size += sna->kgem.batch_size - sna->kgem.surface; 218428d7b3dSmrg size += sna->render.vertex_used; 219428d7b3dSmrg 220428d7b3dSmrg if (size <= 1024) { 221428d7b3dSmrg DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, 222428d7b3dSmrg sna->render.vertex_used, sna->kgem.nbatch)); 223428d7b3dSmrg assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface); 224428d7b3dSmrg memcpy(sna->kgem.batch + sna->kgem.nbatch, 225428d7b3dSmrg sna->render.vertex_data, 226428d7b3dSmrg sna->render.vertex_used * 4); 227428d7b3dSmrg delta = sna->kgem.nbatch * 4; 228428d7b3dSmrg bo = NULL; 229428d7b3dSmrg sna->kgem.nbatch += sna->render.vertex_used; 230428d7b3dSmrg } else { 231428d7b3dSmrg size = 256 * 1024; 232428d7b3dSmrg do { 233428d7b3dSmrg bo = kgem_create_linear(&sna->kgem, size, 234428d7b3dSmrg CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED); 235428d7b3dSmrg } while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used); 236428d7b3dSmrg 237428d7b3dSmrg sna->render.vertices = NULL; 238428d7b3dSmrg if (bo) 239428d7b3dSmrg sna->render.vertices = kgem_bo_map(&sna->kgem, bo); 240428d7b3dSmrg if (sna->render.vertices != NULL) { 241428d7b3dSmrg DBG(("%s: new vbo: %d / %d\n", __FUNCTION__, 242428d7b3dSmrg sna->render.vertex_used, __kgem_bo_size(bo)/4)); 243428d7b3dSmrg 244428d7b3dSmrg assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo)); 245428d7b3dSmrg memcpy(sna->render.vertices, 246428d7b3dSmrg sna->render.vertex_data, 247428d7b3dSmrg sizeof(float)*sna->render.vertex_used); 248428d7b3dSmrg 249428d7b3dSmrg size = __kgem_bo_size(bo)/4; 250428d7b3dSmrg if (size >= UINT16_MAX) 251428d7b3dSmrg size = UINT16_MAX - 1; 252428d7b3dSmrg 253428d7b3dSmrg sna->render.vbo = bo; 254428d7b3dSmrg sna->render.vertex_size = size; 255428d7b3dSmrg } else { 256428d7b3dSmrg DBG(("%s: tmp vbo: %d\n", __FUNCTION__, 257428d7b3dSmrg sna->render.vertex_used)); 258428d7b3dSmrg 259428d7b3dSmrg if (bo) 260428d7b3dSmrg kgem_bo_destroy(&sna->kgem, bo); 261428d7b3dSmrg 262428d7b3dSmrg bo = kgem_create_linear(&sna->kgem, 263428d7b3dSmrg 4*sna->render.vertex_used, 264428d7b3dSmrg CREATE_NO_THROTTLE); 265428d7b3dSmrg if (bo && !kgem_bo_write(&sna->kgem, bo, 266428d7b3dSmrg sna->render.vertex_data, 267428d7b3dSmrg 4*sna->render.vertex_used)) { 268428d7b3dSmrg kgem_bo_destroy(&sna->kgem, bo); 269428d7b3dSmrg bo = NULL; 270428d7b3dSmrg } 271428d7b3dSmrg 272428d7b3dSmrg assert(sna->render.vbo == NULL); 273428d7b3dSmrg sna->render.vertices = sna->render.vertex_data; 274428d7b3dSmrg sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); 275428d7b3dSmrg free_bo = bo; 276428d7b3dSmrg } 277428d7b3dSmrg } 278428d7b3dSmrg } 279428d7b3dSmrg 280428d7b3dSmrg assert(sna->render.nvertex_reloc); 281428d7b3dSmrg for (i = 0; i < sna->render.nvertex_reloc; i++) { 282428d7b3dSmrg DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, 283428d7b3dSmrg i, sna->render.vertex_reloc[i])); 284428d7b3dSmrg 285428d7b3dSmrg sna->kgem.batch[sna->render.vertex_reloc[i]] = 286428d7b3dSmrg kgem_add_reloc(&sna->kgem, 287428d7b3dSmrg sna->render.vertex_reloc[i], bo, 288428d7b3dSmrg I915_GEM_DOMAIN_VERTEX << 16, 289428d7b3dSmrg delta); 290428d7b3dSmrg } 291428d7b3dSmrg sna->render.nvertex_reloc = 0; 292428d7b3dSmrg sna->render.vb_id = 0; 293428d7b3dSmrg 294428d7b3dSmrg if (sna->render.vbo == NULL) { 295428d7b3dSmrg assert(!sna->render.active); 296428d7b3dSmrg sna->render.vertex_used = 0; 297428d7b3dSmrg sna->render.vertex_index = 0; 298428d7b3dSmrg assert(sna->render.vertices == sna->render.vertex_data); 299428d7b3dSmrg assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); 300428d7b3dSmrg } 301428d7b3dSmrg 302428d7b3dSmrg if (free_bo) 303428d7b3dSmrg kgem_bo_destroy(&sna->kgem, free_bo); 304428d7b3dSmrg} 305428d7b3dSmrg 306428d7b3dSmrg/* specialised vertex emission routines */ 307428d7b3dSmrg 308428d7b3dSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */ 309428d7b3dSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 310428d7b3dSmrg 311428d7b3dSmrgforce_inline static float 312428d7b3dSmrgcompute_linear(const struct sna_composite_channel *channel, 313428d7b3dSmrg int16_t x, int16_t y) 314428d7b3dSmrg{ 315428d7b3dSmrg return ((x+channel->offset[0]) * channel->u.linear.dx + 316428d7b3dSmrg (y+channel->offset[1]) * channel->u.linear.dy + 317428d7b3dSmrg channel->u.linear.offset); 318428d7b3dSmrg} 319428d7b3dSmrg 320428d7b3dSmrgsse2 inline static void 321428d7b3dSmrgemit_texcoord(struct sna *sna, 322428d7b3dSmrg const struct sna_composite_channel *channel, 323428d7b3dSmrg int16_t x, int16_t y) 324428d7b3dSmrg{ 325428d7b3dSmrg if (channel->is_solid) { 326428d7b3dSmrg OUT_VERTEX_F(0.5); 327428d7b3dSmrg return; 328428d7b3dSmrg } 329428d7b3dSmrg 330428d7b3dSmrg x += channel->offset[0]; 331428d7b3dSmrg y += channel->offset[1]; 332428d7b3dSmrg 333428d7b3dSmrg if (channel->is_affine) { 334428d7b3dSmrg float s, t; 335428d7b3dSmrg 336428d7b3dSmrg sna_get_transformed_coordinates(x, y, 337428d7b3dSmrg channel->transform, 338428d7b3dSmrg &s, &t); 339428d7b3dSmrg OUT_VERTEX_F(s * channel->scale[0]); 340428d7b3dSmrg OUT_VERTEX_F(t * channel->scale[1]); 341428d7b3dSmrg } else { 342428d7b3dSmrg float s, t, w; 343428d7b3dSmrg 344428d7b3dSmrg sna_get_transformed_coordinates_3d(x, y, 345428d7b3dSmrg channel->transform, 346428d7b3dSmrg &s, &t, &w); 347428d7b3dSmrg OUT_VERTEX_F(s * channel->scale[0]); 348428d7b3dSmrg OUT_VERTEX_F(t * channel->scale[1]); 349428d7b3dSmrg OUT_VERTEX_F(w); 350428d7b3dSmrg } 351428d7b3dSmrg} 352428d7b3dSmrg 353428d7b3dSmrgsse2 force_inline static void 354428d7b3dSmrgemit_vertex(struct sna *sna, 355428d7b3dSmrg const struct sna_composite_op *op, 356428d7b3dSmrg int16_t srcX, int16_t srcY, 357428d7b3dSmrg int16_t mskX, int16_t mskY, 358428d7b3dSmrg int16_t dstX, int16_t dstY) 359428d7b3dSmrg{ 360428d7b3dSmrg OUT_VERTEX(dstX, dstY); 361428d7b3dSmrg emit_texcoord(sna, &op->src, srcX, srcY); 362428d7b3dSmrg} 363428d7b3dSmrg 364428d7b3dSmrgsse2 fastcall static void 365428d7b3dSmrgemit_primitive(struct sna *sna, 366428d7b3dSmrg const struct sna_composite_op *op, 367428d7b3dSmrg const struct sna_composite_rectangles *r) 368428d7b3dSmrg{ 369428d7b3dSmrg emit_vertex(sna, op, 370428d7b3dSmrg r->src.x + r->width, r->src.y + r->height, 371428d7b3dSmrg r->mask.x + r->width, r->mask.y + r->height, 372428d7b3dSmrg r->dst.x + r->width, r->dst.y + r->height); 373428d7b3dSmrg emit_vertex(sna, op, 374428d7b3dSmrg r->src.x, r->src.y + r->height, 375428d7b3dSmrg r->mask.x, r->mask.y + r->height, 376428d7b3dSmrg r->dst.x, r->dst.y + r->height); 377428d7b3dSmrg emit_vertex(sna, op, 378428d7b3dSmrg r->src.x, r->src.y, 379428d7b3dSmrg r->mask.x, r->mask.y, 380428d7b3dSmrg r->dst.x, r->dst.y); 381428d7b3dSmrg} 382428d7b3dSmrg 383428d7b3dSmrgsse2 inline static float * 384428d7b3dSmrgvemit_texcoord(float *v, 385428d7b3dSmrg const struct sna_composite_channel *channel, 386428d7b3dSmrg int16_t x, int16_t y) 387428d7b3dSmrg{ 388428d7b3dSmrg if (channel->is_solid) { 389428d7b3dSmrg *v++ = 0.5; 390428d7b3dSmrg } else { 391428d7b3dSmrg x += channel->offset[0]; 392428d7b3dSmrg y += channel->offset[1]; 393428d7b3dSmrg 394428d7b3dSmrg if (channel->is_affine) { 395428d7b3dSmrg float s, t; 396428d7b3dSmrg 397428d7b3dSmrg sna_get_transformed_coordinates(x, y, 398428d7b3dSmrg channel->transform, 399428d7b3dSmrg &s, &t); 400428d7b3dSmrg *v++ = s * channel->scale[0]; 401428d7b3dSmrg *v++ = t * channel->scale[1]; 402428d7b3dSmrg } else { 403428d7b3dSmrg float s, t, w; 404428d7b3dSmrg 405428d7b3dSmrg sna_get_transformed_coordinates_3d(x, y, 406428d7b3dSmrg channel->transform, 407428d7b3dSmrg &s, &t, &w); 408428d7b3dSmrg *v++ = s * channel->scale[0]; 409428d7b3dSmrg *v++ = t * channel->scale[1]; 410428d7b3dSmrg *v++ = w; 411428d7b3dSmrg } 412428d7b3dSmrg } 413428d7b3dSmrg 414428d7b3dSmrg return v; 415428d7b3dSmrg} 416428d7b3dSmrg 417428d7b3dSmrgsse2 force_inline static float * 418428d7b3dSmrgvemit_vertex(float *v, 419428d7b3dSmrg const struct sna_composite_op *op, 420428d7b3dSmrg int16_t x, int16_t y) 421428d7b3dSmrg{ 422428d7b3dSmrg *v++ = pack_2s(x, y); 423428d7b3dSmrg return vemit_texcoord(v, &op->src, x, y); 424428d7b3dSmrg} 425428d7b3dSmrg 426428d7b3dSmrgsse2 fastcall static void 427428d7b3dSmrgemit_boxes(const struct sna_composite_op *op, 428428d7b3dSmrg const BoxRec *box, int nbox, 429428d7b3dSmrg float *v) 430428d7b3dSmrg{ 431428d7b3dSmrg do { 432428d7b3dSmrg v = vemit_vertex(v, op, box->x2, box->y2); 433428d7b3dSmrg v = vemit_vertex(v, op, box->x1, box->y2); 434428d7b3dSmrg v = vemit_vertex(v, op, box->x1, box->y1); 435428d7b3dSmrg 436428d7b3dSmrg box++; 437428d7b3dSmrg } while (--nbox); 438428d7b3dSmrg} 439428d7b3dSmrg 440428d7b3dSmrgsse2 force_inline static void 441428d7b3dSmrgemit_vertex_mask(struct sna *sna, 442428d7b3dSmrg const struct sna_composite_op *op, 443428d7b3dSmrg int16_t srcX, int16_t srcY, 444428d7b3dSmrg int16_t mskX, int16_t mskY, 445428d7b3dSmrg int16_t dstX, int16_t dstY) 446428d7b3dSmrg{ 447428d7b3dSmrg OUT_VERTEX(dstX, dstY); 448428d7b3dSmrg emit_texcoord(sna, &op->src, srcX, srcY); 449428d7b3dSmrg emit_texcoord(sna, &op->mask, mskX, mskY); 450428d7b3dSmrg} 451428d7b3dSmrg 452428d7b3dSmrgsse2 fastcall static void 453428d7b3dSmrgemit_primitive_mask(struct sna *sna, 454428d7b3dSmrg const struct sna_composite_op *op, 455428d7b3dSmrg const struct sna_composite_rectangles *r) 456428d7b3dSmrg{ 457428d7b3dSmrg emit_vertex_mask(sna, op, 458428d7b3dSmrg r->src.x + r->width, r->src.y + r->height, 459428d7b3dSmrg r->mask.x + r->width, r->mask.y + r->height, 460428d7b3dSmrg r->dst.x + r->width, r->dst.y + r->height); 461428d7b3dSmrg emit_vertex_mask(sna, op, 462428d7b3dSmrg r->src.x, r->src.y + r->height, 463428d7b3dSmrg r->mask.x, r->mask.y + r->height, 464428d7b3dSmrg r->dst.x, r->dst.y + r->height); 465428d7b3dSmrg emit_vertex_mask(sna, op, 466428d7b3dSmrg r->src.x, r->src.y, 467428d7b3dSmrg r->mask.x, r->mask.y, 468428d7b3dSmrg r->dst.x, r->dst.y); 469428d7b3dSmrg} 470428d7b3dSmrg 471428d7b3dSmrgsse2 force_inline static float * 472428d7b3dSmrgvemit_vertex_mask(float *v, 473428d7b3dSmrg const struct sna_composite_op *op, 474428d7b3dSmrg int16_t x, int16_t y) 475428d7b3dSmrg{ 476428d7b3dSmrg *v++ = pack_2s(x, y); 477428d7b3dSmrg v = vemit_texcoord(v, &op->src, x, y); 478428d7b3dSmrg v = vemit_texcoord(v, &op->mask, x, y); 479428d7b3dSmrg return v; 480428d7b3dSmrg} 481428d7b3dSmrg 482428d7b3dSmrgsse2 fastcall static void 483428d7b3dSmrgemit_boxes_mask(const struct sna_composite_op *op, 484428d7b3dSmrg const BoxRec *box, int nbox, 485428d7b3dSmrg float *v) 486428d7b3dSmrg{ 487428d7b3dSmrg do { 488428d7b3dSmrg v = vemit_vertex_mask(v, op, box->x2, box->y2); 489428d7b3dSmrg v = vemit_vertex_mask(v, op, box->x1, box->y2); 490428d7b3dSmrg v = vemit_vertex_mask(v, op, box->x1, box->y1); 491428d7b3dSmrg 492428d7b3dSmrg box++; 493428d7b3dSmrg } while (--nbox); 494428d7b3dSmrg} 495428d7b3dSmrg 496428d7b3dSmrg 497428d7b3dSmrgsse2 fastcall static void 498428d7b3dSmrgemit_primitive_solid(struct sna *sna, 499428d7b3dSmrg const struct sna_composite_op *op, 500428d7b3dSmrg const struct sna_composite_rectangles *r) 501428d7b3dSmrg{ 502428d7b3dSmrg float *v; 503428d7b3dSmrg union { 504428d7b3dSmrg struct sna_coordinate p; 505428d7b3dSmrg float f; 506428d7b3dSmrg } dst; 507428d7b3dSmrg 508428d7b3dSmrg assert(op->floats_per_rect == 6); 509428d7b3dSmrg assert((sna->render.vertex_used % 2) == 0); 510428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 511428d7b3dSmrg sna->render.vertex_used += 6; 512428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 513428d7b3dSmrg 514428d7b3dSmrg dst.p.x = r->dst.x + r->width; 515428d7b3dSmrg dst.p.y = r->dst.y + r->height; 516428d7b3dSmrg v[0] = dst.f; 517428d7b3dSmrg dst.p.x = r->dst.x; 518428d7b3dSmrg v[2] = dst.f; 519428d7b3dSmrg dst.p.y = r->dst.y; 520428d7b3dSmrg v[4] = dst.f; 521428d7b3dSmrg 522428d7b3dSmrg v[5] = v[3] = v[1] = .5; 523428d7b3dSmrg} 524428d7b3dSmrg 525428d7b3dSmrgsse2 fastcall static void 526428d7b3dSmrgemit_boxes_solid(const struct sna_composite_op *op, 527428d7b3dSmrg const BoxRec *box, int nbox, 528428d7b3dSmrg float *v) 529428d7b3dSmrg{ 530428d7b3dSmrg do { 531428d7b3dSmrg union { 532428d7b3dSmrg struct sna_coordinate p; 533428d7b3dSmrg float f; 534428d7b3dSmrg } dst; 535428d7b3dSmrg 536428d7b3dSmrg dst.p.x = box->x2; 537428d7b3dSmrg dst.p.y = box->y2; 538428d7b3dSmrg v[0] = dst.f; 539428d7b3dSmrg dst.p.x = box->x1; 540428d7b3dSmrg v[2] = dst.f; 541428d7b3dSmrg dst.p.y = box->y1; 542428d7b3dSmrg v[4] = dst.f; 543428d7b3dSmrg 544428d7b3dSmrg v[5] = v[3] = v[1] = .5; 545428d7b3dSmrg box++; 546428d7b3dSmrg v += 6; 547428d7b3dSmrg } while (--nbox); 548428d7b3dSmrg} 549428d7b3dSmrg 550428d7b3dSmrgsse2 fastcall static void 551428d7b3dSmrgemit_primitive_linear(struct sna *sna, 552428d7b3dSmrg const struct sna_composite_op *op, 553428d7b3dSmrg const struct sna_composite_rectangles *r) 554428d7b3dSmrg{ 555428d7b3dSmrg float *v; 556428d7b3dSmrg union { 557428d7b3dSmrg struct sna_coordinate p; 558428d7b3dSmrg float f; 559428d7b3dSmrg } dst; 560428d7b3dSmrg 561428d7b3dSmrg assert(op->floats_per_rect == 6); 562428d7b3dSmrg assert((sna->render.vertex_used % 2) == 0); 563428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 564428d7b3dSmrg sna->render.vertex_used += 6; 565428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 566428d7b3dSmrg 567428d7b3dSmrg dst.p.x = r->dst.x + r->width; 568428d7b3dSmrg dst.p.y = r->dst.y + r->height; 569428d7b3dSmrg v[0] = dst.f; 570428d7b3dSmrg dst.p.x = r->dst.x; 571428d7b3dSmrg v[2] = dst.f; 572428d7b3dSmrg dst.p.y = r->dst.y; 573428d7b3dSmrg v[4] = dst.f; 574428d7b3dSmrg 575428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 576428d7b3dSmrg v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 577428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y); 578428d7b3dSmrg} 579428d7b3dSmrg 580428d7b3dSmrgsse2 fastcall static void 581428d7b3dSmrgemit_boxes_linear(const struct sna_composite_op *op, 582428d7b3dSmrg const BoxRec *box, int nbox, 583428d7b3dSmrg float *v) 584428d7b3dSmrg{ 585428d7b3dSmrg union { 586428d7b3dSmrg struct sna_coordinate p; 587428d7b3dSmrg float f; 588428d7b3dSmrg } dst; 589428d7b3dSmrg 590428d7b3dSmrg do { 591428d7b3dSmrg dst.p.x = box->x2; 592428d7b3dSmrg dst.p.y = box->y2; 593428d7b3dSmrg v[0] = dst.f; 594428d7b3dSmrg dst.p.x = box->x1; 595428d7b3dSmrg v[2] = dst.f; 596428d7b3dSmrg dst.p.y = box->y1; 597428d7b3dSmrg v[4] = dst.f; 598428d7b3dSmrg 599428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 600428d7b3dSmrg v[3] = compute_linear(&op->src, box->x1, box->y2); 601428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y1); 602428d7b3dSmrg 603428d7b3dSmrg v += 6; 604428d7b3dSmrg box++; 605428d7b3dSmrg } while (--nbox); 606428d7b3dSmrg} 607428d7b3dSmrg 608428d7b3dSmrgsse2 fastcall static void 609428d7b3dSmrgemit_primitive_identity_source(struct sna *sna, 610428d7b3dSmrg const struct sna_composite_op *op, 611428d7b3dSmrg const struct sna_composite_rectangles *r) 612428d7b3dSmrg{ 613428d7b3dSmrg union { 614428d7b3dSmrg struct sna_coordinate p; 615428d7b3dSmrg float f; 616428d7b3dSmrg } dst; 617428d7b3dSmrg float *v; 618428d7b3dSmrg 619428d7b3dSmrg assert(op->floats_per_rect == 9); 620428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 621428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 622428d7b3dSmrg sna->render.vertex_used += 9; 623428d7b3dSmrg 624428d7b3dSmrg dst.p.x = r->dst.x + r->width; 625428d7b3dSmrg dst.p.y = r->dst.y + r->height; 626428d7b3dSmrg v[0] = dst.f; 627428d7b3dSmrg dst.p.x = r->dst.x; 628428d7b3dSmrg v[3] = dst.f; 629428d7b3dSmrg dst.p.y = r->dst.y; 630428d7b3dSmrg v[6] = dst.f; 631428d7b3dSmrg 632428d7b3dSmrg v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 633428d7b3dSmrg v[1] = v[4] + r->width * op->src.scale[0]; 634428d7b3dSmrg 635428d7b3dSmrg v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 636428d7b3dSmrg v[5] = v[2] = v[8] + r->height * op->src.scale[1]; 637428d7b3dSmrg} 638428d7b3dSmrg 639428d7b3dSmrgsse2 fastcall static void 640428d7b3dSmrgemit_boxes_identity_source(const struct sna_composite_op *op, 641428d7b3dSmrg const BoxRec *box, int nbox, 642428d7b3dSmrg float *v) 643428d7b3dSmrg{ 644428d7b3dSmrg do { 645428d7b3dSmrg union { 646428d7b3dSmrg struct sna_coordinate p; 647428d7b3dSmrg float f; 648428d7b3dSmrg } dst; 649428d7b3dSmrg 650428d7b3dSmrg dst.p.x = box->x2; 651428d7b3dSmrg dst.p.y = box->y2; 652428d7b3dSmrg v[0] = dst.f; 653428d7b3dSmrg dst.p.x = box->x1; 654428d7b3dSmrg v[3] = dst.f; 655428d7b3dSmrg dst.p.y = box->y1; 656428d7b3dSmrg v[6] = dst.f; 657428d7b3dSmrg 658428d7b3dSmrg v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 659428d7b3dSmrg v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 660428d7b3dSmrg 661428d7b3dSmrg v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 662428d7b3dSmrg v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 663428d7b3dSmrg 664428d7b3dSmrg v += 9; 665428d7b3dSmrg box++; 666428d7b3dSmrg } while (--nbox); 667428d7b3dSmrg} 668428d7b3dSmrg 669428d7b3dSmrgsse2 fastcall static void 670428d7b3dSmrgemit_primitive_simple_source(struct sna *sna, 671428d7b3dSmrg const struct sna_composite_op *op, 672428d7b3dSmrg const struct sna_composite_rectangles *r) 673428d7b3dSmrg{ 674428d7b3dSmrg float *v; 675428d7b3dSmrg union { 676428d7b3dSmrg struct sna_coordinate p; 677428d7b3dSmrg float f; 678428d7b3dSmrg } dst; 679428d7b3dSmrg 680428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 681428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 682428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 683428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 684428d7b3dSmrg float sx = op->src.scale[0]; 685428d7b3dSmrg float sy = op->src.scale[1]; 686428d7b3dSmrg int16_t tx = op->src.offset[0]; 687428d7b3dSmrg int16_t ty = op->src.offset[1]; 688428d7b3dSmrg 689428d7b3dSmrg assert(op->floats_per_rect == 9); 690428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 691428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 692428d7b3dSmrg sna->render.vertex_used += 3*3; 693428d7b3dSmrg 694428d7b3dSmrg dst.p.x = r->dst.x + r->width; 695428d7b3dSmrg dst.p.y = r->dst.y + r->height; 696428d7b3dSmrg v[0] = dst.f; 697428d7b3dSmrg v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 698428d7b3dSmrg v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 699428d7b3dSmrg 700428d7b3dSmrg dst.p.x = r->dst.x; 701428d7b3dSmrg v[3] = dst.f; 702428d7b3dSmrg v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 703428d7b3dSmrg 704428d7b3dSmrg dst.p.y = r->dst.y; 705428d7b3dSmrg v[6] = dst.f; 706428d7b3dSmrg v[8] = ((r->src.y + ty) * yy + y0) * sy; 707428d7b3dSmrg} 708428d7b3dSmrg 709428d7b3dSmrgsse2 fastcall static void 710428d7b3dSmrgemit_boxes_simple_source(const struct sna_composite_op *op, 711428d7b3dSmrg const BoxRec *box, int nbox, 712428d7b3dSmrg float *v) 713428d7b3dSmrg{ 714428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 715428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 716428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 717428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 718428d7b3dSmrg float sx = op->src.scale[0]; 719428d7b3dSmrg float sy = op->src.scale[1]; 720428d7b3dSmrg int16_t tx = op->src.offset[0]; 721428d7b3dSmrg int16_t ty = op->src.offset[1]; 722428d7b3dSmrg 723428d7b3dSmrg do { 724428d7b3dSmrg union { 725428d7b3dSmrg struct sna_coordinate p; 726428d7b3dSmrg float f; 727428d7b3dSmrg } dst; 728428d7b3dSmrg 729428d7b3dSmrg dst.p.x = box->x2; 730428d7b3dSmrg dst.p.y = box->y2; 731428d7b3dSmrg v[0] = dst.f; 732428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 733428d7b3dSmrg v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 734428d7b3dSmrg 735428d7b3dSmrg dst.p.x = box->x1; 736428d7b3dSmrg v[3] = dst.f; 737428d7b3dSmrg v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 738428d7b3dSmrg 739428d7b3dSmrg dst.p.y = box->y1; 740428d7b3dSmrg v[6] = dst.f; 741428d7b3dSmrg v[8] = ((box->y1 + ty) * yy + y0) * sy; 742428d7b3dSmrg 743428d7b3dSmrg v += 9; 744428d7b3dSmrg box++; 745428d7b3dSmrg } while (--nbox); 746428d7b3dSmrg} 747428d7b3dSmrg 748428d7b3dSmrgsse2 fastcall static void 749428d7b3dSmrgemit_primitive_affine_source(struct sna *sna, 750428d7b3dSmrg const struct sna_composite_op *op, 751428d7b3dSmrg const struct sna_composite_rectangles *r) 752428d7b3dSmrg{ 753428d7b3dSmrg union { 754428d7b3dSmrg struct sna_coordinate p; 755428d7b3dSmrg float f; 756428d7b3dSmrg } dst; 757428d7b3dSmrg float *v; 758428d7b3dSmrg 759428d7b3dSmrg assert(op->floats_per_rect == 9); 760428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 761428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 762428d7b3dSmrg sna->render.vertex_used += 9; 763428d7b3dSmrg 764428d7b3dSmrg dst.p.x = r->dst.x + r->width; 765428d7b3dSmrg dst.p.y = r->dst.y + r->height; 766428d7b3dSmrg v[0] = dst.f; 767428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, 768428d7b3dSmrg op->src.offset[1] + r->src.y + r->height, 769428d7b3dSmrg op->src.transform, op->src.scale, 770428d7b3dSmrg &v[1], &v[2]); 771428d7b3dSmrg 772428d7b3dSmrg dst.p.x = r->dst.x; 773428d7b3dSmrg v[3] = dst.f; 774428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 775428d7b3dSmrg op->src.offset[1] + r->src.y + r->height, 776428d7b3dSmrg op->src.transform, op->src.scale, 777428d7b3dSmrg &v[4], &v[5]); 778428d7b3dSmrg 779428d7b3dSmrg dst.p.y = r->dst.y; 780428d7b3dSmrg v[6] = dst.f; 781428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 782428d7b3dSmrg op->src.offset[1] + r->src.y, 783428d7b3dSmrg op->src.transform, op->src.scale, 784428d7b3dSmrg &v[7], &v[8]); 785428d7b3dSmrg} 786428d7b3dSmrg 787428d7b3dSmrgsse2 fastcall static void 788428d7b3dSmrgemit_boxes_affine_source(const struct sna_composite_op *op, 789428d7b3dSmrg const BoxRec *box, int nbox, 790428d7b3dSmrg float *v) 791428d7b3dSmrg{ 792428d7b3dSmrg do { 793428d7b3dSmrg union { 794428d7b3dSmrg struct sna_coordinate p; 795428d7b3dSmrg float f; 796428d7b3dSmrg } dst; 797428d7b3dSmrg 798428d7b3dSmrg dst.p.x = box->x2; 799428d7b3dSmrg dst.p.y = box->y2; 800428d7b3dSmrg v[0] = dst.f; 801428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + box->x2, 802428d7b3dSmrg op->src.offset[1] + box->y2, 803428d7b3dSmrg op->src.transform, op->src.scale, 804428d7b3dSmrg &v[1], &v[2]); 805428d7b3dSmrg 806428d7b3dSmrg dst.p.x = box->x1; 807428d7b3dSmrg v[3] = dst.f; 808428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + box->x1, 809428d7b3dSmrg op->src.offset[1] + box->y2, 810428d7b3dSmrg op->src.transform, op->src.scale, 811428d7b3dSmrg &v[4], &v[5]); 812428d7b3dSmrg 813428d7b3dSmrg dst.p.y = box->y1; 814428d7b3dSmrg v[6] = dst.f; 815428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + box->x1, 816428d7b3dSmrg op->src.offset[1] + box->y1, 817428d7b3dSmrg op->src.transform, op->src.scale, 818428d7b3dSmrg &v[7], &v[8]); 819428d7b3dSmrg box++; 820428d7b3dSmrg v += 9; 821428d7b3dSmrg } while (--nbox); 822428d7b3dSmrg} 823428d7b3dSmrg 824428d7b3dSmrgsse2 fastcall static void 825428d7b3dSmrgemit_primitive_identity_mask(struct sna *sna, 826428d7b3dSmrg const struct sna_composite_op *op, 827428d7b3dSmrg const struct sna_composite_rectangles *r) 828428d7b3dSmrg{ 829428d7b3dSmrg union { 830428d7b3dSmrg struct sna_coordinate p; 831428d7b3dSmrg float f; 832428d7b3dSmrg } dst; 833428d7b3dSmrg float msk_x, msk_y; 834428d7b3dSmrg float w, h; 835428d7b3dSmrg float *v; 836428d7b3dSmrg 837428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 838428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 839428d7b3dSmrg w = r->width; 840428d7b3dSmrg h = r->height; 841428d7b3dSmrg 842428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 843428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 844428d7b3dSmrg 845428d7b3dSmrg assert(op->floats_per_rect == 12); 846428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 847428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 848428d7b3dSmrg sna->render.vertex_used += 12; 849428d7b3dSmrg 850428d7b3dSmrg dst.p.x = r->dst.x + r->width; 851428d7b3dSmrg dst.p.y = r->dst.y + r->height; 852428d7b3dSmrg v[0] = dst.f; 853428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 854428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 855428d7b3dSmrg 856428d7b3dSmrg dst.p.x = r->dst.x; 857428d7b3dSmrg v[4] = dst.f; 858428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 859428d7b3dSmrg 860428d7b3dSmrg dst.p.y = r->dst.y; 861428d7b3dSmrg v[8] = dst.f; 862428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 863428d7b3dSmrg 864428d7b3dSmrg v[9] = v[5] = v[1] = .5; 865428d7b3dSmrg} 866428d7b3dSmrg 867428d7b3dSmrgsse2 fastcall static void 868428d7b3dSmrgemit_boxes_identity_mask(const struct sna_composite_op *op, 869428d7b3dSmrg const BoxRec *box, int nbox, 870428d7b3dSmrg float *v) 871428d7b3dSmrg{ 872428d7b3dSmrg float msk_x = op->mask.offset[0]; 873428d7b3dSmrg float msk_y = op->mask.offset[1]; 874428d7b3dSmrg 875428d7b3dSmrg do { 876428d7b3dSmrg union { 877428d7b3dSmrg struct sna_coordinate p; 878428d7b3dSmrg float f; 879428d7b3dSmrg } dst; 880428d7b3dSmrg 881428d7b3dSmrg dst.p.x = box->x2; 882428d7b3dSmrg dst.p.y = box->y2; 883428d7b3dSmrg v[0] = dst.f; 884428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 885428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 886428d7b3dSmrg 887428d7b3dSmrg dst.p.x = box->x1; 888428d7b3dSmrg v[4] = dst.f; 889428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 890428d7b3dSmrg 891428d7b3dSmrg dst.p.y = box->y1; 892428d7b3dSmrg v[8] = dst.f; 893428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 894428d7b3dSmrg 895428d7b3dSmrg v[9] = v[5] = v[1] = .5; 896428d7b3dSmrg v += 12; 897428d7b3dSmrg box++; 898428d7b3dSmrg } while (--nbox); 899428d7b3dSmrg} 900428d7b3dSmrg 901428d7b3dSmrgsse2 fastcall static void 902428d7b3dSmrgemit_primitive_linear_identity_mask(struct sna *sna, 903428d7b3dSmrg const struct sna_composite_op *op, 904428d7b3dSmrg const struct sna_composite_rectangles *r) 905428d7b3dSmrg{ 906428d7b3dSmrg union { 907428d7b3dSmrg struct sna_coordinate p; 908428d7b3dSmrg float f; 909428d7b3dSmrg } dst; 910428d7b3dSmrg float msk_x, msk_y; 911428d7b3dSmrg float w, h; 912428d7b3dSmrg float *v; 913428d7b3dSmrg 914428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 915428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 916428d7b3dSmrg w = r->width; 917428d7b3dSmrg h = r->height; 918428d7b3dSmrg 919428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 920428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 921428d7b3dSmrg 922428d7b3dSmrg assert(op->floats_per_rect == 12); 923428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 924428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 925428d7b3dSmrg sna->render.vertex_used += 12; 926428d7b3dSmrg 927428d7b3dSmrg dst.p.x = r->dst.x + r->width; 928428d7b3dSmrg dst.p.y = r->dst.y + r->height; 929428d7b3dSmrg v[0] = dst.f; 930428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 931428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 932428d7b3dSmrg 933428d7b3dSmrg dst.p.x = r->dst.x; 934428d7b3dSmrg v[4] = dst.f; 935428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 936428d7b3dSmrg 937428d7b3dSmrg dst.p.y = r->dst.y; 938428d7b3dSmrg v[8] = dst.f; 939428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 940428d7b3dSmrg 941428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 942428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 943428d7b3dSmrg v[9] = compute_linear(&op->src, r->src.x, r->src.y); 944428d7b3dSmrg} 945428d7b3dSmrg 946428d7b3dSmrgsse2 fastcall static void 947428d7b3dSmrgemit_boxes_linear_identity_mask(const struct sna_composite_op *op, 948428d7b3dSmrg const BoxRec *box, int nbox, 949428d7b3dSmrg float *v) 950428d7b3dSmrg{ 951428d7b3dSmrg float msk_x = op->mask.offset[0]; 952428d7b3dSmrg float msk_y = op->mask.offset[1]; 953428d7b3dSmrg 954428d7b3dSmrg do { 955428d7b3dSmrg union { 956428d7b3dSmrg struct sna_coordinate p; 957428d7b3dSmrg float f; 958428d7b3dSmrg } dst; 959428d7b3dSmrg 960428d7b3dSmrg dst.p.x = box->x2; 961428d7b3dSmrg dst.p.y = box->y2; 962428d7b3dSmrg v[0] = dst.f; 963428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 964428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 965428d7b3dSmrg 966428d7b3dSmrg dst.p.x = box->x1; 967428d7b3dSmrg v[4] = dst.f; 968428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 969428d7b3dSmrg 970428d7b3dSmrg dst.p.y = box->y1; 971428d7b3dSmrg v[8] = dst.f; 972428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 973428d7b3dSmrg 974428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 975428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y2); 976428d7b3dSmrg v[9] = compute_linear(&op->src, box->x1, box->y1); 977428d7b3dSmrg 978428d7b3dSmrg v += 12; 979428d7b3dSmrg box++; 980428d7b3dSmrg } while (--nbox); 981428d7b3dSmrg} 982428d7b3dSmrg 983428d7b3dSmrgsse2 fastcall static void 984428d7b3dSmrgemit_primitive_identity_source_mask(struct sna *sna, 985428d7b3dSmrg const struct sna_composite_op *op, 986428d7b3dSmrg const struct sna_composite_rectangles *r) 987428d7b3dSmrg{ 988428d7b3dSmrg union { 989428d7b3dSmrg struct sna_coordinate p; 990428d7b3dSmrg float f; 991428d7b3dSmrg } dst; 992428d7b3dSmrg float src_x, src_y; 993428d7b3dSmrg float msk_x, msk_y; 994428d7b3dSmrg float w, h; 995428d7b3dSmrg float *v; 996428d7b3dSmrg 997428d7b3dSmrg src_x = r->src.x + op->src.offset[0]; 998428d7b3dSmrg src_y = r->src.y + op->src.offset[1]; 999428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 1000428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 1001428d7b3dSmrg w = r->width; 1002428d7b3dSmrg h = r->height; 1003428d7b3dSmrg 1004428d7b3dSmrg assert(op->floats_per_rect == 15); 1005428d7b3dSmrg assert((sna->render.vertex_used % 5) == 0); 1006428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1007428d7b3dSmrg sna->render.vertex_used += 15; 1008428d7b3dSmrg 1009428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1010428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1011428d7b3dSmrg v[0] = dst.f; 1012428d7b3dSmrg v[1] = (src_x + w) * op->src.scale[0]; 1013428d7b3dSmrg v[2] = (src_y + h) * op->src.scale[1]; 1014428d7b3dSmrg v[3] = (msk_x + w) * op->mask.scale[0]; 1015428d7b3dSmrg v[4] = (msk_y + h) * op->mask.scale[1]; 1016428d7b3dSmrg 1017428d7b3dSmrg dst.p.x = r->dst.x; 1018428d7b3dSmrg v[5] = dst.f; 1019428d7b3dSmrg v[6] = src_x * op->src.scale[0]; 1020428d7b3dSmrg v[7] = v[2]; 1021428d7b3dSmrg v[8] = msk_x * op->mask.scale[0]; 1022428d7b3dSmrg v[9] = v[4]; 1023428d7b3dSmrg 1024428d7b3dSmrg dst.p.y = r->dst.y; 1025428d7b3dSmrg v[10] = dst.f; 1026428d7b3dSmrg v[11] = v[6]; 1027428d7b3dSmrg v[12] = src_y * op->src.scale[1]; 1028428d7b3dSmrg v[13] = v[8]; 1029428d7b3dSmrg v[14] = msk_y * op->mask.scale[1]; 1030428d7b3dSmrg} 1031428d7b3dSmrg 1032428d7b3dSmrgsse2 fastcall static void 1033428d7b3dSmrgemit_primitive_simple_source_identity(struct sna *sna, 1034428d7b3dSmrg const struct sna_composite_op *op, 1035428d7b3dSmrg const struct sna_composite_rectangles *r) 1036428d7b3dSmrg{ 1037428d7b3dSmrg float *v; 1038428d7b3dSmrg union { 1039428d7b3dSmrg struct sna_coordinate p; 1040428d7b3dSmrg float f; 1041428d7b3dSmrg } dst; 1042428d7b3dSmrg 1043428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 1044428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 1045428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 1046428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 1047428d7b3dSmrg float sx = op->src.scale[0]; 1048428d7b3dSmrg float sy = op->src.scale[1]; 1049428d7b3dSmrg int16_t tx = op->src.offset[0]; 1050428d7b3dSmrg int16_t ty = op->src.offset[1]; 1051428d7b3dSmrg float msk_x = r->mask.x + op->mask.offset[0]; 1052428d7b3dSmrg float msk_y = r->mask.y + op->mask.offset[1]; 1053428d7b3dSmrg float w = r->width, h = r->height; 1054428d7b3dSmrg 1055428d7b3dSmrg assert(op->floats_per_rect == 15); 1056428d7b3dSmrg assert((sna->render.vertex_used % 5) == 0); 1057428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1058428d7b3dSmrg sna->render.vertex_used += 3*5; 1059428d7b3dSmrg 1060428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1061428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1062428d7b3dSmrg v[0] = dst.f; 1063428d7b3dSmrg v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1064428d7b3dSmrg v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1065428d7b3dSmrg v[3] = (msk_x + w) * op->mask.scale[0]; 1066428d7b3dSmrg v[4] = (msk_y + h) * op->mask.scale[1]; 1067428d7b3dSmrg 1068428d7b3dSmrg dst.p.x = r->dst.x; 1069428d7b3dSmrg v[5] = dst.f; 1070428d7b3dSmrg v[6] = ((r->src.x + tx) * xx + x0) * sx; 1071428d7b3dSmrg v[7] = v[2]; 1072428d7b3dSmrg v[8] = msk_x * op->mask.scale[0]; 1073428d7b3dSmrg v[9] = v[4]; 1074428d7b3dSmrg 1075428d7b3dSmrg dst.p.y = r->dst.y; 1076428d7b3dSmrg v[10] = dst.f; 1077428d7b3dSmrg v[11] = v[6]; 1078428d7b3dSmrg v[12] = ((r->src.y + ty) * yy + y0) * sy; 1079428d7b3dSmrg v[13] = v[8]; 1080428d7b3dSmrg v[14] = msk_y * op->mask.scale[1]; 1081428d7b3dSmrg} 1082428d7b3dSmrg 1083428d7b3dSmrgsse2 fastcall static void 1084428d7b3dSmrgemit_primitive_affine_source_identity(struct sna *sna, 1085428d7b3dSmrg const struct sna_composite_op *op, 1086428d7b3dSmrg const struct sna_composite_rectangles *r) 1087428d7b3dSmrg{ 1088428d7b3dSmrg float *v; 1089428d7b3dSmrg union { 1090428d7b3dSmrg struct sna_coordinate p; 1091428d7b3dSmrg float f; 1092428d7b3dSmrg } dst; 1093428d7b3dSmrg float msk_x = r->mask.x + op->mask.offset[0]; 1094428d7b3dSmrg float msk_y = r->mask.y + op->mask.offset[1]; 1095428d7b3dSmrg float w = r->width, h = r->height; 1096428d7b3dSmrg 1097428d7b3dSmrg assert(op->floats_per_rect == 15); 1098428d7b3dSmrg assert((sna->render.vertex_used % 5) == 0); 1099428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1100428d7b3dSmrg sna->render.vertex_used += 3*5; 1101428d7b3dSmrg 1102428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1103428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1104428d7b3dSmrg v[0] = dst.f; 1105428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, 1106428d7b3dSmrg op->src.offset[1] + r->src.y + r->height, 1107428d7b3dSmrg op->src.transform, op->src.scale, 1108428d7b3dSmrg &v[1], &v[2]); 1109428d7b3dSmrg v[3] = (msk_x + w) * op->mask.scale[0]; 1110428d7b3dSmrg v[4] = (msk_y + h) * op->mask.scale[1]; 1111428d7b3dSmrg 1112428d7b3dSmrg dst.p.x = r->dst.x; 1113428d7b3dSmrg v[5] = dst.f; 1114428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 1115428d7b3dSmrg op->src.offset[1] + r->src.y + r->height, 1116428d7b3dSmrg op->src.transform, op->src.scale, 1117428d7b3dSmrg &v[6], &v[7]); 1118428d7b3dSmrg v[8] = msk_x * op->mask.scale[0]; 1119428d7b3dSmrg v[9] = v[4]; 1120428d7b3dSmrg 1121428d7b3dSmrg dst.p.y = r->dst.y; 1122428d7b3dSmrg v[10] = dst.f; 1123428d7b3dSmrg _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, 1124428d7b3dSmrg op->src.offset[1] + r->src.y, 1125428d7b3dSmrg op->src.transform, op->src.scale, 1126428d7b3dSmrg &v[11], &v[12]); 1127428d7b3dSmrg v[13] = v[8]; 1128428d7b3dSmrg v[14] = msk_y * op->mask.scale[1]; 1129428d7b3dSmrg} 1130428d7b3dSmrg 1131428d7b3dSmrg/* SSE4_2 */ 1132428d7b3dSmrg#if defined(sse4_2) 1133428d7b3dSmrg 1134428d7b3dSmrgsse4_2 fastcall static void 1135428d7b3dSmrgemit_primitive_linear__sse4_2(struct sna *sna, 1136428d7b3dSmrg const struct sna_composite_op *op, 1137428d7b3dSmrg const struct sna_composite_rectangles *r) 1138428d7b3dSmrg{ 1139428d7b3dSmrg float *v; 1140428d7b3dSmrg union { 1141428d7b3dSmrg struct sna_coordinate p; 1142428d7b3dSmrg float f; 1143428d7b3dSmrg } dst; 1144428d7b3dSmrg 1145428d7b3dSmrg assert(op->floats_per_rect == 6); 1146428d7b3dSmrg assert((sna->render.vertex_used % 2) == 0); 1147428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1148428d7b3dSmrg sna->render.vertex_used += 6; 1149428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 1150428d7b3dSmrg 1151428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1152428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1153428d7b3dSmrg v[0] = dst.f; 1154428d7b3dSmrg dst.p.x = r->dst.x; 1155428d7b3dSmrg v[2] = dst.f; 1156428d7b3dSmrg dst.p.y = r->dst.y; 1157428d7b3dSmrg v[4] = dst.f; 1158428d7b3dSmrg 1159428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1160428d7b3dSmrg v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1161428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y); 1162428d7b3dSmrg} 1163428d7b3dSmrg 1164428d7b3dSmrgsse4_2 fastcall static void 1165428d7b3dSmrgemit_boxes_linear__sse4_2(const struct sna_composite_op *op, 1166428d7b3dSmrg const BoxRec *box, int nbox, 1167428d7b3dSmrg float *v) 1168428d7b3dSmrg{ 1169428d7b3dSmrg union { 1170428d7b3dSmrg struct sna_coordinate p; 1171428d7b3dSmrg float f; 1172428d7b3dSmrg } dst; 1173428d7b3dSmrg 1174428d7b3dSmrg do { 1175428d7b3dSmrg dst.p.x = box->x2; 1176428d7b3dSmrg dst.p.y = box->y2; 1177428d7b3dSmrg v[0] = dst.f; 1178428d7b3dSmrg dst.p.x = box->x1; 1179428d7b3dSmrg v[2] = dst.f; 1180428d7b3dSmrg dst.p.y = box->y1; 1181428d7b3dSmrg v[4] = dst.f; 1182428d7b3dSmrg 1183428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 1184428d7b3dSmrg v[3] = compute_linear(&op->src, box->x1, box->y2); 1185428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y1); 1186428d7b3dSmrg 1187428d7b3dSmrg v += 6; 1188428d7b3dSmrg box++; 1189428d7b3dSmrg } while (--nbox); 1190428d7b3dSmrg} 1191428d7b3dSmrg 1192428d7b3dSmrgsse4_2 fastcall static void 1193428d7b3dSmrgemit_primitive_identity_source__sse4_2(struct sna *sna, 1194428d7b3dSmrg const struct sna_composite_op *op, 1195428d7b3dSmrg const struct sna_composite_rectangles *r) 1196428d7b3dSmrg{ 1197428d7b3dSmrg union { 1198428d7b3dSmrg struct sna_coordinate p; 1199428d7b3dSmrg float f; 1200428d7b3dSmrg } dst; 1201428d7b3dSmrg float *v; 1202428d7b3dSmrg 1203428d7b3dSmrg assert(op->floats_per_rect == 9); 1204428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 1205428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1206428d7b3dSmrg sna->render.vertex_used += 9; 1207428d7b3dSmrg 1208428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1209428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1210428d7b3dSmrg v[0] = dst.f; 1211428d7b3dSmrg dst.p.x = r->dst.x; 1212428d7b3dSmrg v[3] = dst.f; 1213428d7b3dSmrg dst.p.y = r->dst.y; 1214428d7b3dSmrg v[6] = dst.f; 1215428d7b3dSmrg 1216428d7b3dSmrg v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1217428d7b3dSmrg v[1] = v[4] + r->width * op->src.scale[0]; 1218428d7b3dSmrg 1219428d7b3dSmrg v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1220428d7b3dSmrg v[5] = v[2] = v[8] + r->height * op->src.scale[1]; 1221428d7b3dSmrg} 1222428d7b3dSmrg 1223428d7b3dSmrgsse4_2 fastcall static void 1224428d7b3dSmrgemit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, 1225428d7b3dSmrg const BoxRec *box, int nbox, 1226428d7b3dSmrg float *v) 1227428d7b3dSmrg{ 1228428d7b3dSmrg do { 1229428d7b3dSmrg union { 1230428d7b3dSmrg struct sna_coordinate p; 1231428d7b3dSmrg float f; 1232428d7b3dSmrg } dst; 1233428d7b3dSmrg 1234428d7b3dSmrg dst.p.x = box->x2; 1235428d7b3dSmrg dst.p.y = box->y2; 1236428d7b3dSmrg v[0] = dst.f; 1237428d7b3dSmrg dst.p.x = box->x1; 1238428d7b3dSmrg v[3] = dst.f; 1239428d7b3dSmrg dst.p.y = box->y1; 1240428d7b3dSmrg v[6] = dst.f; 1241428d7b3dSmrg 1242428d7b3dSmrg v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 1243428d7b3dSmrg v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 1244428d7b3dSmrg 1245428d7b3dSmrg v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 1246428d7b3dSmrg v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 1247428d7b3dSmrg 1248428d7b3dSmrg v += 9; 1249428d7b3dSmrg box++; 1250428d7b3dSmrg } while (--nbox); 1251428d7b3dSmrg} 1252428d7b3dSmrg 1253428d7b3dSmrgsse4_2 fastcall static void 1254428d7b3dSmrgemit_primitive_simple_source__sse4_2(struct sna *sna, 1255428d7b3dSmrg const struct sna_composite_op *op, 1256428d7b3dSmrg const struct sna_composite_rectangles *r) 1257428d7b3dSmrg{ 1258428d7b3dSmrg float *v; 1259428d7b3dSmrg union { 1260428d7b3dSmrg struct sna_coordinate p; 1261428d7b3dSmrg float f; 1262428d7b3dSmrg } dst; 1263428d7b3dSmrg 1264428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 1265428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 1266428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 1267428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 1268428d7b3dSmrg float sx = op->src.scale[0]; 1269428d7b3dSmrg float sy = op->src.scale[1]; 1270428d7b3dSmrg int16_t tx = op->src.offset[0]; 1271428d7b3dSmrg int16_t ty = op->src.offset[1]; 1272428d7b3dSmrg 1273428d7b3dSmrg assert(op->floats_per_rect == 9); 1274428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 1275428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1276428d7b3dSmrg sna->render.vertex_used += 3*3; 1277428d7b3dSmrg 1278428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1279428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1280428d7b3dSmrg v[0] = dst.f; 1281428d7b3dSmrg v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1282428d7b3dSmrg v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1283428d7b3dSmrg 1284428d7b3dSmrg dst.p.x = r->dst.x; 1285428d7b3dSmrg v[3] = dst.f; 1286428d7b3dSmrg v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 1287428d7b3dSmrg 1288428d7b3dSmrg dst.p.y = r->dst.y; 1289428d7b3dSmrg v[6] = dst.f; 1290428d7b3dSmrg v[8] = ((r->src.y + ty) * yy + y0) * sy; 1291428d7b3dSmrg} 1292428d7b3dSmrg 1293428d7b3dSmrgsse4_2 fastcall static void 1294428d7b3dSmrgemit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, 1295428d7b3dSmrg const BoxRec *box, int nbox, 1296428d7b3dSmrg float *v) 1297428d7b3dSmrg{ 1298428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 1299428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 1300428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 1301428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 1302428d7b3dSmrg float sx = op->src.scale[0]; 1303428d7b3dSmrg float sy = op->src.scale[1]; 1304428d7b3dSmrg int16_t tx = op->src.offset[0]; 1305428d7b3dSmrg int16_t ty = op->src.offset[1]; 1306428d7b3dSmrg 1307428d7b3dSmrg do { 1308428d7b3dSmrg union { 1309428d7b3dSmrg struct sna_coordinate p; 1310428d7b3dSmrg float f; 1311428d7b3dSmrg } dst; 1312428d7b3dSmrg 1313428d7b3dSmrg dst.p.x = box->x2; 1314428d7b3dSmrg dst.p.y = box->y2; 1315428d7b3dSmrg v[0] = dst.f; 1316428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 1317428d7b3dSmrg v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 1318428d7b3dSmrg 1319428d7b3dSmrg dst.p.x = box->x1; 1320428d7b3dSmrg v[3] = dst.f; 1321428d7b3dSmrg v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 1322428d7b3dSmrg 1323428d7b3dSmrg dst.p.y = box->y1; 1324428d7b3dSmrg v[6] = dst.f; 1325428d7b3dSmrg v[8] = ((box->y1 + ty) * yy + y0) * sy; 1326428d7b3dSmrg 1327428d7b3dSmrg v += 9; 1328428d7b3dSmrg box++; 1329428d7b3dSmrg } while (--nbox); 1330428d7b3dSmrg} 1331428d7b3dSmrg 1332428d7b3dSmrgsse4_2 fastcall static void 1333428d7b3dSmrgemit_primitive_identity_mask__sse4_2(struct sna *sna, 1334428d7b3dSmrg const struct sna_composite_op *op, 1335428d7b3dSmrg const struct sna_composite_rectangles *r) 1336428d7b3dSmrg{ 1337428d7b3dSmrg union { 1338428d7b3dSmrg struct sna_coordinate p; 1339428d7b3dSmrg float f; 1340428d7b3dSmrg } dst; 1341428d7b3dSmrg float msk_x, msk_y; 1342428d7b3dSmrg float w, h; 1343428d7b3dSmrg float *v; 1344428d7b3dSmrg 1345428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 1346428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 1347428d7b3dSmrg w = r->width; 1348428d7b3dSmrg h = r->height; 1349428d7b3dSmrg 1350428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1351428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1352428d7b3dSmrg 1353428d7b3dSmrg assert(op->floats_per_rect == 12); 1354428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 1355428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1356428d7b3dSmrg sna->render.vertex_used += 12; 1357428d7b3dSmrg 1358428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1359428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1360428d7b3dSmrg v[0] = dst.f; 1361428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 1362428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1363428d7b3dSmrg 1364428d7b3dSmrg dst.p.x = r->dst.x; 1365428d7b3dSmrg v[4] = dst.f; 1366428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 1367428d7b3dSmrg 1368428d7b3dSmrg dst.p.y = r->dst.y; 1369428d7b3dSmrg v[8] = dst.f; 1370428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 1371428d7b3dSmrg 1372428d7b3dSmrg v[9] = v[5] = v[1] = .5; 1373428d7b3dSmrg} 1374428d7b3dSmrg 1375428d7b3dSmrgsse4_2 fastcall static void 1376428d7b3dSmrgemit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op, 1377428d7b3dSmrg const BoxRec *box, int nbox, 1378428d7b3dSmrg float *v) 1379428d7b3dSmrg{ 1380428d7b3dSmrg float msk_x = op->mask.offset[0]; 1381428d7b3dSmrg float msk_y = op->mask.offset[1]; 1382428d7b3dSmrg 1383428d7b3dSmrg do { 1384428d7b3dSmrg union { 1385428d7b3dSmrg struct sna_coordinate p; 1386428d7b3dSmrg float f; 1387428d7b3dSmrg } dst; 1388428d7b3dSmrg 1389428d7b3dSmrg dst.p.x = box->x2; 1390428d7b3dSmrg dst.p.y = box->y2; 1391428d7b3dSmrg v[0] = dst.f; 1392428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1393428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1394428d7b3dSmrg 1395428d7b3dSmrg dst.p.x = box->x1; 1396428d7b3dSmrg v[4] = dst.f; 1397428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1398428d7b3dSmrg 1399428d7b3dSmrg dst.p.y = box->y1; 1400428d7b3dSmrg v[8] = dst.f; 1401428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1402428d7b3dSmrg 1403428d7b3dSmrg v[9] = v[5] = v[1] = .5; 1404428d7b3dSmrg v += 12; 1405428d7b3dSmrg box++; 1406428d7b3dSmrg } while (--nbox); 1407428d7b3dSmrg} 1408428d7b3dSmrg 1409428d7b3dSmrgsse4_2 fastcall static void 1410428d7b3dSmrgemit_primitive_linear_identity_mask__sse4_2(struct sna *sna, 1411428d7b3dSmrg const struct sna_composite_op *op, 1412428d7b3dSmrg const struct sna_composite_rectangles *r) 1413428d7b3dSmrg{ 1414428d7b3dSmrg union { 1415428d7b3dSmrg struct sna_coordinate p; 1416428d7b3dSmrg float f; 1417428d7b3dSmrg } dst; 1418428d7b3dSmrg float msk_x, msk_y; 1419428d7b3dSmrg float w, h; 1420428d7b3dSmrg float *v; 1421428d7b3dSmrg 1422428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 1423428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 1424428d7b3dSmrg w = r->width; 1425428d7b3dSmrg h = r->height; 1426428d7b3dSmrg 1427428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1428428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1429428d7b3dSmrg 1430428d7b3dSmrg assert(op->floats_per_rect == 12); 1431428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 1432428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1433428d7b3dSmrg sna->render.vertex_used += 12; 1434428d7b3dSmrg 1435428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1436428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1437428d7b3dSmrg v[0] = dst.f; 1438428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 1439428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1440428d7b3dSmrg 1441428d7b3dSmrg dst.p.x = r->dst.x; 1442428d7b3dSmrg v[4] = dst.f; 1443428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 1444428d7b3dSmrg 1445428d7b3dSmrg dst.p.y = r->dst.y; 1446428d7b3dSmrg v[8] = dst.f; 1447428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 1448428d7b3dSmrg 1449428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1450428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1451428d7b3dSmrg v[9] = compute_linear(&op->src, r->src.x, r->src.y); 1452428d7b3dSmrg} 1453428d7b3dSmrg 1454428d7b3dSmrgsse4_2 fastcall static void 1455428d7b3dSmrgemit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op, 1456428d7b3dSmrg const BoxRec *box, int nbox, 1457428d7b3dSmrg float *v) 1458428d7b3dSmrg{ 1459428d7b3dSmrg float msk_x = op->mask.offset[0]; 1460428d7b3dSmrg float msk_y = op->mask.offset[1]; 1461428d7b3dSmrg 1462428d7b3dSmrg do { 1463428d7b3dSmrg union { 1464428d7b3dSmrg struct sna_coordinate p; 1465428d7b3dSmrg float f; 1466428d7b3dSmrg } dst; 1467428d7b3dSmrg 1468428d7b3dSmrg dst.p.x = box->x2; 1469428d7b3dSmrg dst.p.y = box->y2; 1470428d7b3dSmrg v[0] = dst.f; 1471428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1472428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1473428d7b3dSmrg 1474428d7b3dSmrg dst.p.x = box->x1; 1475428d7b3dSmrg v[4] = dst.f; 1476428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1477428d7b3dSmrg 1478428d7b3dSmrg dst.p.y = box->y1; 1479428d7b3dSmrg v[8] = dst.f; 1480428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1481428d7b3dSmrg 1482428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 1483428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y2); 1484428d7b3dSmrg v[9] = compute_linear(&op->src, box->x1, box->y1); 1485428d7b3dSmrg 1486428d7b3dSmrg v += 12; 1487428d7b3dSmrg box++; 1488428d7b3dSmrg } while (--nbox); 1489428d7b3dSmrg} 1490428d7b3dSmrg 1491428d7b3dSmrg#endif 1492428d7b3dSmrg 1493428d7b3dSmrg/* AVX2 */ 1494428d7b3dSmrg#if defined(avx2) 1495428d7b3dSmrg 1496428d7b3dSmrgavx2 fastcall static void 1497428d7b3dSmrgemit_primitive_linear__avx2(struct sna *sna, 1498428d7b3dSmrg const struct sna_composite_op *op, 1499428d7b3dSmrg const struct sna_composite_rectangles *r) 1500428d7b3dSmrg{ 1501428d7b3dSmrg float *v; 1502428d7b3dSmrg union { 1503428d7b3dSmrg struct sna_coordinate p; 1504428d7b3dSmrg float f; 1505428d7b3dSmrg } dst; 1506428d7b3dSmrg 1507428d7b3dSmrg assert(op->floats_per_rect == 6); 1508428d7b3dSmrg assert((sna->render.vertex_used % 2) == 0); 1509428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1510428d7b3dSmrg sna->render.vertex_used += 6; 1511428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 1512428d7b3dSmrg 1513428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1514428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1515428d7b3dSmrg v[0] = dst.f; 1516428d7b3dSmrg dst.p.x = r->dst.x; 1517428d7b3dSmrg v[2] = dst.f; 1518428d7b3dSmrg dst.p.y = r->dst.y; 1519428d7b3dSmrg v[4] = dst.f; 1520428d7b3dSmrg 1521428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1522428d7b3dSmrg v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1523428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y); 1524428d7b3dSmrg} 1525428d7b3dSmrg 1526428d7b3dSmrgavx2 fastcall static void 1527428d7b3dSmrgemit_boxes_linear__avx2(const struct sna_composite_op *op, 1528428d7b3dSmrg const BoxRec *box, int nbox, 1529428d7b3dSmrg float *v) 1530428d7b3dSmrg{ 1531428d7b3dSmrg union { 1532428d7b3dSmrg struct sna_coordinate p; 1533428d7b3dSmrg float f; 1534428d7b3dSmrg } dst; 1535428d7b3dSmrg 1536428d7b3dSmrg do { 1537428d7b3dSmrg dst.p.x = box->x2; 1538428d7b3dSmrg dst.p.y = box->y2; 1539428d7b3dSmrg v[0] = dst.f; 1540428d7b3dSmrg dst.p.x = box->x1; 1541428d7b3dSmrg v[2] = dst.f; 1542428d7b3dSmrg dst.p.y = box->y1; 1543428d7b3dSmrg v[4] = dst.f; 1544428d7b3dSmrg 1545428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 1546428d7b3dSmrg v[3] = compute_linear(&op->src, box->x1, box->y2); 1547428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y1); 1548428d7b3dSmrg 1549428d7b3dSmrg v += 6; 1550428d7b3dSmrg box++; 1551428d7b3dSmrg } while (--nbox); 1552428d7b3dSmrg} 1553428d7b3dSmrg 1554428d7b3dSmrgavx2 fastcall static void 1555428d7b3dSmrgemit_primitive_identity_source__avx2(struct sna *sna, 1556428d7b3dSmrg const struct sna_composite_op *op, 1557428d7b3dSmrg const struct sna_composite_rectangles *r) 1558428d7b3dSmrg{ 1559428d7b3dSmrg union { 1560428d7b3dSmrg struct sna_coordinate p; 1561428d7b3dSmrg float f; 1562428d7b3dSmrg } dst; 1563428d7b3dSmrg float *v; 1564428d7b3dSmrg 1565428d7b3dSmrg assert(op->floats_per_rect == 9); 1566428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 1567428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1568428d7b3dSmrg sna->render.vertex_used += 9; 1569428d7b3dSmrg 1570428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1571428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1572428d7b3dSmrg v[0] = dst.f; 1573428d7b3dSmrg dst.p.x = r->dst.x; 1574428d7b3dSmrg v[3] = dst.f; 1575428d7b3dSmrg dst.p.y = r->dst.y; 1576428d7b3dSmrg v[6] = dst.f; 1577428d7b3dSmrg 1578428d7b3dSmrg v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1579428d7b3dSmrg v[1] = v[4] + r->width * op->src.scale[0]; 1580428d7b3dSmrg 1581428d7b3dSmrg v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1582428d7b3dSmrg v[5] = v[2] = v[8] + r->height * op->src.scale[1]; 1583428d7b3dSmrg} 1584428d7b3dSmrg 1585428d7b3dSmrgavx2 fastcall static void 1586428d7b3dSmrgemit_boxes_identity_source__avx2(const struct sna_composite_op *op, 1587428d7b3dSmrg const BoxRec *box, int nbox, 1588428d7b3dSmrg float *v) 1589428d7b3dSmrg{ 1590428d7b3dSmrg do { 1591428d7b3dSmrg union { 1592428d7b3dSmrg struct sna_coordinate p; 1593428d7b3dSmrg float f; 1594428d7b3dSmrg } dst; 1595428d7b3dSmrg 1596428d7b3dSmrg dst.p.x = box->x2; 1597428d7b3dSmrg dst.p.y = box->y2; 1598428d7b3dSmrg v[0] = dst.f; 1599428d7b3dSmrg dst.p.x = box->x1; 1600428d7b3dSmrg v[3] = dst.f; 1601428d7b3dSmrg dst.p.y = box->y1; 1602428d7b3dSmrg v[6] = dst.f; 1603428d7b3dSmrg 1604428d7b3dSmrg v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; 1605428d7b3dSmrg v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; 1606428d7b3dSmrg 1607428d7b3dSmrg v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; 1608428d7b3dSmrg v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; 1609428d7b3dSmrg 1610428d7b3dSmrg v += 9; 1611428d7b3dSmrg box++; 1612428d7b3dSmrg } while (--nbox); 1613428d7b3dSmrg} 1614428d7b3dSmrg 1615428d7b3dSmrgavx2 fastcall static void 1616428d7b3dSmrgemit_primitive_simple_source__avx2(struct sna *sna, 1617428d7b3dSmrg const struct sna_composite_op *op, 1618428d7b3dSmrg const struct sna_composite_rectangles *r) 1619428d7b3dSmrg{ 1620428d7b3dSmrg float *v; 1621428d7b3dSmrg union { 1622428d7b3dSmrg struct sna_coordinate p; 1623428d7b3dSmrg float f; 1624428d7b3dSmrg } dst; 1625428d7b3dSmrg 1626428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 1627428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 1628428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 1629428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 1630428d7b3dSmrg float sx = op->src.scale[0]; 1631428d7b3dSmrg float sy = op->src.scale[1]; 1632428d7b3dSmrg int16_t tx = op->src.offset[0]; 1633428d7b3dSmrg int16_t ty = op->src.offset[1]; 1634428d7b3dSmrg 1635428d7b3dSmrg assert(op->floats_per_rect == 9); 1636428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 1637428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1638428d7b3dSmrg sna->render.vertex_used += 3*3; 1639428d7b3dSmrg 1640428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1641428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1642428d7b3dSmrg v[0] = dst.f; 1643428d7b3dSmrg v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; 1644428d7b3dSmrg v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; 1645428d7b3dSmrg 1646428d7b3dSmrg dst.p.x = r->dst.x; 1647428d7b3dSmrg v[3] = dst.f; 1648428d7b3dSmrg v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; 1649428d7b3dSmrg 1650428d7b3dSmrg dst.p.y = r->dst.y; 1651428d7b3dSmrg v[6] = dst.f; 1652428d7b3dSmrg v[8] = ((r->src.y + ty) * yy + y0) * sy; 1653428d7b3dSmrg} 1654428d7b3dSmrg 1655428d7b3dSmrgavx2 fastcall static void 1656428d7b3dSmrgemit_boxes_simple_source__avx2(const struct sna_composite_op *op, 1657428d7b3dSmrg const BoxRec *box, int nbox, 1658428d7b3dSmrg float *v) 1659428d7b3dSmrg{ 1660428d7b3dSmrg float xx = op->src.transform->matrix[0][0]; 1661428d7b3dSmrg float x0 = op->src.transform->matrix[0][2]; 1662428d7b3dSmrg float yy = op->src.transform->matrix[1][1]; 1663428d7b3dSmrg float y0 = op->src.transform->matrix[1][2]; 1664428d7b3dSmrg float sx = op->src.scale[0]; 1665428d7b3dSmrg float sy = op->src.scale[1]; 1666428d7b3dSmrg int16_t tx = op->src.offset[0]; 1667428d7b3dSmrg int16_t ty = op->src.offset[1]; 1668428d7b3dSmrg 1669428d7b3dSmrg do { 1670428d7b3dSmrg union { 1671428d7b3dSmrg struct sna_coordinate p; 1672428d7b3dSmrg float f; 1673428d7b3dSmrg } dst; 1674428d7b3dSmrg 1675428d7b3dSmrg dst.p.x = box->x2; 1676428d7b3dSmrg dst.p.y = box->y2; 1677428d7b3dSmrg v[0] = dst.f; 1678428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 1679428d7b3dSmrg v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 1680428d7b3dSmrg 1681428d7b3dSmrg dst.p.x = box->x1; 1682428d7b3dSmrg v[3] = dst.f; 1683428d7b3dSmrg v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; 1684428d7b3dSmrg 1685428d7b3dSmrg dst.p.y = box->y1; 1686428d7b3dSmrg v[6] = dst.f; 1687428d7b3dSmrg v[8] = ((box->y1 + ty) * yy + y0) * sy; 1688428d7b3dSmrg 1689428d7b3dSmrg v += 9; 1690428d7b3dSmrg box++; 1691428d7b3dSmrg } while (--nbox); 1692428d7b3dSmrg} 1693428d7b3dSmrg 1694428d7b3dSmrgavx2 fastcall static void 1695428d7b3dSmrgemit_primitive_identity_mask__avx2(struct sna *sna, 1696428d7b3dSmrg const struct sna_composite_op *op, 1697428d7b3dSmrg const struct sna_composite_rectangles *r) 1698428d7b3dSmrg{ 1699428d7b3dSmrg union { 1700428d7b3dSmrg struct sna_coordinate p; 1701428d7b3dSmrg float f; 1702428d7b3dSmrg } dst; 1703428d7b3dSmrg float msk_x, msk_y; 1704428d7b3dSmrg float w, h; 1705428d7b3dSmrg float *v; 1706428d7b3dSmrg 1707428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 1708428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 1709428d7b3dSmrg w = r->width; 1710428d7b3dSmrg h = r->height; 1711428d7b3dSmrg 1712428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1713428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1714428d7b3dSmrg 1715428d7b3dSmrg assert(op->floats_per_rect == 12); 1716428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 1717428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1718428d7b3dSmrg sna->render.vertex_used += 12; 1719428d7b3dSmrg 1720428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1721428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1722428d7b3dSmrg v[0] = dst.f; 1723428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 1724428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1725428d7b3dSmrg 1726428d7b3dSmrg dst.p.x = r->dst.x; 1727428d7b3dSmrg v[4] = dst.f; 1728428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 1729428d7b3dSmrg 1730428d7b3dSmrg dst.p.y = r->dst.y; 1731428d7b3dSmrg v[8] = dst.f; 1732428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 1733428d7b3dSmrg 1734428d7b3dSmrg v[9] = v[5] = v[1] = .5; 1735428d7b3dSmrg} 1736428d7b3dSmrg 1737428d7b3dSmrgavx2 fastcall static void 1738428d7b3dSmrgemit_boxes_identity_mask__avx2(const struct sna_composite_op *op, 1739428d7b3dSmrg const BoxRec *box, int nbox, 1740428d7b3dSmrg float *v) 1741428d7b3dSmrg{ 1742428d7b3dSmrg float msk_x = op->mask.offset[0]; 1743428d7b3dSmrg float msk_y = op->mask.offset[1]; 1744428d7b3dSmrg 1745428d7b3dSmrg do { 1746428d7b3dSmrg union { 1747428d7b3dSmrg struct sna_coordinate p; 1748428d7b3dSmrg float f; 1749428d7b3dSmrg } dst; 1750428d7b3dSmrg 1751428d7b3dSmrg dst.p.x = box->x2; 1752428d7b3dSmrg dst.p.y = box->y2; 1753428d7b3dSmrg v[0] = dst.f; 1754428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1755428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1756428d7b3dSmrg 1757428d7b3dSmrg dst.p.x = box->x1; 1758428d7b3dSmrg v[4] = dst.f; 1759428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1760428d7b3dSmrg 1761428d7b3dSmrg dst.p.y = box->y1; 1762428d7b3dSmrg v[8] = dst.f; 1763428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1764428d7b3dSmrg 1765428d7b3dSmrg v[9] = v[5] = v[1] = .5; 1766428d7b3dSmrg v += 12; 1767428d7b3dSmrg box++; 1768428d7b3dSmrg } while (--nbox); 1769428d7b3dSmrg} 1770428d7b3dSmrg 1771428d7b3dSmrgavx2 fastcall static void 1772428d7b3dSmrgemit_primitive_linear_identity_mask__avx2(struct sna *sna, 1773428d7b3dSmrg const struct sna_composite_op *op, 1774428d7b3dSmrg const struct sna_composite_rectangles *r) 1775428d7b3dSmrg{ 1776428d7b3dSmrg union { 1777428d7b3dSmrg struct sna_coordinate p; 1778428d7b3dSmrg float f; 1779428d7b3dSmrg } dst; 1780428d7b3dSmrg float msk_x, msk_y; 1781428d7b3dSmrg float w, h; 1782428d7b3dSmrg float *v; 1783428d7b3dSmrg 1784428d7b3dSmrg msk_x = r->mask.x + op->mask.offset[0]; 1785428d7b3dSmrg msk_y = r->mask.y + op->mask.offset[1]; 1786428d7b3dSmrg w = r->width; 1787428d7b3dSmrg h = r->height; 1788428d7b3dSmrg 1789428d7b3dSmrg DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", 1790428d7b3dSmrg __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); 1791428d7b3dSmrg 1792428d7b3dSmrg assert(op->floats_per_rect == 12); 1793428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 1794428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1795428d7b3dSmrg sna->render.vertex_used += 12; 1796428d7b3dSmrg 1797428d7b3dSmrg dst.p.x = r->dst.x + r->width; 1798428d7b3dSmrg dst.p.y = r->dst.y + r->height; 1799428d7b3dSmrg v[0] = dst.f; 1800428d7b3dSmrg v[2] = (msk_x + w) * op->mask.scale[0]; 1801428d7b3dSmrg v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; 1802428d7b3dSmrg 1803428d7b3dSmrg dst.p.x = r->dst.x; 1804428d7b3dSmrg v[4] = dst.f; 1805428d7b3dSmrg v[10] = v[6] = msk_x * op->mask.scale[0]; 1806428d7b3dSmrg 1807428d7b3dSmrg dst.p.y = r->dst.y; 1808428d7b3dSmrg v[8] = dst.f; 1809428d7b3dSmrg v[11] = msk_y * op->mask.scale[1]; 1810428d7b3dSmrg 1811428d7b3dSmrg v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); 1812428d7b3dSmrg v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); 1813428d7b3dSmrg v[9] = compute_linear(&op->src, r->src.x, r->src.y); 1814428d7b3dSmrg} 1815428d7b3dSmrg 1816428d7b3dSmrgavx2 fastcall static void 1817428d7b3dSmrgemit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op, 1818428d7b3dSmrg const BoxRec *box, int nbox, 1819428d7b3dSmrg float *v) 1820428d7b3dSmrg{ 1821428d7b3dSmrg float msk_x = op->mask.offset[0]; 1822428d7b3dSmrg float msk_y = op->mask.offset[1]; 1823428d7b3dSmrg 1824428d7b3dSmrg do { 1825428d7b3dSmrg union { 1826428d7b3dSmrg struct sna_coordinate p; 1827428d7b3dSmrg float f; 1828428d7b3dSmrg } dst; 1829428d7b3dSmrg 1830428d7b3dSmrg dst.p.x = box->x2; 1831428d7b3dSmrg dst.p.y = box->y2; 1832428d7b3dSmrg v[0] = dst.f; 1833428d7b3dSmrg v[2] = (msk_x + box->x2) * op->mask.scale[0]; 1834428d7b3dSmrg v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; 1835428d7b3dSmrg 1836428d7b3dSmrg dst.p.x = box->x1; 1837428d7b3dSmrg v[4] = dst.f; 1838428d7b3dSmrg v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; 1839428d7b3dSmrg 1840428d7b3dSmrg dst.p.y = box->y1; 1841428d7b3dSmrg v[8] = dst.f; 1842428d7b3dSmrg v[11] = (msk_y + box->y1) * op->mask.scale[1]; 1843428d7b3dSmrg 1844428d7b3dSmrg v[1] = compute_linear(&op->src, box->x2, box->y2); 1845428d7b3dSmrg v[5] = compute_linear(&op->src, box->x1, box->y2); 1846428d7b3dSmrg v[9] = compute_linear(&op->src, box->x1, box->y1); 1847428d7b3dSmrg 1848428d7b3dSmrg v += 12; 1849428d7b3dSmrg box++; 1850428d7b3dSmrg } while (--nbox); 1851428d7b3dSmrg} 1852428d7b3dSmrg 1853428d7b3dSmrg#endif 1854428d7b3dSmrg 1855428d7b3dSmrgunsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) 1856428d7b3dSmrg{ 1857428d7b3dSmrg unsigned vb; 1858428d7b3dSmrg 1859428d7b3dSmrg if (tmp->mask.bo) { 1860428d7b3dSmrg if (tmp->mask.transform == NULL) { 1861428d7b3dSmrg if (tmp->src.is_solid) { 1862428d7b3dSmrg DBG(("%s: solid, identity mask\n", __FUNCTION__)); 1863428d7b3dSmrg#if defined(avx2) 1864428d7b3dSmrg if (sna->cpu_features & AVX2) { 1865428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_mask__avx2; 1866428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_mask__avx2; 1867428d7b3dSmrg } else 1868428d7b3dSmrg#endif 1869428d7b3dSmrg#if defined(sse4_2) 1870428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 1871428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_mask__sse4_2; 1872428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_mask__sse4_2; 1873428d7b3dSmrg } else 1874428d7b3dSmrg#endif 1875428d7b3dSmrg { 1876428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_mask; 1877428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_mask; 1878428d7b3dSmrg } 1879428d7b3dSmrg tmp->floats_per_vertex = 4; 1880428d7b3dSmrg vb = 1 | 2 << 2; 1881428d7b3dSmrg } else if (tmp->src.is_linear) { 1882428d7b3dSmrg DBG(("%s: linear, identity mask\n", __FUNCTION__)); 1883428d7b3dSmrg#if defined(avx2) 1884428d7b3dSmrg if (sna->cpu_features & AVX2) { 1885428d7b3dSmrg tmp->prim_emit = emit_primitive_linear_identity_mask__avx2; 1886428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear_identity_mask__avx2; 1887428d7b3dSmrg } else 1888428d7b3dSmrg#endif 1889428d7b3dSmrg#if defined(sse4_2) 1890428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 1891428d7b3dSmrg tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2; 1892428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear_identity_mask__sse4_2; 1893428d7b3dSmrg } else 1894428d7b3dSmrg#endif 1895428d7b3dSmrg { 1896428d7b3dSmrg tmp->prim_emit = emit_primitive_linear_identity_mask; 1897428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear_identity_mask; 1898428d7b3dSmrg } 1899428d7b3dSmrg tmp->floats_per_vertex = 4; 1900428d7b3dSmrg vb = 1 | 2 << 2; 1901428d7b3dSmrg } else if (tmp->src.transform == NULL) { 1902428d7b3dSmrg DBG(("%s: identity source, identity mask\n", __FUNCTION__)); 1903428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_source_mask; 1904428d7b3dSmrg tmp->floats_per_vertex = 5; 1905428d7b3dSmrg vb = 2 << 2 | 2; 1906428d7b3dSmrg } else if (tmp->src.is_affine) { 1907428d7b3dSmrg tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 1908428d7b3dSmrg tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 1909428d7b3dSmrg if (!sna_affine_transform_is_rotation(tmp->src.transform)) { 1910428d7b3dSmrg DBG(("%s: simple src, identity mask\n", __FUNCTION__)); 1911428d7b3dSmrg tmp->prim_emit = emit_primitive_simple_source_identity; 1912428d7b3dSmrg } else { 1913428d7b3dSmrg DBG(("%s: affine src, identity mask\n", __FUNCTION__)); 1914428d7b3dSmrg tmp->prim_emit = emit_primitive_affine_source_identity; 1915428d7b3dSmrg } 1916428d7b3dSmrg tmp->floats_per_vertex = 5; 1917428d7b3dSmrg vb = 2 << 2 | 2; 1918428d7b3dSmrg } else { 1919428d7b3dSmrg DBG(("%s: projective source, identity mask\n", __FUNCTION__)); 1920428d7b3dSmrg tmp->prim_emit = emit_primitive_mask; 1921428d7b3dSmrg tmp->floats_per_vertex = 6; 1922428d7b3dSmrg vb = 2 << 2 | 3; 1923428d7b3dSmrg } 1924428d7b3dSmrg } else { 1925428d7b3dSmrg tmp->prim_emit = emit_primitive_mask; 1926428d7b3dSmrg tmp->emit_boxes = emit_boxes_mask; 1927428d7b3dSmrg tmp->floats_per_vertex = 1; 1928428d7b3dSmrg vb = 0; 1929428d7b3dSmrg if (tmp->mask.is_solid) { 1930428d7b3dSmrg tmp->floats_per_vertex += 1; 1931428d7b3dSmrg vb |= 1 << 2; 1932428d7b3dSmrg } else if (tmp->mask.is_affine) { 1933428d7b3dSmrg tmp->floats_per_vertex += 2; 1934428d7b3dSmrg vb |= 2 << 2; 1935428d7b3dSmrg }else { 1936428d7b3dSmrg tmp->floats_per_vertex += 3; 1937428d7b3dSmrg vb |= 3 << 2; 1938428d7b3dSmrg } 1939428d7b3dSmrg if (tmp->src.is_solid) { 1940428d7b3dSmrg tmp->floats_per_vertex += 1; 1941428d7b3dSmrg vb |= 1; 1942428d7b3dSmrg } else if (tmp->src.is_affine) { 1943428d7b3dSmrg tmp->floats_per_vertex += 2; 1944428d7b3dSmrg vb |= 2 ; 1945428d7b3dSmrg }else { 1946428d7b3dSmrg tmp->floats_per_vertex += 3; 1947428d7b3dSmrg vb |= 3; 1948428d7b3dSmrg } 1949428d7b3dSmrg DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", 1950428d7b3dSmrg __FUNCTION__,tmp->floats_per_vertex, vb)); 1951428d7b3dSmrg } 1952428d7b3dSmrg } else { 1953428d7b3dSmrg if (tmp->src.is_solid) { 1954428d7b3dSmrg DBG(("%s: solid, no mask\n", __FUNCTION__)); 1955428d7b3dSmrg tmp->prim_emit = emit_primitive_solid; 1956428d7b3dSmrg tmp->emit_boxes = emit_boxes_solid; 1957428d7b3dSmrg if (tmp->src.is_opaque && tmp->op == PictOpOver) 1958428d7b3dSmrg tmp->op = PictOpSrc; 1959428d7b3dSmrg tmp->floats_per_vertex = 2; 1960428d7b3dSmrg vb = 1; 1961428d7b3dSmrg } else if (tmp->src.is_linear) { 1962428d7b3dSmrg DBG(("%s: linear, no mask\n", __FUNCTION__)); 1963428d7b3dSmrg#if defined(avx2) 1964428d7b3dSmrg if (sna->cpu_features & AVX2) { 1965428d7b3dSmrg tmp->prim_emit = emit_primitive_linear__avx2; 1966428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear__avx2; 1967428d7b3dSmrg } else 1968428d7b3dSmrg#endif 1969428d7b3dSmrg#if defined(sse4_2) 1970428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 1971428d7b3dSmrg tmp->prim_emit = emit_primitive_linear__sse4_2; 1972428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear__sse4_2; 1973428d7b3dSmrg } else 1974428d7b3dSmrg#endif 1975428d7b3dSmrg { 1976428d7b3dSmrg tmp->prim_emit = emit_primitive_linear; 1977428d7b3dSmrg tmp->emit_boxes = emit_boxes_linear; 1978428d7b3dSmrg } 1979428d7b3dSmrg tmp->floats_per_vertex = 2; 1980428d7b3dSmrg vb = 1; 1981428d7b3dSmrg } else if (tmp->src.transform == NULL) { 1982428d7b3dSmrg DBG(("%s: identity src, no mask\n", __FUNCTION__)); 1983428d7b3dSmrg#if defined(avx2) 1984428d7b3dSmrg if (sna->cpu_features & AVX2) { 1985428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_source__avx2; 1986428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_source__avx2; 1987428d7b3dSmrg } else 1988428d7b3dSmrg#endif 1989428d7b3dSmrg#if defined(sse4_2) 1990428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 1991428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_source__sse4_2; 1992428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_source__sse4_2; 1993428d7b3dSmrg } else 1994428d7b3dSmrg#endif 1995428d7b3dSmrg { 1996428d7b3dSmrg tmp->prim_emit = emit_primitive_identity_source; 1997428d7b3dSmrg tmp->emit_boxes = emit_boxes_identity_source; 1998428d7b3dSmrg } 1999428d7b3dSmrg tmp->floats_per_vertex = 3; 2000428d7b3dSmrg vb = 2; 2001428d7b3dSmrg } else if (tmp->src.is_affine) { 2002428d7b3dSmrg tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 2003428d7b3dSmrg tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 2004428d7b3dSmrg if (!sna_affine_transform_is_rotation(tmp->src.transform)) { 2005428d7b3dSmrg DBG(("%s: simple src, no mask\n", __FUNCTION__)); 2006428d7b3dSmrg#if defined(avx2) 2007428d7b3dSmrg if (sna->cpu_features & AVX2) { 2008428d7b3dSmrg tmp->prim_emit = emit_primitive_simple_source__avx2; 2009428d7b3dSmrg tmp->emit_boxes = emit_boxes_simple_source__avx2; 2010428d7b3dSmrg } else 2011428d7b3dSmrg#endif 2012428d7b3dSmrg#if defined(sse4_2) 2013428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 2014428d7b3dSmrg tmp->prim_emit = emit_primitive_simple_source__sse4_2; 2015428d7b3dSmrg tmp->emit_boxes = emit_boxes_simple_source__sse4_2; 2016428d7b3dSmrg } else 2017428d7b3dSmrg#endif 2018428d7b3dSmrg { 2019428d7b3dSmrg tmp->prim_emit = emit_primitive_simple_source; 2020428d7b3dSmrg tmp->emit_boxes = emit_boxes_simple_source; 2021428d7b3dSmrg } 2022428d7b3dSmrg } else { 2023428d7b3dSmrg DBG(("%s: affine src, no mask\n", __FUNCTION__)); 2024428d7b3dSmrg tmp->prim_emit = emit_primitive_affine_source; 2025428d7b3dSmrg tmp->emit_boxes = emit_boxes_affine_source; 2026428d7b3dSmrg } 2027428d7b3dSmrg tmp->floats_per_vertex = 3; 2028428d7b3dSmrg vb = 2; 2029428d7b3dSmrg } else { 2030428d7b3dSmrg DBG(("%s: projective src, no mask\n", __FUNCTION__)); 2031428d7b3dSmrg assert(!tmp->src.is_solid); 2032428d7b3dSmrg tmp->prim_emit = emit_primitive; 2033428d7b3dSmrg tmp->emit_boxes = emit_boxes; 2034428d7b3dSmrg tmp->floats_per_vertex = 4; 2035428d7b3dSmrg vb = 3; 2036428d7b3dSmrg } 2037428d7b3dSmrg } 2038428d7b3dSmrg tmp->floats_per_rect = 3 * tmp->floats_per_vertex; 2039428d7b3dSmrg 2040428d7b3dSmrg return vb; 2041428d7b3dSmrg} 2042428d7b3dSmrg 2043428d7b3dSmrgsse2 force_inline static void 2044428d7b3dSmrgemit_span_vertex(struct sna *sna, 2045428d7b3dSmrg const struct sna_composite_spans_op *op, 2046428d7b3dSmrg int16_t x, int16_t y) 2047428d7b3dSmrg{ 2048428d7b3dSmrg OUT_VERTEX(x, y); 2049428d7b3dSmrg emit_texcoord(sna, &op->base.src, x, y); 2050428d7b3dSmrg} 2051428d7b3dSmrg 2052428d7b3dSmrgsse2 fastcall static void 2053428d7b3dSmrgemit_span_primitive(struct sna *sna, 2054428d7b3dSmrg const struct sna_composite_spans_op *op, 2055428d7b3dSmrg const BoxRec *box, 2056428d7b3dSmrg float opacity) 2057428d7b3dSmrg{ 2058428d7b3dSmrg emit_span_vertex(sna, op, box->x2, box->y2); 2059428d7b3dSmrg OUT_VERTEX_F(opacity); 2060428d7b3dSmrg 2061428d7b3dSmrg emit_span_vertex(sna, op, box->x1, box->y2); 2062428d7b3dSmrg OUT_VERTEX_F(opacity); 2063428d7b3dSmrg 2064428d7b3dSmrg emit_span_vertex(sna, op, box->x1, box->y1); 2065428d7b3dSmrg OUT_VERTEX_F(opacity); 2066428d7b3dSmrg} 2067428d7b3dSmrg 2068428d7b3dSmrgsse2 fastcall static void 2069428d7b3dSmrgemit_span_boxes(const struct sna_composite_spans_op *op, 2070428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2071428d7b3dSmrg float *v) 2072428d7b3dSmrg{ 2073428d7b3dSmrg do { 2074428d7b3dSmrg v = vemit_vertex(v, &op->base, b->box.x2, b->box.y2); 2075428d7b3dSmrg *v++ = b->alpha; 2076428d7b3dSmrg 2077428d7b3dSmrg v = vemit_vertex(v, &op->base, b->box.x1, b->box.y2); 2078428d7b3dSmrg *v++ = b->alpha; 2079428d7b3dSmrg 2080428d7b3dSmrg v = vemit_vertex(v, &op->base, b->box.x1, b->box.y1); 2081428d7b3dSmrg *v++ = b->alpha; 2082428d7b3dSmrg 2083428d7b3dSmrg b++; 2084428d7b3dSmrg } while (--nbox); 2085428d7b3dSmrg} 2086428d7b3dSmrg 2087428d7b3dSmrgsse2 fastcall static void 2088428d7b3dSmrgemit_span_solid(struct sna *sna, 2089428d7b3dSmrg const struct sna_composite_spans_op *op, 2090428d7b3dSmrg const BoxRec *box, 2091428d7b3dSmrg float opacity) 2092428d7b3dSmrg{ 2093428d7b3dSmrg float *v; 2094428d7b3dSmrg union { 2095428d7b3dSmrg struct sna_coordinate p; 2096428d7b3dSmrg float f; 2097428d7b3dSmrg } dst; 2098428d7b3dSmrg 2099428d7b3dSmrg assert(op->base.floats_per_rect == 9); 2100428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 2101428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2102428d7b3dSmrg sna->render.vertex_used += 3*3; 2103428d7b3dSmrg 2104428d7b3dSmrg dst.p.x = box->x2; 2105428d7b3dSmrg dst.p.y = box->y2; 2106428d7b3dSmrg v[0] = dst.f; 2107428d7b3dSmrg 2108428d7b3dSmrg dst.p.x = box->x1; 2109428d7b3dSmrg v[3] = dst.f; 2110428d7b3dSmrg 2111428d7b3dSmrg dst.p.y = box->y1; 2112428d7b3dSmrg v[6] = dst.f; 2113428d7b3dSmrg 2114428d7b3dSmrg v[7] = v[4] = v[1] = .5; 2115428d7b3dSmrg v[8] = v[5] = v[2] = opacity; 2116428d7b3dSmrg} 2117428d7b3dSmrg 2118428d7b3dSmrgsse2 fastcall static void 2119428d7b3dSmrgemit_span_boxes_solid(const struct sna_composite_spans_op *op, 2120428d7b3dSmrg const struct sna_opacity_box *b, 2121428d7b3dSmrg int nbox, float *v) 2122428d7b3dSmrg{ 2123428d7b3dSmrg do { 2124428d7b3dSmrg union { 2125428d7b3dSmrg struct sna_coordinate p; 2126428d7b3dSmrg float f; 2127428d7b3dSmrg } dst; 2128428d7b3dSmrg 2129428d7b3dSmrg dst.p.x = b->box.x2; 2130428d7b3dSmrg dst.p.y = b->box.y2; 2131428d7b3dSmrg v[0] = dst.f; 2132428d7b3dSmrg 2133428d7b3dSmrg dst.p.x = b->box.x1; 2134428d7b3dSmrg v[3] = dst.f; 2135428d7b3dSmrg 2136428d7b3dSmrg dst.p.y = b->box.y1; 2137428d7b3dSmrg v[6] = dst.f; 2138428d7b3dSmrg 2139428d7b3dSmrg v[7] = v[4] = v[1] = .5; 2140428d7b3dSmrg v[8] = v[5] = v[2] = b->alpha; 2141428d7b3dSmrg 2142428d7b3dSmrg v += 9; 2143428d7b3dSmrg b++; 2144428d7b3dSmrg } while (--nbox); 2145428d7b3dSmrg} 2146428d7b3dSmrg 2147428d7b3dSmrgsse2 fastcall static void 2148428d7b3dSmrgemit_span_identity(struct sna *sna, 2149428d7b3dSmrg const struct sna_composite_spans_op *op, 2150428d7b3dSmrg const BoxRec *box, 2151428d7b3dSmrg float opacity) 2152428d7b3dSmrg{ 2153428d7b3dSmrg float *v; 2154428d7b3dSmrg union { 2155428d7b3dSmrg struct sna_coordinate p; 2156428d7b3dSmrg float f; 2157428d7b3dSmrg } dst; 2158428d7b3dSmrg 2159428d7b3dSmrg float sx = op->base.src.scale[0]; 2160428d7b3dSmrg float sy = op->base.src.scale[1]; 2161428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2162428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2163428d7b3dSmrg 2164428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2165428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2166428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2167428d7b3dSmrg sna->render.vertex_used += 3*4; 2168428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2169428d7b3dSmrg 2170428d7b3dSmrg dst.p.x = box->x2; 2171428d7b3dSmrg dst.p.y = box->y2; 2172428d7b3dSmrg v[0] = dst.f; 2173428d7b3dSmrg v[1] = (box->x2 + tx) * sx; 2174428d7b3dSmrg v[6] = v[2] = (box->y2 + ty) * sy; 2175428d7b3dSmrg 2176428d7b3dSmrg dst.p.x = box->x1; 2177428d7b3dSmrg v[4] = dst.f; 2178428d7b3dSmrg v[9] = v[5] = (box->x1 + tx) * sx; 2179428d7b3dSmrg 2180428d7b3dSmrg dst.p.y = box->y1; 2181428d7b3dSmrg v[8] = dst.f; 2182428d7b3dSmrg v[10] = (box->y1 + ty) * sy; 2183428d7b3dSmrg 2184428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2185428d7b3dSmrg} 2186428d7b3dSmrg 2187428d7b3dSmrgsse2 fastcall static void 2188428d7b3dSmrgemit_span_boxes_identity(const struct sna_composite_spans_op *op, 2189428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2190428d7b3dSmrg float *v) 2191428d7b3dSmrg{ 2192428d7b3dSmrg do { 2193428d7b3dSmrg union { 2194428d7b3dSmrg struct sna_coordinate p; 2195428d7b3dSmrg float f; 2196428d7b3dSmrg } dst; 2197428d7b3dSmrg 2198428d7b3dSmrg float sx = op->base.src.scale[0]; 2199428d7b3dSmrg float sy = op->base.src.scale[1]; 2200428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2201428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2202428d7b3dSmrg 2203428d7b3dSmrg dst.p.x = b->box.x2; 2204428d7b3dSmrg dst.p.y = b->box.y2; 2205428d7b3dSmrg v[0] = dst.f; 2206428d7b3dSmrg v[1] = (b->box.x2 + tx) * sx; 2207428d7b3dSmrg v[6] = v[2] = (b->box.y2 + ty) * sy; 2208428d7b3dSmrg 2209428d7b3dSmrg dst.p.x = b->box.x1; 2210428d7b3dSmrg v[4] = dst.f; 2211428d7b3dSmrg v[9] = v[5] = (b->box.x1 + tx) * sx; 2212428d7b3dSmrg 2213428d7b3dSmrg dst.p.y = b->box.y1; 2214428d7b3dSmrg v[8] = dst.f; 2215428d7b3dSmrg v[10] = (b->box.y1 + ty) * sy; 2216428d7b3dSmrg 2217428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2218428d7b3dSmrg 2219428d7b3dSmrg v += 12; 2220428d7b3dSmrg b++; 2221428d7b3dSmrg } while (--nbox); 2222428d7b3dSmrg} 2223428d7b3dSmrg 2224428d7b3dSmrgsse2 fastcall static void 2225428d7b3dSmrgemit_span_simple(struct sna *sna, 2226428d7b3dSmrg const struct sna_composite_spans_op *op, 2227428d7b3dSmrg const BoxRec *box, 2228428d7b3dSmrg float opacity) 2229428d7b3dSmrg{ 2230428d7b3dSmrg float *v; 2231428d7b3dSmrg union { 2232428d7b3dSmrg struct sna_coordinate p; 2233428d7b3dSmrg float f; 2234428d7b3dSmrg } dst; 2235428d7b3dSmrg 2236428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2237428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2238428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2239428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2240428d7b3dSmrg float sx = op->base.src.scale[0]; 2241428d7b3dSmrg float sy = op->base.src.scale[1]; 2242428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2243428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2244428d7b3dSmrg 2245428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2246428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2247428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2248428d7b3dSmrg sna->render.vertex_used += 3*4; 2249428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2250428d7b3dSmrg 2251428d7b3dSmrg dst.p.x = box->x2; 2252428d7b3dSmrg dst.p.y = box->y2; 2253428d7b3dSmrg v[0] = dst.f; 2254428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 2255428d7b3dSmrg v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 2256428d7b3dSmrg 2257428d7b3dSmrg dst.p.x = box->x1; 2258428d7b3dSmrg v[4] = dst.f; 2259428d7b3dSmrg v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; 2260428d7b3dSmrg 2261428d7b3dSmrg dst.p.y = box->y1; 2262428d7b3dSmrg v[8] = dst.f; 2263428d7b3dSmrg v[10] = ((box->y1 + ty) * yy + y0) * sy; 2264428d7b3dSmrg 2265428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2266428d7b3dSmrg} 2267428d7b3dSmrg 2268428d7b3dSmrgsse2 fastcall static void 2269428d7b3dSmrgemit_span_boxes_simple(const struct sna_composite_spans_op *op, 2270428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2271428d7b3dSmrg float *v) 2272428d7b3dSmrg{ 2273428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2274428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2275428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2276428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2277428d7b3dSmrg float sx = op->base.src.scale[0]; 2278428d7b3dSmrg float sy = op->base.src.scale[1]; 2279428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2280428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2281428d7b3dSmrg 2282428d7b3dSmrg do { 2283428d7b3dSmrg union { 2284428d7b3dSmrg struct sna_coordinate p; 2285428d7b3dSmrg float f; 2286428d7b3dSmrg } dst; 2287428d7b3dSmrg 2288428d7b3dSmrg dst.p.x = b->box.x2; 2289428d7b3dSmrg dst.p.y = b->box.y2; 2290428d7b3dSmrg v[0] = dst.f; 2291428d7b3dSmrg v[1] = ((b->box.x2 + tx) * xx + x0) * sx; 2292428d7b3dSmrg v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; 2293428d7b3dSmrg 2294428d7b3dSmrg dst.p.x = b->box.x1; 2295428d7b3dSmrg v[4] = dst.f; 2296428d7b3dSmrg v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; 2297428d7b3dSmrg 2298428d7b3dSmrg dst.p.y = b->box.y1; 2299428d7b3dSmrg v[8] = dst.f; 2300428d7b3dSmrg v[10] = ((b->box.y1 + ty) * yy + y0) * sy; 2301428d7b3dSmrg 2302428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2303428d7b3dSmrg 2304428d7b3dSmrg v += 12; 2305428d7b3dSmrg b++; 2306428d7b3dSmrg } while (--nbox); 2307428d7b3dSmrg} 2308428d7b3dSmrg 2309428d7b3dSmrgsse2 fastcall static void 2310428d7b3dSmrgemit_span_affine(struct sna *sna, 2311428d7b3dSmrg const struct sna_composite_spans_op *op, 2312428d7b3dSmrg const BoxRec *box, 2313428d7b3dSmrg float opacity) 2314428d7b3dSmrg{ 2315428d7b3dSmrg union { 2316428d7b3dSmrg struct sna_coordinate p; 2317428d7b3dSmrg float f; 2318428d7b3dSmrg } dst; 2319428d7b3dSmrg float *v; 2320428d7b3dSmrg 2321428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2322428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2323428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2324428d7b3dSmrg sna->render.vertex_used += 12; 2325428d7b3dSmrg 2326428d7b3dSmrg dst.p.x = box->x2; 2327428d7b3dSmrg dst.p.y = box->y2; 2328428d7b3dSmrg v[0] = dst.f; 2329428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, 2330428d7b3dSmrg op->base.src.offset[1] + box->y2, 2331428d7b3dSmrg op->base.src.transform, 2332428d7b3dSmrg op->base.src.scale, 2333428d7b3dSmrg &v[1], &v[2]); 2334428d7b3dSmrg 2335428d7b3dSmrg dst.p.x = box->x1; 2336428d7b3dSmrg v[4] = dst.f; 2337428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2338428d7b3dSmrg op->base.src.offset[1] + box->y2, 2339428d7b3dSmrg op->base.src.transform, 2340428d7b3dSmrg op->base.src.scale, 2341428d7b3dSmrg &v[5], &v[6]); 2342428d7b3dSmrg 2343428d7b3dSmrg dst.p.y = box->y1; 2344428d7b3dSmrg v[8] = dst.f; 2345428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2346428d7b3dSmrg op->base.src.offset[1] + box->y1, 2347428d7b3dSmrg op->base.src.transform, 2348428d7b3dSmrg op->base.src.scale, 2349428d7b3dSmrg &v[9], &v[10]); 2350428d7b3dSmrg 2351428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2352428d7b3dSmrg} 2353428d7b3dSmrg 2354428d7b3dSmrgsse2 fastcall static void 2355428d7b3dSmrgemit_span_boxes_affine(const struct sna_composite_spans_op *op, 2356428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2357428d7b3dSmrg float *v) 2358428d7b3dSmrg{ 2359428d7b3dSmrg do { 2360428d7b3dSmrg union { 2361428d7b3dSmrg struct sna_coordinate p; 2362428d7b3dSmrg float f; 2363428d7b3dSmrg } dst; 2364428d7b3dSmrg 2365428d7b3dSmrg dst.p.x = b->box.x2; 2366428d7b3dSmrg dst.p.y = b->box.y2; 2367428d7b3dSmrg v[0] = dst.f; 2368428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, 2369428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 2370428d7b3dSmrg op->base.src.transform, 2371428d7b3dSmrg op->base.src.scale, 2372428d7b3dSmrg &v[1], &v[2]); 2373428d7b3dSmrg 2374428d7b3dSmrg dst.p.x = b->box.x1; 2375428d7b3dSmrg v[4] = dst.f; 2376428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 2377428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 2378428d7b3dSmrg op->base.src.transform, 2379428d7b3dSmrg op->base.src.scale, 2380428d7b3dSmrg &v[5], &v[6]); 2381428d7b3dSmrg 2382428d7b3dSmrg dst.p.y = b->box.y1; 2383428d7b3dSmrg v[8] = dst.f; 2384428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 2385428d7b3dSmrg op->base.src.offset[1] + b->box.y1, 2386428d7b3dSmrg op->base.src.transform, 2387428d7b3dSmrg op->base.src.scale, 2388428d7b3dSmrg &v[9], &v[10]); 2389428d7b3dSmrg 2390428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2391428d7b3dSmrg 2392428d7b3dSmrg v += 12; 2393428d7b3dSmrg b++; 2394428d7b3dSmrg } while (--nbox); 2395428d7b3dSmrg} 2396428d7b3dSmrg 2397428d7b3dSmrgsse2 fastcall static void 2398428d7b3dSmrgemit_span_linear(struct sna *sna, 2399428d7b3dSmrg const struct sna_composite_spans_op *op, 2400428d7b3dSmrg const BoxRec *box, 2401428d7b3dSmrg float opacity) 2402428d7b3dSmrg{ 2403428d7b3dSmrg union { 2404428d7b3dSmrg struct sna_coordinate p; 2405428d7b3dSmrg float f; 2406428d7b3dSmrg } dst; 2407428d7b3dSmrg float *v; 2408428d7b3dSmrg 2409428d7b3dSmrg assert(op->base.floats_per_rect == 9); 2410428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 2411428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2412428d7b3dSmrg sna->render.vertex_used += 9; 2413428d7b3dSmrg 2414428d7b3dSmrg dst.p.x = box->x2; 2415428d7b3dSmrg dst.p.y = box->y2; 2416428d7b3dSmrg v[0] = dst.f; 2417428d7b3dSmrg dst.p.x = box->x1; 2418428d7b3dSmrg v[3] = dst.f; 2419428d7b3dSmrg dst.p.y = box->y1; 2420428d7b3dSmrg v[6] = dst.f; 2421428d7b3dSmrg 2422428d7b3dSmrg v[1] = compute_linear(&op->base.src, box->x2, box->y2); 2423428d7b3dSmrg v[4] = compute_linear(&op->base.src, box->x1, box->y2); 2424428d7b3dSmrg v[7] = compute_linear(&op->base.src, box->x1, box->y1); 2425428d7b3dSmrg 2426428d7b3dSmrg v[8] = v[5] = v[2] = opacity; 2427428d7b3dSmrg} 2428428d7b3dSmrg 2429428d7b3dSmrgsse2 fastcall static void 2430428d7b3dSmrgemit_span_boxes_linear(const struct sna_composite_spans_op *op, 2431428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2432428d7b3dSmrg float *v) 2433428d7b3dSmrg{ 2434428d7b3dSmrg do { 2435428d7b3dSmrg union { 2436428d7b3dSmrg struct sna_coordinate p; 2437428d7b3dSmrg float f; 2438428d7b3dSmrg } dst; 2439428d7b3dSmrg 2440428d7b3dSmrg dst.p.x = b->box.x2; 2441428d7b3dSmrg dst.p.y = b->box.y2; 2442428d7b3dSmrg v[0] = dst.f; 2443428d7b3dSmrg dst.p.x = b->box.x1; 2444428d7b3dSmrg v[3] = dst.f; 2445428d7b3dSmrg dst.p.y = b->box.y1; 2446428d7b3dSmrg v[6] = dst.f; 2447428d7b3dSmrg 2448428d7b3dSmrg v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); 2449428d7b3dSmrg v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); 2450428d7b3dSmrg v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1); 2451428d7b3dSmrg 2452428d7b3dSmrg v[8] = v[5] = v[2] = b->alpha; 2453428d7b3dSmrg 2454428d7b3dSmrg v += 9; 2455428d7b3dSmrg b++; 2456428d7b3dSmrg } while (--nbox); 2457428d7b3dSmrg} 2458428d7b3dSmrg 2459428d7b3dSmrg/* SSE4_2 */ 2460428d7b3dSmrg#if defined(sse4_2) 2461428d7b3dSmrg 2462428d7b3dSmrgsse4_2 fastcall static void 2463428d7b3dSmrgemit_span_identity__sse4_2(struct sna *sna, 2464428d7b3dSmrg const struct sna_composite_spans_op *op, 2465428d7b3dSmrg const BoxRec *box, 2466428d7b3dSmrg float opacity) 2467428d7b3dSmrg{ 2468428d7b3dSmrg float *v; 2469428d7b3dSmrg union { 2470428d7b3dSmrg struct sna_coordinate p; 2471428d7b3dSmrg float f; 2472428d7b3dSmrg } dst; 2473428d7b3dSmrg 2474428d7b3dSmrg float sx = op->base.src.scale[0]; 2475428d7b3dSmrg float sy = op->base.src.scale[1]; 2476428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2477428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2478428d7b3dSmrg 2479428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2480428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2481428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2482428d7b3dSmrg sna->render.vertex_used += 3*4; 2483428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2484428d7b3dSmrg 2485428d7b3dSmrg dst.p.x = box->x2; 2486428d7b3dSmrg dst.p.y = box->y2; 2487428d7b3dSmrg v[0] = dst.f; 2488428d7b3dSmrg v[1] = (box->x2 + tx) * sx; 2489428d7b3dSmrg v[6] = v[2] = (box->y2 + ty) * sy; 2490428d7b3dSmrg 2491428d7b3dSmrg dst.p.x = box->x1; 2492428d7b3dSmrg v[4] = dst.f; 2493428d7b3dSmrg v[9] = v[5] = (box->x1 + tx) * sx; 2494428d7b3dSmrg 2495428d7b3dSmrg dst.p.y = box->y1; 2496428d7b3dSmrg v[8] = dst.f; 2497428d7b3dSmrg v[10] = (box->y1 + ty) * sy; 2498428d7b3dSmrg 2499428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2500428d7b3dSmrg} 2501428d7b3dSmrg 2502428d7b3dSmrgsse4_2 fastcall static void 2503428d7b3dSmrgemit_span_boxes_identity__sse4_2(const struct sna_composite_spans_op *op, 2504428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2505428d7b3dSmrg float *v) 2506428d7b3dSmrg{ 2507428d7b3dSmrg do { 2508428d7b3dSmrg union { 2509428d7b3dSmrg struct sna_coordinate p; 2510428d7b3dSmrg float f; 2511428d7b3dSmrg } dst; 2512428d7b3dSmrg 2513428d7b3dSmrg float sx = op->base.src.scale[0]; 2514428d7b3dSmrg float sy = op->base.src.scale[1]; 2515428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2516428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2517428d7b3dSmrg 2518428d7b3dSmrg dst.p.x = b->box.x2; 2519428d7b3dSmrg dst.p.y = b->box.y2; 2520428d7b3dSmrg v[0] = dst.f; 2521428d7b3dSmrg v[1] = (b->box.x2 + tx) * sx; 2522428d7b3dSmrg v[6] = v[2] = (b->box.y2 + ty) * sy; 2523428d7b3dSmrg 2524428d7b3dSmrg dst.p.x = b->box.x1; 2525428d7b3dSmrg v[4] = dst.f; 2526428d7b3dSmrg v[9] = v[5] = (b->box.x1 + tx) * sx; 2527428d7b3dSmrg 2528428d7b3dSmrg dst.p.y = b->box.y1; 2529428d7b3dSmrg v[8] = dst.f; 2530428d7b3dSmrg v[10] = (b->box.y1 + ty) * sy; 2531428d7b3dSmrg 2532428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2533428d7b3dSmrg 2534428d7b3dSmrg v += 12; 2535428d7b3dSmrg b++; 2536428d7b3dSmrg } while (--nbox); 2537428d7b3dSmrg} 2538428d7b3dSmrg 2539428d7b3dSmrgsse4_2 fastcall static void 2540428d7b3dSmrgemit_span_simple__sse4_2(struct sna *sna, 2541428d7b3dSmrg const struct sna_composite_spans_op *op, 2542428d7b3dSmrg const BoxRec *box, 2543428d7b3dSmrg float opacity) 2544428d7b3dSmrg{ 2545428d7b3dSmrg float *v; 2546428d7b3dSmrg union { 2547428d7b3dSmrg struct sna_coordinate p; 2548428d7b3dSmrg float f; 2549428d7b3dSmrg } dst; 2550428d7b3dSmrg 2551428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2552428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2553428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2554428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2555428d7b3dSmrg float sx = op->base.src.scale[0]; 2556428d7b3dSmrg float sy = op->base.src.scale[1]; 2557428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2558428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2559428d7b3dSmrg 2560428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2561428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2562428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2563428d7b3dSmrg sna->render.vertex_used += 3*4; 2564428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2565428d7b3dSmrg 2566428d7b3dSmrg dst.p.x = box->x2; 2567428d7b3dSmrg dst.p.y = box->y2; 2568428d7b3dSmrg v[0] = dst.f; 2569428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 2570428d7b3dSmrg v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 2571428d7b3dSmrg 2572428d7b3dSmrg dst.p.x = box->x1; 2573428d7b3dSmrg v[4] = dst.f; 2574428d7b3dSmrg v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; 2575428d7b3dSmrg 2576428d7b3dSmrg dst.p.y = box->y1; 2577428d7b3dSmrg v[8] = dst.f; 2578428d7b3dSmrg v[10] = ((box->y1 + ty) * yy + y0) * sy; 2579428d7b3dSmrg 2580428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2581428d7b3dSmrg} 2582428d7b3dSmrg 2583428d7b3dSmrgsse4_2 fastcall static void 2584428d7b3dSmrgemit_span_boxes_simple__sse4_2(const struct sna_composite_spans_op *op, 2585428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2586428d7b3dSmrg float *v) 2587428d7b3dSmrg{ 2588428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2589428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2590428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2591428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2592428d7b3dSmrg float sx = op->base.src.scale[0]; 2593428d7b3dSmrg float sy = op->base.src.scale[1]; 2594428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2595428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2596428d7b3dSmrg 2597428d7b3dSmrg do { 2598428d7b3dSmrg union { 2599428d7b3dSmrg struct sna_coordinate p; 2600428d7b3dSmrg float f; 2601428d7b3dSmrg } dst; 2602428d7b3dSmrg 2603428d7b3dSmrg dst.p.x = b->box.x2; 2604428d7b3dSmrg dst.p.y = b->box.y2; 2605428d7b3dSmrg v[0] = dst.f; 2606428d7b3dSmrg v[1] = ((b->box.x2 + tx) * xx + x0) * sx; 2607428d7b3dSmrg v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; 2608428d7b3dSmrg 2609428d7b3dSmrg dst.p.x = b->box.x1; 2610428d7b3dSmrg v[4] = dst.f; 2611428d7b3dSmrg v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; 2612428d7b3dSmrg 2613428d7b3dSmrg dst.p.y = b->box.y1; 2614428d7b3dSmrg v[8] = dst.f; 2615428d7b3dSmrg v[10] = ((b->box.y1 + ty) * yy + y0) * sy; 2616428d7b3dSmrg 2617428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2618428d7b3dSmrg 2619428d7b3dSmrg v += 12; 2620428d7b3dSmrg b++; 2621428d7b3dSmrg } while (--nbox); 2622428d7b3dSmrg} 2623428d7b3dSmrg 2624428d7b3dSmrgsse4_2 fastcall static void 2625428d7b3dSmrgemit_span_affine__sse4_2(struct sna *sna, 2626428d7b3dSmrg const struct sna_composite_spans_op *op, 2627428d7b3dSmrg const BoxRec *box, 2628428d7b3dSmrg float opacity) 2629428d7b3dSmrg{ 2630428d7b3dSmrg union { 2631428d7b3dSmrg struct sna_coordinate p; 2632428d7b3dSmrg float f; 2633428d7b3dSmrg } dst; 2634428d7b3dSmrg float *v; 2635428d7b3dSmrg 2636428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2637428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2638428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2639428d7b3dSmrg sna->render.vertex_used += 12; 2640428d7b3dSmrg 2641428d7b3dSmrg dst.p.x = box->x2; 2642428d7b3dSmrg dst.p.y = box->y2; 2643428d7b3dSmrg v[0] = dst.f; 2644428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, 2645428d7b3dSmrg op->base.src.offset[1] + box->y2, 2646428d7b3dSmrg op->base.src.transform, 2647428d7b3dSmrg op->base.src.scale, 2648428d7b3dSmrg &v[1], &v[2]); 2649428d7b3dSmrg 2650428d7b3dSmrg dst.p.x = box->x1; 2651428d7b3dSmrg v[4] = dst.f; 2652428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2653428d7b3dSmrg op->base.src.offset[1] + box->y2, 2654428d7b3dSmrg op->base.src.transform, 2655428d7b3dSmrg op->base.src.scale, 2656428d7b3dSmrg &v[5], &v[6]); 2657428d7b3dSmrg 2658428d7b3dSmrg dst.p.y = box->y1; 2659428d7b3dSmrg v[8] = dst.f; 2660428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2661428d7b3dSmrg op->base.src.offset[1] + box->y1, 2662428d7b3dSmrg op->base.src.transform, 2663428d7b3dSmrg op->base.src.scale, 2664428d7b3dSmrg &v[9], &v[10]); 2665428d7b3dSmrg 2666428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2667428d7b3dSmrg} 2668428d7b3dSmrg 2669428d7b3dSmrgsse4_2 fastcall static void 2670428d7b3dSmrgemit_span_boxes_affine__sse4_2(const struct sna_composite_spans_op *op, 2671428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2672428d7b3dSmrg float *v) 2673428d7b3dSmrg{ 2674428d7b3dSmrg do { 2675428d7b3dSmrg union { 2676428d7b3dSmrg struct sna_coordinate p; 2677428d7b3dSmrg float f; 2678428d7b3dSmrg } dst; 2679428d7b3dSmrg 2680428d7b3dSmrg dst.p.x = b->box.x2; 2681428d7b3dSmrg dst.p.y = b->box.y2; 2682428d7b3dSmrg v[0] = dst.f; 2683428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, 2684428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 2685428d7b3dSmrg op->base.src.transform, 2686428d7b3dSmrg op->base.src.scale, 2687428d7b3dSmrg &v[1], &v[2]); 2688428d7b3dSmrg 2689428d7b3dSmrg dst.p.x = b->box.x1; 2690428d7b3dSmrg v[4] = dst.f; 2691428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 2692428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 2693428d7b3dSmrg op->base.src.transform, 2694428d7b3dSmrg op->base.src.scale, 2695428d7b3dSmrg &v[5], &v[6]); 2696428d7b3dSmrg 2697428d7b3dSmrg dst.p.y = b->box.y1; 2698428d7b3dSmrg v[8] = dst.f; 2699428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 2700428d7b3dSmrg op->base.src.offset[1] + b->box.y1, 2701428d7b3dSmrg op->base.src.transform, 2702428d7b3dSmrg op->base.src.scale, 2703428d7b3dSmrg &v[9], &v[10]); 2704428d7b3dSmrg 2705428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2706428d7b3dSmrg 2707428d7b3dSmrg v += 12; 2708428d7b3dSmrg b++; 2709428d7b3dSmrg } while (--nbox); 2710428d7b3dSmrg} 2711428d7b3dSmrg 2712428d7b3dSmrgsse4_2 fastcall static void 2713428d7b3dSmrgemit_span_linear__sse4_2(struct sna *sna, 2714428d7b3dSmrg const struct sna_composite_spans_op *op, 2715428d7b3dSmrg const BoxRec *box, 2716428d7b3dSmrg float opacity) 2717428d7b3dSmrg{ 2718428d7b3dSmrg union { 2719428d7b3dSmrg struct sna_coordinate p; 2720428d7b3dSmrg float f; 2721428d7b3dSmrg } dst; 2722428d7b3dSmrg float *v; 2723428d7b3dSmrg 2724428d7b3dSmrg assert(op->base.floats_per_rect == 9); 2725428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 2726428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2727428d7b3dSmrg sna->render.vertex_used += 9; 2728428d7b3dSmrg 2729428d7b3dSmrg dst.p.x = box->x2; 2730428d7b3dSmrg dst.p.y = box->y2; 2731428d7b3dSmrg v[0] = dst.f; 2732428d7b3dSmrg dst.p.x = box->x1; 2733428d7b3dSmrg v[3] = dst.f; 2734428d7b3dSmrg dst.p.y = box->y1; 2735428d7b3dSmrg v[6] = dst.f; 2736428d7b3dSmrg 2737428d7b3dSmrg v[1] = compute_linear(&op->base.src, box->x2, box->y2); 2738428d7b3dSmrg v[4] = compute_linear(&op->base.src, box->x1, box->y2); 2739428d7b3dSmrg v[7] = compute_linear(&op->base.src, box->x1, box->y1); 2740428d7b3dSmrg 2741428d7b3dSmrg v[8] = v[5] = v[2] = opacity; 2742428d7b3dSmrg} 2743428d7b3dSmrg 2744428d7b3dSmrgsse4_2 fastcall static void 2745428d7b3dSmrgemit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op, 2746428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2747428d7b3dSmrg float *v) 2748428d7b3dSmrg{ 2749428d7b3dSmrg do { 2750428d7b3dSmrg union { 2751428d7b3dSmrg struct sna_coordinate p; 2752428d7b3dSmrg float f; 2753428d7b3dSmrg } dst; 2754428d7b3dSmrg 2755428d7b3dSmrg dst.p.x = b->box.x2; 2756428d7b3dSmrg dst.p.y = b->box.y2; 2757428d7b3dSmrg v[0] = dst.f; 2758428d7b3dSmrg dst.p.x = b->box.x1; 2759428d7b3dSmrg v[3] = dst.f; 2760428d7b3dSmrg dst.p.y = b->box.y1; 2761428d7b3dSmrg v[6] = dst.f; 2762428d7b3dSmrg 2763428d7b3dSmrg v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); 2764428d7b3dSmrg v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); 2765428d7b3dSmrg v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1); 2766428d7b3dSmrg 2767428d7b3dSmrg v[8] = v[5] = v[2] = b->alpha; 2768428d7b3dSmrg 2769428d7b3dSmrg v += 9; 2770428d7b3dSmrg b++; 2771428d7b3dSmrg } while (--nbox); 2772428d7b3dSmrg} 2773428d7b3dSmrg 2774428d7b3dSmrg#endif 2775428d7b3dSmrg 2776428d7b3dSmrg/* AVX2 */ 2777428d7b3dSmrg#if defined(avx2) 2778428d7b3dSmrg 2779428d7b3dSmrgavx2 fastcall static void 2780428d7b3dSmrgemit_span_identity__avx2(struct sna *sna, 2781428d7b3dSmrg const struct sna_composite_spans_op *op, 2782428d7b3dSmrg const BoxRec *box, 2783428d7b3dSmrg float opacity) 2784428d7b3dSmrg{ 2785428d7b3dSmrg float *v; 2786428d7b3dSmrg union { 2787428d7b3dSmrg struct sna_coordinate p; 2788428d7b3dSmrg float f; 2789428d7b3dSmrg } dst; 2790428d7b3dSmrg 2791428d7b3dSmrg float sx = op->base.src.scale[0]; 2792428d7b3dSmrg float sy = op->base.src.scale[1]; 2793428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2794428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2795428d7b3dSmrg 2796428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2797428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2798428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2799428d7b3dSmrg sna->render.vertex_used += 3*4; 2800428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2801428d7b3dSmrg 2802428d7b3dSmrg dst.p.x = box->x2; 2803428d7b3dSmrg dst.p.y = box->y2; 2804428d7b3dSmrg v[0] = dst.f; 2805428d7b3dSmrg v[1] = (box->x2 + tx) * sx; 2806428d7b3dSmrg v[6] = v[2] = (box->y2 + ty) * sy; 2807428d7b3dSmrg 2808428d7b3dSmrg dst.p.x = box->x1; 2809428d7b3dSmrg v[4] = dst.f; 2810428d7b3dSmrg v[9] = v[5] = (box->x1 + tx) * sx; 2811428d7b3dSmrg 2812428d7b3dSmrg dst.p.y = box->y1; 2813428d7b3dSmrg v[8] = dst.f; 2814428d7b3dSmrg v[10] = (box->y1 + ty) * sy; 2815428d7b3dSmrg 2816428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2817428d7b3dSmrg} 2818428d7b3dSmrg 2819428d7b3dSmrgavx2 fastcall static void 2820428d7b3dSmrgemit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op, 2821428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2822428d7b3dSmrg float *v) 2823428d7b3dSmrg{ 2824428d7b3dSmrg do { 2825428d7b3dSmrg union { 2826428d7b3dSmrg struct sna_coordinate p; 2827428d7b3dSmrg float f; 2828428d7b3dSmrg } dst; 2829428d7b3dSmrg 2830428d7b3dSmrg float sx = op->base.src.scale[0]; 2831428d7b3dSmrg float sy = op->base.src.scale[1]; 2832428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2833428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2834428d7b3dSmrg 2835428d7b3dSmrg dst.p.x = b->box.x2; 2836428d7b3dSmrg dst.p.y = b->box.y2; 2837428d7b3dSmrg v[0] = dst.f; 2838428d7b3dSmrg v[1] = (b->box.x2 + tx) * sx; 2839428d7b3dSmrg v[6] = v[2] = (b->box.y2 + ty) * sy; 2840428d7b3dSmrg 2841428d7b3dSmrg dst.p.x = b->box.x1; 2842428d7b3dSmrg v[4] = dst.f; 2843428d7b3dSmrg v[9] = v[5] = (b->box.x1 + tx) * sx; 2844428d7b3dSmrg 2845428d7b3dSmrg dst.p.y = b->box.y1; 2846428d7b3dSmrg v[8] = dst.f; 2847428d7b3dSmrg v[10] = (b->box.y1 + ty) * sy; 2848428d7b3dSmrg 2849428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2850428d7b3dSmrg 2851428d7b3dSmrg v += 12; 2852428d7b3dSmrg b++; 2853428d7b3dSmrg } while (--nbox); 2854428d7b3dSmrg} 2855428d7b3dSmrg 2856428d7b3dSmrgavx2 fastcall static void 2857428d7b3dSmrgemit_span_simple__avx2(struct sna *sna, 2858428d7b3dSmrg const struct sna_composite_spans_op *op, 2859428d7b3dSmrg const BoxRec *box, 2860428d7b3dSmrg float opacity) 2861428d7b3dSmrg{ 2862428d7b3dSmrg float *v; 2863428d7b3dSmrg union { 2864428d7b3dSmrg struct sna_coordinate p; 2865428d7b3dSmrg float f; 2866428d7b3dSmrg } dst; 2867428d7b3dSmrg 2868428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2869428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2870428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2871428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2872428d7b3dSmrg float sx = op->base.src.scale[0]; 2873428d7b3dSmrg float sy = op->base.src.scale[1]; 2874428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2875428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2876428d7b3dSmrg 2877428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2878428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2879428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2880428d7b3dSmrg sna->render.vertex_used += 3*4; 2881428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2882428d7b3dSmrg 2883428d7b3dSmrg dst.p.x = box->x2; 2884428d7b3dSmrg dst.p.y = box->y2; 2885428d7b3dSmrg v[0] = dst.f; 2886428d7b3dSmrg v[1] = ((box->x2 + tx) * xx + x0) * sx; 2887428d7b3dSmrg v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; 2888428d7b3dSmrg 2889428d7b3dSmrg dst.p.x = box->x1; 2890428d7b3dSmrg v[4] = dst.f; 2891428d7b3dSmrg v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; 2892428d7b3dSmrg 2893428d7b3dSmrg dst.p.y = box->y1; 2894428d7b3dSmrg v[8] = dst.f; 2895428d7b3dSmrg v[10] = ((box->y1 + ty) * yy + y0) * sy; 2896428d7b3dSmrg 2897428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2898428d7b3dSmrg} 2899428d7b3dSmrg 2900428d7b3dSmrgavx2 fastcall static void 2901428d7b3dSmrgemit_span_boxes_simple__avx2(const struct sna_composite_spans_op *op, 2902428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2903428d7b3dSmrg float *v) 2904428d7b3dSmrg{ 2905428d7b3dSmrg float xx = op->base.src.transform->matrix[0][0]; 2906428d7b3dSmrg float x0 = op->base.src.transform->matrix[0][2]; 2907428d7b3dSmrg float yy = op->base.src.transform->matrix[1][1]; 2908428d7b3dSmrg float y0 = op->base.src.transform->matrix[1][2]; 2909428d7b3dSmrg float sx = op->base.src.scale[0]; 2910428d7b3dSmrg float sy = op->base.src.scale[1]; 2911428d7b3dSmrg int16_t tx = op->base.src.offset[0]; 2912428d7b3dSmrg int16_t ty = op->base.src.offset[1]; 2913428d7b3dSmrg 2914428d7b3dSmrg do { 2915428d7b3dSmrg union { 2916428d7b3dSmrg struct sna_coordinate p; 2917428d7b3dSmrg float f; 2918428d7b3dSmrg } dst; 2919428d7b3dSmrg 2920428d7b3dSmrg dst.p.x = b->box.x2; 2921428d7b3dSmrg dst.p.y = b->box.y2; 2922428d7b3dSmrg v[0] = dst.f; 2923428d7b3dSmrg v[1] = ((b->box.x2 + tx) * xx + x0) * sx; 2924428d7b3dSmrg v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; 2925428d7b3dSmrg 2926428d7b3dSmrg dst.p.x = b->box.x1; 2927428d7b3dSmrg v[4] = dst.f; 2928428d7b3dSmrg v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; 2929428d7b3dSmrg 2930428d7b3dSmrg dst.p.y = b->box.y1; 2931428d7b3dSmrg v[8] = dst.f; 2932428d7b3dSmrg v[10] = ((b->box.y1 + ty) * yy + y0) * sy; 2933428d7b3dSmrg 2934428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 2935428d7b3dSmrg 2936428d7b3dSmrg v += 12; 2937428d7b3dSmrg b++; 2938428d7b3dSmrg } while (--nbox); 2939428d7b3dSmrg} 2940428d7b3dSmrg 2941428d7b3dSmrgavx2 fastcall static void 2942428d7b3dSmrgemit_span_affine__avx2(struct sna *sna, 2943428d7b3dSmrg const struct sna_composite_spans_op *op, 2944428d7b3dSmrg const BoxRec *box, 2945428d7b3dSmrg float opacity) 2946428d7b3dSmrg{ 2947428d7b3dSmrg union { 2948428d7b3dSmrg struct sna_coordinate p; 2949428d7b3dSmrg float f; 2950428d7b3dSmrg } dst; 2951428d7b3dSmrg float *v; 2952428d7b3dSmrg 2953428d7b3dSmrg assert(op->base.floats_per_rect == 12); 2954428d7b3dSmrg assert((sna->render.vertex_used % 4) == 0); 2955428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2956428d7b3dSmrg sna->render.vertex_used += 12; 2957428d7b3dSmrg 2958428d7b3dSmrg dst.p.x = box->x2; 2959428d7b3dSmrg dst.p.y = box->y2; 2960428d7b3dSmrg v[0] = dst.f; 2961428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, 2962428d7b3dSmrg op->base.src.offset[1] + box->y2, 2963428d7b3dSmrg op->base.src.transform, 2964428d7b3dSmrg op->base.src.scale, 2965428d7b3dSmrg &v[1], &v[2]); 2966428d7b3dSmrg 2967428d7b3dSmrg dst.p.x = box->x1; 2968428d7b3dSmrg v[4] = dst.f; 2969428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2970428d7b3dSmrg op->base.src.offset[1] + box->y2, 2971428d7b3dSmrg op->base.src.transform, 2972428d7b3dSmrg op->base.src.scale, 2973428d7b3dSmrg &v[5], &v[6]); 2974428d7b3dSmrg 2975428d7b3dSmrg dst.p.y = box->y1; 2976428d7b3dSmrg v[8] = dst.f; 2977428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, 2978428d7b3dSmrg op->base.src.offset[1] + box->y1, 2979428d7b3dSmrg op->base.src.transform, 2980428d7b3dSmrg op->base.src.scale, 2981428d7b3dSmrg &v[9], &v[10]); 2982428d7b3dSmrg 2983428d7b3dSmrg v[11] = v[7] = v[3] = opacity; 2984428d7b3dSmrg} 2985428d7b3dSmrg 2986428d7b3dSmrgavx2 fastcall static void 2987428d7b3dSmrgemit_span_boxes_affine__avx2(const struct sna_composite_spans_op *op, 2988428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 2989428d7b3dSmrg float *v) 2990428d7b3dSmrg{ 2991428d7b3dSmrg do { 2992428d7b3dSmrg union { 2993428d7b3dSmrg struct sna_coordinate p; 2994428d7b3dSmrg float f; 2995428d7b3dSmrg } dst; 2996428d7b3dSmrg 2997428d7b3dSmrg dst.p.x = b->box.x2; 2998428d7b3dSmrg dst.p.y = b->box.y2; 2999428d7b3dSmrg v[0] = dst.f; 3000428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, 3001428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 3002428d7b3dSmrg op->base.src.transform, 3003428d7b3dSmrg op->base.src.scale, 3004428d7b3dSmrg &v[1], &v[2]); 3005428d7b3dSmrg 3006428d7b3dSmrg dst.p.x = b->box.x1; 3007428d7b3dSmrg v[4] = dst.f; 3008428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 3009428d7b3dSmrg op->base.src.offset[1] + b->box.y2, 3010428d7b3dSmrg op->base.src.transform, 3011428d7b3dSmrg op->base.src.scale, 3012428d7b3dSmrg &v[5], &v[6]); 3013428d7b3dSmrg 3014428d7b3dSmrg dst.p.y = b->box.y1; 3015428d7b3dSmrg v[8] = dst.f; 3016428d7b3dSmrg _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, 3017428d7b3dSmrg op->base.src.offset[1] + b->box.y1, 3018428d7b3dSmrg op->base.src.transform, 3019428d7b3dSmrg op->base.src.scale, 3020428d7b3dSmrg &v[9], &v[10]); 3021428d7b3dSmrg 3022428d7b3dSmrg v[11] = v[7] = v[3] = b->alpha; 3023428d7b3dSmrg 3024428d7b3dSmrg v += 12; 3025428d7b3dSmrg b++; 3026428d7b3dSmrg } while (--nbox); 3027428d7b3dSmrg} 3028428d7b3dSmrg 3029428d7b3dSmrgavx2 fastcall static void 3030428d7b3dSmrgemit_span_linear__avx2(struct sna *sna, 3031428d7b3dSmrg const struct sna_composite_spans_op *op, 3032428d7b3dSmrg const BoxRec *box, 3033428d7b3dSmrg float opacity) 3034428d7b3dSmrg{ 3035428d7b3dSmrg union { 3036428d7b3dSmrg struct sna_coordinate p; 3037428d7b3dSmrg float f; 3038428d7b3dSmrg } dst; 3039428d7b3dSmrg float *v; 3040428d7b3dSmrg 3041428d7b3dSmrg assert(op->base.floats_per_rect == 9); 3042428d7b3dSmrg assert((sna->render.vertex_used % 3) == 0); 3043428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 3044428d7b3dSmrg sna->render.vertex_used += 9; 3045428d7b3dSmrg 3046428d7b3dSmrg dst.p.x = box->x2; 3047428d7b3dSmrg dst.p.y = box->y2; 3048428d7b3dSmrg v[0] = dst.f; 3049428d7b3dSmrg dst.p.x = box->x1; 3050428d7b3dSmrg v[3] = dst.f; 3051428d7b3dSmrg dst.p.y = box->y1; 3052428d7b3dSmrg v[6] = dst.f; 3053428d7b3dSmrg 3054428d7b3dSmrg v[1] = compute_linear(&op->base.src, box->x2, box->y2); 3055428d7b3dSmrg v[4] = compute_linear(&op->base.src, box->x1, box->y2); 3056428d7b3dSmrg v[7] = compute_linear(&op->base.src, box->x1, box->y1); 3057428d7b3dSmrg 3058428d7b3dSmrg v[8] = v[5] = v[2] = opacity; 3059428d7b3dSmrg} 3060428d7b3dSmrg 3061428d7b3dSmrgavx2 fastcall static void 3062428d7b3dSmrgemit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op, 3063428d7b3dSmrg const struct sna_opacity_box *b, int nbox, 3064428d7b3dSmrg float *v) 3065428d7b3dSmrg{ 3066428d7b3dSmrg do { 3067428d7b3dSmrg union { 3068428d7b3dSmrg struct sna_coordinate p; 3069428d7b3dSmrg float f; 3070428d7b3dSmrg } dst; 3071428d7b3dSmrg 3072428d7b3dSmrg dst.p.x = b->box.x2; 3073428d7b3dSmrg dst.p.y = b->box.y2; 3074428d7b3dSmrg v[0] = dst.f; 3075428d7b3dSmrg dst.p.x = b->box.x1; 3076428d7b3dSmrg v[3] = dst.f; 3077428d7b3dSmrg dst.p.y = b->box.y1; 3078428d7b3dSmrg v[6] = dst.f; 3079428d7b3dSmrg 3080428d7b3dSmrg v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); 3081428d7b3dSmrg v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); 3082428d7b3dSmrg v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1); 3083428d7b3dSmrg 3084428d7b3dSmrg v[8] = v[5] = v[2] = b->alpha; 3085428d7b3dSmrg 3086428d7b3dSmrg v += 9; 3087428d7b3dSmrg b++; 3088428d7b3dSmrg } while (--nbox); 3089428d7b3dSmrg} 3090428d7b3dSmrg#endif 3091428d7b3dSmrg 3092428d7b3dSmrgunsigned gen4_choose_spans_emitter(struct sna *sna, 3093428d7b3dSmrg struct sna_composite_spans_op *tmp) 3094428d7b3dSmrg{ 3095428d7b3dSmrg unsigned vb; 3096428d7b3dSmrg 3097428d7b3dSmrg if (tmp->base.src.is_solid) { 3098428d7b3dSmrg DBG(("%s: solid source\n", __FUNCTION__)); 3099428d7b3dSmrg tmp->prim_emit = emit_span_solid; 3100428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_solid; 3101428d7b3dSmrg tmp->base.floats_per_vertex = 3; 3102428d7b3dSmrg vb = 1 << 2 | 1; 3103428d7b3dSmrg } else if (tmp->base.src.is_linear) { 3104428d7b3dSmrg DBG(("%s: linear source\n", __FUNCTION__)); 3105428d7b3dSmrg#if defined(avx2) 3106428d7b3dSmrg if (sna->cpu_features & AVX2) { 3107428d7b3dSmrg tmp->prim_emit = emit_span_linear__avx2; 3108428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_linear__avx2; 3109428d7b3dSmrg } else 3110428d7b3dSmrg#endif 3111428d7b3dSmrg#if defined(sse4_2) 3112428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 3113428d7b3dSmrg tmp->prim_emit = emit_span_linear__sse4_2; 3114428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_linear__sse4_2; 3115428d7b3dSmrg } else 3116428d7b3dSmrg#endif 3117428d7b3dSmrg { 3118428d7b3dSmrg tmp->prim_emit = emit_span_linear; 3119428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_linear; 3120428d7b3dSmrg } 3121428d7b3dSmrg tmp->base.floats_per_vertex = 3; 3122428d7b3dSmrg vb = 1 << 2 | 1; 3123428d7b3dSmrg } else if (tmp->base.src.transform == NULL) { 3124428d7b3dSmrg DBG(("%s: identity transform\n", __FUNCTION__)); 3125428d7b3dSmrg#if defined(avx2) 3126428d7b3dSmrg if (sna->cpu_features & AVX2) { 3127428d7b3dSmrg tmp->prim_emit = emit_span_identity__avx2; 3128428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_identity__avx2; 3129428d7b3dSmrg } else 3130428d7b3dSmrg#endif 3131428d7b3dSmrg#if defined(sse4_2) 3132428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 3133428d7b3dSmrg tmp->prim_emit = emit_span_identity__sse4_2; 3134428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_identity__sse4_2; 3135428d7b3dSmrg } else 3136428d7b3dSmrg#endif 3137428d7b3dSmrg { 3138428d7b3dSmrg tmp->prim_emit = emit_span_identity; 3139428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_identity; 3140428d7b3dSmrg } 3141428d7b3dSmrg tmp->base.floats_per_vertex = 4; 3142428d7b3dSmrg vb = 1 << 2 | 2; 3143428d7b3dSmrg } else if (tmp->base.is_affine) { 3144428d7b3dSmrg tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; 3145428d7b3dSmrg tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; 3146428d7b3dSmrg if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) { 3147428d7b3dSmrg DBG(("%s: simple (unrotated affine) transform\n", __FUNCTION__)); 3148428d7b3dSmrg#if defined(avx2) 3149428d7b3dSmrg if (sna->cpu_features & AVX2) { 3150428d7b3dSmrg tmp->prim_emit = emit_span_simple__avx2; 3151428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_simple__avx2; 3152428d7b3dSmrg } else 3153428d7b3dSmrg#endif 3154428d7b3dSmrg#if defined(sse4_2) 3155428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 3156428d7b3dSmrg tmp->prim_emit = emit_span_simple__sse4_2; 3157428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_simple__sse4_2; 3158428d7b3dSmrg } else 3159428d7b3dSmrg#endif 3160428d7b3dSmrg { 3161428d7b3dSmrg tmp->prim_emit = emit_span_simple; 3162428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_simple; 3163428d7b3dSmrg } 3164428d7b3dSmrg } else { 3165428d7b3dSmrg DBG(("%s: affine transform\n", __FUNCTION__)); 3166428d7b3dSmrg#if defined(avx2) 3167428d7b3dSmrg if (sna->cpu_features & AVX2) { 3168428d7b3dSmrg tmp->prim_emit = emit_span_affine__avx2; 3169428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_affine__avx2; 3170428d7b3dSmrg } else 3171428d7b3dSmrg#endif 3172428d7b3dSmrg#if defined(sse4_2) 3173428d7b3dSmrg if (sna->cpu_features & SSE4_2) { 3174428d7b3dSmrg tmp->prim_emit = emit_span_affine__sse4_2; 3175428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_affine__sse4_2; 3176428d7b3dSmrg } else 3177428d7b3dSmrg#endif 3178428d7b3dSmrg { 3179428d7b3dSmrg tmp->prim_emit = emit_span_affine; 3180428d7b3dSmrg tmp->emit_boxes = emit_span_boxes_affine; 3181428d7b3dSmrg } 3182428d7b3dSmrg } 3183428d7b3dSmrg tmp->base.floats_per_vertex = 4; 3184428d7b3dSmrg vb = 1 << 2 | 2; 3185428d7b3dSmrg } else { 3186428d7b3dSmrg DBG(("%s: projective transform\n", __FUNCTION__)); 3187428d7b3dSmrg tmp->prim_emit = emit_span_primitive; 3188428d7b3dSmrg tmp->emit_boxes = emit_span_boxes; 3189428d7b3dSmrg tmp->base.floats_per_vertex = 5; 3190428d7b3dSmrg vb = 1 << 2 | 3; 3191428d7b3dSmrg } 3192428d7b3dSmrg tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; 3193428d7b3dSmrg return vb; 3194428d7b3dSmrg} 3195