1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2013 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21428d7b3dSmrg * SOFTWARE. 22428d7b3dSmrg * 23428d7b3dSmrg * Authors: 24428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 25428d7b3dSmrg * 26428d7b3dSmrg */ 27428d7b3dSmrg 28428d7b3dSmrg#ifdef HAVE_CONFIG_H 29428d7b3dSmrg#include "config.h" 30428d7b3dSmrg#endif 31428d7b3dSmrg 32428d7b3dSmrg#include "sna.h" 33428d7b3dSmrg 34428d7b3dSmrg#include <unistd.h> 35428d7b3dSmrg#include <pthread.h> 36428d7b3dSmrg#include <signal.h> 37428d7b3dSmrg 38428d7b3dSmrg#ifdef HAVE_VALGRIND 39428d7b3dSmrg#include <valgrind.h> 40428d7b3dSmrgstatic inline bool valgrind_active(void) { return RUNNING_ON_VALGRIND; } 41428d7b3dSmrg#else 42428d7b3dSmrgstatic inline bool valgrind_active(void) { return false; } 43428d7b3dSmrg#endif 44428d7b3dSmrg 45428d7b3dSmrgstatic int max_threads = -1; 46428d7b3dSmrg 47428d7b3dSmrgstatic struct thread { 48428d7b3dSmrg pthread_t thread; 49428d7b3dSmrg pthread_mutex_t mutex; 50428d7b3dSmrg pthread_cond_t cond; 51428d7b3dSmrg 52428d7b3dSmrg void (*func)(void *arg); 53428d7b3dSmrg void *arg; 54428d7b3dSmrg} *threads; 55428d7b3dSmrg 56428d7b3dSmrgstatic void *__run__(void *arg) 57428d7b3dSmrg{ 58428d7b3dSmrg struct thread *t = arg; 59428d7b3dSmrg sigset_t signals; 60428d7b3dSmrg 61428d7b3dSmrg /* Disable all signals in the slave threads as X uses them for IO */ 62428d7b3dSmrg sigfillset(&signals); 63428d7b3dSmrg sigdelset(&signals, SIGBUS); 64428d7b3dSmrg sigdelset(&signals, SIGSEGV); 65428d7b3dSmrg pthread_sigmask(SIG_SETMASK, &signals, NULL); 66428d7b3dSmrg 67428d7b3dSmrg pthread_mutex_lock(&t->mutex); 68428d7b3dSmrg while (1) { 69428d7b3dSmrg while (t->func == NULL) 70428d7b3dSmrg pthread_cond_wait(&t->cond, &t->mutex); 71428d7b3dSmrg pthread_mutex_unlock(&t->mutex); 72428d7b3dSmrg 73428d7b3dSmrg assert(t->func); 74428d7b3dSmrg t->func(t->arg); 75428d7b3dSmrg 76428d7b3dSmrg pthread_mutex_lock(&t->mutex); 77428d7b3dSmrg t->arg = NULL; 78428d7b3dSmrg t->func = NULL; 79428d7b3dSmrg pthread_cond_signal(&t->cond); 80428d7b3dSmrg } 81428d7b3dSmrg pthread_mutex_unlock(&t->mutex); 82428d7b3dSmrg 83428d7b3dSmrg return NULL; 84428d7b3dSmrg} 85428d7b3dSmrg 86428d7b3dSmrg#if defined(__GNUC__) 87428d7b3dSmrg#define popcount(x) __builtin_popcount(x) 88428d7b3dSmrg#else 89428d7b3dSmrgstatic int popcount(unsigned int x) 90428d7b3dSmrg{ 91428d7b3dSmrg int count = 0; 92428d7b3dSmrg 93428d7b3dSmrg while (x) { 94428d7b3dSmrg count += x&1; 95428d7b3dSmrg x >>= 1; 96428d7b3dSmrg } 97428d7b3dSmrg 98428d7b3dSmrg return count; 99428d7b3dSmrg} 100428d7b3dSmrg#endif 101428d7b3dSmrg 102428d7b3dSmrgstatic int 103428d7b3dSmrgnum_cores(void) 104428d7b3dSmrg{ 105428d7b3dSmrg FILE *file = fopen("/proc/cpuinfo", "r"); 106428d7b3dSmrg int count = 0; 107428d7b3dSmrg if (file) { 108428d7b3dSmrg size_t len = 0; 109428d7b3dSmrg char *line = NULL; 110428d7b3dSmrg uint32_t processors = 0, cores = 0; 111428d7b3dSmrg while (getline(&line, &len, file) != -1) { 112428d7b3dSmrg int id; 113428d7b3dSmrg if (sscanf(line, "physical id : %d", &id) == 1) { 114428d7b3dSmrg if (id >= 32) 115428d7b3dSmrg continue; 116428d7b3dSmrg processors |= 1 << id; 117428d7b3dSmrg } else if (sscanf(line, "core id : %d", &id) == 1) { 118428d7b3dSmrg if (id >= 32) 119428d7b3dSmrg continue; 120428d7b3dSmrg cores |= 1 << id; 121428d7b3dSmrg } 122428d7b3dSmrg } 123428d7b3dSmrg free(line); 124428d7b3dSmrg fclose(file); 125428d7b3dSmrg 126428d7b3dSmrg DBG(("%s: processors=0x%08x, cores=0x%08x\n", 127428d7b3dSmrg __FUNCTION__, processors, cores)); 128428d7b3dSmrg 129428d7b3dSmrg count = popcount(processors) * popcount(cores); 130428d7b3dSmrg } 131428d7b3dSmrg return count; 132428d7b3dSmrg} 133428d7b3dSmrg 134428d7b3dSmrgvoid sna_threads_init(void) 135428d7b3dSmrg{ 136428d7b3dSmrg int n; 137428d7b3dSmrg 138428d7b3dSmrg if (max_threads != -1) 139428d7b3dSmrg return; 140428d7b3dSmrg 141428d7b3dSmrg if (valgrind_active()) 142428d7b3dSmrg goto bail; 143428d7b3dSmrg 144428d7b3dSmrg max_threads = num_cores(); 145428d7b3dSmrg if (max_threads == 0) 146428d7b3dSmrg max_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2; 147428d7b3dSmrg if (max_threads <= 1) 148428d7b3dSmrg goto bail; 149428d7b3dSmrg 150428d7b3dSmrg DBG(("%s: creating a thread pool of %d threads\n", 151428d7b3dSmrg __func__, max_threads)); 152428d7b3dSmrg 153428d7b3dSmrg threads = malloc (sizeof(threads[0])*max_threads); 154428d7b3dSmrg if (threads == NULL) 155428d7b3dSmrg goto bail; 156428d7b3dSmrg 157428d7b3dSmrg for (n = 1; n < max_threads; n++) { 158428d7b3dSmrg pthread_mutex_init(&threads[n].mutex, NULL); 159428d7b3dSmrg pthread_cond_init(&threads[n].cond, NULL); 160428d7b3dSmrg 161428d7b3dSmrg threads[n].func = NULL; 162428d7b3dSmrg threads[n].arg = NULL; 163428d7b3dSmrg if (pthread_create(&threads[n].thread, NULL, 164428d7b3dSmrg __run__, &threads[n])) 165428d7b3dSmrg goto bail; 166428d7b3dSmrg } 167428d7b3dSmrg 168428d7b3dSmrg threads[0].thread = pthread_self(); 169428d7b3dSmrg return; 170428d7b3dSmrg 171428d7b3dSmrgbail: 172428d7b3dSmrg max_threads = 0; 173428d7b3dSmrg} 174428d7b3dSmrg 175428d7b3dSmrgvoid sna_threads_run(int id, void (*func)(void *arg), void *arg) 176428d7b3dSmrg{ 177428d7b3dSmrg assert(max_threads > 0); 178428d7b3dSmrg assert(pthread_self() == threads[0].thread); 179428d7b3dSmrg assert(id > 0 && id < max_threads); 180428d7b3dSmrg 181428d7b3dSmrg assert(threads[id].func == NULL); 182428d7b3dSmrg 183428d7b3dSmrg pthread_mutex_lock(&threads[id].mutex); 184428d7b3dSmrg threads[id].func = func; 185428d7b3dSmrg threads[id].arg = arg; 186428d7b3dSmrg pthread_cond_signal(&threads[id].cond); 187428d7b3dSmrg pthread_mutex_unlock(&threads[id].mutex); 188428d7b3dSmrg} 189428d7b3dSmrg 190428d7b3dSmrgvoid sna_threads_trap(int sig) 191428d7b3dSmrg{ 192428d7b3dSmrg pthread_t t = pthread_self(); 193428d7b3dSmrg int n; 194428d7b3dSmrg 195428d7b3dSmrg if (max_threads == 0) 196428d7b3dSmrg return; 197428d7b3dSmrg 198428d7b3dSmrg if (t == threads[0].thread) 199428d7b3dSmrg return; 200428d7b3dSmrg 201428d7b3dSmrg for (n = 1; threads[n].thread != t; n++) 202428d7b3dSmrg ; 203428d7b3dSmrg 204428d7b3dSmrg ERR(("%s: thread[%d] caught signal %d\n", __func__, n, sig)); 205428d7b3dSmrg 206428d7b3dSmrg pthread_mutex_lock(&threads[n].mutex); 207428d7b3dSmrg threads[n].arg = (void *)(intptr_t)sig; 208428d7b3dSmrg threads[n].func = NULL; 209428d7b3dSmrg pthread_cond_signal(&threads[n].cond); 210428d7b3dSmrg pthread_mutex_unlock(&threads[n].mutex); 211428d7b3dSmrg 212428d7b3dSmrg pthread_exit(&sig); 213428d7b3dSmrg} 214428d7b3dSmrg 215428d7b3dSmrgvoid sna_threads_wait(void) 216428d7b3dSmrg{ 217428d7b3dSmrg int n; 218428d7b3dSmrg 219428d7b3dSmrg assert(max_threads > 0); 220428d7b3dSmrg assert(pthread_self() == threads[0].thread); 221428d7b3dSmrg 222428d7b3dSmrg for (n = 1; n < max_threads; n++) { 223428d7b3dSmrg if (threads[n].func != NULL) { 224428d7b3dSmrg pthread_mutex_lock(&threads[n].mutex); 225428d7b3dSmrg while (threads[n].func) 226428d7b3dSmrg pthread_cond_wait(&threads[n].cond, &threads[n].mutex); 227428d7b3dSmrg pthread_mutex_unlock(&threads[n].mutex); 228428d7b3dSmrg } 229428d7b3dSmrg 230428d7b3dSmrg if (threads[n].arg != NULL) { 231428d7b3dSmrg DBG(("%s: thread[%d] died from signal %d\n", __func__, n, (int)(intptr_t)threads[n].arg)); 232428d7b3dSmrg sna_threads_kill(); 233428d7b3dSmrg return; 234428d7b3dSmrg } 235428d7b3dSmrg } 236428d7b3dSmrg} 237428d7b3dSmrg 238428d7b3dSmrgvoid sna_threads_kill(void) 239428d7b3dSmrg{ 240428d7b3dSmrg int n; 241428d7b3dSmrg 242428d7b3dSmrg ERR(("%s: kill %d threads\n", __func__, max_threads)); 243428d7b3dSmrg assert(max_threads > 0); 244428d7b3dSmrg assert(pthread_self() == threads[0].thread); 245428d7b3dSmrg 246428d7b3dSmrg for (n = 1; n < max_threads; n++) 247428d7b3dSmrg pthread_cancel(threads[n].thread); 248428d7b3dSmrg 249428d7b3dSmrg for (n = 1; n < max_threads; n++) 250428d7b3dSmrg pthread_join(threads[n].thread, NULL); 251428d7b3dSmrg 252428d7b3dSmrg max_threads = 0; 253428d7b3dSmrg} 254428d7b3dSmrg 255428d7b3dSmrgint sna_use_threads(int width, int height, int threshold) 256428d7b3dSmrg{ 257428d7b3dSmrg int num_threads; 258428d7b3dSmrg 259428d7b3dSmrg if (max_threads <= 0) 260428d7b3dSmrg return 1; 261428d7b3dSmrg 262428d7b3dSmrg if (height <= 1) 263428d7b3dSmrg return 1; 264428d7b3dSmrg 265428d7b3dSmrg if (width < 128) 266428d7b3dSmrg height /= 128/width; 267428d7b3dSmrg 268428d7b3dSmrg num_threads = height * max_threads / threshold - 1; 269428d7b3dSmrg if (num_threads <= 0) 270428d7b3dSmrg return 1; 271428d7b3dSmrg 272428d7b3dSmrg if (num_threads > max_threads) 273428d7b3dSmrg num_threads = max_threads; 274428d7b3dSmrg if (num_threads > height) 275428d7b3dSmrg num_threads = height; 276428d7b3dSmrg 277428d7b3dSmrg return num_threads; 278428d7b3dSmrg} 279428d7b3dSmrg 280428d7b3dSmrgstruct thread_composite { 281428d7b3dSmrg pixman_image_t *src, *mask, *dst; 282428d7b3dSmrg pixman_op_t op; 283428d7b3dSmrg int16_t src_x, src_y; 284428d7b3dSmrg int16_t mask_x, mask_y; 285428d7b3dSmrg int16_t dst_x, dst_y; 286428d7b3dSmrg uint16_t width, height; 287428d7b3dSmrg}; 288428d7b3dSmrg 289428d7b3dSmrgstatic void thread_composite(void *arg) 290428d7b3dSmrg{ 291428d7b3dSmrg struct thread_composite *t = arg; 292428d7b3dSmrg pixman_image_composite(t->op, t->src, t->mask, t->dst, 293428d7b3dSmrg t->src_x, t->src_y, 294428d7b3dSmrg t->mask_x, t->mask_y, 295428d7b3dSmrg t->dst_x, t->dst_y, 296428d7b3dSmrg t->width, t->height); 297428d7b3dSmrg} 298428d7b3dSmrg 299428d7b3dSmrgvoid sna_image_composite(pixman_op_t op, 300428d7b3dSmrg pixman_image_t *src, 301428d7b3dSmrg pixman_image_t *mask, 302428d7b3dSmrg pixman_image_t *dst, 303428d7b3dSmrg int16_t src_x, 304428d7b3dSmrg int16_t src_y, 305428d7b3dSmrg int16_t mask_x, 306428d7b3dSmrg int16_t mask_y, 307428d7b3dSmrg int16_t dst_x, 308428d7b3dSmrg int16_t dst_y, 309428d7b3dSmrg uint16_t width, 310428d7b3dSmrg uint16_t height) 311428d7b3dSmrg{ 312428d7b3dSmrg int num_threads; 313428d7b3dSmrg 314428d7b3dSmrg num_threads = sna_use_threads(width, height, 32); 315428d7b3dSmrg if (num_threads <= 1) { 316428d7b3dSmrg if (sigtrap_get() == 0) { 317428d7b3dSmrg pixman_image_composite(op, src, mask, dst, 318428d7b3dSmrg src_x, src_y, 319428d7b3dSmrg mask_x, mask_y, 320428d7b3dSmrg dst_x, dst_y, 321428d7b3dSmrg width, height); 322428d7b3dSmrg sigtrap_put(); 323428d7b3dSmrg } 324428d7b3dSmrg } else { 325428d7b3dSmrg struct thread_composite data[num_threads]; 326428d7b3dSmrg int y, dy, n; 327428d7b3dSmrg 328428d7b3dSmrg DBG(("%s: using %d threads for compositing %dx%d\n", 329428d7b3dSmrg __FUNCTION__, num_threads, width, height)); 330428d7b3dSmrg 331428d7b3dSmrg y = dst_y; 332428d7b3dSmrg dy = (height + num_threads - 1) / num_threads; 333428d7b3dSmrg num_threads -= (num_threads-1) * dy >= height; 334428d7b3dSmrg 335428d7b3dSmrg data[0].op = op; 336428d7b3dSmrg data[0].src = src; 337428d7b3dSmrg data[0].mask = mask; 338428d7b3dSmrg data[0].dst = dst; 339428d7b3dSmrg data[0].src_x = src_x; 340428d7b3dSmrg data[0].src_y = src_y; 341428d7b3dSmrg data[0].mask_x = mask_x; 342428d7b3dSmrg data[0].mask_y = mask_y; 343428d7b3dSmrg data[0].dst_x = dst_x; 344428d7b3dSmrg data[0].dst_y = y; 345428d7b3dSmrg data[0].width = width; 346428d7b3dSmrg data[0].height = dy; 347428d7b3dSmrg 348428d7b3dSmrg if (sigtrap_get() == 0) { 349428d7b3dSmrg for (n = 1; n < num_threads; n++) { 350428d7b3dSmrg data[n] = data[0]; 351428d7b3dSmrg data[n].src_y += y - dst_y; 352428d7b3dSmrg data[n].mask_y += y - dst_y; 353428d7b3dSmrg data[n].dst_y = y; 354428d7b3dSmrg y += dy; 355428d7b3dSmrg 356428d7b3dSmrg sna_threads_run(n, thread_composite, &data[n]); 357428d7b3dSmrg } 358428d7b3dSmrg 359428d7b3dSmrg assert(y < dst_y + height); 360428d7b3dSmrg if (y + dy > dst_y + height) 361428d7b3dSmrg dy = dst_y + height - y; 362428d7b3dSmrg 363428d7b3dSmrg data[0].src_y += y - dst_y; 364428d7b3dSmrg data[0].mask_y += y - dst_y; 365428d7b3dSmrg data[0].dst_y = y; 366428d7b3dSmrg data[0].height = dy; 367428d7b3dSmrg 368428d7b3dSmrg thread_composite(&data[0]); 369428d7b3dSmrg 370428d7b3dSmrg sna_threads_wait(); 371428d7b3dSmrg sigtrap_put(); 372428d7b3dSmrg } else 373428d7b3dSmrg sna_threads_kill(); 374428d7b3dSmrg } 375428d7b3dSmrg} 376