1428d7b3dSmrg/*
2428d7b3dSmrg * Copyright © 2013 Intel Corporation
3428d7b3dSmrg *
4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"),
6428d7b3dSmrg * to deal in the Software without restriction, including without limitation
7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions:
10428d7b3dSmrg *
11428d7b3dSmrg * The above copyright notice and this permission notice (including the next
12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the
13428d7b3dSmrg * Software.
14428d7b3dSmrg *
15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21428d7b3dSmrg * SOFTWARE.
22428d7b3dSmrg *
23428d7b3dSmrg * Authors:
24428d7b3dSmrg *    Chris Wilson <chris@chris-wilson.co.uk>
25428d7b3dSmrg *
26428d7b3dSmrg */
27428d7b3dSmrg
28428d7b3dSmrg#ifdef HAVE_CONFIG_H
29428d7b3dSmrg#include "config.h"
30428d7b3dSmrg#endif
31428d7b3dSmrg
32428d7b3dSmrg#include "sna.h"
33428d7b3dSmrg
34428d7b3dSmrg#include <unistd.h>
35428d7b3dSmrg#include <pthread.h>
36428d7b3dSmrg#include <signal.h>
37428d7b3dSmrg
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#endif

/*
 * Report whether the process is executing under valgrind.
 *
 * Built without HAVE_VALGRIND this always answers false; otherwise it
 * forwards valgrind's own RUNNING_ON_VALGRIND query.
 */
static inline bool valgrind_active(void)
{
#ifdef HAVE_VALGRIND
	return RUNNING_ON_VALGRIND;
#else
	return false;
#endif
}
44428d7b3dSmrg
/*
 * Size of the thread pool:
 *   -1  sna_threads_init() has not run yet,
 *    0  threading is disabled,
 *   >0  number of slots in threads[], slot 0 standing for the caller.
 */
static int max_threads = -1;

/* One slot of the pool; a slot is idle while func is NULL. */
struct thread {
	pthread_t thread;	/* handle of the thread owning this slot */
	pthread_mutex_t mutex;	/* guards func and arg */
	pthread_cond_t cond;	/* signalled when func or arg changes */

	void (*func)(void *arg);	/* pending job, NULL when idle */
	void *arg;			/* argument passed to func */
};

/* The pool itself, allocated by sna_threads_init(). */
static struct thread *threads;
55428d7b3dSmrg
56428d7b3dSmrgstatic void *__run__(void *arg)
57428d7b3dSmrg{
58428d7b3dSmrg	struct thread *t = arg;
59428d7b3dSmrg	sigset_t signals;
60428d7b3dSmrg
61428d7b3dSmrg	/* Disable all signals in the slave threads as X uses them for IO */
62428d7b3dSmrg	sigfillset(&signals);
63428d7b3dSmrg	sigdelset(&signals, SIGBUS);
64428d7b3dSmrg	sigdelset(&signals, SIGSEGV);
65428d7b3dSmrg	pthread_sigmask(SIG_SETMASK, &signals, NULL);
66428d7b3dSmrg
67428d7b3dSmrg	pthread_mutex_lock(&t->mutex);
68428d7b3dSmrg	while (1) {
69428d7b3dSmrg		while (t->func == NULL)
70428d7b3dSmrg			pthread_cond_wait(&t->cond, &t->mutex);
71428d7b3dSmrg		pthread_mutex_unlock(&t->mutex);
72428d7b3dSmrg
73428d7b3dSmrg		assert(t->func);
74428d7b3dSmrg		t->func(t->arg);
75428d7b3dSmrg
76428d7b3dSmrg		pthread_mutex_lock(&t->mutex);
77428d7b3dSmrg		t->arg = NULL;
78428d7b3dSmrg		t->func = NULL;
79428d7b3dSmrg		pthread_cond_signal(&t->cond);
80428d7b3dSmrg	}
81428d7b3dSmrg	pthread_mutex_unlock(&t->mutex);
82428d7b3dSmrg
83428d7b3dSmrg	return NULL;
84428d7b3dSmrg}
85428d7b3dSmrg
/* Count the number of set bits in an unsigned int. */
#ifdef __GNUC__
#define popcount(x) __builtin_popcount(x)
#else
static int popcount(unsigned int x)
{
	int bits = 0;

	/* Add up the lowest bit, then shift it out, until nothing is left. */
	for (; x; x >>= 1)
		bits += x&1;

	return bits;
}
#endif
101428d7b3dSmrg
/*
 * Estimate the number of cores from /proc/cpuinfo.
 *
 * Every distinct "physical id" and every distinct "core id" in the range
 * 0..31 is recorded in a bitmask; the result is the number of distinct
 * physical ids multiplied by the number of distinct core ids.
 *
 * Returns 0 if the file cannot be opened or contains neither kind of
 * line, leaving the caller to pick a fallback.
 */
static int
num_cores(void)
{
	FILE *file = fopen("/proc/cpuinfo", "r");
	int count = 0;
	if (file) {
		size_t len = 0;
		char *line = NULL;
		uint32_t processors = 0, cores = 0;
		while (getline(&line, &len, file) != -1) {
			int id;
			if (sscanf(line, "physical id : %d", &id) == 1) {
				/* Ids outside the mask cannot be recorded;
				 * a negative shift count would be undefined.
				 */
				if (id < 0 || id >= 32)
					continue;
				/* Shift an unsigned value so bit 31 is valid. */
				processors |= (uint32_t)1 << id;
			} else if (sscanf(line, "core id : %d", &id) == 1) {
				if (id < 0 || id >= 32)
					continue;
				cores |= (uint32_t)1 << id;
			}
		}
		/* getline() owns and grows the buffer; release it once. */
		free(line);
		fclose(file);

		DBG(("%s: processors=0x%08x, cores=0x%08x\n",
		     __FUNCTION__, processors, cores));

		count = popcount(processors) * popcount(cores);
	}
	return count;
}
133428d7b3dSmrg
134428d7b3dSmrgvoid sna_threads_init(void)
135428d7b3dSmrg{
136428d7b3dSmrg	int n;
137428d7b3dSmrg
138428d7b3dSmrg	if (max_threads != -1)
139428d7b3dSmrg		return;
140428d7b3dSmrg
141428d7b3dSmrg	if (valgrind_active())
142428d7b3dSmrg		goto bail;
143428d7b3dSmrg
144428d7b3dSmrg	max_threads = num_cores();
145428d7b3dSmrg	if (max_threads == 0)
146428d7b3dSmrg		max_threads = sysconf(_SC_NPROCESSORS_ONLN) / 2;
147428d7b3dSmrg	if (max_threads <= 1)
148428d7b3dSmrg		goto bail;
149428d7b3dSmrg
150428d7b3dSmrg	DBG(("%s: creating a thread pool of %d threads\n",
151428d7b3dSmrg	     __func__, max_threads));
152428d7b3dSmrg
153428d7b3dSmrg	threads = malloc (sizeof(threads[0])*max_threads);
154428d7b3dSmrg	if (threads == NULL)
155428d7b3dSmrg		goto bail;
156428d7b3dSmrg
157428d7b3dSmrg	for (n = 1; n < max_threads; n++) {
158428d7b3dSmrg		pthread_mutex_init(&threads[n].mutex, NULL);
159428d7b3dSmrg		pthread_cond_init(&threads[n].cond, NULL);
160428d7b3dSmrg
161428d7b3dSmrg		threads[n].func = NULL;
162428d7b3dSmrg		threads[n].arg = NULL;
163428d7b3dSmrg		if (pthread_create(&threads[n].thread, NULL,
164428d7b3dSmrg				   __run__, &threads[n]))
165428d7b3dSmrg			goto bail;
166428d7b3dSmrg	}
167428d7b3dSmrg
168428d7b3dSmrg	threads[0].thread = pthread_self();
169428d7b3dSmrg	return;
170428d7b3dSmrg
171428d7b3dSmrgbail:
172428d7b3dSmrg	max_threads = 0;
173428d7b3dSmrg}
174428d7b3dSmrg
175428d7b3dSmrgvoid sna_threads_run(int id, void (*func)(void *arg), void *arg)
176428d7b3dSmrg{
177428d7b3dSmrg	assert(max_threads > 0);
178428d7b3dSmrg	assert(pthread_self() == threads[0].thread);
179428d7b3dSmrg	assert(id > 0 && id < max_threads);
180428d7b3dSmrg
181428d7b3dSmrg	assert(threads[id].func == NULL);
182428d7b3dSmrg
183428d7b3dSmrg	pthread_mutex_lock(&threads[id].mutex);
184428d7b3dSmrg	threads[id].func = func;
185428d7b3dSmrg	threads[id].arg = arg;
186428d7b3dSmrg	pthread_cond_signal(&threads[id].cond);
187428d7b3dSmrg	pthread_mutex_unlock(&threads[id].mutex);
188428d7b3dSmrg}
189428d7b3dSmrg
190428d7b3dSmrgvoid sna_threads_trap(int sig)
191428d7b3dSmrg{
192428d7b3dSmrg	pthread_t t = pthread_self();
193428d7b3dSmrg	int n;
194428d7b3dSmrg
195428d7b3dSmrg	if (max_threads == 0)
196428d7b3dSmrg		return;
197428d7b3dSmrg
198428d7b3dSmrg	if (t == threads[0].thread)
199428d7b3dSmrg		return;
200428d7b3dSmrg
201428d7b3dSmrg	for (n = 1; threads[n].thread != t; n++)
202428d7b3dSmrg		;
203428d7b3dSmrg
204428d7b3dSmrg	ERR(("%s: thread[%d] caught signal %d\n", __func__, n, sig));
205428d7b3dSmrg
206428d7b3dSmrg	pthread_mutex_lock(&threads[n].mutex);
207428d7b3dSmrg	threads[n].arg = (void *)(intptr_t)sig;
208428d7b3dSmrg	threads[n].func = NULL;
209428d7b3dSmrg	pthread_cond_signal(&threads[n].cond);
210428d7b3dSmrg	pthread_mutex_unlock(&threads[n].mutex);
211428d7b3dSmrg
212428d7b3dSmrg	pthread_exit(&sig);
213428d7b3dSmrg}
214428d7b3dSmrg
215428d7b3dSmrgvoid sna_threads_wait(void)
216428d7b3dSmrg{
217428d7b3dSmrg	int n;
218428d7b3dSmrg
219428d7b3dSmrg	assert(max_threads > 0);
220428d7b3dSmrg	assert(pthread_self() == threads[0].thread);
221428d7b3dSmrg
222428d7b3dSmrg	for (n = 1; n < max_threads; n++) {
223428d7b3dSmrg		if (threads[n].func != NULL) {
224428d7b3dSmrg			pthread_mutex_lock(&threads[n].mutex);
225428d7b3dSmrg			while (threads[n].func)
226428d7b3dSmrg				pthread_cond_wait(&threads[n].cond, &threads[n].mutex);
227428d7b3dSmrg			pthread_mutex_unlock(&threads[n].mutex);
228428d7b3dSmrg		}
229428d7b3dSmrg
230428d7b3dSmrg		if (threads[n].arg != NULL) {
231428d7b3dSmrg			DBG(("%s: thread[%d] died from signal %d\n", __func__, n, (int)(intptr_t)threads[n].arg));
232428d7b3dSmrg			sna_threads_kill();
233428d7b3dSmrg			return;
234428d7b3dSmrg		}
235428d7b3dSmrg	}
236428d7b3dSmrg}
237428d7b3dSmrg
238428d7b3dSmrgvoid sna_threads_kill(void)
239428d7b3dSmrg{
240428d7b3dSmrg	int n;
241428d7b3dSmrg
242428d7b3dSmrg	ERR(("%s: kill %d threads\n", __func__, max_threads));
243428d7b3dSmrg	assert(max_threads > 0);
244428d7b3dSmrg	assert(pthread_self() == threads[0].thread);
245428d7b3dSmrg
246428d7b3dSmrg	for (n = 1; n < max_threads; n++)
247428d7b3dSmrg		pthread_cancel(threads[n].thread);
248428d7b3dSmrg
249428d7b3dSmrg	for (n = 1; n < max_threads; n++)
250428d7b3dSmrg		pthread_join(threads[n].thread, NULL);
251428d7b3dSmrg
252428d7b3dSmrg	max_threads = 0;
253428d7b3dSmrg}
254428d7b3dSmrg
255428d7b3dSmrgint sna_use_threads(int width, int height, int threshold)
256428d7b3dSmrg{
257428d7b3dSmrg	int num_threads;
258428d7b3dSmrg
259428d7b3dSmrg	if (max_threads <= 0)
260428d7b3dSmrg		return 1;
261428d7b3dSmrg
262428d7b3dSmrg	if (height <= 1)
263428d7b3dSmrg		return 1;
264428d7b3dSmrg
265428d7b3dSmrg	if (width < 128)
266428d7b3dSmrg		height /= 128/width;
267428d7b3dSmrg
268428d7b3dSmrg	num_threads = height * max_threads / threshold - 1;
269428d7b3dSmrg	if (num_threads <= 0)
270428d7b3dSmrg		return 1;
271428d7b3dSmrg
272428d7b3dSmrg	if (num_threads > max_threads)
273428d7b3dSmrg		num_threads = max_threads;
274428d7b3dSmrg	if (num_threads > height)
275428d7b3dSmrg		num_threads = height;
276428d7b3dSmrg
277428d7b3dSmrg	return num_threads;
278428d7b3dSmrg}
279428d7b3dSmrg
280428d7b3dSmrgstruct thread_composite {
281428d7b3dSmrg	pixman_image_t *src, *mask, *dst;
282428d7b3dSmrg	pixman_op_t op;
283428d7b3dSmrg	int16_t src_x, src_y;
284428d7b3dSmrg	int16_t mask_x, mask_y;
285428d7b3dSmrg	int16_t dst_x, dst_y;
286428d7b3dSmrg	uint16_t width, height;
287428d7b3dSmrg};
288428d7b3dSmrg
289428d7b3dSmrgstatic void thread_composite(void *arg)
290428d7b3dSmrg{
291428d7b3dSmrg	struct thread_composite *t = arg;
292428d7b3dSmrg	pixman_image_composite(t->op, t->src, t->mask, t->dst,
293428d7b3dSmrg			       t->src_x, t->src_y,
294428d7b3dSmrg			       t->mask_x, t->mask_y,
295428d7b3dSmrg			       t->dst_x, t->dst_y,
296428d7b3dSmrg			       t->width, t->height);
297428d7b3dSmrg}
298428d7b3dSmrg
299428d7b3dSmrgvoid sna_image_composite(pixman_op_t        op,
300428d7b3dSmrg			 pixman_image_t    *src,
301428d7b3dSmrg			 pixman_image_t    *mask,
302428d7b3dSmrg			 pixman_image_t    *dst,
303428d7b3dSmrg			 int16_t            src_x,
304428d7b3dSmrg			 int16_t            src_y,
305428d7b3dSmrg			 int16_t            mask_x,
306428d7b3dSmrg			 int16_t            mask_y,
307428d7b3dSmrg			 int16_t            dst_x,
308428d7b3dSmrg			 int16_t            dst_y,
309428d7b3dSmrg			 uint16_t           width,
310428d7b3dSmrg			 uint16_t           height)
311428d7b3dSmrg{
312428d7b3dSmrg	int num_threads;
313428d7b3dSmrg
314428d7b3dSmrg	num_threads = sna_use_threads(width, height, 32);
315428d7b3dSmrg	if (num_threads <= 1) {
316428d7b3dSmrg		if (sigtrap_get() == 0) {
317428d7b3dSmrg			pixman_image_composite(op, src, mask, dst,
318428d7b3dSmrg					       src_x, src_y,
319428d7b3dSmrg					       mask_x, mask_y,
320428d7b3dSmrg					       dst_x, dst_y,
321428d7b3dSmrg					       width, height);
322428d7b3dSmrg			sigtrap_put();
323428d7b3dSmrg		}
324428d7b3dSmrg	} else {
325428d7b3dSmrg		struct thread_composite data[num_threads];
326428d7b3dSmrg		int y, dy, n;
327428d7b3dSmrg
328428d7b3dSmrg		DBG(("%s: using %d threads for compositing %dx%d\n",
329428d7b3dSmrg		     __FUNCTION__, num_threads, width, height));
330428d7b3dSmrg
331428d7b3dSmrg		y = dst_y;
332428d7b3dSmrg		dy = (height + num_threads - 1) / num_threads;
333428d7b3dSmrg		num_threads -= (num_threads-1) * dy >= height;
334428d7b3dSmrg
335428d7b3dSmrg		data[0].op = op;
336428d7b3dSmrg		data[0].src = src;
337428d7b3dSmrg		data[0].mask = mask;
338428d7b3dSmrg		data[0].dst = dst;
339428d7b3dSmrg		data[0].src_x = src_x;
340428d7b3dSmrg		data[0].src_y = src_y;
341428d7b3dSmrg		data[0].mask_x = mask_x;
342428d7b3dSmrg		data[0].mask_y = mask_y;
343428d7b3dSmrg		data[0].dst_x = dst_x;
344428d7b3dSmrg		data[0].dst_y = y;
345428d7b3dSmrg		data[0].width = width;
346428d7b3dSmrg		data[0].height = dy;
347428d7b3dSmrg
348428d7b3dSmrg		if (sigtrap_get() == 0) {
349428d7b3dSmrg			for (n = 1; n < num_threads; n++) {
350428d7b3dSmrg				data[n] = data[0];
351428d7b3dSmrg				data[n].src_y += y - dst_y;
352428d7b3dSmrg				data[n].mask_y += y - dst_y;
353428d7b3dSmrg				data[n].dst_y = y;
354428d7b3dSmrg				y += dy;
355428d7b3dSmrg
356428d7b3dSmrg				sna_threads_run(n, thread_composite, &data[n]);
357428d7b3dSmrg			}
358428d7b3dSmrg
359428d7b3dSmrg			assert(y < dst_y + height);
360428d7b3dSmrg			if (y + dy > dst_y + height)
361428d7b3dSmrg				dy = dst_y + height - y;
362428d7b3dSmrg
363428d7b3dSmrg			data[0].src_y += y - dst_y;
364428d7b3dSmrg			data[0].mask_y += y - dst_y;
365428d7b3dSmrg			data[0].dst_y = y;
366428d7b3dSmrg			data[0].height = dy;
367428d7b3dSmrg
368428d7b3dSmrg			thread_composite(&data[0]);
369428d7b3dSmrg
370428d7b3dSmrg			sna_threads_wait();
371428d7b3dSmrg			sigtrap_put();
372428d7b3dSmrg		} else
373428d7b3dSmrg			sna_threads_kill();
374428d7b3dSmrg	}
375428d7b3dSmrg}
376