Home | History | Annotate | Line # | Download | only in benchmark
      1 // SPDX-FileCopyrightText: 2010 Mathieu Desnoyers <mathieu.desnoyers (at) efficios.com>
      2 // SPDX-FileCopyrightText: 2010 Paolo Bonzini <pbonzini (at) redhat.com>
      3 //
      4 // SPDX-License-Identifier: GPL-2.0-or-later
      5 
      6 /*
      7  * Userspace RCU library - example RCU-based lock-free stack
      8  */
      9 
     10 #include <stdio.h>
     11 #include <pthread.h>
     12 #include <stdlib.h>
     13 #include <stdint.h>
     14 #include <stdbool.h>
     15 #include <string.h>
     16 #include <sys/types.h>
     17 #include <sys/wait.h>
     18 #include <unistd.h>
     19 #include <stdio.h>
     20 #include <errno.h>
     21 
     22 #include <urcu/arch.h>
     23 #include <urcu/assert.h>
     24 #include <urcu/tls-compat.h>
     25 #include "thread-id.h"
     26 
     27 /* hardcoded number of CPUs */
     28 #define NR_CPUS 16384
     29 
     30 #ifndef DYNAMIC_LINK_TEST
     31 #define _LGPL_SOURCE
     32 #endif
     33 #include <urcu.h>
     34 
     35 /* Remove deprecation warnings from test build. */
     36 #define CDS_LFS_RCU_DEPRECATED
     37 
     38 #include <urcu/cds.h>
     39 
/* Dequeuer delay between two pop attempts, in loops (set with -c). */
static unsigned long rduration;

/* Test duration, in seconds (third positional argument). */
static unsigned long duration;

/* Enqueuer delay after each successful push, in loops (set with -d). */
static unsigned long wdelay;
     46 
/*
 * Busy-wait for the requested number of iterations, calling
 * caa_cpu_relax() once per iteration.
 */
static inline void loop_sleep(unsigned long loops)
{
	for (; loops != 0; loops--)
		caa_cpu_relax();
}
     52 
/* Non-zero when -v was given; gates the output of printf_verbose(). */
static int verbose_mode;

/*
 * printf() wrapper that only prints when verbose_mode is set.
 *
 * "## args" removes the comma before it when no variadic argument is
 * supplied, so printf_verbose("text\n") compiles as well as calls that
 * pass format arguments.
 */
#define printf_verbose(fmt, args...)		\
	do {					\
		if (verbose_mode)		\
			printf(fmt, ## args);	\
	} while (0)
     60 
/* CPU numbers collected from the -a options, in command-line order. */
static unsigned int cpu_affinities[NR_CPUS];
/*
 * Next slot of cpu_affinities[]: write index while main() parses the
 * options, then reset to 0 and used as read index by set_affinity().
 */
static unsigned int next_aff = 0;
/* Non-zero once at least one -a option has been given. */
static int use_affinity = 0;

/* Serializes the read-and-increment of next_aff done in set_affinity(). */
pthread_mutex_t affinity_mutex = PTHREAD_MUTEX_INITIALIZER;
     66 
     67 static void set_affinity(void)
     68 {
     69 #ifdef HAVE_SCHED_SETAFFINITY
     70 	cpu_set_t mask;
     71 	int cpu, ret;
     72 #endif /* HAVE_SCHED_SETAFFINITY */
     73 
     74 	if (!use_affinity)
     75 		return;
     76 
     77 #ifdef HAVE_SCHED_SETAFFINITY
     78 	ret = pthread_mutex_lock(&affinity_mutex);
     79 	if (ret) {
     80 		perror("Error in pthread mutex lock");
     81 		exit(-1);
     82 	}
     83 	cpu = cpu_affinities[next_aff++];
     84 	ret = pthread_mutex_unlock(&affinity_mutex);
     85 	if (ret) {
     86 		perror("Error in pthread mutex unlock");
     87 		exit(-1);
     88 	}
     89 
     90 	CPU_ZERO(&mask);
     91 	CPU_SET(cpu, &mask);
     92 	sched_setaffinity(0, sizeof(mask), &mask);
     93 #endif /* HAVE_SCHED_SETAFFINITY */
     94 }
     95 
/*
 * Dequeuer-side end-of-test check, forwarded to test_duration_read().
 * Returns 0 if the test should end.
 */
static int test_duration_dequeue(void)
{
	int keep_running = test_duration_read();

	return keep_running;
}
    103 
/*
 * Enqueuer-side end-of-test check, forwarded to test_duration_write().
 * The enqueuer loop stops once this returns 0.
 */
static int test_duration_enqueue(void)
{
	int keep_running = test_duration_write();

	return keep_running;
}
    108 
/* Dequeue attempts made by the current thread (successful or not). */
static DEFINE_URCU_TLS(unsigned long long, nr_dequeues);
/* Enqueue attempts made by the current thread (successful or not). */
static DEFINE_URCU_TLS(unsigned long long, nr_enqueues);

/* Dequeues by the current thread that actually returned a node. */
static DEFINE_URCU_TLS(unsigned long long, nr_successful_dequeues);
/* Enqueues by the current thread whose node was allocated and pushed. */
static DEFINE_URCU_TLS(unsigned long long, nr_successful_enqueues);

/* Number of enqueuer threads (second positional argument). */
static unsigned int nr_enqueuers;
/* Number of dequeuer threads (first positional argument). */
static unsigned int nr_dequeuers;
    117 
/* Stack element: allocated by enqueuers, freed once popped. */
struct test {
	struct cds_lfs_node_rcu list;	/* linkage into the lock-free stack */
	struct rcu_head rcu;		/* passed to call_rcu() to defer the free() */
};

/* The stack shared by all enqueuer and dequeuer threads. */
static struct cds_lfs_stack_rcu s;
    124 
    125 static
    126 void *thr_enqueuer(void *_count)
    127 {
    128 	unsigned long long *count = _count;
    129 
    130 	printf_verbose("thread_begin %s, tid %lu\n",
    131 			"enqueuer", urcu_get_thread_id());
    132 
    133 	set_affinity();
    134 
    135 	rcu_register_thread();
    136 
    137 	wait_until_go();
    138 
    139 	for (;;) {
    140 		struct test *node = malloc(sizeof(*node));
    141 		if (!node)
    142 			goto fail;
    143 		cds_lfs_node_init_rcu(&node->list);
    144 		/* No rcu read-side is needed for push */
    145 		cds_lfs_push_rcu(&s, &node->list);
    146 		URCU_TLS(nr_successful_enqueues)++;
    147 
    148 		if (caa_unlikely(wdelay))
    149 			loop_sleep(wdelay);
    150 fail:
    151 		URCU_TLS(nr_enqueues)++;
    152 		if (caa_unlikely(!test_duration_enqueue()))
    153 			break;
    154 	}
    155 
    156 	rcu_unregister_thread();
    157 
    158 	count[0] = URCU_TLS(nr_enqueues);
    159 	count[1] = URCU_TLS(nr_successful_enqueues);
    160 	printf_verbose("enqueuer thread_end, tid %lu, "
    161 			"enqueues %llu successful_enqueues %llu\n",
    162 			urcu_get_thread_id(),
    163 			URCU_TLS(nr_enqueues),
    164 			URCU_TLS(nr_successful_enqueues));
    165 	return ((void*)1);
    166 
    167 }
    168 
    169 static
    170 void free_node_cb(struct rcu_head *head)
    171 {
    172 	struct test *node =
    173 		caa_container_of(head, struct test, rcu);
    174 	free(node);
    175 }
    176 
    177 static
    178 void *thr_dequeuer(void *_count)
    179 {
    180 	unsigned long long *count = _count;
    181 
    182 	printf_verbose("thread_begin %s, tid %lu\n",
    183 			"dequeuer", urcu_get_thread_id());
    184 
    185 	set_affinity();
    186 
    187 	rcu_register_thread();
    188 
    189 	wait_until_go();
    190 
    191 	for (;;) {
    192 		struct cds_lfs_node_rcu *snode;
    193 
    194 		rcu_read_lock();
    195 		snode = cds_lfs_pop_rcu(&s);
    196 		rcu_read_unlock();
    197 		if (snode) {
    198 			struct test *node;
    199 
    200 			node = caa_container_of(snode, struct test, list);
    201 			call_rcu(&node->rcu, free_node_cb);
    202 			URCU_TLS(nr_successful_dequeues)++;
    203 		}
    204 		URCU_TLS(nr_dequeues)++;
    205 		if (caa_unlikely(!test_duration_dequeue()))
    206 			break;
    207 		if (caa_unlikely(rduration))
    208 			loop_sleep(rduration);
    209 	}
    210 
    211 	rcu_unregister_thread();
    212 
    213 	printf_verbose("dequeuer thread_end, tid %lu, "
    214 			"dequeues %llu, successful_dequeues %llu\n",
    215 			urcu_get_thread_id(),
    216 			URCU_TLS(nr_dequeues),
    217 			URCU_TLS(nr_successful_dequeues));
    218 	count[0] = URCU_TLS(nr_dequeues);
    219 	count[1] = URCU_TLS(nr_successful_dequeues);
    220 	return ((void*)2);
    221 }
    222 
    223 static
    224 void test_end(unsigned long long *nr_dequeues_l)
    225 {
    226 	struct cds_lfs_node_rcu *snode;
    227 
    228 	do {
    229 		snode = cds_lfs_pop_rcu(&s);
    230 		if (snode) {
    231 			struct test *node;
    232 
    233 			node = caa_container_of(snode, struct test, list);
    234 			free(node);
    235 			(*nr_dequeues_l)++;
    236 		}
    237 	} while (snode);
    238 }
    239 
/* Print the command-line synopsis, using argv[0] as program name. */
static
void show_usage(char **argv)
{
	const char *progname = argv[0];

	printf("Usage : %s nr_dequeuers nr_enqueuers duration (s) <OPTIONS>\n",
		progname);
	fputs("OPTIONS:\n", stdout);
	fputs("	[-d delay] (enqueuer period (in loops))\n", stdout);
	fputs("	[-c duration] (dequeuer period (in loops))\n", stdout);
	fputs("	[-v] (verbose output)\n", stdout);
	fputs("	[-a cpu#] [-a cpu#]... (affinity)\n", stdout);
	fputs("\n", stdout);
}
    252 
    253 int main(int argc, char **argv)
    254 {
    255 	int err;
    256 	pthread_t *tid_enqueuer, *tid_dequeuer;
    257 	void *tret;
    258 	unsigned long long *count_enqueuer, *count_dequeuer;
    259 	unsigned long long tot_enqueues = 0, tot_dequeues = 0;
    260 	unsigned long long tot_successful_enqueues = 0,
    261 			   tot_successful_dequeues = 0;
    262 	unsigned long long end_dequeues = 0;
    263 	int i, a;
    264 	unsigned int i_thr;
    265 
    266 	if (argc < 4) {
    267 		show_usage(argv);
    268 		return -1;
    269 	}
    270 
    271 	err = sscanf(argv[1], "%u", &nr_dequeuers);
    272 	if (err != 1) {
    273 		show_usage(argv);
    274 		return -1;
    275 	}
    276 
    277 	err = sscanf(argv[2], "%u", &nr_enqueuers);
    278 	if (err != 1) {
    279 		show_usage(argv);
    280 		return -1;
    281 	}
    282 
    283 	err = sscanf(argv[3], "%lu", &duration);
    284 	if (err != 1) {
    285 		show_usage(argv);
    286 		return -1;
    287 	}
    288 
    289 	for (i = 4; i < argc; i++) {
    290 		if (argv[i][0] != '-')
    291 			continue;
    292 		switch (argv[i][1]) {
    293 		case 'a':
    294 			if (argc < i + 2) {
    295 				show_usage(argv);
    296 				return -1;
    297 			}
    298 			a = atoi(argv[++i]);
    299 			cpu_affinities[next_aff++] = a;
    300 			use_affinity = 1;
    301 			printf_verbose("Adding CPU %d affinity\n", a);
    302 			break;
    303 		case 'c':
    304 			if (argc < i + 2) {
    305 				show_usage(argv);
    306 				return -1;
    307 			}
    308 			rduration = atol(argv[++i]);
    309 			break;
    310 		case 'd':
    311 			if (argc < i + 2) {
    312 				show_usage(argv);
    313 				return -1;
    314 			}
    315 			wdelay = atol(argv[++i]);
    316 			break;
    317 		case 'v':
    318 			verbose_mode = 1;
    319 			break;
    320 		}
    321 	}
    322 
    323 	printf_verbose("running test for %lu seconds, %u enqueuers, "
    324 		       "%u dequeuers.\n",
    325 		       duration, nr_enqueuers, nr_dequeuers);
    326 	printf_verbose("Writer delay : %lu loops.\n", rduration);
    327 	printf_verbose("Reader duration : %lu loops.\n", wdelay);
    328 	printf_verbose("thread %-6s, tid %lu\n",
    329 			"main", urcu_get_thread_id());
    330 
    331 	tid_enqueuer = calloc(nr_enqueuers, sizeof(*tid_enqueuer));
    332 	tid_dequeuer = calloc(nr_dequeuers, sizeof(*tid_dequeuer));
    333 	count_enqueuer = calloc(nr_enqueuers, 2 * sizeof(*count_enqueuer));
    334 	count_dequeuer = calloc(nr_dequeuers, 2 * sizeof(*count_dequeuer));
    335 	cds_lfs_init_rcu(&s);
    336 	err = create_all_cpu_call_rcu_data(0);
    337 	if (err) {
    338 		printf("Per-CPU call_rcu() worker threads unavailable. Using default global worker thread.\n");
    339 	}
    340 
    341 	next_aff = 0;
    342 
    343 	for (i_thr = 0; i_thr < nr_enqueuers; i_thr++) {
    344 		err = pthread_create(&tid_enqueuer[i_thr], NULL, thr_enqueuer,
    345 				     &count_enqueuer[2 * i_thr]);
    346 		if (err != 0)
    347 			exit(1);
    348 	}
    349 	for (i_thr = 0; i_thr < nr_dequeuers; i_thr++) {
    350 		err = pthread_create(&tid_dequeuer[i_thr], NULL, thr_dequeuer,
    351 				     &count_dequeuer[2 * i_thr]);
    352 		if (err != 0)
    353 			exit(1);
    354 	}
    355 
    356 	cmm_smp_mb();
    357 
    358 	begin_test();
    359 
    360 	for (i_thr = 0; i_thr < duration; i_thr++) {
    361 		sleep(1);
    362 		if (verbose_mode) {
    363 			fwrite(".", sizeof(char), 1, stdout);
    364 			fflush(stdout);
    365 		}
    366 	}
    367 
    368 	end_test();
    369 
    370 	for (i_thr = 0; i_thr < nr_enqueuers; i_thr++) {
    371 		err = pthread_join(tid_enqueuer[i_thr], &tret);
    372 		if (err != 0)
    373 			exit(1);
    374 		tot_enqueues += count_enqueuer[2 * i_thr];
    375 		tot_successful_enqueues += count_enqueuer[2 * i_thr + 1];
    376 	}
    377 	for (i_thr = 0; i_thr < nr_dequeuers; i_thr++) {
    378 		err = pthread_join(tid_dequeuer[i_thr], &tret);
    379 		if (err != 0)
    380 			exit(1);
    381 		tot_dequeues += count_dequeuer[2 * i_thr];
    382 		tot_successful_dequeues += count_dequeuer[2 * i_thr + 1];
    383 	}
    384 
    385 	test_end(&end_dequeues);
    386 
    387 	printf_verbose("total number of enqueues : %llu, dequeues %llu\n",
    388 		       tot_enqueues, tot_dequeues);
    389 	printf_verbose("total number of successful enqueues : %llu, "
    390 		       "successful dequeues %llu\n",
    391 		       tot_successful_enqueues, tot_successful_dequeues);
    392 	printf("SUMMARY %-25s testdur %4lu nr_enqueuers %3u wdelay %6lu "
    393 		"nr_dequeuers %3u "
    394 		"rdur %6lu nr_enqueues %12llu nr_dequeues %12llu "
    395 		"successful enqueues %12llu successful dequeues %12llu "
    396 		"end_dequeues %llu nr_ops %12llu\n",
    397 		argv[0], duration, nr_enqueuers, wdelay,
    398 		nr_dequeuers, rduration, tot_enqueues, tot_dequeues,
    399 		tot_successful_enqueues,
    400 		tot_successful_dequeues, end_dequeues,
    401 		tot_enqueues + tot_dequeues);
    402 	if (tot_successful_enqueues != tot_successful_dequeues + end_dequeues)
    403 		printf("WARNING! Discrepancy between nr succ. enqueues %llu vs "
    404 		       "succ. dequeues + end dequeues %llu.\n",
    405 		       tot_successful_enqueues,
    406 		       tot_successful_dequeues + end_dequeues);
    407 
    408 	free_all_cpu_call_rcu_data();
    409 	free(count_enqueuer);
    410 	free(count_dequeuer);
    411 	free(tid_enqueuer);
    412 	free(tid_dequeuer);
    413 	return 0;
    414 }
    415