1/*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include <assert.h>
28#include <inttypes.h>
29
30#include "util/hash_table.h"
31#include "util/slab.h"
32
33#include "drm/freedreno_ringbuffer.h"
34#include "msm_priv.h"
35
36/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
37 * by avoiding the additional tracking necessary to build cmds/relocs tables
38 * (but still builds a bos table)
39 */
40
41
/* Initial bo size for FD_RINGBUFFER_GROWABLE rb's; grown on demand: */
#define INIT_SIZE 0x1000

/* Protects the per-bo idx/current_submit_seqno cache shared across all
 * submits (see append_bo()):
 */
static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
45
46
/* Submit state for the softpin path: only a bos table is built (no
 * cmds/relocs tables), plus bookkeeping for streaming-ring sub-allocation.
 */
struct msm_submit_sp {
	struct fd_submit base;

	/* Parallel arrays; an index returned by append_bo() is valid in both: */
	DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
	DECLARE_ARRAY(struct fd_bo *, bos);

	/* Compared against bo->current_submit_seqno in append_bo() as a
	 * fast-path check that skips the hash-table lookup:
	 */
	unsigned seqno;

	/* maps fd_bo to idx in bos table: */
	struct hash_table *bo_table;

	/* Pool the msm_ringbuffer_sp objects for this submit come from: */
	struct slab_mempool ring_pool;

	/* The ring flushed by flush() (FD_RINGBUFFER_PRIMARY); holds a ref: */
	struct fd_ringbuffer *primary;

	/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
	 * the same underlying bo)..
	 *
	 * We also rely on previous stateobj having been fully constructed
	 * so we can reclaim extra space at its end.
	 */
	struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);
71
/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
	struct fd_bo *ring_bo;   /* holds a ref, dropped on ring destroy */
	unsigned size;           /* finalized cmdstream size in bytes */
};

/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
 * later copy into the submit when the stateobj rb is later referenced by
 * a regular rb:
 */
struct msm_reloc_bo_sp {
	struct fd_bo *bo;        /* holds a ref, dropped on ring destroy */
	unsigned flags;          /* FD_RELOC_READ/WRITE/DUMP */
};

struct msm_ringbuffer_sp {
	struct fd_ringbuffer base;

	/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
	unsigned offset;

// TODO check disasm.. hopefully compilers CSE can realize that
// reloc_bos and cmds are at the same offsets and optimize some
// divergent cases into single case
	union {
		/* for _FD_RINGBUFFER_OBJECT case: */
		struct {
			struct fd_pipe *pipe;
			DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
		};
		/* for other cases: */
		struct {
			struct fd_submit *submit;
			DECLARE_ARRAY(struct msm_cmd_sp, cmds);
		};
	} u;

	/* Current (not-yet-finalized) cmdstream bo: */
	struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
115
/* Forward declarations for helpers defined later in this file: */
static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
		struct msm_ringbuffer_sp *msm_ring,
		uint32_t size, enum fd_ringbuffer_flags flags);
120
/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
	struct msm_bo *msm_bo = to_msm_bo(bo);
	uint32_t idx;
	/* The global lock protects the per-bo idx/current_submit_seqno cache,
	 * which is shared across all submits:
	 */
	pthread_mutex_lock(&idx_lock);
	if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
		/* Fast path: this submit already saw the bo, cached idx is valid: */
		idx = msm_bo->idx;
	} else {
		uint32_t hash = _mesa_hash_pointer(bo);
		struct hash_entry *entry;

		entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
		if (entry) {
			/* found */
			idx = (uint32_t)(uintptr_t)entry->data;
		} else {
			/* Not yet tracked; append to both parallel tables (the two
			 * APPENDs yield the same idx):
			 */
			idx = APPEND(submit, submit_bos);
			idx = APPEND(submit, bos);

			submit->submit_bos[idx].flags = 0;
			submit->submit_bos[idx].handle = bo->handle;
			submit->submit_bos[idx].presumed = 0;

			/* The submit holds a ref until msm_submit_sp_destroy(): */
			submit->bos[idx] = fd_bo_ref(bo);

			_mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
					(void *)(uintptr_t)idx);
		}
		/* Refresh the fast-path cache for this submit: */
		msm_bo->current_submit_seqno = submit->seqno;
		msm_bo->idx = idx;
	}
	pthread_mutex_unlock(&idx_lock);
	/* NOTE(review): flags accumulate outside the lock -- presumably a
	 * single submit is never appended-to concurrently; confirm callers.
	 */
	if (flags & FD_RELOC_READ)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
	if (flags & FD_RELOC_WRITE)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
	if (flags & FD_RELOC_DUMP)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;
	return idx;
}
163
164static void
165msm_submit_suballoc_ring_bo(struct fd_submit *submit,
166		struct msm_ringbuffer_sp *msm_ring, uint32_t size)
167{
168	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
169	unsigned suballoc_offset = 0;
170	struct fd_bo *suballoc_bo = NULL;
171
172	if (msm_submit->suballoc_ring) {
173		struct msm_ringbuffer_sp *suballoc_ring =
174				to_msm_ringbuffer_sp(msm_submit->suballoc_ring);
175
176		suballoc_bo = suballoc_ring->ring_bo;
177		suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
178				suballoc_ring->offset;
179
180		suballoc_offset = align(suballoc_offset, 0x10);
181
182		if ((size + suballoc_offset) > suballoc_bo->size) {
183			suballoc_bo = NULL;
184		}
185	}
186
187	if (!suballoc_bo) {
188		// TODO possibly larger size for streaming bo?
189		msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev,
190				0x8000, DRM_FREEDRENO_GEM_GPUREADONLY);
191		msm_ring->offset = 0;
192	} else {
193		msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
194		msm_ring->offset = suballoc_offset;
195	}
196
197	struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
198
199	msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
200
201	if (old_suballoc_ring)
202		fd_ringbuffer_del(old_suballoc_ring);
203}
204
/* Create a ringbuffer owned by this submit.  Streaming rings are
 * sub-allocated from a shared bo; growable rings start at INIT_SIZE and
 * grow on demand; others get a bo of exactly 'size'.
 */
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	struct msm_ringbuffer_sp *msm_ring;

	msm_ring = slab_alloc_st(&msm_submit->ring_pool);

	msm_ring->u.submit = submit;

	/* NOTE: needs to be before _suballoc_ring_bo() since it could
	 * increment the refcnt of the current ring
	 */
	msm_ring->base.refcnt = 1;

	if (flags & FD_RINGBUFFER_STREAMING) {
		msm_submit_suballoc_ring_bo(submit, msm_ring, size);
	} else {
		/* Growable rings get grown via msm_ringbuffer_sp_grow(): */
		if (flags & FD_RINGBUFFER_GROWABLE)
			size = INIT_SIZE;

		msm_ring->offset = 0;
		msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size,
				DRM_FREEDRENO_GEM_GPUREADONLY);
	}

	if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
		return NULL;

	/* flush() requires exactly one primary ring per submit: */
	if (flags & FD_RINGBUFFER_PRIMARY) {
		debug_assert(!msm_submit->primary);
		msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
	}

	return &msm_ring->base;
}
242
243static int
244msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
245		int *out_fence_fd, uint32_t *out_fence)
246{
247	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
248	struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
249	struct drm_msm_gem_submit req = {
250			.flags = msm_pipe->pipe,
251			.queueid = msm_pipe->queue_id,
252	};
253	int ret;
254
255	debug_assert(msm_submit->primary);
256	finalize_current_cmd(msm_submit->primary);
257
258	struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
259	struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];
260
261	for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
262		cmds[i].type = MSM_SUBMIT_CMD_BUF;
263		cmds[i].submit_idx = append_bo(msm_submit,
264				primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
265		cmds[i].submit_offset = primary->offset;
266		cmds[i].size = primary->u.cmds[i].size;
267		cmds[i].pad = 0;
268		cmds[i].nr_relocs = 0;
269	}
270
271	if (in_fence_fd != -1) {
272		req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
273		req.fence_fd = in_fence_fd;
274	}
275
276	if (out_fence_fd) {
277		req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
278	}
279
280	/* needs to be after get_cmd() as that could create bos/cmds table: */
281	req.bos = VOID2U64(msm_submit->submit_bos),
282	req.nr_bos = msm_submit->nr_submit_bos;
283	req.cmds = VOID2U64(cmds),
284	req.nr_cmds = primary->u.nr_cmds;
285
286	DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);
287
288	ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
289			&req, sizeof(req));
290	if (ret) {
291		ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
292		msm_dump_submit(&req);
293	} else if (!ret) {
294		if (out_fence)
295			*out_fence = req.fence;
296
297		if (out_fence_fd)
298			*out_fence_fd = req.fence_fd;
299	}
300
301	return ret;
302}
303
304static void
305msm_submit_sp_destroy(struct fd_submit *submit)
306{
307	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
308
309	if (msm_submit->primary)
310		fd_ringbuffer_del(msm_submit->primary);
311	if (msm_submit->suballoc_ring)
312		fd_ringbuffer_del(msm_submit->suballoc_ring);
313
314	_mesa_hash_table_destroy(msm_submit->bo_table, NULL);
315
316	// TODO it would be nice to have a way to debug_assert() if all
317	// rb's haven't been free'd back to the slab, because that is
318	// an indication that we are leaking bo's
319	slab_destroy(&msm_submit->ring_pool);
320
321	for (unsigned i = 0; i < msm_submit->nr_bos; i++)
322		fd_bo_del(msm_submit->bos[i]);
323
324	free(msm_submit->submit_bos);
325	free(msm_submit->bos);
326	free(msm_submit);
327}
328
/* Submit vtable for the softpin path: */
static const struct fd_submit_funcs submit_funcs = {
		.new_ringbuffer = msm_submit_sp_new_ringbuffer,
		.flush = msm_submit_sp_flush,
		.destroy = msm_submit_sp_destroy,
};
334
335struct fd_submit *
336msm_submit_sp_new(struct fd_pipe *pipe)
337{
338	struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
339	struct fd_submit *submit;
340	static unsigned submit_cnt = 0;
341
342	msm_submit->seqno = ++submit_cnt;
343	msm_submit->bo_table = _mesa_hash_table_create(NULL,
344			_mesa_hash_pointer, _mesa_key_pointer_equal);
345	// TODO tune size:
346	slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
347
348	submit = &msm_submit->base;
349	submit->pipe = pipe;
350	submit->funcs = &submit_funcs;
351
352	return submit;
353}
354
355
356static void
357finalize_current_cmd(struct fd_ringbuffer *ring)
358{
359	debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
360
361	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
362	unsigned idx = APPEND(&msm_ring->u, cmds);
363
364	msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
365	msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
366}
367
/* Grow a FD_RINGBUFFER_GROWABLE rb: finalize the current cmd buffer and
 * switch to a fresh bo of 'size' bytes for subsequent commands.
 */
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe = msm_ring->u.submit->pipe;

	debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

	/* Records the old bo in u.cmds, taking its own ref: */
	finalize_current_cmd(ring);

	/* Safe to drop our ref now; the cmds-table ref keeps the bo alive: */
	fd_bo_del(msm_ring->ring_bo);
	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
			DRM_FREEDRENO_GEM_GPUREADONLY);

	ring->start = fd_bo_map(msm_ring->ring_bo);
	/* size is in bytes, start[] indexes dwords: */
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;
	ring->size = size;
}
387
/* Emit the iova of reloc->bo (adjusted by offset/shift/or) into the ring,
 * and track the bo for the eventual submit.  For stateobj rings the bo is
 * stashed in reloc_bos (no submit exists yet); otherwise it is appended to
 * the submit's bos table directly.
 */
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
		const struct fd_reloc *reloc)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe;

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		unsigned idx = APPEND(&msm_ring->u, reloc_bos);

		/* Ref is dropped in msm_ringbuffer_sp_destroy(): */
		msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
		msm_ring->u.reloc_bos[idx].flags = reloc->flags;

		pipe = msm_ring->u.pipe;
	} else {
		struct msm_submit_sp *msm_submit =
				to_msm_submit_sp(msm_ring->u.submit);

		append_bo(msm_submit, reloc->bo, reloc->flags);

		pipe = msm_ring->u.submit->pipe;
	}

	uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
	uint32_t dword = iova;
	int shift = reloc->shift;

	/* Negative shift means shift right: */
	if (shift < 0)
		dword >>= -shift;
	else
		dword <<= shift;

	(*ring->cur++) = dword | reloc->or;

	/* gpu_id >= 500 uses 64b addresses; also emit the high dword: */
	if (pipe->gpu_id >= 500) {
		dword = iova >> 32;
		shift -= 32;

		if (shift < 0)
			dword >>= -shift;
		else
			dword <<= shift;

		(*ring->cur++) = dword | reloc->orhi;
	}
}
434
/* Emit a reference to a target rb into 'ring'.  For growable targets,
 * cmd_idx selects which finalized cmd buffer to reference; out-of-range
 * cmd_idx (or non-growable targets) references the current buffer.  Any
 * bos tracked by a stateobj target are propagated into the submit (or
 * into 'ring' itself when 'ring' is also a stateobj).  Returns the size
 * in bytes of the referenced cmdstream.
 */
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
		struct fd_ringbuffer *target, uint32_t cmd_idx)
{
	struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
	struct fd_bo *bo;
	uint32_t size;

	if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
			(cmd_idx < msm_target->u.nr_cmds)) {
		/* Reference an already-finalized cmd buffer: */
		bo   = msm_target->u.cmds[cmd_idx].ring_bo;
		size = msm_target->u.cmds[cmd_idx].size;
	} else {
		/* Reference the current (still-open) buffer: */
		bo   = msm_target->ring_bo;
		size = offset_bytes(target->cur, target->start);
	}

	msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
		.bo     = bo,
		.flags  = FD_RELOC_READ | FD_RELOC_DUMP,
		.offset = msm_target->offset,
	});

	if (!(target->flags & _FD_RINGBUFFER_OBJECT))
		return size;

	/* Stateobj targets carry their own reloc_bos list which must be
	 * copied onward:
	 */
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		/* stateobj referencing stateobj: copy (and re-ref) the entries: */
		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			unsigned idx = APPEND(&msm_ring->u, reloc_bos);

			msm_ring->u.reloc_bos[idx].bo =
				fd_bo_ref(msm_target->u.reloc_bos[i].bo);
			msm_ring->u.reloc_bos[idx].flags =
				msm_target->u.reloc_bos[i].flags;
		}
	} else {
		// TODO it would be nice to know whether we have already
		// seen this target before.  But hopefully we hit the
		// append_bo() fast path enough for this to not matter:
		struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
					msm_target->u.reloc_bos[i].flags);
		}
	}

	return size;
}
486
487static uint32_t
488msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
489{
490	if (ring->flags & FD_RINGBUFFER_GROWABLE)
491		return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
492	return 1;
493}
494
495static void
496msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
497{
498	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
499
500	fd_bo_del(msm_ring->ring_bo);
501
502	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
503		for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
504			fd_bo_del(msm_ring->u.reloc_bos[i].bo);
505		}
506
507		free(msm_ring);
508	} else {
509		struct fd_submit *submit = msm_ring->u.submit;
510
511		for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
512			fd_bo_del(msm_ring->u.cmds[i].ring_bo);
513		}
514
515		slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
516	}
517}
518
/* Ringbuffer vtable for the softpin path: */
static const struct fd_ringbuffer_funcs ring_funcs = {
		.grow = msm_ringbuffer_sp_grow,
		.emit_reloc = msm_ringbuffer_sp_emit_reloc,
		.emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
		.cmd_count = msm_ringbuffer_sp_cmd_count,
		.destroy = msm_ringbuffer_sp_destroy,
};
526
/* Common ring setup: map the (already-allocated) bo, point start/cur/end
 * at the possibly sub-allocated region, and reset the tracking arrays.
 * Returns the embedded fd_ringbuffer.
 */
static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct fd_ringbuffer *ring = &msm_ring->base;

	debug_assert(msm_ring->ring_bo);

	uint8_t *base = fd_bo_map(msm_ring->ring_bo);
	ring->start = (void *)(base + msm_ring->offset);
	/* size is in bytes, start[] indexes dwords: */
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;

	ring->size = size;
	ring->flags = flags;

	ring->funcs = &ring_funcs;

	// TODO initializing these could probably be conditional on flags
	// since unneed for FD_RINGBUFFER_STAGING case..
	/* Both union arms are reset; they alias the same storage: */
	msm_ring->u.cmds = NULL;
	msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

	msm_ring->u.reloc_bos = NULL;
	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

	return ring;
}
555
556struct fd_ringbuffer *
557msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
558{
559	struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));
560
561	msm_ring->u.pipe = pipe;
562	msm_ring->offset = 0;
563	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
564			DRM_FREEDRENO_GEM_GPUREADONLY);
565	msm_ring->base.refcnt = 1;
566
567	return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
568}
569