      1 /*	$NetBSD: sched_main.c,v 1.1.1.1 2021/12/18 20:15:53 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2015 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  */
     25 
     26 /**
     27  * DOC: Overview
     28  *
     29  * The GPU scheduler provides entities which allow userspace to push jobs
     30  * into software queues which are then scheduled on a hardware run queue.
     31  * The software queues have a priority among them. The scheduler selects the entities
     32  * from the run queue using a FIFO. The scheduler provides dependency handling
     33  * features among jobs. The driver is supposed to provide callback functions for
      34  * backend operations to the scheduler, like submitting a job to the hardware
      35  * run queue, returning the dependencies of a job, etc.
     36  *
     37  * The organisation of the scheduler is the following:
     38  *
     39  * 1. Each hw run queue has one scheduler
     40  * 2. Each scheduler has multiple run queues with different priorities
      41  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
     42  * 3. Each scheduler run queue has a queue of entities to schedule
     43  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
     44  *    the hardware.
     45  *
      46  * The jobs in an entity are always scheduled in the order in which they were pushed.
     47  */
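
         /*
          * A minimal sketch of how a driver typically wires the pieces above
          * together. The "my_*" names, the ring/ctx structures and the entity's
          * scheduler list below are hypothetical and for illustration only; the
          * drm_sched_*() calls are the real scheduler API (the backend ops are
          * sketched near drm_sched_init() below):
          *
          *	struct drm_gpu_scheduler *sched_list[] = { &ring->sched };
          *
          *	// One scheduler per hw run queue, created at driver init time.
          *	drm_sched_init(&ring->sched, &my_sched_ops, hw_submission,
          *		       hang_limit, timeout, "my-ring");
          *
          *	// One entity per context, feeding one of the schedulers.
          *	drm_sched_entity_init(&ctx->entity, DRM_SCHED_PRIORITY_NORMAL,
          *			      sched_list, 1, NULL);
          *
          *	// Per submission: init a job on the entity, then push it.
          *	drm_sched_job_init(&job->base, &ctx->entity, ctx);
          *	drm_sched_entity_push_job(&job->base, &ctx->entity);
          */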
     48 
     49 #include <sys/cdefs.h>
     50 __KERNEL_RCSID(0, "$NetBSD: sched_main.c,v 1.1.1.1 2021/12/18 20:15:53 riastradh Exp $");
     51 
     52 #include <linux/kthread.h>
     53 #include <linux/wait.h>
     54 #include <linux/sched.h>
     55 #include <linux/completion.h>
     56 #include <uapi/linux/sched/types.h>
     57 
     58 #include <drm/drm_print.h>
     59 #include <drm/gpu_scheduler.h>
     60 #include <drm/spsc_queue.h>
     61 
     62 #define CREATE_TRACE_POINTS
     63 #include "gpu_scheduler_trace.h"
     64 
     65 #define to_drm_sched_job(sched_job)		\
     66 		container_of((sched_job), struct drm_sched_job, queue_node)
     67 
     68 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
     69 
     70 /**
     71  * drm_sched_rq_init - initialize a given run queue struct
      72  * @sched: scheduler instance
      73  * @rq: scheduler run queue
      74  *
      75  * Initializes a scheduler runqueue.
     76  */
     77 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
     78 			      struct drm_sched_rq *rq)
     79 {
     80 	spin_lock_init(&rq->lock);
     81 	INIT_LIST_HEAD(&rq->entities);
     82 	rq->current_entity = NULL;
     83 	rq->sched = sched;
     84 }
     85 
     86 /**
     87  * drm_sched_rq_add_entity - add an entity
     88  *
     89  * @rq: scheduler run queue
     90  * @entity: scheduler entity
     91  *
     92  * Adds a scheduler entity to the run queue.
     93  */
     94 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
     95 			     struct drm_sched_entity *entity)
     96 {
     97 	if (!list_empty(&entity->list))
     98 		return;
     99 	spin_lock(&rq->lock);
    100 	atomic_inc(&rq->sched->score);
    101 	list_add_tail(&entity->list, &rq->entities);
    102 	spin_unlock(&rq->lock);
    103 }
    104 
    105 /**
    106  * drm_sched_rq_remove_entity - remove an entity
    107  *
    108  * @rq: scheduler run queue
    109  * @entity: scheduler entity
    110  *
    111  * Removes a scheduler entity from the run queue.
    112  */
    113 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
    114 				struct drm_sched_entity *entity)
    115 {
    116 	if (list_empty(&entity->list))
    117 		return;
    118 	spin_lock(&rq->lock);
    119 	atomic_dec(&rq->sched->score);
    120 	list_del_init(&entity->list);
    121 	if (rq->current_entity == entity)
    122 		rq->current_entity = NULL;
    123 	spin_unlock(&rq->lock);
    124 }
    125 
    126 /**
    127  * drm_sched_rq_select_entity - Select an entity which could provide a job to run
    128  *
    129  * @rq: scheduler run queue to check.
    130  *
    131  * Try to find a ready entity, returns NULL if none found.
    132  */
    133 static struct drm_sched_entity *
    134 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
    135 {
    136 	struct drm_sched_entity *entity;
    137 
    138 	spin_lock(&rq->lock);
    139 
    140 	entity = rq->current_entity;
    141 	if (entity) {
    142 		list_for_each_entry_continue(entity, &rq->entities, list) {
    143 			if (drm_sched_entity_is_ready(entity)) {
    144 				rq->current_entity = entity;
    145 				reinit_completion(&entity->entity_idle);
    146 				spin_unlock(&rq->lock);
    147 				return entity;
    148 			}
    149 		}
    150 	}
    151 
    152 	list_for_each_entry(entity, &rq->entities, list) {
    153 
    154 		if (drm_sched_entity_is_ready(entity)) {
    155 			rq->current_entity = entity;
    156 			reinit_completion(&entity->entity_idle);
    157 			spin_unlock(&rq->lock);
    158 			return entity;
    159 		}
    160 
    161 		if (entity == rq->current_entity)
    162 			break;
    163 	}
    164 
    165 	spin_unlock(&rq->lock);
    166 
    167 	return NULL;
    168 }
    169 
    170 /**
    171  * drm_sched_dependency_optimized
    172  *
    173  * @fence: the dependency fence
    174  * @entity: the entity which depends on the above fence
    175  *
    176  * Returns true if the dependency can be optimized and false otherwise
    177  */
    178 bool drm_sched_dependency_optimized(struct dma_fence* fence,
    179 				    struct drm_sched_entity *entity)
    180 {
    181 	struct drm_gpu_scheduler *sched = entity->rq->sched;
    182 	struct drm_sched_fence *s_fence;
    183 
    184 	if (!fence || dma_fence_is_signaled(fence))
    185 		return false;
    186 	if (fence->context == entity->fence_context)
    187 		return true;
    188 	s_fence = to_drm_sched_fence(fence);
    189 	if (s_fence && s_fence->sched == sched)
    190 		return true;
    191 
    192 	return false;
    193 }
    194 EXPORT_SYMBOL(drm_sched_dependency_optimized);
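
         /*
          * A brief usage sketch (the surrounding names are hypothetical): a
          * driver's submission path may use this to skip an explicit
          * synchronization step when the scheduler already orders the two
          * fences for it.
          *
          *	// Only add an explicit wait when the scheduler cannot already
          *	// guarantee the ordering for us.
          *	if (!drm_sched_dependency_optimized(fence, &ctx->entity))
          *		my_add_explicit_wait(ring, fence);	// assumed driver helper
          */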
    195 
    196 /**
    197  * drm_sched_start_timeout - start timeout for reset worker
    198  *
    199  * @sched: scheduler instance to start the worker for
    200  *
    201  * Start the timeout for the given scheduler.
    202  */
    203 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
    204 {
    205 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
    206 	    !list_empty(&sched->ring_mirror_list))
    207 		schedule_delayed_work(&sched->work_tdr, sched->timeout);
    208 }
    209 
    210 /**
    211  * drm_sched_fault - immediately start timeout handler
    212  *
    213  * @sched: scheduler where the timeout handling should be started.
    214  *
    215  * Start timeout handling immediately when the driver detects a hardware fault.
    216  */
    217 void drm_sched_fault(struct drm_gpu_scheduler *sched)
    218 {
    219 	mod_delayed_work(system_wq, &sched->work_tdr, 0);
    220 }
    221 EXPORT_SYMBOL(drm_sched_fault);
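
         /*
          * A brief usage sketch (hypothetical driver code): a fault or hang
          * interrupt handler can kick the timeout handler right away instead
          * of waiting for the job timeout to expire.
          *
          *	static irqreturn_t my_fault_irq(int irq, void *data)
          *	{
          *		struct my_ring *ring = data;	// assumed driver type
          *
          *		drm_sched_fault(&ring->sched);
          *		return IRQ_HANDLED;
          *	}
          */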
    222 
    223 /**
    224  * drm_sched_suspend_timeout - Suspend scheduler job timeout
    225  *
    226  * @sched: scheduler instance for which to suspend the timeout
    227  *
    228  * Suspend the delayed work timeout for the scheduler. This is done by
     229  * modifying the delayed work timeout to an arbitrarily large value,
    230  * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
    231  * called from an IRQ context.
    232  *
    233  * Returns the timeout remaining
    234  *
    235  */
    236 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
    237 {
    238 	unsigned long sched_timeout, now = jiffies;
    239 
    240 	sched_timeout = sched->work_tdr.timer.expires;
    241 
    242 	/*
    243 	 * Modify the timeout to an arbitrarily large value. This also prevents
     244 	 * the timeout from being restarted when new submissions arrive.
    245 	 */
    246 	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
    247 			&& time_after(sched_timeout, now))
    248 		return sched_timeout - now;
    249 	else
    250 		return sched->timeout;
    251 }
    252 EXPORT_SYMBOL(drm_sched_suspend_timeout);
    253 
    254 /**
    255  * drm_sched_resume_timeout - Resume scheduler job timeout
    256  *
    257  * @sched: scheduler instance for which to resume the timeout
    258  * @remaining: remaining timeout
    259  *
    260  * Resume the delayed work timeout for the scheduler. Note that
    261  * this function can be called from an IRQ context.
    262  */
    263 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
    264 		unsigned long remaining)
    265 {
    266 	unsigned long flags;
    267 
    268 	spin_lock_irqsave(&sched->job_list_lock, flags);
    269 
    270 	if (list_empty(&sched->ring_mirror_list))
    271 		cancel_delayed_work(&sched->work_tdr);
    272 	else
    273 		mod_delayed_work(system_wq, &sched->work_tdr, remaining);
    274 
    275 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
    276 }
    277 EXPORT_SYMBOL(drm_sched_resume_timeout);
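
         /*
          * drm_sched_suspend_timeout() and drm_sched_resume_timeout() are meant
          * to be used as a pair. A minimal sketch, with my_hw_preempt() standing
          * in for a hypothetical driver-specific operation during which the job
          * timeout should not fire:
          *
          *	unsigned long remaining;
          *
          *	remaining = drm_sched_suspend_timeout(&ring->sched);
          *	my_hw_preempt(ring);	// hw intentionally makes no progress here
          *	drm_sched_resume_timeout(&ring->sched, remaining);
          */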
    278 
    279 static void drm_sched_job_begin(struct drm_sched_job *s_job)
    280 {
    281 	struct drm_gpu_scheduler *sched = s_job->sched;
    282 	unsigned long flags;
    283 
    284 	spin_lock_irqsave(&sched->job_list_lock, flags);
    285 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
    286 	drm_sched_start_timeout(sched);
    287 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
    288 }
    289 
    290 static void drm_sched_job_timedout(struct work_struct *work)
    291 {
    292 	struct drm_gpu_scheduler *sched;
    293 	struct drm_sched_job *job;
    294 	unsigned long flags;
    295 
    296 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
    297 
    298 	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
    299 	spin_lock_irqsave(&sched->job_list_lock, flags);
    300 	job = list_first_entry_or_null(&sched->ring_mirror_list,
    301 				       struct drm_sched_job, node);
    302 
    303 	if (job) {
    304 		/*
     305 		 * Remove the bad job so it cannot be freed by a concurrent
     306 		 * drm_sched_get_cleanup_job(). It will be reinserted after
     307 		 * sched->thread is parked, at which point it is safe.
    308 		 */
    309 		list_del_init(&job->node);
    310 		spin_unlock_irqrestore(&sched->job_list_lock, flags);
    311 
    312 		job->sched->ops->timedout_job(job);
    313 
    314 		/*
     315 		 * The guilty job did complete and hence needs to be manually
     316 		 * removed. See the drm_sched_stop() documentation.
    317 		 */
    318 		if (sched->free_guilty) {
    319 			job->sched->ops->free_job(job);
    320 			sched->free_guilty = false;
    321 		}
    322 	} else {
    323 		spin_unlock_irqrestore(&sched->job_list_lock, flags);
    324 	}
    325 
    326 	spin_lock_irqsave(&sched->job_list_lock, flags);
    327 	drm_sched_start_timeout(sched);
    328 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
    329 }
    330 
     331 /**
     332  * drm_sched_increase_karma - Update sched_entity guilty flag
     333  *
     334  * @bad: The job guilty of the timeout
     335  *
     336  * Increment the karma counter on every hang caused by the 'bad' job. If this
     337  * exceeds the hang limit of the scheduler, the respective sched entity is
     338  * marked guilty and jobs from it will not be scheduled any further.
     339  */
    340 void drm_sched_increase_karma(struct drm_sched_job *bad)
    341 {
    342 	int i;
    343 	struct drm_sched_entity *tmp;
    344 	struct drm_sched_entity *entity;
    345 	struct drm_gpu_scheduler *sched = bad->sched;
    346 
     347 	/* Don't increase @bad's karma if it's from the KERNEL RQ, because a
     348 	 * GPU hang can sometimes corrupt kernel jobs (like VM updating jobs),
     349 	 * but kernel jobs are always considered good.
    350 	 */
    351 	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
    352 		atomic_inc(&bad->karma);
    353 		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
    354 		     i++) {
    355 			struct drm_sched_rq *rq = &sched->sched_rq[i];
    356 
    357 			spin_lock(&rq->lock);
    358 			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
    359 				if (bad->s_fence->scheduled.context ==
    360 				    entity->fence_context) {
    361 					if (atomic_read(&bad->karma) >
    362 					    bad->sched->hang_limit)
    363 						if (entity->guilty)
    364 							atomic_set(entity->guilty, 1);
    365 					break;
    366 				}
    367 			}
    368 			spin_unlock(&rq->lock);
    369 			if (&entity->list != &rq->entities)
    370 				break;
    371 		}
    372 	}
    373 }
    374 EXPORT_SYMBOL(drm_sched_increase_karma);
    375 
    376 /**
    377  * drm_sched_stop - stop the scheduler
    378  *
    379  * @sched: scheduler instance
    380  * @bad: job which caused the time out
    381  *
     382  * Stop the scheduler; this also removes and frees all completed jobs.
     383  * Note: the bad job will not be freed as it might be used later, so it is
     384  * the caller's responsibility to release it manually if it is no longer
     385  * part of the mirror list.
    386  *
    387  */
    388 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
    389 {
    390 	struct drm_sched_job *s_job, *tmp;
    391 	unsigned long flags;
    392 
    393 	kthread_park(sched->thread);
    394 
    395 	/*
     396 	 * Reinsert the bad job here - it is now safe because
     397 	 * drm_sched_get_cleanup_job cannot race against us and release the
     398 	 * bad job at this point: we parked (waited for) any in-progress
     399 	 * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
     400 	 * again until the scheduler thread is unparked.
    401 	 */
    402 	if (bad && bad->sched == sched)
    403 		/*
    404 		 * Add at the head of the queue to reflect it was the earliest
    405 		 * job extracted.
    406 		 */
    407 		list_add(&bad->node, &sched->ring_mirror_list);
    408 
    409 	/*
     410 	 * Iterate the job list from later to earlier and either deactivate
     411 	 * the jobs' HW callbacks or remove them from the mirror list if they
     412 	 * have already signaled.
     413 	 * This iteration is thread safe as the sched thread is parked.
    414 	 */
    415 	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
    416 		if (s_job->s_fence->parent &&
    417 		    dma_fence_remove_callback(s_job->s_fence->parent,
    418 					      &s_job->cb)) {
    419 			atomic_dec(&sched->hw_rq_count);
    420 		} else {
    421 			/*
     422 			 * Remove the job from ring_mirror_list.
     423 			 * Locking here guards against a concurrent resume timeout.
    424 			 */
    425 			spin_lock_irqsave(&sched->job_list_lock, flags);
    426 			list_del_init(&s_job->node);
    427 			spin_unlock_irqrestore(&sched->job_list_lock, flags);
    428 
    429 			/*
    430 			 * Wait for job's HW fence callback to finish using s_job
    431 			 * before releasing it.
    432 			 *
    433 			 * Job is still alive so fence refcount at least 1
     434 			 * The job is still alive, so the fence refcount is at least 1.
    435 			dma_fence_wait(&s_job->s_fence->finished, false);
    436 
    437 			/*
    438 			 * We must keep bad job alive for later use during
    439 			 * recovery by some of the drivers but leave a hint
    440 			 * that the guilty job must be released.
    441 			 */
    442 			if (bad != s_job)
    443 				sched->ops->free_job(s_job);
    444 			else
    445 				sched->free_guilty = true;
    446 		}
    447 	}
    448 
    449 	/*
     450 	 * Stop the pending timer in flight, as we rearm it in drm_sched_start.
     451 	 * This prevents pending timeout work in progress from firing right away
     452 	 * after this TDR finishes and before the newly restarted jobs have had
     453 	 * a chance to complete.
    454 	 */
    455 	cancel_delayed_work(&sched->work_tdr);
    456 }
    457 
    458 EXPORT_SYMBOL(drm_sched_stop);
    459 
    460 /**
     461  * drm_sched_start - recover jobs after a reset
    462  *
    463  * @sched: scheduler instance
    464  * @full_recovery: proceed with complete sched restart
    465  *
    466  */
    467 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
    468 {
    469 	struct drm_sched_job *s_job, *tmp;
    470 	unsigned long flags;
    471 	int r;
    472 
    473 	/*
    474 	 * Locking the list is not required here as the sched thread is parked
     475 	 * so no new jobs are being inserted or removed. Also, concurrent
     476 	 * GPU recoveries can't run in parallel.
    477 	 */
    478 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
    479 		struct dma_fence *fence = s_job->s_fence->parent;
    480 
    481 		atomic_inc(&sched->hw_rq_count);
    482 
    483 		if (!full_recovery)
    484 			continue;
    485 
    486 		if (fence) {
    487 			r = dma_fence_add_callback(fence, &s_job->cb,
    488 						   drm_sched_process_job);
    489 			if (r == -ENOENT)
    490 				drm_sched_process_job(fence, &s_job->cb);
    491 			else if (r)
    492 				DRM_ERROR("fence add callback failed (%d)\n",
    493 					  r);
    494 		} else
    495 			drm_sched_process_job(NULL, &s_job->cb);
    496 	}
    497 
    498 	if (full_recovery) {
    499 		spin_lock_irqsave(&sched->job_list_lock, flags);
    500 		drm_sched_start_timeout(sched);
    501 		spin_unlock_irqrestore(&sched->job_list_lock, flags);
    502 	}
    503 
    504 	kthread_unpark(sched->thread);
    505 }
    506 EXPORT_SYMBOL(drm_sched_start);
    507 
    508 /**
     509  * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
    510  *
    511  * @sched: scheduler instance
    512  *
    513  */
    514 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
    515 {
    516 	struct drm_sched_job *s_job, *tmp;
    517 	uint64_t guilty_context;
    518 	bool found_guilty = false;
    519 	struct dma_fence *fence;
    520 
    521 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
    522 		struct drm_sched_fence *s_fence = s_job->s_fence;
    523 
    524 		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
    525 			found_guilty = true;
    526 			guilty_context = s_job->s_fence->scheduled.context;
    527 		}
    528 
    529 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
    530 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
    531 
    532 		dma_fence_put(s_job->s_fence->parent);
    533 		fence = sched->ops->run_job(s_job);
    534 
    535 		if (IS_ERR_OR_NULL(fence)) {
    536 			if (IS_ERR(fence))
    537 				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
    538 
    539 			s_job->s_fence->parent = NULL;
    540 		} else {
    541 			s_job->s_fence->parent = fence;
    542 		}
    543 
    544 
    545 	}
    546 }
    547 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
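
         /*
          * drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs()
          * and drm_sched_start() together form the usual timeout-recovery sequence.
          * A minimal sketch of a driver's timedout_job callback, with my_hw_reset()
          * as a hypothetical driver-specific helper:
          *
          *	static void my_job_timedout(struct drm_sched_job *bad)
          *	{
          *		struct drm_gpu_scheduler *sched = bad->sched;
          *
          *		drm_sched_stop(sched, bad);	// park the thread, detach HW callbacks
          *		drm_sched_increase_karma(bad);	// mark the offending entity
          *		my_hw_reset(sched);		// driver-specific engine reset
          *		drm_sched_resubmit_jobs(sched);	// re-run the pending jobs
          *		drm_sched_start(sched, true);	// unpark and rearm the timeout
          *	}
          */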
    548 
    549 /**
    550  * drm_sched_job_init - init a scheduler job
    551  *
    552  * @job: scheduler job to init
    553  * @entity: scheduler entity to use
    554  * @owner: job owner for debugging
    555  *
    556  * Refer to drm_sched_entity_push_job() documentation
    557  * for locking considerations.
    558  *
    559  * Returns 0 for success, negative error code otherwise.
    560  */
    561 int drm_sched_job_init(struct drm_sched_job *job,
    562 		       struct drm_sched_entity *entity,
    563 		       void *owner)
    564 {
    565 	struct drm_gpu_scheduler *sched;
    566 
    567 	drm_sched_entity_select_rq(entity);
    568 	if (!entity->rq)
    569 		return -ENOENT;
    570 
    571 	sched = entity->rq->sched;
    572 
    573 	job->sched = sched;
    574 	job->entity = entity;
    575 	job->s_priority = entity->rq - sched->sched_rq;
    576 	job->s_fence = drm_sched_fence_create(entity, owner);
    577 	if (!job->s_fence)
    578 		return -ENOMEM;
    579 	job->id = atomic64_inc_return(&sched->job_id_count);
    580 
    581 	INIT_LIST_HEAD(&job->node);
    582 
    583 	return 0;
    584 }
    585 EXPORT_SYMBOL(drm_sched_job_init);
    586 
    587 /**
    588  * drm_sched_job_cleanup - clean up scheduler job resources
    589  *
    590  * @job: scheduler job to clean up
    591  */
    592 void drm_sched_job_cleanup(struct drm_sched_job *job)
    593 {
    594 	dma_fence_put(&job->s_fence->finished);
    595 	job->s_fence = NULL;
    596 }
    597 EXPORT_SYMBOL(drm_sched_job_cleanup);
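
         /*
          * drm_sched_job_cleanup() is normally called from the driver's free_job
          * callback, before the driver releases its own job structure. A minimal
          * sketch, assuming a hypothetical "struct my_job" that embeds a
          * struct drm_sched_job as "base":
          *
          *	static void my_job_free(struct drm_sched_job *sched_job)
          *	{
          *		struct my_job *job = container_of(sched_job, struct my_job, base);
          *
          *		drm_sched_job_cleanup(sched_job);
          *		kfree(job);
          *	}
          */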
    598 
    599 /**
    600  * drm_sched_ready - is the scheduler ready
    601  *
    602  * @sched: scheduler instance
    603  *
    604  * Return true if we can push more jobs to the hw, otherwise false.
    605  */
    606 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
    607 {
    608 	return atomic_read(&sched->hw_rq_count) <
    609 		sched->hw_submission_limit;
    610 }
    611 
    612 /**
    613  * drm_sched_wakeup - Wake up the scheduler when it is ready
    614  *
    615  * @sched: scheduler instance
    616  *
    617  */
    618 void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
    619 {
    620 	if (drm_sched_ready(sched))
    621 		wake_up_interruptible(&sched->wake_up_worker);
    622 }
    623 
    624 /**
    625  * drm_sched_select_entity - Select next entity to process
    626  *
    627  * @sched: scheduler instance
    628  *
    629  * Returns the entity to process or NULL if none are found.
    630  */
    631 static struct drm_sched_entity *
    632 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
    633 {
    634 	struct drm_sched_entity *entity;
    635 	int i;
    636 
    637 	if (!drm_sched_ready(sched))
    638 		return NULL;
    639 
     640 	/* Kernel run queue has higher priority than normal run queue */
    641 	for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
    642 		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
    643 		if (entity)
    644 			break;
    645 	}
    646 
    647 	return entity;
    648 }
    649 
    650 /**
    651  * drm_sched_process_job - process a job
    652  *
    653  * @f: fence
     654  * @cb: fence callback
     655  *
     656  * Called after the job has finished execution.
    657  */
    658 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
    659 {
    660 	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
    661 	struct drm_sched_fence *s_fence = s_job->s_fence;
    662 	struct drm_gpu_scheduler *sched = s_fence->sched;
    663 
    664 	atomic_dec(&sched->hw_rq_count);
    665 	atomic_dec(&sched->score);
    666 
    667 	trace_drm_sched_process_job(s_fence);
    668 
    669 	drm_sched_fence_finished(s_fence);
    670 	wake_up_interruptible(&sched->wake_up_worker);
    671 }
    672 
    673 /**
    674  * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
    675  *
    676  * @sched: scheduler instance
    677  *
     678  * Returns the next finished job from the mirror list (if there is one),
     679  * ready to be destroyed.
    680  */
    681 static struct drm_sched_job *
    682 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
    683 {
    684 	struct drm_sched_job *job;
    685 	unsigned long flags;
    686 
    687 	/*
     688 	 * Don't destroy jobs while the timeout worker is running OR the thread
     689 	 * is being parked and hence assumed not to touch ring_mirror_list.
    690 	 */
    691 	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
    692 	    !cancel_delayed_work(&sched->work_tdr)) ||
    693 	    __kthread_should_park(sched->thread))
    694 		return NULL;
    695 
    696 	spin_lock_irqsave(&sched->job_list_lock, flags);
    697 
    698 	job = list_first_entry_or_null(&sched->ring_mirror_list,
    699 				       struct drm_sched_job, node);
    700 
    701 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
    702 		/* remove job from ring_mirror_list */
    703 		list_del_init(&job->node);
    704 	} else {
    705 		job = NULL;
    706 		/* queue timeout for next job */
    707 		drm_sched_start_timeout(sched);
    708 	}
    709 
    710 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
    711 
    712 	return job;
    713 }
    714 
    715 /**
    716  * drm_sched_blocked - check if the scheduler is blocked
    717  *
    718  * @sched: scheduler instance
    719  *
    720  * Returns true if blocked, otherwise false.
    721  */
    722 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
    723 {
    724 	if (kthread_should_park()) {
    725 		kthread_parkme();
    726 		return true;
    727 	}
    728 
    729 	return false;
    730 }
    731 
    732 /**
    733  * drm_sched_main - main scheduler thread
    734  *
    735  * @param: scheduler instance
    736  *
    737  * Returns 0.
    738  */
    739 static int drm_sched_main(void *param)
    740 {
    741 	struct sched_param sparam = {.sched_priority = 1};
    742 	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
    743 	int r;
    744 
    745 	sched_setscheduler(current, SCHED_FIFO, &sparam);
    746 
    747 	while (!kthread_should_stop()) {
    748 		struct drm_sched_entity *entity = NULL;
    749 		struct drm_sched_fence *s_fence;
    750 		struct drm_sched_job *sched_job;
    751 		struct dma_fence *fence;
    752 		struct drm_sched_job *cleanup_job = NULL;
    753 
    754 		wait_event_interruptible(sched->wake_up_worker,
    755 					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
    756 					 (!drm_sched_blocked(sched) &&
    757 					  (entity = drm_sched_select_entity(sched))) ||
    758 					 kthread_should_stop());
    759 
    760 		if (cleanup_job) {
    761 			sched->ops->free_job(cleanup_job);
    762 			/* queue timeout for next job */
    763 			drm_sched_start_timeout(sched);
    764 		}
    765 
    766 		if (!entity)
    767 			continue;
    768 
    769 		sched_job = drm_sched_entity_pop_job(entity);
    770 
    771 		complete(&entity->entity_idle);
    772 
    773 		if (!sched_job)
    774 			continue;
    775 
    776 		s_fence = sched_job->s_fence;
    777 
    778 		atomic_inc(&sched->hw_rq_count);
    779 		drm_sched_job_begin(sched_job);
    780 
    781 		fence = sched->ops->run_job(sched_job);
    782 		drm_sched_fence_scheduled(s_fence);
    783 
    784 		if (!IS_ERR_OR_NULL(fence)) {
    785 			s_fence->parent = dma_fence_get(fence);
    786 			r = dma_fence_add_callback(fence, &sched_job->cb,
    787 						   drm_sched_process_job);
    788 			if (r == -ENOENT)
    789 				drm_sched_process_job(fence, &sched_job->cb);
    790 			else if (r)
    791 				DRM_ERROR("fence add callback failed (%d)\n",
    792 					  r);
    793 			dma_fence_put(fence);
    794 		} else {
    795 			if (IS_ERR(fence))
    796 				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
    797 
    798 			drm_sched_process_job(NULL, &sched_job->cb);
    799 		}
    800 
    801 		wake_up(&sched->job_scheduled);
    802 	}
    803 	return 0;
    804 }
    805 
    806 /**
    807  * drm_sched_init - Init a gpu scheduler instance
    808  *
    809  * @sched: scheduler instance
    810  * @ops: backend operations for this scheduler
    811  * @hw_submission: number of hw submissions that can be in flight
    812  * @hang_limit: number of times to allow a job to hang before dropping it
    813  * @timeout: timeout value in jiffies for the scheduler
    814  * @name: name used for debugging
    815  *
    816  * Return 0 on success, otherwise error code.
    817  */
    818 int drm_sched_init(struct drm_gpu_scheduler *sched,
    819 		   const struct drm_sched_backend_ops *ops,
    820 		   unsigned hw_submission,
    821 		   unsigned hang_limit,
    822 		   long timeout,
    823 		   const char *name)
    824 {
    825 	int i, ret;
    826 	sched->ops = ops;
    827 	sched->hw_submission_limit = hw_submission;
    828 	sched->name = name;
    829 	sched->timeout = timeout;
    830 	sched->hang_limit = hang_limit;
    831 	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
    832 		drm_sched_rq_init(sched, &sched->sched_rq[i]);
    833 
    834 	init_waitqueue_head(&sched->wake_up_worker);
    835 	init_waitqueue_head(&sched->job_scheduled);
    836 	INIT_LIST_HEAD(&sched->ring_mirror_list);
    837 	spin_lock_init(&sched->job_list_lock);
    838 	atomic_set(&sched->hw_rq_count, 0);
    839 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
    840 	atomic_set(&sched->score, 0);
    841 	atomic64_set(&sched->job_id_count, 0);
    842 
     843 	/* Each scheduler will run on a separate kernel thread */
    844 	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
    845 	if (IS_ERR(sched->thread)) {
    846 		ret = PTR_ERR(sched->thread);
    847 		sched->thread = NULL;
    848 		DRM_ERROR("Failed to create scheduler for %s.\n", name);
    849 		return ret;
    850 	}
    851 
    852 	sched->ready = true;
    853 	return 0;
    854 }
    855 EXPORT_SYMBOL(drm_sched_init);
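
         /*
          * A minimal sketch of the backend operations a driver passes in here.
          * All "my_*" callbacks and the numeric parameters are hypothetical and
          * for illustration only:
          *
          *	static const struct drm_sched_backend_ops my_sched_ops = {
          *		.dependency	= my_job_dependency,	// next fence to wait on, or NULL
          *		.run_job	= my_job_run,		// push to hw, return the hw fence
          *		.timedout_job	= my_job_timedout,	// TDR entry point
          *		.free_job	= my_job_free,		// release the job's resources
          *	};
          *
          *	ret = drm_sched_init(&ring->sched, &my_sched_ops, 16, 2,
          *			     msecs_to_jiffies(1000), "my-ring");
          */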
    856 
    857 /**
    858  * drm_sched_fini - Destroy a gpu scheduler
    859  *
    860  * @sched: scheduler instance
    861  *
    862  * Tears down and cleans up the scheduler.
    863  */
    864 void drm_sched_fini(struct drm_gpu_scheduler *sched)
    865 {
    866 	if (sched->thread)
    867 		kthread_stop(sched->thread);
    868 
    869 	sched->ready = false;
    870 }
    871 EXPORT_SYMBOL(drm_sched_fini);
    872