/*	$NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $	*/

/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Anhua Xu
 *    Kevin Tian <kevin.tian@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sched_policy.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $");

#include "i915_drv.h"
#include "gvt.h"

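/*
 * Return true if any engine of this vGPU still has workloads queued,
 * i.e. the vGPU is "busy" from the scheduler's point of view.
 */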
static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
	struct intel_engine_cs *engine;

	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		if (!list_empty(workload_q_head(vgpu, i)))
			return true;
	}

	return false;
}

/* Give a newly started vGPU 2 seconds of higher scheduling priority */
#define GVT_SCHED_VGPU_PRI_TIME  2

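/*
 * Per-vGPU scheduling state: the vGPU's position in the scheduler's LRU
 * run queue, the start-up priority window (pri_sched/pri_time), and the
 * timeslice accounting (sched_in_time, sched_time, left_ts,
 * allocated_ts), all tracked as ktime_t values.
 */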
struct vgpu_sched_data {
	struct list_head lru_list;
	struct intel_vgpu *vgpu;
	bool active;
	bool pri_sched;
	ktime_t pri_time;
	ktime_t sched_in_time;
	ktime_t sched_time;
	ktime_t left_ts;
	ktime_t allocated_ts;

	struct vgpu_sched_ctl sched_ctl;
};

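/*
 * Global scheduler state: the hrtimer that drives scheduling ticks every
 * `period' nanoseconds, the LRU run queue of active vGPUs, and the next
 * expiry of the timeslice-balancing pass.
 */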
struct gvt_sched_data {
	struct intel_gvt *gvt;
	struct hrtimer timer;
	unsigned long period;
	struct list_head lru_runq_head;
	ktime_t expire_time;
};

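/*
 * Charge the time elapsed since the vGPU was scheduled in against its
 * remaining timeslice (left_ts may go negative, i.e. the vGPU runs into
 * debt) and accumulate it into the total sched_time.  The idle vGPU is
 * never charged.
 */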
static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
{
	ktime_t delta_ts;
	struct vgpu_sched_data *vgpu_data;

	if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
		return;

	vgpu_data = vgpu->sched_data;
	delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
	vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
	vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
	vgpu_data->sched_in_time = cur_time;
}

#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10

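/*
 * Redistribute timeslices across the active vGPUs.  The balance runs
 * every GVT_TS_BALANCE_PERIOD_MS and cycles through
 * GVT_TS_BALANCE_STAGE_NUM stages; each vGPU's allocation per stage is
 * its weighted share of the 100ms period.  For example, with two vGPUs
 * of weights 2 and 1, ktime_divns(100ms, 3) * weight yields roughly
 * 66ms and 33ms respectively.  Stage 0 resets left_ts to the fresh
 * allocation (dropping any accumulated debt); the other stages add the
 * allocation on top of whatever is left over or owed.
 */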
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct list_head *pos;
	static u64 stage_check;
	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;

	/* The timeslice accumulation is reset at stage 0: timeslices are
	 * allocated afresh, without carrying over debt from previous stages.
	 */
	if (stage == 0) {
		int total_weight = 0;
		ktime_t fair_timeslice;

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			total_weight += vgpu_data->sched_ctl.weight;
		}

		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
			fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
						     total_weight) * vgpu_data->sched_ctl.weight;

			vgpu_data->allocated_ts = fair_timeslice;
			vgpu_data->left_ts = vgpu_data->allocated_ts;
		}
	} else {
		list_for_each(pos, &sched_data->lru_runq_head) {
			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);

			/* The timeslice for the next 100ms should include the
			 * leftover/debt slice carried over from previous stages.
			 */
			vgpu_data->left_ts += vgpu_data->allocated_ts;
		}
	}
}

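/*
 * Attempt the actual vGPU switch.  The switch only completes once every
 * engine has drained its in-flight workload; until then the
 * need_reschedule flag keeps the dispatch thread from picking up new
 * work for the outgoing vGPU, and this function simply returns.
 */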
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id i;
	struct intel_engine_cs *engine;
	struct vgpu_sched_data *vgpu_data;
	ktime_t cur_time;

	/* No need to schedule if next_vgpu is the same as current_vgpu;
	 * let the scheduler choose next_vgpu again by setting it to NULL.
	 */
	if (scheduler->next_vgpu == scheduler->current_vgpu) {
		scheduler->next_vgpu = NULL;
		return;
	}

	/*
	 * After the flag is set, the workload dispatch thread will
	 * stop dispatching workloads for the current vgpu.
	 */
	scheduler->need_reschedule = true;

	/* still have uncompleted workloads? */
	for_each_engine(engine, gvt->dev_priv, i) {
		if (scheduler->current_workload[i])
			return;
	}

	cur_time = ktime_get();
	vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
	vgpu_data = scheduler->next_vgpu->sched_data;
	vgpu_data->sched_in_time = cur_time;

	/* switch the current vgpu */
	scheduler->current_vgpu = scheduler->next_vgpu;
	scheduler->next_vgpu = NULL;

	scheduler->need_reschedule = false;

	/* wake up the workload dispatch thread */
	for_each_engine(engine, gvt->dev_priv, i)
		wake_up(&scheduler->waitq[i]);
}

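/*
 * Scan the LRU run queue, least-recently-scheduled vGPU first, and pick
 * the first one that both has pending workloads and either sits inside
 * its start-up priority window or still has timeslice left.  Returns
 * NULL if no such vGPU exists.
 */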
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;
	struct list_head *head = &sched_data->lru_runq_head;
	struct list_head *pos;

	/* search for a vgpu with pending workloads */
	list_for_each(pos, head) {

		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
			continue;

		if (vgpu_data->pri_sched) {
			if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
				vgpu = vgpu_data->vgpu;
				break;
			} else
				vgpu_data->pri_sched = false;
		}

		/* Return the vGPU only if it has timeslice left */
		if (vgpu_data->left_ts > 0) {
			vgpu = vgpu_data->vgpu;
			break;
		}
	}

	return vgpu;
}

/* in nanoseconds */
#define GVT_DEFAULT_TIME_SLICE 1000000

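/*
 * One tick of the scheduler ("tbs" presumably stands for time-based
 * scheduling).  Pick the next busy vGPU from the LRU run queue, fall
 * back to the idle vGPU when nothing is runnable, and rotate the chosen
 * vGPU to the tail of the queue so it is considered last next time.
 */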
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
	struct intel_gvt *gvt = sched_data->gvt;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct vgpu_sched_data *vgpu_data;
	struct intel_vgpu *vgpu = NULL;

	/* no active vgpu, or a target has already been chosen */
	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
		goto out;

	vgpu = find_busy_vgpu(sched_data);
	if (vgpu) {
		scheduler->next_vgpu = vgpu;
		vgpu_data = vgpu->sched_data;
		if (!vgpu_data->pri_sched) {
			/* Move the last used vGPU to the tail of lru_list */
			list_del_init(&vgpu_data->lru_list);
			list_add_tail(&vgpu_data->lru_list,
				      &sched_data->lru_runq_head);
		}
	} else {
		scheduler->next_vgpu = gvt->idle_vgpu;
	}
out:
	if (scheduler->next_vgpu)
		try_to_schedule_next_vgpu(gvt);
}

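/*
 * Main scheduling entry point, invoked in response to the service
 * requests posted by the hrtimer below.  Under sched_lock: rebalance
 * timeslices when the periodic SCHED request has fired and the balance
 * period has expired, charge the current vGPU for the time it has
 * consumed, and run one scheduler tick.
 */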
void intel_gvt_schedule(struct intel_gvt *gvt)
{
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
	ktime_t cur_time;

	mutex_lock(&gvt->sched_lock);
	cur_time = ktime_get();

	if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
				(void *)&gvt->service_request)) {
		if (cur_time >= sched_data->expire_time) {
			gvt_balance_timeslice(sched_data);
			sched_data->expire_time = ktime_add_ms(
				cur_time, GVT_TS_BALANCE_PERIOD_MS);
		}
	}
	clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);

	vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
	tbs_sched_func(sched_data);

	mutex_unlock(&gvt->sched_lock);
}

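/*
 * hrtimer callback: post a SCHED service request (the actual scheduling
 * work happens in intel_gvt_schedule(), outside interrupt context),
 * push the expiry forward by one period, and re-arm the timer by
 * returning HRTIMER_RESTART.
 */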
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
	struct gvt_sched_data *data;

	data = container_of(timer_data, struct gvt_sched_data, timer);

	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);

	hrtimer_add_expires_ns(&data->timer, data->period);

	return HRTIMER_RESTART;
}

static int tbs_sched_init(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;

	struct gvt_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	INIT_LIST_HEAD(&data->lru_runq_head);
	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	data->timer.function = tbs_timer_fn;
	data->period = GVT_DEFAULT_TIME_SLICE;
	data->gvt = gvt;

	scheduler->sched_data = data;

	return 0;
}

static void tbs_sched_clean(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&gvt->scheduler;
	struct gvt_sched_data *data = scheduler->sched_data;

	hrtimer_cancel(&data->timer);

	kfree(data);
	scheduler->sched_data = NULL;
}

static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *data;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->sched_ctl.weight = vgpu->sched_ctl.weight;
	data->vgpu = vgpu;
	INIT_LIST_HEAD(&data->lru_list);

	vgpu->sched_data = data;

	return 0;
}

static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

	kfree(vgpu->sched_data);
	vgpu->sched_data = NULL;

	/* stop the timer once the last vgpu id has been removed */
	if (idr_is_empty(&gvt->vgpu_idr))
		hrtimer_cancel(&sched_data->timer);
}

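/*
 * Put the vGPU on the run queue with the start-up priority boost armed
 * (GVT_SCHED_VGPU_PRI_TIME seconds from now), and start the scheduling
 * timer if it is not already running.  A vGPU already on the queue is
 * left alone.
 */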
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	ktime_t now;

	if (!list_empty(&vgpu_data->lru_list))
		return;

	now = ktime_get();
	vgpu_data->pri_time = ktime_add(now,
					ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
	vgpu_data->pri_sched = true;

	list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);

	if (!hrtimer_active(&sched_data->timer))
		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
			sched_data->period), HRTIMER_MODE_ABS);
	vgpu_data->active = true;
}

static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	list_del_init(&vgpu_data->lru_list);
	vgpu_data->active = false;
}

static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
	.init = tbs_sched_init,
	.clean = tbs_sched_clean,
	.init_vgpu = tbs_sched_init_vgpu,
	.clean_vgpu = tbs_sched_clean_vgpu,
	.start_schedule = tbs_sched_start_schedule,
	.stop_schedule = tbs_sched_stop_schedule,
};

int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
{
	int ret;

	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops = &tbs_schedule_ops;
	ret = gvt->scheduler.sched_ops->init(gvt);
	mutex_unlock(&gvt->sched_lock);

	return ret;
}

void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	gvt->scheduler.sched_ops->clean(gvt);
	mutex_unlock(&gvt->sched_lock);
}

/* For the per-vgpu scheduler policy there are two pieces of per-vgpu
 * data: sched_data and sched_ctl.  We treat both as part of the global
 * scheduler, protected by gvt->sched_lock.  Callers must decide for
 * themselves whether vgpu_lock should additionally be held outside.
 */

int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
{
	int ret;

	mutex_lock(&vgpu->gvt->sched_lock);
	ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);

	return ret;
}

void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
{
	mutex_lock(&vgpu->gvt->sched_lock);
	vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
{
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

	mutex_lock(&vgpu->gvt->sched_lock);
	if (!vgpu_data->active) {
		gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
		vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
	}
	mutex_unlock(&vgpu->gvt->sched_lock);
}

void intel_gvt_kick_schedule(struct intel_gvt *gvt)
{
	mutex_lock(&gvt->sched_lock);
	intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
	mutex_unlock(&gvt->sched_lock);
}

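/*
 * Take the vGPU out of scheduling entirely: remove it from the run
 * queue, clear it as the current/next scheduling target, and hand any
 * engines whose MMIO context it still owns back to the host (the NULL
 * argument to intel_gvt_switch_mmio()).  Runtime PM is held across the
 * MMIO switch because it touches the hardware.
 */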
void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
{
	struct intel_gvt_workload_scheduler *scheduler =
		&vgpu->gvt->scheduler;
	int ring_id;
	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	if (!vgpu_data->active)
		return;

	gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);

	mutex_lock(&vgpu->gvt->sched_lock);
	scheduler->sched_ops->stop_schedule(vgpu);

	if (scheduler->next_vgpu == vgpu)
		scheduler->next_vgpu = NULL;

	if (scheduler->current_vgpu == vgpu) {
		/* stop workload dispatching */
		scheduler->need_reschedule = true;
		scheduler->current_vgpu = NULL;
	}

	intel_runtime_pm_get(&dev_priv->runtime_pm);
	spin_lock_bh(&scheduler->mmio_context_lock);
	for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
		if (scheduler->engine_owner[ring_id] == vgpu) {
			intel_gvt_switch_mmio(vgpu, NULL, ring_id);
			scheduler->engine_owner[ring_id] = NULL;
		}
	}
	spin_unlock_bh(&scheduler->mmio_context_lock);
	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
	mutex_unlock(&vgpu->gvt->sched_lock);
}
    485