1 1.1 riastrad /* $NetBSD: i915_perf.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /* 4 1.1 riastrad * SPDX-License-Identifier: MIT 5 1.1 riastrad * 6 1.1 riastrad * Copyright 2019 Intel Corporation 7 1.1 riastrad */ 8 1.1 riastrad 9 1.1 riastrad #include <sys/cdefs.h> 10 1.1 riastrad __KERNEL_RCSID(0, "$NetBSD: i915_perf.c,v 1.2 2021/12/18 23:45:31 riastradh Exp $"); 11 1.1 riastrad 12 1.1 riastrad #include <linux/kref.h> 13 1.1 riastrad 14 1.1 riastrad #include "gem/i915_gem_pm.h" 15 1.1 riastrad #include "gt/intel_gt.h" 16 1.1 riastrad 17 1.1 riastrad #include "i915_selftest.h" 18 1.1 riastrad 19 1.1 riastrad #include "igt_flush_test.h" 20 1.1 riastrad #include "lib_sw_fence.h" 21 1.1 riastrad 22 1.1 riastrad static struct i915_perf_stream * 23 1.1 riastrad test_stream(struct i915_perf *perf) 24 1.1 riastrad { 25 1.1 riastrad struct drm_i915_perf_open_param param = {}; 26 1.1 riastrad struct perf_open_properties props = { 27 1.1 riastrad .engine = intel_engine_lookup_user(perf->i915, 28 1.1 riastrad I915_ENGINE_CLASS_RENDER, 29 1.1 riastrad 0), 30 1.1 riastrad .sample_flags = SAMPLE_OA_REPORT, 31 1.1 riastrad .oa_format = IS_GEN(perf->i915, 12) ? 32 1.1 riastrad I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, 33 1.1 riastrad .metrics_set = 1, 34 1.1 riastrad }; 35 1.1 riastrad struct i915_perf_stream *stream; 36 1.1 riastrad 37 1.1 riastrad stream = kzalloc(sizeof(*stream), GFP_KERNEL); 38 1.1 riastrad if (!stream) 39 1.1 riastrad return NULL; 40 1.1 riastrad 41 1.1 riastrad stream->perf = perf; 42 1.1 riastrad 43 1.1 riastrad mutex_lock(&perf->lock); 44 1.1 riastrad if (i915_oa_stream_init(stream, ¶m, &props)) { 45 1.1 riastrad kfree(stream); 46 1.1 riastrad stream = NULL; 47 1.1 riastrad } 48 1.1 riastrad mutex_unlock(&perf->lock); 49 1.1 riastrad 50 1.1 riastrad return stream; 51 1.1 riastrad } 52 1.1 riastrad 53 1.1 riastrad static void stream_destroy(struct i915_perf_stream *stream) 54 1.1 riastrad { 55 1.1 riastrad struct i915_perf *perf = stream->perf; 56 1.1 riastrad 57 1.1 riastrad mutex_lock(&perf->lock); 58 1.1 riastrad i915_perf_destroy_locked(stream); 59 1.1 riastrad mutex_unlock(&perf->lock); 60 1.1 riastrad } 61 1.1 riastrad 62 1.1 riastrad static int live_sanitycheck(void *arg) 63 1.1 riastrad { 64 1.1 riastrad struct drm_i915_private *i915 = arg; 65 1.1 riastrad struct i915_perf_stream *stream; 66 1.1 riastrad 67 1.1 riastrad /* Quick check we can create a perf stream */ 68 1.1 riastrad 69 1.1 riastrad stream = test_stream(&i915->perf); 70 1.1 riastrad if (!stream) 71 1.1 riastrad return -EINVAL; 72 1.1 riastrad 73 1.1 riastrad stream_destroy(stream); 74 1.1 riastrad return 0; 75 1.1 riastrad } 76 1.1 riastrad 77 1.1 riastrad static int write_timestamp(struct i915_request *rq, int slot) 78 1.1 riastrad { 79 1.1 riastrad u32 *cs; 80 1.1 riastrad int len; 81 1.1 riastrad 82 1.1 riastrad cs = intel_ring_begin(rq, 6); 83 1.1 riastrad if (IS_ERR(cs)) 84 1.1 riastrad return PTR_ERR(cs); 85 1.1 riastrad 86 1.1 riastrad len = 5; 87 1.1 riastrad if (INTEL_GEN(rq->i915) >= 8) 88 1.1 riastrad len++; 89 1.1 riastrad 90 1.1 riastrad *cs++ = GFX_OP_PIPE_CONTROL(len); 91 1.1 riastrad *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | 92 1.1 riastrad PIPE_CONTROL_STORE_DATA_INDEX | 93 1.1 riastrad PIPE_CONTROL_WRITE_TIMESTAMP; 94 1.1 riastrad *cs++ = slot * sizeof(u32); 95 1.1 riastrad *cs++ = 0; 96 1.1 riastrad *cs++ = 0; 97 1.1 riastrad *cs++ = 0; 98 1.1 riastrad 99 1.1 riastrad intel_ring_advance(rq, cs); 100 1.1 riastrad 101 1.1 riastrad return 0; 102 1.1 riastrad } 103 1.1 riastrad 104 1.1 riastrad static ktime_t poll_status(struct i915_request *rq, int slot) 105 1.1 riastrad { 106 1.1 riastrad while (!intel_read_status_page(rq->engine, slot) && 107 1.1 riastrad !i915_request_completed(rq)) 108 1.1 riastrad cpu_relax(); 109 1.1 riastrad 110 1.1 riastrad return ktime_get(); 111 1.1 riastrad } 112 1.1 riastrad 113 1.1 riastrad static int live_noa_delay(void *arg) 114 1.1 riastrad { 115 1.1 riastrad struct drm_i915_private *i915 = arg; 116 1.1 riastrad struct i915_perf_stream *stream; 117 1.1 riastrad struct i915_request *rq; 118 1.1 riastrad ktime_t t0, t1; 119 1.1 riastrad u64 expected; 120 1.1 riastrad u32 delay; 121 1.1 riastrad int err; 122 1.1 riastrad int i; 123 1.1 riastrad 124 1.1 riastrad /* Check that the GPU delays matches expectations */ 125 1.1 riastrad 126 1.1 riastrad stream = test_stream(&i915->perf); 127 1.1 riastrad if (!stream) 128 1.1 riastrad return -ENOMEM; 129 1.1 riastrad 130 1.1 riastrad expected = atomic64_read(&stream->perf->noa_programming_delay); 131 1.1 riastrad 132 1.1 riastrad if (stream->engine->class != RENDER_CLASS) { 133 1.1 riastrad err = -ENODEV; 134 1.1 riastrad goto out; 135 1.1 riastrad } 136 1.1 riastrad 137 1.1 riastrad for (i = 0; i < 4; i++) 138 1.1 riastrad intel_write_status_page(stream->engine, 0x100 + i, 0); 139 1.1 riastrad 140 1.1 riastrad rq = intel_engine_create_kernel_request(stream->engine); 141 1.1 riastrad if (IS_ERR(rq)) { 142 1.1 riastrad err = PTR_ERR(rq); 143 1.1 riastrad goto out; 144 1.1 riastrad } 145 1.1 riastrad 146 1.1 riastrad if (rq->engine->emit_init_breadcrumb && 147 1.1 riastrad i915_request_timeline(rq)->has_initial_breadcrumb) { 148 1.1 riastrad err = rq->engine->emit_init_breadcrumb(rq); 149 1.1 riastrad if (err) { 150 1.1 riastrad i915_request_add(rq); 151 1.1 riastrad goto out; 152 1.1 riastrad } 153 1.1 riastrad } 154 1.1 riastrad 155 1.1 riastrad err = write_timestamp(rq, 0x100); 156 1.1 riastrad if (err) { 157 1.1 riastrad i915_request_add(rq); 158 1.1 riastrad goto out; 159 1.1 riastrad } 160 1.1 riastrad 161 1.1 riastrad err = rq->engine->emit_bb_start(rq, 162 1.1 riastrad i915_ggtt_offset(stream->noa_wait), 0, 163 1.1 riastrad I915_DISPATCH_SECURE); 164 1.1 riastrad if (err) { 165 1.1 riastrad i915_request_add(rq); 166 1.1 riastrad goto out; 167 1.1 riastrad } 168 1.1 riastrad 169 1.1 riastrad err = write_timestamp(rq, 0x102); 170 1.1 riastrad if (err) { 171 1.1 riastrad i915_request_add(rq); 172 1.1 riastrad goto out; 173 1.1 riastrad } 174 1.1 riastrad 175 1.1 riastrad i915_request_get(rq); 176 1.1 riastrad i915_request_add(rq); 177 1.1 riastrad 178 1.1 riastrad preempt_disable(); 179 1.1 riastrad t0 = poll_status(rq, 0x100); 180 1.1 riastrad t1 = poll_status(rq, 0x102); 181 1.1 riastrad preempt_enable(); 182 1.1 riastrad 183 1.1 riastrad pr_info("CPU delay: %lluns, expected %lluns\n", 184 1.1 riastrad ktime_sub(t1, t0), expected); 185 1.1 riastrad 186 1.1 riastrad delay = intel_read_status_page(stream->engine, 0x102); 187 1.1 riastrad delay -= intel_read_status_page(stream->engine, 0x100); 188 1.1 riastrad delay = div_u64(mul_u32_u32(delay, 1000 * 1000), 189 1.1 riastrad RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); 190 1.1 riastrad pr_info("GPU delay: %uns, expected %lluns\n", 191 1.1 riastrad delay, expected); 192 1.1 riastrad 193 1.1 riastrad if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { 194 1.1 riastrad pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", 195 1.1 riastrad delay / 1000, 196 1.1 riastrad div_u64(3 * expected, 4000), 197 1.1 riastrad div_u64(3 * expected, 2000)); 198 1.1 riastrad err = -EINVAL; 199 1.1 riastrad } 200 1.1 riastrad 201 1.1 riastrad i915_request_put(rq); 202 1.1 riastrad out: 203 1.1 riastrad stream_destroy(stream); 204 1.1 riastrad return err; 205 1.1 riastrad } 206 1.1 riastrad 207 1.1 riastrad int i915_perf_live_selftests(struct drm_i915_private *i915) 208 1.1 riastrad { 209 1.1 riastrad static const struct i915_subtest tests[] = { 210 1.1 riastrad SUBTEST(live_sanitycheck), 211 1.1 riastrad SUBTEST(live_noa_delay), 212 1.1 riastrad }; 213 1.1 riastrad struct i915_perf *perf = &i915->perf; 214 1.1 riastrad 215 1.1 riastrad if (!perf->metrics_kobj || !perf->ops.enable_metric_set) 216 1.1 riastrad return 0; 217 1.1 riastrad 218 1.1 riastrad if (intel_gt_is_wedged(&i915->gt)) 219 1.1 riastrad return 0; 220 1.1 riastrad 221 1.1 riastrad return i915_subtests(tests, i915); 222 1.1 riastrad } 223