Home | History | Annotate | Download | only in i915

Lines Matching defs:perf

31  * DOC: i915 Perf Overview
37 * This i915 perf interface enables userspace to configure and open a file
53 * DOC: i915 Perf History and Comparison with Core Perf
55 * The interface was initially inspired by the core Perf infrastructure but
58 * i915 perf file descriptors represent a "stream" instead of an "event"; where
59 * a perf event primarily corresponds to a single 64bit value, while a stream
63 * of related counters. Samples for an i915 perf stream capturing OA metrics
66 * selected by the user opening the stream. Perf has support for grouping
70 * i915 perf stream configurations are provided as an array of u64 (key,value)
74 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
84 * Issues hit with first prototype based on Core Perf
87 * The first prototype of this driver was based on the core perf
89 * perf, we found we were breaking or working around too many assumptions baked
90 * into perf's currently cpu centric design.
92 * In the end we didn't see a clear benefit to making perf's implementation and
94 * wouldn't be able to use any existing perf based userspace tools.
97 * how userspace will sometimes need to combine i915 perf OA metrics with
101 * a standard vendor/architecture agnostic interface by not using perf.
104 * For posterity, in case we might re-visit trying to adapt core perf to be
108 * - The perf based OA PMU driver broke some significant design assumptions:
110 * Existing perf pmus are used for profiling work on a cpu and we were
113 * registers) to fit with perf's current design, and adding _DEVICE records
120 * buffer to perf's buffer, those bursts of sample writes looked to perf like
123 * Perf supports groups of counters and allows those to be read via
130 * set while perf generally expects counter configurations to be orthogonal.
135 * GPU context to filter metrics on). We avoided using perf's grouping
136 * feature and forwarded OA reports to userspace via perf's 'raw' sample
144 * - As a side note on perf's grouping feature; there was also some concern
158 * event scheduling is a central design idea within perf for allowing
180 * - It felt like our perf based PMU was making some technical compromises
181 * just for the sake of using perf:
187 * cpu id, perf ensures pmu methods will be invoked via an inter process
189 * perf events for a specific cpu. This was workable but it meant the
329 * The default threshold of 100000Hz is based on perf's similar
402 #ifndef __NetBSD__ /* XXX i915 perf sysctl */
421 i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
427 oa_config = &perf->test_config;
429 oa_config = idr_find(&perf->metrics_idr, metrics_set);
513 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
581 * @stream: An i915-perf stream opened for OA metrics
626 * @stream: An i915-perf stream opened for OA metrics
696 * @stream: An i915-perf stream opened for OA metrics
813 (IS_GEN(stream->perf->i915, 12) ?
817 if (__ratelimit(&stream->perf->spurious_report_rs))
832 if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
833 INTEL_GEN(stream->perf->i915) <= 11)
867 if (!stream->perf->exclusive_stream->ctx ||
876 if (stream->perf->exclusive_stream->ctx &&
907 oaheadptr = IS_GEN(stream->perf->i915, 12) ?
929 * @stream: An i915-perf stream opened for OA metrics
967 oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
995 stream->perf->ops.oa_disable(stream);
996 stream->perf->ops.oa_enable(stream);
1019 * @stream: An i915-perf stream opened for OA metrics
1127 if (__ratelimit(&stream->perf->spurious_report_rs))
1171 * @stream: An i915-perf stream opened for OA metrics
1211 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1242 stream->perf->ops.oa_disable(stream);
1243 stream->perf->ops.oa_enable(stream);
1253 stream->perf->gen7_latched_oastatus1 |=
1262 * @stream: An i915-perf stream opened for OA metrics
1293 * @stream: An i915-perf stream opened for OA metrics
1294 * @file: An i915 perf stream file
1297 * For handling userspace polling on an i915 perf stream opened for OA metrics,
1312 * @stream: An i915-perf stream opened for OA metrics
1334 return stream->perf->ops.read(stream, buf, count, offset);
1365 * @stream: An i915-perf stream opened for OA metrics
1445 * @stream: An i915-perf stream opened for OA metrics
1491 struct i915_perf *perf = stream->perf;
1493 BUG_ON(stream != perf->exclusive_stream);
1504 perf->exclusive_stream = NULL;
1505 perf->ops.disable_metric_set(stream);
1518 if (perf->spurious_report_rs.missed) {
1520 perf->spurious_report_rs.missed);
1554 stream->perf->gen7_latched_oastatus1 = 0;
1697 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
1742 if (INTEL_GEN(stream->perf->i915) >= 8)
1758 struct drm_i915_private *i915 = stream->perf->i915;
1763 atomic64_read(&stream->perf->noa_programming_delay) *
1788 * needs to be fixed during the lifetime of the i915/perf stream.
1989 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
2012 *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
2181 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2182 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2337 stream->perf->ctx_oactxctrl_offset + 1,
2405 struct drm_i915_private *i915 = stream->perf->i915;
2410 lockdep_assert_held(&stream->perf->lock);
2485 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2494 stream->perf->ctx_oactxctrl_offset + 1,
2547 if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
2732 * @stream: An i915 perf stream opened for OA metrics
2741 stream->perf->ops.oa_enable(stream);
2785 * @stream: An i915 perf stream opened for OA metrics
2793 stream->perf->ops.oa_disable(stream);
2812 * @stream: An i915 perf stream
2832 struct i915_perf *perf = stream->perf;
2846 if (!perf->metrics_kobj) {
2852 (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
2857 if (!perf->ops.enable_metric_set) {
2867 if (perf->exclusive_stream) {
2882 format_size = perf->oa_formats[props->oa_format].size;
2894 perf->oa_formats[props->oa_format].format;
2914 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
2941 perf->exclusive_stream = stream;
2943 ret = perf->ops.enable_metric_set(stream);
2962 perf->exclusive_stream = NULL;
2963 perf->ops.disable_metric_set(stream);
2988 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
2993 stream = engine->i915->perf.exclusive_stream;
2998 if (stream && INTEL_GEN(stream->perf->i915) < 12)
3004 * @stream: An i915 perf stream
3005 * @file: An i915 perf stream file
3057 * i915_perf_read - handles read() FOP for i915 perf stream FDs
3058 * @file: An i915 perf stream file
3092 struct i915_perf *perf = stream->perf;
3121 mutex_lock(&perf->lock);
3124 mutex_unlock(&perf->lock);
3127 mutex_lock(&perf->lock);
3129 mutex_unlock(&perf->lock);
3193 * @stream: An i915 perf stream
3194 * @file: An i915 perf stream file
3197 * For handling userspace polling on an i915 perf stream, this calls through to
3201 * Note: The &perf->lock mutex has been taken to serialize
3228 * @file: An i915 perf stream file
3231 * For handling userspace polling on an i915 perf stream, this ensures
3242 struct i915_perf *perf = stream->perf;
3245 mutex_lock(&perf->lock);
3247 mutex_unlock(&perf->lock);
3256 * @stream: A disabled i915 perf stream
3281 * @stream: An enabled i915 perf stream
3314 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3343 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3344 * @stream: An i915 perf stream
3348 * Note: The &perf->lock mutex has been taken to serialize
3373 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3374 * @file: An i915 perf stream file
3399 struct i915_perf *perf = stream->perf;
3402 mutex_lock(&perf->lock);
3404 mutex_unlock(&perf->lock);
3410 * i915_perf_destroy_locked - destroy an i915 perf stream
3411 * @stream: An i915 perf stream
3413 * Frees all resources associated with the given i915 perf @stream, disabling
3416 * Note: The &perf->lock mutex has been taken to serialize
3436 * @file: An i915 perf stream file
3438 * Cleans up any resources associated with an open i915 perf stream file.
3448 struct i915_perf *perf = stream->perf;
3450 mutex_lock(&perf->lock);
3452 mutex_unlock(&perf->lock);
3454 /* Release the reference the perf stream kept on the driver. */
3455 drm_dev_put(&perf->i915->drm);
3463 struct i915_perf *perf = stream->perf;
3465 mutex_lock(&perf->lock);
3467 mutex_unlock(&perf->lock);
3469 /* Release the reference the perf stream kept on the driver. */
3470 drm_dev_put(&perf->i915->drm);
3526 * @perf: i915 perf instance
3534 * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
3549 i915_perf_open_ioctl_locked(struct i915_perf *perf,
3567 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
3593 if (IS_HASWELL(perf->i915) && specific_ctx)
3595 else if (IS_GEN(perf->i915, 12) && specific_ctx &&
3608 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
3615 DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
3626 stream->perf = perf;
3678 drm_dev_get(&perf->i915->drm);
3694 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
3697 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz);
3702 * @perf: i915 perf instance
3715 static int read_properties_unlocked(struct i915_perf *perf,
3726 DRM_DEBUG("No i915 perf properties given\n");
3730 /* At the moment we only support using i915-perf on the RCS. */
3731 props->engine = intel_engine_lookup_user(perf->i915,
3746 DRM_DEBUG("More i915 perf properties specified than exist\n");
3764 DRM_DEBUG("Unknown i915 perf property ID\n");
3790 if (!perf->oa_formats[value].size) {
3811 oa_period = oa_exponent_to_ns(perf, value);
3861 * i915-perf stream is expected to be a suitable interface for other forms of
3864 * Note we copy the properties from userspace outside of the i915 perf
3868 * i915_perf_open_ioctl_locked() after taking the &perf->lock
3871 * Return: A newly opened i915 Perf stream file descriptor or negative
3877 struct i915_perf *perf = &to_i915(dev)->perf;
3883 if (!perf->i915) {
3884 DRM_DEBUG("i915 perf interface not available for this system\n");
3896 ret = read_properties_unlocked(perf,
3903 mutex_lock(&perf->lock);
3904 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
3905 mutex_unlock(&perf->lock);
3911 * i915_perf_register - exposes i915-perf to userspace
3916 * used to open an i915-perf stream.
3921 struct i915_perf *perf = &i915->perf;
3924 if (!perf->i915)
3931 mutex_lock(&perf->lock);
3934 perf->metrics_kobj =
3937 if (!perf->metrics_kobj)
3940 sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr);
3978 if (perf->test_config.id == 0)
3984 ret = sysfs_create_group(perf->metrics_kobj,
3985 &perf->test_config.sysfs_metric);
3990 perf->test_config.perf = perf;
3991 kref_init(&perf->test_config.ref);
3997 kobject_put(perf->metrics_kobj);
3998 perf->metrics_kobj = NULL;
4002 mutex_unlock(&perf->lock);
4007 * i915_perf_unregister - hide i915-perf from userspace
4010 * i915-perf state cleanup is split up into an 'unregister' and
4018 struct i915_perf *perf = &i915->perf;
4020 if (!perf->metrics_kobj)
4023 sysfs_remove_group(perf->metrics_kobj,
4024 &perf->test_config.sysfs_metric);
4026 kobject_put(perf->metrics_kobj);
4027 perf->metrics_kobj = NULL;
4031 static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
4062 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4069 static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4077 static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4079 return gen7_is_valid_mux_addr(perf, addr) ||
4084 static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4086 return gen8_is_valid_mux_addr(perf, addr) ||
4091 static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4093 return gen7_is_valid_mux_addr(perf, addr) ||
4099 static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4101 return gen7_is_valid_mux_addr(perf, addr) ||
4105 static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4116 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4146 static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
4147 bool (*is_valid)(struct i915_perf *perf, u32 addr),
4177 if (!is_valid(perf, addr)) {
4212 static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
4230 return sysfs_create_group(perf->metrics_kobj,
4245 * Returns: A new allocated config number to be used with the perf open ioctl
4251 struct i915_perf *perf = &to_i915(dev)->perf;
4257 if (!perf->i915) {
4258 DRM_DEBUG("i915 perf interface not available for this system\n");
4262 if (!perf->metrics_kobj) {
4285 oa_config->perf = perf;
4300 regs = alloc_oa_regs(perf,
4301 perf->ops.is_valid_mux_reg,
4313 regs = alloc_oa_regs(perf,
4314 perf->ops.is_valid_b_counter_reg,
4325 if (INTEL_GEN(perf->i915) < 8) {
4332 regs = alloc_oa_regs(perf,
4333 perf->ops.is_valid_flex_reg,
4346 err = mutex_lock_interruptible(&perf->metrics_lock);
4353 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
4361 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4368 oa_config->id = idr_alloc(&perf->metrics_idr,
4377 mutex_unlock(&perf->metrics_lock);
4385 mutex_unlock(&perf->metrics_lock);
4407 struct i915_perf *perf = &to_i915(dev)->perf;
4412 if (!perf->i915) {
4413 DRM_DEBUG("i915 perf interface not available for this system\n");
4422 ret = mutex_lock_interruptible(&perf->metrics_lock);
4426 oa_config = idr_find(&perf->metrics_idr, *arg);
4436 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4439 idr_remove(&perf->metrics_idr, *arg);
4441 mutex_unlock(&perf->metrics_lock);
4450 mutex_unlock(&perf->metrics_lock);
4454 #ifndef __NetBSD__ /* XXX i915 perf sysctl */
4501 * i915_perf_init - initialize i915-perf state on module bind
4504 * Initializes i915-perf state without exposing anything to userspace.
4506 * Note: i915-perf initialization is split into an 'init' and 'register'
4511 struct i915_perf *perf = &i915->perf;
4516 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4517 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4518 perf->ops.is_valid_flex_reg = NULL;
4519 perf->ops.enable_metric_set = hsw_enable_metric_set;
4520 perf->ops.disable_metric_set = hsw_disable_metric_set;
4521 perf->ops.oa_enable = gen7_oa_enable;
4522 perf->ops.oa_disable = gen7_oa_disable;
4523 perf->ops.read = gen7_oa_read;
4524 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
4526 perf->oa_formats = hsw_oa_formats;
4534 perf->ops.read = gen8_oa_read;
4537 perf->oa_formats = gen8_plus_oa_formats;
4539 perf->ops.is_valid_b_counter_reg =
4541 perf->ops.is_valid_mux_reg =
4543 perf->ops.is_valid_flex_reg =
4547 perf->ops.is_valid_mux_reg =
4551 perf->ops.oa_enable = gen8_oa_enable;
4552 perf->ops.oa_disable = gen8_oa_disable;
4553 perf->ops.enable_metric_set = gen8_enable_metric_set;
4554 perf->ops.disable_metric_set = gen8_disable_metric_set;
4555 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4558 perf->ctx_oactxctrl_offset = 0x120;
4559 perf->ctx_flexeu0_offset = 0x2ce;
4561 perf->gen8_valid_ctx_bit = BIT(25);
4563 perf->ctx_oactxctrl_offset = 0x128;
4564 perf->ctx_flexeu0_offset = 0x3de;
4566 perf->gen8_valid_ctx_bit = BIT(16);
4569 perf->oa_formats = gen8_plus_oa_formats;
4571 perf->ops.is_valid_b_counter_reg =
4573 perf->ops.is_valid_mux_reg =
4575 perf->ops.is_valid_flex_reg =
4578 perf->ops.oa_enable = gen8_oa_enable;
4579 perf->ops.oa_disable = gen8_oa_disable;
4580 perf->ops.enable_metric_set = gen8_enable_metric_set;
4581 perf->ops.disable_metric_set = gen10_disable_metric_set;
4582 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4585 perf->ctx_oactxctrl_offset = 0x128;
4586 perf->ctx_flexeu0_offset = 0x3de;
4588 perf->ctx_oactxctrl_offset = 0x124;
4589 perf->ctx_flexeu0_offset = 0x78e;
4591 perf->gen8_valid_ctx_bit = BIT(16);
4593 perf->oa_formats = gen12_oa_formats;
4595 perf->ops.is_valid_b_counter_reg =
4597 perf->ops.is_valid_mux_reg =
4599 perf->ops.is_valid_flex_reg =
4602 perf->ops.oa_enable = gen12_oa_enable;
4603 perf->ops.oa_disable = gen12_oa_disable;
4604 perf->ops.enable_metric_set = gen12_enable_metric_set;
4605 perf->ops.disable_metric_set = gen12_disable_metric_set;
4606 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
4608 perf->ctx_flexeu0_offset = 0;
4609 perf->ctx_oactxctrl_offset = 0x144;
4613 if (perf->ops.enable_metric_set) {
4614 mutex_init(&perf->lock);
4619 mutex_init(&perf->metrics_lock);
4620 idr_init(&perf->metrics_idr);
4632 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
4637 ratelimit_set_flags(&perf->spurious_report_rs,
4640 atomic64_set(&perf->noa_programming_delay,
4643 perf->i915 = i915;
4655 #ifndef __NetBSD__ /* XXX i915 perf sysctl */
4662 #ifndef __NetBSD__ /* XXX i915 perf sysctl */
4673 struct i915_perf *perf = &i915->perf;
4675 if (!perf->i915)
4678 if (perf->ops.enable_metric_set) {
4679 mutex_destroy(&perf->metrics_lock);
4680 mutex_destroy(&perf->lock);
4683 idr_for_each(&perf->metrics_idr, destroy_config, perf);
4684 idr_destroy(&perf->metrics_idr);
4686 memset(&perf->ops, 0, sizeof(perf->ops));
4687 perf->i915 = NULL;
4691 * i915_perf_ioctl_version - Version of the i915-perf subsystem