Home | History | Annotate | Line # | Download | only in i915
      1 /*	$NetBSD: i915_perf_types.h,v 1.6 2021/12/19 11:36:57 riastradh Exp $	*/
      2 
      3 /* SPDX-License-Identifier: MIT */
      4 /*
      5  * Copyright  2019 Intel Corporation
      6  */
      7 
      8 #ifndef _I915_PERF_TYPES_H_
      9 #define _I915_PERF_TYPES_H_
     10 
     11 #include <linux/atomic.h>
     12 #include <linux/device.h>
     13 #include <linux/hrtimer.h>
     14 #include <linux/llist.h>
     15 #include <linux/poll.h>
     16 #include <linux/sysfs.h>
     17 #include <linux/types.h>
     18 #include <linux/uuid.h>
     19 #include <linux/wait.h>
     20 
     21 #include "i915_reg.h"
     22 #include "intel_wakeref.h"
     23 
/*
 * Forward declarations for types that this header only refers to through
 * pointers, so their full definitions are not needed here.
 */
struct drm_i915_private;
struct file;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;
struct intel_uncore;
     31 
     32 struct i915_oa_format {
     33 	u32 format;
     34 	int size;
     35 };
     36 
     37 struct i915_oa_reg {
     38 	i915_reg_t addr;
     39 	u32 value;
     40 };
     41 
     42 struct i915_oa_config {
     43 	struct i915_perf *perf;
     44 
     45 	char uuid[UUID_STRING_LEN + 1];
     46 	int id;
     47 
     48 	const struct i915_oa_reg *mux_regs;
     49 	u32 mux_regs_len;
     50 	const struct i915_oa_reg *b_counter_regs;
     51 	u32 b_counter_regs_len;
     52 	const struct i915_oa_reg *flex_regs;
     53 	u32 flex_regs_len;
     54 
     55 #ifndef __NetBSD__		/* XXX sysfs */
     56 	struct attribute_group sysfs_metric;
     57 	struct attribute *attrs[2];
     58 	struct device_attribute sysfs_metric_id;
     59 #endif
     60 
     61 	struct kref ref;
     62 	struct rcu_head rcu;
     63 };
     64 
     65 struct i915_perf_stream;
     66 
     67 /**
     68  * struct i915_perf_stream_ops - the OPs to support a specific stream type
     69  */
     70 struct i915_perf_stream_ops {
     71 	/**
     72 	 * @enable: Enables the collection of HW samples, either in response to
     73 	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
     74 	 * without `I915_PERF_FLAG_DISABLED`.
     75 	 */
     76 	void (*enable)(struct i915_perf_stream *stream);
     77 
     78 	/**
     79 	 * @disable: Disables the collection of HW samples, either in response
     80 	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
     81 	 * the stream.
     82 	 */
     83 	void (*disable)(struct i915_perf_stream *stream);
     84 
     85 #ifndef __NetBSD__
     86 	/**
     87 	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
     88 	 * once there is something ready to read() for the stream
     89 	 */
     90 	void (*poll_wait)(struct i915_perf_stream *stream,
     91 			  struct file *file,
     92 			  poll_table *wait);
     93 #endif
     94 
     95 	/**
     96 	 * @wait_unlocked: For handling a blocking read, wait until there is
     97 	 * something to ready to read() for the stream. E.g. wait on the same
     98 	 * wait queue that would be passed to poll_wait().
     99 	 */
    100 	int (*wait_unlocked)(struct i915_perf_stream *stream);
    101 
    102 	/**
    103 	 * @read: Copy buffered metrics as records to userspace
    104 	 * **buf**: the userspace, destination buffer
    105 	 * **count**: the number of bytes to copy, requested by userspace
    106 	 * **offset**: zero at the start of the read, updated as the read
    107 	 * proceeds, it represents how many bytes have been copied so far and
    108 	 * the buffer offset for copying the next record.
    109 	 *
    110 	 * Copy as many buffered i915 perf samples and records for this stream
    111 	 * to userspace as will fit in the given buffer.
    112 	 *
    113 	 * Only write complete records; returning -%ENOSPC if there isn't room
    114 	 * for a complete record.
    115 	 *
    116 	 * Return any error condition that results in a short read such as
    117 	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
    118 	 * returning to userspace.
    119 	 */
    120 #ifdef __NetBSD__
    121 	int (*read)(struct i915_perf_stream *stream,
    122 		    struct uio *buf,
    123 		    kauth_cred_t count, /* XXX dummy */
    124 		    int offset);	/* XXX dummy */
    125 #else
    126 	int (*read)(struct i915_perf_stream *stream,
    127 		    char __user *buf,
    128 		    size_t count,
    129 		    size_t *offset);
    130 #endif
    131 
    132 	/**
    133 	 * @destroy: Cleanup any stream specific resources.
    134 	 *
    135 	 * The stream will always be disabled before this is called.
    136 	 */
    137 	void (*destroy)(struct i915_perf_stream *stream);
    138 };
    139 
    140 /**
    141  * struct i915_perf_stream - state for a single open stream FD
    142  */
    143 struct i915_perf_stream {
    144 	/**
    145 	 * @perf: i915_perf backpointer
    146 	 */
    147 	struct i915_perf *perf;
    148 
    149 	/**
    150 	 * @uncore: mmio access path
    151 	 */
    152 	struct intel_uncore *uncore;
    153 
    154 	/**
    155 	 * @engine: Engine associated with this performance stream.
    156 	 */
    157 	struct intel_engine_cs *engine;
    158 
    159 	/**
    160 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
    161 	 * properties given when opening a stream, representing the contents
    162 	 * of a single sample as read() by userspace.
    163 	 */
    164 	u32 sample_flags;
    165 
    166 	/**
    167 	 * @sample_size: Considering the configured contents of a sample
    168 	 * combined with the required header size, this is the total size
    169 	 * of a single sample record.
    170 	 */
    171 	int sample_size;
    172 
    173 	/**
    174 	 * @ctx: %NULL if measuring system-wide across all contexts or a
    175 	 * specific context that is being monitored.
    176 	 */
    177 	struct i915_gem_context *ctx;
    178 
    179 	/**
    180 	 * @enabled: Whether the stream is currently enabled, considering
    181 	 * whether the stream was opened in a disabled state and based
    182 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
    183 	 */
    184 	bool enabled;
    185 
    186 	/**
    187 	 * @hold_preemption: Whether preemption is put on hold for command
    188 	 * submissions done on the @ctx. This is useful for some drivers that
    189 	 * cannot easily post process the OA buffer context to subtract delta
    190 	 * of performance counters not associated with @ctx.
    191 	 */
    192 	bool hold_preemption;
    193 
    194 	/**
    195 	 * @ops: The callbacks providing the implementation of this specific
    196 	 * type of configured stream.
    197 	 */
    198 	const struct i915_perf_stream_ops *ops;
    199 
    200 	/**
    201 	 * @oa_config: The OA configuration used by the stream.
    202 	 */
    203 	struct i915_oa_config *oa_config;
    204 
    205 	/**
    206 	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
    207 	 * each time @oa_config changes.
    208 	 */
    209 	struct llist_head oa_config_bos;
    210 
    211 	/**
    212 	 * @pinned_ctx: The OA context specific information.
    213 	 */
    214 	struct intel_context *pinned_ctx;
    215 
    216 	/**
    217 	 * @specific_ctx_id: The id of the specific context.
    218 	 */
    219 	u32 specific_ctx_id;
    220 
    221 	/**
    222 	 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits.
    223 	 */
    224 	u32 specific_ctx_id_mask;
    225 
    226 	/**
    227 	 * @poll_check_timer: High resolution timer that will periodically
    228 	 * check for data in the circular OA buffer for notifying userspace
    229 	 * (e.g. during a read() or poll()).
    230 	 */
    231 	struct hrtimer poll_check_timer;
    232 
    233 	/**
    234 	 * @poll_wq: The wait queue that hrtimer callback wakes when it
    235 	 * sees data ready to read in the circular OA buffer.
    236 	 */
    237 #ifdef __NetBSD__
    238 	drm_waitqueue_t poll_wq;
    239 	struct selinfo poll_selq;
    240 #else
    241 	wait_queue_head_t poll_wq;
    242 #endif
    243 
    244 	/**
    245 	 * @pollin: Whether there is data available to read.
    246 	 */
    247 	bool pollin;
    248 
    249 	/**
    250 	 * @periodic: Whether periodic sampling is currently enabled.
    251 	 */
    252 	bool periodic;
    253 
    254 	/**
    255 	 * @period_exponent: The OA unit sampling frequency is derived from this.
    256 	 */
    257 	int period_exponent;
    258 
    259 	/**
    260 	 * @oa_buffer: State of the OA buffer.
    261 	 */
    262 	struct {
    263 		struct i915_vma *vma;
    264 		u8 *vaddr;
    265 		u32 last_ctx_id;
    266 		int format;
    267 		int format_size;
    268 		int size_exponent;
    269 
    270 		/**
    271 		 * @ptr_lock: Locks reads and writes to all head/tail state
    272 		 *
    273 		 * Consider: the head and tail pointer state needs to be read
    274 		 * consistently from a hrtimer callback (atomic context) and
    275 		 * read() fop (user context) with tail pointer updates happening
    276 		 * in atomic context and head updates in user context and the
    277 		 * (unlikely) possibility of read() errors needing to reset all
    278 		 * head/tail state.
    279 		 *
    280 		 * Note: Contention/performance aren't currently a significant
    281 		 * concern here considering the relatively low frequency of
    282 		 * hrtimer callbacks (5ms period) and that reads typically only
    283 		 * happen in response to a hrtimer event and likely complete
    284 		 * before the next callback.
    285 		 *
    286 		 * Note: This lock is not held *while* reading and copying data
    287 		 * to userspace so the value of head observed in htrimer
    288 		 * callbacks won't represent any partial consumption of data.
    289 		 */
    290 		spinlock_t ptr_lock;
    291 
    292 		/**
    293 		 * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
    294 		 * used for reading.
    295 		 *
    296 		 * Initial values of 0xffffffff are invalid and imply that an
    297 		 * update is required (and should be ignored by an attempted
    298 		 * read)
    299 		 */
    300 		struct {
    301 			u32 offset;
    302 		} tails[2];
    303 
    304 		/**
    305 		 * @aged_tail_idx: Index for the aged tail ready to read() data up to.
    306 		 */
    307 		unsigned int aged_tail_idx;
    308 
    309 		/**
    310 		 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
    311 		 * was read; used to determine when it is old enough to trust.
    312 		 */
    313 		u64 aging_timestamp;
    314 
    315 		/**
    316 		 * @head: Although we can always read back the head pointer register,
    317 		 * we prefer to avoid trusting the HW state, just to avoid any
    318 		 * risk that some hardware condition could * somehow bump the
    319 		 * head pointer unpredictably and cause us to forward the wrong
    320 		 * OA buffer data to userspace.
    321 		 */
    322 		u32 head;
    323 	} oa_buffer;
    324 
    325 	/**
    326 	 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be
    327 	 * reprogrammed.
    328 	 */
    329 	struct i915_vma *noa_wait;
    330 };
    331 
    332 /**
    333  * struct i915_oa_ops - Gen specific implementation of an OA unit stream
    334  */
    335 struct i915_oa_ops {
    336 	/**
    337 	 * @is_valid_b_counter_reg: Validates register's address for
    338 	 * programming boolean counters for a particular platform.
    339 	 */
    340 	bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
    341 
    342 	/**
    343 	 * @is_valid_mux_reg: Validates register's address for programming mux
    344 	 * for a particular platform.
    345 	 */
    346 	bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
    347 
    348 	/**
    349 	 * @is_valid_flex_reg: Validates register's address for programming
    350 	 * flex EU filtering for a particular platform.
    351 	 */
    352 	bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
    353 
    354 	/**
    355 	 * @enable_metric_set: Selects and applies any MUX configuration to set
    356 	 * up the Boolean and Custom (B/C) counters that are part of the
    357 	 * counter reports being sampled. May apply system constraints such as
    358 	 * disabling EU clock gating as required.
    359 	 */
    360 	int (*enable_metric_set)(struct i915_perf_stream *stream);
    361 
    362 	/**
    363 	 * @disable_metric_set: Remove system constraints associated with using
    364 	 * the OA unit.
    365 	 */
    366 	void (*disable_metric_set)(struct i915_perf_stream *stream);
    367 
    368 	/**
    369 	 * @oa_enable: Enable periodic sampling
    370 	 */
    371 	void (*oa_enable)(struct i915_perf_stream *stream);
    372 
    373 	/**
    374 	 * @oa_disable: Disable periodic sampling
    375 	 */
    376 	void (*oa_disable)(struct i915_perf_stream *stream);
    377 
    378 	/**
    379 	 * @read: Copy data from the circular OA buffer into a given userspace
    380 	 * buffer.
    381 	 */
    382 #ifdef __NetBSD__
    383 	int (*read)(struct i915_perf_stream *stream,
    384 		    struct uio *buf,
    385 		    kauth_cred_t count, /* XXX dummy */
    386 		    int offset);	/* XXX dummy */
    387 #else
    388 	int (*read)(struct i915_perf_stream *stream,
    389 		    char __user *buf,
    390 		    size_t count,
    391 		    size_t *offset);
    392 #endif
    393 
    394 	/**
    395 	 * @oa_hw_tail_read: read the OA tail pointer register
    396 	 *
    397 	 * In particular this enables us to share all the fiddly code for
    398 	 * handling the OA unit tail pointer race that affects multiple
    399 	 * generations.
    400 	 */
    401 	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
    402 };
    403 
    404 struct i915_perf {
    405 	struct drm_i915_private *i915;
    406 
    407 	struct kobject *metrics_kobj;
    408 
    409 	/*
    410 	 * Lock associated with adding/modifying/removing OA configs
    411 	 * in perf->metrics_idr.
    412 	 */
    413 	struct mutex metrics_lock;
    414 
    415 	/*
    416 	 * List of dynamic configurations (struct i915_oa_config), you
    417 	 * need to hold perf->metrics_lock to access it.
    418 	 */
    419 	struct idr metrics_idr;
    420 
    421 	/*
    422 	 * Lock associated with anything below within this structure
    423 	 * except exclusive_stream.
    424 	 */
    425 	struct mutex lock;
    426 
    427 	/*
    428 	 * The stream currently using the OA unit. If accessed
    429 	 * outside a syscall associated to its file
    430 	 * descriptor.
    431 	 */
    432 	struct i915_perf_stream *exclusive_stream;
    433 
    434 	/**
    435 	 * For rate limiting any notifications of spurious
    436 	 * invalid OA reports
    437 	 */
    438 	struct ratelimit_state spurious_report_rs;
    439 
    440 	struct i915_oa_config test_config;
    441 
    442 	u32 gen7_latched_oastatus1;
    443 	u32 ctx_oactxctrl_offset;
    444 	u32 ctx_flexeu0_offset;
    445 
    446 	/**
    447 	 * The RPT_ID/reason field for Gen8+ includes a bit
    448 	 * to determine if the CTX ID in the report is valid
    449 	 * but the specific bit differs between Gen 8 and 9
    450 	 */
    451 	u32 gen8_valid_ctx_bit;
    452 
    453 	struct i915_oa_ops ops;
    454 	const struct i915_oa_format *oa_formats;
    455 
    456 	atomic64_t noa_programming_delay;
    457 };
    458 
    459 #endif /* _I915_PERF_TYPES_H_ */
    460