[CI,07/13] drm/i915/perf: allow for CS OA configs to be created lazily

Submitted by Lionel Landwerlin on Sept. 9, 2019, 8:22 a.m.

Details

Message ID 20190909082245.27245-7-lionel.g.landwerlin@intel.com
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Lionel Landwerlin Sept. 9, 2019, 8:22 a.m.
Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configuration without having to open/close the i915/perf
stream.

v2: No need for locking on object OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
    (Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |   4 +-
 drivers/gpu/drm/i915/i915_perf.c             | 270 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.h             |  26 ++
 drivers/gpu/drm/i915/i915_perf_types.h       |  15 +-
 5 files changed, 273 insertions(+), 43 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index fbad403ab7ac..b6373fbc927d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -135,6 +135,7 @@ 
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO		(1<<19)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f4145ae6ab6e..7eb31923cde9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1363,8 +1363,8 @@  struct drm_i915_private {
 		struct mutex metrics_lock;
 
 		/*
-		 * List of dynamic configurations, you need to hold
-		 * dev_priv->perf.metrics_lock to access it.
+		 * List of dynamic configurations (struct i915_oa_config), you
+		 * need to hold dev_priv->perf.metrics_lock to access it.
 		 */
 		struct idr metrics_idr;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 40a1ec2bc96b..c9d0de3050fb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -367,11 +367,19 @@  struct perf_open_properties {
 	struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+	struct list_head link;
+
+	struct i915_oa_config *oa_config;
+	struct drm_i915_gem_object *bo;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-			   struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+	struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
+
 	if (!PTR_ERR(oa_config->flex_regs))
 		kfree(oa_config->flex_regs);
 	if (!PTR_ERR(oa_config->b_counter_regs))
@@ -381,40 +389,194 @@  static void free_oa_config(struct drm_i915_private *dev_priv,
 	kfree(oa_config);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-			  struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	u32 i;
+
+	for (i = 0; i < n_regs; i++) {
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min(n_regs - i,
+					(u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
 
-	free_oa_config(dev_priv, oa_config);
+			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+		*cs++ = reg_data[i].value;
+	}
+
+	return cs;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private *i915,
+							struct i915_oa_config *oa_config)
 {
-	int ret;
+	struct i915_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	u32 *cs;
+	int err;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	oa_bo->oa_config = i915_oa_config_get(oa_config);
+
+	if (oa_config->mux_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->mux_regs_len * 8;
+	}
+	if (oa_config->b_counter_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->b_counter_regs_len * 8;
+	}
+	if (oa_config->flex_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->flex_regs_len * 8;
+	}
+	config_length += 4; /* MI_BATCH_BUFFER_END */
+	config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
+
+	oa_bo->bo = i915_gem_object_create_shmem(i915, config_length);
+	if (IS_ERR(oa_bo->bo)) {
+		err = PTR_ERR(oa_bo->bo);
+		goto err_oa_config;
+	}
+
+	cs = i915_gem_object_pin_map(oa_bo->bo, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_oa_bo;
+	}
+
+	cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(oa_bo->bo);
+	i915_gem_object_unpin_map(oa_bo->bo);
+
+	return oa_bo;
+
+err_oa_bo:
+	i915_gem_object_put(oa_bo->bo);
+err_oa_config:
+	i915_oa_config_put(oa_bo->oa_config);
+	kfree(oa_bo);
+
+	return ERR_PTR(err);
+}
+
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config)
+{
+	struct i915_oa_config *oa_config;
+	int err;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = mutex_lock_interruptible(&i915->perf.metrics_lock);
+	if (err)
+		return err;
 
 	if (metrics_set == 1) {
-		*out_config = &dev_priv->perf.test_config;
-		atomic_inc(&dev_priv->perf.test_config.ref_count);
-		return 0;
+		oa_config = &i915->perf.test_config;
+	} else {
+		oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
+		if (!oa_config)
+			err = -EINVAL;
 	}
 
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
-	if (ret)
-		return ret;
+	if (!err)
+		*out_config = i915_oa_config_get(oa_config);
 
-	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
-	else
-		atomic_inc(&(*out_config)->ref_count);
+	mutex_unlock(&i915->perf.metrics_lock);
 
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+	return err;
+}
 
-	return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+	i915_oa_config_put(oa_bo->oa_config);
+	i915_gem_object_put(oa_bo->bo);
+	kfree(oa_bo);
+}
+
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj)
+{
+	struct drm_i915_private *i915 = stream->dev_priv;
+	struct i915_oa_config *oa_config;
+	int err = 0;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = i915_perf_get_oa_config(i915, metrics_set, &oa_config);
+	if (err)
+		return err;
+
+	if (out_config)
+		*out_config = oa_config;
+
+	if (out_obj) {
+		struct i915_oa_config_bo *oa_bo = NULL, *oa_bo_iter;
+
+		/* Look for the buffer in the already allocated BOs attached
+		 * to the stream.
+		 */
+		err = mutex_lock_interruptible(&stream->config_mutex);
+		if (err)
+			goto err;
+
+		list_for_each_entry(oa_bo_iter, &stream->oa_config_bos, link) {
+			if (oa_bo_iter->oa_config == oa_config &&
+			    memcmp(oa_bo_iter->oa_config->uuid,
+				   oa_config->uuid,
+				   sizeof(oa_config->uuid)) == 0) {
+				oa_bo = oa_bo_iter;
+				break;
+			}
+		}
+
+		mutex_unlock(&stream->config_mutex);
+
+		if (!oa_bo) {
+			oa_bo = alloc_oa_config_buffer(i915, oa_config);
+			if (IS_ERR(oa_bo)) {
+				err = PTR_ERR(oa_bo);
+				goto err;
+			}
+
+			err = mutex_lock_interruptible(&stream->config_mutex);
+			if (err) {
+				free_oa_config_bo(oa_bo);
+				goto err;
+			}
+
+			list_add(&oa_bo->link, &stream->oa_config_bos);
+
+			mutex_unlock(&stream->config_mutex);
+		}
+
+		*out_obj = i915_gem_object_get(oa_bo->bo);
+	}
+
+err:
+	if (err) {
+		i915_oa_config_put(oa_config);
+		*out_config = NULL;
+	}
+
+	return err;
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1362,6 +1524,18 @@  free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
+
+	i915_oa_config_put(stream->oa_config);
+	list_for_each_entry_safe(oa_bo, tmp, &stream->oa_config_bos, link) {
+		list_del(&oa_bo->link);
+		free_oa_config_bo(oa_bo);
+	}
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
@@ -1385,7 +1559,7 @@  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
 
 	if (dev_priv->perf.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2199,6 +2373,8 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
+	mutex_init(&stream->config_mutex);
+
 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
 	format_size = dev_priv->perf.oa_formats[props->oa_format].size;
@@ -2227,7 +2403,8 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
-	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+	ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+				      &stream->oa_config);
 	if (ret) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
 		goto err_config;
@@ -2265,6 +2442,8 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
+	DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	hrtimer_init(&stream->poll_check_timer,
@@ -2284,11 +2463,11 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	put_oa_config(dev_priv, stream->oa_config);
-
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
+	free_oa_configs(stream);
+
 err_config:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
@@ -2309,8 +2488,12 @@  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 		return;
 
 	stream = engine->i915->perf.exclusive_stream;
-	if (stream)
+
+	if (stream) {
+		mutex_lock(&stream->config_mutex);
 		gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
+		mutex_unlock(&stream->config_mutex);
+	}
 }
 
 /**
@@ -2653,7 +2836,9 @@  static int i915_perf_release(struct inode *inode, struct file *file)
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
 	mutex_lock(&dev_priv->perf.lock);
+
 	i915_perf_destroy_locked(stream);
+
 	mutex_unlock(&dev_priv->perf.lock);
 
 	/* Release the reference the perf stream kept on the driver. */
@@ -2762,6 +2947,7 @@  i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_ctx;
 	}
 
+	INIT_LIST_HEAD(&stream->oa_config_bos);
 	stream->dev_priv = dev_priv;
 	stream->ctx = specific_ctx;
 
@@ -3088,7 +3274,8 @@  void i915_perf_register(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+	dev_priv->perf.test_config.i915 = dev_priv;
+	kref_init(&dev_priv->perf.test_config.ref);
 
 	goto exit;
 
@@ -3344,7 +3531,8 @@  int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->i915 = dev_priv;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3443,7 +3631,7 @@  int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3479,13 +3667,13 @@  int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
 	if (ret)
-		goto lock_err;
+		return ret;
 
 	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
 	if (!oa_config) {
 		DRM_DEBUG("Failed to remove unknown OA config\n");
 		ret = -ENOENT;
-		goto config_err;
+		goto err_unlock;
 	}
 
 	GEM_BUG_ON(*arg != oa_config->id);
@@ -3495,13 +3683,16 @@  int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	idr_remove(&dev_priv->perf.metrics_idr, *arg);
 
+	mutex_unlock(&dev_priv->perf.metrics_lock);
+
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
+
+	return 0;
 
-config_err:
+err_unlock:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
 	return ret;
 }
 
@@ -3671,10 +3862,9 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 
 static int destroy_config(int id, void *p, void *data)
 {
-	struct drm_i915_private *dev_priv = data;
 	struct i915_oa_config *oa_config = p;
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 }
@@ -3688,7 +3878,7 @@  void i915_perf_fini(struct drm_i915_private *dev_priv)
 	if (!dev_priv->perf.initialized)
 		return;
 
-	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
+	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL);
 	idr_destroy(&dev_priv->perf.metrics_idr);
 
 	unregister_sysctl_table(dev_priv->perf.sysctl_header);
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 270193526b56..1c85487876a4 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -7,12 +7,16 @@ 
 #define __I915_PERF_H__
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 #include "i915_perf_types.h"
 
 struct drm_device;
 struct drm_file;
+struct drm_i915_gem_object;
 struct drm_i915_private;
+struct i915_oa_config;
+struct i915_perf_stream;
 struct intel_context;
 struct intel_engine_cs;
 
@@ -31,5 +35,27 @@  int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct intel_context *ce,
 			    u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config);
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline struct i915_oa_config *i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+	kref_get(&oa_config->ref);
+	return oa_config;
+}
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 #endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 0f72c83142dd..58469d2a3970 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -37,6 +37,8 @@  struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+	struct drm_i915_private *i915;
+
 	char uuid[UUID_STRING_LEN + 1];
 	int id;
 
@@ -51,7 +53,7 @@  struct i915_oa_config {
 	struct attribute *attrs[2];
 	struct device_attribute sysfs_metric_id;
 
-	atomic_t ref_count;
+	struct kref ref;
 };
 
 struct i915_perf_stream;
@@ -173,11 +175,22 @@  struct i915_perf_stream {
 	 */
 	const struct i915_perf_stream_ops *ops;
 
+	/**
+	 * @active_config_mutex: Protects access to @oa_config & @oa_config_bos.
+	 */
+	struct mutex config_mutex;
+
 	/**
 	 * @oa_config: The OA configuration used by the stream.
 	 */
 	struct i915_oa_config *oa_config;
 
+	/**
+	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct list_head oa_config_bos;
+
 	/**
 	 * The OA context specific information.
 	 */