[v3] drm/i915/selftests: Add a GuC doorbells selftest

Submitted by Michel Thierry on Nov. 10, 2017, 7:04 p.m.

Details

Message ID 20171110190415.2765-1-michel.thierry@intel.com
State New
Headers show
Series "drm/i915/selftests: Add a GuC doorbells selftest" ( rev: 3 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Michel Thierry Nov. 10, 2017, 7:04 p.m.
The first test aims to check guc_init_doorbell_hw, changing the existing
guc clients and doorbells state before calling it.

The second test tries to create as many clients as is currently possible
(limited by the maximum number of doorbells) and exercises the doorbell
alloc/dealloc code.

Since our usage model requires very few clients/doorbells, this code has
been exercised only lightly, so it's good to have a simple test for it.

As reference, this test already helped identify the bug fixed by
commit 7f1ea2ac3017 ("drm/i915/guc: Fix doorbell id selection").

v2: Extend number of clients; check for client allocation failure when
number of doorbells is exceeded; validate client properties; reuse
guc_init_doorbell_hw (Chris).

v3: guc_init_doorbell_hw test added per Chris suggestion.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c         |   4 +
 .../gpu/drm/i915/selftests/i915_live_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/intel_guc.c         | 345 +++++++++++++++++++++
 3 files changed, 350 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_guc.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0ba2fc04fe9c..5d6576e01a91 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1464,3 +1464,7 @@  void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 
 	guc_clients_destroy(guc);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_guc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index d7dd98a6acad..088f45bc6199 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -20,3 +20,4 @@  selftest(evict, i915_gem_evict_live_selftests)
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(contexts, i915_gem_context_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
+selftest(guc, intel_guc_live_selftest)
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
new file mode 100644
index 000000000000..3d7245bb5588
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -0,0 +1,345 @@ 
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "../i915_selftest.h"
+
+/*
+ * Number of client allocations attempted by the doorbell test: the max
+ * doorbell number, plus one negative test for each client type.
+ */
+#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
+
+/*
+ * Clients allocated by the doorbell test. Only this selftest touches the
+ * array, so keep it out of the global namespace.
+ */
+static struct i915_guc_client *clients[ATTEMPTS];
+
+/*
+ * Report whether the doorbell bitmap still has a clear (unused) bit in the
+ * range that serves @priority.
+ *
+ * The first half of the doorbells is used for normal priority clients, the
+ * second half for high priority ones.
+ */
+static bool available_dbs(struct intel_guc *guc, u32 priority)
+{
+	const unsigned long half = GUC_NUM_DOORBELLS/2;
+	unsigned long first, limit;
+	u16 free_id;
+
+	if (priority <= GUC_CLIENT_PRIORITY_HIGH) {
+		/* high priority: search the second half */
+		first = half;
+		limit = half + half;
+	} else {
+		/* normal priority: search the first half */
+		first = 0;
+		limit = half;
+	}
+
+	free_id = find_next_zero_bit(guc->doorbell_bitmap, limit, first);
+
+	/* a result below the limit means a clear bit was found in range */
+	return free_id < limit;
+}
+
+/*
+ * Run doorbell_ok() on every doorbell id below GUC_NUM_DOORBELLS.
+ *
+ * Returns 0 when all of them pass. On the first id that does not, the id is
+ * logged and -EIO is returned.
+ */
+static int check_all_doorbells(struct intel_guc *guc)
+{
+	u16 db_id;
+
+	/* terminate the line so the message is not merged with the next one */
+	pr_info_once("Max number of doorbells: %d\n", GUC_NUM_DOORBELLS);
+	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) {
+		if (!doorbell_ok(guc, db_id)) {
+			pr_err("doorbell %d, not ok\n", db_id);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Basic sanity check of a freshly created client.
+ *
+ * The client must be owned by the preempt context (when @is_preempt_client)
+ * or by the kernel context (otherwise), cover every engine in the device's
+ * ring_mask, carry @client_priority and hold a valid doorbell id.
+ *
+ * Returns 0 if all of the above hold, -EINVAL otherwise.
+ */
+static int validate_client(struct i915_guc_client *client,
+			   int client_priority,
+			   bool is_preempt_client)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(client->guc);
+	struct i915_gem_context *expected_owner;
+
+	if (is_preempt_client)
+		expected_owner = dev_priv->preempt_context;
+	else
+		expected_owner = dev_priv->kernel_context;
+
+	if (client->owner != expected_owner)
+		return -EINVAL;
+
+	if (client->engines != INTEL_INFO(dev_priv)->ring_mask)
+		return -EINVAL;
+
+	if (client->priority != client_priority)
+		return -EINVAL;
+
+	if (client->doorbell_id == GUC_DOORBELL_INVALID)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Check that guc_init_doorbell_hw is doing what it should.
+ *
+ * With struct_mutex held for the whole test:
+ *  - verify all doorbells, then destroy and recreate the driver's clients
+ *    and validate the new execbuf and preempt clients;
+ *  - call guc_init_doorbell_hw with both clients holding a doorbell
+ *    (must succeed);
+ *  - destroy the execbuf client's doorbell and call it again (must fail
+ *    with -EIO), then give the client its doorbell back;
+ *  - flip every odd bit of the doorbell bitmap and call it again (must
+ *    succeed), then restore the bitmap and call it once more (must succeed).
+ *
+ * Returns 0 on success or a negative error code on the first failed step.
+ */
+static int igt_guc_init_doorbell_hw(void *args)
+{
+	struct drm_i915_private *dev_priv = args;
+	struct intel_guc *guc;
+	DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS);
+	int i, ret, err = 0;
+
+	pr_info("GuC init_doorbell_hw selftest\n");
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
+	/* guc is embedded in dev_priv, its address can never be NULL */
+	guc = &dev_priv->guc;
+
+	err = check_all_doorbells(guc);
+	if (err)
+		goto unlock;
+
+	/* Get rid of clients created during driver load, the test will
+	 * recreate them.
+	 */
+	guc_clients_destroy(guc);
+	if (guc->execbuf_client || guc->preempt_client) {
+		pr_err("guc_clients_destroy lied!\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	err = guc_clients_create(guc);
+	if (err) {
+		pr_err("Failed to create clients\n");
+		goto unlock;
+	}
+
+	err = validate_client(guc->execbuf_client,
+			      GUC_CLIENT_PRIORITY_KMD_NORMAL, false);
+	if (err) {
+		pr_err("execbuf client validation failed\n");
+		goto out;
+	}
+
+	err = validate_client(guc->preempt_client,
+			      GUC_CLIENT_PRIORITY_KMD_HIGH, true);
+	if (err) {
+		pr_err("preempt client validation failed\n");
+		goto out;
+	}
+
+	/* each client should have received a doorbell during alloc */
+	if (!has_doorbell(guc->execbuf_client) ||
+	    !has_doorbell(guc->preempt_client)) {
+		pr_err("guc_clients_create didn't create doorbells\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* basic test */
+	err = guc_init_doorbell_hw(guc);
+	if (err)
+		goto out;
+
+	/* test using client with no doorbell (init_db_hw should fail) */
+	destroy_doorbell(guc->execbuf_client);
+	if (has_doorbell(guc->execbuf_client)) {
+		pr_err("destroy db did not work\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* expected to fail */
+	err = guc_init_doorbell_hw(guc);
+	if (err != -EIO) {
+		pr_err("unexpected (err = %d)\n", err);
+		/* an unexpected success must still fail the test */
+		if (!err)
+			err = -EINVAL;
+		goto out;
+	}
+
+	/* clean after test */
+	err = create_doorbell(guc->execbuf_client);
+	if (err) {
+		pr_err("recreate doorbell failed\n");
+		goto out;
+	}
+
+	/* test doorbell_bitmap out of sync, will trigger a few of
+	 * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the
+	 * doorbells from our clients don't fail.
+	 */
+	bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS);
+	for (i = 0; i < GUC_NUM_DOORBELLS; i++)
+		if (i % 2)
+			test_and_change_bit(i, guc->doorbell_bitmap);
+
+	err = guc_init_doorbell_hw(guc);
+	if (err) {
+		pr_err("out of sync doorbell caused an error\n");
+		goto out;
+	}
+
+	/* restore 'correct' db bitmap */
+	bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS);
+	err = guc_init_doorbell_hw(guc);
+	if (err) {
+		pr_err("restored doorbell caused an error\n");
+		goto out;
+	}
+
+out:
+	/* leave clean state for other test, plus the driver always destroy the
+	 * clients during unload.
+	 */
+	guc_clients_destroy(guc);
+	ret = guc_clients_create(guc);
+	if (ret) {
+		pr_err("Failed to recreate clients\n");
+		/* report it, but keep the first error if there was one */
+		if (!err)
+			err = ret;
+	}
+unlock:
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+	return err;
+}
+
+/*
+ * Create as many clients as number of doorbells (note that there's already
+ * one client/db created during driver load).
+ *
+ * ATTEMPTS allocations are tried, cycling through the client priorities.
+ * An allocation may only fail with -ENOSPC, and only when available_dbs()
+ * reports no free doorbell for that priority. Every client that is
+ * allocated is validated, then has its doorbell destroyed, reserved and
+ * created again; the doorbell id must not change and all doorbells must
+ * still pass check_all_doorbells() afterwards.
+ *
+ * struct_mutex is held for the whole test. All clients allocated here are
+ * freed before returning.
+ *
+ * Returns 0 on success or a negative error code on the first failed check.
+ */
+static int igt_guc_doorbells(void *arg)
+{
+	struct drm_i915_private *dev_priv = arg;
+	struct intel_guc *guc;
+	int i, err = 0;
+	u16 db_id;
+
+	pr_info("GuC Doorbells selftest\n");
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
+	/* guc is embedded in dev_priv, its address can never be NULL */
+	guc = &dev_priv->guc;
+
+	err = check_all_doorbells(guc);
+	if (err)
+		goto unlock;
+
+	for (i = 0; i < ATTEMPTS; i++) {
+		clients[i] = guc_client_alloc(dev_priv,
+					      INTEL_INFO(dev_priv)->ring_mask,
+					      i % GUC_CLIENT_PRIORITY_NUM,
+					      dev_priv->kernel_context);
+
+		if (!clients[i]) {
+			pr_err("[%d] No guc client\n", i);
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (IS_ERR(clients[i])) {
+			if (PTR_ERR(clients[i]) != -ENOSPC) {
+				pr_err("[%d] unexpected error\n", i);
+				err = PTR_ERR(clients[i]);
+				goto out;
+			}
+
+			if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) {
+				pr_err("[%d] non-db related alloc fail\n", i);
+				err = -EINVAL;
+				goto out;
+			}
+
+			/* expected, ran out of dbs for this client type */
+			continue;
+		}
+
+		/*
+		 * The check below is only valid because we keep a doorbell
+		 * assigned during the whole life of the client.
+		 */
+		if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) {
+			pr_err("[%d] more clients than doorbells (%d >= %d)\n",
+			       i, clients[i]->stage_id, GUC_NUM_DOORBELLS);
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = validate_client(clients[i],
+				      i % GUC_CLIENT_PRIORITY_NUM, false);
+		if (err) {
+			pr_err("[%d] client_alloc sanity check failed!\n", i);
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* remember the id so we can check it survives the recreate */
+		db_id = clients[i]->doorbell_id;
+
+		/*
+		 * Client alloc gives us a doorbell, but we want to exercise
+		 * this ourselves (this resembles guc_init_doorbell_hw)
+		 */
+		destroy_doorbell(clients[i]);
+		if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) {
+			pr_err("[%d] destroy db did not work!\n", i);
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = __reserve_doorbell(clients[i]);
+		if (err) {
+			pr_err("[%d] Failed to reserve a doorbell\n", i);
+			goto out;
+		}
+
+		__update_doorbell_desc(clients[i], clients[i]->doorbell_id);
+		err = __create_doorbell(clients[i]);
+		if (err) {
+			pr_err("[%d] Failed to create a doorbell\n", i);
+			goto out;
+		}
+
+		/* doorbell id shouldn't change, we are holding the mutex */
+		if (db_id != clients[i]->doorbell_id) {
+			pr_err("[%d] doorbell id changed (%d != %d)\n",
+			       i, db_id, clients[i]->doorbell_id);
+			err = -EINVAL;
+			goto out;
+		}
+
+		err = check_all_doorbells(guc);
+		if (err)
+			goto out;
+	}
+
+out:
+	for (i = 0; i < ATTEMPTS; i++) {
+		if (!IS_ERR_OR_NULL(clients[i]))
+			guc_client_free(clients[i]);
+
+		/*
+		 * The array outlives this function; don't leave freed or
+		 * error pointers behind for a later run to trip over.
+		 */
+		clients[i] = NULL;
+	}
+unlock:
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+	return err;
+}
+
+/*
+ * Entry point for the GuC live selftests.
+ *
+ * Runs the doorbell subtests only when the enable_guc_submission module
+ * parameter is set; otherwise there is nothing to test and 0 is returned.
+ */
+int intel_guc_live_selftest(struct drm_i915_private *dev_priv)
+{
+	static const struct i915_subtest guc_tests[] = {
+		SUBTEST(igt_guc_init_doorbell_hw),
+		SUBTEST(igt_guc_doorbells),
+	};
+
+	if (i915_modparams.enable_guc_submission)
+		return i915_subtests(guc_tests, dev_priv);
+
+	return 0;
+}