[20/45] drm/i915: Apply an execution_mask to the virtual_engine

Submitted by Chris Wilson on April 25, 2019, 9:19 a.m.

Details

Message ID 20190425092004.9995-20-chris@chris-wilson.co.uk
State Accepted
Commit 78e41ddd219822a5f42948eb6c6c6cae14c80181
Headers show
Series "Series without cover letter" ( rev: 2 1 ) in Intel GFX

Browsing this patch as part of:
"Series without cover letter" rev 1 in Intel GFX
<< prev patch [20/45] next patch >>

Commit Message

Chris Wilson April 25, 2019, 9:19 a.m.
Allow the user to direct which physical engines of the virtual engine
they wish to execute one, as sometimes it is necessary to override the
load balancing algorithm.

v2: Only kick the virtual engines on context-out if required

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  67 +++++++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c    |   1 +
 drivers/gpu/drm/i915/i915_request.h    |   3 +
 4 files changed, 202 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e8faf3417f9c..8acf9d1c6f2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -549,6 +549,15 @@  execlists_context_schedule_in(struct i915_request *rq)
 	rq->hw_context->active = rq->engine;
 }
 
+static void kick_siblings(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+	struct i915_request *next = READ_ONCE(ve->request);
+
+	if (next && next->execution_mask & ~rq->execution_mask)
+		tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
@@ -556,6 +565,18 @@  execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
+
+	/*
+	 * If this is part of a virtual engine, its next request may have
+	 * been blocked waiting for access to the active context. We have
+	 * to kick all the siblings again in case we need to switch (e.g.
+	 * the next request is not runnable on this engine). Hopefully,
+	 * we will already have submitted the next request before the
+	 * tasklet runs and do not need to rebuild each virtual tree
+	 * and kick everyone again.
+	 */
+	if (rq->engine != rq->hw_context->engine)
+		kick_siblings(rq);
 }
 
 static u64 execlists_update_context(struct i915_request *rq)
@@ -783,6 +804,9 @@  static bool virtual_matches(const struct virtual_engine *ve,
 {
 	const struct intel_engine_cs *active;
 
+	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+		return false;
+
 	/*
 	 * We track when the HW has completed saving the context image
 	 * (i.e. when we have seen the final CS event switching out of
@@ -3141,12 +3165,44 @@  static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+	struct i915_request *rq;
+	intel_engine_mask_t mask;
+
+	rq = READ_ONCE(ve->request);
+	if (!rq)
+		return 0;
+
+	/* The rq is ready for submission; rq->execution_mask is now stable. */
+	mask = rq->execution_mask;
+	if (unlikely(!mask)) {
+		/* Invalid selection, submit to a random engine in error */
+		i915_request_skip(rq, -ENODEV);
+		mask = ve->siblings[0]->mask;
+	}
+
+	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+		  ve->base.name,
+		  rq->fence.context, rq->fence.seqno,
+		  mask, ve->base.execlists.queue_priority_hint);
+
+	return mask;
+}
+
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
 	const int prio = ve->base.execlists.queue_priority_hint;
+	intel_engine_mask_t mask;
 	unsigned int n;
 
+	rcu_read_lock();
+	mask = virtual_submission_mask(ve);
+	rcu_read_unlock();
+	if (unlikely(!mask))
+		return;
+
 	local_irq_disable();
 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3154,6 +3210,17 @@  static void virtual_submission_tasklet(unsigned long data)
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 558399e5c7bb..73308749936c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1480,6 +1480,136 @@  static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int mask_virtual_engine(struct drm_i915_private *i915,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling)
+{
+	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+	struct i915_gem_context *ctx;
+	struct intel_context *ve;
+	struct igt_live_test t;
+	unsigned int n;
+	int err;
+
+	/*
+	 * Check that by setting the execution mask on a request, we can
+	 * restrict it to our desired engine within the virtual engine.
+	 */
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_close;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < nsibling; n++) {
+		request[n] = i915_request_create(ve);
+		if (IS_ERR(request)) {
+			err = PTR_ERR(request);
+			nsibling = n;
+			goto out;
+		}
+
+		/* Reverse order as it's more likely to be unnatural */
+		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+		i915_request_get(request[n]);
+		i915_request_add(request[n]);
+	}
+
+	for (n = 0; n < nsibling; n++) {
+		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+			pr_err("%s(%s): wait for %llx:%lld timed out\n",
+			       __func__, ve->engine->name,
+			       request[n]->fence.context,
+			       request[n]->fence.seqno);
+
+			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+				  __func__, ve->engine->name,
+				  request[n]->fence.context,
+				  request[n]->fence.seqno);
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out;
+		}
+
+		if (request[n]->engine != siblings[nsibling - n - 1]) {
+			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+			       request[n]->engine->name,
+			       siblings[nsibling - n - 1]->name);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < nsibling; n++)
+		i915_request_put(request[n]);
+
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_close:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		unsigned int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = mask_virtual_engine(i915, siblings, nsibling);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1493,6 +1623,7 @@  int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_mask),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a5a2ccc23958..06c2b89d0b0a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -688,6 +688,7 @@  __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ALL_ENGINES;
 
 	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8025a89b5999..d7f9b2194568 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@ 
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
@@ -156,6 +158,7 @@  struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	intel_engine_mask_t execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in

Comments

On 25/04/2019 10:19, Chris Wilson wrote:
> Allow the user to direct which physical engines of the virtual engine
> they wish to execute one, as sometimes it is necessary to override the
> load balancing algorithm.
> 
> v2: Only kick the virtual engines on context-out if required
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c    |  67 +++++++++++++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_request.c    |   1 +
>   drivers/gpu/drm/i915/i915_request.h    |   3 +
>   4 files changed, 202 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e8faf3417f9c..8acf9d1c6f2f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -549,6 +549,15 @@ execlists_context_schedule_in(struct i915_request *rq)
>   	rq->hw_context->active = rq->engine;
>   }
>   
> +static void kick_siblings(struct i915_request *rq)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
> +	struct i915_request *next = READ_ONCE(ve->request);
> +
> +	if (next && next->execution_mask & ~rq->execution_mask)
> +		tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
>   static inline void
>   execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>   {
> @@ -556,6 +565,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>   	intel_engine_context_out(rq->engine);
>   	execlists_context_status_change(rq, status);
>   	trace_i915_request_out(rq);
> +
> +	/*
> +	 * If this is part of a virtual engine, its next request may have
> +	 * been blocked waiting for access to the active context. We have
> +	 * to kick all the siblings again in case we need to switch (e.g.
> +	 * the next request is not runnable on this engine). Hopefully,
> +	 * we will already have submitted the next request before the
> +	 * tasklet runs and do not need to rebuild each virtual tree
> +	 * and kick everyone again.
> +	 */
> +	if (rq->engine != rq->hw_context->engine)
> +		kick_siblings(rq);
>   }
>   
>   static u64 execlists_update_context(struct i915_request *rq)
> @@ -783,6 +804,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
>   {
>   	const struct intel_engine_cs *active;
>   
> +	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
> +		return false;
> +
>   	/*
>   	 * We track when the HW has completed saving the context image
>   	 * (i.e. when we have seen the final CS event switching out of
> @@ -3141,12 +3165,44 @@ static const struct intel_context_ops virtual_context_ops = {
>   	.destroy = virtual_context_destroy,
>   };
>   
> +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> +{
> +	struct i915_request *rq;
> +	intel_engine_mask_t mask;
> +
> +	rq = READ_ONCE(ve->request);
> +	if (!rq)
> +		return 0;
> +
> +	/* The rq is ready for submission; rq->execution_mask is now stable. */
> +	mask = rq->execution_mask;
> +	if (unlikely(!mask)) {
> +		/* Invalid selection, submit to a random engine in error */
> +		i915_request_skip(rq, -ENODEV);
> +		mask = ve->siblings[0]->mask;
> +	}
> +
> +	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> +		  ve->base.name,
> +		  rq->fence.context, rq->fence.seqno,
> +		  mask, ve->base.execlists.queue_priority_hint);
> +
> +	return mask;
> +}
> +
>   static void virtual_submission_tasklet(unsigned long data)
>   {
>   	struct virtual_engine * const ve = (struct virtual_engine *)data;
>   	const int prio = ve->base.execlists.queue_priority_hint;
> +	intel_engine_mask_t mask;
>   	unsigned int n;
>   
> +	rcu_read_lock();
> +	mask = virtual_submission_mask(ve);
> +	rcu_read_unlock();
> +	if (unlikely(!mask))

Is the rcu_lock think solely for the same protection against wedging in 
submit_notify?

Regards,

Tvrtko

> +		return;
> +
>   	local_irq_disable();
>   	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
>   		struct intel_engine_cs *sibling = ve->siblings[n];
> @@ -3154,6 +3210,17 @@ static void virtual_submission_tasklet(unsigned long data)
>   		struct rb_node **parent, *rb;
>   		bool first;
>   
> +		if (unlikely(!(mask & sibling->mask))) {
> +			if (!RB_EMPTY_NODE(&node->rb)) {
> +				spin_lock(&sibling->timeline.lock);
> +				rb_erase_cached(&node->rb,
> +						&sibling->execlists.virtual);
> +				RB_CLEAR_NODE(&node->rb);
> +				spin_unlock(&sibling->timeline.lock);
> +			}
> +			continue;
> +		}
> +
>   		spin_lock(&sibling->timeline.lock);
>   
>   		if (!RB_EMPTY_NODE(&node->rb)) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 558399e5c7bb..73308749936c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
>   	return err;
>   }
>   
> +static int mask_virtual_engine(struct drm_i915_private *i915,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling)
> +{
> +	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
> +	struct i915_gem_context *ctx;
> +	struct intel_context *ve;
> +	struct igt_live_test t;
> +	unsigned int n;
> +	int err;
> +
> +	/*
> +	 * Check that by setting the execution mask on a request, we can
> +	 * restrict it to our desired engine within the virtual engine.
> +	 */
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
> +	if (IS_ERR(ve)) {
> +		err = PTR_ERR(ve);
> +		goto out_close;
> +	}
> +
> +	err = intel_context_pin(ve);
> +	if (err)
> +		goto out_put;
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
> +	if (err)
> +		goto out_unpin;
> +
> +	for (n = 0; n < nsibling; n++) {
> +		request[n] = i915_request_create(ve);
> +		if (IS_ERR(request)) {
> +			err = PTR_ERR(request);
> +			nsibling = n;
> +			goto out;
> +		}
> +
> +		/* Reverse order as it's more likely to be unnatural */
> +		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
> +
> +		i915_request_get(request[n]);
> +		i915_request_add(request[n]);
> +	}
> +
> +	for (n = 0; n < nsibling; n++) {
> +		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
> +			pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +			       __func__, ve->engine->name,
> +			       request[n]->fence.context,
> +			       request[n]->fence.seqno);
> +
> +			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +				  __func__, ve->engine->name,
> +				  request[n]->fence.context,
> +				  request[n]->fence.seqno);
> +			GEM_TRACE_DUMP();
> +			i915_gem_set_wedged(i915);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		if (request[n]->engine != siblings[nsibling - n - 1]) {
> +			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
> +			       request[n]->engine->name,
> +			       siblings[nsibling - n - 1]->name);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (n = 0; n < nsibling; n++)
> +		i915_request_put(request[n]);
> +
> +out_unpin:
> +	intel_context_unpin(ve);
> +out_put:
> +	intel_context_put(ve);
> +out_close:
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_mask(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		unsigned int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		err = mask_virtual_engine(i915, siblings, nsibling);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
> +		SUBTEST(live_virtual_mask),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a5a2ccc23958..06c2b89d0b0a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->batch = NULL;
>   	rq->capture_list = NULL;
>   	rq->waitboost = false;
> +	rq->execution_mask = ALL_ENGINES;
>   
>   	INIT_LIST_HEAD(&rq->active_list);
>   	INIT_LIST_HEAD(&rq->execute_cb);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8025a89b5999..d7f9b2194568 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -28,6 +28,8 @@
>   #include <linux/dma-fence.h>
>   #include <linux/lockdep.h>
>   
> +#include "gt/intel_engine_types.h"
> +
>   #include "i915_gem.h"
>   #include "i915_scheduler.h"
>   #include "i915_selftest.h"
> @@ -156,6 +158,7 @@ struct i915_request {
>   	 */
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
> +	intel_engine_mask_t execution_mask;
>   
>   	/*
>   	 * A convenience pointer to the current breadcrumb value stored in
>
Quoting Tvrtko Ursulin (2019-04-29 15:12:23)
> 
> On 25/04/2019 10:19, Chris Wilson wrote:
> >   static void virtual_submission_tasklet(unsigned long data)
> >   {
> >       struct virtual_engine * const ve = (struct virtual_engine *)data;
> >       const int prio = ve->base.execlists.queue_priority_hint;
> > +     intel_engine_mask_t mask;
> >       unsigned int n;
> >   
> > +     rcu_read_lock();
> > +     mask = virtual_submission_mask(ve);
> > +     rcu_read_unlock();
> > +     if (unlikely(!mask))
> 
> Is the rcu_lock think solely for the same protection against wedging in 
> submit_notify?

No. We may still be in the rbtree of the physical engines and
ve->request may be plucked out from underneath us as we read it. And in
the time it takes to tracek, that request may have been executed,
retired and freed. To prevent the dangling stale dereference, we use
rcu_read_lock() here as we peek into the request, and spinlocks around
the actual transfer to the execution backend.
-Chris
On 07/05/2019 17:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-29 15:12:23)
>>
>> On 25/04/2019 10:19, Chris Wilson wrote:
>>>    static void virtual_submission_tasklet(unsigned long data)
>>>    {
>>>        struct virtual_engine * const ve = (struct virtual_engine *)data;
>>>        const int prio = ve->base.execlists.queue_priority_hint;
>>> +     intel_engine_mask_t mask;
>>>        unsigned int n;
>>>    
>>> +     rcu_read_lock();
>>> +     mask = virtual_submission_mask(ve);
>>> +     rcu_read_unlock();
>>> +     if (unlikely(!mask))
>>
>> Is the rcu_lock think solely for the same protection against wedging in
>> submit_notify?
> 
> No. We may still be in the rbtree of the physical engines and
> ve->request may be plucked out from underneath us as we read it. And in
> the time it takes to tracek, that request may have been executed,
> retired and freed. To prevent the dangling stale dereference, we use
> rcu_read_lock() here as we peek into the request, and spinlocks around
> the actual transfer to the execution backend.

So it's not actually about ve->request as s member pointer, but the 
request object itself. That could make sense, but then wouldn't you need 
to hold the rcu_read_lock over the whole tasklet? There is another 
ve->request read in the for loop just below, although not an actual 
dereference. I guess I just answered it to myself. Okay, looks good then.

Regards,

Tvrtko