[v3,08/15] drm/i915: Functions to support command submission via GuC

Submitted by yu.dai@intel.com on April 17, 2015, 9:21 p.m.

Details

Message ID 1429305680-4990-9-git-send-email-yu.dai@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

yu.dai@intel.com April 17, 2015, 9:21 p.m.
From: Alex Dai <yu.dai@intel.com>

To enable GuC command submission / scheduling, we need to set up
firmware initialization properly. i915.enable_guc_scheduling is
introduced to enable / disable GuC submission.

GuC firmware uses the one page after the Ring Context as shared data.
However, GuC uses the same offset to address this page for all rings.
So we have to allocate the same size of LRC context for all rings.

Also, reduce the ring buffer size to 4 pages. In GuC, the work queue tail
is referenced by 11 bits (WQ_RING_TAIL_MASK). It is counted in quadwords
(8 bytes), so it spans 14 bits of byte offset in total (16 KB, i.e. 4 pages).

Issue: VIZ-4884
Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/Makefile              |  3 +-
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_params.c         |  4 ++
 drivers/gpu/drm/i915/intel_guc.h           |  9 ++++
 drivers/gpu/drm/i915/intel_guc_loader.c    | 56 ++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_guc_scheduler.c | 78 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 23 +++------
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
 8 files changed, 158 insertions(+), 18 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_guc_scheduler.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 6188302..50b2057 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -37,7 +37,8 @@  i915-y += i915_cmd_parser.o \
 	  i915_trace_points.o \
 	  intel_lrc.o \
 	  intel_ringbuffer.o \
-	  intel_uncore.o
+	  intel_uncore.o \
+	  intel_guc_scheduler.o
 
 # ancilliary microcontroller support
 i915-y += intel_uc_loader.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d128ac4..4134db9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2498,6 +2498,7 @@  struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool enable_guc_scheduling;
 	int use_mmio_flip;
 	int mmio_debug;
 	bool verbose_state_checks;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index bb64415..9ad2e27 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -53,6 +53,7 @@  struct i915_params i915 __read_mostly = {
 	.mmio_debug = 0,
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
+	.enable_guc_scheduling = false,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -184,3 +185,6 @@  MODULE_PARM_DESC(verbose_state_checks,
 module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600);
 MODULE_PARM_DESC(nuclear_pageflip,
 		 "Force atomic modeset functionality; only planes work for now (default: false).");
+
+module_param_named(enable_guc_scheduling, i915.enable_guc_scheduling, bool, 0400);
+MODULE_PARM_DESC(enable_guc_scheduling, "Enable GuC scheduling (default:false)");
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 6b2b5bf..d49549c 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -29,6 +29,9 @@ 
 
 struct intel_guc {
 	struct intel_uc_fw guc_fw;
+
+	/* GuC-specific additions */
+	struct drm_i915_gem_object *ctx_pool_obj;
 };
 
 #define GUC_STATUS		0xc000
@@ -103,4 +106,10 @@  struct drm_i915_gem_object *
 intel_guc_allocate_gem_obj(struct drm_device *dev, u32 size);
 void intel_guc_release_gem_obj(struct drm_i915_gem_object *obj);
 
+/* intel_guc_scheduler.c */
+int guc_scheduler_init(struct drm_device *dev);
+void guc_scheduler_fini(struct drm_device *dev);
+int guc_scheduler_enable(struct drm_device *dev);
+void guc_scheduler_disable(struct drm_device *dev);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 04c2b11..15c055a 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -236,6 +236,36 @@  static void set_guc_init_params(struct drm_i915_private *dev_priv)
 
 	/* XXX: Set up log buffer */
 
+	/* If GuC scheduling is enabled, setup params here. */
+	if (i915.enable_guc_scheduling) {
+		u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
+		u32 ctx_in_16 = MAX_GUC_GPU_CONTEXTS / 16;
+
+		pgs >>= PAGE_SHIFT;
+		params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
+			(ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT);
+
+		/* The shared data is one page following the Ring Context.
+		 * So the offset is the page number of LRC */
+		pgs = IS_GEN9(dev_priv->dev) ? GEN9_LR_CONTEXT_RENDER_SIZE :
+				GEN8_LR_CONTEXT_RENDER_SIZE;
+		pgs >>= PAGE_SHIFT;
+		params[GUC_CTL_OFFSET] |= pgs << GUC_CTL_SHARED_DATA_SHIFT;
+
+		/* This must be non-zero for scheduler to initialize even the
+		 * firmware doesn't use it. Be note that we use separated obj
+		 * for actual ring buffer, while firmware may treat this as an
+		 * offset from Ring Context base. We must take care of this if
+		 * firmware starts using this field.
+		 */
+		params[GUC_CTL_OFFSET] |= 1 << GUC_CTL_RING_BUFFER_SHIFT;
+
+		params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS;
+
+		/* Unmask this bit to enable GuC scheduler */
+		params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER;
+	}
+
 	I915_WRITE(SOFT_SCRATCH(0), 0);
 
 	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
@@ -322,8 +352,13 @@  int intel_guc_load_ucode(struct drm_device *dev, bool wait)
 	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	int err;
 
-	if (!HAS_GUC_UCODE(dev))
+	if (!HAS_GUC_UCODE(dev)) {
+		i915.enable_guc_scheduling = false;
 		return 0;
+	}
+
+	if (!HAS_GUC_SCHED(dev))
+		i915.enable_guc_scheduling = false;
 
 	DRM_DEBUG_DRIVER("GuC: wait %d, fetch status %d, load status %d\n",
 		wait, guc_fw->uc_fw_fetch_status, guc_fw->uc_fw_load_status);
@@ -339,15 +374,32 @@  int intel_guc_load_ucode(struct drm_device *dev, bool wait)
 	if (err)
 		goto fail;
 
+	err = guc_scheduler_init(dev);
+	if (err)
+		goto fail;
+
 	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_PENDING;
 	err = guc_load_ucode(dev);
 	if (err)
 		goto fail;
 
+	err = guc_scheduler_enable(dev);
+	if (err)
+		goto fail;
+
 	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_SUCCESS;
 	return 0;
 
 fail:
+	guc_scheduler_disable(dev);
+
+	if (i915.enable_guc_scheduling) {
+		DRM_ERROR("Failed to initialize GuC, declaring GPU wedged\n");
+		atomic_set_mask(I915_WEDGED,
+				&dev_priv->gpu_error.reset_counter);
+		i915.enable_guc_scheduling = false;
+	}
+
 	guc_fw->uc_fw_load_status = INTEL_UC_FIRMWARE_FAIL;
 	return err;
 }
@@ -420,5 +472,7 @@  void intel_guc_ucode_fini(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_uc_fw *guc_fw = &dev_priv->guc.guc_fw;
 
+	guc_scheduler_fini(dev);
+
 	intel_uc_fw_fini(dev, guc_fw);
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c
new file mode 100644
index 0000000..1047192
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c
@@ -0,0 +1,78 @@ 
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "i915_drv.h"
+#include "intel_guc.h"
+
+void guc_scheduler_fini(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc *guc = &dev_priv->guc;
+	struct drm_i915_gem_object *ctx_pool = guc->ctx_pool_obj;
+
+	guc_scheduler_disable(dev);
+
+	if (ctx_pool) {
+		intel_guc_release_gem_obj(ctx_pool);
+		guc->ctx_pool_obj = NULL;
+	}
+}
+
+/* Set up the resources needed by the firmware scheduler. Currently this only
+ * requires one object that can be mapped through the GGTT.
+ */
+int guc_scheduler_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	const size_t ctxsize = sizeof(struct guc_context_desc);
+	const size_t poolsize = MAX_GUC_GPU_CONTEXTS * ctxsize;
+	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
+	struct intel_guc *guc = &dev_priv->guc;
+
+	if (!i915.enable_guc_scheduling)
+		return 0; /* not enabled  */
+
+	if (guc->ctx_pool_obj)
+		return 0; /* already allocated */
+
+	guc->ctx_pool_obj = intel_guc_allocate_gem_obj(dev_priv->dev, gemsize);
+	if (!guc->ctx_pool_obj)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int guc_scheduler_enable(struct drm_device *dev)
+{
+	if (!i915.enable_guc_scheduling)
+		return 0;
+
+	/* TODO: placeholder for guc scheduler enabling */
+	return 0;
+}
+
+void guc_scheduler_disable(struct drm_device *dev)
+{
+	/* TODO: placeholder for guc scheduler disabling */
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a9814a2..e5d6a74 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1722,20 +1722,10 @@  static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
 
 	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
 
-	switch (ring->id) {
-	case RCS:
-		if (INTEL_INFO(ring->dev)->gen >= 9)
-			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
-		else
-			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
-		break;
-	case VCS:
-	case BCS:
-	case VECS:
-	case VCS2:
-		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
-		break;
-	}
+	if (INTEL_INFO(ring->dev)->gen >= 9)
+		ret = GEN9_LR_CONTEXT_RENDER_SIZE;
+	else
+		ret = GEN8_LR_CONTEXT_RENDER_SIZE;
 
 	return ret;
 }
@@ -1784,6 +1774,9 @@  int intel_lr_context_deferred_create(struct intel_context *ctx,
 	WARN_ON(ctx->engine[ring->id].state);
 
 	context_size = round_up(get_lr_context_size(ring), 4096);
+	/* One extra page as the sharing data between driver and GuC */
+	if (i915.enable_guc_scheduling)
+		context_size += PAGE_SIZE;
 
 	ctx_obj = i915_gem_alloc_object(dev, context_size);
 	if (IS_ERR(ctx_obj)) {
@@ -1812,7 +1805,7 @@  int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 	ringbuf->ring = ring;
 
-	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->size = 4 * PAGE_SIZE;
 	ringbuf->effective_size = ringbuf->size;
 	ringbuf->head = 0;
 	ringbuf->tail = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index de8c074..8f13e80 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1993,7 +1993,7 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
-	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->size = 4 * PAGE_SIZE;
 	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 

Comments

On Fri, Apr 17, 2015 at 02:21:13PM -0700, yu.dai@intel.com wrote:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index de8c074..8f13e80 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1993,7 +1993,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  	INIT_LIST_HEAD(&ring->request_list);
>  	INIT_LIST_HEAD(&ring->execlist_queue);
>  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> -	ringbuf->size = 32 * PAGE_SIZE;
> +	ringbuf->size = 4 * PAGE_SIZE;
>  	ringbuf->ring = ring;
>  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));

NAK.
-Chris
On 04/18/2015 06:48 AM, Chris Wilson wrote:
> On Fri, Apr 17, 2015 at 02:21:13PM -0700, yu.dai@intel.com wrote:
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index de8c074..8f13e80 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -1993,7 +1993,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> >  	INIT_LIST_HEAD(&ring->request_list);
> >  	INIT_LIST_HEAD(&ring->execlist_queue);
> >  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> > -	ringbuf->size = 32 * PAGE_SIZE;
> > +	ringbuf->size = 4 * PAGE_SIZE;
> >  	ringbuf->ring = ring;
> >  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
>
> NAK.
>
First of all, GuC firmware reserves limited bits for ring buffer. 4 
pages is max for now. Second, considering the ring buffer is per-context 
now, there is no need to allocate 32 pages for it.

Thanks,
Alex
On Mon, Apr 20, 2015 at 09:07:28AM -0700, Yu Dai wrote:
> 
> 
> On 04/18/2015 06:48 AM, Chris Wilson wrote:
> >On Fri, Apr 17, 2015 at 02:21:13PM -0700, yu.dai@intel.com wrote:
> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> index de8c074..8f13e80 100644
> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> @@ -1993,7 +1993,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> >>  	INIT_LIST_HEAD(&ring->request_list);
> >>  	INIT_LIST_HEAD(&ring->execlist_queue);
> >>  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> >> -	ringbuf->size = 32 * PAGE_SIZE;
> >> +	ringbuf->size = 4 * PAGE_SIZE;
> >>  	ringbuf->ring = ring;
> >>  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
> >
> >NAK.
> >
> First of all, GuC firmware reserves limited bits for ring buffer. 4
> pages is max for now. Second, considering the ring buffer is
> per-context now, there is no need to allocate 32 pages for it.

Please look at which function you are changing and explain how this is
not in the least broken.
-Chris
On 04/20/2015 12:43 PM, Chris Wilson wrote:
> On Mon, Apr 20, 2015 at 09:07:28AM -0700, Yu Dai wrote:
> >
> >
> > On 04/18/2015 06:48 AM, Chris Wilson wrote:
> > >On Fri, Apr 17, 2015 at 02:21:13PM -0700, yu.dai@intel.com wrote:
> > >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > >> index de8c074..8f13e80 100644
> > >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > >> @@ -1993,7 +1993,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> > >>  	INIT_LIST_HEAD(&ring->request_list);
> > >>  	INIT_LIST_HEAD(&ring->execlist_queue);
> > >>  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
> > >> -	ringbuf->size = 32 * PAGE_SIZE;
> > >> +	ringbuf->size = 4 * PAGE_SIZE;
> > >>  	ringbuf->ring = ring;
> > >>  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
> > >
> > >NAK.
> > >
> > First of all, GuC firmware reserves limited bits for ring buffer. 4
> > pages is max for now. Second, considering the ring buffer is
> > per-context now, there is no need to allocate 32 pages for it.
>
> Please look at which function you are changing and explain how this is
> not in the least broken.
>
You are right, Chris. I should not touch this legacy ringbuf submission. 
Thanks for catching this.

Thanks,
Alex