drm/i915: Use new i915_gem_object_pin_map for LRC

Submitted by Tvrtko Ursulin on April 12, 2016, 8:52 a.m.

Details

Message ID 1460451148-18848-1-git-send-email-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series "drm/i915: Use new i915_gem_object_pin_map for LRC" ( rev: 1 ) in Intel GFX

Not browsing as part of any series.

Commit Message

Tvrtko Ursulin April 12, 2016, 8:52 a.m.
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We can use the new pin/lazy unpin API for simplicity
and more performance in the execlist submission paths.

v2:
  * Fix error handling and convert more users.
  * Compact some names for readability.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 98 ++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_lrc.h        |  7 ++-
 3 files changed, 60 insertions(+), 47 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index fe580cb9501a..91028d9c6269 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -342,7 +342,7 @@  void i915_gem_context_reset(struct drm_device *dev)
 		struct intel_context *ctx;
 
 		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev, ctx);
+			intel_lr_context_reset(dev_priv, ctx);
 	}
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0d6dc5ec4a46..0c61d847252a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -229,8 +229,9 @@  enum {
 
 static int intel_lr_context_pin(struct intel_context *ctx,
 				struct intel_engine_cs *engine);
-static void lrc_setup_hardware_status_page(struct intel_engine_cs *engine,
-					   struct drm_i915_gem_object *default_ctx_obj);
+static int
+lrc_setup_hws(struct intel_engine_cs *engine,
+	      struct drm_i915_gem_object *default_ctx_obj);
 
 
 /**
@@ -1093,8 +1094,8 @@  static int intel_lr_context_do_pin(struct intel_context *ctx,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
 	struct intel_ringbuffer *ringbuf = ctx->engine[engine->id].ringbuf;
-	struct page *lrc_state_page;
-	uint32_t *lrc_reg_state;
+	void *obj_addr;
+	u32 *lrc_reg_state;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&engine->dev->struct_mutex));
@@ -1104,19 +1105,20 @@  static int intel_lr_context_do_pin(struct intel_context *ctx,
 	if (ret)
 		return ret;
 
-	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	if (WARN_ON(!lrc_state_page)) {
-		ret = -ENODEV;
+	obj_addr = i915_gem_object_pin_map(ctx_obj);
+	if (IS_ERR(obj_addr)) {
+		ret = PTR_ERR(obj_addr);
 		goto unpin_ctx_obj;
 	}
 
+	lrc_reg_state = obj_addr + LRC_STATE_PN * PAGE_SIZE;
+
 	ret = intel_pin_and_map_ringbuffer_obj(engine->dev, ringbuf);
 	if (ret)
-		goto unpin_ctx_obj;
+		goto unpin_map;
 
 	ctx->engine[engine->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
 	intel_lr_context_descriptor_update(ctx, engine);
-	lrc_reg_state = kmap(lrc_state_page);
 	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
 	ctx->engine[engine->id].lrc_reg_state = lrc_reg_state;
 	ctx_obj->dirty = true;
@@ -1127,6 +1129,8 @@  static int intel_lr_context_do_pin(struct intel_context *ctx,
 
 	return ret;
 
+unpin_map:
+	i915_gem_object_unpin_map(ctx_obj);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1159,7 +1163,7 @@  void intel_lr_context_unpin(struct intel_context *ctx,
 
 	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
 	if (--ctx->engine[engine->id].pin_count == 0) {
-		kunmap(kmap_to_page(ctx->engine[engine->id].lrc_reg_state));
+		i915_gem_object_unpin_map(ctx_obj);
 		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
 		i915_gem_object_ggtt_unpin(ctx_obj);
 		ctx->engine[engine->id].lrc_vma = NULL;
@@ -1584,9 +1588,12 @@  static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	struct drm_device *dev = engine->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned int next_context_status_buffer_hw;
+	int ret;
 
-	lrc_setup_hardware_status_page(engine,
-				       dev_priv->kernel_context->engine[engine->id].state);
+	ret = lrc_setup_hws(engine,
+			    dev_priv->kernel_context->engine[engine->id].state);
+	if (ret)
+		return ret;
 
 	I915_WRITE_IMR(engine,
 		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
@@ -2048,7 +2055,7 @@  void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_fini(&engine->batch_pool);
 
 	if (engine->status_page.obj) {
-		kunmap(sg_page(engine->status_page.obj->pages->sgl));
+		i915_gem_object_unpin_map(engine->status_page.obj);
 		engine->status_page.obj = NULL;
 	}
 
@@ -2378,15 +2385,16 @@  static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 }
 
 static int
-populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
+populate_lr_context(struct intel_context *ctx,
+		    struct drm_i915_gem_object *ctx_obj,
 		    struct intel_engine_cs *engine,
 		    struct intel_ringbuffer *ringbuf)
 {
 	struct drm_device *dev = engine->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-	struct page *page;
-	uint32_t *reg_state;
+	void *obj_addr;
+	u32 *reg_state;
 	int ret;
 
 	if (!ppgtt)
@@ -2398,18 +2406,17 @@  populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 		return ret;
 	}
 
-	ret = i915_gem_object_get_pages(ctx_obj);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Could not get object pages\n");
+	obj_addr = i915_gem_object_pin_map(ctx_obj);
+	if (IS_ERR(obj_addr)) {
+		ret = PTR_ERR(obj_addr);
+		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
 		return ret;
 	}
-
-	i915_gem_object_pin_pages(ctx_obj);
+	ctx_obj->dirty = true;
 
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	reg_state = obj_addr + LRC_STATE_PN * PAGE_SIZE;
 
 	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
 	 * commands followed by (reg, value) pairs. The values we are setting here are
@@ -2514,8 +2521,7 @@  populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 			       make_rpcs(dev));
 	}
 
-	kunmap_atomic(reg_state);
-	i915_gem_object_unpin_pages(ctx_obj);
+	i915_gem_object_unpin_map(ctx_obj);
 
 	return 0;
 }
@@ -2588,22 +2594,27 @@  uint32_t intel_lr_context_size(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static void lrc_setup_hardware_status_page(struct intel_engine_cs *engine,
-					   struct drm_i915_gem_object *default_ctx_obj)
+static int
+lrc_setup_hws(struct intel_engine_cs *engine,
+	      struct drm_i915_gem_object *def_ctx_obj)
 {
 	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	struct page *page;
+	void *hws;
 
 	/* The HWSP is part of the default context object in LRC mode. */
-	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
-			+ LRC_PPHWSP_PN * PAGE_SIZE;
-	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
-	engine->status_page.page_addr = kmap(page);
-	engine->status_page.obj = default_ctx_obj;
+	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(def_ctx_obj) +
+				       LRC_PPHWSP_PN * PAGE_SIZE;
+	hws = i915_gem_object_pin_map(def_ctx_obj);
+	if (IS_ERR(hws))
+		return PTR_ERR(hws);
+	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
+	engine->status_page.obj = def_ctx_obj;
 
 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-			(u32)engine->status_page.gfx_addr);
+		   (u32)engine->status_page.gfx_addr);
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
+
+	return 0;
 }
 
 /**
@@ -2688,10 +2699,9 @@  error_deref_obj:
 	return ret;
 }
 
-void intel_lr_context_reset(struct drm_device *dev,
-			struct intel_context *ctx)
+void intel_lr_context_reset(struct drm_i915_private *dev_priv,
+			    struct intel_context *ctx)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
@@ -2699,23 +2709,23 @@  void intel_lr_context_reset(struct drm_device *dev,
 				ctx->engine[engine->id].state;
 		struct intel_ringbuffer *ringbuf =
 				ctx->engine[engine->id].ringbuf;
+		void *obj_addr;
 		uint32_t *reg_state;
-		struct page *page;
 
 		if (!ctx_obj)
 			continue;
 
-		if (i915_gem_object_get_pages(ctx_obj)) {
-			WARN(1, "Failed get_pages for context obj\n");
+		obj_addr = i915_gem_object_pin_map(ctx_obj);
+		if (WARN_ON(IS_ERR(obj_addr)))
 			continue;
-		}
-		page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-		reg_state = kmap_atomic(page);
+
+		reg_state = obj_addr + LRC_STATE_PN * PAGE_SIZE;
+		ctx_obj->dirty = true;
 
 		reg_state[CTX_RING_HEAD+1] = 0;
 		reg_state[CTX_RING_TAIL+1] = 0;
 
-		kunmap_atomic(reg_state);
+		i915_gem_object_unpin_map(ctx_obj);
 
 		ringbuf->head = 0;
 		ringbuf->tail = 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 0b0853eee91e..bfaa2f583d98 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -103,8 +103,11 @@  int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 				    struct intel_engine_cs *engine);
 void intel_lr_context_unpin(struct intel_context *ctx,
 			    struct intel_engine_cs *engine);
-void intel_lr_context_reset(struct drm_device *dev,
-			struct intel_context *ctx);
+
+struct drm_i915_private;
+
+void intel_lr_context_reset(struct drm_i915_private *dev_priv,
+			    struct intel_context *ctx);
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *engine);
 

Comments

On Tue, Apr 12, 2016 at 09:52:28AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We can use the new pin/lazy unpin API for simplicity
> and more performance in the execlist submission paths.
> 
> v2:
>   * Fix error handling and convert more users.
>   * Compact some names for readability.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Hmm, the stress tests that would exercise this are already currently
fail (thinking of gem_ctx_create etc). But this should not excerbate it
much!

> -	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
> -	if (WARN_ON(!lrc_state_page)) {
> -		ret = -ENODEV;
> +	obj_addr = i915_gem_object_pin_map(ctx_obj);

Oops, there's a bug in pin_map in that we don't mark the object as
dirty. Can you send a quick obj->dirty = true patch?
-Chris
On Tue, Apr 12, 2016 at 09:24:53AM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: drm/i915: Use new i915_gem_object_pin_map for LRC
> URL   : https://patchwork.freedesktop.org/series/5580/
> State : failure
> 
> == Summary ==
> 
> Series 5580v1 drm/i915: Use new i915_gem_object_pin_map for LRC
> http://patchwork.freedesktop.org/api/1.0/series/5580/revisions/1/mbox/
> 
> Test drv_module_reload_basic:
>                 pass       -> DMESG-WARN (bsw-nuc-2)
>                 pass       -> DMESG-WARN (skl-nuci5)
>                 pass       -> DMESG-WARN (bdw-nuci7)
>                 pass       -> DMESG-WARN (skl-i7k-2)
>                 pass       -> DMESG-WARN (bdw-ultra)

Hmm, I guess we have a leak?
-Chris
On 12/04/16 10:30, Chris Wilson wrote:
> On Tue, Apr 12, 2016 at 09:52:28AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> We can use the new pin/lazy unpin API for simplicity
>> and more performance in the execlist submission paths.
>>
>> v2:
>>    * Fix error handling and convert more users.
>>    * Compact some names for readability.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Hmm, the stress tests that would exercise this are already currently
> fail (thinking of gem_ctx_create etc). But this should not excerbate it
> much!

Something is broken here as reported by the CI:

[  329.004489] ------------[ cut here ]------------
[  329.004508] WARNING: CPU: 1 PID: 7049 at drivers/gpu/drm/i915/i915_gem.c:4641 i915_gem_free_object+0x3bd/0x430 [i915]
[  329.004510] WARN_ON(obj->pages_pin_count)
[  329.004511] Modules linked in:
[  329.004512]  snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic i915(-) snd_hda_codec snd_hwdep snd_hda_core mei_me lpc_ich snd_pcm mei i2c_hid i2c_designware_platform i2c_designware_core e1000e ptp pps_core sdhci_acpi sdhci mmc_core [last unloaded: snd_hda_intel]
[  329.004527] CPU: 1 PID: 7049 Comm: rmmod Tainted: G     U          4.6.0-rc3-gfxbench+ #1
[  329.004528] Hardware name:                  /NUC5i7RYB, BIOS RYBDWi35.86A.0249.2015.0529.1640 05/29/2015
[  329.004529]  0000000000000000 ffff880212f7bbe0 ffffffff81405fa5 ffff880212f7bc30
[  329.004531]  0000000000000000 ffff880212f7bc20 ffffffff81079c7c 0000122112f7bc28
[  329.004533]  ffff880211eaad90 ffff880211eaad40 ffff880211eaae30 ffff8800ceb70000
[  329.004535] Call Trace:
[  329.004539]  [<ffffffff81405fa5>] dump_stack+0x67/0x92
[  329.004541]  [<ffffffff81079c7c>] __warn+0xcc/0xf0
[  329.004542]  [<ffffffff81079cea>] warn_slowpath_fmt+0x4a/0x50
[  329.004555]  [<ffffffffa01f7ccd>] i915_gem_free_object+0x3bd/0x430 [i915]
[  329.004558]  [<ffffffff8151cffb>] drm_gem_object_free+0x2b/0x50
[  329.004570]  [<ffffffffa0208dd4>] intel_lr_context_free+0x74/0xd0 [i915]
[  329.004580]  [<ffffffffa01e25b3>] i915_gem_context_free+0x1a3/0x270 [i915]
[  329.004589]  [<ffffffffa01e2e6d>] i915_gem_context_fini+0x9d/0xd0 [i915]
[  329.004603]  [<ffffffffa0280bb9>] i915_driver_unload+0x119/0x1d0 [i915]
[  329.004605]  [<ffffffff81522524>] drm_dev_unregister+0x24/0xa0
[  329.004606]  [<ffffffff81522afe>] drm_put_dev+0x1e/0x60
[  329.004614]  [<ffffffffa01b82c0>] i915_pci_remove+0x10/0x20 [i915]
[  329.004616]  [<ffffffff8144efb4>] pci_device_remove+0x34/0xb0
[  329.004618]  [<ffffffff815467c5>] __device_release_driver+0x95/0x140
[  329.004619]  [<ffffffff81546966>] driver_detach+0xb6/0xc0
[  329.004620]  [<ffffffff815457c3>] bus_remove_driver+0x53/0xd0
[  329.004622]  [<ffffffff81547447>] driver_unregister+0x27/0x50
[  329.004623]  [<ffffffff8144e015>] pci_unregister_driver+0x25/0x70
[  329.004625]  [<ffffffff81524214>] drm_pci_exit+0x74/0x90
[  329.004638]  [<ffffffffa02812b5>] i915_exit+0x20/0x1a0 [i915]
[  329.004640]  [<ffffffff8110a47f>] SyS_delete_module+0x18f/0x1f0
[  329.004642]  [<ffffffff817d23a9>] entry_SYSCALL_64_fastpath+0x1c/0xac
[  329.004643] ---[ end trace f0bf445e9d9a7dbe ]---
[  329.004718] ------------[ cut here ]------------

It even happened in my local BAT run but I did not spot it due excessive eDP triggered WARNs. :(

Will look into it as soon as I stash the current work.
 
>> -	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
>> -	if (WARN_ON(!lrc_state_page)) {
>> -		ret = -ENODEV;
>> +	obj_addr = i915_gem_object_pin_map(ctx_obj);
> 
> Oops, there's a bug in pin_map in that we don't mark the object as
> dirty. Can you send a quick obj->dirty = true patch?

I can if you are sure we want this. Callers might only want to read so I am not sure.

Regards,

Tvrtko
On Tue, Apr 12, 2016 at 10:36:39AM +0100, Tvrtko Ursulin wrote:
> 
> On 12/04/16 10:30, Chris Wilson wrote:
> > On Tue, Apr 12, 2016 at 09:52:28AM +0100, Tvrtko Ursulin wrote:
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> We can use the new pin/lazy unpin API for simplicity
> >> and more performance in the execlist submission paths.
> >>
> >> v2:
> >>    * Fix error handling and convert more users.
> >>    * Compact some names for readability.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Hmm, the stress tests that would exercise this are already currently
> > fail (thinking of gem_ctx_create etc). But this should not excerbate it
> > much!
> 
> Something is broken here as reported by the CI:
> 
> [  329.004489] ------------[ cut here ]------------
> [  329.004508] WARNING: CPU: 1 PID: 7049 at drivers/gpu/drm/i915/i915_gem.c:4641 i915_gem_free_object+0x3bd/0x430 [i915]
> [  329.004510] WARN_ON(obj->pages_pin_count)
> [  329.004511] Modules linked in:
> [  329.004512]  snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic i915(-) snd_hda_codec snd_hwdep snd_hda_core mei_me lpc_ich snd_pcm mei i2c_hid i2c_designware_platform i2c_designware_core e1000e ptp pps_core sdhci_acpi sdhci mmc_core [last unloaded: snd_hda_intel]
> [  329.004527] CPU: 1 PID: 7049 Comm: rmmod Tainted: G     U          4.6.0-rc3-gfxbench+ #1
> [  329.004528] Hardware name:                  /NUC5i7RYB, BIOS RYBDWi35.86A.0249.2015.0529.1640 05/29/2015
> [  329.004529]  0000000000000000 ffff880212f7bbe0 ffffffff81405fa5 ffff880212f7bc30
> [  329.004531]  0000000000000000 ffff880212f7bc20 ffffffff81079c7c 0000122112f7bc28
> [  329.004533]  ffff880211eaad90 ffff880211eaad40 ffff880211eaae30 ffff8800ceb70000
> [  329.004535] Call Trace:
> [  329.004539]  [<ffffffff81405fa5>] dump_stack+0x67/0x92
> [  329.004541]  [<ffffffff81079c7c>] __warn+0xcc/0xf0
> [  329.004542]  [<ffffffff81079cea>] warn_slowpath_fmt+0x4a/0x50
> [  329.004555]  [<ffffffffa01f7ccd>] i915_gem_free_object+0x3bd/0x430 [i915]
> [  329.004558]  [<ffffffff8151cffb>] drm_gem_object_free+0x2b/0x50
> [  329.004570]  [<ffffffffa0208dd4>] intel_lr_context_free+0x74/0xd0 [i915]
> [  329.004580]  [<ffffffffa01e25b3>] i915_gem_context_free+0x1a3/0x270 [i915]
> [  329.004589]  [<ffffffffa01e2e6d>] i915_gem_context_fini+0x9d/0xd0 [i915]
> [  329.004603]  [<ffffffffa0280bb9>] i915_driver_unload+0x119/0x1d0 [i915]
> [  329.004605]  [<ffffffff81522524>] drm_dev_unregister+0x24/0xa0
> [  329.004606]  [<ffffffff81522afe>] drm_put_dev+0x1e/0x60
> [  329.004614]  [<ffffffffa01b82c0>] i915_pci_remove+0x10/0x20 [i915]
> [  329.004616]  [<ffffffff8144efb4>] pci_device_remove+0x34/0xb0
> [  329.004618]  [<ffffffff815467c5>] __device_release_driver+0x95/0x140
> [  329.004619]  [<ffffffff81546966>] driver_detach+0xb6/0xc0
> [  329.004620]  [<ffffffff815457c3>] bus_remove_driver+0x53/0xd0
> [  329.004622]  [<ffffffff81547447>] driver_unregister+0x27/0x50
> [  329.004623]  [<ffffffff8144e015>] pci_unregister_driver+0x25/0x70
> [  329.004625]  [<ffffffff81524214>] drm_pci_exit+0x74/0x90
> [  329.004638]  [<ffffffffa02812b5>] i915_exit+0x20/0x1a0 [i915]
> [  329.004640]  [<ffffffff8110a47f>] SyS_delete_module+0x18f/0x1f0
> [  329.004642]  [<ffffffff817d23a9>] entry_SYSCALL_64_fastpath+0x1c/0xac
> [  329.004643] ---[ end trace f0bf445e9d9a7dbe ]---
> [  329.004718] ------------[ cut here ]------------
> 
> It even happened in my local BAT run but I did not spot it due excessive eDP triggered WARNs. :(

I guess it is in the default context.
 
> Will look into it as soon as I stash the current work.
>  
> >> -	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
> >> -	if (WARN_ON(!lrc_state_page)) {
> >> -		ret = -ENODEV;
> >> +	obj_addr = i915_gem_object_pin_map(ctx_obj);
> > 
> > Oops, there's a bug in pin_map in that we don't mark the object as
> > dirty. Can you send a quick obj->dirty = true patch?
> 
> I can if you are sure we want this. Callers might only want to read so I am not sure.

I agree it is slightly presumptuous, but not marking the objects as dirty
is a potential source of data loss, marking them as dirty even for pure
reads is just a missed optimisation.
-Chris
On Tue, Apr 12, 2016 at 09:52:28AM +0100, Tvrtko Ursulin wrote:
> -static void lrc_setup_hardware_status_page(struct intel_engine_cs *engine,
> -					   struct drm_i915_gem_object *default_ctx_obj)
> +static int
> +lrc_setup_hws(struct intel_engine_cs *engine,
> +	      struct drm_i915_gem_object *def_ctx_obj)
>  {
>  	struct drm_i915_private *dev_priv = engine->dev->dev_private;
> -	struct page *page;
> +	void *hws;
>  
>  	/* The HWSP is part of the default context object in LRC mode. */
> -	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
> -			+ LRC_PPHWSP_PN * PAGE_SIZE;
> -	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
> -	engine->status_page.page_addr = kmap(page);
> -	engine->status_page.obj = default_ctx_obj;
> +	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(def_ctx_obj) +
> +				       LRC_PPHWSP_PN * PAGE_SIZE;
> +	hws = i915_gem_object_pin_map(def_ctx_obj);
> +	if (IS_ERR(hws))
> +		return PTR_ERR(hws);
> +	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
> +	engine->status_page.obj = def_ctx_obj;
>  
>  	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
> -			(u32)engine->status_page.gfx_addr);
> +		   (u32)engine->status_page.gfx_addr);
>  	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
> +
> +	return 0;
>  }

I don't see the corresonding change for tearing down the hws.
-Chris
On 12/04/16 10:43, Chris Wilson wrote:
> On Tue, Apr 12, 2016 at 09:52:28AM +0100, Tvrtko Ursulin wrote:
>> -static void lrc_setup_hardware_status_page(struct intel_engine_cs *engine,
>> -					   struct drm_i915_gem_object *default_ctx_obj)
>> +static int
>> +lrc_setup_hws(struct intel_engine_cs *engine,
>> +	      struct drm_i915_gem_object *def_ctx_obj)
>>   {
>>   	struct drm_i915_private *dev_priv = engine->dev->dev_private;
>> -	struct page *page;
>> +	void *hws;
>>
>>   	/* The HWSP is part of the default context object in LRC mode. */
>> -	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
>> -			+ LRC_PPHWSP_PN * PAGE_SIZE;
>> -	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
>> -	engine->status_page.page_addr = kmap(page);
>> -	engine->status_page.obj = default_ctx_obj;
>> +	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(def_ctx_obj) +
>> +				       LRC_PPHWSP_PN * PAGE_SIZE;
>> +	hws = i915_gem_object_pin_map(def_ctx_obj);
>> +	if (IS_ERR(hws))
>> +		return PTR_ERR(hws);
>> +	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
>> +	engine->status_page.obj = def_ctx_obj;
>>
>>   	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
>> -			(u32)engine->status_page.gfx_addr);
>> +		   (u32)engine->status_page.gfx_addr);
>>   	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
>> +
>> +	return 0;
>>   }
>
> I don't see the corresonding change for tearing down the hws.

It is where it was, in intel_logical_ring_cleanup. Missing was unpin_map 
in intel_lr_context_free. But still there is a leak somewhere. Triggered 
by BAT but not individual IGTs so far.

Regards,

Tvrtko