[06/16] drm/i915/gen8: Add 4 level switching infrastructure and lrc support

Submitted by Michel Thierry on May 26, 2015, 2:21 p.m.

Details

Message ID 1432650084-24491-7-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry May 26, 2015, 2:21 p.m.
In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
the base address to PML4, while the other PDP registers are ignored.

In LRC, the addressing mode must be specified in every context descriptor.

v2: PML4 update in legacy context switch is left for historic reasons,
the preferred mode of operation is with lrc context based submission.

v3: s/gen8_map_page_directory/gen8_setup_page_directory and
s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
Also, clflush will be needed for bxt. (Akash)

v4: Squashed lrc-specific code and use a macro to set PML4 register.

Cc: Akash Goel <akash.goel@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 62 ++++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
 drivers/gpu/drm/i915/i915_reg.h     |  1 +
 drivers/gpu/drm/i915/intel_lrc.c    | 50 +++++++++++++++++++++++-------
 4 files changed, 98 insertions(+), 17 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7dad575..fe29216 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -216,6 +216,9 @@  static gen8_pde_t gen8_pde_encode(struct drm_device *dev,
 	return pde;
 }
 
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
 				 bool valid, u32 unused)
@@ -560,8 +563,8 @@  static int gen8_write_pdp(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+				 struct intel_engine_cs *ring)
 {
 	int i, ret;
 
@@ -578,6 +581,12 @@  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			      struct intel_engine_cs *ring)
+{
+	return gen8_write_pdp(ring, 0, ppgtt->pml4.daddr);
+}
+
 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 				   uint64_t start,
 				   uint64_t length,
@@ -764,6 +773,45 @@  static void gen8_map_pagetable_range(struct i915_address_space *vm,
 	kunmap_atomic(page_directory);
 }
 
+static void
+gen8_setup_page_directory(struct i915_page_directory_pointer *pdp,
+			  struct i915_page_directory *pd,
+			  int index,
+			  struct drm_device *dev)
+{
+	gen8_ppgtt_pdpe_t *page_directorypo;
+	gen8_ppgtt_pdpe_t pdpe;
+
+	if (!USES_FULL_48BIT_PPGTT(dev))
+		return;
+
+	page_directorypo = kmap_atomic(pdp->page);
+	pdpe = gen8_pdpe_encode(dev, pd->daddr, I915_CACHE_LLC);
+	page_directorypo[index] = pdpe;
+
+	if (!HAS_LLC(dev))
+		drm_clflush_virt_range(page_directorypo, PAGE_SIZE);
+
+	kunmap_atomic(page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_pml4 *pml4,
+				   struct i915_page_directory_pointer *pdp,
+				   int index,
+				   struct drm_device *dev)
+{
+	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic(pml4->page);
+	gen8_ppgtt_pml4e_t pml4e = gen8_pml4e_encode(dev, pdp->daddr, I915_CACHE_LLC);
+	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+	pagemap[index] = pml4e;
+
+	if (!HAS_LLC(dev))
+			drm_clflush_virt_range(pagemap, PAGE_SIZE);
+
+	kunmap_atomic(pagemap);
+}
+
 static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
 {
 	int i;
@@ -1065,6 +1113,7 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 		set_bit(pdpe, pdp->used_pdpes);
 		gen8_map_pagetable_range(vm, pd, start, length);
+		gen8_setup_page_directory(pdp, pd, pdpe, dev);
 	}
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
@@ -1132,6 +1181,8 @@  static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
 		if (ret)
 			goto err_out;
+
+		gen8_setup_page_directory_pointer(pml4, pdp, pml4e, vm->dev);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1186,12 +1237,14 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			goto err_out;
 
 		ppgtt->base.total = 1ULL << 48;
+		ppgtt->switch_mm = gen8_48b_mm_switch;
 	} else {
 		ret = __pdp_init(&ppgtt->pdp, false);
 		if (ret)
 			goto err_out;
 
 		ppgtt->base.total = 1ULL << 32;
+		ppgtt->switch_mm = gen8_legacy_mm_switch;
 		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
 	}
 
@@ -1203,8 +1256,6 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 
-	ppgtt->switch_mm = gen8_mm_switch;
-
 	return 0;
 
 err_out:
@@ -1392,8 +1443,9 @@  static void gen8_ppgtt_enable(struct drm_device *dev)
 	int j;
 
 	for_each_ring(ring, dev_priv, j) {
+		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_64B : 0;
 		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 2229d05..9d53b64 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -39,6 +39,8 @@  struct drm_i915_file_private;
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
 typedef uint64_t gen8_pde_t;
+typedef uint64_t gen8_ppgtt_pdpe_t;
+typedef uint64_t gen8_ppgtt_pml4e_t;
 
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6eeba63..5b5f94c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1642,6 +1642,7 @@  enum skl_disp_power_wells {
 #define   GFX_REPLAY_MODE		(1<<11)
 #define   GFX_PSMI_GRANULARITY		(1<<10)
 #define   GFX_PPGTT_ENABLE		(1<<9)
+#define   GEN8_GFX_PPGTT_64B		(1<<7)
 
 #define VLV_DISPLAY_BASE 0x180000
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 96ae90a..bbcb3cb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -197,6 +197,11 @@ 
 	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
 }
 
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(ppgtt->pml4.daddr); \
+	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(ppgtt->pml4.daddr); \
+}
+
 enum {
 	ADVANCED_CONTEXT = 0,
 	LEGACY_CONTEXT,
@@ -269,11 +274,15 @@  static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
 	struct drm_device *dev = ring->dev;
 	uint64_t desc;
 	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+	bool legacy_64bit_ctx = USES_FULL_48BIT_PPGTT(dev);
 
 	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
 
 	desc = GEN8_CTX_VALID;
-	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	if (legacy_64bit_ctx)
+		desc |= LEGACY_64B_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	else
+		desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
 	if (IS_GEN8(ctx_obj->base.dev))
 		desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
@@ -344,10 +353,16 @@  static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 	reg_state[CTX_RING_TAIL+1] = tail;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
-	/* True PPGTT with dynamic page allocation: update PDP registers and
-	 * point the unallocated PDPs to the scratch page
-	 */
-	if (ppgtt) {
+	if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		/* True 64b PPGTT (48bit canonical)
+		 * PDP0_DESCRIPTOR contains the base address to PML4 and
+		 * other PDP Descriptors are ignored
+		 */
+		ASSIGN_CTX_PML4(ppgtt, reg_state);
+	} else if (ppgtt) {
+		/* True 32b PPGTT with dynamic page allocation: update PDP
+		 * registers and point the unallocated PDPs to the scratch page
+		 */
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
@@ -1777,13 +1792,24 @@  populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
 
-	/* With dynamic page allocation, PDPs may not be allocated at this point,
-	 * Point the unallocated PDPs to the scratch page
-	 */
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		/* 64b PPGTT (48bit canonical)
+		 * PDP0_DESCRIPTOR contains the base address to PML4 and
+		 * other PDP Descriptors are ignored.
+		 */
+		ASSIGN_CTX_PML4(ppgtt, reg_state);
+	} else {
+		/* 32b PPGTT
+		 * PDP*_DESCRIPTOR contains the base address of space supported.
+		 * With dynamic page allocation, PDPs may not be allocated at
+		 * this point. Point the unallocated PDPs to the scratch page
+		 */
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	}
+
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;