[v3,12/17] drm/i915/gen8: Add ppgtt info and debug_dump

Submitted by Michel Thierry on July 1, 2015, 3:27 p.m.

Details

Message ID 1435764453-11954-13-git-send-email-michel.thierry@intel.com
State New
Headers show

Not browsing as part of any series.

Commit Message

Michel Thierry July 1, 2015, 3:27 p.m.
v2: Clean up patch after rebases.
v3: gen8_dump_ppgtt for 32b and 48b PPGTT.
v4: Use used_pml4es/pdpes (Akash).
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
---
 drivers/gpu/drm/i915/i915_debugfs.c | 18 ++++----
 drivers/gpu/drm/i915/i915_gem_gtt.c | 92 +++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ad9a737..8c3dcc9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2223,7 +2223,6 @@  static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_file *file;
 	int i;
 
 	if (INTEL_INFO(dev)->gen == 6)
@@ -2246,13 +2245,6 @@  static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 		ppgtt->debug_dump(ppgtt, m);
 	}
 
-	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-
-		seq_printf(m, "proc: %s\n",
-			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
-		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
-	}
 	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
 }
 
@@ -2261,6 +2253,7 @@  static int i915_ppgtt_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_file *file;
 
 	int ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
@@ -2272,6 +2265,15 @@  static int i915_ppgtt_info(struct seq_file *m, void *data)
 	else if (INTEL_INFO(dev)->gen >= 6)
 		gen6_ppgtt_info(m, dev);
 
+	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv = file->driver_priv;
+
+		seq_printf(m, "\nproc: %s\n",
+			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
+		idr_for_each(&file_priv->context_idr, per_file_ctx,
+			     (void *)(unsigned long)m);
+	}
+
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6fc0d3..0c41e5d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1337,6 +1337,97 @@  static int gen8_alloc_va_range(struct i915_address_space *vm,
 		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
 }
 
+static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+			  uint64_t start, uint64_t length,
+			  gen8_pte_t scratch_pte,
+			  struct seq_file *m)
+{
+	struct i915_page_directory *pd;
+	uint64_t temp;
+	uint32_t pdpe;
+
+	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+		struct i915_page_table *pt;
+		uint64_t pd_len = length;
+		uint64_t pd_start = start;
+		uint32_t pde;
+
+		if (!pd)
+			continue;
+
+		if(!test_bit(pdpe, pdp->used_pdpes))
+			continue;
+
+		seq_printf(m, "\tPDPE #%d\n", pdpe);
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+			uint32_t  pte;
+			gen8_pte_t *pt_vaddr;
+
+			if (!pt)
+				continue;
+
+			pt_vaddr = kmap_px(pt);
+			for (pte = 0; pte < GEN8_PTES; pte+=4) {
+				uint64_t va =
+					(pdpe << GEN8_PDPE_SHIFT) |
+					(pde << GEN8_PDE_SHIFT) |
+					(pte << GEN8_PTE_SHIFT);
+				int i;
+				bool found = false;
+				for (i = 0; i < 4; i++)
+					if (pt_vaddr[pte + i] != scratch_pte)
+						found = true;
+				if (!found)
+					continue;
+
+				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
+				for (i = 0; i < 4; i++) {
+					if (pt_vaddr[pte + i] != scratch_pte)
+						seq_printf(m, " %llx", pt_vaddr[pte + i]);
+					else
+						seq_puts(m, "  SCRATCH ");
+				}
+				seq_puts(m, "\n");
+			}
+			/* don't use kunmap_px, it could trigger
+			 * an unnecessary flush.
+			 */
+			kunmap_atomic(pt_vaddr);
+		}
+	}
+}
+
+static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
+{
+	struct i915_address_space *vm = &ppgtt->base;
+	uint64_t start = ppgtt->base.start;
+	uint64_t length = ppgtt->base.total;
+	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+						 I915_CACHE_LLC, true);
+
+	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
+	} else {
+		uint64_t templ4, pml4e;
+		struct i915_pml4 *pml4 = &ppgtt->pml4;
+		struct i915_page_directory_pointer *pdp;
+
+		gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
+			uint64_t pdp_len = length;
+			uint64_t pdp_start = start;
+
+			if (!pdp)
+				continue;
+
+			if (!test_bit(pml4e, pml4->used_pml4es))
+				continue;
+
+			seq_printf(m, "    PML4E #%llu\n", pml4e);
+			gen8_dump_pdp(pdp, pdp_start, pdp_len, scratch_pte, m);
+		}
+	}
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1359,6 +1450,7 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
+	ppgtt->debug_dump = gen8_dump_ppgtt;
 
 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);

Comments

On 7/1/2015 8:57 PM, Michel Thierry wrote:
> v2: Clean up patch after rebases.
> v3: gen8_dump_ppgtt for 32b and 48b PPGTT.
> v4: Use used_pml4es/pdpes (Akash).
> v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 18 ++++----
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 92 +++++++++++++++++++++++++++++++++++++
>   2 files changed, 102 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ad9a737..8c3dcc9 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2223,7 +2223,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring;
> -	struct drm_file *file;
>   	int i;
>
>   	if (INTEL_INFO(dev)->gen == 6)
> @@ -2246,13 +2245,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
>   		ppgtt->debug_dump(ppgtt, m);
>   	}
>
> -	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> -		struct drm_i915_file_private *file_priv = file->driver_priv;
> -
> -		seq_printf(m, "proc: %s\n",
> -			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
> -		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
> -	}
>   	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
>   }
>
> @@ -2261,6 +2253,7 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
>   	struct drm_info_node *node = m->private;
>   	struct drm_device *dev = node->minor->dev;
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_file *file;
>
>   	int ret = mutex_lock_interruptible(&dev->struct_mutex);
>   	if (ret)
> @@ -2272,6 +2265,15 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
>   	else if (INTEL_INFO(dev)->gen >= 6)
>   		gen6_ppgtt_info(m, dev);
>
> +	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
> +		struct drm_i915_file_private *file_priv = file->driver_priv;
> +
> +		seq_printf(m, "\nproc: %s\n",
> +			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
> +		idr_for_each(&file_priv->context_idr, per_file_ctx,
> +			     (void *)(unsigned long)m);
> +	}
> +
>   	intel_runtime_pm_put(dev_priv);
>   	mutex_unlock(&dev->struct_mutex);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6fc0d3..0c41e5d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1337,6 +1337,97 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>   		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>   }
>
> +static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
> +			  uint64_t start, uint64_t length,
> +			  gen8_pte_t scratch_pte,
> +			  struct seq_file *m)
> +{
> +	struct i915_page_directory *pd;
> +	uint64_t temp;
> +	uint32_t pdpe;
> +
> +	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
> +		struct i915_page_table *pt;
> +		uint64_t pd_len = length;
> +		uint64_t pd_start = start;
> +		uint32_t pde;
> +
> +		if (!pd)
> +			continue;
> +
> +		if(!test_bit(pdpe, pdp->used_pdpes))
> +			continue;
> +
> +		seq_printf(m, "\tPDPE #%d\n", pdpe);
> +		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
> +			uint32_t  pte;
> +			gen8_pte_t *pt_vaddr;
> +
> +			if (!pt)
> +				continue;
> +
> +			pt_vaddr = kmap_px(pt);
> +			for (pte = 0; pte < GEN8_PTES; pte+=4) {
> +				uint64_t va =
> +					(pdpe << GEN8_PDPE_SHIFT) |
> +					(pde << GEN8_PDE_SHIFT) |
> +					(pte << GEN8_PTE_SHIFT);
> +				int i;
> +				bool found = false;
> +				for (i = 0; i < 4; i++)
> +					if (pt_vaddr[pte + i] != scratch_pte)
> +						found = true;
> +				if (!found)
> +					continue;
> +
> +				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
> +				for (i = 0; i < 4; i++) {
> +					if (pt_vaddr[pte + i] != scratch_pte)
> +						seq_printf(m, " %llx", pt_vaddr[pte + i]);
> +					else
> +						seq_puts(m, "  SCRATCH ");
> +				}
> +				seq_puts(m, "\n");
> +			}
> +			/* don't use kunmap_px, it could trigger
> +			 * an unnecessary flush.
> +			 */
> +			kunmap_atomic(pt_vaddr);
> +		}
> +	}
> +}
> +
> +static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
> +{
> +	struct i915_address_space *vm = &ppgtt->base;
> +	uint64_t start = ppgtt->base.start;
> +	uint64_t length = ppgtt->base.total;
> +	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
> +						 I915_CACHE_LLC, true);
> +
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
> +	} else {
> +		uint64_t templ4, pml4e;
> +		struct i915_pml4 *pml4 = &ppgtt->pml4;
> +		struct i915_page_directory_pointer *pdp;
> +
> +		gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
> +			uint64_t pdp_len = length;
> +			uint64_t pdp_start = start;
> +
> +			if (!pdp)
> +				continue;
> +
I think the "if (!test_bit(pml4e, pml4->used_pml4es))" check is 
foolproof & should suffice.
No real need of the extra check of 'if (!pdp)'.
Same for pdpe & pde loops in gen8_dump_pdp function

> +			if (!test_bit(pml4e, pml4->used_pml4es))
> +				continue;
> +
> +			seq_printf(m, "    PML4E #%llu\n", pml4e);
> +			gen8_dump_pdp(pdp, pdp_start, pdp_len, scratch_pte, m);
> +		}
> +	}
> +}
> +
>   /*
>    * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>    * with a net effect resembling a 2-level page table in normal x86 terms. Each
> @@ -1359,6 +1450,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>   	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>   	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
>   	ppgtt->base.bind_vma = ppgtt_bind_vma;
> +	ppgtt->debug_dump = gen8_dump_ppgtt;
>
>   	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
>   		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
>
On 7/7/2015 1:56 PM, Goel, Akash wrote:
> On 7/1/2015 8:57 PM, Michel Thierry wrote:
>> v2: Clean up patch after rebases.
>> v3: gen8_dump_ppgtt for 32b and 48b PPGTT.
>> v4: Use used_pml4es/pdpes (Akash).
>> v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
>>
>> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c | 18 ++++----
>>   drivers/gpu/drm/i915/i915_gem_gtt.c | 92
>> +++++++++++++++++++++++++++++++++++++
>>   2 files changed, 102 insertions(+), 8 deletions(-)
>> +static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct
>> seq_file *m)
>> +{
>> +    struct i915_address_space *vm = &ppgtt->base;
>> +    uint64_t start = ppgtt->base.start;
>> +    uint64_t length = ppgtt->base.total;
>> +    gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
>> +                         I915_CACHE_LLC, true);
>> +
>> +    if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
>> +        gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
>> +    } else {
>> +        uint64_t templ4, pml4e;
>> +        struct i915_pml4 *pml4 = &ppgtt->pml4;
>> +        struct i915_page_directory_pointer *pdp;
>> +
>> +        gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
>> +            uint64_t pdp_len = length;
>> +            uint64_t pdp_start = start;
>> +
>> +            if (!pdp)
>> +                continue;
>> +
> I think the "if (!test_bit(pml4e, pml4->used_pml4es))" check is
> foolproof & should suffice.
> No real need of the extra check of 'if (!pdp)'.
> Same for pdpe & pde loops in gen8_dump_pdp function
>
Right, I'll change it to use test_bit across the board, and also remove the
unnecessary pdp_len/pdp_start variables:
-        if (!pd)
-            continue;
-
          if(!test_bit(pdpe, pdp->used_pdpes))
              continue;
-----------
-            if (!pt)
+            if(!test_bit(pde, pd->used_pdes))
                  continue;
-----------
          gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
-            uint64_t pdp_len = length;
-            uint64_t pdp_start = start;
-
-            if (!pdp)
-                continue;
-
              if (!test_bit(pml4e, pml4->used_pml4es))
                  continue;

              seq_printf(m, "    PML4E #%llu\n", pml4e);
-            gen8_dump_pdp(pdp, pdp_start, pdp_len, scratch_pte, m);
+            gen8_dump_pdp(pdp, start, length, scratch_pte, m);
-----------

>> +            if (!test_bit(pml4e, pml4->used_pml4es))
>> +                continue;
>> +
>> +            seq_printf(m, "    PML4E #%llu\n", pml4e);
>> +            gen8_dump_pdp(pdp, pdp_start, pdp_len, scratch_pte, m);
>> +        }
>> +    }
>> +}
>> +
>>   /*
>>    * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP
>> registers
>>    * with a net effect resembling a 2-level page table in normal x86
>> terms. Each
>> @@ -1359,6 +1450,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt
>> *ppgtt)
>>       ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>>       ppgtt->base.unbind_vma = ppgtt_unbind_vma;
>>       ppgtt->base.bind_vma = ppgtt_bind_vma;
>> +    ppgtt->debug_dump = gen8_dump_ppgtt;
>>
>>       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
>>           ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
>>