[01/10] drm/i915: Add support for mapping an object page by page

Submitted by ankitprasad.r.sharma@intel.com on Feb. 4, 2016, 9:30 a.m.

Details

Message ID 1454578211-24823-2-git-send-email-ankitprasad.r.sharma@intel.com
State New
Headers show
Series "Support for creating/using Stolen memory backed objects" ( rev: 9 ) in Intel GFX

Not browsing as part of any series.

Commit Message

ankitprasad.r.sharma@intel.com Feb. 4, 2016, 9:30 a.m.
From: Chris Wilson <chris@chris-wilson.co.uk>

Introduced a new vm specfic callback insert_page() to program a single pte in
ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
space. This can be iterated over to access the whole object by using space as
meagre as page size.

v2: Added low level rpm assertions to insert_page routines (Chris)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/char/agp/intel-gtt.c        |  9 +++++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 65 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
 include/drm/intel-gtt.h             |  3 ++
 4 files changed, 82 insertions(+)

Patch hide | download patch | download mbox

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1341a94..7c68576 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -838,6 +838,15 @@  static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags)
+{
+	intel_private.driver->write_entry(addr, pg, flags);
+	wmb();
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 715a771..a64018f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2341,6 +2341,28 @@  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 #endif
 }
 
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)dev_priv->gtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
@@ -2412,6 +2434,28 @@  static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
 }
 
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)dev_priv->gtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -2523,6 +2567,24 @@  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *pages,
 				     uint64_t start,
@@ -3054,6 +3116,7 @@  static int gen8_gmch_probe(struct drm_device *dev,
 	ret = ggtt_probe_common(dev, gtt_size);
 
 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+	dev_priv->gtt.base.insert_page = gen8_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3099,6 +3162,7 @@  static int gen6_gmch_probe(struct drm_device *dev,
 	ret = ggtt_probe_common(dev, gtt_size);
 
 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.insert_page = gen6_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3133,6 +3197,7 @@  static int i915_gmch_probe(struct drm_device *dev,
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+	dev_priv->gtt.base.insert_page = i915_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f520c90..e0d9da9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -316,6 +316,11 @@  struct i915_address_space {
 			    uint64_t start,
 			    uint64_t length,
 			    bool use_scratch);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    uint64_t offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e9bddaa5..f49edec 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,9 @@  void intel_gmch_remove(void);
 bool intel_enable_gtt(void);
 
 void intel_gtt_chipset_flush(void);
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags);
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags);

Comments

On 04/02/16 09:30, ankitprasad.r.sharma@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Introduced a new vm specfic callback insert_page() to program a single pte in
> ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> space. This can be iterated over to access the whole object by using space as
> meagre as page size.
>
> v2: Added low level rpm assertions to insert_page routines (Chris)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/char/agp/intel-gtt.c        |  9 +++++
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 65 +++++++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
>   include/drm/intel-gtt.h             |  3 ++
>   4 files changed, 82 insertions(+)
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 1341a94..7c68576 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
>   	return false;
>   }
>
> +void intel_gtt_insert_page(dma_addr_t addr,
> +			   unsigned int pg,
> +			   unsigned int flags)
> +{
> +	intel_private.driver->write_entry(addr, pg, flags);
> +	wmb();
> +}
> +EXPORT_SYMBOL(intel_gtt_insert_page);
> +
>   void intel_gtt_insert_sg_entries(struct sg_table *st,
>   				 unsigned int pg_start,
>   				 unsigned int flags)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 715a771..a64018f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2341,6 +2341,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>   #endif
>   }
>
> +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> +				  dma_addr_t addr,
> +				  uint64_t offset,
> +				  enum i915_cache_level level,
> +				  u32 unused)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> +	gen8_pte_t __iomem *pte =
> +		(gen8_pte_t __iomem *)dev_priv->gtt.gsm +
> +		(offset >> PAGE_SHIFT);
> +	int rpm_atomic_seq;
> +
> +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> +
> +	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> +	wmb();

gen8_ggtt_insert_entries does a read-back of the PTE after having 
written it with a big fat comment talking about how that could be really 
important. This is not needed in this path?

> +
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);

Why is the posting read not required here as in gen8_ggtt_insert_entries?

> +
> +	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
> +}
> +
>   static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>   				     struct sg_table *st,
>   				     uint64_t start,
> @@ -2412,6 +2434,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
>   	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
>   }
>
> +static void gen6_ggtt_insert_page(struct i915_address_space *vm,
> +				  dma_addr_t addr,
> +				  uint64_t offset,
> +				  enum i915_cache_level level,
> +				  u32 flags)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> +	gen6_pte_t __iomem *pte =
> +		(gen6_pte_t __iomem *)dev_priv->gtt.gsm +
> +		(offset >> PAGE_SHIFT);
> +	int rpm_atomic_seq;
> +
> +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> +
> +	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
> +	wmb();
> +
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> +
> +	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
> +}

Same questions as for the gen8 version.

Regards,

Tvrtko
Hi,
On Thu, 2016-02-11 at 10:50 +0000, Tvrtko Ursulin wrote:
> 
> On 04/02/16 09:30, ankitprasad.r.sharma@intel.com wrote:
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> >
> > Introduced a new vm specfic callback insert_page() to program a single pte in
> > ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> > space. This can be iterated over to access the whole object by using space as
> > meagre as page size.
> >
> > v2: Added low level rpm assertions to insert_page routines (Chris)
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> >   drivers/char/agp/intel-gtt.c        |  9 +++++
> >   drivers/gpu/drm/i915/i915_gem_gtt.c | 65 +++++++++++++++++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
> >   include/drm/intel-gtt.h             |  3 ++
> >   4 files changed, 82 insertions(+)
> >
> > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> > index 1341a94..7c68576 100644
> > --- a/drivers/char/agp/intel-gtt.c
> > +++ b/drivers/char/agp/intel-gtt.c
> > @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
> >   	return false;
> >   }
> >
> > +void intel_gtt_insert_page(dma_addr_t addr,
> > +			   unsigned int pg,
> > +			   unsigned int flags)
> > +{
> > +	intel_private.driver->write_entry(addr, pg, flags);
> > +	wmb();
> > +}
> > +EXPORT_SYMBOL(intel_gtt_insert_page);
> > +
> >   void intel_gtt_insert_sg_entries(struct sg_table *st,
> >   				 unsigned int pg_start,
> >   				 unsigned int flags)
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 715a771..a64018f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2341,6 +2341,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >   #endif
> >   }
> >
> > +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> > +				  dma_addr_t addr,
> > +				  uint64_t offset,
> > +				  enum i915_cache_level level,
> > +				  u32 unused)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> > +	gen8_pte_t __iomem *pte =
> > +		(gen8_pte_t __iomem *)dev_priv->gtt.gsm +
> > +		(offset >> PAGE_SHIFT);
> > +	int rpm_atomic_seq;
> > +
> > +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> > +
> > +	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> > +	wmb();
> 
> gen8_ggtt_insert_entries does a read-back of the PTE after having 
> written it with a big fat comment talking about how that could be really 
> important. This is not needed in this path?
As per our discussion with Chris, wmb() is faster than doing a memory
access for reading the PTE.
So, I guess a barrier here should be better to keep things in sync.
> 
> > +
> > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> 
> Why is the posting read not required here as in gen8_ggtt_insert_entries?
I agree with this, a POSTING_READ is required.
> 
> > +
> > +	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
> > +}
> > +
> >   static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >   				     struct sg_table *st,
> >   				     uint64_t start,
> > @@ -2412,6 +2434,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
> >   	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
> >   }
> >
> > +static void gen6_ggtt_insert_page(struct i915_address_space *vm,
> > +				  dma_addr_t addr,
> > +				  uint64_t offset,
> > +				  enum i915_cache_level level,
> > +				  u32 flags)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> > +	gen6_pte_t __iomem *pte =
> > +		(gen6_pte_t __iomem *)dev_priv->gtt.gsm +
> > +		(offset >> PAGE_SHIFT);
> > +	int rpm_atomic_seq;
> > +
> > +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> > +
> > +	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
> > +	wmb();
> > +
> > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> > +
> > +	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
> > +}
> 
> Same questions as for the gen8 version.
> 
> Regards,
> 
> Tvrtko

Thanks,
Ankit