[1/5] drm/ttm: Merge hugepage attr changes in ttm_dma_page_put.

Submitted by Huang, Ray on July 26, 2018, 9:14 a.m.

Details

Message ID 1532596448-17831-2-git-send-email-ray.huang@amd.com
State New
Series "drm/ttm: move non-x86 definitions to the common header" (rev 1) in AMD X.Org drivers

Commit Message

Huang, Ray July 26, 2018, 9:14 a.m.
From: Bas Nieuwenhuizen <basni@chromium.org>

Every set_pages_array_wb call resulted in cross-core
interrupts and TLB flushes. Merge more of them for
less overhead.

This reduces the time needed to free a 1.6 GiB GTT WC
buffer as part of Vulkan CTS from ~2 sec to < 0.25 sec.
(Allocation still takes more than 2 sec though)

Signed-off-by: Bas Nieuwenhuizen <basni@chromium.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 3f14c1c..27fb543 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -301,6 +301,25 @@  static int set_pages_array_uc(struct page **pages, int addrinarray)
 #endif
 	return 0;
 }
+
+static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
+{
+#if IS_ENABLED(CONFIG_AGP)
+        unsigned long i;
+
+        for (i = 0; i < numpages; i++)
+                unmap_page_from_agp(p + i);
+#endif
+	return 0;
+}
+
+#else /* for !CONFIG_X86 */
+
+static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
+{
+	return set_memory_wb((unsigned long)page_address(p), numpages);
+}
+
 #endif /* for !CONFIG_X86 */
 
 static int ttm_set_pages_caching(struct dma_pool *pool,
@@ -389,17 +408,14 @@  static void ttm_pool_update_free_locked(struct dma_pool *pool,
 static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
 {
 	struct page *page = d_page->p;
-	unsigned i, num_pages;
+	unsigned num_pages;
 
 	/* Don't set WB on WB page pool. */
 	if (!(pool->type & IS_CACHED)) {
 		num_pages = pool->size / PAGE_SIZE;
-		for (i = 0; i < num_pages; ++i, ++page) {
-			if (set_pages_array_wb(&page, 1)) {
-				pr_err("%s: Failed to set %d pages to wb!\n",
-				       pool->dev_name, 1);
-			}
-		}
+		if (ttm_set_page_range_wb(page, num_pages))
+			pr_err("%s: Failed to set %d pages to wb!\n",
+			       pool->dev_name, num_pages);
 	}
 
 	list_del(&d_page->page_list);
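
In outline, the change replaces per-page attribute updates with a single
range-based update (condensed from the diff above, not a literal
snippet):

	/* Before: one set_pages_array_wb() call per page; each call can
	 * trigger cross-core interrupts and a TLB flush. */
	for (i = 0; i < num_pages; ++i, ++page)
		set_pages_array_wb(&page, 1);

	/* After: a single call covering the whole contiguous range. */
	ttm_set_page_range_wb(page, num_pages);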

Comments

On 26.07.2018 at 11:14, Huang Rui wrote:
> [...]
> +#else /* for !CONFIG_X86 */
> +
> +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
> +{
> +	return set_memory_wb((unsigned long)page_address(p), numpages);
> +}

Please use set_pages_wb() here instead of set_memory_wb(), this way it 
should also work on 32bit systems.
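
Something like this (untested):

static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
{
	/* set_pages_wb() takes the pages directly instead of a kernel
	 * virtual address */
	return set_pages_wb(p, numpages);
}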

Christian.

On Thu, Jul 26, 2018 at 11:39 AM Huang Rui <ray.huang@amd.com> wrote:
>
> On Thu, Jul 26, 2018 at 05:28:53PM +0800, Christian König wrote:
> > On 26.07.2018 at 11:14, Huang Rui wrote:
> > > [...]
> > > +#else /* for !CONFIG_X86 */
> > > +
> > > +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
> > > +{
> > > +   return set_memory_wb((unsigned long)page_address(p), numpages);
> > > +}
> >
> > Please use set_pages_wb() here instead of set_memory_wb(), this way it
> > should also work on 32bit systems.
> >
>
> Actually, I fixed it in patch 5 ("drm/ttm: use set_pages_wb instead of
> set_memory_wb"). So should I squeeze it into this patch? There is an SMTP
> network issue in SRDC; the other patches timed out...

I'd say fix it up in this patch (you can avoid introducing
ttm_set_page_range_wb that way, as it is equal to set_pages_wb) and
then put this patch at the end of your series. Since ttm_page_alloc.c
already has a set_pages_wb fallback implementation, we would not be
introducing code just to deduplicate it later in the series.
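
Something like this in ttm_dma_page_put() (untested):

	/* Don't set WB on WB page pool. */
	if (!(pool->type & IS_CACHED)) {
		num_pages = pool->size / PAGE_SIZE;
		if (set_pages_wb(page, num_pages))
			pr_err("%s: Failed to set %d pages to wb!\n",
			       pool->dev_name, num_pages);
	}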

On Thu, Jul 26, 2018 at 05:28:53PM +0800, Christian König wrote:
> On 26.07.2018 at 11:14, Huang Rui wrote:
> > [...]
> > +#else /* for !CONFIG_X86 */
> > +
> > +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
> > +{
> > +	return set_memory_wb((unsigned long)page_address(p), numpages);
> > +}
> 
> Please use set_pages_wb() here instead of set_memory_wb(), this way it 
> should also work on 32bit systems.
> 

Actually, I fixed it in patch 5 ("drm/ttm: use set_pages_wb instead of
set_memory_wb"). So should I squeeze it into this patch? There is an SMTP
network issue in SRDC; the other patches timed out...

[Net::SMTP] Timeout at /usr/lib/git-core/git-send-email line 1393.

Let me resend them to dri-devel as well.

Thanks,
Ray
On Thu, Jul 26, 2018 at 05:43:10PM +0800, Bas Nieuwenhuizen wrote:
> On Thu, Jul 26, 2018 at 11:39 AM Huang Rui <ray.huang@amd.com> wrote:
> >
> > On Thu, Jul 26, 2018 at 05:28:53PM +0800, Christian König wrote:
> > > On 26.07.2018 at 11:14, Huang Rui wrote:
> > > > [...]
> > > > +#else /* for !CONFIG_X86 */
> > > > +
> > > > +static int ttm_set_page_range_wb(struct page *p, unsigned long numpages)
> > > > +{
> > > > +   return set_memory_wb((unsigned long)page_address(p), numpages);
> > > > +}
> > >
> > > Please use set_pages_wb() here instead of set_memory_wb(), this way it
> > > should also work on 32bit systems.
> > >
> >
> > Actually, I fixed it in patch 5 ("drm/ttm: use set_pages_wb instead of
> > set_memory_wb"). So should I squeeze it into this patch? There is an SMTP
> > network issue in SRDC; the other patches timed out...
> 
> I'd say fix it up in this patch (you can avoid introducing
> ttm_set_page_range_wb that way, as it is equal to set_pages_wb) and
> then put this patch at the end of your series. Since ttm_page_alloc.c
> already has a set_pages_wb fallback implementation, we would not be
> introducing code just to deduplicate it later in the series.
> 

That makes sense. :-)
Will update it in V2.

Thanks,
Ray
