[RFC] radeonsi: set a per-buffer flag that disables inter-process sharing

Submitted by Marek Olšák on July 18, 2017, 5:47 p.m.

Details

Message ID 1500400020-11921-1-git-send-email-maraeo@gmail.com
State New
Headers show
Series "radeonsi: set a per-buffer flag that disables inter-process sharing" ( rev: 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Marek Olšák July 18, 2017, 5:47 p.m.
From: Marek Olšák <marek.olsak@amd.com>

This is done for lower overhead in the CS ioctl.
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 7 +++++++
 src/gallium/drivers/radeon/radeon_winsys.h      | 1 +
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c       | 6 ++++++
 3 files changed, 14 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index dd1c209..2747ac4 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -160,20 +160,27 @@  void r600_init_resource_fields(struct r600_common_screen *rscreen,
 	}
 
 	/* Tiled textures are unmappable. Always put them in VRAM. */
 	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
 	    res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
 		res->domains = RADEON_DOMAIN_VRAM;
 		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
 			 RADEON_FLAG_GTT_WC;
 	}
 
+	/* Only displayable single-sample textures can be shared between
+	 * processes. */
+	if (res->b.b.target == PIPE_BUFFER ||
+	    res->b.b.nr_samples >= 2 ||
+	    rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY)
+		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
 	/* If VRAM is just stolen system memory, allow both VRAM and
 	 * GTT, whichever has free space. If a buffer is evicted from
 	 * VRAM to GTT, it will stay there.
 	 *
 	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
 	 * placements even with a low amount of stolen VRAM.
 	 */
 	if (!rscreen->info.has_dedicated_vram &&
 	    (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
 	    res->domains == RADEON_DOMAIN_VRAM) {
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 351edcd..ce2fd73 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -47,20 +47,21 @@  enum radeon_bo_domain { /* bitfield */
     RADEON_DOMAIN_GTT  = 2,
     RADEON_DOMAIN_VRAM = 4,
     RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
 enum radeon_bo_flag { /* bitfield */
     RADEON_FLAG_GTT_WC =        (1 << 0),
     RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
     RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
     RADEON_FLAG_SPARSE =        (1 << 3),
+    RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
 };
 
 enum radeon_bo_usage { /* bitfield */
     RADEON_USAGE_READ = 2,
     RADEON_USAGE_WRITE = 4,
     RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
 
     /* The winsys ensures that the CS submission will be scheduled after
      * previously flushed CSs referencing this BO in a conflicting way.
      */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 97bbe23..f97e1bf 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -31,20 +31,24 @@ 
 
 #include "amdgpu_cs.h"
 
 #include "os/os_time.h"
 #include "state_tracker/drm_driver.h"
 #include <amdgpu_drm.h>
 #include <xf86drm.h>
 #include <stdio.h>
 #include <inttypes.h>
 
+#ifndef AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING
+#define AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING (1 << 6)
+#endif
+
 /* Set to 1 for verbose output showing committed sparse buffer ranges. */
 #define DEBUG_SPARSE_COMMITS 0
 
 struct amdgpu_sparse_backing_chunk {
    uint32_t begin, end;
 };
 
 static struct pb_buffer *
 amdgpu_bo_create(struct radeon_winsys *rws,
                  uint64_t size,
@@ -395,20 +399,22 @@  static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
 
    if (initial_domain & RADEON_DOMAIN_VRAM)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
    if (initial_domain & RADEON_DOMAIN_GTT)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
 
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)
+      request.flags |= AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
       fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
       fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
       fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
       fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
       goto error_bo_alloc;
    }
 

Comments

This patch is for comments only; there are some assertion failures.

Marek

On Tue, Jul 18, 2017 at 1:47 PM, Marek Olšák <maraeo@gmail.com> wrote:
> From: Marek Olšák <marek.olsak@amd.com>
>
> for lower overhead in the CS ioctl
> ---
>  src/gallium/drivers/radeon/r600_buffer_common.c | 7 +++++++
>  src/gallium/drivers/radeon/radeon_winsys.h      | 1 +
>  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c       | 6 ++++++
>  3 files changed, 14 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
> index dd1c209..2747ac4 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -160,20 +160,27 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
>         }
>
>         /* Tiled textures are unmappable. Always put them in VRAM. */
>         if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
>             res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
>                 res->domains = RADEON_DOMAIN_VRAM;
>                 res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
>                          RADEON_FLAG_GTT_WC;
>         }
>
> +       /* Only displayable single-sample textures can be shared between
> +        * processes. */
> +       if (res->b.b.target == PIPE_BUFFER ||
> +           res->b.b.nr_samples >= 2 ||
> +           rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY)
> +               res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
> +
>         /* If VRAM is just stolen system memory, allow both VRAM and
>          * GTT, whichever has free space. If a buffer is evicted from
>          * VRAM to GTT, it will stay there.
>          *
>          * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
>          * placements even with a low amount of stolen VRAM.
>          */
>         if (!rscreen->info.has_dedicated_vram &&
>             (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
>             res->domains == RADEON_DOMAIN_VRAM) {
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
> index 351edcd..ce2fd73 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -47,20 +47,21 @@ enum radeon_bo_domain { /* bitfield */
>      RADEON_DOMAIN_GTT  = 2,
>      RADEON_DOMAIN_VRAM = 4,
>      RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
>  };
>
>  enum radeon_bo_flag { /* bitfield */
>      RADEON_FLAG_GTT_WC =        (1 << 0),
>      RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
>      RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
>      RADEON_FLAG_SPARSE =        (1 << 3),
> +    RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
>  };
>
>  enum radeon_bo_usage { /* bitfield */
>      RADEON_USAGE_READ = 2,
>      RADEON_USAGE_WRITE = 4,
>      RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
>
>      /* The winsys ensures that the CS submission will be scheduled after
>       * previously flushed CSs referencing this BO in a conflicting way.
>       */
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> index 97bbe23..f97e1bf 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
> @@ -31,20 +31,24 @@
>
>  #include "amdgpu_cs.h"
>
>  #include "os/os_time.h"
>  #include "state_tracker/drm_driver.h"
>  #include <amdgpu_drm.h>
>  #include <xf86drm.h>
>  #include <stdio.h>
>  #include <inttypes.h>
>
> +#ifndef AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING
> +#define AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING (1 << 6)
> +#endif
> +
>  /* Set to 1 for verbose output showing committed sparse buffer ranges. */
>  #define DEBUG_SPARSE_COMMITS 0
>
>  struct amdgpu_sparse_backing_chunk {
>     uint32_t begin, end;
>  };
>
>  static struct pb_buffer *
>  amdgpu_bo_create(struct radeon_winsys *rws,
>                   uint64_t size,
> @@ -395,20 +399,22 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
>
>     if (initial_domain & RADEON_DOMAIN_VRAM)
>        request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
>     if (initial_domain & RADEON_DOMAIN_GTT)
>        request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
>
>     if (flags & RADEON_FLAG_NO_CPU_ACCESS)
>        request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
>     if (flags & RADEON_FLAG_GTT_WC)
>        request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
> +   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)
> +      request.flags |= AMDGPU_GEM_CREATE_NO_INTERPROCESS_SHARING;
>
>     r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
>     if (r) {
>        fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
>        fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
>        fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
>        fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
>        goto error_bo_alloc;
>     }
>
> --
> 2.7.4
>