[6/8] radeonsi: use WRITE_DATA for small glBufferSubData sizes

Submitted by Marek Olšák on Jan. 18, 2019, 4:43 p.m.

Details

Message ID 20190118164359.19461-7-maraeo@gmail.com
State New
Headers show
Series "RadeonSI: PKT3_WRITE_DATA for small uploads" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Jan. 18, 2019, 4:43 p.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
 2 files changed, 28 insertions(+)

Patch hide | download patch | download mbox

diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 4766cf4bdfa..a1e421b8b0d 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -16,20 +16,22 @@ 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "radeonsi/si_pipe.h"
+#include "sid.h"
+
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_transfer.h"
 #include <inttypes.h>
 #include <stdio.h>
 
 bool si_rings_is_buffer_referenced(struct si_context *sctx,
 				   struct pb_buffer *buf,
 				   enum radeon_bo_usage usage)
 {
@@ -506,20 +508,38 @@  static void *si_buffer_transfer_map(struct pipe_context *ctx,
 	data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
 	if (!data) {
 		return NULL;
 	}
 	data += box->x;
 
 	return si_buffer_get_transfer(ctx, resource, usage, box,
 					ptransfer, data, NULL, 0);
 }
 
+static void si_buffer_write_data(struct si_context *sctx, struct r600_resource *buf,
+				 unsigned offset, unsigned size, const void *data)
+{
+	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+	si_need_gfx_cs_space(sctx);
+
+	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		       SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
+	si_emit_cache_flush(sctx);
+
+	si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME, data);
+
+	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+	radeon_emit(cs, 0);
+}
+
 static void si_buffer_do_flush_region(struct pipe_context *ctx,
 				      struct pipe_transfer *transfer,
 				      const struct pipe_box *box)
 {
 	struct si_transfer *stransfer = (struct si_transfer*)transfer;
 	struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
 	if (stransfer->u.staging) {
 		/* Copy the staging buffer into the original one. */
 		si_copy_buffer((struct si_context*)ctx, transfer->resource,
@@ -568,20 +588,27 @@  static void si_buffer_transfer_unmap(struct pipe_context *ctx,
 
 static void si_buffer_subdata(struct pipe_context *ctx,
 			      struct pipe_resource *buffer,
 			      unsigned usage, unsigned offset,
 			      unsigned size, const void *data)
 {
 	struct pipe_transfer *transfer = NULL;
 	struct pipe_box box;
 	uint8_t *map = NULL;
 
+	if (size <= SI_TRANSFER_WRITE_DATA_THRESHOLD &&
+	    offset % 4 == 0 && size % 4 == 0 && (uintptr_t)data % 4 == 0) {
+		si_buffer_write_data((struct si_context*)ctx,
+				     r600_resource(buffer), offset, size, data);
+		return;
+	}
+
 	u_box_1d(offset, size, &box);
 	map = si_buffer_transfer_map(ctx, buffer, 0,
 				       PIPE_TRANSFER_WRITE |
 				       PIPE_TRANSFER_DISCARD_RANGE |
 				       usage,
 				       &box, &transfer);
 	if (!map)
 		return;
 
 	memcpy(map, data, size);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5bd3d9641d2..f79828f3438 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -94,20 +94,21 @@ 
 #define SI_PREFETCH_ES			(1 << 3)
 #define SI_PREFETCH_GS			(1 << 4)
 #define SI_PREFETCH_VS			(1 << 5)
 #define SI_PREFETCH_PS			(1 << 6)
 
 #define SI_MAX_BORDER_COLORS		4096
 #define SI_MAX_VIEWPORTS		16
 #define SIX_BITS			0x3F
 #define SI_MAP_BUFFER_ALIGNMENT		64
 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+#define SI_TRANSFER_WRITE_DATA_THRESHOLD 64
 
 #define SI_RESOURCE_FLAG_TRANSFER	(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define SI_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define SI_RESOURCE_FLAG_DISABLE_DCC	(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
 #define SI_RESOURCE_FLAG_UNMAPPABLE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
 #define SI_RESOURCE_FLAG_READ_ONLY	(PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
 #define SI_RESOURCE_FLAG_32BIT		(PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
 #define SI_RESOURCE_FLAG_SO_FILLED_SIZE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
 

Comments

On Fri, Jan 18, 2019 at 5:44 PM Marek Olšák <maraeo@gmail.com> wrote:
>
> From: Marek Olšák <marek.olsak@amd.com>
>
> ---
>  src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
>  2 files changed, 28 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
> index 4766cf4bdfa..a1e421b8b0d 100644
> --- a/src/gallium/drivers/radeonsi/si_buffer.c
> +++ b/src/gallium/drivers/radeonsi/si_buffer.c
> @@ -16,20 +16,22 @@
>   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>   */
>
>  #include "radeonsi/si_pipe.h"
> +#include "sid.h"
> +
>  #include "util/u_memory.h"
>  #include "util/u_upload_mgr.h"
>  #include "util/u_transfer.h"
>  #include <inttypes.h>
>  #include <stdio.h>
>
>  bool si_rings_is_buffer_referenced(struct si_context *sctx,
>                                    struct pb_buffer *buf,
>                                    enum radeon_bo_usage usage)
>  {
> @@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
>         data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
>         if (!data) {
>                 return NULL;
>         }
>         data += box->x;
>
>         return si_buffer_get_transfer(ctx, resource, usage, box,
>                                         ptransfer, data, NULL, 0);
>  }
>
> +static void si_buffer_write_data(struct si_context *sctx, struct r600_resource *buf,
> +                                unsigned offset, unsigned size, const void *data)
> +{
> +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> +
> +       si_need_gfx_cs_space(sctx);
> +
> +       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> +                      SI_CONTEXT_CS_PARTIAL_FLUSH |
> +                      si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
> +       si_emit_cache_flush(sctx);

Maybe only do the cache flush if the buffer is referenced by the
current cmd buffer?

(I'm kinda surprised reading this that we don't do
DISCARD_WHOLE_RESOURCE if the offset is 0 and the size equal to the
buffer size. )

> +
> +       si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME, data);
> +
> +       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> +       radeon_emit(cs, 0);
> +}
> +
>  static void si_buffer_do_flush_region(struct pipe_context *ctx,
>                                       struct pipe_transfer *transfer,
>                                       const struct pipe_box *box)
>  {
>         struct si_transfer *stransfer = (struct si_transfer*)transfer;
>         struct r600_resource *rbuffer = r600_resource(transfer->resource);
>
>         if (stransfer->u.staging) {
>                 /* Copy the staging buffer into the original one. */
>                 si_copy_buffer((struct si_context*)ctx, transfer->resource,
> @@ -568,20 +588,27 @@ static void si_buffer_transfer_unmap(struct pipe_context *ctx,
>
>  static void si_buffer_subdata(struct pipe_context *ctx,
>                               struct pipe_resource *buffer,
>                               unsigned usage, unsigned offset,
>                               unsigned size, const void *data)
>  {
>         struct pipe_transfer *transfer = NULL;
>         struct pipe_box box;
>         uint8_t *map = NULL;
>
> +       if (size <= SI_TRANSFER_WRITE_DATA_THRESHOLD &&
> +           offset % 4 == 0 && size % 4 == 0 && (uintptr_t)data % 4 == 0) {
> +               si_buffer_write_data((struct si_context*)ctx,
> +                                    r600_resource(buffer), offset, size, data);
> +               return;
> +       }
> +
>         u_box_1d(offset, size, &box);
>         map = si_buffer_transfer_map(ctx, buffer, 0,
>                                        PIPE_TRANSFER_WRITE |
>                                        PIPE_TRANSFER_DISCARD_RANGE |
>                                        usage,
>                                        &box, &transfer);
>         if (!map)
>                 return;
>
>         memcpy(map, data, size);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 5bd3d9641d2..f79828f3438 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -94,20 +94,21 @@
>  #define SI_PREFETCH_ES                 (1 << 3)
>  #define SI_PREFETCH_GS                 (1 << 4)
>  #define SI_PREFETCH_VS                 (1 << 5)
>  #define SI_PREFETCH_PS                 (1 << 6)
>
>  #define SI_MAX_BORDER_COLORS           4096
>  #define SI_MAX_VIEWPORTS               16
>  #define SIX_BITS                       0x3F
>  #define SI_MAP_BUFFER_ALIGNMENT                64
>  #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
> +#define SI_TRANSFER_WRITE_DATA_THRESHOLD 64
>
>  #define SI_RESOURCE_FLAG_TRANSFER      (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
>  #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
>  #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
>  #define SI_RESOURCE_FLAG_DISABLE_DCC   (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
>  #define SI_RESOURCE_FLAG_UNMAPPABLE    (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
>  #define SI_RESOURCE_FLAG_READ_ONLY     (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
>  #define SI_RESOURCE_FLAG_32BIT         (PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
>  #define SI_RESOURCE_FLAG_SO_FILLED_SIZE        (PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
>
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
On Fri, Jan 18, 2019 at 6:05 PM Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
wrote:

> On Fri, Jan 18, 2019 at 5:44 PM Marek Olšák <maraeo@gmail.com> wrote:
> >
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > ---
> >  src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
> >  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
> >  2 files changed, 28 insertions(+)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_buffer.c
> b/src/gallium/drivers/radeonsi/si_buffer.c
> > index 4766cf4bdfa..a1e421b8b0d 100644
> > --- a/src/gallium/drivers/radeonsi/si_buffer.c
> > +++ b/src/gallium/drivers/radeonsi/si_buffer.c
> > @@ -16,20 +16,22 @@
> >   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> >   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> >   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT
> SHALL
> >   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> >   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> >   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
> OR THE
> >   * USE OR OTHER DEALINGS IN THE SOFTWARE.
> >   */
> >
> >  #include "radeonsi/si_pipe.h"
> > +#include "sid.h"
> > +
> >  #include "util/u_memory.h"
> >  #include "util/u_upload_mgr.h"
> >  #include "util/u_transfer.h"
> >  #include <inttypes.h>
> >  #include <stdio.h>
> >
> >  bool si_rings_is_buffer_referenced(struct si_context *sctx,
> >                                    struct pb_buffer *buf,
> >                                    enum radeon_bo_usage usage)
> >  {
> > @@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct
> pipe_context *ctx,
> >         data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
> >         if (!data) {
> >                 return NULL;
> >         }
> >         data += box->x;
> >
> >         return si_buffer_get_transfer(ctx, resource, usage, box,
> >                                         ptransfer, data, NULL, 0);
> >  }
> >
> > +static void si_buffer_write_data(struct si_context *sctx, struct
> r600_resource *buf,
> > +                                unsigned offset, unsigned size, const
> void *data)
> > +{
> > +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > +
> > +       si_need_gfx_cs_space(sctx);
> > +
> > +       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH |
> > +                      si_get_flush_flags(sctx, SI_COHERENCY_SHADER,
> L2_LRU);
> > +       si_emit_cache_flush(sctx);
>
> Maybe only do the cache flush if the buffer is referenced by the
> current cmd buffer?
>
> (I'm kinda surprised reading this that we don't do
> DISCARD_WHOLE_RESOURCE if the offset is 0 and the size equal to the
> buffer size. )
>

Thanks, that's a good point. We promote to DISCARD_WHOLE_RESOURCE in
si_buffer_transfer_map. I'll remove the hunk in si_buffer_subdata, because
it doesn't do optimizations that si_buffer_transfer_map does.

Marek


> > +
> > +       si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME,
> data);
> > +
> > +       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
> > +       radeon_emit(cs, 0);
> > +}
> > +
> >  static void si_buffer_do_flush_region(struct pipe_context *ctx,
> >                                       struct pipe_transfer *transfer,
> >                                       const struct pipe_box *box)
> >  {
> >         struct si_transfer *stransfer = (struct si_transfer*)transfer;
> >         struct r600_resource *rbuffer =
> r600_resource(transfer->resource);
> >
> >         if (stransfer->u.staging) {
> >                 /* Copy the staging buffer into the original one. */
> >                 si_copy_buffer((struct si_context*)ctx,
> transfer->resource,
> > @@ -568,20 +588,27 @@ static void si_buffer_transfer_unmap(struct
> pipe_context *ctx,
> >
> >  static void si_buffer_subdata(struct pipe_context *ctx,
> >                               struct pipe_resource *buffer,
> >                               unsigned usage, unsigned offset,
> >                               unsigned size, const void *data)
> >  {
> >         struct pipe_transfer *transfer = NULL;
> >         struct pipe_box box;
> >         uint8_t *map = NULL;
> >
> > +       if (size <= SI_TRANSFER_WRITE_DATA_THRESHOLD &&
> > +           offset % 4 == 0 && size % 4 == 0 && (uintptr_t)data % 4 ==
> 0) {
> > +               si_buffer_write_data((struct si_context*)ctx,
> > +                                    r600_resource(buffer), offset,
> size, data);
> > +               return;
> > +       }
> > +
> >         u_box_1d(offset, size, &box);
> >         map = si_buffer_transfer_map(ctx, buffer, 0,
> >                                        PIPE_TRANSFER_WRITE |
> >                                        PIPE_TRANSFER_DISCARD_RANGE |
> >                                        usage,
> >                                        &box, &transfer);
> >         if (!map)
> >                 return;
> >
> >         memcpy(map, data, size);
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> b/src/gallium/drivers/radeonsi/si_pipe.h
> > index 5bd3d9641d2..f79828f3438 100644
> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > @@ -94,20 +94,21 @@
> >  #define SI_PREFETCH_ES                 (1 << 3)
> >  #define SI_PREFETCH_GS                 (1 << 4)
> >  #define SI_PREFETCH_VS                 (1 << 5)
> >  #define SI_PREFETCH_PS                 (1 << 6)
> >
> >  #define SI_MAX_BORDER_COLORS           4096
> >  #define SI_MAX_VIEWPORTS               16
> >  #define SIX_BITS                       0x3F
> >  #define SI_MAP_BUFFER_ALIGNMENT                64
> >  #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
> > +#define SI_TRANSFER_WRITE_DATA_THRESHOLD 64
> >
> >  #define SI_RESOURCE_FLAG_TRANSFER      (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 0)
> >  #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 1)
> >  #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV
> << 2)
> >  #define SI_RESOURCE_FLAG_DISABLE_DCC   (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 3)
> >  #define SI_RESOURCE_FLAG_UNMAPPABLE    (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 4)
> >  #define SI_RESOURCE_FLAG_READ_ONLY     (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 5)
> >  #define SI_RESOURCE_FLAG_32BIT         (PIPE_RESOURCE_FLAG_DRV_PRIV <<
> 6)
> >  #define SI_RESOURCE_FLAG_SO_FILLED_SIZE
> (PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
> >
> > --
> > 2.17.1
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Fri, Jan 18, 2019 at 6:05 PM Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
wrote:

> On Fri, Jan 18, 2019 at 5:44 PM Marek Olšák <maraeo@gmail.com> wrote:
> >
> > From: Marek Olšák <marek.olsak@amd.com>
> >
> > ---
> >  src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
> >  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
> >  2 files changed, 28 insertions(+)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_buffer.c
> b/src/gallium/drivers/radeonsi/si_buffer.c
> > index 4766cf4bdfa..a1e421b8b0d 100644
> > --- a/src/gallium/drivers/radeonsi/si_buffer.c
> > +++ b/src/gallium/drivers/radeonsi/si_buffer.c
> > @@ -16,20 +16,22 @@
> >   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> >   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> >   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT
> SHALL
> >   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> >   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> >   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
> OR THE
> >   * USE OR OTHER DEALINGS IN THE SOFTWARE.
> >   */
> >
> >  #include "radeonsi/si_pipe.h"
> > +#include "sid.h"
> > +
> >  #include "util/u_memory.h"
> >  #include "util/u_upload_mgr.h"
> >  #include "util/u_transfer.h"
> >  #include <inttypes.h>
> >  #include <stdio.h>
> >
> >  bool si_rings_is_buffer_referenced(struct si_context *sctx,
> >                                    struct pb_buffer *buf,
> >                                    enum radeon_bo_usage usage)
> >  {
> > @@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct
> pipe_context *ctx,
> >         data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
> >         if (!data) {
> >                 return NULL;
> >         }
> >         data += box->x;
> >
> >         return si_buffer_get_transfer(ctx, resource, usage, box,
> >                                         ptransfer, data, NULL, 0);
> >  }
> >
> > +static void si_buffer_write_data(struct si_context *sctx, struct
> r600_resource *buf,
> > +                                unsigned offset, unsigned size, const
> void *data)
> > +{
> > +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> > +
> > +       si_need_gfx_cs_space(sctx);
> > +
> > +       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH |
> > +                      si_get_flush_flags(sctx, SI_COHERENCY_SHADER,
> L2_LRU);
> > +       si_emit_cache_flush(sctx);
>
> Maybe only do the cache flush if the buffer is referenced by the
> current cmd buffer?
>

We can't do that, because 2 consecutive IBs can execute simultaneously for
a moment.

Marek
Ack, patches 1-6 are

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

On Sat, Jan 19, 2019 at 2:08 AM Marek Olšák <maraeo@gmail.com> wrote:
>
> On Fri, Jan 18, 2019 at 6:05 PM Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> wrote:
>>
>> On Fri, Jan 18, 2019 at 5:44 PM Marek Olšák <maraeo@gmail.com> wrote:
>> >
>> > From: Marek Olšák <marek.olsak@amd.com>
>> >
>> > ---
>> >  src/gallium/drivers/radeonsi/si_buffer.c | 27 ++++++++++++++++++++++++
>> >  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
>> >  2 files changed, 28 insertions(+)
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
>> > index 4766cf4bdfa..a1e421b8b0d 100644
>> > --- a/src/gallium/drivers/radeonsi/si_buffer.c
>> > +++ b/src/gallium/drivers/radeonsi/si_buffer.c
>> > @@ -16,20 +16,22 @@
>> >   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> >   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> >   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> >   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>> >   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> >   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> >   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> >   */
>> >
>> >  #include "radeonsi/si_pipe.h"
>> > +#include "sid.h"
>> > +
>> >  #include "util/u_memory.h"
>> >  #include "util/u_upload_mgr.h"
>> >  #include "util/u_transfer.h"
>> >  #include <inttypes.h>
>> >  #include <stdio.h>
>> >
>> >  bool si_rings_is_buffer_referenced(struct si_context *sctx,
>> >                                    struct pb_buffer *buf,
>> >                                    enum radeon_bo_usage usage)
>> >  {
>> > @@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
>> >         data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
>> >         if (!data) {
>> >                 return NULL;
>> >         }
>> >         data += box->x;
>> >
>> >         return si_buffer_get_transfer(ctx, resource, usage, box,
>> >                                         ptransfer, data, NULL, 0);
>> >  }
>> >
>> > +static void si_buffer_write_data(struct si_context *sctx, struct r600_resource *buf,
>> > +                                unsigned offset, unsigned size, const void *data)
>> > +{
>> > +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
>> > +
>> > +       si_need_gfx_cs_space(sctx);
>> > +
>> > +       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
>> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH |
>> > +                      si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
>> > +       si_emit_cache_flush(sctx);
>>
>> Maybe only do the cache flush if the buffer is referenced by the
>> current cmd buffer?
>
>
> We can't do that, because 2 consecutive IBs can execute simultaneously for a moment.
>
> Marek
FYI, I'm squashing this patch into patch 8.

Marek

On Fri, Jan 18, 2019 at 8:23 PM Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
wrote:

> Ack, patches 1-6 are
>
> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>
> On Sat, Jan 19, 2019 at 2:08 AM Marek Olšák <maraeo@gmail.com> wrote:
> >
> > On Fri, Jan 18, 2019 at 6:05 PM Bas Nieuwenhuizen <
> bas@basnieuwenhuizen.nl> wrote:
> >>
> >> On Fri, Jan 18, 2019 at 5:44 PM Marek Olšák <maraeo@gmail.com> wrote:
> >> >
> >> > From: Marek Olšák <marek.olsak@amd.com>
> >> >
> >> > ---
> >> >  src/gallium/drivers/radeonsi/si_buffer.c | 27
> ++++++++++++++++++++++++
> >> >  src/gallium/drivers/radeonsi/si_pipe.h   |  1 +
> >> >  2 files changed, 28 insertions(+)
> >> >
> >> > diff --git a/src/gallium/drivers/radeonsi/si_buffer.c
> b/src/gallium/drivers/radeonsi/si_buffer.c
> >> > index 4766cf4bdfa..a1e421b8b0d 100644
> >> > --- a/src/gallium/drivers/radeonsi/si_buffer.c
> >> > +++ b/src/gallium/drivers/radeonsi/si_buffer.c
> >> > @@ -16,20 +16,22 @@
> >> >   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> >> >   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> >> >   * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO
> EVENT SHALL
> >> >   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> >> >   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR
> >> >   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE
> >> >   * USE OR OTHER DEALINGS IN THE SOFTWARE.
> >> >   */
> >> >
> >> >  #include "radeonsi/si_pipe.h"
> >> > +#include "sid.h"
> >> > +
> >> >  #include "util/u_memory.h"
> >> >  #include "util/u_upload_mgr.h"
> >> >  #include "util/u_transfer.h"
> >> >  #include <inttypes.h>
> >> >  #include <stdio.h>
> >> >
> >> >  bool si_rings_is_buffer_referenced(struct si_context *sctx,
> >> >                                    struct pb_buffer *buf,
> >> >                                    enum radeon_bo_usage usage)
> >> >  {
> >> > @@ -506,20 +508,38 @@ static void *si_buffer_transfer_map(struct
> pipe_context *ctx,
> >> >         data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
> >> >         if (!data) {
> >> >                 return NULL;
> >> >         }
> >> >         data += box->x;
> >> >
> >> >         return si_buffer_get_transfer(ctx, resource, usage, box,
> >> >                                         ptransfer, data, NULL, 0);
> >> >  }
> >> >
> >> > +static void si_buffer_write_data(struct si_context *sctx, struct
> r600_resource *buf,
> >> > +                                unsigned offset, unsigned size,
> const void *data)
> >> > +{
> >> > +       struct radeon_cmdbuf *cs = sctx->gfx_cs;
> >> > +
> >> > +       si_need_gfx_cs_space(sctx);
> >> > +
> >> > +       sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> >> > +                      SI_CONTEXT_CS_PARTIAL_FLUSH |
> >> > +                      si_get_flush_flags(sctx, SI_COHERENCY_SHADER,
> L2_LRU);
> >> > +       si_emit_cache_flush(sctx);
> >>
> >> Maybe only do the cache flush if the buffer is referenced by the
> >> current cmd buffer?
> >
> >
> > We can't do that, because 2 consecutive IBs can execute simultaneously
> for a moment.
> >
> > Marek
>