[1/4] winsys/amdgpu: enable chaining for compute IBs

Submitted by Marek Olšák on Feb. 14, 2019, 6:31 a.m.

Details

Message ID 20190214063105.521-2-maraeo@gmail.com
State New
Headers show
Series "RadeonSI: Follow-up for the primitive culling series" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Marek Olšák Feb. 14, 2019, 6:31 a.m.
From: Marek Olšák <marek.olsak@amd.com>

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 912307e7d11..eb2944766fc 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -385,26 +385,26 @@  static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs)
    return cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD &&
           cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
           cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
           cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
           cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
 }
 
 static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
 {
    return cs->ctx->ws->info.chip_class >= CIK &&
-          cs->ring_type == RING_GFX;
+          (cs->ring_type == RING_GFX || cs->ring_type == RING_COMPUTE);
 }
 
-static unsigned amdgpu_cs_epilog_dws(enum ring_type ring_type)
+static unsigned amdgpu_cs_epilog_dws(struct amdgpu_cs *cs)
 {
-   if (ring_type == RING_GFX)
+   if (amdgpu_cs_has_chaining(cs))
       return 4; /* for chaining */
 
    return 0;
 }
 
 int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo)
 {
    unsigned hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
    int i = cs->buffer_indices_hashlist[hash];
    struct amdgpu_cs_buffer *buffers;
@@ -795,21 +795,21 @@  static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
     * the CS ioctl. */
    ib->ptr_ib_size = &info->ib_bytes;
    ib->ptr_ib_size_inside_ib = false;
 
    amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                         RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);
 
    ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
 
    ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
-   ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);
+   ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs);
    assert(ib->base.current.max_dw >= ib->max_check_space_size / 4);
    ib->base.gpu_address = info->va_start;
    return true;
 }
 
 static void amdgpu_set_ib_size(struct amdgpu_ib *ib)
 {
    if (ib->ptr_ib_size_inside_ib) {
       *ib->ptr_ib_size = ib->base.current.cdw |
                          S_3F2_CHAIN(1) | S_3F2_VALID(1);
@@ -1072,21 +1072,21 @@  amdgpu_cs_add_parallel_compute_ib(struct radeon_cmdbuf *ib,
 static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
 {
    return true;
 }
 
 static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw)
 {
    struct amdgpu_ib *ib = amdgpu_ib(rcs);
    struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
    unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
-   unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs->ring_type);
+   unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs);
    unsigned need_byte_size = (dw + cs_epilog_dw) * 4;
    uint64_t va;
    uint32_t *new_ptr_ib_size;
 
    assert(rcs->current.cdw <= rcs->current.max_dw);
 
    /* 125% of the size for IB epilog. */
    unsigned safe_byte_size = need_byte_size + need_byte_size / 4;
    ib->max_check_space_size = MAX2(ib->max_check_space_size,
                                    safe_byte_size);
@@ -1708,21 +1708,21 @@  void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs)
 }
 
 static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
                            unsigned flags,
                            struct pipe_fence_handle **fence)
 {
    struct amdgpu_cs *cs = amdgpu_cs(rcs);
    struct amdgpu_winsys *ws = cs->ctx->ws;
    int error_code = 0;
 
-   rcs->current.max_dw += amdgpu_cs_epilog_dws(cs->ring_type);
+   rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
 
    switch (cs->ring_type) {
    case RING_DMA:
       /* pad DMA ring to 8 DWs */
       if (ws->info.chip_class <= SI) {
          while (rcs->current.cdw & 7)
             radeon_emit(rcs, 0xf0000000); /* NOP packet */
       } else {
          while (rcs->current.cdw & 7)
             radeon_emit(rcs, 0x00000000); /* NOP packet */