[12/23] drm/amdgpu: Insert meta data during submitting IB

Submitted by Yu, Xiangliang on Dec. 17, 2016, 4:16 p.m.

Details

Message ID 1481991405-30422-13-git-send-email-Xiangliang.Yu@amd.com
State New
Series "转发: [PATCH 07/23] drm/amdgpu: create new directory for GPU virtualization" ( rev: 3 2 1 ) in AMD X.Org drivers


Commit Message

Yu, Xiangliang Dec. 17, 2016, 4:16 p.m.
Virtualization world switch needs each command submitted to the GFX
ring to carry an extra entry, which uses WRITE_DATA to fill in the CSA.
This way, the CP can save CE/DE snapshots when preemption occurs and
restore them later.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 +
 drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    | 39 ++++++++++++++
 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 88 ++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index acf48de..cc35255 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -175,6 +175,8 @@  int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_flush)
 		amdgpu_ring_emit_hdp_flush(ring);
 
+	amdgpu_gfx_ring_emit_meta_data(ring, vm);
+
 	/* always set cond_exec_polling to CONTINUE */
 	*ring->cond_exe_cpu_addr = 1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index dff1248..d6f57a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -69,4 +69,6 @@  int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
 /* Context Save Area functions */
 int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+				    struct amdgpu_vm *vm);
 #endif
diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
index a25e07f..cc3123b 100644
--- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
+++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
@@ -26,9 +26,48 @@ 
 
 #include "amdgpu.h"
 
+/* context save area structures */
+struct amdgpu_ce_ib_state {
+	uint32_t	ce_ib_completion_status;
+	uint32_t	ce_const_engine_count;
+	uint32_t	ce_ib_offset_ib1;
+	uint32_t	ce_ib_offset_ib2;
+};
+
+struct amdgpu_de_ib_state {
+	uint32_t	de_ib_completion_status;
+	uint32_t	de_const_engine_count;
+	uint32_t	de_ib_offset_ib1;
+	uint32_t	de_ib_offset_ib2;
+	uint32_t	preamble_begin_ib1;
+	uint32_t	preamble_begin_ib2;
+	uint32_t	preamble_end_ib1;
+	uint32_t	preamble_end_ib2;
+	uint32_t	draw_indirect_base_lo;
+	uint32_t	draw_indirect_base_hi;
+	uint32_t	disp_indirect_base_lo;
+	uint32_t	disp_indirect_base_hi;
+	uint32_t	gds_backup_addr_lo;
+	uint32_t	gds_backup_addr_hi;
+	uint32_t	index_base_addr_lo;
+	uint32_t	index_base_addr_hi;
+	uint32_t	sample_cntl;
+};
+
+struct amdgpu_gfx_meta_data {
+	struct amdgpu_ce_ib_state	ce_payload;
+	uint32_t			reserved1[60];
+	struct amdgpu_de_ib_state	de_payload;
+	uint32_t			de_ib_base_addr_lo;
+	uint32_t			de_ib_base_addr_hi;
+	uint32_t			reserved2[941];
+};
+
 /* xgpu structures */
 struct amd_xgpu_csa {
 	struct amdgpu_bo	    *robj;
+	struct amdgpu_ce_ib_state   ce_payload;
+	struct amdgpu_de_ib_state   de_payload;
 	uint64_t		    gpu_addr;
 	uint64_t		    gds_addr;
 	int32_t			    size;
diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
index 246a747..6d4246c 100644
--- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
+++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
@@ -207,3 +207,91 @@  void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	sa = &xgpu->sa;
 	xgpu_vm_unmap_csa(adev, vm, sa);
 }
+
+static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+				       void *src, int count_dw)
+{
+	if (ring->count_dw < count_dw)
+		DRM_ERROR("writing more dwords to the ring than expected:%d.\n",
+			   count_dw);
+	else {
+		unsigned int chunk1, chunk2;
+		void *dst = (void *)&ring->ring[ring->wptr];
+
+		chunk1 = ring->ptr_mask + 1 - ring->wptr;
+		chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+		chunk2 = count_dw - chunk1;
+		chunk1 <<= 2;
+		chunk2 <<= 2;
+		if (chunk1) {
+			memcpy(dst, src, chunk1);
+			dst = (void *)(((uint64_t)dst + chunk1) &
+					ring->ptr_mask);
+		}
+
+		if (chunk2) {
+			src += chunk1;
+			dst = (void *)ring->ring;
+			memcpy(dst, src, chunk2);
+		}
+
+		ring->wptr += count_dw;
+		ring->wptr &= ring->ptr_mask;
+		ring->count_dw -= count_dw;
+	}
+}
+
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+				    struct amdgpu_vm *vm)
+{
+	struct amdgpu_ce_ib_state *ce_payload;
+	struct amdgpu_de_ib_state *de_payload;
+	struct amd_xgpu_csa *sa = NULL;
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data;
+	uint64_t csa_addr, gds_addr;
+	int cnt;
+
+	if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
+		return;
+
+	sa = &xgpu->sa;
+
+	ce_payload = &sa->ce_payload;
+	de_payload = &sa->de_payload;
+	memset(ce_payload, 0, sizeof(*ce_payload));
+	memset(de_payload, 0, sizeof(*de_payload));
+
+	cnt = (sizeof(*ce_payload) >> 2) + 4 - 2;
+	csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr;
+	gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr;
+	de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr);
+	de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr);
+
+	/* write CE meta data */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) |
+				WRITE_DATA_DST_SEL(8) |
+				WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+			  offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+			  offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+
+	amdgpu_ring_write_multiple(ring, (void *)ce_payload,
+					 sizeof(*ce_payload) >> 2);
+
+	/* write DE meta data */
+	cnt = (sizeof(*de_payload) >> 2) + 4 - 2;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
+				WRITE_DATA_DST_SEL(8) |
+				WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+			  offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+			  offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+
+	amdgpu_ring_write_multiple(ring, (void *)de_payload,
+					 sizeof(*de_payload) >> 2);
+}
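
Two details in the patch above are worth spelling out: the reserved arrays pad struct amdgpu_gfx_meta_data to exactly one 4 KiB page, and the "+ 4 - 2" in the WRITE_DATA dword counts is just "payload dwords plus the three leading body dwords (control, addr_lo, addr_hi), minus one". The standalone C sketch below checks both; it assumes the usual PACKET3 convention that the count field is the number of dwords after the header minus one, and it is illustrative only, not part of the patch.

/*
 * Illustrative sanity check, not driver code: the stand-in structs
 * only reproduce the sizes of the structures defined in the patch.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct amdgpu_ce_ib_state { uint32_t dw[4]; };   /* 4 dwords, as above  */
struct amdgpu_de_ib_state { uint32_t dw[17]; };  /* 17 dwords, as above */

struct amdgpu_gfx_meta_data {
	struct amdgpu_ce_ib_state ce_payload;
	uint32_t                  reserved1[60];
	struct amdgpu_de_ib_state de_payload;
	uint32_t                  de_ib_base_addr_lo;
	uint32_t                  de_ib_base_addr_hi;
	uint32_t                  reserved2[941];
};

int main(void)
{
	unsigned int ce_dw = sizeof(struct amdgpu_ce_ib_state) >> 2;
	/* body = control + addr_lo + addr_hi + payload; count = body - 1 */
	unsigned int cnt   = (3 + ce_dw) - 1;

	/* 4 + 60 + 17 + 2 + 941 dwords == 1024 dwords == 4096 bytes */
	assert(sizeof(struct amdgpu_gfx_meta_data) == 4096);
	assert(cnt == ce_dw + 4 - 2);   /* matches the patch's arithmetic */

	printf("meta data: %zu bytes, CE WRITE_DATA count: %u\n",
	       sizeof(struct amdgpu_gfx_meta_data), cnt);
	return 0;
}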

Comments

On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com> wrote:
> Virtualization world switch needs each command submitted to the GFX
> ring to carry an extra entry, which uses WRITE_DATA to fill in the CSA.
> This way, the CP can save CE/DE snapshots when preemption occurs and
> restore them later.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 +
>  drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    | 39 ++++++++++++++
>  drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 88 ++++++++++++++++++++++++++++++++
>  4 files changed, 131 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index acf48de..cc35255 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>         if (ring->funcs->emit_hdp_flush)
>                 amdgpu_ring_emit_hdp_flush(ring);
>
> +       amdgpu_gfx_ring_emit_meta_data(ring, vm);
> +
>         /* always set cond_exec_polling to CONTINUE */
>         *ring->cond_exe_cpu_addr = 1;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index dff1248..d6f57a2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
>  /* Context Save Area functions */
>  int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
> +                                   struct amdgpu_vm *vm);
>  #endif
> diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> index a25e07f..cc3123b 100644
> --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> @@ -26,9 +26,48 @@
>
>  #include "amdgpu.h"
>
> +/* context save area structures */
> +struct amdgpu_ce_ib_state {
> +       uint32_t        ce_ib_completion_status;
> +       uint32_t        ce_const_engine_count;
> +       uint32_t        ce_ib_offset_ib1;
> +       uint32_t        ce_ib_offset_ib2;
> +};
> +
> +struct amdgpu_de_ib_state {
> +       uint32_t        de_ib_completion_status;
> +       uint32_t        de_const_engine_count;
> +       uint32_t        de_ib_offset_ib1;
> +       uint32_t        de_ib_offset_ib2;
> +       uint32_t        preamble_begin_ib1;
> +       uint32_t        preamble_begin_ib2;
> +       uint32_t        preamble_end_ib1;
> +       uint32_t        preamble_end_ib2;
> +       uint32_t        draw_indirect_base_lo;
> +       uint32_t        draw_indirect_base_hi;
> +       uint32_t        disp_indirect_base_lo;
> +       uint32_t        disp_indirect_base_hi;
> +       uint32_t        gds_backup_addr_lo;
> +       uint32_t        gds_backup_addr_hi;
> +       uint32_t        index_base_addr_lo;
> +       uint32_t        index_base_addr_hi;
> +       uint32_t        sample_cntl;
> +};
> +
> +struct amdgpu_gfx_meta_data {
> +       struct amdgpu_ce_ib_state       ce_payload;
> +       uint32_t                        reserved1[60];
> +       struct amdgpu_de_ib_state       de_payload;
> +       uint32_t                        de_ib_base_addr_lo;
> +       uint32_t                        de_ib_base_addr_hi;
> +       uint32_t                        reserved2[941];
> +};
> +

These are gfx8 specific and should be moved to the gfx8 module.

>  /* xgpu structures */
>  struct amd_xgpu_csa {
>         struct amdgpu_bo            *robj;
> +       struct amdgpu_ce_ib_state   ce_payload;
> +       struct amdgpu_de_ib_state   de_payload;
>         uint64_t                    gpu_addr;
>         uint64_t                    gds_addr;
>         int32_t                     size;
> diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> index 246a747..6d4246c 100644
> --- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> @@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>         sa = &xgpu->sa;
>         xgpu_vm_unmap_csa(adev, vm, sa);
>  }
> +
> +static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
> +                                      void *src, int count_dw)
> +{
> +       if (ring->count_dw < count_dw)
> +               DRM_ERROR("writing more dwords to the ring than expected:%d.\n",
> +                          count_dw);
> +       else {
> +               unsigned int chunk1, chunk2;
> +               void *dst = (void *)&ring->ring[ring->wptr];
> +
> +               chunk1 = ring->ptr_mask + 1 - ring->wptr;
> +               chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
> +               chunk2 = count_dw - chunk1;
> +               chunk1 <<= 2;
> +               chunk2 <<= 2;
> +               if (chunk1) {
> +                       memcpy(dst, src, chunk1);
> +                       dst = (void *)(((uint64_t)dst + chunk1) &
> +                                       ring->ptr_mask);
> +               }
> +
> +               if (chunk2) {
> +                       src += chunk1;
> +                       dst = (void *)ring->ring;
> +                       memcpy(dst, src, chunk2);
> +               }
> +
> +               ring->wptr += count_dw;
> +               ring->wptr &= ring->ptr_mask;
> +               ring->count_dw -= count_dw;
> +       }
> +}
> +
> +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
> +                                   struct amdgpu_vm *vm)
> +{
> +       struct amdgpu_ce_ib_state *ce_payload;
> +       struct amdgpu_de_ib_state *de_payload;
> +       struct amd_xgpu_csa *sa = NULL;
> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data;
> +       uint64_t csa_addr, gds_addr;
> +       int cnt;
> +
> +       if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
> +               return;
> +
> +       sa = &xgpu->sa;

No need to make this dependent on xgpu.  As I said in the previous
patch, preemption is useful independent of xgpu.

> +
> +       ce_payload = &sa->ce_payload;
> +       de_payload = &sa->de_payload;
> +       memset(ce_payload, 0, sizeof(*ce_payload));
> +       memset(de_payload, 0, sizeof(*de_payload));
> +
> +       cnt = (sizeof(*ce_payload) >> 2) + 4 - 2;
> +       csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr;
> +       gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr;
> +       de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr);
> +       de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr);
> +
> +       /* write CE meta data */
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) |
> +                               WRITE_DATA_DST_SEL(8) |
> +                               WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
> +       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
> +                         offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
> +       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
> +                         offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
> +
> +       amdgpu_ring_write_multiple(ring, (void *)ce_payload,
> +                                        sizeof(*ce_payload) >> 2);
> +
> +       /* write DE meta data */
> +       cnt = (sizeof(*de_payload) >> 2) + 4 - 2;
> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
> +       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
> +                               WRITE_DATA_DST_SEL(8) |
> +                               WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
> +       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
> +                         offsetof(struct amdgpu_gfx_meta_data, de_payload)));
> +       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
> +                         offsetof(struct amdgpu_gfx_meta_data, de_payload)));
> +
> +       amdgpu_ring_write_multiple(ring, (void *)de_payload,
> +                                        sizeof(*de_payload) >> 2);
> +}

This function is gfx8 specific and should be moved to the gfx8 module.

Alex

> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
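
As background for amdgpu_ring_write_multiple() in the patch under review: the helper splits a write that would run past the end of the ring into two memcpy() calls, one up to the end of the buffer and one starting again at index 0. Below is a small self-contained toy model of that split; the names and the 16-dword ring are made up for illustration and none of this is driver code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_DW   16u                       /* toy ring: 16 dwords      */
#define PTR_MASK  (RING_DW - 1)             /* ring size must be 2^n    */

static uint32_t ring[RING_DW];
static unsigned int wptr;                   /* write pointer, in dwords */

/* Copy count_dw dwords into the ring, wrapping at the end if needed. */
static void ring_write_multiple(const uint32_t *src, unsigned int count_dw)
{
	unsigned int chunk1 = RING_DW - wptr;   /* dwords until the end */

	if (chunk1 > count_dw)
		chunk1 = count_dw;

	memcpy(&ring[wptr], src, chunk1 * 4);
	if (count_dw > chunk1)                  /* wrapped: rest at [0] */
		memcpy(ring, src + chunk1, (count_dw - chunk1) * 4);

	wptr = (wptr + count_dw) & PTR_MASK;
}

int main(void)
{
	uint32_t payload[6] = { 1, 2, 3, 4, 5, 6 };

	wptr = 13;                 /* 3 dwords left before the wrap point */
	ring_write_multiple(payload, 6);

	/* Expect 1,2,3 at indices 13..15 and 4,5,6 at indices 0..2 */
	printf("ring[13..15] = %u %u %u, ring[0..2] = %u %u %u, wptr = %u\n",
	       (unsigned)ring[13], (unsigned)ring[14], (unsigned)ring[15],
	       (unsigned)ring[0], (unsigned)ring[1], (unsigned)ring[2], wptr);
	return 0;
}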
> -----Original Message-----
> From: Alex Deucher [mailto:alexdeucher@gmail.com]
> Sent: Tuesday, December 20, 2016 7:23 AM
> To: Yu, Xiangliang <Xiangliang.Yu@amd.com>
> Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>; dl.SRDC_SW_GPUVirtualization <dl.SRDC_SW_GPUVirtualization@amd.com>; Liu, Monk <Monk.Liu@amd.com>
> Subject: Re: [PATCH 12/23] drm/amdgpu: Insert meta data during submitting IB
>
> On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com> wrote:
> > [...]
> > +struct amdgpu_gfx_meta_data {
> > +       struct amdgpu_ce_ib_state       ce_payload;
> > +       uint32_t                        reserved1[60];
> > +       struct amdgpu_de_ib_state       de_payload;
> > +       uint32_t                        de_ib_base_addr_lo;
> > +       uint32_t                        de_ib_base_addr_hi;
> > +       uint32_t                        reserved2[941];
> > +};
> > +
>
> These are gfx8 specific and should be moved to the gfx8 module.

I think it is only related to virtualization right now. I'm fine to move it out if we support the whole preemption solution later.

> > [...]
> > +       if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
> > +               return;
> > +
> > +       sa = &xgpu->sa;
>
> No need to make this dependent on xgpu.  As I said in the previous
> patch, preemption is useful independent of xgpu.

Ditto

> > [...]
> > +       amdgpu_ring_write_multiple(ring, (void *)de_payload,
> > +                                        sizeof(*de_payload) >> 2);
> > +}
>
> This function is gfx8 specific and should be moved to the gfx8 module.

Ditto

> Alex