[11/23] drm/amdgpu: implement context save area(CSA) feature

Submitted by Yu, Xiangliang on Dec. 17, 2016, 4:16 p.m.

Details

Message ID 1481991405-30422-12-git-send-email-Xiangliang.Yu@amd.com
State New
Headers show
Series "转发: [PATCH 07/23] drm/amdgpu: create new directory for GPU virtualization" ( rev: 3 2 1 ) in AMD X.Org drivers

Not browsing as part of any series.

Commit Message

Yu, Xiangliang Dec. 17, 2016, 4:16 p.m.
The CSA is needed by world switch. This patch implements the CSA feature and
binds it to each VM, so the hardware can save its state into the area
and restore it when running again.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +
 drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++
 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 +++++++++++++++++++++++++++++++
 5 files changed, 247 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

Patch hide | download patch | download mbox

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 8ee70f8..dff1248 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -24,6 +24,8 @@ 
 #ifndef AMDGPU_VIRT_H
 #define AMDGPU_VIRT_H
 
+struct amdgpu_vm;
+
 #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */
 #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */
 #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */
@@ -33,6 +35,14 @@  struct amdgpu_virtualization {
 	uint32_t virtual_caps;
 };
 
+struct amdgpu_csa {
+	struct amdgpu_bo_va		*va;
+	struct ttm_validate_buffer	tv;
+	uint64_t			reserved_top;
+	uint64_t			csa_addr;
+	uint64_t			gds_addr;
+};
+
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
 
@@ -55,4 +65,8 @@  static inline bool is_virtual_machine(void)
 }
 
 int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
+
+/* Context Save Area functions */
+int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d05546e..98540d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1539,6 +1539,8 @@  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	pd_size = amdgpu_vm_directory_size(adev);
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
+	vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;
+
 	/* allocate page table array */
 	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
@@ -1576,6 +1578,10 @@  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 	amdgpu_bo_unreserve(vm->page_directory);
 
+	r = amdgpu_vm_map_csa(adev, vm);
+	if (r)
+		goto error_free_page_directory;
+
 	return 0;
 
 error_free_page_directory:
@@ -1606,6 +1612,8 @@  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	int i;
 
+	amdgpu_vm_unmap_csa(adev, vm);
+
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 42a629b..d90630a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,6 +29,7 @@ 
 #include "gpu_scheduler.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
+#include "amdgpu_virt.h"
 
 struct amdgpu_bo_va;
 struct amdgpu_job;
@@ -109,6 +110,9 @@  struct amdgpu_vm {
 	/* Scheduler entity for page table updates */
 	struct amd_sched_entity	entity;
 
+	/* Context Save Area */
+	struct amdgpu_csa	csa;
+
 	/* client id */
 	u64                     client_id;
 };
diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
index 6ab13bc..a25e07f 100644
--- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
+++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
@@ -26,12 +26,24 @@ 
 
 #include "amdgpu.h"
 
+/* xgpu structures */
+struct amd_xgpu_csa {
+	struct amdgpu_bo	    *robj;
+	uint64_t		    gpu_addr;
+	uint64_t		    gds_addr;
+	int32_t			    size;
+};
+
 struct amd_xgpu {
 	struct amdgpu_device	*adev;
 	struct mutex		lock;
+	struct amd_xgpu_csa	sa;
 	u32			reg_val_offs;
 };
 
 extern int amd_xgpu_alloc(struct amdgpu_device *adev);
 extern void amd_xgpu_free(struct amd_xgpu *xgpu);
+
+extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);
+extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);
 #endif
diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
new file mode 100644
index 0000000..246a747
--- /dev/null
+++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
@@ -0,0 +1,209 @@ 
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Xiangliang.Yu@amd.com
+ *	    Monk.Liu@amd.com
+ *
+ */
+#include "amd_mxgpu.h"
+#include "vid.h"
+
+static int xgpu_init_csa(struct amdgpu_device *adev, struct amd_xgpu_csa *sa)
+{
+	int r, size;
+	void *ptr;
+
+	/* meta data (4k) + gds-gfx (4k)*/
+	size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;
+
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
+			     AMDGPU_GEM_DOMAIN_GTT,
+			     AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+			     NULL, NULL, &sa->robj);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate csa bo\n", r);
+		return r;
+	}
+
+	r = amdgpu_bo_reserve(sa->robj, true);
+	if (unlikely(r != 0))
+		goto error_free;
+
+	r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, &sa->gpu_addr);
+	if (r)
+		goto error_unreserve;
+
+	r = amdgpu_bo_kmap(sa->robj, &ptr);
+	if (r)
+		goto error_unpin;
+
+	memset(ptr, 0, size);
+	amdgpu_bo_unreserve(sa->robj);
+
+	sa->size = size;
+	sa->gds_addr = sa->gpu_addr + PAGE_SIZE;
+
+	return 0;
+
+error_unpin:
+	amdgpu_bo_unpin(sa->robj);
+error_unreserve:
+	amdgpu_bo_unreserve(sa->robj);
+error_free:
+	amdgpu_bo_unref(&sa->robj);
+	return r;
+}
+
+int xgpu_allocate_csa(struct amd_xgpu *xgpu)
+{
+	struct amdgpu_device *adev = xgpu->adev;
+	struct amd_xgpu_csa *sa = &xgpu->sa;
+
+	return xgpu_init_csa(adev, sa);
+}
+
+void xgpu_destroy_csa(struct amd_xgpu_csa *sa)
+{
+	amdgpu_bo_reserve(sa->robj, true);
+	amdgpu_bo_unpin(sa->robj);
+	amdgpu_bo_unreserve(sa->robj);
+	amdgpu_bo_unref(&sa->robj);
+	sa->gpu_addr = 0;
+	sa->gds_addr = 0;
+}
+
+static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   struct amd_xgpu_csa *sa)
+{
+	int r;
+	uint64_t vaddr;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
+	struct amdgpu_bo_list_entry pd;
+	struct amdgpu_bo_va *bo_va;
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	INIT_LIST_HEAD(&vm->csa.tv.head);
+	vm->csa.tv.bo = &sa->robj->tbo;
+	vm->csa.tv.shared = true;
+
+	list_add(&vm->csa.tv.head, &list);
+	amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+	spin_lock(&vm->status_lock);
+	vm->csa.reserved_top -= sa->size;
+	vaddr = vm->csa.reserved_top;
+	spin_unlock(&vm->status_lock);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	if (r) {
+		DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
+		return r;
+	}
+
+	bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);
+	if (!bo_va) {
+		DRM_ERROR("failed to create bo_va for global CSA buffer.\n");
+		return -ENOMEM;
+	}
+
+
+	r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);
+		amdgpu_vm_bo_rmv(adev, bo_va);
+		ttm_eu_backoff_reservation(&ticket, &list);
+		kfree(bo_va);
+		return r;
+	}
+
+	ttm_eu_backoff_reservation(&ticket, &list);
+	amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);
+
+	vm->csa.va = bo_va;
+	vm->csa.csa_addr = vaddr;
+	vm->csa.gds_addr = vaddr + PAGE_SIZE;
+
+	return 0;
+}
+
+int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
+	struct amd_xgpu_csa *sa = NULL;
+
+	if (!xgpu)
+		return 0;
+
+	sa = &xgpu->sa;
+
+	return xgpu_vm_map_csa(adev, vm, sa);
+}
+
+static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			      struct amd_xgpu_csa *sa)
+{
+	int r;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
+	struct amdgpu_bo_list_entry pd;
+
+	if (!vm->csa.va)
+		return;
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	list_add(&vm->csa.tv.head, &list);
+	amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	if (r) {
+		DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
+		return;
+	}
+
+	amdgpu_vm_bo_rmv(adev, vm->csa.va);
+	/* maybe we don't need to do real clearing for the vm will die soon */
+	r = amdgpu_vm_clear_freed(adev, vm);
+	if (r) {
+		DRM_ERROR("failed to clear global CSA bo(%d).\n", r);
+		return;
+	}
+
+	ttm_eu_backoff_reservation(&ticket, &list);
+	vm->csa.va = NULL;
+	vm->csa.csa_addr = vm->csa.gds_addr = 0;
+}
+
+void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
+	struct amd_xgpu_csa *sa = NULL;
+
+	if (!xgpu)
+		return;
+
+	sa = &xgpu->sa;
+	xgpu_vm_unmap_csa(adev, vm, sa);
+}

Comments

On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com> wrote:
> CSA is need by world switch. This patch implement CSA feature and
> bind it to each VM, so hardware can save the state into the area
> and restore it when running again.
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>

Isn't the CSA actually for preemption?  Preemption is useful outside
of the VF case as well so it should be untangled from the mxgpu code
so it can be utilized independently.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +
>  drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++
>  drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 +++++++++++++++++++++++++++++++
>  5 files changed, 247 insertions(+)
>  create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 8ee70f8..dff1248 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -24,6 +24,8 @@
>  #ifndef AMDGPU_VIRT_H
>  #define AMDGPU_VIRT_H
>
> +struct amdgpu_vm;
> +
>  #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */
>  #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */
>  #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */
> @@ -33,6 +35,14 @@ struct amdgpu_virtualization {
>         uint32_t virtual_caps;
>  };
>
> +struct amdgpu_csa {
> +       struct amdgpu_bo_va             *va;
> +       struct ttm_validate_buffer      tv;
> +       uint64_t                        reserved_top;
> +       uint64_t                        csa_addr;
> +       uint64_t                        gds_addr;
> +};
> +
>  #define amdgpu_sriov_enabled(adev) \
>  ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
>
> @@ -55,4 +65,8 @@ static inline bool is_virtual_machine(void)
>  }
>
>  int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
> +
> +/* Context Save Area functions */
> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d05546e..98540d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1539,6 +1539,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>         pd_size = amdgpu_vm_directory_size(adev);
>         pd_entries = amdgpu_vm_num_pdes(adev);
>
> +       vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;
> +
>         /* allocate page table array */
>         vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
>         if (vm->page_tables == NULL) {
> @@ -1576,6 +1578,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>         vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
>         amdgpu_bo_unreserve(vm->page_directory);
>
> +       r = amdgpu_vm_map_csa(adev, vm);
> +       if (r)
> +               goto error_free_page_directory;
> +
>         return 0;
>
>  error_free_page_directory:
> @@ -1606,6 +1612,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>         struct amdgpu_bo_va_mapping *mapping, *tmp;
>         int i;
>
> +       amdgpu_vm_unmap_csa(adev, vm);
> +
>         amd_sched_entity_fini(vm->entity.sched, &vm->entity);
>
>         if (!RB_EMPTY_ROOT(&vm->va)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 42a629b..d90630a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -29,6 +29,7 @@
>  #include "gpu_scheduler.h"
>  #include "amdgpu_sync.h"
>  #include "amdgpu_ring.h"
> +#include "amdgpu_virt.h"
>
>  struct amdgpu_bo_va;
>  struct amdgpu_job;
> @@ -109,6 +110,9 @@ struct amdgpu_vm {
>         /* Scheduler entity for page table updates */
>         struct amd_sched_entity entity;
>
> +       /* Context Save Area */
> +       struct amdgpu_csa       csa;
> +
>         /* client id */
>         u64                     client_id;
>  };
> diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> index 6ab13bc..a25e07f 100644
> --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> @@ -26,12 +26,24 @@
>
>  #include "amdgpu.h"
>
> +/* xgpu structures */
> +struct amd_xgpu_csa {
> +       struct amdgpu_bo            *robj;
> +       uint64_t                    gpu_addr;
> +       uint64_t                    gds_addr;
> +       int32_t                     size;
> +};
> +
>  struct amd_xgpu {
>         struct amdgpu_device    *adev;
>         struct mutex            lock;
> +       struct amd_xgpu_csa     sa;
>         u32                     reg_val_offs;
>  };
>
>  extern int amd_xgpu_alloc(struct amdgpu_device *adev);
>  extern void amd_xgpu_free(struct amd_xgpu *xgpu);
> +
> +extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);
> +extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);
>  #endif
> diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> new file mode 100644
> index 0000000..246a747
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> @@ -0,0 +1,209 @@
> +/*
> + * Copyright 2016 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Xiangliang.Yu@amd.com
> + *         Monk.Liu@amd.com
> + *
> + */
> +#include "amd_mxgpu.h"
> +#include "vid.h"
> +
> +static int xgpu_init_csa(struct amdgpu_device *adev, struct amd_xgpu_csa *sa)
> +{
> +       int r, size;
> +       void *ptr;
> +
> +       /* meta data (4k) + gds-gfx (4k)*/
> +       size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;
> +
> +       r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
> +                            AMDGPU_GEM_DOMAIN_GTT,
> +                            AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> +                            NULL, NULL, &sa->robj);
> +       if (r) {
> +               dev_err(adev->dev, "(%d) failed to allocate csa bo\n", r);
> +               return r;
> +       }
> +
> +       r = amdgpu_bo_reserve(sa->robj, true);
> +       if (unlikely(r != 0))
> +               goto error_free;
> +
> +       r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, &sa->gpu_addr);
> +       if (r)
> +               goto error_unreserve;
> +
> +       r = amdgpu_bo_kmap(sa->robj, &ptr);
> +       if (r)
> +               goto error_unpin;
> +
> +       memset(ptr, 0, size);
> +       amdgpu_bo_unreserve(sa->robj);
> +
> +       sa->size = size;
> +       sa->gds_addr = sa->gpu_addr + PAGE_SIZE;
> +
> +       return 0;
> +
> +error_unpin:
> +       amdgpu_bo_unpin(sa->robj);
> +error_unreserve:
> +       amdgpu_bo_unreserve(sa->robj);
> +error_free:
> +       amdgpu_bo_unref(&sa->robj);
> +       return r;
> +}
> +
> +int xgpu_allocate_csa(struct amd_xgpu *xgpu)
> +{
> +       struct amdgpu_device *adev = xgpu->adev;
> +       struct amd_xgpu_csa *sa = &xgpu->sa;
> +
> +       return xgpu_init_csa(adev, sa);
> +}
> +
> +void xgpu_destroy_csa(struct amd_xgpu_csa *sa)
> +{
> +       amdgpu_bo_reserve(sa->robj, true);
> +       amdgpu_bo_unpin(sa->robj);
> +       amdgpu_bo_unreserve(sa->robj);
> +       amdgpu_bo_unref(&sa->robj);
> +       sa->gpu_addr = 0;
> +       sa->gds_addr = 0;
> +}
> +
> +static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +                          struct amd_xgpu_csa *sa)
> +{
> +       int r;
> +       uint64_t vaddr;
> +       struct ww_acquire_ctx ticket;
> +       struct list_head list, duplicates;
> +       struct amdgpu_bo_list_entry pd;
> +       struct amdgpu_bo_va *bo_va;
> +
> +       INIT_LIST_HEAD(&list);
> +       INIT_LIST_HEAD(&duplicates);
> +       INIT_LIST_HEAD(&vm->csa.tv.head);
> +       vm->csa.tv.bo = &sa->robj->tbo;
> +       vm->csa.tv.shared = true;
> +
> +       list_add(&vm->csa.tv.head, &list);
> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);
> +
> +       spin_lock(&vm->status_lock);
> +       vm->csa.reserved_top -= sa->size;
> +       vaddr = vm->csa.reserved_top;
> +       spin_unlock(&vm->status_lock);
> +
> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
> +       if (r) {
> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
> +               return r;
> +       }
> +
> +       bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);
> +       if (!bo_va) {
> +               DRM_ERROR("failed to create bo_va for global CSA buffer.\n");
> +               return -ENOMEM;
> +       }
> +
> +
> +       r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,
> +                            AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
> +                            AMDGPU_PTE_EXECUTABLE);
> +       if (r) {
> +               DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);
> +               amdgpu_vm_bo_rmv(adev, bo_va);
> +               ttm_eu_backoff_reservation(&ticket, &list);
> +               kfree(bo_va);
> +               return r;
> +       }
> +
> +       ttm_eu_backoff_reservation(&ticket, &list);
> +       amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);
> +
> +       vm->csa.va = bo_va;
> +       vm->csa.csa_addr = vaddr;
> +       vm->csa.gds_addr = vaddr + PAGE_SIZE;
> +
> +       return 0;
> +}
> +
> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +{
> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
> +       struct amd_xgpu_csa *sa = NULL;
> +
> +       if (!xgpu)
> +               return 0;
> +
> +       sa = &xgpu->sa;
> +
> +       return xgpu_vm_map_csa(adev, vm, sa);
> +}
> +
> +static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> +                             struct amd_xgpu_csa *sa)
> +{
> +       int r;
> +       struct ww_acquire_ctx ticket;
> +       struct list_head list, duplicates;
> +       struct amdgpu_bo_list_entry pd;
> +
> +       if (!vm->csa.va)
> +               return;
> +
> +       INIT_LIST_HEAD(&list);
> +       INIT_LIST_HEAD(&duplicates);
> +       list_add(&vm->csa.tv.head, &list);
> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);
> +
> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
> +       if (r) {
> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
> +               return;
> +       }
> +
> +       amdgpu_vm_bo_rmv(adev, vm->csa.va);
> +       /* maybe we don't need to do real clearing for the vm will die soon */
> +       r = amdgpu_vm_clear_freed(adev, vm);
> +       if (r) {
> +               DRM_ERROR("failed to clear global CSA bo(%d).\n", r);
> +               return;
> +       }
> +
> +       ttm_eu_backoff_reservation(&ticket, &list);
> +       vm->csa.va = NULL;
> +       vm->csa.csa_addr = vm->csa.gds_addr = 0;
> +}
> +
> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> +{
> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
> +       struct amd_xgpu_csa *sa = NULL;
> +
> +       if (!xgpu)
> +               return;
> +
> +       sa = &xgpu->sa;
> +       xgpu_vm_unmap_csa(adev, vm, sa);
> +}
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
The CSA is used for world switch; each amdgpu device should have one and only one CSA,

and this CSA will be pinned and mapped into each virtual memory space/process.


CP/RLCV will use this CSA buffer when preemption occurs, and will write some hardware status into this CSA buffer within the current IB's context (that is why the CSA needs to be mapped into each virtual memory space).


BR Monk

________________________________
发件人: Alex Deucher <alexdeucher@gmail.com>
发送时间: 2016年12月20日 7:20:09
收件人: Yu, Xiangliang
抄送: amd-gfx list; dl.SRDC_SW_GPUVirtualization; Liu, Monk
主题: Re: [PATCH 11/23] drm/amdgpu: implement context save area(CSA) feature

On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com> wrote:
> CSA is need by world switch. This patch implement CSA feature and

> bind it to each VM, so hardware can save the state into the area

> and restore it when running again.

>

> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Isn't the CSA actually for preemption?  Preemption is useful outside
of the VF case as well so it should be untangled from the mxgpu code
so it can be utilized independently.

Alex

> ---

>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++

>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++

>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +

>  drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++

>  drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 +++++++++++++++++++++++++++++++

>  5 files changed, 247 insertions(+)

>  create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> index 8ee70f8..dff1248 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> @@ -24,6 +24,8 @@

>  #ifndef AMDGPU_VIRT_H

>  #define AMDGPU_VIRT_H

>

> +struct amdgpu_vm;

> +

>  #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */

>  #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */

>  #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */

> @@ -33,6 +35,14 @@ struct amdgpu_virtualization {

>         uint32_t virtual_caps;

>  };

>

> +struct amdgpu_csa {

> +       struct amdgpu_bo_va             *va;

> +       struct ttm_validate_buffer      tv;

> +       uint64_t                        reserved_top;

> +       uint64_t                        csa_addr;

> +       uint64_t                        gds_addr;

> +};

> +

>  #define amdgpu_sriov_enabled(adev) \

>  ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)

>

> @@ -55,4 +65,8 @@ static inline bool is_virtual_machine(void)

>  }

>

>  int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);

> +

> +/* Context Save Area functions */

> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);

> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);

>  #endif

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> index d05546e..98540d9 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> @@ -1539,6 +1539,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         pd_size = amdgpu_vm_directory_size(adev);

>         pd_entries = amdgpu_vm_num_pdes(adev);

>

> +       vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;

> +

>         /* allocate page table array */

>         vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));

>         if (vm->page_tables == NULL) {

> @@ -1576,6 +1578,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         vm->last_eviction_counter = atomic64_read(&adev->num_evictions);

>         amdgpu_bo_unreserve(vm->page_directory);

>

> +       r = amdgpu_vm_map_csa(adev, vm);

> +       if (r)

> +               goto error_free_page_directory;

> +

>         return 0;

>

>  error_free_page_directory:

> @@ -1606,6 +1612,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         struct amdgpu_bo_va_mapping *mapping, *tmp;

>         int i;

>

> +       amdgpu_vm_unmap_csa(adev, vm);

> +

>         amd_sched_entity_fini(vm->entity.sched, &vm->entity);

>

>         if (!RB_EMPTY_ROOT(&vm->va)) {

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> index 42a629b..d90630a 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> @@ -29,6 +29,7 @@

>  #include "gpu_scheduler.h"

>  #include "amdgpu_sync.h"

>  #include "amdgpu_ring.h"

> +#include "amdgpu_virt.h"

>

>  struct amdgpu_bo_va;

>  struct amdgpu_job;

> @@ -109,6 +110,9 @@ struct amdgpu_vm {

>         /* Scheduler entity for page table updates */

>         struct amd_sched_entity entity;

>

> +       /* Context Save Area */

> +       struct amdgpu_csa       csa;

> +

>         /* client id */

>         u64                     client_id;

>  };

> diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> index 6ab13bc..a25e07f 100644

> --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> @@ -26,12 +26,24 @@

>

>  #include "amdgpu.h"

>

> +/* xgpu structures */

> +struct amd_xgpu_csa {

> +       struct amdgpu_bo            *robj;

> +       uint64_t                    gpu_addr;

> +       uint64_t                    gds_addr;

> +       int32_t                     size;

> +};

> +

>  struct amd_xgpu {

>         struct amdgpu_device    *adev;

>         struct mutex            lock;

> +       struct amd_xgpu_csa     sa;

>         u32                     reg_val_offs;

>  };

>

>  extern int amd_xgpu_alloc(struct amdgpu_device *adev);

>  extern void amd_xgpu_free(struct amd_xgpu *xgpu);

> +

> +extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);

> +extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);

>  #endif

> diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

> new file mode 100644

> index 0000000..246a747

> --- /dev/null

> +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

> @@ -0,0 +1,209 @@

> +/*

> + * Copyright 2016 Advanced Micro Devices, Inc.

> + *

> + * Permission is hereby granted, free of charge, to any person obtaining a

> + * copy of this software and associated documentation files (the "Software"),

> + * to deal in the Software without restriction, including without limitation

> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,

> + * and/or sell copies of the Software, and to permit persons to whom the

> + * Software is furnished to do so, subject to the following conditions:

> + *

> + * The above copyright notice and this permission notice shall be included in

> + * all copies or substantial portions of the Software.

> + *

> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL

> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR

> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,

> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

> + * OTHER DEALINGS IN THE SOFTWARE.

> + *

> + * Authors: Xiangliang.Yu@amd.com

> + *         Monk.Liu@amd.com

> + *

> + */

> +#include "amd_mxgpu.h"

> +#include "vid.h"

> +

> +static int xgpu_init_csa(struct amdgpu_device *adev, struct amd_xgpu_csa *sa)

> +{

> +       int r, size;

> +       void *ptr;

> +

> +       /* meta data (4k) + gds-gfx (4k)*/

> +       size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;

> +

> +       r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,

> +                            AMDGPU_GEM_DOMAIN_GTT,

> +                            AMDGPU_GEM_CREATE_CPU_GTT_USWC,

> +                            NULL, NULL, &sa->robj);

> +       if (r) {

> +               dev_err(adev->dev, "(%d) failed to allocate csa bo\n", r);

> +               return r;

> +       }

> +

> +       r = amdgpu_bo_reserve(sa->robj, true);

> +       if (unlikely(r != 0))

> +               goto error_free;

> +

> +       r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, &sa->gpu_addr);

> +       if (r)

> +               goto error_unreserve;

> +

> +       r = amdgpu_bo_kmap(sa->robj, &ptr);

> +       if (r)

> +               goto error_unpin;

> +

> +       memset(ptr, 0, size);

> +       amdgpu_bo_unreserve(sa->robj);

> +

> +       sa->size = size;

> +       sa->gds_addr = sa->gpu_addr + PAGE_SIZE;

> +

> +       return 0;

> +

> +error_unpin:

> +       amdgpu_bo_unpin(sa->robj);

> +error_unreserve:

> +       amdgpu_bo_unreserve(sa->robj);

> +error_free:

> +       amdgpu_bo_unref(&sa->robj);

> +       return r;

> +}

> +

> +int xgpu_allocate_csa(struct amd_xgpu *xgpu)

> +{

> +       struct amdgpu_device *adev = xgpu->adev;

> +       struct amd_xgpu_csa *sa = &xgpu->sa;

> +

> +       return xgpu_init_csa(adev, sa);

> +}

> +

> +void xgpu_destroy_csa(struct amd_xgpu_csa *sa)

> +{

> +       amdgpu_bo_reserve(sa->robj, true);

> +       amdgpu_bo_unpin(sa->robj);

> +       amdgpu_bo_unreserve(sa->robj);

> +       amdgpu_bo_unref(&sa->robj);

> +       sa->gpu_addr = 0;

> +       sa->gds_addr = 0;

> +}

> +

> +static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

> +                          struct amd_xgpu_csa *sa)

> +{

> +       int r;

> +       uint64_t vaddr;

> +       struct ww_acquire_ctx ticket;

> +       struct list_head list, duplicates;

> +       struct amdgpu_bo_list_entry pd;

> +       struct amdgpu_bo_va *bo_va;

> +

> +       INIT_LIST_HEAD(&list);

> +       INIT_LIST_HEAD(&duplicates);

> +       INIT_LIST_HEAD(&vm->csa.tv.head);

> +       vm->csa.tv.bo = &sa->robj->tbo;

> +       vm->csa.tv.shared = true;

> +

> +       list_add(&vm->csa.tv.head, &list);

> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);

> +

> +       spin_lock(&vm->status_lock);

> +       vm->csa.reserved_top -= sa->size;

> +       vaddr = vm->csa.reserved_top;

> +       spin_unlock(&vm->status_lock);

> +

> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);

> +       if (r) {

> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);

> +               return r;

> +       }

> +

> +       bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);

> +       if (!bo_va) {

> +               DRM_ERROR("failed to create bo_va for global CSA buffer.\n");

> +               return -ENOMEM;

> +       }

> +

> +

> +       r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,

> +                            AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

> +                            AMDGPU_PTE_EXECUTABLE);

> +       if (r) {

> +               DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);

> +               amdgpu_vm_bo_rmv(adev, bo_va);

> +               ttm_eu_backoff_reservation(&ticket, &list);

> +               kfree(bo_va);

> +               return r;

> +       }

> +

> +       ttm_eu_backoff_reservation(&ticket, &list);

> +       amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);

> +

> +       vm->csa.va = bo_va;

> +       vm->csa.csa_addr = vaddr;

> +       vm->csa.gds_addr = vaddr + PAGE_SIZE;

> +

> +       return 0;

> +}

> +

> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)

> +{

> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;

> +       struct amd_xgpu_csa *sa = NULL;

> +

> +       if (!xgpu)

> +               return 0;

> +

> +       sa = &xgpu->sa;

> +

> +       return xgpu_vm_map_csa(adev, vm, sa);

> +}

> +

> +static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

> +                             struct amd_xgpu_csa *sa)

> +{

> +       int r;

> +       struct ww_acquire_ctx ticket;

> +       struct list_head list, duplicates;

> +       struct amdgpu_bo_list_entry pd;

> +

> +       if (!vm->csa.va)

> +               return;

> +

> +       INIT_LIST_HEAD(&list);

> +       INIT_LIST_HEAD(&duplicates);

> +       list_add(&vm->csa.tv.head, &list);

> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);

> +

> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);

> +       if (r) {

> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);

> +               return;

> +       }

> +

> +       amdgpu_vm_bo_rmv(adev, vm->csa.va);

> +       /* maybe we don't need to do real clearing for the vm will die soon */

> +       r = amdgpu_vm_clear_freed(adev, vm);

> +       if (r) {

> +               DRM_ERROR("failed to clear global CSA bo(%d).\n", r);

> +               return;

> +       }

> +

> +       ttm_eu_backoff_reservation(&ticket, &list);

> +       vm->csa.va = NULL;

> +       vm->csa.csa_addr = vm->csa.gds_addr = 0;

> +}

> +

> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)

> +{

> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;

> +       struct amd_xgpu_csa *sa = NULL;

> +

> +       if (!xgpu)

> +               return;

> +

> +       sa = &xgpu->sa;

> +       xgpu_vm_unmap_csa(adev, vm, sa);

> +}

> --

> 2.7.4

>

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Thanks for Monk’s detailed explanation.
And I think this patch only supports the virtualization world switch; it does not touch the whole amdgpu preemption support.

Thanks!
Xiangliang Yu

From: Liu, Monk

Sent: Tuesday, December 20, 2016 11:58 AM
To: Alex Deucher <alexdeucher@gmail.com>; Yu, Xiangliang <Xiangliang.Yu@amd.com>
Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>; dl.SRDC_SW_GPUVirtualization <dl.SRDC_SW_GPUVirtualization@amd.com>
Subject: 答复: [PATCH 11/23] drm/amdgpu: implement context save area(CSA) feature


the CSA is used for world switch, and each amdgpu device should have one and only one CSA,

and this CSA will be pinned, and mapped to each virtual memory / process.



CP/RLCV will use this CSA buffer when preemption occurs, and will write some hardware status into this CSA buffer, within the current IB's context (that's why we need to do the CSA mapping for each virtual memory)



BR Monk

________________________________
发件人: Alex Deucher <alexdeucher@gmail.com<mailto:alexdeucher@gmail.com>>
发送时间: 2016年12月20日 7:20:09
收件人: Yu, Xiangliang
抄送: amd-gfx list; dl.SRDC_SW_GPUVirtualization; Liu, Monk
主题: Re: [PATCH 11/23] drm/amdgpu: implement context save area(CSA) feature

On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com<mailto:Xiangliang.Yu@amd.com>> wrote:
> CSA is need by world switch. This patch implement CSA feature and

> bind it to each VM, so hardware can save the state into the area

> and restore it when running again.

>

> Signed-off-by: Monk Liu <Monk.Liu@amd.com<mailto:Monk.Liu@amd.com>>

> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com<mailto:Xiangliang.Yu@amd.com>>


Isn't the CSA actually for preemption?  Preemption is useful outside
of the VF case as well so it should be untangled from the mxgpu code
so it can be utilized independently.

Alex

> ---

>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++

>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++

>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +

>  drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++

>  drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 +++++++++++++++++++++++++++++++

>  5 files changed, 247 insertions(+)

>  create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

>

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> index 8ee70f8..dff1248 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h

> @@ -24,6 +24,8 @@

>  #ifndef AMDGPU_VIRT_H

>  #define AMDGPU_VIRT_H

>

> +struct amdgpu_vm;

> +

>  #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov ready */

>  #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is enabled on this GPU */

>  #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a virtual function */

> @@ -33,6 +35,14 @@ struct amdgpu_virtualization {

>         uint32_t virtual_caps;

>  };

>

> +struct amdgpu_csa {

> +       struct amdgpu_bo_va             *va;

> +       struct ttm_validate_buffer      tv;

> +       uint64_t                        reserved_top;

> +       uint64_t                        csa_addr;

> +       uint64_t                        gds_addr;

> +};

> +

>  #define amdgpu_sriov_enabled(adev) \

>  ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)

>

> @@ -55,4 +65,8 @@ static inline bool is_virtual_machine(void)

>  }

>

>  int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);

> +

> +/* Context Save Area functions */

> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);

> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);

>  #endif

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> index d05546e..98540d9 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

> @@ -1539,6 +1539,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         pd_size = amdgpu_vm_directory_size(adev);

>         pd_entries = amdgpu_vm_num_pdes(adev);

>

> +       vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;

> +

>         /* allocate page table array */

>         vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));

>         if (vm->page_tables == NULL) {

> @@ -1576,6 +1578,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         vm->last_eviction_counter = atomic64_read(&adev->num_evictions);

>         amdgpu_bo_unreserve(vm->page_directory);

>

> +       r = amdgpu_vm_map_csa(adev, vm);

> +       if (r)

> +               goto error_free_page_directory;

> +

>         return 0;

>

>  error_free_page_directory:

> @@ -1606,6 +1612,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)

>         struct amdgpu_bo_va_mapping *mapping, *tmp;

>         int i;

>

> +       amdgpu_vm_unmap_csa(adev, vm);

> +

>         amd_sched_entity_fini(vm->entity.sched, &vm->entity);

>

>         if (!RB_EMPTY_ROOT(&vm->va)) {

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> index 42a629b..d90630a 100644

> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

> @@ -29,6 +29,7 @@

>  #include "gpu_scheduler.h"

>  #include "amdgpu_sync.h"

>  #include "amdgpu_ring.h"

> +#include "amdgpu_virt.h"

>

>  struct amdgpu_bo_va;

>  struct amdgpu_job;

> @@ -109,6 +110,9 @@ struct amdgpu_vm {

>         /* Scheduler entity for page table updates */

>         struct amd_sched_entity entity;

>

> +       /* Context Save Area */

> +       struct amdgpu_csa       csa;

> +

>         /* client id */

>         u64                     client_id;

>  };

> diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> index 6ab13bc..a25e07f 100644

> --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h

> @@ -26,12 +26,24 @@

>

>  #include "amdgpu.h"

>

> +/* xgpu structures */

> +struct amd_xgpu_csa {

> +       struct amdgpu_bo            *robj;

> +       uint64_t                    gpu_addr;

> +       uint64_t                    gds_addr;

> +       int32_t                     size;

> +};

> +

>  struct amd_xgpu {

>         struct amdgpu_device    *adev;

>         struct mutex            lock;

> +       struct amd_xgpu_csa     sa;

>         u32                     reg_val_offs;

>  };

>

>  extern int amd_xgpu_alloc(struct amdgpu_device *adev);

>  extern void amd_xgpu_free(struct amd_xgpu *xgpu);

> +

> +extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);

> +extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);

>  #endif

> diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

> new file mode 100644

> index 0000000..246a747

> --- /dev/null

> +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c

> @@ -0,0 +1,209 @@

> +/*

> + * Copyright 2016 Advanced Micro Devices, Inc.

> + *

> + * Permission is hereby granted, free of charge, to any person obtaining a

> + * copy of this software and associated documentation files (the "Software"),

> + * to deal in the Software without restriction, including without limitation

> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,

> + * and/or sell copies of the Software, and to permit persons to whom the

> + * Software is furnished to do so, subject to the following conditions:

> + *

> + * The above copyright notice and this permission notice shall be included in

> + * all copies or substantial portions of the Software.

> + *

> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL

> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR

> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,

> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

> + * OTHER DEALINGS IN THE SOFTWARE.

> + *

> + * Authors: Xiangliang.Yu@amd.com<mailto:Xiangliang.Yu@amd.com>

> + *         Monk.Liu@amd.com<mailto:Monk.Liu@amd.com>

> + *

> + */

> +#include "amd_mxgpu.h"

> +#include "vid.h"

> +

> +static int xgpu_init_csa(struct amdgpu_device *adev, struct amd_xgpu_csa *sa)

> +{

> +       int r, size;

> +       void *ptr;

> +

> +       /* meta data (4k) + gds-gfx (4k)*/

> +       size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;

> +

> +       r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,

> +                            AMDGPU_GEM_DOMAIN_GTT,

> +                            AMDGPU_GEM_CREATE_CPU_GTT_USWC,

> +                            NULL, NULL, &sa->robj);

> +       if (r) {

> +               dev_err(adev->dev, "(%d) failed to allocate csa bo\n", r);

> +               return r;

> +       }

> +

> +       r = amdgpu_bo_reserve(sa->robj, true);

> +       if (unlikely(r != 0))

> +               goto error_free;

> +

> +       r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, &sa->gpu_addr);

> +       if (r)

> +               goto error_unreserve;

> +

> +       r = amdgpu_bo_kmap(sa->robj, &ptr);

> +       if (r)

> +               goto error_unpin;

> +

> +       memset(ptr, 0, size);

> +       amdgpu_bo_unreserve(sa->robj);

> +

> +       sa->size = size;

> +       sa->gds_addr = sa->gpu_addr + PAGE_SIZE;

> +

> +       return 0;

> +

> +error_unpin:

> +       amdgpu_bo_unpin(sa->robj);

> +error_unreserve:

> +       amdgpu_bo_unreserve(sa->robj);

> +error_free:

> +       amdgpu_bo_unref(&sa->robj);

> +       return r;

> +}

> +

> +int xgpu_allocate_csa(struct amd_xgpu *xgpu)

> +{

> +       struct amdgpu_device *adev = xgpu->adev;

> +       struct amd_xgpu_csa *sa = &xgpu->sa;

> +

> +       return xgpu_init_csa(adev, sa);

> +}

> +

> +void xgpu_destroy_csa(struct amd_xgpu_csa *sa)

> +{

> +       amdgpu_bo_reserve(sa->robj, true);

> +       amdgpu_bo_unpin(sa->robj);

> +       amdgpu_bo_unreserve(sa->robj);

> +       amdgpu_bo_unref(&sa->robj);

> +       sa->gpu_addr = 0;

> +       sa->gds_addr = 0;

> +}

> +

> +static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

> +                          struct amd_xgpu_csa *sa)

> +{

> +       int r;

> +       uint64_t vaddr;

> +       struct ww_acquire_ctx ticket;

> +       struct list_head list, duplicates;

> +       struct amdgpu_bo_list_entry pd;

> +       struct amdgpu_bo_va *bo_va;

> +

> +       INIT_LIST_HEAD(&list);

> +       INIT_LIST_HEAD(&duplicates);

> +       INIT_LIST_HEAD(&vm->csa.tv.head);

> +       vm->csa.tv.bo = &sa->robj->tbo;

> +       vm->csa.tv.shared = true;

> +

> +       list_add(&vm->csa.tv.head, &list);

> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);

> +

> +       spin_lock(&vm->status_lock);

> +       vm->csa.reserved_top -= sa->size;

> +       vaddr = vm->csa.reserved_top;

> +       spin_unlock(&vm->status_lock);

> +

> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);

> +       if (r) {

> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);

> +               return r;

> +       }

> +

> +       bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);

> +       if (!bo_va) {

> +               DRM_ERROR("failed to create bo_va for global CSA buffer.\n");

> +               return -ENOMEM;

> +       }

> +

> +

> +       r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,

> +                            AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |

> +                            AMDGPU_PTE_EXECUTABLE);

> +       if (r) {

> +               DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);

> +               amdgpu_vm_bo_rmv(adev, bo_va);

> +               ttm_eu_backoff_reservation(&ticket, &list);

> +               kfree(bo_va);

> +               return r;

> +       }

> +

> +       ttm_eu_backoff_reservation(&ticket, &list);

> +       amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);

> +

> +       vm->csa.va = bo_va;

> +       vm->csa.csa_addr = vaddr;

> +       vm->csa.gds_addr = vaddr + PAGE_SIZE;

> +

> +       return 0;

> +}

> +

> +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)

> +{

> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;

> +       struct amd_xgpu_csa *sa = NULL;

> +

> +       if (!xgpu)

> +               return 0;

> +

> +       sa = &xgpu->sa;

> +

> +       return xgpu_vm_map_csa(adev, vm, sa);

> +}

> +

> +static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,

> +                             struct amd_xgpu_csa *sa)

> +{

> +       int r;

> +       struct ww_acquire_ctx ticket;

> +       struct list_head list, duplicates;

> +       struct amdgpu_bo_list_entry pd;

> +

> +       if (!vm->csa.va)

> +               return;

> +

> +       INIT_LIST_HEAD(&list);

> +       INIT_LIST_HEAD(&duplicates);

> +       list_add(&vm->csa.tv.head, &list);

> +       amdgpu_vm_get_pd_bo(vm, &list, &pd);

> +

> +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);

> +       if (r) {

> +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);

> +               return;

> +       }

> +

> +       amdgpu_vm_bo_rmv(adev, vm->csa.va);

> +       /* maybe we don't need to do real clearing for the vm will die soon */

> +       r = amdgpu_vm_clear_freed(adev, vm);

> +       if (r) {

> +               DRM_ERROR("failed to clear global CSA bo(%d).\n", r);

> +               return;

> +       }

> +

> +       ttm_eu_backoff_reservation(&ticket, &list);

> +       vm->csa.va = NULL;

> +       vm->csa.csa_addr = vm->csa.gds_addr = 0;

> +}

> +

> +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)

> +{

> +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;

> +       struct amd_xgpu_csa *sa = NULL;

> +

> +       if (!xgpu)

> +               return;

> +

> +       sa = &xgpu->sa;

> +       xgpu_vm_unmap_csa(adev, vm, sa);

> +}

> --

> 2.7.4

>

> _______________________________________________

> amd-gfx mailing list

> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>

> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
NAK on that whole approach. We previously had similar handling in radeon 
for the temporary IB buffer and it was a pain to support correctly.

The main problem is that you might then have a mismatch between the 
housekeeping structures in the VM and what is really in the page tables 
/ page directory.

Additionally, a concrete problem is that you simply assume that the PD and 
PTs are present when the VM is created, but that might not be correct 
under memory pressure. In this case the code below will just fail miserably.

I suggest that you go a step back and fully implement the support for 
the CSA for each context and not just a quick hack like this. If I 
remember our internal discussion correctly Monk already had promising 
locking patches for this for the general preemption support.

Regards,
Christian.

Am 20.12.2016 um 06:43 schrieb Yu, Xiangliang:
>
> Thank monk’s detail expanation.
>
> And I think this patch is only support virtualization world switch, 
> not touch whole amdpgu preemption.
>
> Thanks!
>
> Xiangliang Yu
>
> *From:*Liu, Monk
> *Sent:* Tuesday, December 20, 2016 11:58 AM
> *To:* Alex Deucher <alexdeucher@gmail.com>; Yu, Xiangliang 
> <Xiangliang.Yu@amd.com>
> *Cc:* amd-gfx list <amd-gfx@lists.freedesktop.org>; 
> dl.SRDC_SW_GPUVirtualization <dl.SRDC_SW_GPUVirtualization@amd.com>
> *Subject:* 答复: [PATCH 11/23] drm/amdgpu: implement context save 
> area(CSA) feature
>
> the CSA is used for world switch, and each amdgpu device should have 
> one and only one CSA,
>
> and this CSA will pined, and mapped to each virtual memory /process.
>
> CP/RLCV will use this CSA buffer when preemption occurred, and will 
> write some hardware status into this CSA buffer, within the current 
> IB's context (that's why need do mapping for each virtual memory on CSA)
>
> BR Monk
>
> ------------------------------------------------------------------------
>
> *发件人**:*Alex Deucher <alexdeucher@gmail.com 
> <mailto:alexdeucher@gmail.com>>
> *发送时间**:*2016年12月20日7:20:09
> *收件人**:*Yu, Xiangliang
> *抄送**:*amd-gfx list; dl.SRDC_SW_GPUVirtualization; Liu, Monk
> *主题**:*Re: [PATCH 11/23] drm/amdgpu: implement context save area(CSA) 
> feature
>
> On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu@amd.com 
> <mailto:Xiangliang.Yu@amd.com>> wrote:
> > CSA is need by world switch. This patch implement CSA feature and
> > bind it to each VM, so hardware can save the state into the area
> > and restore it when running again.
> >
> > Signed-off-by: Monk Liu <Monk.Liu@amd.com <mailto:Monk.Liu@amd.com>>
> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com 
> <mailto:Xiangliang.Yu@amd.com>>
>
> Isn't the CSA actually for preemption?  Preemption is useful outside
> of the VF case as well so it should be untangled from the mxgpu code
> so it can be utilized independently.
>
> Alex
>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  14 +++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   8 ++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |   4 +
> >  drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    |  12 ++
> >  drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 209 
> +++++++++++++++++++++++++++++++
> >  5 files changed, 247 insertions(+)
> >  create mode 100644 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > index 8ee70f8..dff1248 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > @@ -24,6 +24,8 @@
> >  #ifndef AMDGPU_VIRT_H
> >  #define AMDGPU_VIRT_H
> >
> > +struct amdgpu_vm;
> > +
> >  #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS  (1 << 0) /* vBIOS is sr-iov 
> ready */
> >  #define AMDGPU_SRIOV_CAPS_ENABLE_IOV   (1 << 1) /* sr-iov is 
> enabled on this GPU */
> >  #define AMDGPU_SRIOV_CAPS_IS_VF        (1 << 2) /* this GPU is a 
> virtual function */
> > @@ -33,6 +35,14 @@ struct amdgpu_virtualization {
> >         uint32_t virtual_caps;
> >  };
> >
> > +struct amdgpu_csa {
> > +       struct amdgpu_bo_va             *va;
> > +       struct ttm_validate_buffer      tv;
> > +       uint64_t reserved_top;
> > +       uint64_t                        csa_addr;
> > +       uint64_t                        gds_addr;
> > +};
> > +
> >  #define amdgpu_sriov_enabled(adev) \
> >  ((adev)->virtualization.virtual_caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
> >
> > @@ -55,4 +65,8 @@ static inline bool is_virtual_machine(void)
> >  }
> >
> >  int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
> > +
> > +/* Context Save Area functions */
> > +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm 
> *vm);
> > +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm);
> >  #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index d05546e..98540d9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -1539,6 +1539,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
> struct amdgpu_vm *vm)
> >         pd_size = amdgpu_vm_directory_size(adev);
> >         pd_entries = amdgpu_vm_num_pdes(adev);
> >
> > +       vm->csa.reserved_top = AMDGPU_VA_RESERVED_SIZE;
> > +
> >         /* allocate page table array */
> >         vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct 
> amdgpu_vm_pt));
> >         if (vm->page_tables == NULL) {
> > @@ -1576,6 +1578,10 @@ int amdgpu_vm_init(struct amdgpu_device 
> *adev, struct amdgpu_vm *vm)
> >         vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
> >         amdgpu_bo_unreserve(vm->page_directory);
> >
> > +       r = amdgpu_vm_map_csa(adev, vm);
> > +       if (r)
> > +               goto error_free_page_directory;
> > +
> >         return 0;
> >
> >  error_free_page_directory:
> > @@ -1606,6 +1612,8 @@ void amdgpu_vm_fini(struct amdgpu_device 
> *adev, struct amdgpu_vm *vm)
> >         struct amdgpu_bo_va_mapping *mapping, *tmp;
> >         int i;
> >
> > +       amdgpu_vm_unmap_csa(adev, vm);
> > +
> >         amd_sched_entity_fini(vm->entity.sched, &vm->entity);
> >
> >         if (!RB_EMPTY_ROOT(&vm->va)) {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index 42a629b..d90630a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -29,6 +29,7 @@
> >  #include "gpu_scheduler.h"
> >  #include "amdgpu_sync.h"
> >  #include "amdgpu_ring.h"
> > +#include "amdgpu_virt.h"
> >
> >  struct amdgpu_bo_va;
> >  struct amdgpu_job;
> > @@ -109,6 +110,9 @@ struct amdgpu_vm {
> >         /* Scheduler entity for page table updates */
> >         struct amd_sched_entity entity;
> >
> > +       /* Context Save Area */
> > +       struct amdgpu_csa       csa;
> > +
> >         /* client id */
> >         u64                     client_id;
> >  };
> > diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h 
> b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> > index 6ab13bc..a25e07f 100644
> > --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> > +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
> > @@ -26,12 +26,24 @@
> >
> >  #include "amdgpu.h"
> >
> > +/* xgpu structures */
> > +struct amd_xgpu_csa {
> > +       struct amdgpu_bo            *robj;
> > +       uint64_t                    gpu_addr;
> > +       uint64_t                    gds_addr;
> > +       int32_t                     size;
> > +};
> > +
> >  struct amd_xgpu {
> >         struct amdgpu_device    *adev;
> >         struct mutex            lock;
> > +       struct amd_xgpu_csa     sa;
> >         u32                     reg_val_offs;
> >  };
> >
> >  extern int amd_xgpu_alloc(struct amdgpu_device *adev);
> >  extern void amd_xgpu_free(struct amd_xgpu *xgpu);
> > +
> > +extern int xgpu_allocate_csa(struct amd_xgpu *xgpu);
> > +extern void xgpu_destroy_csa(struct amd_xgpu_csa *csa);
> >  #endif
> > diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c 
> b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> > new file mode 100644
> > index 0000000..246a747
> > --- /dev/null
> > +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
> > @@ -0,0 +1,209 @@
> > +/*
> > + * Copyright 2016 Advanced Micro Devices, Inc.
> > + *
> > + * Permission is hereby granted, free of charge, to any person 
> obtaining a
> > + * copy of this software and associated documentation files (the 
> "Software"),
> > + * to deal in the Software without restriction, including without 
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, 
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to 
> whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be 
> included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
> EVENT SHALL
> > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
> DAMAGES OR
> > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
> OTHERWISE,
> > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
> USE OR
> > + * OTHER DEALINGS IN THE SOFTWARE.
> > + *
> > + * Authors: Xiangliang.Yu@amd.com <mailto:Xiangliang.Yu@amd.com>
> > + * Monk.Liu@amd.com <mailto:Monk.Liu@amd.com>
> > + *
> > + */
> > +#include "amd_mxgpu.h"
> > +#include "vid.h"
> > +
> > +static int xgpu_init_csa(struct amdgpu_device *adev, struct 
> amd_xgpu_csa *sa)
> > +{
> > +       int r, size;
> > +       void *ptr;
> > +
> > +       /* meta data (4k) + gds-gfx (4k)*/
> > +       size = PAGE_SIZE + adev->gds.mem.gfx_partition_size;
> > +
> > +       r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
> > +                            AMDGPU_GEM_DOMAIN_GTT,
> > + AMDGPU_GEM_CREATE_CPU_GTT_USWC,
> > +                            NULL, NULL, &sa->robj);
> > +       if (r) {
> > +               dev_err(adev->dev, "(%d) failed to allocate csa 
> bo\n", r);
> > +               return r;
> > +       }
> > +
> > +       r = amdgpu_bo_reserve(sa->robj, true);
> > +       if (unlikely(r != 0))
> > +               goto error_free;
> > +
> > +       r = amdgpu_bo_pin(sa->robj, AMDGPU_GEM_DOMAIN_GTT, 
> &sa->gpu_addr);
> > +       if (r)
> > +               goto error_unreserve;
> > +
> > +       r = amdgpu_bo_kmap(sa->robj, &ptr);
> > +       if (r)
> > +               goto error_unpin;
> > +
> > +       memset(ptr, 0, size);
> > +       amdgpu_bo_unreserve(sa->robj);
> > +
> > +       sa->size = size;
> > +       sa->gds_addr = sa->gpu_addr + PAGE_SIZE;
> > +
> > +       return 0;
> > +
> > +error_unpin:
> > +       amdgpu_bo_unpin(sa->robj);
> > +error_unreserve:
> > +       amdgpu_bo_unreserve(sa->robj);
> > +error_free:
> > +       amdgpu_bo_unref(&sa->robj);
> > +       return r;
> > +}
> > +
> > +int xgpu_allocate_csa(struct amd_xgpu *xgpu)
> > +{
> > +       struct amdgpu_device *adev = xgpu->adev;
> > +       struct amd_xgpu_csa *sa = &xgpu->sa;
> > +
> > +       return xgpu_init_csa(adev, sa);
> > +}
> > +
> > +void xgpu_destroy_csa(struct amd_xgpu_csa *sa)
> > +{
> > +       amdgpu_bo_reserve(sa->robj, true);
> > +       amdgpu_bo_unpin(sa->robj);
> > +       amdgpu_bo_unreserve(sa->robj);
> > +       amdgpu_bo_unref(&sa->robj);
> > +       sa->gpu_addr = 0;
> > +       sa->gds_addr = 0;
> > +}
> > +
> > +static int xgpu_vm_map_csa(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm,
> > +                          struct amd_xgpu_csa *sa)
> > +{
> > +       int r;
> > +       uint64_t vaddr;
> > +       struct ww_acquire_ctx ticket;
> > +       struct list_head list, duplicates;
> > +       struct amdgpu_bo_list_entry pd;
> > +       struct amdgpu_bo_va *bo_va;
> > +
> > +       INIT_LIST_HEAD(&list);
> > +       INIT_LIST_HEAD(&duplicates);
> > +       INIT_LIST_HEAD(&vm->csa.tv.head);
> > +       vm->csa.tv.bo = &sa->robj->tbo;
> > +       vm->csa.tv.shared = true;
> > +
> > +       list_add(&vm->csa.tv.head, &list);
> > +       amdgpu_vm_get_pd_bo(vm, &list, &pd);
> > +
> > +       spin_lock(&vm->status_lock);
> > +       vm->csa.reserved_top -= sa->size;
> > +       vaddr = vm->csa.reserved_top;
> > +       spin_unlock(&vm->status_lock);
> > +
> > +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
> > +       if (r) {
> > +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
> > +               return r;
> > +       }
> > +
> > +       bo_va = amdgpu_vm_bo_add(adev, vm, sa->robj);
> > +       if (!bo_va) {
> > +               DRM_ERROR("failed to create bo_va for global CSA buffer.\n");
> > +               return -ENOMEM;
> > +       }
> > +
> > +
> > +       r = amdgpu_vm_bo_map(adev, bo_va, vaddr, 0, sa->size,
> > +                            AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
> > +                            AMDGPU_PTE_EXECUTABLE);
> > +       if (r) {
> > +               DRM_ERROR("failed to do bo_map on global CSA buffer(%d).\n", r);
> > +               amdgpu_vm_bo_rmv(adev, bo_va);
> > +               ttm_eu_backoff_reservation(&ticket, &list);
> > +               kfree(bo_va);
> > +               return r;
> > +       }
> > +
> > +       ttm_eu_backoff_reservation(&ticket, &list);
> > +       amdgpu_gem_va_update_vm(adev, bo_va, AMDGPU_VA_OP_MAP);
> > +
> > +       vm->csa.va = bo_va;
> > +       vm->csa.csa_addr = vaddr;
> > +       vm->csa.gds_addr = vaddr + PAGE_SIZE;
> > +
> > +       return 0;
> > +}
> > +
> > +int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> > +{
> > +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
> > +       struct amd_xgpu_csa *sa = NULL;
> > +
> > +       if (!xgpu)
> > +               return 0;
> > +
> > +       sa = &xgpu->sa;
> > +
> > +       return xgpu_vm_map_csa(adev, vm, sa);
> > +}
> > +
> > +static void xgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> > +                             struct amd_xgpu_csa *sa)
> > +{
> > +       int r;
> > +       struct ww_acquire_ctx ticket;
> > +       struct list_head list, duplicates;
> > +       struct amdgpu_bo_list_entry pd;
> > +
> > +       if (!vm->csa.va)
> > +               return;
> > +
> > +       INIT_LIST_HEAD(&list);
> > +       INIT_LIST_HEAD(&duplicates);
> > +       list_add(&vm->csa.tv.head, &list);
> > +       amdgpu_vm_get_pd_bo(vm, &list, &pd);
> > +
> > +       r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
> > +       if (r) {
> > +               DRM_ERROR("failed to reserve global CSA buffer(%d).\n", r);
> > +               return;
> > +       }
> > +
> > +       amdgpu_vm_bo_rmv(adev, vm->csa.va);
> > +       /* maybe we don't need to do real clearing, as the VM will die soon */
> > +       r = amdgpu_vm_clear_freed(adev, vm);
> > +       if (r) {
> > +               DRM_ERROR("failed to clear global CSA bo(%d).\n", r);
> > +               return;
> > +       }
> > +
> > +       ttm_eu_backoff_reservation(&ticket, &list);
> > +       vm->csa.va = NULL;
> > +       vm->csa.csa_addr = vm->csa.gds_addr = 0;
> > +}
> > +
> > +void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> > +{
> > +       struct amd_xgpu *xgpu = (struct amd_xgpu *)adev->priv_data;
> > +       struct amd_xgpu_csa *sa = NULL;
> > +
> > +       if (!xgpu)
> > +               return;
> > +
> > +       sa = &xgpu->sa;
> > +       xgpu_vm_unmap_csa(adev, vm, sa);
> > +}
> > --
> > 2.7.4
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org <mailto:amd-gfx@lists.freedesktop.org>
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx