[01/12] drm/amdgpu: implement vm_operations_struct.access

Submitted by Kuehling, Felix on July 3, 2017, 9:11 p.m.

Details

Message ID 1499116294-12817-2-git-send-email-Felix.Kuehling@amd.com
State New
Series "Patches from amd-kfd-staging" ( rev: 4 3 2 1 ) in AMD X.Org drivers


Commit Message

Kuehling, Felix July 3, 2017, 9:11 p.m.
Allows gdb to access contents of user mode mapped BOs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 130 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |   2 +
 2 files changed, 131 insertions(+), 1 deletion(-)


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15148f1..3f927c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1237,6 +1237,134 @@  void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
 	man->size = size >> PAGE_SHIFT;
 }
 
+static struct vm_operations_struct amdgpu_ttm_vm_ops;
+static const struct vm_operations_struct *ttm_vm_ops /* = NULL;
+						      * (appease checkpatch) */;
+static int amdgpu_ttm_bo_access_vram(struct amdgpu_bo *abo,
+				     unsigned long offset,
+				     void *buf, int len, int write)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int result = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
+
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
+		}
+	}
+
+	return result;
+}
+
+static int amdgpu_ttm_bo_access_kmap(struct amdgpu_bo *abo,
+				     unsigned long offset,
+				     void *buf, int len, int write)
+{
+	struct ttm_buffer_object *bo = &abo->tbo;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int r;
+
+	r = ttm_bo_kmap(bo, 0, bo->num_pages, &map);
+	if (r)
+		return r;
+	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+	WARN_ON(is_iomem);
+	if (write)
+		memcpy(ptr, buf, len);
+	else
+		memcpy(buf, ptr, len);
+	ttm_bo_kunmap(&map);
+
+	return len;
+}
+
+static int amdgpu_ttm_vm_access(struct vm_area_struct *vma, unsigned long addr,
+				void *buf, int len, int write)
+{
+	unsigned long offset = (addr) - vma->vm_start;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	unsigned domain;
+	int result;
+
+	result = amdgpu_bo_reserve(abo, false);
+	if (result != 0)
+		return result;
+
+	domain = amdgpu_mem_type_to_domain(bo->mem.mem_type);
+	if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+		result = amdgpu_ttm_bo_access_vram(abo, offset,
+						   buf, len, write);
+	else
+		result = amdgpu_ttm_bo_access_kmap(abo, offset,
+						   buf, len, write);
+	amdgpu_bo_unreserve(abo);
+
+	return len;
+}
+
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev)
+{
+	int r;
+
+	r = ttm_bo_mmap(filp, vma, bdev);
+	if (unlikely(r != 0))
+		return r;
+
+	if (unlikely(ttm_vm_ops == NULL)) {
+		ttm_vm_ops = vma->vm_ops;
+		amdgpu_ttm_vm_ops = *ttm_vm_ops;
+		amdgpu_ttm_vm_ops.access = &amdgpu_ttm_vm_access;
+	}
+	vma->vm_ops = &amdgpu_ttm_vm_ops;
+
+	return 0;
+}
+
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *file_priv;
@@ -1250,7 +1378,7 @@  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (adev == NULL)
 		return -EINVAL;
 
-	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
+	return amdgpu_bo_mmap(filp, vma, &adev->mman.bdev);
 }
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 776a20a..1eb605c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,6 +73,8 @@  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			struct dma_fence **fence);
 
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 

Comments

Any comments on this one?

This was requested by the HSA runtime team a long time ago as a
debugging feature. It allows gdb to access the content of CPU-mapped
BOs. I imagine this may be useful for user mode driver developers.

Does anyone dare to give me a Reviewed-by?

Regards,
  Felix


Hi Felix,

I think you might want to add a boundary check to limit the ptrace
access to within the BO.

Otherwise there may be a potential security leak allowing others to
access all of memory by using the ptrace system call.

-Alex Bin
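
To make the suggestion concrete, here is a rough sketch of the kind of bounds
check being described. It is illustrative only, not part of the submitted
patch; the placement (near the top of amdgpu_ttm_vm_access(), after offset has
been computed) and the use of amdgpu_bo_size(), which returns the BO size in
bytes, are assumptions made here:

	/* Illustrative bounds check: refuse accesses that start past the
	 * end of the BO and clamp the length so the access cannot run
	 * beyond it.
	 */
	if (len < 1 || offset >= amdgpu_bo_size(abo))
		return -EIO;
	if (len > amdgpu_bo_size(abo) - offset)
		len = amdgpu_bo_size(abo) - offset;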


Hi Felix,

in theory all ranges should be limited by the VMA; the TTM mapping code
makes that assumption as well.

But I agree that we had better be safe than sorry.

In addition to that, one thing stands out:
> +    r = ttm_bo_kmap(bo, 0, bo->num_pages, &map);

Please try to avoid mapping the whole BO if you want to access only some
bytes. That in turn usually results in changing the page tables, which is
a bit invasive for the ptrace callback.

In general we should try to avoid accessing more than one page at once.

Regards,
Christian.
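
As a rough illustration of the per-page approach described above, a kmap-based
helper could map just the page being touched in each iteration. This is only a
sketch, assuming the same parameters as amdgpu_ttm_bo_access_kmap() in the
patch; the function name is made up and the error handling is simplified:

static int amdgpu_ttm_bo_access_kmap_paged(struct amdgpu_bo *abo,
					   unsigned long offset,
					   void *buf, int len, int write)
{
	struct ttm_buffer_object *bo = &abo->tbo;
	int copied = 0;

	while (len) {
		struct ttm_bo_kmap_obj map;
		unsigned long page = offset >> PAGE_SHIFT;
		unsigned long page_off = offset & ~PAGE_MASK;
		unsigned long bytes = PAGE_SIZE - page_off;
		bool is_iomem;
		void *ptr;
		int r;

		if (bytes > (unsigned long)len)
			bytes = len;

		/* Map only the single page touched by this chunk */
		r = ttm_bo_kmap(bo, page, 1, &map);
		if (r)
			return r;

		ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + page_off;
		if (write)
			memcpy(ptr, buf, bytes);
		else
			memcpy(buf, ptr, bytes);
		ttm_bo_kunmap(&map);

		offset += bytes;
		buf = (uint8_t *)buf + bytes;
		len -= bytes;
		copied += bytes;
	}

	return copied;
}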

On 12/07/17 02:37 PM, Felix Kuehling wrote:
> Any comments on this one?
> 
> This was requested by the HSA runtime team a long time ago as a
> debugging feature. It allows gdb to access the content of CPU-mapped
> BOs. I imagine this may be useful for user mode driver developers.

Adding the dri-devel list.


>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 15148f1..3f927c2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1237,6 +1237,134 @@ void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
>>  	man->size = size >> PAGE_SHIFT;
>>  }
>>  
>> +static struct vm_operations_struct amdgpu_ttm_vm_ops;
>> +static const struct vm_operations_struct *ttm_vm_ops /* = NULL;
>> +						      * (appease checkpatch) */;

How does this appease checkpatch?


>> +static int amdgpu_ttm_bo_access_kmap(struct amdgpu_bo *abo,
>> +				     unsigned long offset,
>> +				     void *buf, int len, int write)
>> +{
>> +	struct ttm_buffer_object *bo = &abo->tbo;
>> +	struct ttm_bo_kmap_obj map;
>> +	void *ptr;
>> +	bool is_iomem;
>> +	int r;
>> +
>> +	r = ttm_bo_kmap(bo, 0, bo->num_pages, &map);
>> +	if (r)
>> +		return r;
>> +	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
>> +	WARN_ON(is_iomem);
>> +	if (write)
>> +		memcpy(ptr, buf, len);
>> +	else
>> +		memcpy(buf, ptr, len);
>> +	ttm_bo_kunmap(&map);
>> +
>> +	return len;
>> +}

This could be in TTM, right? As a helper function and/or a generic
vm_operations_struct::access hook (at least for GTT/CPU domains).

I agree that, in theory, all ranges should be limited by the VMA. But in
reality, I did not see such a boundary check yesterday when I quickly read
through the source code where ptrace calls the access function.

We may fix it in ptrace if that check is really missing. For the time being,
adding a boundary check on the AMDGPU side may be a good idea.

Thanks,
Alex Bin

On 2017-07-12 03:22 AM, Christian König wrote:
> Hi Felix,
>
> in theory all ranges should be limited by the VMA; the TTM mapping
> code makes that assumption as well.
>
> But I agree that we had better be safe than sorry.

On 17-07-12 04:01 AM, Michel Dänzer wrote:
>>> +static struct vm_operations_struct amdgpu_ttm_vm_ops;
>>> +static const struct vm_operations_struct *ttm_vm_ops /* = NULL;
>>> +						      * (appease checkpatch) */;
> How does this appease checkpatch?

Checkpatch doesn't like explicit initialization of global variables to
0. Uninitialized data is automatically 0, so no point wasting space in
the initialized data segment.
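
For reference, the two spellings under discussion look roughly like this (the
checkpatch wording is paraphrased, and the two lines are alternatives, not
meant to coexist):

	/* flagged by checkpatch ("do not initialise statics to NULL"): */
	static const struct vm_operations_struct *ttm_vm_ops = NULL;

	/* preferred: statics start out zeroed, so nothing is lost */
	static const struct vm_operations_struct *ttm_vm_ops;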

> This could be in TTM, right? As a helper function and/or a generic
> vm_operations_struct::access hook (at least for GTT/CPU domains).

I guess. If I were to get TTM involved, I'd also have TTM install its
own access hook in the vm_ops, so the driver doesn't need to override
it. Then I'd add driver-specific callbacks in ttm_bo_driver for
accessing VRAM and GTT/CPU memory. TTM could offer something like
amdgpu_ttm_bo_access_kmap as a helper for use by all drivers.

Regards,
  Felix
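
To make that direction concrete, here is a hypothetical sketch of a generic
TTM access hook dispatching to a per-driver callback. The access_memory member
and the kmap helper named below are not taken from TTM; they are invented here
purely for illustration:

/* Hypothetical generic helper TTM could provide for CPU-reachable
 * placements, along the lines of amdgpu_ttm_bo_access_kmap above.
 */
static int ttm_bo_vm_access_kmap_sketch(struct ttm_buffer_object *bo,
					unsigned long offset,
					void *buf, int len, int write);

static int ttm_bo_vm_access_sketch(struct vm_area_struct *vma,
				   unsigned long addr,
				   void *buf, int len, int write)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;
	unsigned long offset = addr - vma->vm_start;
	int ret;

	if (len < 1 || (offset + len) > (bo->num_pages << PAGE_SHIFT))
		return -EIO;

	ret = ttm_bo_reserve(bo, true, false, NULL);
	if (ret)
		return ret;

	switch (bo->mem.mem_type) {
	case TTM_PL_SYSTEM:
	case TTM_PL_TT:
		/* CPU-reachable placements: generic kmap-based helper */
		ret = ttm_bo_vm_access_kmap_sketch(bo, offset, buf, len, write);
		break;
	default:
		/* e.g. VRAM: defer to a driver callback, assuming one were
		 * added to struct ttm_bo_driver
		 */
		if (bo->bdev->driver->access_memory)
			ret = bo->bdev->driver->access_memory(bo, offset, buf,
							      len, write);
		else
			ret = -EIO;
	}

	ttm_bo_unreserve(bo);

	return ret;
}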

On 13/07/17 02:27 AM, Felix Kuehling wrote:
>>>> +static struct vm_operations_struct amdgpu_ttm_vm_ops;
>>>> +static const struct vm_operations_struct *ttm_vm_ops /* = NULL;
>>>> +						      * (appease checkpatch) */;
>> How does this appease checkpatch?
> 
> Checkpatch doesn't like explicit initialization of global variables to
> 0. Uninitialized data is automatically 0, so no point wasting space in
> the initialized data segment.

That is true for static variables, so I suspect checkpatch explicitly
complains about initializing those to 0. Just remove the comment (and
the initialization).


>> This could be in TTM, right? As a helper function and/or a generic
>> vm_operations_struct::access hook (at least for GTT/CPU domains).
> 
> I guess. If I were to get TTM involved, I'd also have TTM install its
> own access hook in the vm_ops, so the driver doesn't need to override
> it. Then I'd add driver-specific callbacks in ttm_bo_driver for
> accessing VRAM

So far, I was thinking basically the same thing.


> and GTT/CPU memory. TTM could offer something like
> amdgpu_ttm_bo_access_kmap as a helper for use by all drivers.

But this doesn't seem necessary, since GTT/CPU domain CPU access should
work the same independent of the driver.