[RFC,13/14] anv/allocator: Add padding information.

Submitted by Rafael Antognolli on Dec. 8, 2018, 12:05 a.m.

Details

Message ID 20181208000553.29501-14-rafael.antognolli@intel.com
State New
Headers show
Series "Do not use userptr in anv if softpin is available." ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Rafael Antognolli Dec. 8, 2018, 12:05 a.m.
It's possible that we still have some space left in the block pool, but
we try to allocate a state larger than that state. This means such state
would start somewhere within the range of the old block_pool, and end
after that range, within the range of the new size.

That's fine when we use userptr, since the memory in the block pool is
CPU mapped continuously. However, by the end of this series, we will
have the block_pool split into different BOs, with different CPU
mapping ranges that are not necessarily continuous. So we must avoid
such case of a given state being part of two different BOs in the block
pool.

This commit solves the issue by detecting that we are growing the
block_pool even though we are not at the end of the range. If that
happens, we don't use the space left at the end of the old size, and
consider it as "padding" that can't be used in the allocation. We update
the size requested from the block pool to take the padding into account,
and return the offset after the padding, which happens to be at the
start of the new address range.

Additionally, we return the amount of padding we used, so the caller
knows that this happens and can return that padding back into a list of
free states, that can be reused later. This way we hopefully don't waste
any space, but also avoid having a state split between two different
BOs.
---
 src/intel/vulkan/anv_allocator.c            | 57 ++++++++++++++++++---
 src/intel/vulkan/anv_private.h              |  2 +-
 src/intel/vulkan/tests/block_pool_no_free.c |  2 +-
 3 files changed, 51 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index bddeb4a0fbd..0d426edfb57 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -839,16 +839,35 @@  done:
 static uint32_t
 anv_block_pool_alloc_new(struct anv_block_pool *pool,
                          struct anv_block_state *pool_state,
-                         uint32_t block_size)
+                         uint32_t block_size, uint32_t *padding)
 {
    struct anv_block_state state, old, new;
 
+   /* Most allocations won't generate any padding */
+   if (padding)
+      *padding = 0;
+
    while (1) {
       state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
       if (state.next + block_size <= state.end) {
          assert(pool->map);
          return state.next;
       } else if (state.next <= state.end) {
+         if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) {
+            /* We need to grow the block pool, but still have some leftover
+             * space that can't be used by that particular allocation. So we
+             * add that as a "padding", and return it.
+             */
+            uint32_t leftover = state.end - state.next;
+            block_size += leftover;
+
+            /* If there is some leftover space in the pool, the caller must
+             * deal with it.
+             */
+            assert(leftover == 0 || padding);
+            *padding = leftover;
+         }
+
          /* We allocated the first block outside the pool so we have to grow
           * the pool.  pool_state->next acts a mutex: threads who try to
           * allocate now will get block indexes above the current limit and
@@ -872,9 +891,16 @@  anv_block_pool_alloc_new(struct anv_block_pool *pool,
 
 int32_t
 anv_block_pool_alloc(struct anv_block_pool *pool,
-                     uint32_t block_size)
+                     uint32_t block_size, uint32_t *padding)
 {
-   return anv_block_pool_alloc_new(pool, &pool->state, block_size);
+   uint32_t offset;
+
+   offset = anv_block_pool_alloc_new(pool, &pool->state, block_size, padding);
+
+   if (padding && *padding > 0)
+      offset += *padding;
+
+   return offset;
 }
 
 /* Allocates a block out of the back of the block pool.
@@ -891,7 +917,7 @@  anv_block_pool_alloc_back(struct anv_block_pool *pool,
                           uint32_t block_size)
 {
    int32_t offset = anv_block_pool_alloc_new(pool, &pool->back_state,
-                                             block_size);
+                                             block_size, NULL);
 
    /* The offset we get out of anv_block_pool_alloc_new() is actually the
     * number of bytes downwards from the middle to the end of the block.
@@ -947,16 +973,24 @@  static uint32_t
 anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
                                     struct anv_block_pool *block_pool,
                                     uint32_t state_size,
-                                    uint32_t block_size)
+                                    uint32_t block_size,
+                                    uint32_t *padding)
 {
    struct anv_block_state block, old, new;
    uint32_t offset;
 
+   /* We don't always use anv_block_pool_alloc(), which would set *padding to
+    * zero for us. So if we have a pointer to padding, we must zero it out
+    * ourselves here, to make sure we always return some sensible value.
+    */
+   if (padding)
+      *padding = 0;
+
    /* If our state is large, we don't need any sub-allocation from a block.
     * Instead, we just grab whole (potentially large) blocks.
     */
    if (state_size >= block_size)
-      return anv_block_pool_alloc(block_pool, state_size);
+      return anv_block_pool_alloc(block_pool, state_size, padding);
 
  restart:
    block.u64 = __sync_fetch_and_add(&pool->block.u64, state_size);
@@ -964,7 +998,7 @@  anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
    if (block.next < block.end) {
       return block.next;
    } else if (block.next == block.end) {
-      offset = anv_block_pool_alloc(block_pool, block_size);
+      offset = anv_block_pool_alloc(block_pool, block_size, padding);
       new.next = offset + state_size;
       new.end = offset + block_size;
       old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
@@ -1033,6 +1067,7 @@  calculate_divisor(uint32_t size)
       uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
       if (size % bucket_size == 0)
          return bucket_size;
+      bucket--;
    }
 
    return 0;
@@ -1156,10 +1191,12 @@  anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
       }
    }
 
+   uint32_t padding;
    offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
                                                 &pool->block_pool,
                                                 alloc_size,
-                                                pool->block_size);
+                                                pool->block_size,
+                                                &padding);
    /* Everytime we allocate a new state, add it to the state pool */
    uint32_t idx = anv_state_table_add(&pool->table, 1);
    state = anv_state_table_get(&pool->table, idx);
@@ -1169,6 +1206,10 @@  anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
    struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
                                                      state->offset);
    state->map = pool_map.map + pool_map.offset;
+   if (padding > 0) {
+      uint32_t return_offset = offset - padding;
+      anv_state_pool_return_chunk(pool, return_offset, padding, 0);
+   }
 
 
 done:
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index b01b392daee..93fdc1e56a2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -781,7 +781,7 @@  VkResult anv_block_pool_init(struct anv_block_pool *pool,
                              uint64_t bo_flags);
 void anv_block_pool_finish(struct anv_block_pool *pool);
 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
-                             uint32_t block_size);
+                             uint32_t block_size, uint32_t *padding);
 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
                                   uint32_t block_size);
 struct anv_pool_map anv_block_pool_map(struct anv_block_pool *pool,
diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c
index 730297d4e36..a588455436a 100644
--- a/src/intel/vulkan/tests/block_pool_no_free.c
+++ b/src/intel/vulkan/tests/block_pool_no_free.c
@@ -46,7 +46,7 @@  static void *alloc_blocks(void *_job)
    int32_t block, *data;
 
    for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
-      block = anv_block_pool_alloc(job->pool, block_size);
+      block = anv_block_pool_alloc(job->pool, block_size, NULL);
       data = job->pool->map + block;
       *data = block;
       assert(block >= 0);