[RFC,12/14] anv/allocator: Rework chunk return to the state pool.

Submitted by Rafael Antognolli on Dec. 8, 2018, 12:05 a.m.

Details

Message ID 20181208000553.29501-13-rafael.antognolli@intel.com
State New
Headers show
Series "Do not use userptr in anv if softpin is available." ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Rafael Antognolli Dec. 8, 2018, 12:05 a.m.
This commit tries to rework the code that split and returns chunks back
to the state pool, while still keeping the same logic.

The original code would get a chunk larger than we need and split it
into pool->block_size. Then it would return all but the first one, and
would split that first one into alloc_size chunks. Then it would keep
the first one (for the allocation), and return the others back to the
pool.

The new anv_state_pool_return_chunk() function will take a chunk (with
the alloc_size part removed), and a small_size hint. It then splits that
chunk into pool->block_size'd chunks, and if there's some space still
left, split that into small_size chunks. small_size in this case is the
same size as alloc_size.

The idea is to keep the same logic, but make it in a way we can reuse it
to return other chunks to the pool when we are growing the buffer.
---
 src/intel/vulkan/anv_allocator.c | 147 +++++++++++++++++++++----------
 1 file changed, 102 insertions(+), 45 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 31258e38635..bddeb4a0fbd 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -994,6 +994,97 @@  anv_state_pool_get_bucket_size(uint32_t bucket)
    return 1 << size_log2;
 }
 
+/** Helper to create a chunk into the state table.
+ *
+ * It just creates 'count' entries into the state table and update their sizes,
+ * offsets and maps, also pushing them as "free" states.
+ */
+static void
+anv_state_pool_return_blocks(struct anv_state_pool *pool,
+                             uint32_t chunk_offset, uint32_t count,
+                             uint32_t block_size)
+{
+   if (count == 0)
+      return;
+
+   uint32_t st_idx = anv_state_table_add(&pool->table, count);
+   for (int i = 0; i < count; i++) {
+      /* update states that were added back to the state table */
+      struct anv_state *state_i = anv_state_table_get(&pool->table,
+                                                      st_idx + i);
+      state_i->alloc_size = block_size;
+      state_i->offset = chunk_offset + block_size * i;
+      struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
+                                                        state_i->offset);
+      state_i->map = pool_map.map + pool_map.offset;
+   }
+
+   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
+   anv_state_table_push(&pool->buckets[block_bucket].free_list,
+                        &pool->table, st_idx, count);
+}
+
+static uint32_t
+calculate_divisor(uint32_t size)
+{
+   uint32_t bucket = anv_state_pool_get_bucket(size);
+
+   while (bucket >= 0) {
+      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
+      if (size % bucket_size == 0)
+         return bucket_size;
+   }
+
+   return 0;
+}
+
+/** Returns a chunk of memory back to the state pool.
+ *
+ * If small_size is zero, we split chunk_size into pool->block_size'd pieces,
+ * and return those. If there's some remaining 'rest' space (chunk_size is not
+ * divisble by pool->block_size), then we find a bucket size that is a divisor
+ * of that rest, and split the 'rest' into that size, returning it to the pool.
+ *
+ * If small_size is non-zero, we use it in two different ways:
+ *    * if it is larger than pool->block_size, we split the chunk into
+ *    small_size'd pieces, instead of pool->block_size'd ones.
+ *    * we also use it as the desired size to split the 'rest' after we split
+ *    the bigger size of the chunk into pool->block_size;
+ */
+static void
+anv_state_pool_return_chunk(struct anv_state_pool *pool,
+                            uint32_t chunk_offset, uint32_t chunk_size,
+                            uint32_t small_size)
+{
+   uint32_t divisor = MAX2(pool->block_size, small_size);
+   uint32_t nblocks = chunk_size / divisor;
+   uint32_t rest = chunk_size % pool->block_size;
+
+   /* First return pool->block_size'd chunks.*/
+   uint32_t offset = chunk_offset + rest;
+   anv_state_pool_return_blocks(pool, offset, nblocks, pool->block_size);
+
+   if (rest == 0)
+      return;
+
+   chunk_size = rest;
+
+   if (small_size > 0) {
+      divisor = small_size;
+   } else {
+      /* Find the maximum divisor of the remaining chunk, and return smaller
+       * chunks of that size to the list.
+       */
+      divisor = calculate_divisor(chunk_size);
+      assert(divisor > 0);
+   }
+
+   /* Now return the smaller chunks of 'divisor' size */
+   assert(chunk_size % divisor == 0);
+   nblocks = (chunk_size / divisor);
+   anv_state_pool_return_blocks(pool, chunk_offset, nblocks, divisor);
+}
+
 static struct anv_state
 anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
                            uint32_t size, uint32_t align)
@@ -1025,6 +1116,10 @@  anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
           */
          state->alloc_size = alloc_size;
 
+         /* Now return the unused part of the chunk back to the pool as free
+          * blocks
+          */
+
          /* We've found a chunk that's larger than the requested state size.
           * There are a couple of options as to what we do with it:
           *
@@ -1049,52 +1144,14 @@  anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
           *       one of them.  Then we split what remains into
           *       state.alloc_size sized chunks and return all but one.
           *
-          * We choose option (3).
+          * We choose option (3). That is done by returning the remaining of
+          * the chunk with anv_state_pool_return_chunk(), with alloc_size as a
+          * hint of the size that we want the smaller chunk split into.
           */
-         if (chunk_size > pool->block_size &&
-             alloc_size < pool->block_size) {
-            assert(chunk_size % pool->block_size == 0);
-            /* We don't want to split giant chunks into tiny chunks.  Instead,
-             * break anything bigger than a block into block-sized chunks and
-             * then break it down into bucket-sized chunks from there.  Return
-             * all but the first block of the chunk to the block bucket.
-             */
-            uint32_t push_back = (chunk_size / pool->block_size) - 1;
-            const uint32_t block_bucket =
-               anv_state_pool_get_bucket(pool->block_size);
-            uint32_t st_idx = anv_state_table_add(&pool->table, push_back);
-            for (int i = 0; i < push_back; i++) {
-               /* update states that were added back to the state table */
-               struct anv_state *state_i = anv_state_table_get(&pool->table,
-                                                               st_idx + i);
-               state_i->alloc_size = pool->block_size;
-               state_i->offset = chunk_offset + pool->block_size * (i + 1);
-               struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
-                                                                 state_i->offset);
-               state_i->map = pool_map.map + pool_map.offset;
-            }
-            anv_state_table_push(&pool->buckets[block_bucket].free_list,
-                                 &pool->table, st_idx, push_back);
-            chunk_size = pool->block_size;
-         }
-
-         assert(chunk_size % alloc_size == 0);
-         uint32_t push_back = (chunk_size / alloc_size) - 1;
-         uint32_t st_idx = anv_state_table_add(&pool->table, push_back);
-         for (int i = 0; i < push_back; i++) {
-            /* update states that were added back to the state table */
-            struct anv_state *state_i = anv_state_table_get(&pool->table,
-                                                            st_idx + i);
-            state_i->alloc_size = alloc_size;
-            state_i->offset = chunk_offset + alloc_size * (i + 1);
-            struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
-                                                              state_i->offset);
-            state_i->map = pool_map.map + pool_map.offset;
-         }
-         anv_state_table_push(&pool->buckets[bucket].free_list,
-                              &pool->table, st_idx, push_back);
-
-         offset = chunk_offset;
+         uint32_t return_offset = chunk_offset + alloc_size;
+         uint32_t return_size = chunk_size - alloc_size;
+         anv_state_pool_return_chunk(pool, return_offset,
+                                     return_size, alloc_size);
          goto done;
       }
    }

Comments

On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
rafael.antognolli@intel.com> wrote:

> This commit tries to rework the code that split and returns chunks back
> to the state pool, while still keeping the same logic.
>
> The original code would get a chunk larger than we need and split it
> into pool->block_size. Then it would return all but the first one, and
> would split that first one into alloc_size chunks. Then it would keep
> the first one (for the allocation), and return the others back to the
> pool.
>
> The new anv_state_pool_return_chunk() function will take a chunk (with
> the alloc_size part removed), and a small_size hint. It then splits that
> chunk into pool->block_size'd chunks, and if there's some space still
> left, split that into small_size chunks. small_size in this case is the
> same size as alloc_size.
>
> The idea is to keep the same logic, but make it in a way we can reuse it
> to return other chunks to the pool when we are growing the buffer.
> ---
>  src/intel/vulkan/anv_allocator.c | 147 +++++++++++++++++++++----------
>  1 file changed, 102 insertions(+), 45 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c
> b/src/intel/vulkan/anv_allocator.c
> index 31258e38635..bddeb4a0fbd 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t bucket)
>     return 1 << size_log2;
>  }
>
> +/** Helper to create a chunk into the state table.
> + *
> + * It just creates 'count' entries into the state table and update their
> sizes,
> + * offsets and maps, also pushing them as "free" states.
> + */
> +static void
> +anv_state_pool_return_blocks(struct anv_state_pool *pool,
> +                             uint32_t chunk_offset, uint32_t count,
> +                             uint32_t block_size)
> +{
> +   if (count == 0)
> +      return;
> +
> +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
> +   for (int i = 0; i < count; i++) {
> +      /* update states that were added back to the state table */
> +      struct anv_state *state_i = anv_state_table_get(&pool->table,
> +                                                      st_idx + i);
> +      state_i->alloc_size = block_size;
> +      state_i->offset = chunk_offset + block_size * i;
> +      struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
> +                                                        state_i->offset);
> +      state_i->map = pool_map.map + pool_map.offset;
> +   }
> +
> +   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
> +   anv_state_table_push(&pool->buckets[block_bucket].free_list,
> +                        &pool->table, st_idx, count);
> +}
> +
> +static uint32_t
> +calculate_divisor(uint32_t size)
> +{
> +   uint32_t bucket = anv_state_pool_get_bucket(size);
> +
> +   while (bucket >= 0) {
> +      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
> +      if (size % bucket_size == 0)
> +         return bucket_size;
> +   }
> +
> +   return 0;
> +}
> +
> +/** Returns a chunk of memory back to the state pool.
> + *
> + * If small_size is zero, we split chunk_size into pool->block_size'd
> pieces,
> + * and return those. If there's some remaining 'rest' space (chunk_size
> is not
> + * divisble by pool->block_size), then we find a bucket size that is a
> divisor
> + * of that rest, and split the 'rest' into that size, returning it to the
> pool.
> + *
> + * If small_size is non-zero, we use it in two different ways:
> + *    * if it is larger than pool->block_size, we split the chunk into
> + *    small_size'd pieces, instead of pool->block_size'd ones.
> + *    * we also use it as the desired size to split the 'rest' after we
> split
> + *    the bigger size of the chunk into pool->block_size;
>

This seems both overly complicated and not really what we want.  If I have
a block size of 8k and allocate a single 64-byte state and then a 8k state,
it will break my almost 8k of padding into 511 64-byte states and return
those which may be very wasteful if the next thing I do is allocate a 1K
state.

It also doesn't provide the current alignment guarantees that all states
are aligned to their size.  While the alignment guarantee doesn't matter
for most large states, it does matter for some of the smaller sizes.  Now
that I look at it in detail, it appears that the highest alignment we ever
require is 64B and the smallest size we allow is 64B so maybe it just
doesn't matter?

Assuming we don't care about alignments, we could do something like this?

if (small_size > 0) {
   assert(chunk_size % small_size == 0);
   anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
small_size, small_size);
} else {
   uint32_t divisor = MAX_STATE_SIZE;
   while (divisor >= MIN_STATE_SIZE) {
      uint32_t nblocks = chunk_size / divisor;
      if (nblocks > 0) {
         anv_state_pool_return_blocs(pool, chunk_offset, nblocks, divisor);
         chunk_offset += nblocks * divisor;
         chunk_size -= nblocks * divisor;
      }
   }
}

If we do care about alignments, the above clearly isn't sufficient.  We'd
have to do something where we chunk it separately from both ends or
something.  It'd get kind-of gross.


> + */
> +static void
> +anv_state_pool_return_chunk(struct anv_state_pool *pool,
> +                            uint32_t chunk_offset, uint32_t chunk_size,
> +                            uint32_t small_size)
> +{
> +   uint32_t divisor = MAX2(pool->block_size, small_size);
> +   uint32_t nblocks = chunk_size / divisor;
> +   uint32_t rest = chunk_size % pool->block_size;
> +
> +   /* First return pool->block_size'd chunks.*/
> +   uint32_t offset = chunk_offset + rest;
> +   anv_state_pool_return_blocks(pool, offset, nblocks, pool->block_size);
> +
> +   if (rest == 0)
> +      return;
> +
> +   chunk_size = rest;
> +
> +   if (small_size > 0) {
> +      divisor = small_size;
> +   } else {
> +      /* Find the maximum divisor of the remaining chunk, and return
> smaller
> +       * chunks of that size to the list.
> +       */
> +      divisor = calculate_divisor(chunk_size);
> +      assert(divisor > 0);
> +   }
> +
> +   /* Now return the smaller chunks of 'divisor' size */
> +   assert(chunk_size % divisor == 0);
> +   nblocks = (chunk_size / divisor);
> +   anv_state_pool_return_blocks(pool, chunk_offset, nblocks, divisor);
> +}
> +
>  static struct anv_state
>  anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
>                             uint32_t size, uint32_t align)
> @@ -1025,6 +1116,10 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool
> *pool,
>            */
>           state->alloc_size = alloc_size;
>
> +         /* Now return the unused part of the chunk back to the pool as
> free
> +          * blocks
> +          */
> +
>           /* We've found a chunk that's larger than the requested state
> size.
>            * There are a couple of options as to what we do with it:
>            *
> @@ -1049,52 +1144,14 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool
> *pool,
>            *       one of them.  Then we split what remains into
>            *       state.alloc_size sized chunks and return all but one.
>            *
> -          * We choose option (3).
> +          * We choose option (3). That is done by returning the remaining
> of
> +          * the chunk with anv_state_pool_return_chunk(), with alloc_size
> as a
> +          * hint of the size that we want the smaller chunk split into.
>            */
> -         if (chunk_size > pool->block_size &&
> -             alloc_size < pool->block_size) {
> -            assert(chunk_size % pool->block_size == 0);
> -            /* We don't want to split giant chunks into tiny chunks.
> Instead,
> -             * break anything bigger than a block into block-sized chunks
> and
> -             * then break it down into bucket-sized chunks from there.
> Return
> -             * all but the first block of the chunk to the block bucket.
> -             */
> -            uint32_t push_back = (chunk_size / pool->block_size) - 1;
> -            const uint32_t block_bucket =
> -               anv_state_pool_get_bucket(pool->block_size);
> -            uint32_t st_idx = anv_state_table_add(&pool->table,
> push_back);
> -            for (int i = 0; i < push_back; i++) {
> -               /* update states that were added back to the state table */
> -               struct anv_state *state_i =
> anv_state_table_get(&pool->table,
> -                                                               st_idx +
> i);
> -               state_i->alloc_size = pool->block_size;
> -               state_i->offset = chunk_offset + pool->block_size * (i +
> 1);
> -               struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->block_pool,
> -
>  state_i->offset);
> -               state_i->map = pool_map.map + pool_map.offset;
> -            }
> -            anv_state_table_push(&pool->buckets[block_bucket].free_list,
> -                                 &pool->table, st_idx, push_back);
>

In your mind, does adding this helper remove at least some of the API
ugliness you were complaining about in patch 2?  I think it does.


> -            chunk_size = pool->block_size;
> -         }
> -
> -         assert(chunk_size % alloc_size == 0);
> -         uint32_t push_back = (chunk_size / alloc_size) - 1;
> -         uint32_t st_idx = anv_state_table_add(&pool->table, push_back);
> -         for (int i = 0; i < push_back; i++) {
> -            /* update states that were added back to the state table */
> -            struct anv_state *state_i = anv_state_table_get(&pool->table,
> -                                                            st_idx + i);
> -            state_i->alloc_size = alloc_size;
> -            state_i->offset = chunk_offset + alloc_size * (i + 1);
> -            struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->block_pool,
> -
> state_i->offset);
> -            state_i->map = pool_map.map + pool_map.offset;
> -         }
> -         anv_state_table_push(&pool->buckets[bucket].free_list,
> -                              &pool->table, st_idx, push_back);
> -
> -         offset = chunk_offset;
> +         uint32_t return_offset = chunk_offset + alloc_size;
> +         uint32_t return_size = chunk_size - alloc_size;
> +         anv_state_pool_return_chunk(pool, return_offset,
> +                                     return_size, alloc_size);
>           goto done;
>        }
>     }
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
> On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <rafael.antognolli@intel.com>
> wrote:
> 
>     This commit tries to rework the code that split and returns chunks back
>     to the state pool, while still keeping the same logic.
> 
>     The original code would get a chunk larger than we need and split it
>     into pool->block_size. Then it would return all but the first one, and
>     would split that first one into alloc_size chunks. Then it would keep
>     the first one (for the allocation), and return the others back to the
>     pool.
> 
>     The new anv_state_pool_return_chunk() function will take a chunk (with
>     the alloc_size part removed), and a small_size hint. It then splits that
>     chunk into pool->block_size'd chunks, and if there's some space still
>     left, split that into small_size chunks. small_size in this case is the
>     same size as alloc_size.
> 
>     The idea is to keep the same logic, but make it in a way we can reuse it
>     to return other chunks to the pool when we are growing the buffer.
>     ---
>      src/intel/vulkan/anv_allocator.c | 147 +++++++++++++++++++++----------
>      1 file changed, 102 insertions(+), 45 deletions(-)
> 
>     diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/
>     anv_allocator.c
>     index 31258e38635..bddeb4a0fbd 100644
>     --- a/src/intel/vulkan/anv_allocator.c
>     +++ b/src/intel/vulkan/anv_allocator.c
>     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t bucket)
>         return 1 << size_log2;
>      }
> 
>     +/** Helper to create a chunk into the state table.
>     + *
>     + * It just creates 'count' entries into the state table and update their
>     sizes,
>     + * offsets and maps, also pushing them as "free" states.
>     + */
>     +static void
>     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
>     +                             uint32_t chunk_offset, uint32_t count,
>     +                             uint32_t block_size)
>     +{
>     +   if (count == 0)
>     +      return;
>     +
>     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
>     +   for (int i = 0; i < count; i++) {
>     +      /* update states that were added back to the state table */
>     +      struct anv_state *state_i = anv_state_table_get(&pool->table,
>     +                                                      st_idx + i);
>     +      state_i->alloc_size = block_size;
>     +      state_i->offset = chunk_offset + block_size * i;
>     +      struct anv_pool_map pool_map = anv_block_pool_map(&pool->block_pool,
>     +                                                        state_i->offset);
>     +      state_i->map = pool_map.map + pool_map.offset;
>     +   }
>     +
>     +   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
>     +   anv_state_table_push(&pool->buckets[block_bucket].free_list,
>     +                        &pool->table, st_idx, count);
>     +}
>     +
>     +static uint32_t
>     +calculate_divisor(uint32_t size)
>     +{
>     +   uint32_t bucket = anv_state_pool_get_bucket(size);
>     +
>     +   while (bucket >= 0) {
>     +      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
>     +      if (size % bucket_size == 0)
>     +         return bucket_size;
>     +   }
>     +
>     +   return 0;
>     +}
>     +
>     +/** Returns a chunk of memory back to the state pool.
>     + *
>     + * If small_size is zero, we split chunk_size into pool->block_size'd
>     pieces,
>     + * and return those. If there's some remaining 'rest' space (chunk_size is
>     not
>     + * divisble by pool->block_size), then we find a bucket size that is a
>     divisor
>     + * of that rest, and split the 'rest' into that size, returning it to the
>     pool.
>     + *
>     + * If small_size is non-zero, we use it in two different ways:
>     + *    * if it is larger than pool->block_size, we split the chunk into
>     + *    small_size'd pieces, instead of pool->block_size'd ones.
>     + *    * we also use it as the desired size to split the 'rest' after we
>     split
>     + *    the bigger size of the chunk into pool->block_size;
> 
> 
> This seems both overly complicated and not really what we want.  If I have a
> block size of 8k and allocate a single 64-byte state and then a 8k state, it
> will break my almost 8k of padding into 511 64-byte states and return those
> which may be very wasteful if the next thing I do is allocate a 1K state.

Good, this would definitely be a waste.

> It also doesn't provide the current alignment guarantees that all states are
> aligned to their size.  While the alignment guarantee doesn't matter for most
> large states, it does matter for some of the smaller sizes.  Now that I look at
> it in detail, it appears that the highest alignment we ever require is 64B and
> the smallest size we allow is 64B so maybe it just doesn't matter?
> 
> Assuming we don't care about alignments, we could do something like this?
>
> if (small_size > 0) {
>    assert(chunk_size % small_size == 0);
>    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size / small_size,
> small_size);
> } else {
>    uint32_t divisor = MAX_STATE_SIZE;
>    while (divisor >= MIN_STATE_SIZE) {
>       uint32_t nblocks = chunk_size / divisor;
>       if (nblocks > 0) {
>          anv_state_pool_return_blocs(pool, chunk_offset, nblocks, divisor);
>          chunk_offset += nblocks * divisor;
>          chunk_size -= nblocks * divisor;
>       }
>    }
> }

The code above is indeed way simpler and it does seem to achieve
what we want for the "return padding" case.

However, we also use this helper for returning blocks that we got from
the freelist, but were only partially used. For instance if we need to
allocate a state of size 64 bytes, and we have a block of 8KB in the
freelist, due to the snippet above (small_size == 64) we will end up
splitting it into 511 64-byte states too.

Maybe (if we want to keep the old semantics), in the case where
small_size > 0, we have to do something like:

if (small_size > 0) {
   assert(chunk_size % small_size == 0);
   if (chunk_size > pool->block_size) {
      assert(chunk_size % pool->block_size == 0);
      uint32_t nblocks = chunk_size / pool->block_size - 1;
      anv_state_pool_return_blocks(pool, offset, nblocks, pool->block_size);
      chunk_size -= nblocks * pool->block_size;
      offset += nblocks * pool->block_size;
   }
   anv_state_pool_return_blocks(pool, chunk_offset, chunk_size / small_size, small_size);
} else {
   ... your else clause here
}

Maybe it's over complicating things again... what do you think?

> 
> If we do care about alignments, the above clearly isn't sufficient.  We'd have
> to do something where we chunk it separately from both ends or something.  It'd
> get kind-of gross.
>  
> 
>     + */
>     +static void
>     +anv_state_pool_return_chunk(struct anv_state_pool *pool,
>     +                            uint32_t chunk_offset, uint32_t chunk_size,
>     +                            uint32_t small_size)
>     +{
>     +   uint32_t divisor = MAX2(pool->block_size, small_size);
>     +   uint32_t nblocks = chunk_size / divisor;
>     +   uint32_t rest = chunk_size % pool->block_size;
>     +
>     +   /* First return pool->block_size'd chunks.*/
>     +   uint32_t offset = chunk_offset + rest;
>     +   anv_state_pool_return_blocks(pool, offset, nblocks, pool->block_size);
>     +
>     +   if (rest == 0)
>     +      return;
>     +
>     +   chunk_size = rest;
>     +
>     +   if (small_size > 0) {
>     +      divisor = small_size;
>     +   } else {
>     +      /* Find the maximum divisor of the remaining chunk, and return
>     smaller
>     +       * chunks of that size to the list.
>     +       */
>     +      divisor = calculate_divisor(chunk_size);
>     +      assert(divisor > 0);
>     +   }
>     +
>     +   /* Now return the smaller chunks of 'divisor' size */
>     +   assert(chunk_size % divisor == 0);
>     +   nblocks = (chunk_size / divisor);
>     +   anv_state_pool_return_blocks(pool, chunk_offset, nblocks, divisor);
>     +}
>     +
>      static struct anv_state
>      anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
>                                 uint32_t size, uint32_t align)
>     @@ -1025,6 +1116,10 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool
>     *pool,
>                */
>               state->alloc_size = alloc_size;
> 
>     +         /* Now return the unused part of the chunk back to the pool as
>     free
>     +          * blocks
>     +          */
>     +
>               /* We've found a chunk that's larger than the requested state
>     size.
>                * There are a couple of options as to what we do with it:
>                *
>     @@ -1049,52 +1144,14 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool
>     *pool,
>                *       one of them.  Then we split what remains into
>                *       state.alloc_size sized chunks and return all but one.
>                *
>     -          * We choose option (3).
>     +          * We choose option (3). That is done by returning the remaining
>     of
>     +          * the chunk with anv_state_pool_return_chunk(), with alloc_size
>     as a
>     +          * hint of the size that we want the smaller chunk split into.
>                */
>     -         if (chunk_size > pool->block_size &&
>     -             alloc_size < pool->block_size) {
>     -            assert(chunk_size % pool->block_size == 0);
>     -            /* We don't want to split giant chunks into tiny chunks. 
>     Instead,
>     -             * break anything bigger than a block into block-sized chunks
>     and
>     -             * then break it down into bucket-sized chunks from there. 
>     Return
>     -             * all but the first block of the chunk to the block bucket.
>     -             */
>     -            uint32_t push_back = (chunk_size / pool->block_size) - 1;
>     -            const uint32_t block_bucket =
>     -               anv_state_pool_get_bucket(pool->block_size);
>     -            uint32_t st_idx = anv_state_table_add(&pool->table,
>     push_back);
>     -            for (int i = 0; i < push_back; i++) {
>     -               /* update states that were added back to the state table */
>     -               struct anv_state *state_i = anv_state_table_get(&pool->
>     table,
>     -                                                               st_idx +
>     i);
>     -               state_i->alloc_size = pool->block_size;
>     -               state_i->offset = chunk_offset + pool->block_size * (i +
>     1);
>     -               struct anv_pool_map pool_map = anv_block_pool_map(&pool->
>     block_pool,
>     -                                                                 state_i->
>     offset);
>     -               state_i->map = pool_map.map + pool_map.offset;
>     -            }
>     -            anv_state_table_push(&pool->buckets[block_bucket].free_list,
>     -                                 &pool->table, st_idx, push_back);
> 
> 
> In your mind, does adding this helper remove at least some of the API ugliness
> you were complaining about in patch 2?  I think it does.

Yes, I think it does too. At first I really didn't want to send the
series until I figured out a way to improve the API, but after I added
these helpers I was less bothered by it.

>     -            chunk_size = pool->block_size;
>     -         }
>     -
>     -         assert(chunk_size % alloc_size == 0);
>     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
>     -         uint32_t st_idx = anv_state_table_add(&pool->table, push_back);
>     -         for (int i = 0; i < push_back; i++) {
>     -            /* update states that were added back to the state table */
>     -            struct anv_state *state_i = anv_state_table_get(&pool->table,
>     -                                                            st_idx + i);
>     -            state_i->alloc_size = alloc_size;
>     -            state_i->offset = chunk_offset + alloc_size * (i + 1);
>     -            struct anv_pool_map pool_map = anv_block_pool_map(&pool->
>     block_pool,
>     -                                                              state_i->
>     offset);
>     -            state_i->map = pool_map.map + pool_map.offset;
>     -         }
>     -         anv_state_table_push(&pool->buckets[bucket].free_list,
>     -                              &pool->table, st_idx, push_back);
>     -
>     -         offset = chunk_offset;
>     +         uint32_t return_offset = chunk_offset + alloc_size;
>     +         uint32_t return_size = chunk_size - alloc_size;
>     +         anv_state_pool_return_chunk(pool, return_offset,
>     +                                     return_size, alloc_size);
>               goto done;
>            }
>         }
>     --
>     2.17.1
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev@lists.freedesktop.org
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
On Mon, Dec 10, 2018 at 5:48 PM Rafael Antognolli <
rafael.antognolli@intel.com> wrote:

> On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
> > On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
> rafael.antognolli@intel.com>
> > wrote:
> >
> >     This commit tries to rework the code that split and returns chunks
> back
> >     to the state pool, while still keeping the same logic.
> >
> >     The original code would get a chunk larger than we need and split it
> >     into pool->block_size. Then it would return all but the first one,
> and
> >     would split that first one into alloc_size chunks. Then it would keep
> >     the first one (for the allocation), and return the others back to the
> >     pool.
> >
> >     The new anv_state_pool_return_chunk() function will take a chunk
> (with
> >     the alloc_size part removed), and a small_size hint. It then splits
> that
> >     chunk into pool->block_size'd chunks, and if there's some space still
> >     left, split that into small_size chunks. small_size in this case is
> the
> >     same size as alloc_size.
> >
> >     The idea is to keep the same logic, but make it in a way we can
> reuse it
> >     to return other chunks to the pool when we are growing the buffer.
> >     ---
> >      src/intel/vulkan/anv_allocator.c | 147
> +++++++++++++++++++++----------
> >      1 file changed, 102 insertions(+), 45 deletions(-)
> >
> >     diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/
> >     anv_allocator.c
> >     index 31258e38635..bddeb4a0fbd 100644
> >     --- a/src/intel/vulkan/anv_allocator.c
> >     +++ b/src/intel/vulkan/anv_allocator.c
> >     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t bucket)
> >         return 1 << size_log2;
> >      }
> >
> >     +/** Helper to create a chunk into the state table.
> >     + *
> >     + * It just creates 'count' entries into the state table and update
> their
> >     sizes,
> >     + * offsets and maps, also pushing them as "free" states.
> >     + */
> >     +static void
> >     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
> >     +                             uint32_t chunk_offset, uint32_t count,
> >     +                             uint32_t block_size)
> >     +{
> >     +   if (count == 0)
> >     +      return;
> >     +
> >     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
> >     +   for (int i = 0; i < count; i++) {
> >     +      /* update states that were added back to the state table */
> >     +      struct anv_state *state_i = anv_state_table_get(&pool->table,
> >     +                                                      st_idx + i);
> >     +      state_i->alloc_size = block_size;
> >     +      state_i->offset = chunk_offset + block_size * i;
> >     +      struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->block_pool,
> >     +
> state_i->offset);
> >     +      state_i->map = pool_map.map + pool_map.offset;
> >     +   }
> >     +
> >     +   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
> >     +   anv_state_table_push(&pool->buckets[block_bucket].free_list,
> >     +                        &pool->table, st_idx, count);
> >     +}
> >     +
> >     +static uint32_t
> >     +calculate_divisor(uint32_t size)
> >     +{
> >     +   uint32_t bucket = anv_state_pool_get_bucket(size);
> >     +
> >     +   while (bucket >= 0) {
> >     +      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
> >     +      if (size % bucket_size == 0)
> >     +         return bucket_size;
> >     +   }
> >     +
> >     +   return 0;
> >     +}
> >     +
> >     +/** Returns a chunk of memory back to the state pool.
> >     + *
> >     + * If small_size is zero, we split chunk_size into
> pool->block_size'd
> >     pieces,
> >     + * and return those. If there's some remaining 'rest' space
> (chunk_size is
> >     not
> >     + * divisble by pool->block_size), then we find a bucket size that
> is a
> >     divisor
> >     + * of that rest, and split the 'rest' into that size, returning it
> to the
> >     pool.
> >     + *
> >     + * If small_size is non-zero, we use it in two different ways:
> >     + *    * if it is larger than pool->block_size, we split the chunk
> into
> >     + *    small_size'd pieces, instead of pool->block_size'd ones.
> >     + *    * we also use it as the desired size to split the 'rest'
> after we
> >     split
> >     + *    the bigger size of the chunk into pool->block_size;
> >
> >
> > This seems both overly complicated and not really what we want.  If I
> have a
> > block size of 8k and allocate a single 64-byte state and then a 8k
> state, it
> > will break my almost 8k of padding into 511 64-byte states and return
> those
> > which may be very wasteful if the next thing I do is allocate a 1K state.
>
> Good, this would definitely be a waste.
>
> > It also doesn't provide the current alignment guarantees that all states
> are
> > aligned to their size.  While the alignment guarantee doesn't matter for
> most
> > large states, it does matter for some of the smaller sizes.  Now that I
> look at
> > it in detail, it appears that the highest alignment we ever require is
> 64B and
> > the smallest size we allow is 64B so maybe it just doesn't matter?
> >
> > Assuming we don't care about alignments, we could do something like this?
> >
> > if (small_size > 0) {
> >    assert(chunk_size % small_size == 0);
> >    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> small_size,
> > small_size);
> > } else {
> >    uint32_t divisor = MAX_STATE_SIZE;
> >    while (divisor >= MIN_STATE_SIZE) {
> >       uint32_t nblocks = chunk_size / divisor;
> >       if (nblocks > 0) {
> >          anv_state_pool_return_blocs(pool, chunk_offset, nblocks,
> divisor);
> >          chunk_offset += nblocks * divisor;
> >          chunk_size -= nblocks * divisor;
> >       }
> >    }
> > }
>
> The code above is indeed way simpler and it does seem to achieve
> what we want for the "return padding" case.
>
> However, we also use this helper for returning blocks that we got from
> the freelist, but were only partially used. For instance if we need to
> allocate a state of size 64 bytes, and we have a block of 8KB in the
> freelist, due to the snippet above (small_size == 64) we will end up
> splitting it into 511 64-byte states too.
>
> Maybe (if we want to keep the old semantics), in the case where
> small_size > 0, we have to do something like:
>
> if (small_size > 0) {
>    assert(chunk_size % small_size == 0);
>    if (chunk_size > pool->block_size) {
>       assert(chunk_size % pool->block_size == 0);
>       uint32_t nblocks = chunk_size / pool->block_size - 1;
>       anv_state_pool_return_blocks(pool, offset, nblocks,
> pool->block_size);
>       chunk_size -= nblocks * pool->block_size;
>       offset += nblocks * pool->block_size;
>    }
>    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> small_size, small_size);
> } else {
>    ... your else clause here
> }
>
> Maybe it's over complicating things again... what do you think?
>

Maybe?  Or maybe we want to just keep the block_size semantics for
everything and split padding into some small chunks as needed and then a
bunch of block_size chunks.  If we did that, we'd have the same semantics
for everything.


> >
> > If we do care about alignments, the above clearly isn't sufficient.
> We'd have
> > to do something where we chunk it separately from both ends or
> something.  It'd
> > get kind-of gross.
> >
> >
> >     + */
> >     +static void
> >     +anv_state_pool_return_chunk(struct anv_state_pool *pool,
> >     +                            uint32_t chunk_offset, uint32_t
> chunk_size,
> >     +                            uint32_t small_size)
> >     +{
> >     +   uint32_t divisor = MAX2(pool->block_size, small_size);
> >     +   uint32_t nblocks = chunk_size / divisor;
> >     +   uint32_t rest = chunk_size % pool->block_size;
> >     +
> >     +   /* First return pool->block_size'd chunks.*/
> >     +   uint32_t offset = chunk_offset + rest;
> >     +   anv_state_pool_return_blocks(pool, offset, nblocks,
> pool->block_size);
> >     +
> >     +   if (rest == 0)
> >     +      return;
> >     +
> >     +   chunk_size = rest;
> >     +
> >     +   if (small_size > 0) {
> >     +      divisor = small_size;
> >     +   } else {
> >     +      /* Find the maximum divisor of the remaining chunk, and return
> >     smaller
> >     +       * chunks of that size to the list.
> >     +       */
> >     +      divisor = calculate_divisor(chunk_size);
> >     +      assert(divisor > 0);
> >     +   }
> >     +
> >     +   /* Now return the smaller chunks of 'divisor' size */
> >     +   assert(chunk_size % divisor == 0);
> >     +   nblocks = (chunk_size / divisor);
> >     +   anv_state_pool_return_blocks(pool, chunk_offset, nblocks,
> divisor);
> >     +}
> >     +
> >      static struct anv_state
> >      anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
> >                                 uint32_t size, uint32_t align)
> >     @@ -1025,6 +1116,10 @@ anv_state_pool_alloc_no_vg(struct
> anv_state_pool
> >     *pool,
> >                */
> >               state->alloc_size = alloc_size;
> >
> >     +         /* Now return the unused part of the chunk back to the
> pool as
> >     free
> >     +          * blocks
> >     +          */
> >     +
> >               /* We've found a chunk that's larger than the requested
> state
> >     size.
> >                * There are a couple of options as to what we do with it:
> >                *
> >     @@ -1049,52 +1144,14 @@ anv_state_pool_alloc_no_vg(struct
> anv_state_pool
> >     *pool,
> >                *       one of them.  Then we split what remains into
> >                *       state.alloc_size sized chunks and return all but
> one.
> >                *
> >     -          * We choose option (3).
> >     +          * We choose option (3). That is done by returning the
> remaining
> >     of
> >     +          * the chunk with anv_state_pool_return_chunk(), with
> alloc_size
> >     as a
> >     +          * hint of the size that we want the smaller chunk split
> into.
> >                */
> >     -         if (chunk_size > pool->block_size &&
> >     -             alloc_size < pool->block_size) {
> >     -            assert(chunk_size % pool->block_size == 0);
> >     -            /* We don't want to split giant chunks into tiny
> chunks.
> >     Instead,
> >     -             * break anything bigger than a block into block-sized
> chunks
> >     and
> >     -             * then break it down into bucket-sized chunks from
> there.
> >     Return
> >     -             * all but the first block of the chunk to the block
> bucket.
> >     -             */
> >     -            uint32_t push_back = (chunk_size / pool->block_size) -
> 1;
> >     -            const uint32_t block_bucket =
> >     -               anv_state_pool_get_bucket(pool->block_size);
> >     -            uint32_t st_idx = anv_state_table_add(&pool->table,
> >     push_back);
> >     -            for (int i = 0; i < push_back; i++) {
> >     -               /* update states that were added back to the state
> table */
> >     -               struct anv_state *state_i =
> anv_state_table_get(&pool->
> >     table,
> >     -
>  st_idx +
> >     i);
> >     -               state_i->alloc_size = pool->block_size;
> >     -               state_i->offset = chunk_offset + pool->block_size *
> (i +
> >     1);
> >     -               struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->
> >     block_pool,
> >     -
>  state_i->
> >     offset);
> >     -               state_i->map = pool_map.map + pool_map.offset;
> >     -            }
> >     -
> anv_state_table_push(&pool->buckets[block_bucket].free_list,
> >     -                                 &pool->table, st_idx, push_back);
> >
> >
> > In your mind, does adding this helper remove at least some of the API
> ugliness
> > you were complaining about in patch 2?  I think it does.
>
> Yes, I think it does too. At first I really didn't want to send the
> series until I figured out a way to improve the API, but after I added
> these helpers I was less bothered by it.
>

I think what I'd recommend is that you pull the first helper in this patch
into it's own patch right before patch 2 and use it replace the two in-line
copies here.  Then the API wouldn't look nearly as horrible and this could
focus on the heuristic for dealing with padding (and maybe even be rolled
into the padding patch).


> >     -            chunk_size = pool->block_size;
> >     -         }
> >     -
> >     -         assert(chunk_size % alloc_size == 0);
> >     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
> >     -         uint32_t st_idx = anv_state_table_add(&pool->table,
> push_back);
> >     -         for (int i = 0; i < push_back; i++) {
> >     -            /* update states that were added back to the state
> table */
> >     -            struct anv_state *state_i =
> anv_state_table_get(&pool->table,
> >     -                                                            st_idx
> + i);
> >     -            state_i->alloc_size = alloc_size;
> >     -            state_i->offset = chunk_offset + alloc_size * (i + 1);
> >     -            struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->
> >     block_pool,
> >     -
> state_i->
> >     offset);
> >     -            state_i->map = pool_map.map + pool_map.offset;
> >     -         }
> >     -         anv_state_table_push(&pool->buckets[bucket].free_list,
> >     -                              &pool->table, st_idx, push_back);
> >     -
> >     -         offset = chunk_offset;
> >     +         uint32_t return_offset = chunk_offset + alloc_size;
> >     +         uint32_t return_size = chunk_size - alloc_size;
> >     +         anv_state_pool_return_chunk(pool, return_offset,
> >     +                                     return_size, alloc_size);
> >               goto done;
> >            }
> >         }
> >     --
> >     2.17.1
> >
> >     _______________________________________________
> >     mesa-dev mailing list
> >     mesa-dev@lists.freedesktop.org
> >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
>
On Mon, Dec 10, 2018 at 11:10:02PM -0600, Jason Ekstrand wrote:
> 
> 
> On Mon, Dec 10, 2018 at 5:48 PM Rafael Antognolli <rafael.antognolli@intel.com>
> wrote:
> 
>     On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
>     > On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
>     rafael.antognolli@intel.com>
>     > wrote:
>     >
>     >     This commit tries to rework the code that split and returns chunks
>     back
>     >     to the state pool, while still keeping the same logic.
>     >
>     >     The original code would get a chunk larger than we need and split it
>     >     into pool->block_size. Then it would return all but the first one,
>     and
>     >     would split that first one into alloc_size chunks. Then it would keep
>     >     the first one (for the allocation), and return the others back to the
>     >     pool.
>     >
>     >     The new anv_state_pool_return_chunk() function will take a chunk
>     (with
>     >     the alloc_size part removed), and a small_size hint. It then splits
>     that
>     >     chunk into pool->block_size'd chunks, and if there's some space still
>     >     left, split that into small_size chunks. small_size in this case is
>     the
>     >     same size as alloc_size.
>     >
>     >     The idea is to keep the same logic, but make it in a way we can reuse
>     it
>     >     to return other chunks to the pool when we are growing the buffer.
>     >     ---
>     >      src/intel/vulkan/anv_allocator.c | 147
>     +++++++++++++++++++++----------
>     >      1 file changed, 102 insertions(+), 45 deletions(-)
>     >
>     >     diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/
>     >     anv_allocator.c
>     >     index 31258e38635..bddeb4a0fbd 100644
>     >     --- a/src/intel/vulkan/anv_allocator.c
>     >     +++ b/src/intel/vulkan/anv_allocator.c
>     >     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t bucket)
>     >         return 1 << size_log2;
>     >      }
>     >
>     >     +/** Helper to create a chunk into the state table.
>     >     + *
>     >     + * It just creates 'count' entries into the state table and update
>     their
>     >     sizes,
>     >     + * offsets and maps, also pushing them as "free" states.
>     >     + */
>     >     +static void
>     >     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
>     >     +                             uint32_t chunk_offset, uint32_t count,
>     >     +                             uint32_t block_size)
>     >     +{
>     >     +   if (count == 0)
>     >     +      return;
>     >     +
>     >     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
>     >     +   for (int i = 0; i < count; i++) {
>     >     +      /* update states that were added back to the state table */
>     >     +      struct anv_state *state_i = anv_state_table_get(&pool->table,
>     >     +                                                      st_idx + i);
>     >     +      state_i->alloc_size = block_size;
>     >     +      state_i->offset = chunk_offset + block_size * i;
>     >     +      struct anv_pool_map pool_map = anv_block_pool_map(&pool->
>     block_pool,
>     >     +                                                        state_i->
>     offset);
>     >     +      state_i->map = pool_map.map + pool_map.offset;
>     >     +   }
>     >     +
>     >     +   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
>     >     +   anv_state_table_push(&pool->buckets[block_bucket].free_list,
>     >     +                        &pool->table, st_idx, count);
>     >     +}
>     >     +
>     >     +static uint32_t
>     >     +calculate_divisor(uint32_t size)
>     >     +{
>     >     +   uint32_t bucket = anv_state_pool_get_bucket(size);
>     >     +
>     >     +   while (bucket >= 0) {
>     >     +      uint32_t bucket_size = anv_state_pool_get_bucket_size(bucket);
>     >     +      if (size % bucket_size == 0)
>     >     +         return bucket_size;
>     >     +   }
>     >     +
>     >     +   return 0;
>     >     +}
>     >     +
>     >     +/** Returns a chunk of memory back to the state pool.
>     >     + *
>     >     + * If small_size is zero, we split chunk_size into pool->
>     block_size'd
>     >     pieces,
>     >     + * and return those. If there's some remaining 'rest' space
>     (chunk_size is
>     >     not
>     >     + * divisble by pool->block_size), then we find a bucket size that is
>     a
>     >     divisor
>     >     + * of that rest, and split the 'rest' into that size, returning it
>     to the
>     >     pool.
>     >     + *
>     >     + * If small_size is non-zero, we use it in two different ways:
>     >     + *    * if it is larger than pool->block_size, we split the chunk
>     into
>     >     + *    small_size'd pieces, instead of pool->block_size'd ones.
>     >     + *    * we also use it as the desired size to split the 'rest' after
>     we
>     >     split
>     >     + *    the bigger size of the chunk into pool->block_size;
>     >
>     >
>     > This seems both overly complicated and not really what we want.  If I
>     have a
>     > block size of 8k and allocate a single 64-byte state and then a 8k state,
>     it
>     > will break my almost 8k of padding into 511 64-byte states and return
>     those
>     > which may be very wasteful if the next thing I do is allocate a 1K state.
> 
>     Good, this would definitely be a waste.
> 
>     > It also doesn't provide the current alignment guarantees that all states
>     are
>     > aligned to their size.  While the alignment guarantee doesn't matter for
>     most
>     > large states, it does matter for some of the smaller sizes.  Now that I
>     look at
>     > it in detail, it appears that the highest alignment we ever require is
>     64B and
>     > the smallest size we allow is 64B so maybe it just doesn't matter?
>     >
>     > Assuming we don't care about alignments, we could do something like this?
>     >
>     > if (small_size > 0) {
>     >    assert(chunk_size % small_size == 0);
>     >    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
>     small_size,
>     > small_size);
>     > } else {
>     >    uint32_t divisor = MAX_STATE_SIZE;
>     >    while (divisor >= MIN_STATE_SIZE) {
>     >       uint32_t nblocks = chunk_size / divisor;
>     >       if (nblocks > 0) {
>     >          anv_state_pool_return_blocs(pool, chunk_offset, nblocks,
>     divisor);
>     >          chunk_offset += nblocks * divisor;
>     >          chunk_size -= nblocks * divisor;
>     >       }
>     >    }
>     > }
> 
>     The code above is indeed way simpler and it does seem to achieve
>     what we want for the "return padding" case.
> 
>     However, we also use this helper for returning blocks that we got from
>     the freelist, but were only partially used. For instance if we need to
>     allocate a state of size 64 bytes, and we have a block of 8KB in the
>     freelist, due to the snippet above (small_size == 64) we will end up
>     splitting it into 511 64-byte states too.
> 
>     Maybe (if we want to keep the old semantics), in the case where
>     small_size > 0, we have to do something like:
> 
>     if (small_size > 0) {
>        assert(chunk_size % small_size == 0);
>        if (chunk_size > pool->block_size) {
>           assert(chunk_size % pool->block_size == 0);
>           uint32_t nblocks = chunk_size / pool->block_size - 1;
>           anv_state_pool_return_blocks(pool, offset, nblocks, pool->
>     block_size);
>           chunk_size -= nblocks * pool->block_size;
>           offset += nblocks * pool->block_size;
>        }
>        anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
>     small_size, small_size);
>     } else {
>        ... your else clause here
>     }
> 
>     Maybe it's over complicating things again... what do you think?
> 
> 
> Maybe?  Or maybe we want to just keep the block_size semantics for everything
> and split padding into some small chunks as needed and then a bunch of
> block_size chunks.  If we did that, we'd have the same semantics for
> everything.

Just to be sure, how does something like this look:

 uint32_t divisor = MAX2(pool->block_size, small_size);
 uint32_t nblocks = chunk_size / divisor;
 uint32_t rest = chunk_size - nblocks * divisor;

 /* First return pool->block_size'd chunks.*/
 uint32_t offset = chunk_offset + rest;
 anv_state_pool_return_blocks(pool, offset, nblocks, divisor);

 chunk_size = rest;

 uint32_t b = anv_state_pool_get_bucket(divisor);
 while (chunk_size > 0 && b >= ANV_MIN_STATE_SIZE_LOG2) {
    divisor = anv_state_pool_get_bucket_size(b);
    nblocks = chunk_size / divisor;
    rest = chunk_size - nblocks * divisor;
    if (nblocks > 0) {
       anv_state_pool_return_blocks(pool, chunk_offset + rest,
                                    nblocks, divisor);
       chunk_size = rest;
    }
    b--;
 }

I'm also wondering if it's worth leaving the "small_size" portion of it,
and if so I probably need a better name for it.

> 

<snip>

> 
> 
> I think what I'd recommend is that you pull the first helper in this patch into
> it's own patch right before patch 2 and use it replace the two in-line copies
> here.  Then the API wouldn't look nearly as horrible and this could focus on
> the heuristic for dealing with padding (and maybe even be rolled into the
> padding patch).

Cool, done locally already, thanks!

> 
>     >     -            chunk_size = pool->block_size;
>     >     -         }
>     >     -
>     >     -         assert(chunk_size % alloc_size == 0);
>     >     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
>     >     -         uint32_t st_idx = anv_state_table_add(&pool->table,
>     push_back);
>     >     -         for (int i = 0; i < push_back; i++) {
>     >     -            /* update states that were added back to the state table
>     */
>     >     -            struct anv_state *state_i = anv_state_table_get(&pool->
>     table,
>     >     -                                                            st_idx +
>     i);
>     >     -            state_i->alloc_size = alloc_size;
>     >     -            state_i->offset = chunk_offset + alloc_size * (i + 1);
>     >     -            struct anv_pool_map pool_map = anv_block_pool_map(&
>     pool->
>     >     block_pool,
>     >     -                                                             
>     state_i->
>     >     offset);
>     >     -            state_i->map = pool_map.map + pool_map.offset;
>     >     -         }
>     >     -         anv_state_table_push(&pool->buckets[bucket].free_list,
>     >     -                              &pool->table, st_idx, push_back);
>     >     -
>     >     -         offset = chunk_offset;
>     >     +         uint32_t return_offset = chunk_offset + alloc_size;
>     >     +         uint32_t return_size = chunk_size - alloc_size;
>     >     +         anv_state_pool_return_chunk(pool, return_offset,
>     >     +                                     return_size, alloc_size);
>     >               goto done;
>     >            }
>     >         }
>     >     --
>     >     2.17.1
>     >
>     >     _______________________________________________
>     >     mesa-dev mailing list
>     >     mesa-dev@lists.freedesktop.org
>     >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>     >
>
On Tue, Dec 11, 2018 at 5:31 PM Rafael Antognolli <
rafael.antognolli@intel.com> wrote:

> On Mon, Dec 10, 2018 at 11:10:02PM -0600, Jason Ekstrand wrote:
> >
> >
> > On Mon, Dec 10, 2018 at 5:48 PM Rafael Antognolli <
> rafael.antognolli@intel.com>
> > wrote:
> >
> >     On Mon, Dec 10, 2018 at 04:56:40PM -0600, Jason Ekstrand wrote:
> >     > On Fri, Dec 7, 2018 at 6:06 PM Rafael Antognolli <
> >     rafael.antognolli@intel.com>
> >     > wrote:
> >     >
> >     >     This commit tries to rework the code that split and returns
> chunks
> >     back
> >     >     to the state pool, while still keeping the same logic.
> >     >
> >     >     The original code would get a chunk larger than we need and
> split it
> >     >     into pool->block_size. Then it would return all but the first
> one,
> >     and
> >     >     would split that first one into alloc_size chunks. Then it
> would keep
> >     >     the first one (for the allocation), and return the others back
> to the
> >     >     pool.
> >     >
> >     >     The new anv_state_pool_return_chunk() function will take a
> chunk
> >     (with
> >     >     the alloc_size part removed), and a small_size hint. It then
> splits
> >     that
> >     >     chunk into pool->block_size'd chunks, and if there's some
> space still
> >     >     left, split that into small_size chunks. small_size in this
> case is
> >     the
> >     >     same size as alloc_size.
> >     >
> >     >     The idea is to keep the same logic, but make it in a way we
> can reuse
> >     it
> >     >     to return other chunks to the pool when we are growing the
> buffer.
> >     >     ---
> >     >      src/intel/vulkan/anv_allocator.c | 147
> >     +++++++++++++++++++++----------
> >     >      1 file changed, 102 insertions(+), 45 deletions(-)
> >     >
> >     >     diff --git a/src/intel/vulkan/anv_allocator.c
> b/src/intel/vulkan/
> >     >     anv_allocator.c
> >     >     index 31258e38635..bddeb4a0fbd 100644
> >     >     --- a/src/intel/vulkan/anv_allocator.c
> >     >     +++ b/src/intel/vulkan/anv_allocator.c
> >     >     @@ -994,6 +994,97 @@ anv_state_pool_get_bucket_size(uint32_t
> bucket)
> >     >         return 1 << size_log2;
> >     >      }
> >     >
> >     >     +/** Helper to create a chunk into the state table.
> >     >     + *
> >     >     + * It just creates 'count' entries into the state table and
> update
> >     their
> >     >     sizes,
> >     >     + * offsets and maps, also pushing them as "free" states.
> >     >     + */
> >     >     +static void
> >     >     +anv_state_pool_return_blocks(struct anv_state_pool *pool,
> >     >     +                             uint32_t chunk_offset, uint32_t
> count,
> >     >     +                             uint32_t block_size)
> >     >     +{
> >     >     +   if (count == 0)
> >     >     +      return;
> >     >     +
> >     >     +   uint32_t st_idx = anv_state_table_add(&pool->table, count);
> >     >     +   for (int i = 0; i < count; i++) {
> >     >     +      /* update states that were added back to the state
> table */
> >     >     +      struct anv_state *state_i =
> anv_state_table_get(&pool->table,
> >     >     +                                                      st_idx
> + i);
> >     >     +      state_i->alloc_size = block_size;
> >     >     +      state_i->offset = chunk_offset + block_size * i;
> >     >     +      struct anv_pool_map pool_map =
> anv_block_pool_map(&pool->
> >     block_pool,
> >     >     +
> state_i->
> >     offset);
> >     >     +      state_i->map = pool_map.map + pool_map.offset;
> >     >     +   }
> >     >     +
> >     >     +   uint32_t block_bucket =
> anv_state_pool_get_bucket(block_size);
> >     >     +
>  anv_state_table_push(&pool->buckets[block_bucket].free_list,
> >     >     +                        &pool->table, st_idx, count);
> >     >     +}
> >     >     +
> >     >     +static uint32_t
> >     >     +calculate_divisor(uint32_t size)
> >     >     +{
> >     >     +   uint32_t bucket = anv_state_pool_get_bucket(size);
> >     >     +
> >     >     +   while (bucket >= 0) {
> >     >     +      uint32_t bucket_size =
> anv_state_pool_get_bucket_size(bucket);
> >     >     +      if (size % bucket_size == 0)
> >     >     +         return bucket_size;
> >     >     +   }
> >     >     +
> >     >     +   return 0;
> >     >     +}
> >     >     +
> >     >     +/** Returns a chunk of memory back to the state pool.
> >     >     + *
> >     >     + * If small_size is zero, we split chunk_size into pool->
> >     block_size'd
> >     >     pieces,
> >     >     + * and return those. If there's some remaining 'rest' space
> >     (chunk_size is
> >     >     not
> >     >     + * divisble by pool->block_size), then we find a bucket size
> that is
> >     a
> >     >     divisor
> >     >     + * of that rest, and split the 'rest' into that size,
> returning it
> >     to the
> >     >     pool.
> >     >     + *
> >     >     + * If small_size is non-zero, we use it in two different ways:
> >     >     + *    * if it is larger than pool->block_size, we split the
> chunk
> >     into
> >     >     + *    small_size'd pieces, instead of pool->block_size'd ones.
> >     >     + *    * we also use it as the desired size to split the
> 'rest' after
> >     we
> >     >     split
> >     >     + *    the bigger size of the chunk into pool->block_size;
> >     >
> >     >
> >     > This seems both overly complicated and not really what we want.
> If I
> >     have a
> >     > block size of 8k and allocate a single 64-byte state and then a 8k
> state,
> >     it
> >     > will break my almost 8k of padding into 511 64-byte states and
> return
> >     those
> >     > which may be very wasteful if the next thing I do is allocate a 1K
> state.
> >
> >     Good, this would definitely be a waste.
> >
> >     > It also doesn't provide the current alignment guarantees that all
> states
> >     are
> >     > aligned to their size.  While the alignment guarantee doesn't
> matter for
> >     most
> >     > large states, it does matter for some of the smaller sizes.  Now
> that I
> >     look at
> >     > it in detail, it appears that the highest alignment we ever
> require is
> >     64B and
> >     > the smallest size we allow is 64B so maybe it just doesn't matter?
> >     >
> >     > Assuming we don't care about alignments, we could do something
> like this?
> >     >
> >     > if (small_size > 0) {
> >     >    assert(chunk_size % small_size == 0);
> >     >    anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> >     small_size,
> >     > small_size);
> >     > } else {
> >     >    uint32_t divisor = MAX_STATE_SIZE;
> >     >    while (divisor >= MIN_STATE_SIZE) {
> >     >       uint32_t nblocks = chunk_size / divisor;
> >     >       if (nblocks > 0) {
> >     >          anv_state_pool_return_blocs(pool, chunk_offset, nblocks,
> >     divisor);
> >     >          chunk_offset += nblocks * divisor;
> >     >          chunk_size -= nblocks * divisor;
> >     >       }
> >     >    }
> >     > }
> >
> >     The code above is indeed way simpler and it does seem to achieve
> >     what we want for the "return padding" case.
> >
> >     However, we also use this helper for returning blocks that we got
> from
> >     the freelist, but were only partially used. For instance if we need
> to
> >     allocate a state of size 64 bytes, and we have a block of 8KB in the
> >     freelist, due to the snippet above (small_size == 64) we will end up
> >     splitting it into 511 64-byte states too.
> >
> >     Maybe (if we want to keep the old semantics), in the case where
> >     small_size > 0, we have to do something like:
> >
> >     if (small_size > 0) {
> >        assert(chunk_size % small_size == 0);
> >        if (chunk_size > pool->block_size) {
> >           assert(chunk_size % pool->block_size == 0);
> >           uint32_t nblocks = chunk_size / pool->block_size - 1;
> >           anv_state_pool_return_blocks(pool, offset, nblocks, pool->
> >     block_size);
> >           chunk_size -= nblocks * pool->block_size;
> >           offset += nblocks * pool->block_size;
> >        }
> >        anv_state_pool_return_blocks(pool, chunk_offset, chunk_size /
> >     small_size, small_size);
> >     } else {
> >        ... your else clause here
> >     }
> >
> >     Maybe it's over complicating things again... what do you think?
> >
> >
> > Maybe?  Or maybe we want to just keep the block_size semantics for
> everything
> > and split padding into some small chunks as needed and then a bunch of
> > block_size chunks.  If we did that, we'd have the same semantics for
> > everything.
>
> Just to be sure, how does something like this look:
>
>  uint32_t divisor = MAX2(pool->block_size, small_size);
>  uint32_t nblocks = chunk_size / divisor;
>  uint32_t rest = chunk_size - nblocks * divisor;
>
>  /* First return pool->block_size'd chunks.*/
>  uint32_t offset = chunk_offset + rest;
>  anv_state_pool_return_blocks(pool, offset, nblocks, divisor);
>

oh.... That's clever.  We can't assume that the start is aligned but we can
always assume that the end is aligned.  I like it!  Probably deserves a
comment and an assert though. :-)


>
>  chunk_size = rest;
>
>  uint32_t b = anv_state_pool_get_bucket(divisor);
>

I think we want divisor to start off at small_size here if small_size > 0
&& small_size < bucket_size.


>  while (chunk_size > 0 && b >= ANV_MIN_STATE_SIZE_LOG2) {
>     divisor = anv_state_pool_get_bucket_size(b);
>     nblocks = chunk_size / divisor;
>     rest = chunk_size - nblocks * divisor;
>     if (nblocks > 0) {
>        anv_state_pool_return_blocks(pool, chunk_offset + rest,
>                                     nblocks, divisor);
>        chunk_size = rest;
>     }
>     b--;
>  }
>
> I'm also wondering if it's worth leaving the "small_size" portion of it,
> and if so I probably need a better name for it.
>

Yeah, I think this is more-or-less what we want.  Not sure if I like
decrementing b or just doing "divisor /= 2" better; probably doesn't matter.

--Jason


> >
>
> <snip>
>
> >
> >
> > I think what I'd recommend is that you pull the first helper in this
> patch into
> > it's own patch right before patch 2 and use it replace the two in-line
> copies
> > here.  Then the API wouldn't look nearly as horrible and this could
> focus on
> > the heuristic for dealing with padding (and maybe even be rolled into the
> > padding patch).
>
> Cool, done locally already, thanks!
>
> >
> >     >     -            chunk_size = pool->block_size;
> >     >     -         }
> >     >     -
> >     >     -         assert(chunk_size % alloc_size == 0);
> >     >     -         uint32_t push_back = (chunk_size / alloc_size) - 1;
> >     >     -         uint32_t st_idx = anv_state_table_add(&pool->table,
> >     push_back);
> >     >     -         for (int i = 0; i < push_back; i++) {
> >     >     -            /* update states that were added back to the
> state table
> >     */
> >     >     -            struct anv_state *state_i =
> anv_state_table_get(&pool->
> >     table,
> >     >     -
> st_idx +
> >     i);
> >     >     -            state_i->alloc_size = alloc_size;
> >     >     -            state_i->offset = chunk_offset + alloc_size * (i
> + 1);
> >     >     -            struct anv_pool_map pool_map =
> anv_block_pool_map(&
> >     pool->
> >     >     block_pool,
> >     >     -
> >     state_i->
> >     >     offset);
> >     >     -            state_i->map = pool_map.map + pool_map.offset;
> >     >     -         }
> >     >     -
>  anv_state_table_push(&pool->buckets[bucket].free_list,
> >     >     -                              &pool->table, st_idx,
> push_back);
> >     >     -
> >     >     -         offset = chunk_offset;
> >     >     +         uint32_t return_offset = chunk_offset + alloc_size;
> >     >     +         uint32_t return_size = chunk_size - alloc_size;
> >     >     +         anv_state_pool_return_chunk(pool, return_offset,
> >     >     +                                     return_size, alloc_size);
> >     >               goto done;
> >     >            }
> >     >         }
> >     >     --
> >     >     2.17.1
> >     >
> >     >     _______________________________________________
> >     >     mesa-dev mailing list
> >     >     mesa-dev@lists.freedesktop.org
> >     >     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >     >
> >
>