[Mesa-dev,1/3] util/disc_cache: use LRU eviction rather than random eviction

Submitted by Alan Swanson on March 6, 2017, 4:17 p.m.

Details

Message ID 20170306161732.12135-1-reiver@improbability.net
State New
Headers show
Series "Series without cover letter" ( rev: 1 ) in Mesa

Not browsing as part of any series.

Commit Message

Alan Swanson March 6, 2017, 4:17 p.m.
Still using random selection of two-character subdirectory in which
to check cache files rather than scanning entire cache.
---
 src/util/disk_cache.c | 77 +++++++++++++++++++++++----------------------------
 1 file changed, 34 insertions(+), 43 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 31a9336582..28f8ea4054 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -438,30 +438,30 @@  make_cache_file_directory(struct disk_cache *cache, const cache_key key)
    free(dir);
 }
 
-/* Given a directory path and predicate function, count all entries in
- * that directory for which the predicate returns true. Then choose a
- * random entry from among those counted.
+/* Given a directory path and predicate function, find the entry with
+ * the oldest access time in that directory for which the predicate
+ * returns true.
  *
  * Returns: A malloc'ed string for the path to the chosen file, (or
  * NULL on any error). The caller should free the string when
  * finished.
  */
 static char *
-choose_random_file_matching(const char *dir_path,
-                            bool (*predicate)(const struct dirent *,
-                                              const char *dir_path))
+choose_lru_file_matching(const char *dir_path,
+                         bool (*predicate)(const struct dirent *,
+                                           const char *dir_path))
 {
    DIR *dir;
    struct dirent *entry;
-   unsigned int count, victim;
+   struct stat sb;
+   char *tmp, *lru_name = NULL;
+   time_t lru_atime = 0;
    char *filename;
 
    dir = opendir(dir_path);
    if (dir == NULL)
       return NULL;
 
-   count = 0;
-
    while (1) {
       entry = readdir(dir);
       if (entry == NULL)
@@ -469,39 +469,28 @@  choose_random_file_matching(const char *dir_path,
       if (!predicate(entry, dir_path))
          continue;
 
-      count++;
-   }
-
-   if (count == 0) {
-      closedir(dir);
-      return NULL;
-   }
-
-   victim = rand() % count;
-
-   rewinddir(dir);
-   count = 0;
-
-   while (1) {
-      entry = readdir(dir);
-      if (entry == NULL)
-         break;
-      if (!predicate(entry, dir_path))
-         continue;
-      if (count == victim)
-         break;
-
-      count++;
+      if (fstatat(dirfd(dir), entry->d_name, &sb, 0) == 0) {
+         if (!lru_atime || (sb.st_atime < lru_atime)) {
+            tmp = realloc(lru_name, strlen(entry->d_name) + 1);
+            if (tmp) {
+               lru_name = tmp;
+               memcpy(lru_name, entry->d_name,
+                      strlen(entry->d_name) + 1);
+               lru_atime = sb.st_atime;
+            }
+         }
+      }
    }
 
-   if (entry == NULL) {
+   if (lru_name == NULL) {
       closedir(dir);
       return NULL;
    }
 
-   if (asprintf(&filename, "%s/%s", dir_path, entry->d_name) < 0)
+   if (asprintf(&filename, "%s/%s", dir_path, lru_name) < 0)
       filename = NULL;
 
+   free(lru_name);
    closedir(dir);
 
    return filename;
@@ -533,12 +522,12 @@  is_regular_non_tmp_file(const struct dirent *entry, const char *path)
 
 /* Returns the size of the deleted file, (or 0 on any error). */
 static size_t
-unlink_random_file_from_directory(const char *path)
+unlink_lru_file_from_directory(const char *path)
 {
    struct stat sb;
    char *filename;
 
-   filename = choose_random_file_matching(path, is_regular_non_tmp_file);
+   filename = choose_lru_file_matching(path, is_regular_non_tmp_file);
    if (filename == NULL)
       return 0;
 
@@ -581,7 +570,7 @@  is_two_character_sub_directory(const struct dirent *entry, const char *path)
 }
 
 static void
-evict_random_item(struct disk_cache *cache)
+evict_lru_item(struct disk_cache *cache)
 {
    const char hex[] = "0123456789abcde";
    char *dir_path;
@@ -591,6 +580,7 @@  evict_random_item(struct disk_cache *cache)
    /* With a reasonably-sized, full cache, (and with keys generated
     * from a cryptographic hash), we can choose two random hex digits
     * and reasonably expect the directory to exist with a file in it.
+    * Provides pseudo-LRU eviction to reduce checking all cache files.
     */
    a = rand() % 16;
    b = rand() % 16;
@@ -598,7 +588,7 @@  evict_random_item(struct disk_cache *cache)
    if (asprintf(&dir_path, "%s/%c%c", cache->path, hex[a], hex[b]) < 0)
       return;
 
-   size = unlink_random_file_from_directory(dir_path);
+   size = unlink_lru_file_from_directory(dir_path);
 
    free(dir_path);
 
@@ -608,18 +598,19 @@  evict_random_item(struct disk_cache *cache)
    }
 
    /* In the case where the random choice of directory didn't find
-    * something, we choose randomly from the existing directories.
+    * something, we choose the least recently accessed from the
+    * existing directories.
     *
     * Really, the only reason this code exists is to allow the unit
     * tests to work, (which use an artificially-small cache to be able
     * to force a single cached item to be evicted).
     */
-   dir_path = choose_random_file_matching(cache->path,
-                                          is_two_character_sub_directory);
+   dir_path = choose_lru_file_matching(cache->path,
+                                       is_two_character_sub_directory);
    if (dir_path == NULL)
       return;
 
-   size = unlink_random_file_from_directory(dir_path);
+   size = unlink_lru_file_from_directory(dir_path);
 
    free(dir_path);
 
@@ -788,7 +779,7 @@  disk_cache_put(struct disk_cache *cache,
     * else first.
     */
    if (*cache->size + size > cache->max_size)
-      evict_random_item(cache);
+      evict_lru_item(cache);
 
    /* Create CRC of the data and store at the start of the file. We will
     * read this when restoring the cache and use it to check for corruption.

Comments

On Mon, Mar 6, 2017 at 6:17 PM, Alan Swanson <reiver@improbability.net> wrote:
> Still using random selection of two-character subdirectory in which
> to check cache files rather than scanning entire cache.
> ---
>  src/util/disk_cache.c | 77 +++++++++++++++++++++++----------------------------
>  1 file changed, 34 insertions(+), 43 deletions(-)
>
> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> index 31a9336582..28f8ea4054 100644
> --- a/src/util/disk_cache.c
> +++ b/src/util/disk_cache.c
> @@ -438,30 +438,30 @@ make_cache_file_directory(struct disk_cache *cache, const cache_key key)
>     free(dir);
>  }
>
> -/* Given a directory path and predicate function, count all entries in
> - * that directory for which the predicate returns true. Then choose a
> - * random entry from among those counted.
> +/* Given a directory path and predicate function, find the entry with
> + * the oldest access time in that directory for which the predicate
> + * returns true.
>   *
>   * Returns: A malloc'ed string for the path to the chosen file, (or
>   * NULL on any error). The caller should free the string when
>   * finished.
>   */
>  static char *
> -choose_random_file_matching(const char *dir_path,
> -                            bool (*predicate)(const struct dirent *,
> -                                              const char *dir_path))
> +choose_lru_file_matching(const char *dir_path,
> +                         bool (*predicate)(const struct dirent *,
> +                                           const char *dir_path))
>  {
>     DIR *dir;
>     struct dirent *entry;
> -   unsigned int count, victim;
> +   struct stat sb;
> +   char *tmp, *lru_name = NULL;
> +   time_t lru_atime = 0;
>     char *filename;
>
>     dir = opendir(dir_path);
>     if (dir == NULL)
>        return NULL;
>
> -   count = 0;
> -
>     while (1) {
>        entry = readdir(dir);
>        if (entry == NULL)
> @@ -469,39 +469,28 @@ choose_random_file_matching(const char *dir_path,
>        if (!predicate(entry, dir_path))
>           continue;
>
> -      count++;
> -   }
> -
> -   if (count == 0) {
> -      closedir(dir);
> -      return NULL;
> -   }
> -
> -   victim = rand() % count;
> -
> -   rewinddir(dir);
> -   count = 0;
> -
> -   while (1) {
> -      entry = readdir(dir);
> -      if (entry == NULL)
> -         break;
> -      if (!predicate(entry, dir_path))
> -         continue;
> -      if (count == victim)
> -         break;
> -
> -      count++;
> +      if (fstatat(dirfd(dir), entry->d_name, &sb, 0) == 0) {
> +         if (!lru_atime || (sb.st_atime < lru_atime)) {
> +            tmp = realloc(lru_name, strlen(entry->d_name) + 1);
> +            if (tmp) {
> +               lru_name = tmp;
> +               memcpy(lru_name, entry->d_name,
> +                      strlen(entry->d_name) + 1);

Maybe avoid the double strlen()?
Either way, FWIW:
Reviewed-by: Grazvydas Ignotas <notasas@gmail.com>

GraÅžvydas