backends/json.py: Compress results with gzip.

Submitted by Dylan Baker on April 13, 2015, 10:04 p.m.

Details

Message ID 1428962689-7005-1-git-send-email-baker.dylan.c@gmail.com
State New, archived
Headers show

Not browsing as part of any series.

Commit Message

Dylan Baker April 13, 2015, 10:04 p.m.
For a standard run of quick.py this reduces the size of the results from
21M to 1.5M.

This does not remove support for uncompressed files.

I chose gzip as opposed to zip or bzip2 because it has good performance
for both compression and decompression, is part of the standard library
(the gzip module, backed by zlib), and is ubiquitous. For python3, which
has xz compression, it would be nice to implement xz support instead.

Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
---
 framework/backends/json.py            | 32 +++++++++++++++++++++++++++-----
 framework/tests/json_backend_tests.py |  9 +++++----
 framework/tests/json_tests.py         |  3 ++-
 3 files changed, 34 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/framework/backends/json.py b/framework/backends/json.py
index affd64e..f5f2374 100644
--- a/framework/backends/json.py
+++ b/framework/backends/json.py
@@ -25,6 +25,7 @@  import os
 import sys
 import shutil
 import posixpath
+import gzip
 
 try:
     import simplejson as json
@@ -133,7 +134,7 @@  class JSONBackend(FileBackend):
         assert data['tests']
 
         # write out the combined file.
-        with open(os.path.join(self._dest, 'results.json'), 'w') as f:
+        with gzip.open(os.path.join(self._dest, 'results.json.gz'), 'w') as f:
             json.dump(data, f, default=piglit_encoder,
                       indent=INDENT)
 
@@ -160,10 +161,25 @@  def load_results(filename):
     "main"
 
     """
+    # This function doesn't use the common with open() pattern. This is
+    # intentional: we use two different open functions, __builtin__.open and
+    # gzip.open, and choose between them in the if/elif tree below. This
+    # actually reduces the amount of code that we have in this function.
+
     # This will load any file or file-like thing. That would include pipes and
     # file descriptors
     if not os.path.isdir(filename):
         filepath = filename
+        if os.path.splitext(filepath)[1] == '.gz':
+            # There is still a possible problem of a file being passed that is
+            # gzipped, but doesn't have a '.gz' extension. However, the gzip
+            # tool doesn't normally work without '.gz', so I think it's fair to
+            # just assume that we will have gz.
+            f = gzip.open(filepath)
+        else:
+            f = open(filepath, 'r')
+        testrun = _load(f)
+        f.close()
     elif os.path.exists(os.path.join(filename, 'metadata.json')):
         # If the test is still running we need to use the resume code, since
         # there will not be a results.json file.
@@ -180,17 +196,23 @@  def load_results(filename):
     else:
         # If there are both old and new results in a directory pick the new
         # ones first
-        if os.path.exists(os.path.join(filename, 'results.json')):
+        if os.path.exists(os.path.join(filename, 'results.json.gz')):
+            filepath = os.path.join(filename, 'results.json.gz')
+            f = gzip.open(filepath, 'rb')
+        elif os.path.exists(os.path.join(filename, 'results.json')):
             filepath = os.path.join(filename, 'results.json')
+            f = open(filepath, 'r')
         # Version 0 results are called 'main'
         elif os.path.exists(os.path.join(filename, 'main')):
             filepath = os.path.join(filename, 'main')
+            f = open(filepath, 'r')
         else:
             raise errors.ResultsLoadError('No results found in "{}"'.format(
-                filename))
+                filepath))
 
-    with open(filepath, 'r') as f:
+        # There may need to be an exception caught here.
         testrun = _load(f)
+        f.close()
 
     return _update_results(testrun, filepath)
 
@@ -524,7 +546,7 @@  def _update_four_to_five(results):
 
 
 REGISTRY = Registry(
-    extensions=['', '.json'],
+    extensions=['', '.json', '.json.gz'],
     backend=JSONBackend,
     load=load_results,
     meta=set_meta,
diff --git a/framework/tests/json_backend_tests.py b/framework/tests/json_backend_tests.py
index 45626e8..ba52ffd 100644
--- a/framework/tests/json_backend_tests.py
+++ b/framework/tests/json_backend_tests.py
@@ -24,6 +24,7 @@ 
 
 from __future__ import print_function, absolute_import
 import os
+import gzip
 
 try:
     import simplejson as json
@@ -134,12 +135,12 @@  class TestJSONTestFinalize(utils.StaticDirectory):
         assert not os.path.exists(os.path.join(self.tdir, 'tests'))
 
     def test_create_results(self):
-        """ JSONBackend.finalize() creates a results.json file """
-        assert os.path.exists(os.path.join(self.tdir, 'results.json'))
+        """ JSONBackend.finalize() creates a results.json.gz file """
+        assert os.path.exists(os.path.join(self.tdir, 'results.json.gz'))
 
     def test_results_valid(self):
-        """ JSONBackend.finalize() results.json is valid """
-        with open(os.path.join(self.tdir, 'results.json'), 'r') as f:
+        """ JSONBackend.finalize() results.json.gz is valid """
+        with gzip.open(os.path.join(self.tdir, 'results.json.gz'), 'rb') as f:
             try:
                 json.load(f)
             except Exception as e:
diff --git a/framework/tests/json_tests.py b/framework/tests/json_tests.py
index 70a501a..0cc1b4b 100644
--- a/framework/tests/json_tests.py
+++ b/framework/tests/json_tests.py
@@ -28,6 +28,7 @@  tests and they will change with each version of the json output.
 
 from __future__ import print_function, absolute_import
 import os
+import gzip
 
 import nose.tools as nt
 try:
@@ -65,7 +66,7 @@  class TestJsonOutput(utils.StaticDirectory):
         backend.initialize(_create_metadata(args, 'test', core.Options()))
         backend.write_test('result', {'result': 'pass'})
         backend.finalize({'time_elapsed': 1.22})
-        with open(os.path.join(cls.tdir, 'results.json'), 'r') as f:
+        with gzip.open(os.path.join(cls.tdir, 'results.json.gz'), 'r') as f:
             cls.json = json.load(f)
 
     def test_root_results_version(self):