[v2] backends/json.py: Compress results with gzip.

Submitted by Dylan Baker on April 23, 2015, 10:44 p.m.

Details

Message ID 1429829059-3937-1-git-send-email-baker.dylan.c@gmail.com
State New, archived
Headers show

Not browsing as part of any series.

Commit Message

Dylan Baker April 23, 2015, 10:44 p.m.
For a standard run of quick.py this reduces the size of the results from
21M to 1.5M.

This does not remove support for uncompressed files.

I chose gzip as opposed to zip or bzip2 because it has good performance
for both compression and decompression, is part of the standard library
(backed by zlib), and is ubiquitous. For python3, which has xz
compression, it would be nice to implement xz support instead.

v2: - fix framework.backends.load to handle compressed suffixes (fixes
      summary generation)

Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
---
 framework/backends/__init__.py        | 28 +++++++++++++++++++---------
 framework/backends/json.py            | 32 +++++++++++++++++++++++++++-----
 framework/tests/json_backend_tests.py | 22 ++++++++++++++++++----
 framework/tests/json_tests.py         |  3 ++-
 4 files changed, 66 insertions(+), 19 deletions(-)

Patch hide | download patch | download mbox

diff --git a/framework/backends/__init__.py b/framework/backends/__init__.py
index e2f199a..e35c217 100644
--- a/framework/backends/__init__.py
+++ b/framework/backends/__init__.py
@@ -117,18 +117,28 @@  def load(file_path):
     then return the TestrunResult instance.
 
     """
+    def get_extension(file_path):
+        """Helper function to get the extension string."""
+        name, extension = os.path.splitext(file_path)
+        # If we hit a compressed suffix, get an additional suffix to test with.
+        # i.e. use .json.gz rather than .gz
+        if extension == '.gz':
+            extension = os.path.splitext(name)[1] + extension
+        elif not extension:
+            extension = ''
+
+        return extension
+
     extension = None
 
     if os.path.isfile(file_path):
-        extension = os.path.splitext(file_path)[1]
-        if not extension:
-            extension = ''
+        extension = get_extension(file_path)
     else:
-        for file in os.listdir(file_path):
-            if file.startswith('result'):
-                extension = os.path.splitext(file)[1]
+        for file_ in os.listdir(file_path):
+            if file_.startswith('result'):
+                extension = get_extension(file_)
                 break
-            elif file == 'main':
+            elif file_ == 'main':
                 extension = ''
                 break
     tests = os.path.join(file_path, 'tests')
@@ -136,8 +146,8 @@  def load(file_path):
         if os.path.exists(tests):
             extension = os.path.splitext(os.listdir(tests)[0])[1]
         else:
-            # At this point we have failed to find any sort of backend, just except
-            # and die
+            # At this point we have failed to find any sort of backend, just
+            # except and die
             raise BackendError("No backend found for any file in {}".format(
                 file_path))
 
diff --git a/framework/backends/json.py b/framework/backends/json.py
index 2034ecd..55e9357 100644
--- a/framework/backends/json.py
+++ b/framework/backends/json.py
@@ -25,6 +25,7 @@  import os
 import sys
 import shutil
 import posixpath
+import gzip
 
 try:
     import simplejson as json
@@ -140,7 +141,7 @@  class JSONBackend(FileBackend):
         assert data['tests']
 
         # write out the combined file.
-        with open(os.path.join(self._dest, 'results.json'), 'w') as f:
+        with gzip.open(os.path.join(self._dest, 'results.json.gz'), 'w') as f:
             json.dump(data, f, default=piglit_encoder,
                       indent=INDENT)
 
@@ -167,10 +168,25 @@  def load_results(filename):
     "main"
 
     """
+    # This function doesn't use the common with open() pattern. This is
+    # intentional: we use two different open functions, __builtin__.open and
+    # gzip.open, and choose between them in the if/else branches below. This
+    # actually reduces the amount of code that we have in this function.
+
     # This will load any file or file-like thing. That would include pipes and
     # file descriptors
     if not os.path.isdir(filename):
         filepath = filename
+        if os.path.splitext(filepath)[1] == '.gz':
+            # There is still a possible problem of a file being passed that is
+            # gzipped, but doesn't have a '.gz' extension. However, the gzip
+            # tool doesn't normally work without a '.gz' suffix, so I think
+            # it's fair to just assume that gzipped files will end in '.gz'.
+            f = gzip.open(filepath)
+        else:
+            f = open(filepath, 'r')
+        testrun = _load(f)
+        f.close()
     elif os.path.exists(os.path.join(filename, 'metadata.json')):
         # If the test is still running we need to use the resume code, since
         # there will not be a results.json file.
@@ -187,17 +203,23 @@  def load_results(filename):
     else:
         # If there are both old and new results in a directory pick the new
         # ones first
-        if os.path.exists(os.path.join(filename, 'results.json')):
+        if os.path.exists(os.path.join(filename, 'results.json.gz')):
+            filepath = os.path.join(filename, 'results.json.gz')
+            f = gzip.open(filepath, 'rb')
+        elif os.path.exists(os.path.join(filename, 'results.json')):
             filepath = os.path.join(filename, 'results.json')
+            f = open(filepath, 'r')
         # Version 0 results are called 'main'
         elif os.path.exists(os.path.join(filename, 'main')):
             filepath = os.path.join(filename, 'main')
+            f = open(filepath, 'r')
         else:
             raise errors.ResultsLoadError('No results found in "{}"'.format(
-                filename))
+                filepath))
 
-    with open(filepath, 'r') as f:
+        # There may need to be an exception caught here.
         testrun = _load(f)
+        f.close()
 
     return _update_results(testrun, filepath)
 
@@ -522,7 +544,7 @@  def _update_four_to_five(results):
 
 
 REGISTRY = Registry(
-    extensions=['', '.json'],
+    extensions=['', '.json', '.json.gz'],
     backend=JSONBackend,
     load=load_results,
     meta=set_meta,
diff --git a/framework/tests/json_backend_tests.py b/framework/tests/json_backend_tests.py
index 6c0ee6b..118be2e 100644
--- a/framework/tests/json_backend_tests.py
+++ b/framework/tests/json_backend_tests.py
@@ -24,6 +24,7 @@ 
 
 from __future__ import print_function, absolute_import
 import os
+import gzip
 
 try:
     import simplejson as json
@@ -134,12 +135,12 @@  class TestJSONTestFinalize(utils.StaticDirectory):
         assert not os.path.exists(os.path.join(self.tdir, 'tests'))
 
     def test_create_results(self):
-        """ JSONBackend.finalize() creates a results.json file """
-        assert os.path.exists(os.path.join(self.tdir, 'results.json'))
+        """ JSONBackend.finalize() creates a results.json.gz file """
+        assert os.path.exists(os.path.join(self.tdir, 'results.json.gz'))
 
     def test_results_valid(self):
-        """ JSONBackend.finalize() results.json is valid """
-        with open(os.path.join(self.tdir, 'results.json'), 'r') as f:
+        """ JSONBackend.finalize() results.json.gz is valid """
+        with gzip.open(os.path.join(self.tdir, 'results.json.gz'), 'rb') as f:
             try:
                 json.load(f)
             except Exception as e:
@@ -288,6 +289,19 @@  def test_load_json():
     nt.assert_in('sometest', result.tests)
 
 
+def test_load_json_gz():
+    """backends.load(): Loads .json.gz files"""
+    with utils.tempdir() as tdir:
+        filename = os.path.join(tdir, 'results.json.gz')
+        with gzip.open(filename, 'w') as f:
+            json.dump(utils.JSON_DATA, f)
+
+        result = backends.load(filename)
+
+    nt.assert_is_instance(result, results.TestrunResult)
+    nt.assert_in('sometest', result.tests)
+
+
 def test_piglit_decoder():
     """backends.json.piglit_decoder: Works correctly"""
     test = json.loads('{"foo": {"result": "pass"}}',
diff --git a/framework/tests/json_tests.py b/framework/tests/json_tests.py
index 70a501a..0cc1b4b 100644
--- a/framework/tests/json_tests.py
+++ b/framework/tests/json_tests.py
@@ -28,6 +28,7 @@  tests and they will change with each version of the json output.
 
 from __future__ import print_function, absolute_import
 import os
+import gzip
 
 import nose.tools as nt
 try:
@@ -65,7 +66,7 @@  class TestJsonOutput(utils.StaticDirectory):
         backend.initialize(_create_metadata(args, 'test', core.Options()))
         backend.write_test('result', {'result': 'pass'})
         backend.finalize({'time_elapsed': 1.22})
-        with open(os.path.join(cls.tdir, 'results.json'), 'r') as f:
+        with gzip.open(os.path.join(cls.tdir, 'results.json.gz'), 'r') as f:
             cls.json = json.load(f)
 
     def test_root_results_version(self):