Add hashed caching to mkogg.py.
authorW. Trevor King <wking@drexel.edu>
Tue, 16 Nov 2010 17:16:02 +0000 (12:16 -0500)
committerW. Trevor King <wking@drexel.edu>
Tue, 16 Nov 2010 17:16:02 +0000 (12:16 -0500)
posts/mkogg/mkogg.py

index 3203b30574c00f2a95ba0dfd94c81a4c9a253cf5..da374c7b55961037a81dcff28f3a6f821b05688a 100755 (executable)
@@ -40,11 +40,15 @@ External packages required for full functionality:
 .. _vorbis: http://www.vorbis.com
 """
 
+from hashlib import sha256 as _hash
+import os
+import os.path
 import shutil
 from subprocess import Popen, PIPE
 from tempfile import mkstemp
-import os
-import os.path
+
+
+__version__ = '0.2'
 
 
 def invoke(args, stdin=None, expect=(0,)):
@@ -71,19 +75,50 @@ class Converter (object):
 
     .. _suggestions: http://www.xiph.org/vorbis/doc/v-comment.html
     """
-    def __init__(self, source_dir, target_dir, target_extension='ogg'):
+    def __init__(self, source_dir, target_dir, target_extension='ogg',
+                 cache_file=None):
         self.source_dir = source_dir
         self.target_dir = target_dir
         self._source_extensions = ['flac', 'mp3', 'ogg']
         self._target_extension = target_extension
+        self._cache_file = cache_file
+        self._cache = self._read_cache()
         f,self._tempfile = mkstemp(prefix='mkogg-')
 
     def cleanup(self):
         os.remove(self._tempfile)
-
-    def _makedirs(self, target_dir):
-        if not os.path.exists(target_dir):
-            os.makedirs(target_dir)
+        self._save_cache()
+
+    def _read_cache(self):
+        cache = {}
+        if self._cache_file == None:
+            return cache
+        try:
+            with open(self._cache_file, 'r') as f:
+                line = f.readline()
+                assert line.startswith('# mkogg cache version:'), line
+                version = line.split(':', 1)[-1].strip()
+                if version != __version__:
+                    print 'cache version mismatch: %s != %s' % (
+                        version, __version__)
+                    return cache  # old cache, ignore contents
+                for line in f:
+                    try:
+                        key,value = [x.strip() for x in line.split(' -> ')]
+                    except ValueError:
+                        pass
+                    cache[key] = value
+        except IOError:
+            pass
+        return cache
+
+    def _save_cache(self):
+        if self._cache_file == None:
+            return
+        with open(self._cache_file, 'w') as f:
+            f.write('# mkogg cache version: %s\n' % __version__)
+            for key,value in self._cache.iteritems():
+                f.write('%s -> %s\n' % (key, value))
 
     def run(self):
         self._makedirs(self.target_dir)
@@ -105,20 +140,58 @@ class Converter (object):
                 self._makedirs(target_dir)
                 self._convert(source_path, target_path, ext)
 
+    def _makedirs(self, target_dir):
+        if not os.path.exists(target_dir):
+            os.makedirs(target_dir)
+
     def _convert(self, source, target, ext):
+        cache_key = self._cache_key(source)
+        old_cache_value = self._cache.get(cache_key, None)
+        if (old_cache_value != None and
+            old_cache_value == self._cache_value(target)):
+            print 'cached %s to %s' % (source, target)
+            return
         print 'convert %s to %s' % (source, target)
         if ext == self._target_extension:
             shutil.copy(source, target)
-        else:
-            convert = getattr(self, 'convert_%s_to_%s'
-                              % (ext, self._target_extension))
-            convert(source, target)
-            if not getattr(convert, 'handles_metadata', False):
-                get_metadata = getattr(self, 'get_%s_metadata' % ext)
-                metadata = get_metadata(source)
-                set_metadata = getattr(self, 'set_%s_metadata'
-                                       % self._target_extension)
-                set_metadata(target, metadata)
+            return
+        convert = getattr(self, 'convert_%s_to_%s'
+                          % (ext, self._target_extension))
+        convert(source, target)
+        if not getattr(convert, 'handles_metadata', False):
+            get_metadata = getattr(self, 'get_%s_metadata' % ext)
+            metadata = get_metadata(source)
+            set_metadata = getattr(self, 'set_%s_metadata'
+                                   % self._target_extension)
+            set_metadata(target, metadata)
+        self._cache[cache_key] = self._cache_value(target)
+
+    def _cache_key(self, source):
+        return repr((self._file_hash(source), self._target_extension))
+
+    def _cache_value(self, target):
+        return self._file_hash(target)
+
+    def _file_hash(self, filename):
+        """
+        Examples
+        --------
+        >>> c = Converter(None, None)
+        >>> h = c._file_hash(__file__)
+        >>> len(h)
+        64
+        >>> c._file_hash('/highly/unlikely/to/exist') == None
+        True
+        >>> c.cleanup()
+        """
+        h = _hash()
+        try:
+            with open(filename, 'r') as f:
+                for line in f:
+                    h.update(line)
+        except IOError:
+            return None
+        return str(h.hexdigest())
 
     def _parse_date(self, date):
         """Parse `date` (`YYYY[-MM[-DD]]`), returning `(year, month, day)`.
@@ -333,6 +406,9 @@ if __name__ == '__main__':
     p.add_option('-t', '--target-extension', dest='ext',
                  default='ogg', metavar='EXT',
                  help='Conversion target type (e.g. flac, mp3) (%default)')
+    p.add_option('-c', '--cache', dest='cache', metavar='PATH',
+                 help=('Save conversion hashes in a cache file to avoid '
+                       'repeated previous conversions.'))
     p.add_option('--test', dest='test', action='store_true', default=False,
                  help='Run internal tests and exit')
 
@@ -342,7 +418,8 @@ if __name__ == '__main__':
         sys.exit(test())
 
     source_dir,target_dir = args
-    c = Converter(source_dir, target_dir, target_extension=options.ext)
+    c = Converter(source_dir, target_dir, target_extension=options.ext,
+                  cache_file=options.cache)
     try:
         c.run()
     finally: