+# Define some simple data logging classes for consistency
import os
import os.path
import stat
import string
import time

try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3: the accelerated implementation is built in

import numpy
class error(Exception):
    """Root exception type for errors raised by this module."""
class errorDirExists(error):
    """Signals that the requested directory is already present."""
class data_log(object):
    """
    Data logging class.

    Creates consistent, timestamped log files.

    Initialized with log_dir and log_name.

    log_dir specifies the base data directory.  If it doesn't exist,
    log_dir is created (together with any missing parent directories).

    A subdir of log_dir is created (if necessary).  It is named after the
    `timestamp` argument when one is given, and YYYYMMDD (the current day
    in localtime) otherwise.  If noclobber_logsubdir == True, this dir
    must not exist yet; errorDirExists is raised if it does.

    log_name specifies the base name for the created log files (in the
    log subdir).  The created log filenames are prefixed with a
    YYYYMMDDHHMMSS timestamp.  If the target filename already exists,
    the filename is postfixed with '_N', where N is the lowest positive
    integer that doesn't clobber an existing file.

    General data is saved to the log files with the write(obj) method.
    By default, write() pickles the object passed.  You can save in
    other formats by overriding write().

    Binary data can be saved directly to the log files with the
    write_binary(binary_string) method.

    A dict of numpy arrays can be saved with write_dict_of_arrays(d).

    log_name, and the dict keys used as file-name suffixes, are stripped
    of possibly troublesome characters (see _clean_filename).
    """

    def __init__(self, log_dir=".", noclobber_logsubdir=False,
                 log_name="log", timestamp=None):
        # Characters that survive cleaning unchanged.  ascii_letters is used
        # instead of the locale-dependent string.letters so that the set of
        # legal file-name characters never varies with the process locale.
        unaltered_chars = "-._" + string.digits + string.ascii_letters
        # Characters that are replaced rather than deleted.
        mapped_pairs = {' ': '_'}
        allowed_chars = set(unaltered_chars)
        allowed_chars.update(mapped_pairs)
        # Build the 256-entry tables by hand: string.maketrans and the
        # two-argument str.translate only exist on Python 2.
        all_chars = "".join(chr(code) for code in range(256))
        self.delete_chars = "".join(
            c for c in all_chars if c not in allowed_chars)
        self.transtable = "".join(mapped_pairs.get(c, c) for c in all_chars)

        self._log_name = self._clean_filename(log_name)
        self._log_dir = self._create_logdir(log_dir)  # will not clobber.
        subdir, timestamp = self._create_logsubdir(self._log_dir,
                                                   noclobber_logsubdir,
                                                   timestamp)
        self.subdir = subdir
        self.timestamp = timestamp

    def _clean_filename(self, filename):
        """
        Return `filename` with every character listed in self.delete_chars
        removed and the remaining ones mapped through self.transtable.
        Characters whose code point lies beyond the table are removed too.

        Currently only works on filenames, since it deletes '/'.
        If you need it to work on full paths, use
        os.path.split(your_path)[1] to strip off the filename portion...
        """
        table = self.transtable
        # Compare by code point so byte strings and unicode strings are
        # filtered identically.
        doomed = frozenset(ord(c) for c in self.delete_chars)
        kept = []
        for char in filename:
            code = ord(char)
            if code < len(table) and code not in doomed:
                kept.append(table[code])
        return "".join(kept)

    def _create_logdir(self, log_dir):
        """
        Expand '~' in `log_dir`, create the directory if nothing exists at
        that path yet, and return the expanded path.
        """
        log_dir = os.path.expanduser(log_dir)
        if not os.path.exists(log_dir):
            # makedirs (rather than mkdir) so nested base dirs work too.
            os.makedirs(log_dir, 0o755)
        return log_dir

    def _create_logsubdir(self, log_dir, noclobber_logsubdir,
                          timestamp=None):
        """
        Create the subdirectory `timestamp` (default: today as YYYYMMDD)
        inside `log_dir` unless it already exists.

        Raises errorDirExists if it exists and noclobber_logsubdir is true.
        Returns (subdir, timestamp)
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d")
        subdir = os.path.join(log_dir, timestamp)
        if os.path.exists(subdir):
            if noclobber_logsubdir:
                raise errorDirExists("%s exists" % subdir)
        else:
            os.mkdir(subdir, 0o755)
        return (subdir, timestamp)

    def get_filename(self, timestamp=None):
        """
        Get a filename (using localtime if timestamp is None),
        appending integers as necessary to avoid clobbering.
        For use in write() routines.
        Returns (filepath, timestamp)
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d%H%M%S")
        filename = "%s_%s" % (timestamp, self._log_name)
        fullname = os.path.join(self.subdir, filename)
        filepath = fullname
        suffix = 1
        while os.path.exists(filepath):
            filepath = "%s_%d" % (fullname, suffix)
            suffix += 1
        return (filepath, timestamp)

    def write(self, obj, timestamp=None):
        """
        Save object to a timestamped file with pickle.
        If timestamp is None, use the current localtime.
        Returns (filepath, timestamp)
        """
        filepath, timestamp = self.get_filename(timestamp)
        # The with-block closes the file even if pickling fails.
        with open(filepath, 'wb') as fd:
            os.chmod(filepath, 0o644)
            pickle.dump(obj, fd)
        return (filepath, timestamp)

    def write_binary(self, binary_string, timestamp=None):
        """
        Save binary_string to a timestamped file.
        If timestamp is None, use the current localtime.
        Returns (filepath, timestamp)
        """
        filepath, timestamp = self.get_filename(timestamp)
        # Open a new file write-only; O_EXCL refuses to clobber.
        fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
        try:
            bytes_written = 0
            total = len(binary_string)
            # os.write may write fewer bytes than asked; loop until done.
            while bytes_written < total:
                bytes_written += os.write(fd, binary_string[bytes_written:])
        finally:
            os.close(fd)
        return (filepath, timestamp)

    def _write_dict_of_arrays(self, d, base_filepath):
        """
        Write a table-of-contents file at `base_filepath` with one
        "key : file-extension : format" line per entry of `d`, and dump
        each array to "<base_filepath>_<cleaned key>" with tofile().
        """
        with open(base_filepath, 'w') as bfd:
            # The permission mode used to be passed as open()'s third
            # argument, which is the buffer size; set it explicitly instead.
            os.chmod(base_filepath, 0o644)
            bfd.write("Contents (key : file-extension : format):\n")
            for key in d:
                clean_key = self._clean_filename(key)
                bfd.write("%s : %s : %s\n"
                          % (key, clean_key, str(d[key].dtype)))
                # write the keyed array to its own file
                filepath = "%s_%s" % (base_filepath, clean_key)
                d[key].tofile(filepath)

    def write_dict_of_arrays(self, d, timestamp=None):
        """
        Save dict of (string, numpy_array) pairs to timestamped files.
        If timestamp is None, use the current localtime.
        Returns (base_filepath, timestamp)
        """
        base_filepath, timestamp = self.get_filename(timestamp)
        self._write_dict_of_arrays(d, base_filepath)
        return (base_filepath, timestamp)
class data_load(object):
    """
    Loads data logged by data_log.
    """

    def read(self, file):
        """
        Load an object saved with data_log.write()

        NOTE: unpickling can execute arbitrary code embedded in the file;
        only use this on log files you trust.
        """
        with open(file, 'rb') as fd:
            return pickle.load(fd)

    def read_binary(self, file):
        """
        Load an object saved with data_log.write_binary()
        The file-name must not have been altered.

        Not implemented yet: always raises NotImplementedError.
        """
        raise NotImplementedError("not implemented")

    def read_dict_of_arrays(self, basefile):
        """
        Load an object saved with data_log.write_dict_of_arrays()
        The file-names must not have been altered.

        `basefile` is the table-of-contents file; each line after its
        header names a key, the suffix of the array's file, and the
        array's dtype.  Returns a dict mapping each key to the array
        read back from "<basefile>_<suffix>".
        """
        obj = {}
        realbasefile = os.path.realpath(basefile)
        with open(realbasefile) as toc:
            next(toc, None)  # ignore the header line
            for line in toc:
                line = line.rstrip('\r\n')
                if not line:
                    continue
                # Split from the right: the suffix and dtype fields never
                # contain ' : ', but the original key may.
                name, extension, fmt = line.rsplit(' : ', 2)
                fpath = "%s_%s" % (realbasefile, extension)
                # Resolve the dtype by name instead of exec'ing file text.
                typ = numpy.dtype(fmt.strip())
                obj[name] = numpy.fromfile(fpath, dtype=typ)
        return obj
+_test_dir = "."
def _check_data_logsubdir_clobber():
    """
    Check that a second data_log with noclobber_logsubdir=True refuses to
    reuse the subdir created by the first one.

    Raises error if the existing subdir goes undetected.  The subdir is
    removed afterwards whether or not the check passes.
    """
    log1 = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        try:
            data_log(_test_dir, noclobber_logsubdir=True)
        except errorDirExists:
            pass  # everything as it should be
        else:
            raise error("Didn't detect old log")
    finally:
        # Clean up even on failure so the next run starts from scratch.
        os.rmdir(log1.subdir)
def _check_data_log_filenames():
    """
    Write the same object ten times and list the log subdir so the
    generated file names can be inspected by eye.

    All written files and the subdir are removed afterwards, even if a
    write fails part-way through.
    """
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    written = []
    try:
        for _ in range(10):
            filepath, _ts = log.write(data)
            written.append(filepath)
        print("Contents of log directory (should be 10 identical logs)")
        os.system('ls -l %s' % log.subdir)
    finally:
        for filepath in written:
            os.remove(filepath)
        os.rmdir(log.subdir)
def _check_data_log_pickle_integrity():
    """
    Round-trip an object through data_log.write() and pickle.load().

    Raises error if the object read back differs from the one saved.
    The log file and subdir are removed afterwards in either case.
    """
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath = None
    try:
        filepath, _ts = log.write(data)
        # read it back in
        with open(filepath, 'rb') as fd:
            data_in = pickle.load(fd)
        # compare
        if data != data_in:
            print("Saved :  %s" % (data,))
            print("Read back:  %s" % (data_in,))
            raise error("Poorly pickled")
    finally:
        if filepath is not None and os.path.exists(filepath):
            os.remove(filepath)
        os.rmdir(log.subdir)
def _check_data_log_binary_integrity():
    """
    Round-trip 100 uint16 values through data_log.write_binary() and
    numpy.fromfile().

    Raises error if the element count or any element differs.  The log
    file and subdir are removed afterwards in either case.
    """
    from numpy import arange, uint16, fromfile
    npts = 100
    data = arange(npts, dtype=uint16)  # 0, 1, ..., npts-1
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath = None
    try:
        # tobytes() replaces the deprecated ndarray.tostring().
        filepath, _ts = log.write_binary(data.tobytes())
        # read it back in
        data_in = fromfile(filepath, dtype=uint16, count=-1)
        # compare
        if npts != len(data_in):
            raise error("Saved %d uint16s, read %d" % (npts, len(data_in)))
        for i, (saved, loaded) in enumerate(zip(data, data_in)):
            if loaded != saved:
                print("Dissagreement in element %d" % i)
                print("Saved %d, read back %d" % (saved, loaded))
                raise error("Poorly saved")
    finally:
        if filepath is not None and os.path.exists(filepath):
            os.remove(filepath)
        os.rmdir(log.subdir)
def _check_data_loc_dict_of_arrays():
    """
    Save two uint16 arrays with data_log.write_dict_of_arrays(), one of
    them under a key full of characters that get cleaned away, then read
    both array files back with numpy.fromfile() and compare.

    Raises error on the first differing element.  The table-of-contents
    file, both array files and the subdir are removed afterwards.
    """
    from numpy import arange, uint16, fromfile
    npts = 100
    data1 = arange(npts, dtype=uint16)         # 0, 1, ..., npts-1
    data2 = arange(npts, 0, -1, dtype=uint16)  # npts, npts-1, ..., 1
    # The backslash is doubled so the literal stays "d\/at:$a 2" without
    # relying on the invalid escape sequence "\/".
    data = {"data1": data1, "d\\/at:$a 2": data2}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath = None
    try:
        filepath, _ts = log.write_dict_of_arrays(data)
        # checking
        print("Contents of log directory (should be 3 logs)")
        os.system('ls -l %s' % log.subdir)
        print("The table of contents file:")
        os.system('cat %s' % (filepath))
        data1_in = fromfile(filepath + "_data1", dtype=uint16)
        data2_in = fromfile(filepath + "_data_2", dtype=uint16)
        for i in range(npts):
            if data1_in[i] != data1[i]:
                print("Dissagreement in element %d of data1" % i)
                print("Saved %d, read back %d" % (data1[i], data1_in[i]))
                raise error("Poorly saved")
            if data2_in[i] != data2[i]:
                print("Dissagreement in element %d of data2" % i)
                print("Saved %d, read back %d" % (data2[i], data2_in[i]))
                raise error("Poorly saved")
    finally:
        if filepath is not None:
            for path in (filepath, filepath + "_data1",
                         filepath + "_data_2"):
                if os.path.exists(path):
                    os.remove(path)
        os.rmdir(log.subdir)
def test():
    """Run each of the module's self-checks once, in a fixed order."""
    checks = (
        _check_data_logsubdir_clobber,
        _check_data_log_filenames,
        _check_data_log_pickle_integrity,
        _check_data_log_binary_integrity,
        _check_data_loc_dict_of_arrays,
    )
    for check in checks:
        check()
# Run the self-checks when this file is executed as a script.
if __name__ == "__main__":
    test()