#!/usr/bin/python
#
# Define some simple data logging classes for consistency
#
# Recovered from the patch that introduced this module: commit afc0829c
# ("Started versioning.", W. Trevor King, 2008-10-09).

import errno
import os
import os.path
import stat
import string
import time

try:
    import cPickle as pickle  # Python 2: C-accelerated pickle
except ImportError:
    import pickle  # Python 3: the accelerated implementation is built in

import numpy


class error (Exception):
    "Basic module error class"


class errorDirExists (error):
    "The specified directory already exists"


class data_log (object):
    """
    Data logging class.
    Creates consistent, timestamped log files.

    Initialized with log_dir and log_name.
    log_dir specifies the base data directory.
    If it doesn't exist, log_dir is created (including missing parents).

    A subdir of log_dir is created (if necessary) named YYYYMMDD,
    where YYYYMMDD is the current day in localtime.
    If noclobber_logsubdir == True, this dir must not exist yet.

    log_name specifies the base name for the created log files
    (in the log subdir).
    The created log filenames are prefixed with a YYYYMMDDHHMMSS timestamp.
    If the target filename already exists, the filename is postfixed with
    '_N', where N is the lowest integer that doesn't clobber an existing file.

    General data is saved to the log files with the write(obj) method.
    By default, write() pickles the object passed.
    You can save in other formats by overriding write()

    Binary data can be saved directly to the log files with the
    write_binary(binary_string) method.

    log_name and the per-array file extensions are stripped of possibly
    troublesome characters.
    """
    def __init__(self, log_dir=".", noclobber_logsubdir=False,
                 log_name="log",
                 timestamp=None):
        """
        log_dir             -- base directory for all logs (created if missing)
        noclobber_logsubdir -- if True, raise errorDirExists when the
                               timestamped subdirectory already exists
        log_name            -- base name for log files (cleaned before use)
        timestamp           -- name of the subdirectory; defaults to the
                               current local date as YYYYMMDD
        """
        # Characters that survive cleaning unchanged, and characters that
        # are replaced rather than deleted.  Everything else is dropped.
        unaltered_chars = "-._" + string.digits + string.ascii_letters
        mapped_pairs = {' ': '_'}
        self._char_map = {c: c for c in unaltered_chars}
        self._char_map.update(mapped_pairs)

        # Kept for backward compatibility with code that reads these
        # attributes; _clean_filename() itself only needs _char_map.
        latin1 = [chr(i) for i in range(256)]
        self.delete_chars = "".join(
            c for c in latin1 if c not in self._char_map)
        self.transtable = "".join(mapped_pairs.get(c, c) for c in latin1)

        self._log_name = self._clean_filename(log_name)
        self._log_dir = self._create_logdir(log_dir)  # will not clobber.
        subdir, timestamp = self._create_logsubdir(self._log_dir,
                                                   noclobber_logsubdir,
                                                   timestamp)
        self.subdir = subdir
        self.timestamp = timestamp

    def _clean_filename(self, filename):
        """
        Return filename with spaces mapped to '_' and every character
        outside "-._", digits and ASCII letters removed.

        Currently only works on filenames, since it deletes '/'.
        If you need it to work on full paths, use
        os.path.split(your_path)[1] to strip off the filename portion...
        """
        return "".join(self._char_map[c] for c in filename
                       if c in self._char_map)

    def _create_logdir(self, log_dir):
        """
        Expand '~' in log_dir, create the directory (and any missing
        parents) if needed, and return the expanded path.
        Raises OSError if the path cannot be created.
        """
        log_dir = os.path.expanduser(log_dir)
        try:
            os.makedirs(log_dir, 0o755)
        except OSError:
            # Already existing is fine; anything else is a real failure.
            if not os.path.isdir(log_dir):
                raise
        return log_dir

    def _create_logsubdir(self, log_dir, noclobber_logsubdir,
                          timestamp=None):
        """
        Create log_dir/timestamp and return (subdir, timestamp).
        timestamp defaults to the current local date (YYYYMMDD).
        Raises errorDirExists if the subdir already exists and
        noclobber_logsubdir is true.
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d")  # %H%M%S
        subdir = os.path.join(log_dir, timestamp)
        # Try to create first instead of checking for existence, so two
        # loggers started at the same moment cannot both "win".
        try:
            os.mkdir(subdir, 0o755)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            if noclobber_logsubdir:
                raise errorDirExists("%s exists" % subdir)
        return (subdir, timestamp)

    def _open_new(self, filepath, binary=True):
        """
        Create filepath exclusively and return a file object open for
        writing.  Raises OSError if filepath already exists, so an
        existing log is never truncated.
        """
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        if binary:
            # O_BINARY only exists (and only matters) on Windows.
            flags |= getattr(os, "O_BINARY", 0)
        fd = os.open(filepath, flags, 0o644)
        try:
            return os.fdopen(fd, "wb" if binary else "w")
        except Exception:
            os.close(fd)
            raise

    def get_filename(self, timestamp=None):
        """
        Get a filename (using localtime if timestamp is None),
        appending integers as necessary to avoid clobbering.
        For use in write() routines.
        Returns (filepath, timestamp)
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d%H%M%S")
        filename = "%s_%s" % (timestamp, self._log_name)
        fullname = os.path.join(self.subdir, filename)
        filepath = fullname
        i = 1
        while os.path.exists(filepath):
            filepath = "%s_%d" % (fullname, i)
            i += 1
        return (filepath, timestamp)

    def write(self, obj, timestamp=None):
        """
        Save object to a timestamped file with pickle.
        If timestamp is None, use the current localtime.
        Returns (filepath, timestamp)
        """
        filepath, timestamp = self.get_filename(timestamp)
        with self._open_new(filepath) as fd:
            # Force rw-r--r-- regardless of the process umask.
            os.chmod(filepath, 0o644)
            pickle.dump(obj, fd)
        return (filepath, timestamp)

    def write_binary(self, binary_string, timestamp=None):
        """
        Save binary_string to a timestamped file.
        If timestamp is None, use the current localtime.
        Returns (filepath, timestamp)
        """
        filepath, timestamp = self.get_filename(timestamp)
        # open a new file in write-only mode, don't clobber.
        with self._open_new(filepath) as fd:
            fd.write(binary_string)
        return (filepath, timestamp)

    def _write_dict_of_arrays(self, d, base_filepath):
        """
        Write an index file at base_filepath plus one raw data file
        (base_filepath + '_' + cleaned key) per array in d.
        Raises error if two keys clean to the same file extension,
        since their data files would overwrite each other.
        """
        extensions = {}
        for key in d:
            clean_key = self._clean_filename(key)
            if clean_key in extensions:
                raise error("keys %r and %r both map to file extension %r"
                            % (extensions[clean_key], key, clean_key))
            extensions[clean_key] = key
        # open a new file in write-only mode, don't clobber.
        with self._open_new(base_filepath, binary=False) as bfd:
            bfd.write("Contents (key : file-extension : format):\n")
            for clean_key, key in extensions.items():
                bfd.write("%s : %s : %s\n"
                          % (key, clean_key, str(d[key].dtype)))
                # write the keyed array to its own file
                filepath = "%s_%s" % (base_filepath, clean_key)
                d[key].tofile(filepath)

    def write_dict_of_arrays(self, d, timestamp=None):
        """
        Save dict of (string, numpy_array) pairs to timestamped files.
        If timestamp is None, use the current localtime.
        Returns (base_filepath, timestamp)
        """
        base_filepath, timestamp = self.get_filename(timestamp)
        self._write_dict_of_arrays(d, base_filepath)
        return (base_filepath, timestamp)


class data_load (object):
    """
    Loads data logged by data_log.
    """
    def read(self, file):
        """
        Load an object saved with data_log.write()

        NOTE: unpickling can execute arbitrary code; only read files
        that come from a trusted source.
        """
        with open(file, 'rb') as fd:
            return pickle.load(fd)

    def read_binary(self, file):
        """
        Load an object saved with data_log.write_binary()
        The file-name must not have been altered.
        Not implemented yet; always raises NotImplementedError.
        """
        raise NotImplementedError("not implemented")

    def read_dict_of_arrays(self, basefile):
        """
        Load an object saved with data_log.write_dict_of_arrays()
        The file-names must not have been altered.
        Returns a dict mapping each original key to a 1-D numpy array.
        """
        obj = {}
        realbasefile = os.path.realpath(basefile)
        with open(realbasefile) as toc:
            next(toc, None)  # ignore first line (human-readable header)
            for line in toc:
                line = line.rstrip("\r\n")
                if not line:
                    continue
                # Split from the right: the key is free-form text, while
                # the extension and dtype never contain ' : '.
                name, extension, dtype_name = line.rsplit(' : ', 2)
                fpath = "%s_%s" % (realbasefile, extension)
                obj[name] = numpy.fromfile(fpath,
                                           dtype=numpy.dtype(dtype_name))
        return obj


_test_dir = "."
def _list_dir(path):
    "Print the size and name of every entry in path, sorted by name."
    for name in sorted(os.listdir(path)):
        size = os.path.getsize(os.path.join(path, name))
        print("%10d %s" % (size, name))


def _remove_tree(path):
    "Recursively delete a log subdir created by one of the checks."
    import shutil
    shutil.rmtree(path)


def _compare_arrays(saved, read_back, label=""):
    """
    Raise error unless read_back has the same length and elements as
    saved.  label is appended to the message naming the first mismatch.
    """
    if len(saved) != len(read_back):
        raise error("Saved %d uint16s, read %d"
                    % (len(saved), len(read_back)))
    for i in range(len(saved)):
        if read_back[i] != saved[i]:
            print("Dissagreement in element %d%s" % (i, label))
            print("Saved %d, read back %d" % (saved[i], read_back[i]))
            raise error("Poorly saved")


def _check_data_logsubdir_clobber():
    "A second noclobber logger for the same day must be refused."
    log1 = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        try:
            data_log(_test_dir, noclobber_logsubdir=True)
        except errorDirExists:
            pass  # everything as it should be
        else:
            raise error("Didn't detect old log")
    finally:
        os.rmdir(log1.subdir)


def _check_data_log_filenames():
    "Ten writes in quick succession must land in ten distinct files."
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        files = [log.write(data)[0] for _ in range(10)]
        print("Contents of log directory (should be 10 identical logs)")
        _list_dir(log.subdir)
        if len(set(files)) != len(files):
            raise error("Log files clobbered each other")
    finally:
        # The subdir was created by this check (noclobber), so removing
        # the whole tree cannot touch pre-existing data.
        _remove_tree(log.subdir)


def _check_data_log_pickle_integrity():
    "An object written with write() must unpickle to an equal object."
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        filepath, ts = log.write(data)
        # read it back in
        with open(filepath, 'rb') as fd:
            data_in = pickle.load(fd)
        # compare
        if data != data_in:
            print("Saved :  %s" % (data,))
            print("Read back:  %s" % (data_in,))
            raise error("Poorly pickled")
    finally:
        _remove_tree(log.subdir)


def _check_data_log_binary_integrity():
    "Raw bytes written with write_binary() must read back unchanged."
    from numpy import arange, uint16, fromfile
    npts = 100
    data = arange(npts, dtype=uint16)
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        filepath, ts = log.write_binary(data.tobytes())
        # read it back in
        data_in = fromfile(filepath, dtype=uint16, count=-1)
        # compare
        _compare_arrays(data, data_in)
    finally:
        _remove_tree(log.subdir)


def _check_data_loc_dict_of_arrays():
    "Arrays saved with write_dict_of_arrays() must read back unchanged."
    from numpy import arange, uint16, fromfile
    npts = 100
    data1 = arange(npts, dtype=uint16)
    data2 = (npts - arange(npts)).astype(uint16)
    # The second key is deliberately full of characters that must be
    # cleaned; its data file extension becomes "data_2".
    data = {"data1": data1, "d\\/at:$a 2": data2}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        filepath, ts = log.write_dict_of_arrays(data)
        # checking
        print("Contents of log directory (should be 3 logs)")
        _list_dir(log.subdir)
        print("The table of contents file:")
        with open(filepath) as toc:
            for line in toc:
                print(line.rstrip("\n"))
        data1_in = fromfile(filepath + "_data1", dtype=uint16)
        data2_in = fromfile(filepath + "_data_2", dtype=uint16)
        _compare_arrays(data1, data1_in, " of data1")
        _compare_arrays(data2, data2_in, " of data2")
    finally:
        _remove_tree(log.subdir)


def test():
    "Run every self-check; each raises on failure."
    _check_data_logsubdir_clobber()
    _check_data_log_filenames()
    _check_data_log_pickle_integrity()
    _check_data_log_binary_integrity()
    _check_data_loc_dict_of_arrays()


if __name__ == "__main__":
    test()