3 # data_logger - classes for consistently logging data in an organized
4 # fashion. See the test functions for some usage examples.
6 # Copyright (C) 2008, William Trevor King
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 3 of the
11 # License, or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 # See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 # The author may be contacted at <wking@drexel.edu> on the Internet, or
24 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
25 # Philadelphia PA 19104, USA.
29 import cPickle as pickle
# Base directory substituted for a leading DEFAULT_PATH_REPLACE_STRING in a
# log_dir argument (see normalize_logdir, which also expands the '~').
35 DEFAULT_PATH = "~/rsrch/data"
# Prefix that marks a log_dir as being relative to DEFAULT_PATH.
36 DEFAULT_PATH_REPLACE_STRING = "$DEFAULT$/"
class error(Exception):
    """Basic module error class.

    Base class for the exceptions this module defines (for example
    errorDirExists), so callers can catch them through a single type.
    """
class errorDirExists(error):
    """The specified directory already exists.

    Raised by _create_logsubdir() when the dated log subdirectory is
    already present and noclobber_logsubdir is true.
    """
def normalize_logdir(log_dir):
    """Expand a user-supplied log directory into a usable path.

    If log_dir begins with DEFAULT_PATH_REPLACE_STRING, that prefix is
    replaced with the current value of the DEFAULT_PATH module global.
    A leading '~' is then expanded with os.path.expanduser().

    Returns the normalized path.  Nothing is created or checked on disk.
    """
    if log_dir.startswith(DEFAULT_PATH_REPLACE_STRING):
        remainder = log_dir[len(DEFAULT_PATH_REPLACE_STRING):]
        log_dir = os.path.join(DEFAULT_PATH, remainder)
    # Callers (e.g. _create_logdir) use the result, so it must be returned.
    return os.path.expanduser(log_dir)
55 Creates consistent, timestamped log files.
57 Initialized with log_dir and log_name.
58 log_dir specifies the base data directory.
59 If it doesn't exist, log_dir is created.
61 If log_dir begins with '$DEFAULT$/', that portion of the path is replaced
62 with the then-current contents of the DEFAULT_PATH module global.
64 A subdir of log_dir is created (if necessary) named YYYYMMDD,
65 where YYYYMMDD is the current day in localtime.
66 If noclobber_logsubdir == True, this dir must not exist yet.
68 log_name specifies the base name for the created log files (in the log subdir).
69 The created log filenames are prefixed with a YYYYMMDDHHMMSS timestamp.
70 If the target filename already exists, the filename is postfixed with
71 '_N', where N is the lowest integer that doesn't clobber an existing file.
73 General data is saved to the log files with the write(obj) method.
74 By default, write() cPickles the object passed.
75 You can save in other formats by overriding write()
77 Binary data can be saved directly to the log files with the
78 write_binary(binary_string) method.
80 All file names are stripped of possibly troublesome characters.
# Set up a logger: build the character tables used by _clean_filename(),
# sanitise log_name, then create the log directory and its dated subdir.
# NOTE(review): the rest of this signature and the tail of the
# _create_logsubdir() call below are not visible in this listing.  The body
# reads `log_name` and `timestamp`, so they are presumably the remaining
# parameters -- confirm names and defaults against the full source.
# NOTE(review): string.letters, string.maketrans and the two-argument
# str.translate used here exist only on Python 2.
82 def __init__(self, log_dir=".", noclobber_logsubdir=False,
85 # generate lists of not-allowed characters
86 unaltered_chars = "-._" + string.digits + string.letters
87 mapped_pairs = {' ':'_'}
88 allowed_chars = unaltered_chars + "".join(mapped_pairs.keys())
# identity table, i.e. a string holding every character value once
89 all_chars = string.maketrans('','')
# whatever is not explicitly allowed ends up in the delete list
90 self.delete_chars = all_chars.translate(all_chars, allowed_chars)
91 trans_from = "".join(mapped_pairs.keys())
92 trans_to = "".join(mapped_pairs.values()) # same order as keys, since no modifications to mapped_pairs were made in between the two calls
93 self.transtable = string.maketrans(trans_from, trans_to)
95 self._log_name = self._clean_filename(log_name) # never checked after this...
96 self._log_dir = self._create_logdir(log_dir) # will not clobber.
97 subdir, timestamp = self._create_logsubdir(self._log_dir,
# NOTE(review): get_filename() reads self.subdir, so an assignment of
# `subdir` to it presumably sits in the lines not shown here -- confirm.
101 self.timestamp = timestamp
def _clean_filename(self, filename):
    """Return filename with troublesome characters removed or replaced.

    Characters listed in self.delete_chars are dropped and the remaining
    ones are mapped through self.transtable (both are set up in __init__).

    Currently only works on bare file names, since it deletes '/'.
    If you need it for a full path, clean os.path.split(your_path)[1]
    (the file-name portion) instead.
    """
    # NOTE: the two-argument translate() is the Python 2 `str` form (it is
    # `bytes.translate` on Python 3); the tables must be of the same type.
    # Callers use the result (e.g. as self._log_name), so return it.
    return filename.translate(self.transtable, self.delete_chars)
def _create_logdir(self, log_dir):
    """Make sure the base log directory exists and return its path.

    log_dir is first passed through normalize_logdir().  If the resulting
    path does not exist it is created with mode 0755 (before umask).
    Only the final path component is created (os.mkdir), so a missing
    parent directory raises OSError.  An existing directory is left alone.
    """
    log_dir = normalize_logdir(log_dir)
    if not os.path.exists(log_dir):
        # 0o755 is the same value as the old literal 0755, spelled so that
        # Python 3 accepts it as well.
        os.mkdir(log_dir, 0o755)
    # __init__ stores the result and builds the dated subdir from it.
    return log_dir
def _create_logsubdir(self, log_dir, noclobber_logsubdir,
                      timestamp=None):
    """Create (if necessary) the dated subdirectory of log_dir.

    timestamp names the subdirectory; when it is None the current local
    date is used, formatted as YYYYMMDD.

    Returns (subdir, timestamp).
    Raises errorDirExists if the subdirectory already exists and
    noclobber_logsubdir is true.

    NOTE(review): the tail of the original signature is not visible;
    a None default for timestamp keeps the three-positional-argument call
    made from __init__ working either way -- confirm against the source.
    """
    if timestamp is None:
        timestamp = time.strftime("%Y%m%d")  # day resolution, no %H%M%S
    subdir = os.path.join(log_dir, timestamp)
    if os.path.exists(subdir):
        if noclobber_logsubdir:
            # Call form of raise is valid on both Python 2 and Python 3.
            raise errorDirExists("%s exists" % subdir)
    else:
        # Only create the directory when it is missing; mkdir on an
        # existing path would raise OSError.
        os.mkdir(subdir, 0o755)
    return (subdir, timestamp)
# Build the path of a new log file inside self.subdir.  The base name is
# "<timestamp>_<log_name>"; while a file of the candidate name exists, a
# numeric "_<i>" suffix on the base name is tried instead.
# NOTE(review): the statements that initialise `filepath` and `i` before the
# loop, and the one that advances `i` inside it, are not visible in this
# listing -- confirm the first suffix value against the full source.
# NOTE(review): the existence check and the later file creation are separate
# steps, so two writers could presumably pick the same name -- verify whether
# that matters for the callers.
126 def get_filename(self, timestamp=None) :
128 Get a filename (using localtime if timestamp==None),
129 appending integers as necessary to avoid clobbering.
130 For use in write() routines.
131 Returns (filepath, timestamp)
133 if timestamp == None :
# second resolution here, unlike the day-resolution subdir name
134 timestamp = time.strftime("%Y%m%d%H%M%S")
135 filename = "%s_%s" % (timestamp, self._log_name)
136 fullname = os.path.join(self.subdir, filename)
139 while os.path.exists(filepath) :
140 filepath = "%s_%d" % (fullname, i)
142 return (filepath, timestamp)
def write(self, obj, timestamp=None):
    """Save obj to a new timestamped file with pickle.

    If timestamp is None, get_filename() uses the current localtime.
    The file is given mode 0644.

    Returns (filepath, timestamp).
    """
    filepath, timestamp = self.get_filename(timestamp)
    # The context manager closes the file even if pickling raises.
    with open(filepath, 'wb') as fd:
        os.chmod(filepath, 0o644)
        pickle.dump(obj, fd)
    return (filepath, timestamp)
def write_binary(self, binary_string, timestamp=None):
    """Save binary_string verbatim to a new timestamped file.

    If timestamp is None, get_filename() uses the current localtime.
    The file is created exclusively (O_EXCL) with mode 0644, so an
    existing file is never overwritten; os.open raises OSError instead.

    Returns (filepath, timestamp).
    """
    filepath, timestamp = self.get_filename(timestamp)
    # Open a new file write-only; O_CREAT | O_EXCL refuses to clobber.
    fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
    try:
        bytes_written = 0
        total = len(binary_string)
        # os.write may accept only part of the buffer, so keep going
        # from the current offset until everything is on disk.
        while bytes_written < total:
            bytes_written += os.write(fd, binary_string[bytes_written:])
    finally:
        # Release the descriptor even if a write fails.
        os.close(fd)
    return (filepath, timestamp)
def _write_dict_of_arrays(self, d, base_filepath):
    """Write every array in d to its own file, plus a table of contents.

    base_filepath receives a text table of contents: a header line and
    then one "key : file-extension : format" line per entry, where
    file-extension is the key passed through _clean_filename() and
    format is str() of the array's dtype.  Each array is dumped with
    its tofile() method to "<base_filepath>_<file-extension>".
    """
    # open()'s third positional argument is the buffer size, not the
    # permission bits, so the mode is applied with chmod instead.
    with open(base_filepath, 'w') as bfd:
        os.chmod(base_filepath, 0o644)
        bfd.write("Contents (key : file-extension : format):\n")
        for key in d:
            clean_key = self._clean_filename(key)
            bfd.write("%s : %s : %s\n" % (key, clean_key, str(d[key].dtype)))
            # write the keyed array to its own file
            d[key].tofile("%s_%s" % (base_filepath, clean_key))
def write_dict_of_arrays(self, d, timestamp=None):
    """Store a dict of (string, numpy_array) pairs in timestamped files.

    A timestamp of None means the current localtime is used.
    Returns (base_filepath, timestamp).
    """
    toc_path, stamp = self.get_filename(timestamp)
    self._write_dict_of_arrays(d, toc_path)
    return (toc_path, stamp)
195 Loads data logged by data_log.
def read(self, file):
    """Load and return an object saved with data_log.write().

    file is the path of the pickle file to read.
    """
    # NOTE: unpickling can run arbitrary code; only load trusted log files.
    # The context manager closes the file instead of leaking the handle.
    with open(file, 'rb') as fd:
        return pickle.load(fd)
# Load a file written by data_log.write_binary() and return it as a numpy
# array.  The dtype is chosen from the text after the last "_" in the name.
# NOTE(review): the lines that map that suffix to the dtype `t` are not
# visible in this listing; only the "not implemented" fallback is shown.
# NOTE(review): `type` and `file` shadow builtins, and "raise Exception, msg"
# is Python 2-only syntax.
202 def read_binary(self, file) :
204 Load an object saved with data_log.write_binary()
205 The file-name must not have been altered.
207 type = file.split("_")[-1]
211 raise Exception, "read_binary() not implemented for type %s" % (type)
212 return numpy.fromfile(file, dtype=t)
def read_dict_of_arrays(self, basefile):
    """Load a dict of arrays saved with data_log.write_dict_of_arrays().

    basefile is the table-of-contents file.  After its header line, each
    "key : file-extension : format" line names one array, which is read
    from "<basefile>_<file-extension>" with the dtype given by format.
    The file names must not have been altered.

    Returns a dict mapping each key to its numpy array.
    """
    obj = {}
    realbasefile = os.path.realpath(basefile)
    with open(realbasefile) as toc:
        next(toc, None)  # ignore first line (the "Contents ..." header)
        for line in toc:
            ldata = line.split(' : ')
            name = ldata[0]
            fpath = "%s_%s" % (realbasefile, ldata[1])
            # Let numpy parse the dtype name rather than exec'ing text
            # read from the file; strip the trailing newline first.
            typ = numpy.dtype(ldata[2].strip())
            obj[name] = numpy.fromfile(fpath, dtype=typ)
    return obj
def _check_data_logsubdir_clobber():
    """Check that noclobber_logsubdir=True refuses to reuse a log subdir.

    Creates one data_log in _test_dir, then tries to create a second one
    for the same day.  Raises error if the second creation is not
    rejected with errorDirExists.  Removes the first log's subdir when
    the check passes.
    """
    log1 = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        data_log(_test_dir, noclobber_logsubdir=True)
    except errorDirExists:
        pass  # everything as it should be
    else:
        # Call form of raise is valid on both Python 2 and Python 3.
        raise error("Didn't detect old log")
    os.rmdir(log1.subdir)
# Manual check of file naming: write the same object through one logger
# repeatedly and list the log subdirectory so the names can be inspected.
# NOTE(review): the statements that define `files` and the loop over `i`
# are not visible in this listing; the message below suggests ten writes --
# confirm.  Any cleanup after the listing is likewise not shown.
242 def _check_data_log_filenames() :
243 data = {"Test":True, "Data":[1,2,3,4]}
244 log = data_log(_test_dir, noclobber_logsubdir=True)
247 files[i], ts = log.write(data)
248 print "Contents of log directory (should be 10 identical logs)"
# shells out to `ls`, so this check assumes a Unix-like environment
249 os.system('ls -l %s' % log.subdir)
# Round-trip check for data_log.write(): pickle a small dict to a log file,
# load it back with pickle.load, and report/raise `error` on a mismatch.
# NOTE(review): the comparison that guards the report and raise below, the
# close of `fd`, and any cleanup are not visible in this listing -- confirm.
254 def _check_data_log_pickle_integrity() :
255 data = {"Test":True, "Data":[1,2,3,4]}
257 log = data_log(_test_dir, noclobber_logsubdir=True)
258 filepath, ts = log.write(data)
260 fd = open(filepath, 'rb')
261 data_in = pickle.load(fd)
265 print "Saved : ", data
266 print "Read back: ", data_in
267 raise error, "Poorly pickled"
# Round-trip check for data_log.write_binary(): save the raw bytes of a
# uint16 array, read the file back with numpy.fromfile, and raise `error`
# if the length or any element differs.
# NOTE(review): the definition of `npts`, the body of the loop that fills
# `data`, and any cleanup are not visible in this listing -- confirm.
271 def _check_data_log_binary_integrity() :
272 from numpy import zeros, uint16, fromfile
274 data = zeros((npts,), dtype=uint16)
275 for i in range(npts) :
278 log = data_log(_test_dir, noclobber_logsubdir=True)
279 filepath, ts = log.write_binary(data.tostring())
# count=-1 reads the whole file
281 data_in = fromfile(filepath, dtype=uint16, count=-1)
283 if npts != len(data_in) :
284 raise error, "Saved %d uint16s, read %d" % (npts, len(data_in))
285 for i in range(npts) :
286 if data_in[i] != data[i] :
287 print "Disagreement in element %d" % i
288 print "Saved %d, read back %d" % (data[i], data_in[i])
289 raise error, "Poorly saved"
# Round-trip check for data_log.write_dict_of_arrays(): save two uint16
# arrays, show the resulting files, read each array file back with
# numpy.fromfile, and raise `error` on the first element that differs.
# NOTE(review): the definition of `npts` and the bodies of the two loops
# that fill data1/data2 are not visible in this listing -- confirm.
# NOTE(review): "loc" in the function name is presumably a typo for "log";
# it is left alone here because the test driver calls it by this name.
293 def _check_data_loc_dict_of_arrays() :
294 from numpy import zeros, uint16, fromfile
296 data1 = zeros((npts,), dtype=uint16)
297 for i in range(npts) :
299 data2 = zeros((npts,), dtype=uint16)
300 for i in range(npts) :
# the second key deliberately contains characters _clean_filename() must
# delete or replace; its array file is expected to end in "_data_2" below
302 data={"data1":data1, "d\/at:$a 2":data2}
304 log = data_log(_test_dir, noclobber_logsubdir=True)
305 filepath, ts = log.write_dict_of_arrays(data)
307 print "Contents of log directory (should be 3 logs)"
# shells out to `ls` and `cat`, so this check assumes a Unix-like environment
308 os.system('ls -l %s' % log.subdir)
309 print "The table of contents file:"
310 os.system('cat %s' % (filepath))
311 data1_in = fromfile(filepath+"_data1", dtype=uint16)
312 data2_in = fromfile(filepath+"_data_2", dtype=uint16)
313 for i in range(npts) :
314 if data1_in[i] != data1[i] :
315 print "Disagreement in element %d of data1" % i
316 print "Saved %d, read back %d" % (data1[i], data1_in[i])
317 raise error, "Poorly saved"
318 if data2_in[i] != data2[i] :
319 print "Disagreement in element %d of data2" % i
320 print "Saved %d, read back %d" % (data2[i], data2_in[i])
321 raise error, "Poorly saved"
# remove the two array files written above
323 os.remove(filepath+"_data1")
324 os.remove(filepath+"_data_2")
# Run every self-check in turn.
# NOTE(review): these calls are presumably the body of a test-driver function
# whose `def` line is not visible in this listing, and the statement under
# the __main__ guard below is not shown either -- confirm against the source.
328 _check_data_logsubdir_clobber()
329 _check_data_log_filenames()
330 _check_data_log_pickle_integrity()
331 _check_data_log_binary_integrity()
332 _check_data_loc_dict_of_arrays()
334 if __name__ == "__main__" :