# data_logger - classes for consistently logging data in an organized
# fashion.  See the test functions for some usage examples.
6 # Copyright (C) 2008, William Trevor King
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 3 of the
11 # License, or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 # See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 # The author may be contacted at <wking@drexel.edu> on the Internet, or
24 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
25 # Philadelphia PA 19104, USA.
29 import cPickle as pickle
# Base data directory that _normalize_logdir() substitutes for a leading
# DEFAULT_PATH_REPLACE_STRING in a requested log_dir.  The '~' is expanded
# afterwards with os.path.expanduser().
DEFAULT_PATH = "~/rsrch/data"
# Marker that callers put at the very start of log_dir to ask for
# DEFAULT_PATH as the base directory.
DEFAULT_PATH_REPLACE_STRING = "$DEFAULT$/"
class error(Exception):
    """Basic module error class."""
    pass
class errorDirExists(error):
    """The specified directory already exists."""
    pass
48 Creates consistent, timestamped log files.
50 Initialized with log_dir and log_name.
51 log_dir specifies the base data directory.
52 If it doesn't exist, log_dir is created.
54 If log_dir begins with '$DEFAULT$/', that portion of the path is replaced
55 with the then-current contents of the DEFAULT_PATH module global.
57 A subdir of log_dir is created (if necessary) named YYYYMMDD,
58 where YYYYMMDD is the current day in localtime.
59 If noclobber_logsubdir == True, this dir must not exist yet.
61 log_name specifies the base name for the created log files (in the log subdir).
62 The created log filenames are prefixed with a YYYYMMDDHHMMSS timestamp.
63 If the target filename already exists, the filename is postfixed with
64 '_N', where N is the lowest integer that doesn't clobber an existing file.
66 General data is saved to the log files with the write(obj) method.
67 By default, write() cPickles the object passed.
68 You can save in other formats by overriding write()
Binary data can be saved directly to the log files with the
write_binary(binary_string) method.
73 All file names are stripped of possibly troublesome characters.
def __init__(self, log_dir=".", noclobber_logsubdir=False,
             log_name="log", timestamp=None):
    """
    Build the filename-sanitizing tables and create the log directories.

    log_dir              base data directory, created if it is missing.
                         A leading '$DEFAULT$/' is replaced by DEFAULT_PATH.
    noclobber_logsubdir  if true, the dated subdirectory must not exist
                         yet; errorDirExists is raised otherwise.
    log_name             base name of the log files (sanitized once here).
    timestamp            name of the dated subdirectory; None selects the
                         current local date (YYYYMMDD).

    NOTE(review): the defaults of log_name and timestamp are not visible
    in the reviewed excerpt; "log" and None are assumed -- confirm.
    """
    # Characters that are kept untouched.  ascii_letters instead of the
    # locale-dependent string.letters, so a given name is sanitized the
    # same way on every machine.
    unaltered_chars = "-._" + string.digits + string.ascii_letters
    # Characters that are replaced by a safer stand-in instead of deleted.
    mapped_pairs = {' ': '_'}
    # Walk the mapping once so sources and targets can never get out of
    # step (no reliance on keys() and values() returning the same order).
    pairs = sorted(mapped_pairs.items())
    trans_from = "".join([src for src, dst in pairs])
    trans_to = "".join([dst for src, dst in pairs])
    allowed_chars = unaltered_chars + trans_from
    # Identity table over all 256 byte values; deleting the allowed
    # characters from it leaves exactly the characters to strip.
    all_chars = string.maketrans('', '')
    self.delete_chars = all_chars.translate(all_chars, allowed_chars)
    self.transtable = string.maketrans(trans_from, trans_to)

    self._log_name = self._clean_filename(log_name)  # never checked after this...
    self._log_dir = self._create_logdir(log_dir)  # will not clobber.
    subdir, timestamp = self._create_logsubdir(
        self._log_dir, noclobber_logsubdir, timestamp)
    # get_filename() builds every log path from self.subdir.
    self.subdir = subdir
    self.timestamp = timestamp
95 def _clean_filename(self, filename) :
97 Currently only works on filenames, since it deletes '/'.
98 If you need it to work on full paths, use os.path.split(your_path)[1]
99 to strip of the filename portion...
101 cleanname = filename.translate(self.transtable, self.delete_chars)
def _normalize_logdir(self, log_dir):
    """
    Return log_dir with a leading DEFAULT_PATH_REPLACE_STRING swapped for
    DEFAULT_PATH and with '~' expanded.
    """
    marker = DEFAULT_PATH_REPLACE_STRING
    if log_dir.startswith(marker):
        remainder = log_dir[len(marker):]
        log_dir = os.path.join(DEFAULT_PATH, remainder)
    return os.path.expanduser(log_dir)
109 def _create_logdir(self, log_dir) :
110 log_dir = self._normalize_logdir(log_dir)
111 if not os.path.exists(log_dir) :
112 os.mkdir(log_dir, 0755)
114 def _create_logsubdir(self, log_dir, noclobber_logsubdir,
116 if timestamp == None :
117 timestamp = time.strftime("%Y%m%d") # %H%M%S
118 subdir = os.path.join(log_dir, timestamp)
119 if os.path.exists(subdir) :
120 if noclobber_logsubdir:
121 raise errorDirExists, "%s exists" % subdir
123 os.mkdir(subdir, 0755)
124 return (subdir, timestamp)
def get_filename(self, timestamp=None):
    """
    Get a filename (using localtime if timestamp is None),
    appending integers as necessary to avoid clobbering.
    For use in write() routines.
    Returns (filepath, timestamp)

    The returned path is only known to be unused at the time of the call;
    it is not reserved until the caller actually creates the file.
    """
    if timestamp is None:
        timestamp = time.strftime("%Y%m%d%H%M%S")
    filename = "%s_%s" % (timestamp, self._log_name)
    fullname = os.path.join(self.subdir, filename)
    filepath = fullname
    # NOTE(review): the counter's starting value is not visible in the
    # reviewed excerpt; 1 is assumed -- confirm.
    i = 1
    while os.path.exists(filepath):
        # taken: try the next '_N' postfix
        filepath = "%s_%d" % (fullname, i)
        i += 1
    return (filepath, timestamp)
def write(self, obj, timestamp=None):
    """
    Save object to a timestamped file with pickle.
    If timestamp is None, use the current localtime.
    Returns (filepath, timestamp)

    NOTE(review): the dump call itself is not visible in the reviewed
    excerpt; a plain pickle.dump(obj, fd) is assumed -- confirm.
    """
    filepath, timestamp = self.get_filename(timestamp)
    # The with-block closes the file even when chmod or pickling raises,
    # so a failed write never leaks the handle.
    with open(filepath, 'wb') as fd:
        os.chmod(filepath, 0o644)
        pickle.dump(obj, fd)
    return (filepath, timestamp)
def write_binary(self, binary_string, timestamp=None):
    """
    Save binary_string to a timestamped file.
    If timestamp is None, use the current localtime.
    Returns (filepath, timestamp)

    The file is created exclusively, so OSError is raised if the target
    path already exists.
    """
    filepath, timestamp = self.get_filename(timestamp)
    # open a new file in write-only mode, don't clobber.
    fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
    try:
        bytes_written = 0
        bytes_remaining = len(binary_string)
        # os.write() may accept fewer bytes than it was handed, so keep
        # writing the unwritten tail until nothing is left.
        while bytes_remaining > 0:
            bw = os.write(fd, binary_string[bytes_written:])
            bytes_written += bw
            bytes_remaining -= bw
    finally:
        # release the descriptor even when a write fails
        os.close(fd)
    return (filepath, timestamp)
171 def _write_dict_of_arrays(self, d, base_filepath) :
172 # open a new file in readonly mode, don't clobber.
173 bfd = open(base_filepath, 'w', 0644)
174 bfd.write("Contents (key : file-extension : format):\n")
175 for key in d.keys() :
176 clean_key = self._clean_filename(key)
177 bfd.write("%s : %s : %s\n" % (key, clean_key, str(d[key].dtype)))
178 # write the keyed array to it's own file
179 filepath = "%s_%s" % (base_filepath, clean_key)
180 d[key].tofile(filepath)
def write_dict_of_arrays(self, d, timestamp=None):
    """
    Save a dict of (string, numpy_array) pairs to timestamped files.
    With timestamp None the current localtime is used.
    Returns (base_filepath, timestamp)
    """
    toc_path, stamp = self.get_filename(timestamp)
    self._write_dict_of_arrays(d, toc_path)
    return (toc_path, stamp)
194 Loads data logged by data_log.
def read(self, file):
    """
    Load an object saved with data_log.write()

    file is the path of the pickle file.  Only use this on files you
    trust: unpickling can execute arbitrary code.
    """
    # The with-block closes the file again; the bare open() used to leave
    # that to the garbage collector.
    with open(file, 'rb') as fd:
        return pickle.load(fd)
def read_binary(self, file):
    """
    Load an object saved with data_log.write_binary()
    The file-name must not have been altered.

    The text after the last '_' of the file name is taken as the name of
    the numpy dtype of the stored elements.  Raises Exception when that
    name is not a dtype numpy knows.

    NOTE(review): the original suffix-to-dtype lines are not visible in
    the reviewed excerpt; here every name accepted by numpy.dtype() is
    supported -- confirm that this covers the intended suffixes.
    """
    type_name = file.split("_")[-1]
    try:
        t = numpy.dtype(type_name)
    except (TypeError, ValueError):
        raise Exception("read_binary() not implemented for type %s" % (type_name))
    return numpy.fromfile(file, dtype=t)
def read_dict_of_arrays(self, basefile):
    """
    Load a dict of arrays saved with data_log.write_dict_of_arrays()
    The file-names must not have been altered.

    basefile is the table-of-contents file.  Each line after its header
    reads "key : file-extension : dtype"; the matching array is loaded
    from basefile + "_" + file-extension.
    Returns a dict mapping each key to its numpy array.
    """
    obj = {}
    realbasefile = os.path.realpath(basefile)
    with open(realbasefile) as toc:
        for i, line in enumerate(toc):
            if i == 0 or not line.strip():
                continue  # ignore the header line and blank lines
            # Split from the right: the key is written as given and may
            # itself contain ' : ', while the two trailing fields are
            # not expected to.
            name, extension, dtype_name = line.rstrip('\r\n').rsplit(' : ', 2)
            fpath = "%s_%s" % (realbasefile, extension)
            # Look the dtype up by name instead of exec'ing text that was
            # read from the file.
            typ = numpy.dtype(dtype_name)
            obj[name] = numpy.fromfile(fpath, dtype=typ)
    return obj
def _check_data_logsubdir_clobber():
    """
    Check that a second no-clobber log in the same dated subdirectory is
    refused with errorDirExists.
    """
    log1 = data_log(_test_dir, noclobber_logsubdir=True)
    try:
        data_log(_test_dir, noclobber_logsubdir=True)
    except errorDirExists:
        pass  # everything as it should be
    else:
        raise error("Didn't detect old log")
    finally:
        # remove the test subdirectory whether or not the check passed
        os.rmdir(log1.subdir)
def _check_data_log_filenames():
    """
    Write the same object ten times and list the log directory, so the
    generated file names can be inspected by eye.

    NOTE(review): the loop and the cleanup are not visible in the reviewed
    excerpt; ten writes are taken from the printed message and the cleanup
    mirrors the other checks -- confirm.
    """
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    files = []
    try:
        for i in range(10):
            filepath, ts = log.write(data)
            files.append(filepath)
        print("Contents of log directory (should be 10 identical logs)")
        os.system('ls -l %s' % log.subdir)
    finally:
        # always leave the test directory clean, even after a failure
        for filepath in files:
            os.remove(filepath)
        os.rmdir(log.subdir)
def _check_data_log_pickle_integrity():
    """
    Check that an object written with data_log.write() unpickles to an
    equal object.  Raises error on a mismatch.

    NOTE(review): the comparison and cleanup lines are not visible in the
    reviewed excerpt; they are reconstructed from the messages -- confirm.
    """
    data = {"Test": True, "Data": [1, 2, 3, 4]}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath, ts = log.write(data)
    try:
        # read it back in
        with open(filepath, 'rb') as fd:
            data_in = pickle.load(fd)
    finally:
        # always leave the test directory clean, even after a failure
        os.remove(filepath)
        os.rmdir(log.subdir)
    if data != data_in:
        # Two spaces after the label reproduce the output of the former
        # print statement (label, separator space, value).
        print("Saved :  %s" % (data,))
        print("Read back:  %s" % (data_in,))
        raise error("Poorly pickled")
def _check_data_log_binary_integrity():
    """
    Check that uint16 data written with data_log.write_binary() reads
    back unchanged.  Raises error on a length or value mismatch.

    NOTE(review): the point count, the fill values and the cleanup are not
    visible in the reviewed excerpt; the values below are assumptions that
    merely exercise the round trip -- confirm.
    """
    from numpy import zeros, uint16, fromfile
    npts = 100
    data = zeros((npts,), dtype=uint16)
    for i in range(npts):
        data[i] = i
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    # tobytes() is the supported spelling of the deprecated tostring()
    filepath, ts = log.write_binary(data.tobytes())
    try:
        # read it back in
        data_in = fromfile(filepath, dtype=uint16, count=-1)
    finally:
        # always leave the test directory clean, even after a failure
        os.remove(filepath)
        os.rmdir(log.subdir)
    if npts != len(data_in):
        raise error("Saved %d uint16s, read %d" % (npts, len(data_in)))
    for i in range(npts):
        if data_in[i] != data[i]:
            print("Disagreement in element %d" % i)
            print("Saved %d, read back %d" % (data[i], data_in[i]))
            raise error("Poorly saved")
def _check_data_loc_dict_of_arrays():
    """
    Check that data_log.write_dict_of_arrays() stores every array in its
    own file, named after the sanitized key, and that the contents read
    back unchanged.  Raises error on a mismatch.

    NOTE(review): the point count and the fill values are not visible in
    the reviewed excerpt; the values below are assumptions that merely
    exercise the round trip -- confirm.
    """
    from numpy import zeros, uint16, fromfile
    npts = 100
    data1 = zeros((npts,), dtype=uint16)
    for i in range(npts):
        data1[i] = i
    data2 = zeros((npts,), dtype=uint16)
    for i in range(npts):
        data2[i] = npts - i
    # The second key is full of characters the logger must strip; it is
    # expected to end up as "data_2".  The backslash is written escaped,
    # which yields the same string without an invalid '\/' escape.
    data = {"data1": data1, "d\\/at:$a 2": data2}
    # save the data
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath, ts = log.write_dict_of_arrays(data)
    try:
        print("Contents of log directory (should be 3 logs)")
        os.system('ls -l %s' % log.subdir)
        print("The table of contents file:")
        os.system('cat %s' % (filepath))
        data1_in = fromfile(filepath + "_data1", dtype=uint16)
        data2_in = fromfile(filepath + "_data_2", dtype=uint16)
        for i in range(npts):
            if data1_in[i] != data1[i]:
                print("Disagreement in element %d of data1" % i)
                print("Saved %d, read back %d" % (data1[i], data1_in[i]))
                raise error("Poorly saved")
            if data2_in[i] != data2[i]:
                print("Disagreement in element %d of data2" % i)
                print("Saved %d, read back %d" % (data2[i], data2_in[i]))
                raise error("Poorly saved")
    finally:
        # Always leave the test directory clean, even after a failure.
        # A file is only missing here if the write itself went wrong.
        for path in (filepath, filepath + "_data1", filepath + "_data_2"):
            if os.path.exists(path):
                os.remove(path)
        os.rmdir(log.subdir)
327 _check_data_logsubdir_clobber()
328 _check_data_log_filenames()
329 _check_data_log_pickle_integrity()
330 _check_data_log_binary_integrity()
331 _check_data_loc_dict_of_arrays()
333 if __name__ == "__main__" :