3 # data_logger - classes for consistently logging data in an organized
# fashion. See the test functions for some usage examples
6 # Copyright (C) 2008, William Trevor King
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 3 of the
11 # License, or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 # See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 # The author may be contacted at <wking@drexel.edu> on the Internet, or
24 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
25 # Philadelphia PA 19104, USA.
29 import cPickle as pickle
class error (Exception) :
    """Root of this module's exception hierarchy; all module errors derive from it."""
class errorDirExists (error) :
    """Raised when the requested log subdirectory already exists and may not be reused."""
"""
Creates consistent, timestamped log files.

Initialized with log_dir and log_name.
log_dir specifies the base data directory.
If it doesn't exist, log_dir is created.

A subdir of log_dir is created (if necessary) named YYYYMMDD,
where YYYYMMDD is the current day in localtime.
If noclobber_logsubdir == True, this dir must not exist yet.

log_name specifies the base name for the created log files (in the log subdir).
The created log filenames are prefixed with a YYYYMMDDHHMMSS timestamp.
If the target filename already exists, the filename is postfixed with
'_N', where N is the lowest integer that doesn't clobber an existing file.

General data is saved to the log files with the write(obj) method.
By default, write() cPickles the object passed.
You can save in other formats by overriding write()

Binary data can be saved directly to the log files with the
write_binary(binary_string) method.

All file names are stripped of possibly troublesome characters.
"""
def __init__(self, log_dir=".", noclobber_logsubdir=False,
             # NOTE(review): the listing elides the rest of this signature
             # (original lines 71-72).  The body reads a ``log_name``
             # parameter -- presumably ``log_name="log") :`` -- confirm
             # against the upstream source.
    # generate lists of not-allowed characters
    # (Python 2 byte-string API: string.letters, string.maketrans, and the
    # two-argument form str.translate(table, deletechars))
    unaltered_chars = "-._" + string.digits + string.letters
    mapped_pairs = {' ':'_'}  # characters remapped instead of deleted
    allowed_chars = unaltered_chars + "".join(mapped_pairs.keys())
    all_chars = string.maketrans('','')  # identity table = all 256 byte values
    # delete_chars = every byte that is neither kept verbatim nor remapped
    self.delete_chars = all_chars.translate(all_chars, allowed_chars)
    trans_from = "".join(mapped_pairs.keys())
    trans_to = "".join(mapped_pairs.values()) # same order as keys, since no modifications to mapped_pairs were made in between the two calls
    self.transtable = string.maketrans(trans_from, trans_to)
    self._log_name = self._clean_filename(log_name) # never checked after this...
    self._log_dir = self._create_logdir(log_dir) # will not clobber.
    subdir, timestamp = self._create_logsubdir(self._log_dir,
    # NOTE(review): the remaining call arguments and the ``self.subdir``
    # assignment (original lines 86-88) are elided from this listing;
    # get_filename() below reads ``self.subdir``, so it must be set here.
    self.timestamp = timestamp
def _clean_filename(self, filename) :
    """Delete disallowed bytes and remap spaces to underscores.

    Currently only works on filenames, since it deletes '/'.
    If you need it to work on full paths, use os.path.split(your_path)[1]
    to strip off the filename portion...
    """
    # Python 2 two-argument translate: apply self.transtable, then drop
    # every byte listed in self.delete_chars (both built in __init__).
    cleanname = filename.translate(self.transtable, self.delete_chars)
    # NOTE(review): original line 97 is elided from this listing --
    # presumably ``return cleanname``; without it the method returns None.
def _create_logdir(self, log_dir) :
    """Expand '~' in log_dir and create the directory (mode 0755) if missing."""
    log_dir = os.path.expanduser(log_dir)
    if not os.path.exists(log_dir) :
        os.mkdir(log_dir, 0755)
    # NOTE(review): original line 102 is elided here -- presumably
    # ``return log_dir``, since __init__ assigns
    # ``self._log_dir = self._create_logdir(log_dir)``.
def _create_logsubdir(self, log_dir, noclobber_logsubdir,
                      # NOTE(review): the signature continuation (original
                      # line 104) is elided -- the body reads ``timestamp``,
                      # presumably ``timestamp=None) :``.
    """Create (or reuse) the YYYYMMDD subdir of log_dir; return (subdir, timestamp).

    Raises errorDirExists when the subdir exists and noclobber_logsubdir is set.
    """
    if timestamp == None :
        timestamp = time.strftime("%Y%m%d") # %H%M%S
    subdir = os.path.join(log_dir, timestamp)
    if os.path.exists(subdir) :
        if noclobber_logsubdir:
            raise errorDirExists, "%s exists" % subdir
    # NOTE(review): an ``else :`` line (original 111) is elided here -- the
    # mkdir below presumably runs only when the subdir does not exist yet.
        os.mkdir(subdir, 0755)
    return (subdir, timestamp)
def get_filename(self, timestamp=None) :
    """
    Get a filename (using localtime if timestamp==None),
    appending integers as necessary to avoid clobbering.
    For use in write() routines.
    Returns (filepath, timestamp)
    """
    if timestamp == None :
        timestamp = time.strftime("%Y%m%d%H%M%S")
    filename = "%s_%s" % (timestamp, self._log_name)
    fullname = os.path.join(self.subdir, filename)
    # NOTE(review): original lines 125-126 are elided here -- presumably
    # ``filepath = fullname`` and a counter init (e.g. ``i = 1``) feeding
    # the anti-clobber loop below.
    while os.path.exists(filepath) :
        filepath = "%s_%d" % (fullname, i)
        # NOTE(review): original line 129 elided -- presumably ``i += 1``;
        # without it this loop would never terminate on a collision.
    return (filepath, timestamp)
def write(self, obj, timestamp=None) :
    """
    Save object to a timestamped file with pickle.
    If timestamp == None, use the current localtime.
    Returns (filepath, timestamp)
    """
    filepath, timestamp = self.get_filename(timestamp)
    fd = open(filepath, 'wb')
    os.chmod(filepath, 0644)
    # NOTE(review): original lines 140-141 are elided here -- presumably
    # the actual ``pickle.dump(obj, fd, ...)`` and ``fd.close()`` calls.
    return (filepath, timestamp)
def write_binary(self, binary_string, timestamp=None) :
    """
    Save binary_string to a timestamped file.
    If timestamp == None, use the current localtime.
    Returns (filepath, timestamp)
    """
    filepath, timestamp = self.get_filename(timestamp)
    # open a new file in write-only mode (flags say O_WRONLY, not readonly
    # as the original comment claimed); O_EXCL means don't clobber.
    fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0644)
    bytes_remaining = len(binary_string)
    # NOTE(review): original line 152 elided -- presumably
    # ``bytes_written = 0``, read by the slice below.
    while bytes_remaining > 0 :
        bw = os.write(fd, binary_string[bytes_written:])
        # NOTE(review): original line 156 elided -- presumably
        # ``bytes_written += bw`` (os.write may do short writes).
        bytes_remaining -= bw
    # NOTE(review): original line 158 elided -- presumably ``os.close(fd)``.
    return (filepath, timestamp)
def _write_dict_of_arrays(self, d, base_filepath) :
    """Write a table-of-contents file at base_filepath plus one raw file per array.

    d maps string keys to numpy arrays.  Each array is dumped with
    numpy's tofile() to ``<base_filepath>_<cleaned key>``, and the
    contents file records one ``key : file-extension : dtype`` line each.
    """
    # BUGFIX: the original called open(base_filepath, 'w', 0644).  open()'s
    # third positional argument is the *buffering* size, not a permission
    # mode, so 0644 silently requested a 420-byte buffer.  (The original
    # comment "open ... in readonly mode, don't clobber" was also wrong:
    # mode 'w' truncates any existing file.)
    bfd = open(base_filepath, 'w')
    try :
        bfd.write("Contents (key : file-extension : format):\n")
        for key in d.keys() :
            clean_key = self._clean_filename(key)
            bfd.write("%s : %s : %s\n" % (key, clean_key, str(d[key].dtype)))
            # write the keyed array to its own file
            filepath = "%s_%s" % (base_filepath, clean_key)
            d[key].tofile(filepath)
    finally :
        # ensure the contents file is closed even if tofile() raises
        bfd.close()
def write_dict_of_arrays(self, d, timestamp=None) :
    """Save dict of (string, numpy_array) pairs to timestamped files.

    If timestamp == None, use the current localtime.
    Returns (base_filepath, timestamp)
    """
    base, stamp = self.get_filename(timestamp)
    self._write_dict_of_arrays(d, base)
    return (base, stamp)
"""Loads data logged by data_log."""
def read(self, file) :
    """Load an object saved with data_log.write().

    Returns the unpickled object.
    SECURITY NOTE: unpickling can execute arbitrary code -- only read
    files this process (or a trusted one) wrote.
    """
    # CLEANUP: close the handle deterministically instead of leaking the
    # anonymous open() result to the garbage collector.
    fd = open(file, 'rb')
    try :
        return pickle.load(fd)
    finally :
        fd.close()
def read_binary(self, file) :
    """
    Load an object saved with data_log.write_binary()
    The file-name must not have been altered.
    """
    # dtype is inferred from the trailing "_<type>" component of the name.
    type = file.split("_")[-1]  # NOTE(review): shadows the builtin ``type``
    # NOTE(review): original lines 196-198 are elided here -- presumably the
    # string-to-dtype dispatch (e.g. ``if type == "uint16" : t = numpy.uint16``
    # then ``else :``).  As listed, the raise below is unconditional and the
    # return is unreachable.
    raise Exception, "read_binary() not implemented for type %s" % (type)
    return numpy.fromfile(file, dtype=t)
def read_dict_of_arrays(self, basefile) :
    """
    Load an object saved with data_log.write_dict_of_arrays()
    The file-names must not have been altered.
    """
    realbasefile = os.path.realpath(basefile)
    # NOTE(review): initialization lines (presumably ``obj = {}`` and a line
    # counter ``i = 0``) are elided from this listing before the loop.
    for line in file(realbasefile) :
        if i > 0 : # ignore first line (the "Contents ..." header)
            ldata = line.split(' : ')
            # NOTE(review): original line 212 elided -- presumably derives
            # ``name`` from ldata[0] (the dict key).
            fpath = "%s_%s" % (realbasefile, ldata[1])
            # SECURITY NOTE(review): exec on table-of-contents text -- never
            # run this on a contents file from an untrusted source.
            exec 'typ = numpy.%s' % ldata[2]
            obj[name] = numpy.fromfile(fpath, dtype=typ)
    # NOTE(review): trailing lines elided -- presumably ``i += 1`` inside the
    # loop and a final ``return obj``.
def _check_data_logsubdir_clobber() :
    """Self-test: a second noclobber data_log on the same dir must raise errorDirExists."""
    log1 = data_log(_test_dir, noclobber_logsubdir=True)
    # NOTE(review): a ``try :`` line (original 223) is elided here, guarding
    # the second constructor call matched by the except clause below.
    log2 = data_log(_test_dir, noclobber_logsubdir=True)
    raise error, "Didn't detect old log"
    except errorDirExists :
        pass # everything as it should be
    os.rmdir(log1.subdir)
def _check_data_log_filenames() :
    """Self-test: repeated writes must get distinct ``_N``-suffixed filenames."""
    data = {"Test":True, "Data":[1,2,3,4]}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    # NOTE(review): original lines 233-234 are elided -- presumably
    # ``files = {}`` and a ``for i in range(10) :`` loop header feeding the
    # write below.
    files[i], ts = log.write(data)
    print "Contents of log directory (should be 10 identical logs)"
    os.system('ls -l %s' % log.subdir)
    # NOTE(review): cleanup lines (removing the files and the subdir) elided.
def _check_data_log_pickle_integrity() :
    """Self-test: round-trip a dict through write() and pickle.load."""
    data = {"Test":True, "Data":[1,2,3,4]}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath, ts = log.write(data)
    fd = open(filepath, 'rb')
    data_in = pickle.load(fd)
    # NOTE(review): original lines 250-252 are elided -- presumably
    # ``fd.close()`` and an ``if data != data_in :`` comparison guarding the
    # failure branch below; as listed the raise is unconditional.
    print "Saved : ", data
    print "Read back: ", data_in
    raise error, "Poorly pickled"
    # NOTE(review): cleanup lines (removing the file and subdir) elided.
def _check_data_log_binary_integrity() :
    """Self-test: round-trip a uint16 array through write_binary() and numpy.fromfile."""
    from numpy import zeros, uint16, fromfile
    # NOTE(review): original line 261 elided -- presumably ``npts = <count>``.
    data = zeros((npts,), dtype=uint16)
    for i in range(npts) :
        # NOTE(review): loop body (original lines 264-265) elided --
        # presumably fills ``data[i]``.
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath, ts = log.write_binary(data.tostring())
    data_in = fromfile(filepath, dtype=uint16, count=-1)
    if npts != len(data_in) :
        raise error, "Saved %d uint16s, read %d" % (npts, len(data_in))
    for i in range(npts) :
        if data_in[i] != data[i] :
            print "Disagreement in element %d" % i
            print "Saved %d, read back %d" % (data[i], data_in[i])
            raise error, "Poorly saved"
    # NOTE(review): cleanup lines (removing the file and subdir) elided.
def _check_data_loc_dict_of_arrays() :
    """Self-test: round-trip a dict of uint16 arrays (one key needs cleaning)."""
    from numpy import zeros, uint16, fromfile
    # NOTE(review): original line 283 elided -- presumably ``npts = <count>``.
    data1 = zeros((npts,), dtype=uint16)
    for i in range(npts) :
        # NOTE(review): loop body (original line 286) elided -- fills data1.
    data2 = zeros((npts,), dtype=uint16)
    for i in range(npts) :
        # NOTE(review): loop body (original line 289) elided -- fills data2.
    # the second key deliberately contains characters _clean_filename strips
    data={"data1":data1, "d\/at:$a 2":data2}
    log = data_log(_test_dir, noclobber_logsubdir=True)
    filepath, ts = log.write_dict_of_arrays(data)
    print "Contents of log directory (should be 3 logs)"
    os.system('ls -l %s' % log.subdir)
    print "The table of contents file:"
    os.system('cat %s' % (filepath))
    data1_in = fromfile(filepath+"_data1", dtype=uint16)
    data2_in = fromfile(filepath+"_data_2", dtype=uint16)
    for i in range(npts) :
        if data1_in[i] != data1[i] :
            print "Disagreement in element %d of data1" % i
            print "Saved %d, read back %d" % (data1[i], data1_in[i])
            raise error, "Poorly saved"
        if data2_in[i] != data2[i] :
            print "Disagreement in element %d of data2" % i
            print "Saved %d, read back %d" % (data2[i], data2_in[i])
            raise error, "Poorly saved"
    os.remove(filepath+"_data1")
    os.remove(filepath+"_data_2")
    # NOTE(review): remaining cleanup (removing filepath itself and the
    # subdir, original lines 310-314) elided.
# NOTE(review): the enclosing ``def test() :`` header (original line 315) is
# elided from this listing; these calls form the module's self-test driver,
# run via the __main__ guard at the bottom of the file.
_check_data_logsubdir_clobber()
_check_data_log_filenames()
_check_data_log_pickle_integrity()
_check_data_log_binary_integrity()
_check_data_loc_dict_of_arrays()
if __name__ == "__main__" :
    # NOTE(review): the guarded body (original line 323, presumably
    # ``test()``) is elided from this listing.