3 # data_logger - classes for consistently logging data in an organized
4 # fashion. See the doctests for some usage examples.
6 # Copyright (C) 2008-2010 William Trevor King
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License as
10 # published by the Free Software Foundation; either version 3 of the
11 # License, or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 # See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 # The author may be contacted at <wking@drexel.edu> on the Internet, or
24 # write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
25 # Philadelphia PA 19104, USA.
27 from __future__ import with_statement
31 import cPickle as pickle
# Base directory substituted for a leading `DEFAULT_PATH_REPLACE_STRING`
# in a log directory.  `~` is expanded when the path is normalized.
DEFAULT_PATH = "~/rsrch/data"
# Prefix marking a log directory as relative to `DEFAULT_PATH`.
DEFAULT_PATH_REPLACE_STRING = "${DEFAULT}/"
class Error(Exception):
    """Basic module error class."""
class ErrorDirExists(Error):
    """The specified directory already exists."""
def normalize_log_dir(log_dir):
    """Normalize a log directory.

    Expands the user symbol `~`, as well as
    `DEFAULT_PATH_REPLACE_STRING`.

    Parameters
    ----------
    log_dir : path
        Raw `log_dir` passed into `.__init__()`.

    Returns
    -------
    log_dir : path
        Normalized version of the input `log_dir`.

    Examples
    --------

    >>> normalize_log_dir('~/.log')  # doctest: +ELLIPSIS
    '/.../.log'
    >>> normalize_log_dir('${DEFAULT}/hi/there')  # doctest: +ELLIPSIS
    '/.../rsrch/data/hi/there'
    """
    if log_dir.startswith(DEFAULT_PATH_REPLACE_STRING):
        # Swap the placeholder prefix for the *current* value of the
        # module global, so callers may change DEFAULT_PATH at runtime.
        length = len(DEFAULT_PATH_REPLACE_STRING)
        log_dir = os.path.join(DEFAULT_PATH, log_dir[length:])
    # Expand `~` last, since DEFAULT_PATH may itself start with it.
    log_dir = os.path.expanduser(log_dir)
    return log_dir
class DataLog(object):
    r"""Create consistent, timestamped log files.

    General data is saved to the log files with the `write(obj)`
    method.  By default, `write()` pickles the object passed.  You
    can save in other formats by overriding `write()`.

    Binary data can be saved directly to the log files with the
    `write_binary(binary_string)` method.

    All file names are stripped of possibly troublesome characters.

    Parameters
    ----------
    log_dir : path
        `log_dir` sets the base data directory.  If it doesn't exist,
        it is created.

        If log_dir begins with '${DEFAULT}/', that portion of the path
        is replaced with the then-current contents of the
        `DEFAULT_PATH` module global.

        A subdir of log_dir is created (if necessary) named
        `YYYYMMDD`, where `YYYYMMDD` is the current day in local time.
    noclobber_log_subdir : bool
        If `noclobber_log_subdir == True`, the `YYYYMMDD` subdir of
        `log_dir` must not exist yet.
    log_name : string
        `log_name` specifies the base name for the created log files
        (in the log subdir).  The created log filenames are prefixed
        with a `YYYYMMDDHHMMSS` timestamp.  If the target filename
        already exists, the filename is postfixed with `_N`, where
        `N` is the lowest integer that doesn't clobber an existing
        file.
    timestamp : string
        Override default subdir `timestamp` (%Y%m%d).

    Examples
    --------

    >>> import shutil
    >>> dl = DataLog('test_data_log', timestamp='20101103')
    >>> data = {'test':True, 'data':[1, 2, 3, 4]}
    >>> files = [None]*10
    >>> for i in range(10):
    ...     files[i],ts = dl.write(data, timestamp='20101103235959')
    >>> print('\n'.join(files))
    test_data_log/20101103/20101103235959_log
    test_data_log/20101103/20101103235959_log_1
    test_data_log/20101103/20101103235959_log_2
    test_data_log/20101103/20101103235959_log_3
    test_data_log/20101103/20101103235959_log_4
    test_data_log/20101103/20101103235959_log_5
    test_data_log/20101103/20101103235959_log_6
    test_data_log/20101103/20101103235959_log_7
    test_data_log/20101103/20101103235959_log_8
    test_data_log/20101103/20101103235959_log_9
    >>> shutil.rmtree(dl._log_dir)
    """

    def __init__(self, log_dir=".", noclobber_log_subdir=False,
                 log_name="log", timestamp=None):
        self._setup_character_translation()
        # The log name ends up inside every file name, so clean it once here.
        self._log_name = self._clean_filename(log_name)
        # An existing base directory is reused, never replaced.
        self._log_dir = self._create_log_dir(log_dir)
        self._subdir, self._timestamp = self._create_log_subdir(
            self._log_dir, noclobber_log_subdir, timestamp)

    def _setup_character_translation(self):
        """Setup the character tables used by `._clean_filename()`.

        Sets `._allowed_chars` (the characters that survive cleaning)
        and `._mapped_chars` (allowed characters that are replaced by
        another character instead of being kept verbatim).
        """
        unaltered_chars = '-._' + string.digits + string.ascii_letters
        mapped_pairs = {' ': '_'}
        self._mapped_chars = mapped_pairs
        self._allowed_chars = frozenset(unaltered_chars).union(mapped_pairs)

    def _clean_filename(self, filename):
        r"""Remove troublesome characters from filenames.

        This method only works on filenames, since it deletes '/'.  If
        you need it to work on full paths, use
        `os.path.split(your_path)` and clean the portions separately.

        Parameters
        ----------
        filename : string
            Candidate file name.

        Returns
        -------
        cleanname : string
            `filename` with every character outside `-._`, digits,
            ASCII letters and space removed, and spaces replaced by
            underscores.

        Examples
        --------

        >>> import shutil
        >>> dl = DataLog(log_dir="test_clean_filename")
        >>> dl._clean_filename('hi there')
        'hi_there'
        >>> dl._clean_filename('hello\tthe/castle')
        'hellothecastle'
        >>> shutil.rmtree(dl._log_dir)
        """
        cleanname = ''.join(
            self._mapped_chars.get(char, char)
            for char in filename if char in self._allowed_chars)
        return cleanname

    def _create_log_dir(self, log_dir):
        """Create a clean base log dir (if necessary).

        Parameters
        ----------
        log_dir : path
            Raw `log_dir` passed into `.__init__()`.

        Returns
        -------
        log_dir : path
            Normalized version of the input `log_dir`.

        Examples
        --------

        >>> import shutil
        >>> dl = DataLog(log_dir='test_create_log_dir')
        >>> shutil.rmtree(dl._log_dir)
        """
        log_dir = normalize_log_dir(log_dir)
        if not os.path.exists(log_dir):
            # makedirs (not mkdir) so a nested base directory such as
            # the expanded DEFAULT_PATH can be created in one go.
            os.makedirs(log_dir, 0o755)
        return log_dir

    def _create_log_subdir(self, log_dir, noclobber_log_subdir=False,
                           timestamp=None):
        """Create a clean log dir for logging.

        Parameters
        ----------
        log_dir : path
            Normalized version of the input `log_dir`.
        noclobber_log_subdir : bool
            `noclobber_log_subdir` passed into `.__init__()`.
        timestamp : string
            Override default `timestamp` (%Y%m%d).

        Returns
        -------
        subdir : path
            Path to the timestamped subdir of `log_dir`.
        timestamp : string
            The timestamp used to generate `subdir`.

        Raises
        ------
        ErrorDirExists
            If `subdir` already exists and `noclobber_log_subdir` is
            true.

        Examples
        --------

        >>> import os, shutil
        >>> dl = DataLog(log_dir='test_create_log_subdir',
        ...              timestamp='20101103')
        >>> os.listdir(dl._log_dir)
        ['20101103']
        >>> dl._create_log_subdir(  # doctest: +IGNORE_EXCEPTION_DETAIL
        ...     dl._log_dir, noclobber_log_subdir=True,
        ...     timestamp=dl._timestamp)
        Traceback (most recent call last):
          ...
        ErrorDirExists: test_create_log_subdir/20101103 exists
        >>> dl._create_log_subdir(dl._log_dir, noclobber_log_subdir=False,
        ...                       timestamp=dl._timestamp)
        ('test_create_log_subdir/20101103', '20101103')
        >>> dl._create_log_subdir(dl._log_dir)  # doctest: +ELLIPSIS
        ('test_create_log_subdir/...', '...')
        >>> shutil.rmtree(dl._log_dir)
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d")
        subdir = os.path.join(log_dir, timestamp)
        if os.path.exists(subdir):
            if noclobber_log_subdir:
                raise ErrorDirExists("%s exists" % subdir)
        else:
            os.mkdir(subdir, 0o755)
        return (subdir, timestamp)

    def _get_filename(self, timestamp=None):
        """Get a filename for a new data log for `.write()`.

        Append integers as necessary to avoid clobbering.  Note that
        the appended integers are *not* thread-safe.  You need to
        actually create the file to reserve the name.

        Parameters
        ----------
        timestamp : string
            Override default `timestamp` (%Y%m%d%H%M%S).

        Returns
        -------
        filepath : path
            Path to the timestamped log file.
        timestamp : string
            The timestamp used to generate `filepath`.

        Examples
        --------

        >>> import shutil
        >>> dl = DataLog(log_dir='test_get_filename',
        ...              log_name='my-log', timestamp='20101103')
        >>> f,t = dl._get_filename('20100103235959')
        >>> f
        'test_get_filename/20101103/20100103235959_my-log'
        >>> with open(f, 'w') as fd:
        ...     _ = fd.write('dummy content')
        >>> f,t = dl._get_filename('20100103235959')
        >>> f
        'test_get_filename/20101103/20100103235959_my-log_1'
        >>> with open(f, 'w') as fd:
        ...     _ = fd.write('dummy content')
        >>> f,t = dl._get_filename('20100103235959')
        >>> f
        'test_get_filename/20101103/20100103235959_my-log_2'
        >>> dl._get_filename()  # doctest: +ELLIPSIS
        ('test_get_filename/20101103/..._my-log', '...')
        >>> shutil.rmtree(dl._log_dir)
        """
        if timestamp is None:
            timestamp = time.strftime("%Y%m%d%H%M%S")
        filename = "%s_%s" % (timestamp, self._log_name)
        fullname = os.path.join(self._subdir, filename)
        # Try the bare name first, then `_1`, `_2`, ... until one is free.
        filepath = fullname
        i = 1
        while os.path.exists(filepath):
            filepath = "%s_%d" % (fullname, i)
            i += 1
        return (filepath, timestamp)

    def write(self, obj, timestamp=None):
        """Save object to a timestamped file with `pickle`.

        Parameters
        ----------
        obj : object
            Object to save.
        timestamp : string
            Passed on to `._get_filename()`.

        Returns
        -------
        filepath : path
            Path to the timestamped log file.
        timestamp : string
            The timestamp used to generate the log file.

        Examples
        --------

        >>> import shutil
        >>> dl = DataLog(log_dir='test_write',
        ...              log_name='my-log', timestamp='20101103')
        >>> f,t = dl.write([1, 2, 3])
        >>> with open(f, 'rb') as fd:
        ...     a = pickle.load(fd)
        >>> a
        [1, 2, 3]
        >>> shutil.rmtree(dl._log_dir)
        """
        filepath, timestamp = self._get_filename(timestamp)
        with open(filepath, 'wb') as fd:
            # Force rw-r--r-- regardless of the process umask.
            os.chmod(filepath, 0o644)
            pickle.dump(obj, fd)
        return (filepath, timestamp)

    def write_binary(self, binary_string, timestamp=None):
        """Save a binary string to a timestamped file.

        Parameters
        ----------
        binary_string : buffer
            Binary string to save.
        timestamp : string
            Passed on to `._get_filename()`.

        Returns
        -------
        filepath : path
            Path to the timestamped log file.
        timestamp : string
            The timestamp used to generate the log file(s).

        Examples
        --------

        >>> import shutil
        >>> import numpy
        >>> dl = DataLog(log_dir='test_write_binary',
        ...              log_name='my-log', timestamp='20101103')
        >>> data = numpy.arange(5, dtype=numpy.uint16)
        >>> filepath,ts = dl.write_binary(data.tobytes())
        >>> data_in = numpy.fromfile(filepath, dtype=numpy.uint16, count=-1)
        >>> data_in
        array([0, 1, 2, 3, 4], dtype=uint16)
        >>> data_in.tolist() == data.tolist()
        True
        >>> shutil.rmtree(dl._log_dir)
        """
        filepath, timestamp = self._get_filename(timestamp)
        # Open a new file write-only; O_EXCL makes the open fail rather
        # than clobber if the name was taken since `_get_filename()`.
        # O_BINARY only exists (and only matters) on Windows.
        flags = (os.O_WRONLY | os.O_CREAT | os.O_EXCL
                 | getattr(os, 'O_BINARY', 0))
        fd = os.open(filepath, flags, 0o644)
        try:
            bytes_written = 0
            bytes_remaining = len(binary_string)
            # os.write() may write fewer bytes than requested, so loop.
            while bytes_remaining > 0:
                bw = os.write(fd, binary_string[bytes_written:])
                bytes_written += bw
                bytes_remaining -= bw
        finally:
            os.close(fd)
        return (filepath, timestamp)

    def _write_dict_of_arrays(self, d, base_filepath):
        """Save dict of (string, numpy_array) pairs under `base_filepath`.

        A table of contents is written to `base_filepath` itself, and
        each array is dumped with `.tofile()` to
        `<base_filepath>_<cleaned key>`.

        Parameters
        ----------
        d : dict
            Dictionary to save.
        base_filepath : path
            Path for table of contents and from which per-pair paths
            are constructed.

        Raises
        ------
        OSError
            If `base_filepath` already exists.
        """
        # Open a new file write-only, don't clobber.  (The builtin
        # `open()` takes a buffer size, not a permission mode, as its
        # third argument, hence `os.open()`.)
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        bfd = os.fdopen(os.open(base_filepath, flags, 0o644), 'w')
        try:
            bfd.write("Contents (key : file-extension : format):\n")
            for key in d:
                clean_key = self._clean_filename(key)
                bfd.write("%s : %s : %s\n"
                          % (key, clean_key, str(d[key].dtype)))
                # Write the keyed array to its own file.
                filepath = "%s_%s" % (base_filepath, clean_key)
                d[key].tofile(filepath)
        finally:
            bfd.close()

    def write_dict_of_arrays(self, d, timestamp=None):
        r"""Save dict of (string, numpy_array) pairs to timestamped files.

        Parameters
        ----------
        d : dict
            Dictionary to save.
        timestamp : string
            Passed on to `._get_filename()`.

        Returns
        -------
        filepath : path
            Path to the timestamped log file.
        timestamp : string
            The timestamp used to generate the log file(s).

        Examples
        --------

        >>> import os, shutil
        >>> import numpy
        >>> dl = DataLog(log_dir='test_write_dict_of_arrays',
        ...              log_name='my-log', timestamp='20101103')
        >>> d = {'data1':numpy.arange(5, dtype=numpy.int16),
        ...      'd\\/at:$a 2':numpy.arange(3, dtype=numpy.float64)}
        >>> filepath,ts = dl.write_dict_of_arrays(
        ...     d, timestamp='20101103235959')
        >>> filepath
        'test_write_dict_of_arrays/20101103/20101103235959_my-log'
        >>> print('\n'.join(sorted(os.listdir(dl._subdir))))
        20101103235959_my-log
        20101103235959_my-log_data1
        20101103235959_my-log_data_2
        >>> with open(filepath, 'r') as fd:
        ...     lines = fd.read().splitlines()
        >>> lines[0]
        'Contents (key : file-extension : format):'
        >>> print('\n'.join(sorted(lines[1:])))
        d\/at:$a 2 : data_2 : float64
        data1 : data1 : int16
        >>> numpy.fromfile(filepath+'_data1', dtype=numpy.int16).tolist()
        [0, 1, 2, 3, 4]
        >>> numpy.fromfile(filepath+'_data_2', dtype=numpy.float64).tolist()
        [0.0, 1.0, 2.0]
        >>> shutil.rmtree(dl._log_dir)
        """
        base_filepath, timestamp = self._get_filename(timestamp)
        self._write_dict_of_arrays(d, base_filepath)
        return (base_filepath, timestamp)
class DataLoad(object):
    """Load data logged by `DataLog`."""

    def read(self, filename):
        """Load an object saved with `DataLog.write()`.

        Parameters
        ----------
        filename : path
            `filename` returned by `DataLog.write()`.

        Returns
        -------
        obj : object
            The saved object.

        Examples
        --------

        >>> import shutil
        >>> dl = DataLog(log_dir='test_read',
        ...              log_name='my-log', timestamp='20101103')
        >>> f,t = dl.write([1, 2, 3])
        >>> load = DataLoad()
        >>> load.read(f)
        [1, 2, 3]
        >>> shutil.rmtree(dl._log_dir)
        """
        # NOTE: unpickling executes arbitrary code; only read files
        # you wrote yourself.
        with open(filename, 'rb') as fd:
            return pickle.load(fd)

    def read_binary(self, filename):
        """Load an object saved with `DataLog.write_binary()`.

        Warning: this method *requires* `filename` to end with
        `_float` and *assumes* that the file contains `numpy.float64`
        data.  That is terrible.  Use `h5py` instead of this module!

        Parameters
        ----------
        filename : path
            `filename` returned by `DataLog.write_binary()`.

        Returns
        -------
        obj : numpy.ndarray
            The file contents as a one-dimensional array.

        Raises
        ------
        NotImplementedError
            If `filename` does not end with `_float`.

        Examples
        --------

        >>> import shutil
        >>> import numpy
        >>> dl = DataLog(log_dir='test_read_binary',
        ...              log_name='my-log_float', timestamp='20101103')
        >>> data = numpy.array([1, 2, 3], dtype=numpy.float64)
        >>> f,t = dl.write_binary(data.tobytes())
        >>> load = DataLoad()
        >>> d = load.read_binary(f)
        >>> d.tolist()
        [1.0, 2.0, 3.0]
        >>> shutil.rmtree(dl._log_dir)
        """
        # The element type is encoded as the text after the last `_`.
        type_ = filename.split("_")[-1]
        if type_ == "float":
            t = numpy.float64
        else:
            raise NotImplementedError(
                "read_binary() not implemented for type %s" % (type_))
        return numpy.fromfile(filename, dtype=t)

    def read_dict_of_arrays(self, basefile):
        r"""Load an object saved with `DataLog.write_dict_of_arrays()`.

        The filenames must not have been altered.

        Parameters
        ----------
        basefile : path
            `filename` returned by `DataLog.write_dict_of_arrays()`.

        Returns
        -------
        obj : dict
            The saved dictionary, mapping each original key to a
            one-dimensional array.

        Examples
        --------

        >>> import shutil
        >>> import numpy
        >>> dl = DataLog(log_dir='test_read_dict_of_arrays',
        ...              log_name='my-log', timestamp='20101103')
        >>> d = {'data1':numpy.arange(5, dtype=numpy.int16),
        ...      'd\\/at:$a 2':numpy.arange(3, dtype=numpy.float64)}
        >>> f,t = dl.write_dict_of_arrays(d, timestamp='20101103235959')
        >>> load = DataLoad()
        >>> d = load.read_dict_of_arrays(f)
        >>> print('\n'.join(sorted(d)))
        d\/at:$a 2
        data1
        >>> d['data1'].tolist()
        [0, 1, 2, 3, 4]
        >>> d['d\\/at:$a 2'].tolist()
        [0.0, 1.0, 2.0]
        >>> shutil.rmtree(dl._log_dir)
        """
        obj = {}
        realbasefile = os.path.realpath(basefile)
        with open(realbasefile, 'r') as toc:
            toc.readline()  # ignore first line (the header)
            for line in toc:
                if not line.strip():
                    continue
                # Split from the right: the key may itself contain
                # ' : ', while the cleaned key and dtype name cannot.
                name, extension, format_ = line.rsplit(' : ', 2)
                fpath = "%s_%s" % (realbasefile, extension)
                type_ = numpy.dtype(format_.strip())
                obj[name] = numpy.fromfile(fpath, dtype=type_)
        return obj
if __name__ == "__main__":
    # Run the module's doctests when executed as a script.  The exit
    # status is the number of failed examples, capped at 127.
    import doctest
    import sys

    result = doctest.testmod()
    sys.exit(min(result.failed, 127))