@type error_msg: String
"""
- self.resource
+ self.resource = resource
+ self.error_msg = error_msg
def __str__(self):
return "Failed while loading resource: %s, error was: %s" % (
- resource, error_msg)
+ self.resource, self.error_msg)
+
def RecursiveFileLoader(filename):
"""
"""
if os.path.isdir(filename):
- for root, dirs, files in os.walk(self.fname):
+ for root, dirs, files in os.walk(filename):
if 'CVS' in dirs:
dirs.remove('CVS')
- files = filter(files, startswith('.'))
- files = filter(files, endswith('~'))
- for file in files:
- yield file
+ for f in files:
+ # Skip hidden files and editor backups (names ending in '~').
+ if f.startswith('.') or f.endswith('~'):
+ continue
+ # os.walk() reports bare file names; join them with root so the
+ # yielded path can be opened regardless of the current directory.
+ yield os.path.join(root, f)
else:
yield filename
+
class DataLoader(object):
def __init__(self, validator):
"""
Function to do the actual work of a Loader
"""
- pass
+ raise NotImplementedError("Please override in a subclass")
+
+
+class FileLoader(DataLoader):
+ """ Class to access data in files """
+
+ def __init__(self, filename, validator):
+ """
+ Args:
+ filename : Name of file or directory to open
+ validator : class with validate() method to validate data.
+ """
+ DataLoader.__init__(self, validator)
+ self.fname = filename
+
+ def load(self):
+ """
+ Parse every file yielded by RecursiveFileLoader(self.fname),
+ passing each line to self.lineParser() together with the shared
+ data and errors dicts.
+
+ @rtype: tuple
+ @returns:
+ Returns (data,errors), both may be empty dicts or populated.
+ """
+ data = {}
+ errors = {}
+ # I tried to save a nasty lookup on lineparser by doing the lookup
+ # once, which may be expensive due to digging in child classes.
+ func = self.lineParser
+ for fn in RecursiveFileLoader(self.fname):
+ f = open(fn, 'rb')
+ # Close each file as soon as it has been parsed, even when the
+ # parser raises, so open handles do not accumulate while walking
+ # a directory.
+ try:
+ for line_num, line in enumerate(f):
+ func(line, line_num, data, errors)
+ finally:
+ f.close()
+ return (data, errors)
+
+ def lineParser(self, line, line_num, data, errors):
+ """ This function parses 1 line at a time
+ Args:
+ line: a string representing 1 line of a file
+ line_num: an integer representing what line we are processing
+ data: a dict that contains the data we have extracted from the file
+ already
+ errors: a dict representing parse errors.
+ Returns:
+ Nothing (None). Writes to data and errors
+ """
+ raise NotImplementedError("Please over-ride this in a child class")
-class ItemFileLoader(DataLoader):
+class ItemFileLoader(FileLoader):
"""
Class to load data from a file full of items one per line
Note that due to the data store being a dict, duplicates
are removed.
"""
-
- _recursive = False
def __init__(self, filename, validator):
- DataLoader.__init__(self, validator)
- self.fname = filename
+ FileLoader.__init__(self, filename, validator)
- def load(self):
- data = {}
- errors = {}
- for file in RecursiveFileLoader(self.fname):
- f = open(file, 'rb')
- for line_num, line in enumerate(f):
- if line.startswith('#'):
- continue
- split = line.strip().split()
- if not len(split):
- errors.setdefault(self.fname,[]).append(
- "Malformed data at line: %s, data: %s"
- % (line_num + 1, split))
- key = split[0]
- if not self._validator.validate(key):
- errors.setdefault(self.fname,[]).append(
- "Validation failed at line: %s, data %s"
- % (line_num + 1, split))
- continue
- data[key] = None
- return (data, errors)
-
-class KeyListFileLoader(DataLoader):
+ def lineParser(self, line, line_num, data, errors):
+ """ Record the first whitespace-separated token of a line as an item
+ Comment lines (starting with '#') and blank lines are ignored.
+ A token rejected by the validator is reported in errors under
+ self.fname; an accepted token is stored as data[token] = None.
+ """
+ line = line.strip()
+ if line.startswith('#'): # Skip commented lines
+ return
+ if not len(line): # skip empty lines
+ return
+ # A stripped, non-empty line always splits into at least one token,
+ # so no separate "malformed data" check is needed here.
+ key = line.split()[0]
+ if not self._validator.validate(key):
+ errors.setdefault(self.fname, []).append(
+ "Validation failed at line: %s, data %s"
+ % (line_num + 1, key))
+ return
+ data[key] = None
+
+class KeyListFileLoader(FileLoader):
"""
Class to load data from a file full of key [list] tuples
{'key':['foo1','foo2','foo3']}
"""
- _recursive = False
-
def __init__(self, filename, validator):
- DataLoader.__init__(self, validator)
- self.fname = filename
-
- def load(self):
- data = {}
- errors = {}
- for file in RecursiveFileLoader(self.fname):
- f = open(file, 'rb')
- for line_num, line in enumerate(f):
- if line.startswith('#'):
- continue
- split = line.strip().split()
- if len(split) < 2:
- errors.setdefault(self.fname,[]).append(
- "Malformed data at line: %s, data: %s"
- % (line_num + 1, split))
- continue
- key = split[0]
- value = split[1:]
- if not self._validator.validate(key):
- errors.setdefault(self.fname,[]).append(
- "Validation failed at line: %s, data %s"
- % (line_num + 1, split))
- continue
- if key in data:
- data[key].append(value)
- else:
- data[key] = value
- return (data, errors)
-
-class KeyValuePairFileLoader(DataLoader):
+ FileLoader.__init__(self, filename, validator)
+
+
+ def lineParser(self, line, line_num, data, errors):
+ """ Parse one "key value [value ...]" line into data[key]
+ Comment lines (starting with '#') and blank lines are ignored.
+ Lines with fewer than two tokens, or whose key fails validation,
+ are reported in errors under self.fname and otherwise skipped.
+ """
+ line = line.strip()
+ if line.startswith('#'): # Skip commented lines
+ return
+ if not len(line): # skip empty lines
+ return
+ split = line.split()
+ if len(split) < 2:
+ errors.setdefault(self.fname, []).append(
+ "Malformed data at line: %s, data: %s"
+ % (line_num + 1, line))
+ return
+ key = split[0]
+ value = split[1:]
+ if not self._validator.validate(key):
+ errors.setdefault(self.fname, []).append(
+ "Validation failed at line: %s, data %s"
+ % (line_num + 1, key))
+ return
+ if key in data:
+ # value is a list: extend keeps data[key] a flat list when a key
+ # repeats, whereas append would nest the new list inside it.
+ data[key].extend(value)
+ else:
+ data[key] = value
+
+
+class KeyValuePairFileLoader(FileLoader):
"""
Class to load data from a file full of key=value pairs
'foo':'bar'}
"""
- _recursive = False
-
def __init__(self, filename, validator):
- DataLoader.__init__(self, validator)
- self.fname = filename
+ FileLoader.__init__(self, filename, validator)
+
+
+ def lineParser(self, line, line_num, data, errors):
+ """ Parse one "key=value" line into data[key]
+ Comment lines (starting with '#') and blank lines are ignored.
+ Lines without an '=', or whose key fails validation, are reported
+ in errors under self.fname and otherwise skipped.
+ The stored value is the list of '='-separated pieces after the key.
+ """
+ line = line.strip()
+ if line.startswith('#'): # skip commented lines
+ return
+ if not len(line): # skip empty lines
+ return
+ split = line.split('=')
+ if len(split) < 2:
+ errors.setdefault(self.fname, []).append(
+ "Malformed data at line: %s, data %s"
+ % (line_num + 1, line))
+ return
+ key = split[0]
+ value = split[1:]
+ if not self._validator.validate(key):
+ errors.setdefault(self.fname, []).append(
+ "Validation failed at line: %s, data %s"
+ % (line_num + 1, key))
+ return
+ if key in data:
+ # value is a list: extend keeps data[key] a flat list when a key
+ # repeats, whereas append would nest the new list inside it.
+ data[key].extend(value)
+ else:
+ data[key] = value
- def load(self):
- """
- Return the {source: {key: value}} pairs from a file
- Return the {source: [list of errors] from a load
-
- @param recursive: If set and self.fname is a directory;
- load all files in self.fname
- @type: Boolean
- @rtype: tuple
- @returns:
- Returns (data,errors), both may be empty dicts or populated.
- """
-
- DataLoader.load(self)
- data = {}
- errors = {}
- for file in RecursiveFileLoader(self.fname):
- f = open(file, 'rb')
- for line_num, line in enumerate(f):
- if line.startswith('#'):
- continue
- split = line.strip().split('=')
- if len(split) < 2:
- errors.setdefault(self.fname, []).append(
- "Malformed data at line: %s, data %s"
- % (line_num + 1, split))
- key = split[0]
- value = split[1:]
- if not self._validator.validate(key):
- errors.setdefault(self.fname, []).append(
- "Validation failed at line: %s, data %s"
- % (line_num + 1, split))
- continue
- if key in data:
- data[key].append(value)
- else:
- data[key] = value
- return (data, errors)