Reunite UTF-8 hack comment with sys.setdefaultencoding call it labels.
[cookbook.git] / cookbook / mom.py
1 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
2 #
3 # This file is part of Cookbook.
4 #
5 # Cookbook is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation, either version 3 of the License, or (at your
8 # option) any later version.
9 #
10 # Cookbook is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Cookbook.  If not, see <http://www.gnu.org/licenses/>.
17
18 """Hack parser for standardizing my Mom's cookbook text.
19 """
20
21 from .cookbook import (
22     Cookbook, Recipe, IngredientBlock, Ingredient, Amount, Directions)
23
24
class MomParser (object):
    """Parser for the plain-text layout used in Mom's cookbook file.

    The layout, as recognised by the methods below:

    * Recipes are separated by two or more consecutive blank lines.
    * The first line of a recipe is its name.
    * An optional header line follows, made of tab-separated
      ``Yield:``, ``From:``, ``Source:`` and ``URL:`` entries.
    * Then come one or more ingredient blocks.  A block may be
      introduced by a title line ending in ``:`` and is terminated by
      a blank line.  Each ingredient line is ``VALUE UNITS NAME``.
    * Everything after the ingredient blocks is directions, split
      into paragraphs by blank lines.
    """

    def parse(self, filename):
        """Read `filename` (UTF-8 text) and return the parsed `Cookbook`.

        Every recipe found in the file is parsed with `_parse_recipe`
        and appended to the returned cookbook, including the final
        recipe, which has no following recipe to trigger its flush.
        """
        c = Cookbook()
        # Pretend we have already seen many blank lines so that the
        # first non-blank line of the file opens a new recipe.
        consecutive_blanks = 100
        recipe_lines = None
        # Reading bytes and decoding explicitly behaves the same on
        # Python 2 and Python 3; `with` guarantees the file is closed.
        with open(filename, 'rb') as f:
            for line in f:
                line = line.strip().decode('utf-8')
                if line == '':
                    # Keep a single blank as a separator inside a
                    # recipe; further consecutive blanks only count.
                    if recipe_lines is not None and consecutive_blanks == 0:
                        recipe_lines.append('')
                    consecutive_blanks += 1
                    continue
                if consecutive_blanks >= 2:
                    # Two or more blanks: the previous recipe is done.
                    if recipe_lines is not None:
                        c.append(self._parse_recipe(recipe_lines))
                    recipe_lines = [line]
                else:
                    recipe_lines.append(line)
                consecutive_blanks = 0
        # Flush the recipe that was still being collected at EOF.
        if recipe_lines is not None:
            c.append(self._parse_recipe(recipe_lines))
        return c

    def _parse_recipe(self, lines):
        """Build a `Recipe` from the list of lines of a single recipe.

        `lines` is consumed in place: the name first, then the
        optional header line, the ingredient blocks and finally the
        directions.
        """
        name = lines.pop(0)
        yield_, author, source, url, lines = self._parse_yield_line(lines)
        ingredient_blocks, lines = self._parse_ingredient_blocks(lines)
        directions, lines = self._parse_directions(lines)
        # Internal sanity check: the sub-parsers must use up every line.
        assert len(lines) == 0, lines
        return Recipe(
            name=name,
            ingredient_blocks=ingredient_blocks,
            directions=directions,
            yield_=yield_,
            author=author,
            source=source,
            url=url)

    def _parse_yield_line(self, lines):
        """Parse the optional ``Yield:/From:/Source:/URL:`` header line.

        Leading blank lines are removed from `lines` in place.  Returns
        ``(yield_, author, source, url, lines)`` where each of the
        first four items is ``None`` if the matching entry is absent.
        With no lines left, all four are ``None`` and nothing is
        consumed.

        NOTE: the header is detected by a case-insensitive substring
        search for any field name, so a first line that merely contains
        such a word is consumed even if none of its tab-separated
        pieces starts with ``field:``.
        """
        while len(lines) > 0 and lines[0] == '':
            lines.pop(0)
        fields = ['yield', 'from', 'source', 'url']
        yield_ = author = source = url = None
        if len(lines) == 0:
            # Recipe with nothing but a name: no header to look at.
            return (yield_, author, source, url, lines)
        candidate = lines[0].lower()
        if any(field in candidate for field in fields):
            for bit in lines.pop(0).split('\t'):
                for field in fields:
                    prefix = field + ':'
                    if bit.lower().startswith(prefix):
                        value = bit[len(prefix):].strip()
                        if field == 'yield':
                            yield_ = value.replace('Serving', 'serving')
                        elif field == 'from':
                            author = value
                        elif field == 'source':
                            source = value
                        elif field == 'url':
                            url = value
                        break
        return (yield_, author, source, url, lines)

    def _parse_ingredient_blocks(self, lines):
        """Parse consecutive ingredient blocks from the front of `lines`.

        The first block is always read as ingredients; each later block
        is only accepted if its first line ends with ``:`` (the block
        title).  Returns ``(ingredient_blocks, lines)`` with `lines`
        holding whatever was not consumed.
        """
        ingredient_blocks = []
        first_block = True
        while True:
            while len(lines) > 0 and lines[0] == '':  # scroll past blanks
                lines.pop(0)
            if len(lines) == 0:
                break
            titled = lines[0].endswith(':')
            if not (first_block or titled):
                # An untitled block after the first one is directions.
                break
            if titled:
                name = lines.pop(0)[:-1].strip()
            else:
                name = None
            block = IngredientBlock(name)
            while len(lines) > 0 and lines[0] != '':
                block.append(self._parse_ingredient_line(lines.pop(0)))
            ingredient_blocks.append(block)
            first_block = False
        return (ingredient_blocks, lines)

    def _parse_ingredient_line(self, line):
        """Parse one ``VALUE UNITS NAME`` line into an `Ingredient`.

        A units field of ``#`` means "no units"; ``Large`` and
        ``Cloves`` are lower-cased.

        Raises ValueError (naming the offending line) if the line does
        not contain at least three space-separated fields.
        """
        if line.lower().startswith('1 red'):
            # Hack: such lines lack a units field, so insert the
            # "no units" marker to keep 'red ...' as the name.
            line = '1 # red' + line[len('1 red'):]
        try:
            value, units, name = line.split(' ', 2)
        except ValueError:
            raise ValueError('cannot parse ingredient line: %r' % (line,))
        if units == '#':
            units = None
        elif units == 'Large':
            units = 'large'
        elif units == 'Cloves':
            units = 'cloves'
        return Ingredient(name, Amount(value, units))

    def _parse_directions(self, lines):
        """Group `lines` into paragraphs and return them as `Directions`.

        Blank lines separate paragraphs; the lines of one paragraph are
        joined with newlines.  Returns ``(directions, [])`` since every
        remaining line belongs to the directions.
        """
        directions = Directions()
        paragraph = []
        for line in lines:
            if line == '':
                if len(paragraph) > 0:
                    directions.append('\n'.join(paragraph))
                    paragraph = []
            else:
                paragraph.append(line)
        # Do not lose a final paragraph that has no trailing blank line.
        if len(paragraph) > 0:
            directions.append('\n'.join(paragraph))
        return (directions, [])