Re: [PATCH] emacs: wash: make word-wrap bound message width
[notmuch-archives.git] / b9 / 8ef0e27dc2fbe5c478ffc7b522851867bbae90
1 Return-Path: <SRS0=SM3z=H4=kanru.info=kanru@srs.perfora.net>\r
2 X-Original-To: notmuch@notmuchmail.org\r
3 Delivered-To: notmuch@notmuchmail.org\r
4 Received: from localhost (localhost [127.0.0.1])\r
5         by olra.theworths.org (Postfix) with ESMTP id 70C7E431FBC\r
6         for <notmuch@notmuchmail.org>; Wed,  9 Dec 2009 23:01:41 -0800 (PST)\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
8 Received: from olra.theworths.org ([127.0.0.1])\r
9         by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
10         with ESMTP id K3UpuR28SnEB for <notmuch@notmuchmail.org>;\r
11         Wed,  9 Dec 2009 23:01:40 -0800 (PST)\r
12 Received: from mout.perfora.net (mout.perfora.net [74.208.4.195])\r
13         by olra.theworths.org (Postfix) with ESMTP id 1FA5D431FAE\r
14         for <notmuch@notmuchmail.org>; Wed,  9 Dec 2009 23:01:40 -0800 (PST)\r
15 Received-SPF: pass (mxus2: domain of kanru.info designates 67.220.217.187 as\r
16         permitted sender) client-ip=67.220.217.187;\r
17         envelope-from=kanru@kanru.info; helo=cp20.secserverpros.com; \r
18 Received: from cp20.secserverpros.com (cp20.secserverpros.com\r
19  [67.220.217.187])      by mx.perfora.net (node=mxus2) with ESMTP (Nemesis)     id\r
20  0MAwiS-1NAxXD0UVg-00A20H for notmuch@notmuchmail.org;  Thu, 10 Dec 2009\r
21  02:01:39 -0500\r
22 Received: from 61-228-148-113.dynamic.hinet.net ([61.228.148.113]\r
23         helo=kanru.info)\r
24         by cp20.secserverpros.com with esmtps (TLSv1:AES256-SHA:256)\r
25         (Exim 4.69) (envelope-from <kanru@kanru.info>) id 1NId1s-0001y9-3n\r
26         for notmuch@notmuchmail.org; Thu, 10 Dec 2009 07:01:37 +0000\r
27 Received: from kanru (uid 1000) (envelope-from kanru@kanru.info) id 2269\r
28         by kanru.info (DragonFly Mail Agent) Thu, 10 Dec 2009 15:00:45 +0800\r
29 From: Kan-Ru Chen <kanru@kanru.info>\r
30 To: notmuch <notmuch@notmuchmail.org>\r
31 Date: Thu, 10 Dec 2009 15:00:42 +0800\r
32 Message-ID: <87r5r3joth.fsf@anar.kanru.info>\r
33 MIME-Version: 1.0\r
34 Content-Type: multipart/signed; boundary="=-=-=";\r
35         micalg=pgp-sha256; protocol="application/pgp-signature"\r
36 X-ACL-Warn: {\r
37 X-AntiAbuse: This header was added to track abuse,\r
38         please include it with any abuse report\r
39 X-AntiAbuse: Primary Hostname - cp20.secserverpros.com\r
40 X-AntiAbuse: Original Domain - notmuchmail.org\r
41 X-AntiAbuse: Originator/Caller UID/GID - [47 12] / [47 12]\r
42 X-AntiAbuse: Sender Address Domain - kanru.info\r
43 X-Source: \r
44 X-Source-Args: \r
45 X-Source-Dir: \r
46 Subject: [notmuch] Patch for xapian defect #250\r
47 X-BeenThere: notmuch@notmuchmail.org\r
48 X-Mailman-Version: 2.1.12\r
49 Precedence: list\r
50 List-Id: "Use and development of the notmuch mail system."\r
51         <notmuch.notmuchmail.org>\r
52 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
53         <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
54 List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
55 List-Post: <mailto:notmuch@notmuchmail.org>\r
56 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
57 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
58         <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
59 X-List-Received-Date: Thu, 10 Dec 2009 07:01:41 -0000\r
60 \r
61 --=-=-=\r
62 Content-Transfer-Encoding: quoted-printable\r
63 \r
64 \r
65 The termlist is already sorted, so this patch tries to minimize the\r
66 modifications made to the database, as suggested in the FIXME comment and\r
67 in Carl's TODO file.\r
68 \r
69 My rough profiling shows some improvement, though not much.\r
70 \r
71 *Before*\r
72 =20\r
73 % time notmuch tag +test id:hfntnu+gotv@eGroups.com=20=20=20\r
74 MOD_PLISTS: 368\r
75 notmuch tag +test id:hfntnu+gotv@eGroups.com  0.05s user 0.03s system 11% c=\r
76 pu 0.673 total\r
77 \r
78 % time notmuch tag -test id:hfntnu+gotv@eGroups.com=20=20=20\r
79 MOD_PLISTS: 368\r
80 notmuch tag -test id:hfntnu+gotv@eGroups.com  0.06s user 0.01s system 10% c=\r
81 pu 0.681 total\r
82 =20\r
83 *After*\r
84 =20\r
85 % time notmuch tag +test id:hfntnu+gotv@eGroups.com=20=20=20=20=20=20=20=20=\r
86 =20=20=20=20=20=20=20=20\r
87 MOD_PLIST: 1\r
88 notmuch tag +test id:hfntnu+gotv@eGroups.com  0.01s user 0.02s system 6% cp=\r
89 u 0.436 total\r
90 \r
91 % time notmuch tag -test id:hfntnu+gotv@eGroups.com=20=20=20=20=20=20=20=20=\r
92 =20=20=20=20=20=20=20=20\r
93 MOD_PLIST: 1\r
94 notmuch tag -test id:hfntnu+gotv@eGroups.com  0.01s user 0.01s system 5% cp=\r
95 u 0.383 total\r
96 \r
97 \r
98 % time notmuch tag +test tag:notmuch\r
99 notmuch tag +test tag:notmuch  1.71s user 0.03s system 65% cpu 2.632 total\r
100 \r
101 % time notmuch tag -test tag:notmuch\r
102 notmuch tag -test tag:notmuch  1.61s user 0.02s system 73% cpu 2.204 total\r
103 \r
104 % notmuch count tag:notmuch\r
105 682\r
106 \r
107 =2D-- flint_database.cc 2009-12-08 13:34:24.790284881 +0800\r
108 +++ flint_database.cc   2009-12-10 14:22:14.493653956 +0800\r
109 @@ -1188,7 +1188,7 @@\r
110 =20\r
111         termlist.next();\r
112         while (!termlist.at_end()) {\r
113 =2D         string tname =3D termlist.get_termname();\r
114 +            string tname =3D termlist.get_termname();\r
115             position_table.delete_positionlist(did, tname);\r
116             termcount wdf =3D termlist.get_wdf();\r
117 =20\r
118 @@ -1278,20 +1278,50 @@\r
119         }\r
120 =20=20=20\r
121         if (!modifying || document.internal->terms_modified()) {\r
122 =2D         // FIXME - in the case where there is overlap between the new\r
123 =2D         // termlist and the old termlist, it would be better to compare the\r
124 =2D         // two lists, and make the minimum set of modifications required.\r
125 =2D         // This would lead to smaller changesets for replication, and\r
126 =2D         // probably be faster overall.\r
127 =2D\r
128 =2D         // First, add entries to remove the postings in the underlying reco=\r
129 rd.\r
130             Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(th=\r
131 is);\r
132             FlintTermList termlist(ptrtothis, did);\r
133 +            Xapian::TermIterator term =3D document.termlist_begin();\r
134 +           Xapian::TermIterator term_end =3D document.termlist_end();\r
135 +            flint_doclen_t new_doclen =3D termlist.get_doclength();\r
136 +            string old_tname, new_tname;\r
137 +=20=20=20=20=20=20=20=20=20=20=20=20\r
138 +            total_length -=3D new_doclen;\r
139 +=20=20=20=20=20=20=20=20=20=20=20=20\r
140 +            termlist.next();\r
141 +            while (true) {\r
142 +              bool identical =3D false;\r
143 +              int cmp;\r
144 +              if (termlist.at_end() && term =3D=3D term_end)\r
145 +                break;\r
146 +              if (!termlist.at_end() && term !=3D term_end) {\r
147 +                old_tname =3D termlist.get_termname();\r
148 +                new_tname =3D *term;\r
149 +                cmp =3D old_tname.compare(new_tname);\r
150 +\r
151 +                // Check postlist to see whether they are identical\r
152 +                if (cmp =3D=3D 0) {\r
153 +                  int new_count =3D term.positionlist_count();\r
154 +                  int old_count =3D termlist.positionlist_count();\r
155 +                  if (old_count =3D=3D new_count) {\r
156 +                    PositionIterator it =3D term.positionlist_begin();\r
157 +                    PositionIterator it_end =3D term.positionlist_end();\r
158 +                    PositionIterator old =3D termlist.positionlist_begin();\r
159 +                    if (equal(it, it_end, old))\r
160 +                      identical =3D true;\r
161 +                  }\r
162 +                }\r
163 +              } else if (termlist.at_end()) {\r
164 +                cmp =3D 2;\r
165 +                new_tname =3D *term;\r
166 +              } else {\r
167 +                cmp =3D -2;\r
168 +                old_tname =3D termlist.get_termname();\r
169 +              }\r
170 =20\r
171 =2D         termlist.next();\r
172 =2D         while (!termlist.at_end()) {\r
173 =2D             string tname =3D termlist.get_termname();\r
174 +              if (cmp < 0) {\r
175 +                const string& tname =3D old_tname;\r
176                 termcount wdf =3D termlist.get_wdf();\r
177 +                new_doclen -=3D wdf;\r
178 =20\r
179                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;\r
180                 i =3D freq_deltas.find(tname);\r
181 @@ -1318,58 +1348,62 @@\r
182                     // Modifying a document we added/modified since the last flush.\r
183                     k->second =3D make_pair('D', 0u);\r
184                 }\r
185 =2D\r
186 =2D             termlist.next();\r
187 =2D         }\r
188 =2D\r
189 =2D         total_length -=3D termlist.get_doclength();\r
190 =2D\r
191 =2D         flint_doclen_t new_doclen =3D 0;\r
192 =2D         Xapian::TermIterator term =3D document.termlist_begin();\r
193 =2D         Xapian::TermIterator term_end =3D document.termlist_end();\r
194 =2D         for ( ; term !=3D term_end; ++term) {\r
195 =2D             // Calculate the new document length\r
196 +              } else if (!identical) {\r
197 +                const string& tname =3D new_tname;\r
198                 termcount wdf =3D term.get_wdf();\r
199 =2D             new_doclen +=3D wdf;\r
200 =2D\r
201 =2D             string tname =3D *term;\r
202 =2D             if (tname.size() > MAX_SAFE_TERM_LENGTH)\r
203 =2D                 throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MA=\r
204 X_SAFE_TERM_LENGTH)"): " + tname);\r
205 =2D             map<string, pair<termcount_diff, termcount_diff> >::iterator i;\r
206 =2D             i =3D freq_deltas.find(tname);\r
207 =2D             if (i =3D=3D freq_deltas.end()) {\r
208 =2D                 freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wd=\r
209 f))));\r
210 =2D             } else {\r
211 =2D                 ++i->second.first;\r
212 =2D                 i->second.second +=3D wdf;\r
213 =2D             }\r
214 +                new_doclen +=3D wdf;\r
215 +=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20\r
216 +                if (cmp > 0) {\r
217 +                  if (tname.size() > MAX_SAFE_TERM_LENGTH)\r
218 +                    throw Xapian::InvalidArgumentError("Term too long (> "=\r
219 STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname);\r
220 +                  map<string, pair<termcount_diff, termcount_diff> >::iter=\r
221 ator i;\r
222 +                  i =3D freq_deltas.find(tname);\r
223 +                  if (i =3D=3D freq_deltas.end()) {\r
224 +                    freq_deltas.insert(make_pair(tname, make_pair(1, termc=\r
225 ount_diff(wdf))));\r
226 +                  } else {\r
227 +                    ++i->second.first;\r
228 +                    i->second.second +=3D wdf;\r
229 +                  }\r
230 +\r
231 +                  // Add did to tname's postlist\r
232 +                  map<string, map<docid, pair<char, termcount> > >::iterat=\r
233 or j;\r
234 +                  j =3D mod_plists.find(tname);\r
235 +                  if (j =3D=3D mod_plists.end()) {\r
236 +                    map<docid, pair<char, termcount> > m;\r
237 +                    j =3D mod_plists.insert(make_pair(tname, m)).first;\r
238 +                  }\r
239 +                  map<docid, pair<char, termcount> >::iterator k;\r
240 +                  k =3D j->second.find(did);\r
241 +                  if (k !=3D j->second.end()) {\r
242 +                    Assert(k->second.first =3D=3D 'D');\r
243 +                    k->second.first =3D 'M';\r
244 +                    k->second.second =3D wdf;\r
245 +                  } else {\r
246 +                    j->second.insert(make_pair(did, make_pair('A', wdf)));\r
247 +                  }\r
248 +                }\r
249 +\r
250 +                PositionIterator it =3D term.positionlist_begin();\r
251 +                PositionIterator it_end =3D term.positionlist_end();\r
252 +                if (it !=3D it_end) {\r
253 +                  position_table.set_positionlist(\r
254 +                                                  did, tname, it, it_end);\r
255 +                } else {\r
256 +                  position_table.delete_positionlist(did, tname);\r
257 +                }\r
258 +              }\r
259 +              if (termlist.at_end())\r
260 +                ++term;\r
261 +              else if (term =3D=3D term_end)\r
262 +                termlist.next();\r
263 +              else {\r
264 +                if (cmp >=3D 0)\r
265 +                  ++term;\r
266 +                if (cmp <=3D 0)\r
267 +                  termlist.next();\r
268 +              }\r
269 +            }\r
270 =20\r
271 =2D             // Add did to tname's postlist\r
272 =2D             map<string, map<docid, pair<char, termcount> > >::iterator j;\r
273 =2D             j =3D mod_plists.find(tname);\r
274 =2D             if (j =3D=3D mod_plists.end()) {\r
275 =2D                 map<docid, pair<char, termcount> > m;\r
276 =2D                 j =3D mod_plists.insert(make_pair(tname, m)).first;\r
277 =2D             }\r
278 =2D             map<docid, pair<char, termcount> >::iterator k;\r
279 =2D             k =3D j->second.find(did);\r
280 =2D             if (k !=3D j->second.end()) {\r
281 =2D                 Assert(k->second.first =3D=3D 'D');\r
282 =2D                 k->second.first =3D 'M';\r
283 =2D                 k->second.second =3D wdf;\r
284 =2D             } else {\r
285 =2D                 j->second.insert(make_pair(did, make_pair('A', wdf)));\r
286 =2D             }\r
287 =2D\r
288 =2D             PositionIterator it =3D term.positionlist_begin();\r
289 =2D             PositionIterator it_end =3D term.positionlist_end();\r
290 =2D             if (it !=3D it_end) {\r
291 =2D                 position_table.set_positionlist(\r
292 =2D                     did, tname, it, it_end);\r
293 =2D             } else {\r
294 =2D                 position_table.delete_positionlist(did, tname);\r
295 =2D             }\r
296 =2D         }\r
297             LOGLINE(DB, "Calculated doclen for replacement document " << did << "=\r
298  as " << new_doclen);\r
299 =20\r
300             // Set the termlist\r
301 \r
302 \r
303 =2D-=20\r
304 Kan-Ru Chen | http://kanru.info\r
305 \r
306 Q: Why are my replies five sentences or less?\r
307 A: http://five.sentenc.es/\r
308 \r
309 --=-=-=\r
310 Content-Type: application/pgp-signature\r
311 \r
312 -----BEGIN PGP SIGNATURE-----\r
313 Version: GnuPG v2.0.13 (GNU/Linux)\r
314 \r
315 iQIcBAEBCAAGBQJLIJydAAoJEBsTLgHOxq1G07kP/0CkSSK7vOdEM/vComrNqMGJ\r
316 wh8XcfZrKqsl+irunqSUKG4g1EDHRfWMheeJYggSzyZvZB4uVCEJkUnHMtIqVb9b\r
317 olzCOgqqYQL0ImbX06RktX6lF8U3vCkt+xthX2poyn6/wxnwQoBh1uj52mbIaLXo\r
318 BIyFBK7rNyazD/5+i/OXIB4wUjUXT6xTWt7J9DVK4CPqhS7mxHvdzV8ZAcw8lAOp\r
319 w46Nr/OqjoBQbHcIz0rw3ZTxl7VCMxXT/NDam6fR2bNgJR+/klbEDAhwwzWw2yMp\r
320 meWZAzrRGm3tCfD5tirjOAf+CxD/3wY85ThfC1RZt62u0WEv/HzFVFONmGp09lb2\r
321 sgaELQOO5yTOpaL6A/bfTmv1bn49elCgcuc1EbgToU5rcjXg1D+5foWQQP5hj/hU\r
322 xY4mEEa2AfCRCNDEKkSmdi+7v7QBjJudP52twyi2mdZW0M3xdCrX3twKtS7taDC3\r
323 FUZ30tceNxf9w26jlQRRXV/jtfi826JR9Yx4E4X25jAABmBDAGq5XHnPU9y9HRzF\r
324 ykwTWM9DH+yjsTffyFLb+Ce204ihNKBA6ldyDkIYPYDc9YtLow+THCdbguQm2fCU\r
325 JAkykYkYBpI1sZvCavIq31CCM4uxD29F6SKjm3yXG2Zlfyu2M0aq6swZMGF0WvVM\r
326 6n1Y9+6DgjsdI32dJ8h8\r
327 =jjvD\r
328 -----END PGP SIGNATURE-----\r
329 --=-=-=--\r