From: Michal Sojka <sojkam1@fel.cvut.cz>
Date: Fri, 24 Feb 2012 07:36:22 +0000 (+0100)
Subject: Convert non-UTF-8 parts to UTF-8 before indexing them
X-Git-Tag: debian/0.12_rc1-1~20
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=40edc971a82e236704216058591d4c7684f8058f;p=notmuch.git

Convert non-UTF-8 parts to UTF-8 before indexing them

This fixes a bug that made it impossible to search for non-ASCII words in
such parts. The code here was copied from show_text_part_content(), because
the show command already does the needed conversion when showing the
message.
---

diff --git a/lib/index.cc b/lib/index.cc
index d8f8b2bf..e3777322 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -315,6 +315,7 @@ _index_mime_part (notmuch_message_t *message,
     GByteArray *byte_array;
     GMimeContentDisposition *disposition;
     char *body;
+    const char *charset;
 
     if (! part) {
 	fprintf (stderr, "Warning: Not indexing empty mime part.\n");
@@ -390,6 +391,20 @@ _index_mime_part (notmuch_message_t *message,
     g_mime_stream_filter_add (GMIME_STREAM_FILTER (filter),
 			      discard_uuencode_filter);
 
+    charset = g_mime_object_get_content_type_parameter (part, "charset");
+    if (charset) {
+	GMimeFilter *charset_filter;
+	charset_filter = g_mime_filter_charset_new (charset, "UTF-8");
+	/* This result can be NULL for things like "unknown-8bit".
+	 * Don't set a NULL filter as that makes GMime print
+	 * annoying assertion-failure messages on stderr. */
+	if (charset_filter) {
+	    g_mime_stream_filter_add (GMIME_STREAM_FILTER (filter),
+				      charset_filter);
+	    g_object_unref (charset_filter);
+	}
+    }
+
     wrapper = g_mime_part_get_content_object (GMIME_PART (part));
     if (wrapper)
 	g_mime_data_wrapper_write_to_stream (wrapper, filter);