From d6fc6e0d89b3580c2fab78b36d880a3f5bfc9081 Mon Sep 17 00:00:00 2001 From: Mark Walters Date: Mon, 2 May 2016 08:37:46 +0100 Subject: [PATCH] Re: [PATCH v1 1/2] emacs: Observe the charset of MIME parts when reading them. --- 9d/2c898db4a08fc9021667dc86e83d931ea6de9c | 146 ++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 9d/2c898db4a08fc9021667dc86e83d931ea6de9c diff --git a/9d/2c898db4a08fc9021667dc86e83d931ea6de9c b/9d/2c898db4a08fc9021667dc86e83d931ea6de9c new file mode 100644 index 000000000..a60f07849 --- /dev/null +++ b/9d/2c898db4a08fc9021667dc86e83d931ea6de9c @@ -0,0 +1,146 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id E98366DE01BE + for ; Mon, 2 May 2016 00:38:01 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Spam-Flag: NO +X-Spam-Score: -0.306 +X-Spam-Level: +X-Spam-Status: No, score=-0.306 tagged_above=-999 required=5 tests=[AWL=0.264, + DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, + FREEMAIL_ENVFROM_END_DIGIT=0.25, FREEMAIL_FROM=0.001, RCVD_IN_DNSWL_LOW=-0.7, + RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, SPF_PASS=-0.001] + autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id ng3Vavm_NEhg for ; + Mon, 2 May 2016 00:37:51 -0700 (PDT) +Received: from mail-wm0-f66.google.com (mail-wm0-f66.google.com + [74.125.82.66]) by arlo.cworth.org (Postfix) with ESMTPS id B216C6DE00F5 for + ; Mon, 2 May 2016 00:37:50 -0700 (PDT) +Received: by mail-wm0-f66.google.com with SMTP id n129so15966730wmn.1 + for ; Mon, 02 May 2016 00:37:50 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; + h=from:to:subject:in-reply-to:references:user-agent:date:message-id + :mime-version; bh=jyUvm/jh9U3EesS14XXW3DOsiSjy9K4/efHfPNiw2M0=; + 
b=BA2KHN2n3lRFfAUh2KNR0ai44crcfRTavMDuN1cQOYoxRko6SsJPspOVOxb6dhNi6m + O/EAh4g9PcD3JPvfspxwUPC3s5G0gFZXa3iilHRnQ+9nic6SssCMTxtwi/OUwqIsKGsJ + iv1HILx+AFtzNXp+ekEevrBHHulx7blCHByg1Jcf6iM1MCRY+RfDiip7AAVLRdPDN2i3 + z1wy0eIHdv66++lsxDjf86kljlqaUaM+jPi9T+bouoJ+NSfdgVcFQmRYmN1oWdeZUowU + lZ0oDyY0tdvNwi9z99FG5mCog6twKmKPwIxv0lUPSFtNUU0jf1HkhWzSZf4w5WJ66d0u + JeMw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:subject:in-reply-to:references + :user-agent:date:message-id:mime-version; + bh=jyUvm/jh9U3EesS14XXW3DOsiSjy9K4/efHfPNiw2M0=; + b=g6aLXZZZhBeLi93cmfrWHTlEwRXwhuut2SoXD4FyTEJ2iMkad7dWEin18NSng7eFrC + 0jJKzdxns/HJz3PQ0pPcMfQgR0tXW5bXlfNQnpschvBOXsdbp+BXWb6T847lHjrA2VC1 + +05OmMRw9ZLXhK/xWq1lkDnlN9aM7uoqPcekkgtSeb8OVCux0SOJJesE3fit2HTYedC8 + JSN3bVUamXz47FzyxSionprXUfOGBN0R8OXM/pNg0x75zPYiIG9bIItyoOVAHbpbgKP7 + gBQb/YXqi4Av+r2ivMckugb6Lj8QK60Z5lamZlWcX0+FFTsduaD3+a4k69jC0VKp3sYI + z3FA== +X-Gm-Message-State: + AOPr4FVSIBLECRL/61jKkHKqyun9mUD6ZWRN1TYtXLS/RkDaF/XBAj95PWDo7oe0BAaDzg== +X-Received: by 10.194.10.162 with SMTP id j2mr34405240wjb.72.1462174668681; + Mon, 02 May 2016 00:37:48 -0700 (PDT) +Received: from localhost (5751dfa2.skybroadband.com. [87.81.223.162]) + by smtp.gmail.com with ESMTPSA id y70sm17293483wmd.3.2016.05.02.00.37.47 + (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); + Mon, 02 May 2016 00:37:47 -0700 (PDT) +From: Mark Walters +To: David Edmondson , notmuch@notmuchmail.org +Subject: Re: [PATCH v1 1/2] emacs: Observe the charset of MIME parts when + reading them. 
+In-Reply-To: <1461999108-68582-2-git-send-email-dme@dme.org> +References: <1461999108-68582-1-git-send-email-dme@dme.org> + <1461999108-68582-2-git-send-email-dme@dme.org> +User-Agent: Notmuch/0.22~rc1+2~g56141bf (http://notmuchmail.org) Emacs/24.4.1 + (x86_64-pc-linux-gnu) +Date: Mon, 02 May 2016 08:37:46 +0100 +Message-ID: <877ffc9agl.fsf@qmul.ac.uk> +MIME-Version: 1.0 +Content-Type: text/plain +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.20 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Mon, 02 May 2016 07:38:02 -0000 + + +On Sat, 30 Apr 2016, David Edmondson wrote: +> `notmuch--get-bodypart-raw' previously assumed that all non-binary MIME +> parts could be successfully read by assuming that they were UTF-8 +> encoded. This was demonstrated to be wrong, specifically when a part was +> marked as ISO8859-1 and included accented characters (which were +> incorrectly rendered as a result). +> +> Rather than assuming UTF-8, attempt to use the part's declared charset +> when reading it, falling back to US-ASCII if the declared charset is +> unknown, unsupported or invalid. + +As this seemed hard to test (if I understand the bug correctly it didn't +show up on my test of the entire performance corpus -- of +course my testing could have been wrong) would it be possible to add a test +for it? + +Best wishes + +Mark + + +> --- +> emacs/notmuch-lib.el | 16 +++++++++++++++- +> 1 file changed, 15 insertions(+), 1 deletion(-) +> +> diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el +> index 78978ee..f05ded6 100644 +> --- a/emacs/notmuch-lib.el +> +++ b/emacs/notmuch-lib.el +> @@ -23,6 +23,7 @@ +> +> ;;; Code: +> +> +(require 'mm-util) +> (require 'mm-view) +> (require 'mm-decode) +> (require 'cl) +> @@ -572,7 +573,20 @@ the given type." 
+> ,@(when process-crypto '("--decrypt")) +> ,(notmuch-id-to-query (plist-get msg :id)))) +> (coding-system-for-read +> - (if binaryp 'no-conversion 'utf-8))) +> + (if binaryp 'no-conversion +> + (let ((coding-system (mm-charset-to-coding-system +> + (plist-get part :content-charset)))) +> + ;; Sadly, +> + ;; `mm-charset-to-coding-system' seems +> + ;; to return things that are not +> + ;; considered acceptable values for +> + ;; `coding-system-for-read'. +> + (if (coding-system-p coding-system) +> + coding-system +> + ;; RFC 2047 says that the default +> + ;; charset is US-ASCII. RFC6657 +> + ;; complicates this somewhat. +> + 'us-ascii))))) +> (apply #'call-process notmuch-command nil '(t nil) nil args) +> (buffer-string)))))) +> (when (and cache data) +> -- +> 2.7.1 +> +> _______________________________________________ +> notmuch mailing list +> notmuch@notmuchmail.org +> https://notmuchmail.org/mailman/listinfo/notmuch -- 2.26.2