From 40cd474b86a6af48fcda426f0cce72dba21e0fd8 Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Sat, 30 Apr 2016 07:51:47 +0100 Subject: [PATCH] [PATCH v1 1/2] emacs: Observe the charset of MIME parts when reading them. --- 3c/d77137e8331a2fb4435388e1a619a6a545d3eb | 130 ++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 3c/d77137e8331a2fb4435388e1a619a6a545d3eb diff --git a/3c/d77137e8331a2fb4435388e1a619a6a545d3eb b/3c/d77137e8331a2fb4435388e1a619a6a545d3eb new file mode 100644 index 000000000..0e5f2d56a --- /dev/null +++ b/3c/d77137e8331a2fb4435388e1a619a6a545d3eb @@ -0,0 +1,130 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id 415696DE035C + for ; Fri, 29 Apr 2016 23:52:02 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Spam-Flag: NO +X-Spam-Score: 0.231 +X-Spam-Level: +X-Spam-Status: No, score=0.231 tagged_above=-999 required=5 tests=[AWL=0.298, + DKIM_SIGNED=0.1, DKIM_VALID=-0.1, RCVD_IN_DNSWL_LOW=-0.7, + RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, SPF_NEUTRAL=0.652, + UNPARSEABLE_RELAY=0.001] autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id zQeplO8f_yv9 for ; + Fri, 29 Apr 2016 23:51:54 -0700 (PDT) +Received: from mail-wm0-f49.google.com (mail-wm0-f49.google.com + [74.125.82.49]) by arlo.cworth.org (Postfix) with ESMTPS id 9AF426DE034D for + ; Fri, 29 Apr 2016 23:51:53 -0700 (PDT) +Received: by mail-wm0-f49.google.com with SMTP id n129so50703025wmn.1 + for ; Fri, 29 Apr 2016 23:51:53 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=dme-org.20150623.gappssmtp.com; s=20150623; + h=from:to:subject:date:message-id:in-reply-to:references; + bh=Jn5Xj5FrI+RVuER5XWDYYAQrxqIEiH1Kkda3TGp4IW0=; + 
b=f8mvvXiwA3BWMxVq+9LXpIH2m9dkunv5rGmltLq5501uvmpoIvIbMYDJYQT6TPX/8Y + XpEHnEv/FHRg60cMLV/Q9Y6uL5gnCOX8kSh9In/Fz1dShhVD1Aaob7UG+73Udje6FByw + Z1j8c6KVGMOOMuFNvM/7DTd53rBUUnNgueTIIkQJEvsVwRKZ2cH+pCGLASsZl5YILNMG + Ebo8GaGvbe8MhmLi9Z9yOGWomFpFurCI7J1lW8uyXPl+swgzZ3UUeFw+YWVvzlrj8c5r + B32o3NMTaB3ate0Xe2n/NZirLefJ4Isq/DTWhtIioUy1lyNYw1a0ehxzcIn/pKg0w3W2 + S9iw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to + :references; + bh=Jn5Xj5FrI+RVuER5XWDYYAQrxqIEiH1Kkda3TGp4IW0=; + b=Fvy5D49GJeo+9N1qjHay4JgTdOpI6hyc6Xt7NraIoCuCXQUwMIZ/IX8jSHcueFP7qs + MBc/27CpOeQusM2NcxpPKNdpjnHvyvwdtnxpFraMWkVTQkpKr+CqzrjzT1BrsIcxLe5Z + UrxJR+6wqgc9Cr6Mne5jmBLE9aLQWXhRbuyu8HJx3Jo1kuezD7WHHLYmRImI54nbVl0+ + JM92Kjw+5gr5sUrDCxtNQ7+0QcpqixwXvNVhCTVxTQ+6IyLbsASOMeW15E01PQz9Xl83 + nI4ElvrUBmZkp5J6ti6qDCky08D8yBT8707Bf5tjdm2MYziqGxCiOTIGGlYVxFJVicCh + cyQQ== +X-Gm-Message-State: + AOPr4FWR/+GnChNZg2dKmU+UlfboOO/oOpXxoLQvwEg04TmcmtV0OYMEFsH2suXLssVMTQ== +X-Received: by 10.194.58.138 with SMTP id r10mr25459936wjq.153.1461999112294; + Fri, 29 Apr 2016 23:51:52 -0700 (PDT) +Received: from disaster-area.hh.sledj.net (disaster-area.hh.sledj.net. + [81.149.164.25]) + by smtp.gmail.com with ESMTPSA id k139sm6774756wmg.24.2016.04.29.23.51.50 + for + (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); + Fri, 29 Apr 2016 23:51:51 -0700 (PDT) +Received: from localhost (disaster-area.hh.sledj.net [local]) + by disaster-area.hh.sledj.net (OpenSMTPD) with ESMTPA id 539b1410 + for ; Sat, 30 Apr 2016 06:51:48 +0000 (UTC) +From: David Edmondson +To: notmuch@notmuchmail.org +Subject: [PATCH v1 1/2] emacs: Observe the charset of MIME parts when reading + them. 
+Date: Sat, 30 Apr 2016 07:51:47 +0100 +Message-Id: <1461999108-68582-2-git-send-email-dme@dme.org> +X-Mailer: git-send-email 2.7.1 +In-Reply-To: <1461999108-68582-1-git-send-email-dme@dme.org> +References: <1461999108-68582-1-git-send-email-dme@dme.org> +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.20 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sat, 30 Apr 2016 06:52:02 -0000 + +`notmuch--get-bodypart-raw' previously assumed that all non-binary MIME +parts could be successfully read by assuming that they were UTF-8 +encoded. This was demonstrated to be wrong, specifically when a part was +marked as ISO8859-1 and included accented characters (which were +incorrectly rendered as a result). + +Rather than assuming UTF-8, attempt to use the part's declared charset +when reading it, falling back to US-ASCII if the declared charset is +unknown, unsupported or invalid. +--- + emacs/notmuch-lib.el | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/emacs/notmuch-lib.el b/emacs/notmuch-lib.el +index 78978ee..f05ded6 100644 +--- a/emacs/notmuch-lib.el ++++ b/emacs/notmuch-lib.el +@@ -23,6 +23,7 @@ + + ;;; Code: + ++(require 'mm-util) + (require 'mm-view) + (require 'mm-decode) + (require 'cl) +@@ -572,7 +573,20 @@ the given type." + ,@(when process-crypto '("--decrypt")) + ,(notmuch-id-to-query (plist-get msg :id)))) + (coding-system-for-read +- (if binaryp 'no-conversion 'utf-8))) ++ (if binaryp 'no-conversion ++ (let ((coding-system (mm-charset-to-coding-system ++ (plist-get part :content-charset)))) ++ ;; Sadly, ++ ;; `mm-charset-to-coding-system' seems ++ ;; to return things that are not ++ ;; considered acceptable values for ++ ;; `coding-system-for-read'. ++ (if (coding-system-p coding-system) ++ coding-system ++ ;; RFC 2047 says that the default ++ ;; charset is US-ASCII. 
RFC 6657 ++ ;; complicates this somewhat. ++ 'us-ascii))))) + (apply #'call-process notmuch-command nil '(t nil) nil args) + (buffer-string)))))) + (when (and cache data) +-- +2.7.1 + -- 2.26.2