From 4d8fb2d5b8700072d2aef24bda39a5a546474e69 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 22 May 2013 20:03:40 +2000 Subject: [PATCH] Re: [PATCH 4/5] emacs: Streaming S-expression parser --- 22/302e911995137cfdaf1609b1e10aea71a7b346 | 378 ++++++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 22/302e911995137cfdaf1609b1e10aea71a7b346 diff --git a/22/302e911995137cfdaf1609b1e10aea71a7b346 b/22/302e911995137cfdaf1609b1e10aea71a7b346 new file mode 100644 index 000000000..4417d1859 --- /dev/null +++ b/22/302e911995137cfdaf1609b1e10aea71a7b346 @@ -0,0 +1,378 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 13C15431FBF + for ; Tue, 21 May 2013 17:03:56 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id LHSwi+YPzy39 for ; + Tue, 21 May 2013 17:03:46 -0700 (PDT) +Received: from dmz-mailsec-scanner-1.mit.edu (DMZ-MAILSEC-SCANNER-1.MIT.EDU + [18.9.25.12]) + by olra.theworths.org (Postfix) with ESMTP id 91319431FB6 + for ; Tue, 21 May 2013 17:03:46 -0700 (PDT) +X-AuditID: 1209190c-b7f566d000004c69-d7-519c0b616e12 +Received: from mailhub-auth-1.mit.edu ( [18.9.21.35]) + by dmz-mailsec-scanner-1.mit.edu (Symantec Messaging Gateway) with SMTP + id 87.F2.19561.16B0C915; Tue, 21 May 2013 20:03:45 -0400 (EDT) +Received: from outgoing.mit.edu (OUTGOING-AUTH-1.MIT.EDU [18.9.28.11]) + by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id r4M03i2O030129; + Tue, 21 May 2013 20:03:45 -0400 +Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91]) + (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id r4M03gst028278 + (version=TLSv1/SSLv3 cipher=DHE-RSA-AES128-SHA bits=128 verify=NOT); + Tue, 21 May 2013 20:03:43 -0400 +Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.80) + (envelope-from ) + id 1UewWr-0000TC-PN; Tue, 21 May 2013 20:03:41 -0400 +From: Austin Clements +To: Mark Walters , notmuch@notmuchmail.org +Subject: Re: [PATCH 4/5] emacs: Streaming S-expression parser +In-Reply-To: <87fvxg2wc4.fsf@qmul.ac.uk> +References: <1368851472-5382-1-git-send-email-amdragon@mit.edu> + <1368851472-5382-5-git-send-email-amdragon@mit.edu> + <87fvxg2wc4.fsf@qmul.ac.uk> +User-Agent: Notmuch/0.15.2+83~g8bee3c4 (http://notmuchmail.org) Emacs/23.4.1 + (i486-pc-linux-gnu) +Date: Tue, 21 May 2013 20:03:40 -0400 +Message-ID: <87ip2b3moz.fsf@awakening.csail.mit.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: quoted-printable +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFupmleLIzCtJLcpLzFFi42IR4hRV1k3knhNosOWDlcXquTwW12/OZHZg + 8tg56y67x7NVt5gDmKK4bFJSczLLUov07RK4Mqbd2c5SsDa+ovvKRLYGxk1eXYycHBICJhIT + nt9nhLDFJC7cW8/WxcjFISSwj1Fi5fozrBDORkaJtRvWQjmnmSR+T1jIDuEsYZTo3XSHDaSf + TUBDYtv+5WCzRARcJZ5++8zcxcjBISxgK7FoP1gJJ1DJqltHmSF6+xklTvz7BlYvKpAgsfLu + CTCbRUBVYuLk+ewgNi/QfcfXNrNC2IISJ2c+YQGxmQXUJf7Mu8QMYWtLLFv4mnkCo+AsJGWz + kJTNQlK2gJF5FaNsSm6Vbm5iZk5xarJucXJiXl5qka6hXm5miV5qSukmRnAIS/LsYHxzUOkQ + owAHoxIP74Pa2YFCrIllxZW5hxglOZiURHmrOOcECvEl5adUZiQWZ8QXleakFh9ilOBgVhLh + NbwGVM6bklhZlVqUD5OS5mBREue9nHLTX0ggPbEkNTs1tSC1CCYrw8GhJMHbzAU0VLAoNT21 + Ii0zpwQhzcTBCTKcB2h4IUgNb3FBYm5xZjpE/hSjMcfm85PfMXK0fQWSQix5+XmpUuK8rSCl + AiClGaV5cNNgaegVozjQc8IQVTzAFAY37xXQKiagVdstZ4KsKklESEk1MKYmaQYtFvsSkac9 + l9HAz6XH2n/z7skuX2a+dzmkJ/m3RSDr2LHnemdUZsvO/XvoeWtM98slrGq8cudvxh1ZyrzC + lfnlig3ra2u2/+fecIHRQjUvqeDRcestJq/Ld1QlTNZ4drni0/NH13x6zi8V+nKsfOehys37 + pxSE7Pfl9suYvbV7FZ9H6j0lluKMREMt5qLiRACZ8xXOHgMAAA== +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Wed, 22 May 2013 00:03:56 -0000 + +On Tue, 21 May 2013, Mark Walters wrote: +> Hi +> +> This patch looks good to me. Some minor comments below. + +Some minor replies below. + +In building some other code on top of this, I found an interesting (but +easy to fix) interface bug. Currently, the interface is designed as if +it doesn't matter what buffer these functions are called from, however, +because they move point and expect this point motion to persist, it's +actually not safe to call this interface unless the caller is in the +right buffer anyway. For example, if the buffer is selected in a +window, the with-current-buffer in the parser functions will actually +move a *temporary* point, meaning that the only way the caller can +discover the new point is to first select the buffer for itself. I can +think of two solutions: 1) maintain our own mark for the parser's +current position or 2) tweak the doc strings and code so that it reads +from the current buffer. 1 keeps the interface the way it's currently +documented, but complicates the parser implementation and interface and +doesn't simplify the caller. 2 simplifies the parser and it turns out +all callers already satisfy the requirement. + +> Best wishes +> +> Mark +> +> +> On Sat, 18 May 2013, Austin Clements wrote: +>> This provides the same interface as the streaming JSON parser, but +>> reads S-expressions incrementally. The only difference is that the +>> `notmuch-sexp-parse-partial-list' helper does not handle interleaved +>> error messages (since we now have the ability to separate these out at +>> the invocation level), so it no longer takes an error function and +>> does not need to do the horrible resynchronization that the JSON +>> parser had to. +>> +>> Some implementation improvements have been made over the JSON parser. +>> This uses a vector instead of a list for the parser data structure, +>> since this allows faster access to elements (and modern versions of +>> Emacs handle storage of small vectors efficiently). Private functions +>> follow the "prefix--name" convention. And the implementation is much +>> simpler overall because S-expressions are much easier to parse. +>> --- +>> emacs/Makefile.local | 1 + +>> emacs/notmuch-parser.el | 212 ++++++++++++++++++++++++++++++++++++++++= ++++++++ +>> 2 files changed, 213 insertions(+) +>> create mode 100644 emacs/notmuch-parser.el +>> +>> diff --git a/emacs/Makefile.local b/emacs/Makefile.local +>> index 456700a..a910aff 100644 +>> --- a/emacs/Makefile.local +>> +++ b/emacs/Makefile.local +>> @@ -3,6 +3,7 @@ +>> dir :=3D emacs +>> emacs_sources :=3D \ +>> $(dir)/notmuch-lib.el \ +>> + $(dir)/notmuch-parser.el \ +>> $(dir)/notmuch.el \ +>> $(dir)/notmuch-query.el \ +>> $(dir)/notmuch-show.el \ +>> diff --git a/emacs/notmuch-parser.el b/emacs/notmuch-parser.el +>> new file mode 100644 +>> index 0000000..1b7cf64 +>> --- /dev/null +>> +++ b/emacs/notmuch-parser.el +>> @@ -0,0 +1,212 @@ +>> +;; notmuch-parser.el --- streaming S-expression parser +>> +;; +>> +;; Copyright =C2=A9 Austin Clements +>> +;; +>> +;; This file is part of Notmuch. +>> +;; +>> +;; Notmuch is free software: you can redistribute it and/or modify it +>> +;; under the terms of the GNU General Public License as published by +>> +;; the Free Software Foundation, either version 3 of the License, or +>> +;; (at your option) any later version. +>> +;; +>> +;; Notmuch is distributed in the hope that it will be useful, but +>> +;; WITHOUT ANY WARRANTY; without even the implied warranty of +>> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +>> +;; General Public License for more details. +>> +;; +>> +;; You should have received a copy of the GNU General Public License +>> +;; along with Notmuch. If not, see . +>> +;; +>> +;; Authors: Austin Clements +>> + +>> +(require 'cl) +>> + +>> +(defun notmuch-sexp-create-parser (buffer) +>> + "Return a streaming S-expression parser that reads from BUFFER. +>> + +>> +This parser is designed to incrementally read an S-expression +>> +whose structure is known to the caller. Like a typical +>> +S-expression parsing interface, it provides a function to read a +>> +complete S-expression from the input. However, it extends this +>> +with an additional function that requires the next value in the +>> +input to be a list and descends into it, allowing its elements to +>> +be read one at a time or further descended into. Both functions +>> +can return 'retry to indicate that not enough input is available. +>> + +>> +The parser always consumes input from BUFFER's point. Hence, the +>> +caller is allowed to delete any data before point and may +>> +resynchronize after an error by moving point." +>> + +>> + (vector 'notmuch-sexp-parser +>> + buffer +>> + ;; List depth +>> + 0 +>> + ;; Partial parse position marker +>> + nil +>> + ;; Partial parse state +>> + nil)) +>> + +>> +(defmacro notmuch-sexp--buffer (sp) `(aref ,sp 1)) +>> +(defmacro notmuch-sexp--depth (sp) `(aref ,sp 2)) +>> +(defmacro notmuch-sexp--partial-pos (sp) `(aref ,sp 3)) +>> +(defmacro notmuch-sexp--partial-state (sp) `(aref ,sp 4)) +> +> Why the double hyphen --? Is it a name-space or some convention? + +More specifically, this seems to be the most common Elisp convention for +indicating private symbols. + +>> +(defun notmuch-sexp-read (sp) +>> + "Consume and return the value at point in SP's buffer. +>> + +>> +Returns 'retry if there is insufficient input to parse a complete +>> +value (though it may still move point over whitespace). If the +>> +parser is currently inside a list and the next token ends the +>> +list, this moves point just past the terminator and returns 'end. +>> +Otherwise, this moves point to just past the end of the value and +>> +returns the value." +>> + +>> + (with-current-buffer (notmuch-sexp--buffer sp) +>> + (skip-chars-forward " \n\r\t") +>> + (cond ((eobp) 'retry) +>> + ((=3D (char-after) ?\)) +>> + ;; We've reached the end of a list +>> + (if (=3D (notmuch-sexp--depth sp) 0) +>> + ;; .. but we weren't in a list. Let read signal the +>> + ;; error. +>> + (read (current-buffer)) +> +> Why is good for read to signal the error rather than us doing it? + +This ensures the syntax error handling and signal behavior of +notmuch-sexp-read is identical in every way to a regular read call. +Maybe the comment should read "Let read signal the error like we do in +all other code paths."? + +>> + ;; Go up a level and return an end token +>> + (decf (notmuch-sexp--depth sp)) +>> + (forward-char) +>> + 'end)) +>> + ((=3D (char-after) ?\() +>> + ;; We're at the beginning of a list. If we haven't started +>> + ;; a partial parse yet, attempt to read the list in its +>> + ;; entirety. If this fails, or we've started a partial +>> + ;; parse, extend the partial parse to figure out when we +>> + ;; have a complete list. +>> + (catch 'return +>> + (when (null (notmuch-sexp--partial-state sp)) +>> + (let ((start (point))) +>> + (condition-case nil +>> + (throw 'return (read (current-buffer))) +>> + (end-of-file (goto-char start))))) +>> + ;; Extend the partial parse +>> + (let (is-complete) +>> + (save-excursion +>> + (let* ((new-state (parse-partial-sexp +>> + (or (notmuch-sexp--partial-pos sp) (point)) +>> + (point-max) 0 nil +>> + (notmuch-sexp--partial-state sp))) +>> + ;; A complete value is available if we've +>> + ;; reached depth 0. +>> + (depth (first new-state))) +>> + (assert (>=3D depth 0)) +>> + (if (=3D depth 0) +>> + ;; Reset partial parse state +>> + (setf (notmuch-sexp--partial-state sp) nil +>> + (notmuch-sexp--partial-pos sp) nil +>> + is-complete t) +>> + ;; Update partial parse state +>> + (setf (notmuch-sexp--partial-state sp) new-state +>> + (notmuch-sexp--partial-pos sp) (point-marker))))) +>> + (if is-complete +>> + (read (current-buffer)) +>> + 'retry)))) +>> + (t +>> + ;; Attempt to read a non-compound value +>> + (let ((start (point))) +>> + (condition-case nil +>> + (let ((val (read (current-buffer)))) +>> + ;; We got what looks like a complete read, but if +>> + ;; we reached the end of the buffer in the process, +>> + ;; we may not actually have all of the input we +>> + ;; need (unless it's a string, which is delimited). +>> + (if (or (stringp val) (not (eobp))) +>> + val +>> + ;; We can't be sure the input was complete +>> + (goto-char start) +>> + 'retry)) +>> + (end-of-file +>> + (goto-char start) +>> + 'retry))))))) +>> + +>> +(defun notmuch-sexp-begin-list (sp) +>> + "Parse the beginning of a list value and enter the list. +>> + +>> +Returns 'retry if there is insufficient input to parse the +>> +beginning of the list. If this is able to parse the beginning of +>> +a list, it moves point past the token that opens the list and +>> +returns t. Later calls to `notmuch-sexp-read' will return the +>> +elements inside the list. If the input in buffer is not the +>> +beginning of a list, throw invalid-read-syntax." +>> + +>> + (with-current-buffer (notmuch-sexp--buffer sp) +>> + (skip-chars-forward " \n\r\t") +>> + (cond ((eobp) 'retry) +>> + ((=3D (char-after) ?\() +>> + (forward-char) +>> + (incf (notmuch-sexp--depth sp)) +>> + t) +>> + (t +>> + ;; Skip over the bad character like `read' does +>> + (forward-char) +>> + (signal 'invalid-read-syntax (list (string (char-before)))))))) +>> + +>> +(defun notmuch-sexp-eof (sp) +>> + "Signal an error if there is more data in SP's buffer. +>> + +>> +Moves point to the beginning of any trailing data or to the end +>> +of the buffer if there is only trailing whitespace." +>> + +>> + (with-current-buffer (notmuch-sexp--buffer sp) +>> + (skip-chars-forward " \n\r\t") +>> + (unless (eobp) +>> + (error "Trailing garbage following expression")))) +>> + +>> +(defvar notmuch-sexp--parser nil +>> + "The buffer-local notmuch-sexp-parser instance. +>> + +>> +Used by `notmuch-sexp-parse-partial-list'.") +>> + +>> +(defvar notmuch-sexp--state nil +>> + "The buffer-local `notmuch-sexp-parse-partial-list' state.") +>> + +>> +(defun notmuch-sexp-parse-partial-list (result-function result-buffer) +>> + "Incrementally parse an S-expression list from the current buffer. +>> + +>> +This function consume an S-expression list from the current +> +> consumes + +Oops, yes. + +>> +buffer, applying RESULT-FUNCTION in RESULT-BUFFER to each +>> +complete value in the list. It operates incrementally and should +>> +be called whenever the input buffer has been extended with +>> +additional data. The caller just needs to ensure it does not +>> +move point in the input buffer." +>> + +>> + ;; Set up the initial state +>> + (unless (local-variable-p 'notmuch-sexp--parser) +>> + (set (make-local-variable 'notmuch-sexp--parser) +>> + (notmuch-sexp-create-parser (current-buffer))) +>> + (set (make-local-variable 'notmuch-sexp--state) 'begin)) +>> + (let (done) +>> + (while (not done) +>> + (case notmuch-sexp--state +>> + (begin +>> + ;; Enter the list +>> + (if (eq (notmuch-sexp-begin-list notmuch-sexp--parser) 'retry) +>> + (setq done t) +>> + (setq notmuch-sexp--state 'result))) +>> + (result +>> + ;; Parse a result +>> + (let ((result (notmuch-sexp-read notmuch-sexp--parser))) +>> + (case result +>> + (retry (setq done t)) +>> + (end (setq notmuch-sexp--state 'end)) +>> + (t (with-current-buffer result-buffer +>> + (funcall result-function result)))))) +>> + (end +>> + ;; Any trailing data is unexpected +>> + (notmuch-sexp-eof notmuch-sexp--parser) +>> + (setq done t))))) +>> + ;; Clear out what we've parsed +>> + (delete-region (point-min) (point))) +>> + +>> +(provide 'notmuch-parser) +>> + +>> +;; Local Variables: +>> +;; byte-compile-warnings: (not cl-functions) +>> +;; End: +>> --=20 +>> 1.7.10.4 -- 2.26.2