lib/index.cc: generalize filter state machine
author: David Bremner <david@tethera.net>
Thu, 8 Jun 2017 02:11:48 +0000 (23:11 -0300)
committer: David Bremner <david@tethera.net>
Sat, 1 Jul 2017 15:32:17 +0000 (12:32 -0300)
To match things more complicated than fixed strings, we need states
with multiple out arrows.

lib/index.cc

index 19ddc39c25050873829196e6c8876be46751bbd0..8a18abf4e23bb7afb2ac0a6924c0699e7fb86295 100644 (file)
@@ -159,16 +159,23 @@ filter_filter (GMimeFilter *gmime_filter, char *inbuf, size_t inlen, size_t pres
     g_mime_filter_set_size (gmime_filter, inlen, FALSE);
     outptr = gmime_filter->outbuf;
 
+    next = filter->state;
     while (inptr < inend) {
-       if (*inptr >= states[filter->state].a &&
-           *inptr <= states[filter->state].b)
-       {
-           next = states[filter->state].next_if_match;
-       }
-       else
-       {
-           next = states[filter->state].next_if_not_match;
-       }
+        /* Each state is defined by a contiguous set of rows of the
+        * state table marked by a common value for '.state'. The
+        * state numbers must be equal to the index of the first row
+        * in a given state; thus the loop condition here looks for a
+        * jump to a first row of a state, which is a real transition
+        * in the underlying DFA.
+        */
+       do {
+           if (*inptr >= states[next].a && *inptr <= states[next].b)  {
+               next = states[next].next_if_match;
+           } else  {
+               next = states[next].next_if_not_match;
+           }
+
+       } while (next != states[next].state);
 
        if (filter->state < filter->first_skipping_state)
            *outptr++ = *inptr;