Make the date parsing accept pretty much any random crap.
authorLinus Torvalds <torvalds@ppc970.osdl.org>
Sat, 30 Apr 2005 20:19:56 +0000 (13:19 -0700)
committerLinus Torvalds <torvalds@ppc970.osdl.org>
Sat, 30 Apr 2005 20:19:56 +0000 (13:19 -0700)
This date parser turns line-noise into a date. Cool.

Makefile
date.c
test-date.c [new file with mode: 0644]

index 783ad4d3383e87e9e1b9deb55f57a47380a2b9b3..37d7192f6cc880d732ba9db3d0f58f7f811e4ed4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -60,6 +60,9 @@ CFLAGS += '-DSHA1_HEADER=$(SHA1_HEADER)'
 $(LIB_FILE): $(LIB_OBJS)
        $(AR) rcs $@ $(LIB_OBJS)
 
+test-date: test-date.c date.o
+       $(CC) $(CFLAGS) -o $@ test-date.c date.o
+
 git-%: %.c $(LIB_FILE)
        $(CC) $(CFLAGS) -o $@ $(filter %.c,$^) $(LIBS)
 
diff --git a/date.c b/date.c
index 1e43936f8ebb9c7462347ab3a8b192a973b36195..b59e832d6c58d94e2ab9e4027fb0de11e8987e13 100644 (file)
--- a/date.c
+++ b/date.c
@@ -30,144 +30,253 @@ static time_t my_mktime(struct tm *tm)
 }
 
 static const char *month_names[] = {
-        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
-        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+       "January", "February", "March", "April", "May", "June",
+       "July", "August", "September", "October", "November", "December"
 };
 
 static const char *weekday_names[] = {
-        "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+       "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
 };
 
+/*
+ * Check these. And note how it doesn't do the summer-time conversion.
+ *
+ * In my world, it's always summer, and things are probably a bit off
+ * in other ways too.
+ */
+static const struct {
+       const char *name;
+       int offset;
+} timezone_names[] = {
+       { "IDLW", -12 },        /* International Date Line West */
+       { "NT",   -11 },        /* Nome */
+       { "CAT",  -10 },        /* Central Alaska */
+       { "HST",  -10 },        /* Hawaii Standard */
+       { "HDT",   -9 },        /* Hawaii Daylight */
+       { "YDT",   -8 },        /* Yukon Daylight */
+       { "YST",   -9 },        /* Yukon Standard */
+       { "PST",   -8 },        /* Pacific Standard */
+       { "PDT",   -7 },        /* Pacific Daylight */
+       { "MST",   -7 },        /* Mountain Standard */
+       { "MDT",   -6 },        /* Mountain Daylight */
+       { "CST",   -6 },        /* Central Standard */
+       { "CDT",   -5 },        /* Central Daylight */
+       { "EST",   -5 },        /* Eastern Standard */
+       { "EDT",   -4 },        /* Eastern Daylight */
+       { "AST",   -3 },        /* Atlantic Standard */
+       { "ADT",   -2 },        /* Atlantic Daylight */
+       { "WAT",   -1 },        /* West Africa */
+
+       { "GMT",    0 },        /* Greenwich Mean */
+       { "UTC",    0 },        /* Universal (Coordinated) */
+
+       { "WET",    0 },        /* Western European */
+       { "BST",    0 },        /* British Summer */
+       { "CET",   +1 },        /* Central European */
+       { "MET",   +1 },        /* Middle European */
+       { "MEWT",  +1 },        /* Middle European Winter */
+       { "MEST",  +2 },        /* Middle European Summer */
+       { "CEST",  +2 },        /* Central European Summer */
+       { "MESZ",  +1 },        /* Middle European Summer */
+       { "FWT",   +1 },        /* French Winter */
+       { "FST",   +2 },        /* French Summer */
+       { "EET",   +2 },        /* Eastern Europe, USSR Zone 1 */
+       { "WAST",  +7 },        /* West Australian Standard */
+       { "WADT",  +8 },        /* West Australian Daylight */
+       { "CCT",   +8 },        /* China Coast, USSR Zone 7 */
+       { "JST",   +9 },        /* Japan Standard, USSR Zone 8 */
+       { "EAST", +10 },        /* Eastern Australian Standard */
+       { "EADT", +11 },        /* Eastern Australian Daylight */
+       { "GST",  +10 },        /* Guam Standard, USSR Zone 9 */
+       { "NZT",  +11 },        /* New Zealand */
+       { "NZST", +11 },        /* New Zealand Standard */
+       { "NZDT", +12 },        /* New Zealand Daylight */
+       { "IDLE", +12 },        /* International Date Line East */
+};
 
-static char *skipfws(char *str)
+#define NR_TZ (sizeof(timezone_names) / sizeof(timezone_names[0]))
+       
+static int match_string(const char *date, const char *str)
 {
-       while (isspace(*str))
-               str++;
-       return str;
+       int i = 0;
+
+       for (i = 0; *date; date++, str++, i++) {
+               if (*date == *str)
+                       continue;
+               if (toupper(*date) == toupper(*str))
+                       continue;
+               if (!isalnum(*date))
+                       break;
+               return 0;
+       }
+       return i;
 }
 
-       
-/* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
-   (i.e. English) day/month names, and it doesn't work correctly with %z. */
-void parse_date(char *date, char *result, int maxlen)
+/*
+* Parse month, weekday, or timezone name
+*/
+static int match_alpha(const char *date, struct tm *tm, int *offset)
 {
-       struct tm tm;
-       char *p, *tz;
-       int i, offset;
-       time_t then;
+       int i;
 
-       memset(&tm, 0, sizeof(tm));
-
-       /* Skip day-name */
-       p = skipfws(date);
-       if (!isdigit(*p)) {
-               for (i=0; i<7; i++) {
-                       if (!strncmp(p,weekday_names[i],3) && p[3] == ',') {
-                               p = skipfws(p+4);
-                               goto day;
-                       }
+       for (i = 0; i < 12; i++) {
+               int match = match_string(date, month_names[i]);
+               if (match >= 3) {
+                       tm->tm_mon = i;
+                       return match;
                }
-               return;
-       }                                       
-
-       /* day */
- day:
-       tm.tm_mday = strtoul(p, &p, 10);
-
-       if (tm.tm_mday < 1 || tm.tm_mday > 31)
-               return;
-
-       if (!isspace(*p))
-               return;
-
-       p = skipfws(p);
+       }
 
-       /* month */
+       for (i = 0; i < 7; i++) {
+               int match = match_string(date, weekday_names[i]);
+               if (match >= 3) {
+                       tm->tm_wday = i;
+                       return match;
+               }
+       }
 
-       for (i=0; i<12; i++) {
-               if (!strncmp(p, month_names[i], 3) && isspace(p[3])) {
-                       tm.tm_mon = i;
-                       p = skipfws(p+strlen(month_names[i]));
-                       goto year;
+       for (i = 0; i < NR_TZ; i++) {
+               int match = match_string(date, timezone_names[i].name);
+               if (match >= 3) {
+                       *offset = 60*timezone_names[i].offset;
+                       return match;
                }
        }
-       return; /* Error -- bad month */
 
-       /* year */
- year: 
-       tm.tm_year = strtoul(p, &p, 10);
+       /* BAD CRAP */
+       return 0;
+}
 
-       if (!tm.tm_year && !isspace(*p))
-               return;
+static int match_digit(char *date, struct tm *tm, int *offset)
+{
+       char *end, c;
+       unsigned long num, num2, num3;
+
+       num = strtoul(date, &end, 10);
+
+       /* Time? num:num[:num] */
+       if (num < 24 && end[0] == ':' && isdigit(end[1])) {
+               tm->tm_hour = num;
+               num = strtoul(end+1, &end, 10);
+               if (num < 60) {
+                       tm->tm_min = num;
+                       if (end[0] == ':' && isdigit(end[1])) {
+                               num = strtoul(end+1, &end, 10);
+                               if (num < 61)
+                                       tm->tm_sec = num;
+                       }
+               }
+               return end - date;
+       }
 
-       if (tm.tm_year > 1900)
-               tm.tm_year -= 1900;
+       /* Year? Day of month? Numeric date-string?*/
+       c = *end;
+       switch (c) {
+       default:
+               if (num > 0 && num < 32) {
+                       tm->tm_mday = num;
+                       break;
+               }
+               if (num > 1900) {
+                       tm->tm_year = num - 1900;
+                       break;
+               }
+               if (num > 70) {
+                       tm->tm_year = num;
+                       break;
+               }
+               break;
+
+       case '-':
+       case '/':
+               if (num && num < 32 && isdigit(end[1])) {
+                       num2 = strtoul(end+1, &end, 10);
+                       if (!num2 || num2 > 31)
+                               break;
+                       if (num > 12) {
+                               if (num2 > 12)
+                                       break;
+                               num3 = num;
+                               num  = num2;
+                               num2 = num3;
+                       }
+                       tm->tm_mon = num - 1;
+                       tm->tm_mday = num2;
+                       if (*end == c && isdigit(end[1])) {
+                               num3 = strtoul(end, &end, 10);
+                               if (num3 > 1900)
+                                       num3 -= 1900;
+                               tm->tm_year = num3;
+                       }
+                       break;
+               }
+       }
                
-       p=skipfws(p);
-
-       /* hour */
-       if (!isdigit(*p))
-               return;
-       tm.tm_hour = strtoul(p, &p, 10);
-       
-       if (tm.tm_hour > 23)
-               return;
-
-       if (*p != ':')
-               return; /* Error -- bad time */
-       p++;
-
-       /* minute */
-       if (!isdigit(*p))
-               return;
-       tm.tm_min = strtoul(p, &p, 10);
-       
-       if (tm.tm_min > 59)
-               return;
-
-       if (*p != ':')
-               goto zone;
-       p++;
-
-       /* second */
-       if (!isdigit(*p))
-               return;
-       tm.tm_sec = strtoul(p, &p, 10);
-       
-       if (tm.tm_sec > 60)
-               return;
+       return end - date;
+                       
+}
 
- zone:
-       if (!isspace(*p))
-               return;
+static int match_tz(char *date, int *offp)
+{
+       char *end;
+       int offset = strtoul(date+1, &end, 10);
+       int min, hour;
 
-       p = skipfws(p);
+       min = offset % 100;
+       hour = offset / 100;
 
-       if (*p == '-')
-               offset = -60;
-       else if (*p == '+')
-               offset = 60;
-       else
-              return;
+       offset = hour*60+min;
+       if (*date == '-')
+               offset = -offset;
 
-       if (!isdigit(p[1]) || !isdigit(p[2]) || !isdigit(p[3]) || !isdigit(p[4]))
-               return;
+       *offp = offset;
+       return end - date;
+}
 
-       tz = p;
-       i = strtoul(p+1, NULL, 10);
-       offset *= ((i % 100) + ((i / 100) * 60));
+/* Gr. strptime is crap for this; it doesn't have a way to require RFC2822
+   (i.e. English) day/month names, and it doesn't work correctly with %z. */
+void parse_date(char *date, char *result, int maxlen)
+{
+       struct tm tm;
+       int offset;
+       time_t then;
 
-       p = skipfws(p + 5);
-       if (*p && *p != '(') /* trailing comment like (EDT) is ok */
-               return;
+       memset(&tm, 0, sizeof(tm));
+       tm.tm_year = -1;
+       tm.tm_mon = -1;
+       tm.tm_mday = -1;
+       offset = 0;
+
+       for (;;) {
+               int match = 0;
+               unsigned char c = *date;
+
+               /* Stop at end of string or newline */
+               if (!c || c == '\n')
+                       break;
+
+               if (isalpha(c))
+                       match = match_alpha(date, &tm, &offset);
+               else if (isdigit(c))
+                       match = match_digit(date, &tm, &offset);
+               else if ((c == '-' || c == '+') && isdigit(date[1]))
+                       match = match_tz(date, &offset);
+
+               if (!match) {
+                       /* BAD CRAP */
+                       match = 1;
+               }       
+
+               date += match;
+       }
 
        then = my_mktime(&tm); /* mktime uses local timezone */
        if (then == -1)
                return;
 
-       then -= offset;
+       then -= offset * 60;
 
-       snprintf(result, maxlen, "%lu %5.5s", then, tz);
+       snprintf(result, maxlen, "%lu %+03d%02d", then, offset/60, offset % 60);
 }
 
 void datestamp(char *buf, int bufsize)
diff --git a/test-date.c b/test-date.c
new file mode 100644 (file)
index 0000000..8ec41c3
--- /dev/null
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <time.h>
+
+#include "cache.h"
+
+int main(int argc, char **argv)
+{
+       int i;
+
+       for (i = 1; i < argc; i++) {
+               char result[100];
+               time_t t;
+
+               memcpy(result, "bad", 4);
+               parse_date(argv[i], result, sizeof(result));
+               t = strtoul(result, NULL, 0);
+               printf("%s -> %s -> %s\n", argv[i], result, ctime(&t));
+       }
+       return 0;
+}