--- /dev/null
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* GMime
+ * Copyright (C) 2000-2009 Jeffrey Stedfast
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define _GNU_SOURCE
+
+#include <glib.h>
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h> /* for MAXHOSTNAMELEN */
+#else
+#define MAXHOSTNAMELEN 64
+#endif
+#ifdef HAVE_UTSNAME_DOMAINNAME
+#include <sys/utsname.h> /* for uname() */
+#endif
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h> /* Unix header for getpid() */
+#endif
+#ifdef G_OS_WIN32
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <process.h>
+#define getpid() _getpid()
+#endif
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#include <ctype.h>
+#include <errno.h>
+
+#include "gmime-utils.h"
+#include "gmime-table-private.h"
+#include "gmime-parse-utils.h"
+#include "gmime-part.h"
+#include "gmime-charset.h"
+#include "gmime-iconv.h"
+#include "gmime-iconv-utils.h"
+
+/* w(x): expands to its argument only when ENABLE_WARNINGS is defined,
+ * so wrapped warning statements vanish from builds without it. */
+#ifdef ENABLE_WARNINGS
+#define w(x) x
+#else
+#define w(x)
+#endif /* ENABLE_WARNINGS */
+
+/* d(x): wrapper for debug statements; defined empty here, so every
+ * d(...) in this file expands to nothing. */
+#define d(x)
+
+
+/**
+ * SECTION: gmime-utils
+ * @title: gmime-utils
+ * @short_description: MIME utility functions
+ * @see_also:
+ *
+ * Utility functions to parse, encode and decode various MIME tokens
+ * and encodings.
+ **/
+
+/* Forward declaration; the definition is not in this part of the file. */
+extern gboolean _g_mime_enable_rfc2047_workarounds (void);
+
+/* Half of GMIME_FOLD_LEN (which is defined outside this file). */
+#define GMIME_FOLD_PREENCODED (GMIME_FOLD_LEN / 2)
+
+/* date parser macros */
+
+/* Characters that may appear in each kind of date token. The disabled
+ * gmime_datetok_table_init() further down uses these sets to derive
+ * gmime_datetok_table. */
+#define NUMERIC_CHARS "1234567890"
+#define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
+#define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
+#define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
+#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
+#define TIME_CHARS "1234567890:"
+
+/* Bits of a token mask. A NON_* bit is set when the token contains a
+ * character outside the corresponding set above; the HAS_* bits are
+ * set when the token contains a ':' or a '+'/'-' respectively. */
+#define DATE_TOKEN_NON_NUMERIC (1 << 0)
+#define DATE_TOKEN_NON_WEEKDAY (1 << 1)
+#define DATE_TOKEN_NON_MONTH (1 << 2)
+#define DATE_TOKEN_NON_TIME (1 << 3)
+#define DATE_TOKEN_HAS_COLON (1 << 4)
+#define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
+#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
+#define DATE_TOKEN_HAS_SIGN (1 << 7)
+
+/* Upper-case hexadecimal digit for each value 0..15. */
+static unsigned char tohex[16] = {
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+/* DATE_TOKEN_* mask for every possible byte value, indexed by the byte
+ * cast to unsigned char. datetok() ORs together the entries for all the
+ * characters of a token. The values are precomputed; the disabled
+ * gmime_datetok_table_init() below is the generator for this table. */
+static unsigned char gmime_datetok_table[256] = {
+ 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
+ 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
+ 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
+ 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
+ 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+};
+
+/* Known alphabetic timezone names and their offsets from GMT. Offsets
+ * are signed decimal HHMM values (-500 means five hours behind GMT),
+ * the same form the parsers convert to seconds with
+ * (offset / 100) * 3600 + (offset % 100) * 60.
+ *
+ * get_tzone() scans exactly 15 entries, so keep its loop bound in step
+ * with this table when adding or removing names. */
+/* hrm, is there a library for this shit? */
+static struct {
+ char *name;
+ int offset;
+} tz_offsets [] = {
+ { "UT", 0 },
+ { "GMT", 0 },
+ { "EST", -500 }, /* these are all US timezones. bloody yanks */
+ { "EDT", -400 },
+ { "CST", -600 },
+ { "CDT", -500 },
+ { "MST", -700 },
+ { "MDT", -600 },
+ { "PST", -800 },
+ { "PDT", -700 },
+ { "Z", 0 },
+ { "A", -100 },
+ { "M", -1200 },
+ { "N", 100 },
+ { "Y", 1200 },
+};
+
+/* Three-letter month names, indexed like struct tm's tm_mon (0 = Jan).
+ * Used both for formatting and for matching parsed tokens. */
+static char *tm_months[] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+/* Three-letter weekday names, indexed like struct tm's tm_wday (0 = Sun). */
+static char *tm_days[] = {
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+
+
+/**
+ * g_mime_utils_header_format_date:
+ * @date: time_t date representation
+ * @tz_offset: Timezone offset, as a signed decimal HHMM value
+ *
+ * Builds a newly allocated rfc822 date string for the moment @date,
+ * expressed in the timezone described by @tz_offset.
+ *
+ * Returns: a newly allocated string holding the formatted date.
+ **/
+char *
+g_mime_utils_header_format_date (time_t date, int tz_offset)
+{
+	int tz_hours = tz_offset / 100;
+	int tz_minutes = tz_offset % 100;
+	struct tm parts;
+
+	/* shift into the target zone so that the UTC breakdown below
+	 * produces that zone's wall-clock fields */
+	date += (tz_hours * (60 * 60)) + tz_minutes * 60;
+
+#if defined (HAVE_GMTIME_R)
+	gmtime_r (&date, &parts);
+#elif defined (HAVE_GMTIME_S)
+	gmtime_s (&parts, &date);
+#else
+	memcpy (&parts, gmtime (&date), sizeof (parts));
+#endif
+
+	return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
+				tm_days[parts.tm_wday], parts.tm_mday,
+				tm_months[parts.tm_mon],
+				parts.tm_year + 1900,
+				parts.tm_hour, parts.tm_min, parts.tm_sec,
+				tz_offset);
+}
+
+/* This is where it gets ugly... */
+
+/* One token of a date string, kept in a singly-linked list.
+ * The token text is NOT copied: start/len reference the string that was
+ * passed to datetok(), which must therefore outlive the list. */
+typedef struct _date_token {
+ struct _date_token *next; /* next token, or NULL at the end of the list */
+ unsigned char mask; /* OR of gmime_datetok_table[] over the token's characters */
+ const char *start; /* first character of the token inside the source string */
+ size_t len; /* token length in bytes */
+} date_token;
+
+/* Tokens are allocated from and returned to the GLib slice allocator. */
+#define date_token_free(tok) g_slice_free (date_token, tok)
+#define date_token_new() g_slice_new (date_token)
+
+/*
+ * datetok:
+ * @date: nul-terminated date string
+ *
+ * Splits @date into a linked list of tokens. Leading spaces and tabs
+ * are skipped; a token then runs from its first character up to (not
+ * including) the next '-', '/', ',', tab, CR, LF or space. The first
+ * character is always taken as part of the token, so a leading sign
+ * such as the '-' of "-0500" stays attached to it.
+ *
+ * Returns: the head of the token list (free each node with
+ * date_token_free()), or %NULL when @date holds no tokens.
+ */
+static date_token *
+datetok (const char *date)
+{
+	date_token *head = NULL, **link = &head, *node;
+	const char *cur = date, *stop;
+	unsigned char flags;
+
+	while (*cur) {
+		/* skip blanks in front of the token */
+		while (*cur == ' ' || *cur == '\t')
+			cur++;
+
+		if (*cur == '\0')
+			break;
+
+		/* accumulate the character-class mask while scanning for
+		 * the delimiter that ends this token */
+		flags = gmime_datetok_table[(unsigned char) *cur];
+		for (stop = cur + 1; *stop && !strchr ("-/,\t\r\n ", *stop); stop++)
+			flags |= gmime_datetok_table[(unsigned char) *stop];
+
+		node = date_token_new ();
+		node->next = NULL;
+		node->start = cur;
+		node->len = stop - cur;
+		node->mask = flags;
+
+		/* append to the list */
+		*link = node;
+		link = &node->next;
+
+		if (*stop == '\0')
+			break;
+
+		/* step over the delimiter */
+		cur = stop + 1;
+	}
+
+	return head;
+}
+
+/*
+ * decode_int:
+ * @in: start of the text to decode (need not be nul-terminated)
+ * @inlen: number of bytes of @in to decode
+ *
+ * Decodes an optionally signed ('+' or '-') run of decimal digits.
+ *
+ * Returns: the decoded value, or -1 if a non-digit character is found
+ * or the value does not fit in an int. Note that -1 is therefore
+ * indistinguishable from the valid input "-1"; callers treat it as
+ * an error.
+ */
+static int
+decode_int (const char *in, size_t inlen)
+{
+	const char *inptr = in;
+	const char *inend = in + inlen;
+	int sign = 1, val = 0, digit;
+
+	/* only look for a sign if there is at least one byte to look at */
+	if (inptr < inend) {
+		if (*inptr == '-') {
+			sign = -1;
+			inptr++;
+		} else if (*inptr == '+') {
+			inptr++;
+		}
+	}
+
+	for ( ; inptr < inend; inptr++) {
+		if (!(*inptr >= '0' && *inptr <= '9'))
+			return -1;
+
+		digit = *inptr - '0';
+
+		/* the text comes from message headers, so an arbitrarily
+		 * long digit run must not overflow the signed accumulator */
+		if (val > (INT_MAX - digit) / 10)
+			return -1;
+
+		val = (val * 10) + digit;
+	}
+
+	return val * sign;
+}
+
+/* Compiled out with #if 0. Returns the number of days in a 1-based
+ * @month of @year (29 for February in leap years), or 0 for a month
+ * outside 1..12. */
+#if 0
+static int
+get_days_in_month (int month, int year)
+{
+ switch (month) {
+ case 1:
+ case 3:
+ case 5:
+ case 7:
+ case 8:
+ case 10:
+ case 12:
+ return 31;
+ case 4:
+ case 6:
+ case 9:
+ case 11:
+ return 30;
+ case 2:
+ if (g_date_is_leap_year (year))
+ return 29;
+ else
+ return 28;
+ default:
+ return 0;
+ }
+}
+#endif
+
+/*
+ * get_wday:
+ * @in: token text
+ * @inlen: token length
+ *
+ * Matches the first three characters of the token, ignoring ASCII
+ * case, against the weekday abbreviations in tm_days.
+ *
+ * Returns: the weekday index (0 = Sunday) or -1 if the token is
+ * shorter than three characters or names no weekday.
+ */
+static int
+get_wday (const char *in, size_t inlen)
+{
+	int day = 0;
+
+	g_return_val_if_fail (in != NULL, -1);
+
+	if (inlen >= 3) {
+		while (day < 7) {
+			if (g_ascii_strncasecmp (in, tm_days[day], 3) == 0)
+				return day;
+			day++;
+		}
+	}
+
+	/* too short, or not a week day we know */
+	return -1;
+}
+
+/*
+ * get_mday:
+ * @in: token text
+ * @inlen: token length
+ *
+ * Decodes the token as a day of the month.
+ *
+ * Returns: the decoded value when it lies in 0..31, otherwise -1.
+ */
+static int
+get_mday (const char *in, size_t inlen)
+{
+	int day;
+
+	g_return_val_if_fail (in != NULL, -1);
+
+	day = decode_int (in, inlen);
+
+	return (day >= 0 && day <= 31) ? day : -1;
+}
+
+/*
+ * get_month:
+ * @in: token text
+ * @inlen: token length
+ *
+ * Matches the first three characters of the token, ignoring ASCII
+ * case, against the month abbreviations in tm_months.
+ *
+ * Returns: the month index (0 = January) or -1 if the token is
+ * shorter than three characters or names no month.
+ */
+static int
+get_month (const char *in, size_t inlen)
+{
+	int month = 0;
+
+	g_return_val_if_fail (in != NULL, -1);
+
+	if (inlen >= 3) {
+		while (month < 12) {
+			if (g_ascii_strncasecmp (in, tm_months[month], 3) == 0)
+				return month;
+			month++;
+		}
+	}
+
+	/* too short, or not a month we know */
+	return -1;
+}
+
+/*
+ * get_year:
+ * @in: token text
+ * @inlen: token length
+ *
+ * Decodes the token as a year. Values below 70 are taken to be in
+ * the 2000s and values from 70 to 99 in the 1900s.
+ *
+ * Returns: the full year, or -1 if the token does not decode or the
+ * resulting year is earlier than 1969.
+ */
+static int
+get_year (const char *in, size_t inlen)
+{
+	int year;
+
+	g_return_val_if_fail (in != NULL, -1);
+
+	year = decode_int (in, inlen);
+	if (year == -1)
+		return -1;
+
+	/* expand abbreviated years */
+	if (year < 70)
+		year += 2000;
+	else if (year < 100)
+		year += 1900;
+
+	return year >= 1969 ? year : -1;
+}
+
+/*
+ * get_time:
+ * @in: token text
+ * @inlen: token length
+ * @hour: set to the hour field
+ * @min: set to the minute field
+ * @sec: set to the second field
+ *
+ * Decodes a "H[:M[:S]]" token. Fields that are absent stay 0; the
+ * field values are not range-checked.
+ *
+ * Returns: %TRUE on success, or %FALSE if the token contains a
+ * character other than a digit or ':' or has more than two colons
+ * (the outputs may then hold partial values).
+ */
+static gboolean
+get_time (const char *in, size_t inlen, int *hour, int *min, int *sec)
+{
+	int *fields[3];
+	int field = 0;
+	size_t i;
+	char c;
+
+	fields[0] = hour;
+	fields[1] = min;
+	fields[2] = sec;
+
+	*hour = *min = *sec = 0;
+
+	for (i = 0; i < inlen; i++) {
+		c = in[i];
+
+		if (c == ':') {
+			/* move on to the next field; there are only three */
+			if (++field > 2)
+				return FALSE;
+		} else if (c >= '0' && c <= '9') {
+			*fields[field] = (*fields[field] * 10) + (c - '0');
+		} else {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * get_tzone:
+ * @token: address of the current token; advanced past each token that
+ * is examined without producing a result
+ *
+ * Examines at most two consecutive tokens for a timezone. A token
+ * starting with '+' or '-' is decoded as a numeric offset and returned
+ * straight away; any other token is compared (case-sensitively, with
+ * a surrounding "(...)" stripped) against the names in tz_offsets.
+ *
+ * Returns: the offset in signed HHMM form, or -1 if no timezone was
+ * recognised.
+ */
+static int
+get_tzone (date_token **token)
+{
+	const char *name;
+	size_t namelen, n;
+	int tries;
+	guint idx;
+
+	for (tries = 0; *token && tries < 2; *token = (*token)->next, tries++) {
+		name = (*token)->start;
+		namelen = (*token)->len;
+
+		if (*name == '+' || *name == '-')
+			return decode_int (name, namelen);
+
+		if (*name == '(') {
+			/* drop the opening paren and, if present, the closing one */
+			namelen -= (name[namelen - 1] == ')') ? 2 : 1;
+			name++;
+		}
+
+		for (idx = 0; idx < G_N_ELEMENTS (tz_offsets); idx++) {
+			n = strlen (tz_offsets[idx].name);
+
+			if (n == namelen && strncmp (name, tz_offsets[idx].name, n) == 0)
+				return tz_offsets[idx].offset;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * mktime_utc:
+ * @tm: broken-down time; its tm_isdst is overwritten with -1 before
+ * the mktime() call, which may itself update the structure
+ *
+ * Converts @tm with mktime() and then subtracts the local zone offset
+ * obtained from whichever mechanism was detected at configure time.
+ * The intent (see the callers' comments) is to get the time_t for @tm
+ * read as GMT rather than as local time.
+ *
+ * Returns: the adjusted time_t.
+ */
+static time_t
+mktime_utc (struct tm *tm)
+{
+ time_t tt;
+ long tz;
+
+ /* -1 asks mktime() to work out itself whether DST is in effect */
+ tm->tm_isdst = -1;
+ tt = mktime (tm);
+
+#if defined (G_OS_WIN32)
+ _get_timezone (&tz);
+ if (tm->tm_isdst > 0) {
+ int dst;
+
+ _get_dstbias (&dst);
+ tz += dst;
+ }
+#elif defined (HAVE_TM_GMTOFF)
+ tz = -tm->tm_gmtoff;
+#elif defined (HAVE_TIMEZONE)
+ if (tm->tm_isdst > 0) {
+#if defined (HAVE_ALTZONE)
+ tz = altzone;
+#else /* !defined (HAVE_ALTZONE) */
+ /* no altzone: assume DST is exactly one hour ahead */
+ tz = (timezone - 3600);
+#endif
+ } else {
+ tz = timezone;
+ }
+#elif defined (HAVE__TIMEZONE)
+ /* NOTE(review): unlike the branches above, this one makes no DST adjustment */
+ tz = _timezone;
+#else
+#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
+#endif
+
+ return tt - tz;
+}
+
+/*
+ * parse_rfc822_date:
+ * @tokens: token list produced by datetok()
+ * @tzone: if non-NULL, receives the timezone offset (signed HHMM) on
+ * success
+ *
+ * Strict parser: expects the tokens in the order
+ * [weekday] day month year time [timezone]. A missing or unrecognised
+ * timezone is treated as GMT.
+ *
+ * Returns: the date as a GMT time_t, or (time_t) 0 if any mandatory
+ * field is missing or malformed (@tzone is left untouched then).
+ */
+static time_t
+parse_rfc822_date (date_token *tokens, int *tzone)
+{
+	int hh, mm, ss, value, zone = 0;
+	date_token *cur = tokens;
+	struct tm tm;
+	time_t when;
+
+	g_return_val_if_fail (tokens != NULL, (time_t) 0);
+
+	memset ((void *) &tm, 0, sizeof (struct tm));
+
+	/* the weekday is optional */
+	value = get_wday (cur->start, cur->len);
+	if (value != -1) {
+		tm.tm_wday = value;
+		cur = cur->next;
+	}
+
+	/* day of the month */
+	if (cur == NULL)
+		return (time_t) 0;
+	value = get_mday (cur->start, cur->len);
+	if (value == -1)
+		return (time_t) 0;
+	tm.tm_mday = value;
+	cur = cur->next;
+
+	/* month */
+	if (cur == NULL)
+		return (time_t) 0;
+	value = get_month (cur->start, cur->len);
+	if (value == -1)
+		return (time_t) 0;
+	tm.tm_mon = value;
+	cur = cur->next;
+
+	/* year */
+	if (cur == NULL)
+		return (time_t) 0;
+	value = get_year (cur->start, cur->len);
+	if (value == -1)
+		return (time_t) 0;
+	tm.tm_year = value - 1900;
+	cur = cur->next;
+
+	/* hour/min/sec */
+	if (cur == NULL || !get_time (cur->start, cur->len, &hh, &mm, &ss))
+		return (time_t) 0;
+	tm.tm_hour = hh;
+	tm.tm_min = mm;
+	tm.tm_sec = ss;
+	cur = cur->next;
+
+	/* timezone; when absent or unparsable we assume GMT */
+	if (cur != NULL) {
+		value = get_tzone (&cur);
+		if (value != -1)
+			zone = value;
+	}
+
+	when = mktime_utc (&tm);
+
+	/* `when` holds the wall-clock fields read as GMT; remove the
+	 * zone offset to arrive at the real GMT time */
+	when -= ((zone / 100) * 60 * 60) + (zone % 100) * 60;
+
+	if (tzone)
+		*tzone = zone;
+
+	return when;
+}
+
+
+/* Token classification helpers. Each tests the token's precomputed
+ * mask, i.e. whether all of its characters belong to the relevant
+ * character set; they do not validate the token's actual value. */
+#define date_token_mask(t) (((date_token *) t)->mask)
+#define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
+#define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
+#define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
+/* a time token must also contain at least one ':' */
+#define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
+#define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
+/* a numeric timezone must also contain a '+' or '-' */
+#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
+#define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
+
+/*
+ * parse_broken_date:
+ * @tokens: token list produced by datetok()
+ * @tzone: if non-NULL, receives the timezone offset (signed HHMM, 0 if
+ * none was found)
+ *
+ * Lenient fallback parser: walks the tokens in whatever order they
+ * appear and assigns each one to the first still-unfilled field it can
+ * be decoded as (weekday, month, time, timezone, then the numeric
+ * year / month / day guesses). Tokens that fit nothing are ignored.
+ *
+ * Returns: the resulting time_t built from whatever fields were found;
+ * fields never seen remain zero.
+ */
+static time_t
+parse_broken_date (date_token *tokens, int *tzone)
+{
+ gboolean got_wday, got_month, got_tzone;
+ int hour, min, sec, offset, n;
+ date_token *token;
+ struct tm tm;
+ time_t t;
+
+ memset ((void *) &tm, 0, sizeof (struct tm));
+ got_wday = got_month = got_tzone = FALSE;
+ offset = 0;
+
+ token = tokens;
+ while (token) {
+ if (is_weekday (token) && !got_wday) {
+ if ((n = get_wday (token->start, token->len)) != -1) {
+ d(printf ("weekday; "));
+ got_wday = TRUE;
+ tm.tm_wday = n;
+ goto next;
+ }
+ }
+
+ if (is_month (token) && !got_month) {
+ if ((n = get_month (token->start, token->len)) != -1) {
+ d(printf ("month; "));
+ got_month = TRUE;
+ tm.tm_mon = n;
+ goto next;
+ }
+ }
+
+ /* a time is only accepted while hour, minute and second are all still 0 */
+ if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
+ if (get_time (token->start, token->len, &hour, &min, &sec)) {
+ d(printf ("time; "));
+ tm.tm_hour = hour;
+ tm.tm_min = min;
+ tm.tm_sec = sec;
+ goto next;
+ }
+ }
+
+ if (is_tzone (token) && !got_tzone) {
+ /* work on a copy (which shadows the outer time_t t) so that
+ * get_tzone() advancing its argument does not move our
+ * position in the list */
+ date_token *t = token;
+
+ if ((n = get_tzone (&t)) != -1) {
+ d(printf ("tzone; "));
+ got_tzone = TRUE;
+ offset = n;
+ goto next;
+ }
+ }
+
+ if (is_numeric (token)) {
+ /* a 4-digit number is taken as the year if none has been seen yet */
+ if (token->len == 4 && !tm.tm_year) {
+ if ((n = get_year (token->start, token->len)) != -1) {
+ d(printf ("year; "));
+ tm.tm_year = n - 1900;
+ goto next;
+ }
+ } else {
+ /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
+ if (!got_month && token->next && is_numeric (token->next)) {
+ /* a value above 12 cannot be a month: jump into the
+ * day-of-month branch below with n already decoded */
+ if ((n = decode_int (token->start, token->len)) > 12) {
+ goto mday;
+ } else if (n > 0) {
+ d(printf ("mon; "));
+ got_month = TRUE;
+ tm.tm_mon = n - 1;
+ }
+ goto next;
+ } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
+ mday:
+ d(printf ("mday; "));
+ tm.tm_mday = n;
+ goto next;
+ } else if (!tm.tm_year) {
+ if ((n = get_year (token->start, token->len)) != -1) {
+ d(printf ("2-digit year; "));
+ tm.tm_year = n - 1900;
+ }
+ goto next;
+ }
+ }
+ }
+
+ d(printf ("???; "));
+
+ next:
+
+ token = token->next;
+ }
+
+ d(printf ("\n"));
+
+ t = mktime_utc (&tm);
+
+ /* t is now GMT of the time we want, but not offset by the timezone ... */
+
+ /* this should convert the time to the GMT equiv time */
+ t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
+
+ if (tzone)
+ *tzone = offset;
+
+ return t;
+}
+
+/* Compiled out with #if 0: generator for gmime_datetok_table. It
+ * computes the DATE_TOKEN_* mask of every byte value from the
+ * *_CHARS sets and prints the table as C source on stdout. */
+#if 0
+static void
+gmime_datetok_table_init (void)
+{
+ int i;
+
+ memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
+
+ for (i = 0; i < 256; i++) {
+ if (!strchr (NUMERIC_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
+
+ if (!strchr (WEEKDAY_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
+
+ if (!strchr (MONTH_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
+
+ if (!strchr (TIME_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
+
+ if (!strchr (TIMEZONE_ALPHA_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
+
+ if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
+ gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
+
+ if (((char) i) == ':')
+ gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
+
+ if (strchr ("+-", i))
+ gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
+ }
+
+ /* emit the table, 16 entries per line */
+ printf ("static unsigned char gmime_datetok_table[256] = {");
+ for (i = 0; i < 256; i++) {
+ if (i % 16 == 0)
+ printf ("\n\t");
+ printf ("%3d,", gmime_datetok_table[i]);
+ }
+ printf ("\n};\n");
+}
+#endif
+
+
+/**
+ * g_mime_utils_header_decode_date:
+ * @str: input date string
+ * @tz_offset: timezone offset
+ *
+ * Parses the rfc822 date in @str, first strictly and, if that fails,
+ * with a more forgiving parser. When @tz_offset is non-NULL it
+ * receives the timezone offset found in the string (0 when @str
+ * contains no tokens at all).
+ *
+ * Returns: the time_t representation of the date string specified by
+ * @str or (time_t) %0 on error.
+ **/
+time_t
+g_mime_utils_header_decode_date (const char *str, int *tz_offset)
+{
+	date_token *list, *next;
+	time_t result;
+
+	list = datetok (str);
+	if (list == NULL) {
+		if (tz_offset)
+			*tz_offset = 0;
+
+		return (time_t) 0;
+	}
+
+	/* strict parse first, lenient parse as the fallback */
+	result = parse_rfc822_date (list, tz_offset);
+	if (result == (time_t) 0)
+		result = parse_broken_date (list, tz_offset);
+
+	/* release the token list */
+	for ( ; list != NULL; list = next) {
+		next = list->next;
+		date_token_free (list);
+	}
+
+	return result;
+}
+
+
+/**
+ * g_mime_utils_generate_message_id:
+ * @fqdn: Fully qualified domain name, or %NULL to have one looked up
+ *
+ * Generates a unique Message-Id of the form
+ * "<time>.<pid>.<counter>@<fqdn>". When @fqdn is %NULL the local host
+ * and domain names are queried using whichever facilities were
+ * detected at configure time, falling back to "localhost.localdomain".
+ *
+ * Returns: a newly allocated string in an addr-spec format suitable
+ * for use as a Message-Id.
+ **/
+char *
+g_mime_utils_generate_message_id (const char *fqdn)
+{
+#ifdef G_THREADS_ENABLED
+	static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
+#define MUTEX_LOCK() g_static_mutex_lock (&mutex)
+#define MUTEX_UNLOCK() g_static_mutex_unlock (&mutex)
+#else
+#define MUTEX_LOCK()
+#define MUTEX_UNLOCK()
+#endif
+	static unsigned long int count = 0;
+	const char *hostname = NULL;
+	char *name = NULL;
+	char *msgid;
+
+	if (!fqdn) {
+#ifdef HAVE_UTSNAME_DOMAINNAME
+		struct utsname unam;
+
+		if (uname (&unam) < 0) {
+			/* nothing was filled in; behave as if both names were empty */
+			unam.nodename[0] = '\0';
+			unam.domainname[0] = '\0';
+		}
+
+		hostname = unam.nodename;
+
+		if (unam.domainname[0])
+			name = g_strdup_printf ("%s.%s", hostname, unam.domainname);
+#else /* ! HAVE_UTSNAME_DOMAINNAME */
+		char host[MAXHOSTNAMELEN + 1];
+
+		/* start out empty so that `hostname` below always refers to
+		 * a valid string, even when gethostname() is unavailable */
+		host[0] = '\0';
+
+#ifdef HAVE_GETHOSTNAME
+		host[MAXHOSTNAMELEN] = '\0';
+		if (gethostname (host, MAXHOSTNAMELEN) == 0) {
+#ifdef HAVE_GETDOMAINNAME
+			size_t domainlen = MAXHOSTNAMELEN;
+			char *domain;
+			int rv;
+
+			domain = g_malloc (domainlen);
+
+			/* EINVAL means the buffer was too small: grow and retry */
+			while ((rv = getdomainname (domain, domainlen)) == -1 && errno == EINVAL) {
+				domainlen += MAXHOSTNAMELEN;
+				domain = g_realloc (domain, domainlen);
+			}
+
+			if (rv == 0 && domain[0]) {
+				if (host[0]) {
+					name = g_strdup_printf ("%s.%s", host, domain);
+					g_free (domain);
+				} else {
+					/* ownership of the buffer moves to `name` */
+					name = domain;
+				}
+			} else {
+				/* lookup failed or the domain is empty */
+				g_free (domain);
+			}
+#endif /* HAVE_GETDOMAINNAME */
+		} else {
+			host[0] = '\0';
+		}
+#endif /* HAVE_GETHOSTNAME */
+		hostname = host;
+#endif /* HAVE_UTSNAME_DOMAINNAME */
+
+#ifdef HAVE_GETADDRINFO
+		if (!name && hostname[0]) {
+			/* we weren't able to get a domain name */
+			struct addrinfo hints, *res;
+
+			memset (&hints, 0, sizeof (hints));
+			hints.ai_flags = AI_CANONNAME;
+
+			if (getaddrinfo (hostname, NULL, &hints, &res) == 0) {
+				name = g_strdup (res->ai_canonname);
+				freeaddrinfo (res);
+			}
+		}
+#endif /* HAVE_GETADDRINFO */
+
+		fqdn = name != NULL ? name : (hostname[0] ? hostname : "localhost.localdomain");
+	}
+
+	/* the counter is shared between threads */
+	MUTEX_LOCK ();
+	msgid = g_strdup_printf ("%lu.%lu.%lu@%s", (unsigned long int) time (NULL),
+				 (unsigned long int) getpid (), count++, fqdn);
+	MUTEX_UNLOCK ();
+
+	/* fqdn may point into `name`, so release it only now */
+	g_free (name);
+
+	return msgid;
+}
+
+/*
+ * decode_addrspec:
+ * @in: address of the parse position
+ *
+ * Parses "local-part@domain", where the local-part is a sequence of
+ * words separated by '.'. On success *@in is moved past the addr-spec.
+ * On failure the only change to *@in is the leading whitespace skip
+ * performed at the start.
+ *
+ * Returns: a newly allocated addr-spec string or %NULL on failure.
+ */
+static char *
+decode_addrspec (const char **in)
+{
+	const char *cur, *atom;
+	gboolean ok = FALSE;
+	GString *spec;
+
+	decode_lwsp (in);
+	cur = *in;
+
+	atom = decode_word (&cur);
+	if (atom == NULL) {
+		w(g_warning ("No local-part in addr-spec: %s", *in));
+		return NULL;
+	}
+
+	spec = g_string_new ("");
+	g_string_append_len (spec, atom, (size_t) (cur - atom));
+
+	/* collect the remaining ".word" pieces of the local-part */
+	decode_lwsp (&cur);
+	while (*cur == '.') {
+		g_string_append_c (spec, *cur++);
+
+		atom = decode_word (&cur);
+		if (atom == NULL) {
+			w(g_warning ("Invalid local-part in addr-spec: %s", *in));
+			goto done;
+		}
+
+		g_string_append_len (spec, atom, (size_t) (cur - atom));
+		decode_lwsp (&cur);
+	}
+
+	/* the '@' has to come next */
+	if (*cur++ != '@') {
+		w(g_warning ("Invalid addr-spec; missing '@': %s", *in));
+		goto done;
+	}
+
+	g_string_append_c (spec, '@');
+	if (!decode_domain (&cur, spec)) {
+		w(g_warning ("No domain in addr-spec: %s", *in));
+		goto done;
+	}
+
+	ok = TRUE;
+
+ done:
+
+	if (!ok) {
+		g_string_free (spec, TRUE);
+		return NULL;
+	}
+
+	*in = cur;
+
+	/* hand the character data to the caller, dropping only the wrapper */
+	return g_string_free (spec, FALSE);
+}
+
+/*
+ * decode_msgid:
+ * @in: address of the parse position
+ *
+ * Parses a "<addr-spec>" message id, tolerating missing angle
+ * brackets. If no addr-spec can be parsed, everything up to (but not
+ * including) the next '>' or the end of the string is returned
+ * instead, and *@in is left pointing at that stopping point.
+ *
+ * Returns: a newly allocated string holding the message id.
+ */
+static char *
+decode_msgid (const char **in)
+{
+	const char *cur = *in;
+	char *id;
+
+	decode_lwsp (&cur);
+	if (*cur == '<') {
+		cur++;
+	} else {
+		w(g_warning ("Invalid msg-id; missing '<': %s", *in));
+	}
+
+	decode_lwsp (&cur);
+	id = decode_addrspec (&cur);
+	if (id != NULL) {
+		decode_lwsp (&cur);
+		if (*cur == '>') {
+			cur++;
+		} else {
+			w(g_warning ("Invalid msg-id; missing '>': %s", *in));
+		}
+
+		*in = cur;
+
+		return id;
+	}
+
+	/* not a proper addr-spec: fall back to the raw text before '>' */
+	w(g_warning ("Invalid msg-id; missing addr-spec: %s", *in));
+	*in = cur;
+	while (*cur && *cur != '>')
+		cur++;
+
+	id = g_strndup (*in, (size_t) (cur - *in));
+	*in = cur;
+
+	return id;
+}
+
+
+/**
+ * g_mime_utils_decode_message_id:
+ * @message_id: string containing a message-id
+ *
+ * Decodes a msg-id as defined by rfc822.
+ *
+ * Returns: a newly allocated string containing the addr-spec portion
+ * of the msg-id.
+ **/
+char *
+g_mime_utils_decode_message_id (const char *message_id)
+{
+	const char *cursor;
+
+	g_return_val_if_fail (message_id != NULL, NULL);
+
+	/* the decoder advances its argument; give it a private cursor */
+	cursor = message_id;
+
+	return decode_msgid (&cursor);
+}
+
+
+/**
+ * g_mime_references_decode:
+ * @text: string containing a list of msg-ids
+ *
+ * Decodes a list of msg-ids as in the References and/or In-Reply-To
+ * headers defined in rfc822. Words that are not msg-ids (phrases) are
+ * skipped; decoding stops at the first item that can be parsed
+ * neither as a msg-id nor as a word.
+ *
+ * Returns: a list of referenced msg-ids (free with
+ * g_mime_references_free()), or %NULL if none were found.
+ **/
+GMimeReferences *
+g_mime_references_decode (const char *text)
+{
+	/* `tail` always addresses the link that the next node goes into,
+	 * so appending needs no assumption about the node's field layout */
+	GMimeReferences *refs = NULL, **tail = &refs, *ref;
+	const char *inptr = text;
+	char *msgid;
+
+	g_return_val_if_fail (text != NULL, NULL);
+
+	while (*inptr) {
+		decode_lwsp (&inptr);
+		if (*inptr == '<') {
+			/* looks like a msg-id */
+			if ((msgid = decode_msgid (&inptr))) {
+				ref = g_new (GMimeReferences, 1);
+				ref->next = NULL;
+				ref->msgid = msgid;
+				*tail = ref;
+				tail = &ref->next;
+			} else {
+				w(g_warning ("Invalid References header: %s", inptr));
+				break;
+			}
+		} else if (*inptr) {
+			/* looks like part of a phrase */
+			if (!decode_word (&inptr)) {
+				w(g_warning ("Invalid References header: %s", inptr));
+				break;
+			}
+		}
+	}
+
+	return refs;
+}
+
+
+/**
+ * g_mime_references_append:
+ * @refs: the address of a #GMimeReferences list
+ * @msgid: a message-id string
+ *
+ * Appends a reference to msgid to the end of the list of references.
+ * @msgid is copied. *@refs may be %NULL (an empty list), in which
+ * case it is set to the new node.
+ **/
+void
+g_mime_references_append (GMimeReferences **refs, const char *msgid)
+{
+	GMimeReferences **link, *ref;
+
+	g_return_if_fail (refs != NULL);
+	g_return_if_fail (msgid != NULL);
+
+	/* find the terminating NULL link; walking the links themselves
+	 * avoids treating @refs as if it were a list node */
+	link = refs;
+	while (*link)
+		link = &(*link)->next;
+
+	ref = g_new (GMimeReferences, 1);
+	ref->msgid = g_strdup (msgid);
+	ref->next = NULL;
+
+	*link = ref;
+}
+
+
+/**
+ * g_mime_references_free:
+ * @refs: a #GMimeReferences list
+ *
+ * Frees every node of the #GMimeReferences list together with the
+ * message-id string it holds. Passing %NULL is a no-op.
+ **/
+void
+g_mime_references_free (GMimeReferences *refs)
+{
+	GMimeReferences *next;
+
+	for ( ; refs != NULL; refs = next) {
+		/* grab the link before the node goes away */
+		next = refs->next;
+		g_free (refs->msgid);
+		g_free (refs);
+	}
+}
+
+
+/**
+ * g_mime_references_clear:
+ * @refs: address of a #GMimeReferences list
+ *
+ * Frees the #GMimeReferences list stored at @refs and then sets
+ * *@refs to %NULL.
+ **/
+void
+g_mime_references_clear (GMimeReferences **refs)
+{
+	GMimeReferences *list;
+
+	g_return_if_fail (refs != NULL);
+
+	list = *refs;
+	g_mime_references_free (list);
+	*refs = NULL;
+}
+
+
+/**
+ * g_mime_references_get_next:
+ * @ref: a #GMimeReferences list
+ *
+ * Advances to the next reference node in the #GMimeReferences list.
+ *
+ * Returns: the node following @ref, or %NULL if @ref is %NULL or the
+ * last node of the list.
+ **/
+const GMimeReferences *
+g_mime_references_get_next (const GMimeReferences *ref)
+{
+	if (ref == NULL)
+		return NULL;
+
+	return ref->next;
+}
+
+
+/**
+ * g_mime_references_get_message_id:
+ * @ref: a #GMimeReferences list
+ *
+ * Gets the Message-Id reference from the #GMimeReferences node.
+ *
+ * Returns: the Message-Id string stored in @ref (still owned by the
+ * node), or %NULL if @ref is %NULL.
+ **/
+const char *
+g_mime_references_get_message_id (const GMimeReferences *ref)
+{
+	if (ref == NULL)
+		return NULL;
+
+	return ref->msgid;
+}
+
+
+/*
+ * is_rfc2047_token:
+ * @inptr: start of the word
+ * @len: length of the word
+ *
+ * Checks whether the word has the outer shape of an rfc2047
+ * encoded-word, "=?charset?X?...?=" with X one of q, Q, b or B.
+ * Only the framing is checked; the charset and the payload are not
+ * validated.
+ *
+ * Returns: %TRUE if the word looks like an encoded-word.
+ */
+static gboolean
+is_rfc2047_token (const char *inptr, size_t len)
+{
+	const char *p;
+	size_t left;
+
+	/* "=?c?q??=" is the shortest possible encoded-word */
+	if (len < 8)
+		return FALSE;
+
+	if (inptr[0] != '=' || inptr[1] != '?')
+		return FALSE;
+
+	if (inptr[len - 2] != '?' || inptr[len - 1] != '=')
+		return FALSE;
+
+	/* skip past the charset */
+	p = inptr + 2;
+	left = len - 2;
+	while (left > 0 && *p != '?') {
+		p++;
+		left--;
+	}
+
+	if (*p != '?' || left < 4)
+		return FALSE;
+
+	/* encoding letter */
+	switch (p[1]) {
+	case 'q':
+	case 'Q':
+	case 'b':
+	case 'B':
+		break;
+	default:
+		return FALSE;
+	}
+
+	/* and the '?' that opens the payload */
+	return p[2] == '?';
+}
+
+/*
+ * header_fold:
+ * @in: complete header text ("Name: value"), optionally ending in '\n'
+ * @structured: %TRUE to fold only at whitespace; %FALSE to additionally
+ * cut up words too long for a line (words that look like rfc2047
+ * encoded-words are never cut)
+ *
+ * Folds the header so that lines aim to stay within GMIME_FOLD_LEN
+ * columns, inserting "\n\t" (or "\n" in front of existing whitespace)
+ * at the fold points. Input that is at most GMIME_FOLD_LEN + 1 bytes
+ * long is returned as an unmodified copy. Processing stops at the
+ * first '\n' of the input.
+ *
+ * Returns: a newly allocated string containing the folded header.
+ */
+static char *
+header_fold (const char *in, gboolean structured)
+{
+	gboolean last_was_lwsp = FALSE;
+	register const char *inptr;
+	size_t len, outlen;
+	size_t fieldlen;
+	GString *out;
+	char *ret;
+
+	inptr = in;
+	len = strlen (in);
+	if (len <= GMIME_FOLD_LEN + 1)
+		return g_strdup (in);
+
+	/* the field name is copied through untouched */
+	out = g_string_new ("");
+	fieldlen = strcspn (inptr, ": \t\n");
+	g_string_append_len (out, inptr, fieldlen);
+	outlen = fieldlen;
+	inptr += fieldlen;
+
+	while (*inptr && *inptr != '\n') {
+		len = strcspn (inptr, " \t\n");
+
+		if (len > 1 && outlen + len > GMIME_FOLD_LEN) {
+			/* the word does not fit: fold in front of it, unless we
+			 * are still right behind the field name */
+			if (outlen > 1 && out->len > fieldlen + 2) {
+				if (last_was_lwsp) {
+					if (structured)
+						out->str[out->len - 1] = '\t';
+
+					g_string_insert_c (out, out->len - 1, '\n');
+				} else
+					g_string_append (out, "\n\t");
+				outlen = 1;
+			}
+
+			if (!structured && !is_rfc2047_token (inptr, len)) {
+				/* check for very long words, just cut them up */
+				while (outlen + len > GMIME_FOLD_LEN) {
+					/* the line can already be at or past the
+					 * limit (a very long field name is never
+					 * folded), so only copy when there is room:
+					 * the unsigned subtraction must not wrap */
+					if (outlen < GMIME_FOLD_LEN) {
+						size_t room = GMIME_FOLD_LEN - outlen;
+
+						g_string_append_len (out, inptr, room);
+						inptr += room;
+						len -= room;
+					}
+
+					g_string_append (out, "\n\t");
+					outlen = 1;
+				}
+
+				/* whatever is left of the word is picked up by
+				 * the next iteration of the outer loop */
+			} else {
+				g_string_append_len (out, inptr, len);
+				outlen += len;
+				inptr += len;
+			}
+			last_was_lwsp = FALSE;
+		} else if (len > 0) {
+			g_string_append_len (out, inptr, len);
+			outlen += len;
+			inptr += len;
+			last_was_lwsp = FALSE;
+		} else {
+			last_was_lwsp = TRUE;
+			if (*inptr == '\t') {
+				/* tabs are a good place to fold, odds
+				   are that this is where the previous
+				   mailer folded it */
+				g_string_append (out, "\n\t");
+				outlen = 1;
+				while (is_blank (*inptr))
+					inptr++;
+			} else {
+				g_string_append_c (out, *inptr++);
+				outlen++;
+			}
+		}
+	}
+
+	/* keep the input's trailing newline without doubling it up; `out`
+	 * is empty when the input starts with '\n', so check before
+	 * looking at its last character */
+	if (*inptr == '\n' && (out->len == 0 || out->str[out->len - 1] != '\n'))
+		g_string_append_c (out, '\n');
+
+	ret = out->str;
+	g_string_free (out, FALSE);
+
+	return ret;
+}
+
+
+/**
+ * g_mime_utils_structured_header_fold:
+ * @str: input string
+ *
+ * Folds a structured header according to the rules in rfc822.
+ *
+ * Returns: a newly allocated string containing the folded header.
+ **/
+char *
+g_mime_utils_structured_header_fold (const char *str)
+{
+	char *folded;
+
+	folded = header_fold (str, TRUE);
+
+	return folded;
+}
+
+
+/**
+ * g_mime_utils_unstructured_header_fold:
+ * @str: input string
+ *
+ * Folds an unstructured header according to the rules in rfc822.
+ *
+ * Returns: a newly allocated string containing the folded header.
+ **/
+char *
+g_mime_utils_unstructured_header_fold (const char *str)
+{
+	char *folded;
+
+	folded = header_fold (str, FALSE);
+
+	return folded;
+}
+
+
+/**
+ * g_mime_utils_header_fold:
+ * @str: input string
+ *
+ * Folds a structured header according to the rules in rfc822. This
+ * does the same as g_mime_utils_structured_header_fold().
+ *
+ * Returns: a newly allocated string containing the folded header.
+ **/
+char *
+g_mime_utils_header_fold (const char *str)
+{
+	char *folded;
+
+	folded = header_fold (str, TRUE);
+
+	return folded;
+}
+
+
+/**
+ * g_mime_utils_header_printf:
+ * @format: string format
+ * @Varargs: arguments
+ *
+ * Formats a header printf-style from @format and the @Varargs, then
+ * folds the result as a structured header.
+ *
+ * Returns: a newly allocated string containing the folded header
+ * specified by @format and the following arguments.
+ **/
+char *
+g_mime_utils_header_printf (const char *format, ...)
+{
+	char *unfolded, *folded;
+	va_list args;
+
+	va_start (args, format);
+	unfolded = g_strdup_vprintf (format, args);
+	va_end (args);
+
+	folded = header_fold (unfolded, TRUE);
+
+	/* only the folded copy is handed back */
+	g_free (unfolded);
+
+	return folded;
+}
+
+/*
+ * need_quotes:
+ * @string: nul-terminated input
+ *
+ * Decides whether @string has to be wrapped in double quotes: it does
+ * if a tspecial or a '.' occurs outside of an already quoted section.
+ * A backslash and the character following it are skipped over.
+ *
+ * Returns: %TRUE if quoting is required.
+ */
+static gboolean
+need_quotes (const char *string)
+{
+	gboolean in_quotes = FALSE;
+	const char *p;
+
+	for (p = string; *p != '\0'; p++) {
+		switch (*p) {
+		case '\\':
+			/* step over the escaped character, which the loop
+			 * increment then skips; a trailing backslash ends
+			 * the scan */
+			if (p[1] == '\0')
+				return FALSE;
+			p++;
+			break;
+		case '"':
+			in_quotes = !in_quotes;
+			break;
+		default:
+			if (!in_quotes && (is_tspecial (*p) || *p == '.'))
+				return TRUE;
+			break;
+		}
+	}
+
+	return FALSE;
+}
+
+/**
+ * g_mime_utils_quote_string:
+ * @str: input string
+ *
+ * Quotes @str as needed according to the rules in rfc2045.
+ * Backslashes are always escaped; double quotes are escaped only when
+ * the result is being wrapped in quotes.
+ *
+ * Returns: an allocated string containing the escaped and quoted (if
+ * needed to be) input string. The decision to quote the string is
+ * based on whether or not the input string contains any 'tspecials'
+ * as defined by rfc2045.
+ **/
+char *
+g_mime_utils_quote_string (const char *str)
+{
+	GString *buf;
+	gboolean wrap;
+	const char *p;
+
+	buf = g_string_new ("");
+	wrap = need_quotes (str);
+
+	if (wrap)
+		g_string_append_c (buf, '"');
+
+	for (p = str; *p; p++) {
+		if (*p == '\\' || (wrap && *p == '"'))
+			g_string_append_c (buf, '\\');
+
+		g_string_append_c (buf, *p);
+	}
+
+	if (wrap)
+		g_string_append_c (buf, '"');
+
+	/* keep the character data, drop only the GString wrapper */
+	return g_string_free (buf, FALSE);
+}
+
+
+/**
+ * g_mime_utils_unquote_string:
+ * @str: input string
+ *
+ * Unquotes and unescapes a string in place: unescaped double quotes
+ * are removed and each backslash escape is replaced by the character
+ * it escapes. A %NULL @str is ignored.
+ **/
+void
+g_mime_utils_unquote_string (char *str)
+{
+	gboolean escaped = FALSE;
+	char *src, *dst;
+	char c;
+
+	if (!str)
+		return;
+
+	/* the output never outgrows the input, so rewrite in place */
+	for (src = dst = str; (c = *src) != '\0'; src++) {
+		if (escaped) {
+			/* whatever follows a backslash is kept literally */
+			*dst++ = c;
+			escaped = FALSE;
+		} else if (c == '\\') {
+			escaped = TRUE;
+		} else if (c != '"') {
+			*dst++ = c;
+		}
+	}
+
+	*dst = '\0';
+}
+
+
+/**
+ * g_mime_utils_text_is_8bit:
+ * @text: text to check for 8bit chars
+ * @len: text length
+ *
+ * Determines if @text contains 8bit characters within the first @len
+ * bytes. Scanning also stops at the first nul byte, if one occurs
+ * before @len bytes have been examined.
+ *
+ * Returns: %TRUE if the text contains 8bit characters or %FALSE
+ * otherwise.
+ **/
+gboolean
+g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
+{
+	register const unsigned char *inptr;
+	const unsigned char *inend;
+
+	g_return_val_if_fail (text != NULL, FALSE);
+
+	inend = text + len;
+
+	/* test the bound before dereferencing, so that a buffer which is
+	 * not nul-terminated is never read past @len bytes */
+	for (inptr = text; inptr < inend && *inptr; inptr++)
+		if (*inptr > (unsigned char) 127)
+			return TRUE;
+
+	return FALSE;
+}
+
+
+/**
+ * g_mime_utils_best_encoding:
+ * @text: text to encode
+ * @len: text length
+ *
+ * Determines the best content encoding for the first @len bytes of
+ * @text by counting the bytes above 127: quoted-printable is chosen
+ * while they make up no more than 17% of the input, base64 otherwise.
+ *
+ * Returns: a #GMimeContentEncoding that is determined to be the best
+ * encoding type for the specified block of text. ("best" in this
+ * particular case means smallest output size)
+ **/
+GMimeContentEncoding
+g_mime_utils_best_encoding (const unsigned char *text, size_t len)
+{
+	size_t high = 0;
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (text[i] > (unsigned char) 127)
+			high++;
+	}
+
+	if ((float) high <= len * 0.17)
+		return GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE;
+
+	return GMIME_CONTENT_ENCODING_BASE64;
+}
+
+
+/**
+ * charset_convert:
+ * @cd: iconv converter
+ * @inbuf: input text buffer to convert
+ * @inleft: length of the input buffer
+ * @outp: pointer to output buffer
+ * @outlenp: pointer to output buffer length
+ * @ninval: the number of invalid bytes in @inbuf
+ *
+ * Converts the input buffer from one charset to another using the
+ * @cd. On completion, @outp will point to the output buffer
+ * containing the converted text (nul-terminated), @outlenp will be
+ * the size of the @outp buffer (note: not the strlen() of @outp) and
+ * @ninval will contain the number of bytes which could not be
+ * converted.
+ *
+ * Bytes which cannot be converted from @inbuf will appear as '?'
+ * characters in the output buffer. An incomplete sequence at the end
+ * of the input is dropped and counted in @ninval.
+ *
+ * If *@outp is non-NULL, then it is assumed that it points to a
+ * pre-allocated buffer of length *@outlenp. This is done so that the
+ * same output buffer can be reused multiple times. The terminating
+ * nul is stored beyond the *@outlenp bytes, so such a buffer must
+ * really be *@outlenp + 1 bytes large (as the buffers allocated here
+ * are), and it may be reallocated.
+ *
+ * Returns: the number of bytes stored in the output buffer.
+ * NOTE(review): as written this count includes the terminating nul,
+ * i.e. it is one more than the string length.
+ **/
+static size_t
+charset_convert (iconv_t cd, const char *inbuf, size_t inleft, char **outp, size_t *outlenp, size_t *ninval)
+{
+	size_t outlen, outleft, rc, n = 0;
+	char *outbuf, *out;
+
+	if (*outp == NULL) {
+		outleft = outlen = (inleft * 2) + 16;
+		outbuf = out = g_malloc (outlen + 1);
+	} else {
+		outleft = outlen = *outlenp;
+		outbuf = out = *outp;
+	}
+
+	do {
+		rc = iconv (cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
+		if (rc == (size_t) -1) {
+			if (errno == EINVAL) {
+				/* incomplete sequence at the end of the input buffer */
+				n += inleft;
+				break;
+			}
+
+#ifdef G_OS_WIN32
+			/* seems that GnuWin32's libiconv 1.9 does not set errno in
+			 * the E2BIG case, so we have to fake it */
+			if (outleft <= inleft)
+				errno = E2BIG;
+#endif
+
+			if (errno == E2BIG || outleft == 0) {
+				/* need to grow the output buffer. This is also done
+				 * when an invalid byte turns up just as the buffer
+				 * is full: storing the '?' right away would use up
+				 * the byte reserved for the nul and wrap outleft
+				 * around. The invalid byte is reported again on the
+				 * next pass, with room available. */
+				outlen += (inleft * 2) + 16;
+				rc = (size_t) (outbuf - out);
+				out = g_realloc (out, outlen + 1);
+				outleft = outlen - rc;
+				outbuf = out + rc;
+			} else {
+				/* invalid byte(-sequence) in the input buffer */
+				*outbuf++ = '?';
+				outleft--;
+				inleft--;
+				inbuf++;
+				n++;
+			}
+		}
+	} while (inleft > 0);
+
+	/* flush the converter and return it to its initial state */
+	iconv (cd, NULL, NULL, &outbuf, &outleft);
+	*outbuf++ = '\0';
+
+	*outlenp = outlen;
+	*outp = out;
+	*ninval = n;
+
+	return (outbuf - out);
+}
+
+
+#define USER_CHARSETS_INCLUDE_UTF8 (1 << 0)
+#define USER_CHARSETS_INCLUDE_LOCALE (1 << 1)
+
+
+/**
+ * g_mime_utils_decode_8bit:
+ * @text: input text in unknown 8bit/multibyte character set
+ * @len: input text length
+ *
+ * Attempts to convert text in an unknown 8bit/multibyte charset into
+ * UTF-8 by finding the charset which will convert the most bytes into
+ * valid UTF-8 characters as possible. The candidates are the user
+ * charsets (see g_mime_user_charsets()), UTF-8 and the locale
+ * charset, tried in that order. If no exact match can be found, it
+ * will choose the best match and convert invalid byte sequences into
+ * question-marks (?) in the returned string buffer.
+ *
+ * Returns: a newly allocated UTF-8 string representation of @text, or
+ * %NULL if @text is %NULL.
+ **/
+char *
+g_mime_utils_decode_8bit (const char *text, size_t len)
+{
+	const char **charsets, **user_charsets, *locale, *best;
+	size_t outleft, outlen, min, ninval;
+	unsigned int included = 0;
+	iconv_t cd;
+	char *out;
+	int i = 0;
+
+	g_return_val_if_fail (text != NULL, NULL);
+
+	/* a UTF-8 locale is already covered by the UTF-8 candidate */
+	locale = g_mime_locale_charset ();
+	if (locale && !g_ascii_strcasecmp (locale, "UTF-8"))
+		included |= USER_CHARSETS_INCLUDE_LOCALE;
+
+	if ((user_charsets = g_mime_user_charsets ())) {
+		while (user_charsets[i])
+			i++;
+	}
+
+	/* room for the user charsets + UTF-8 + locale + NULL terminator */
+	charsets = g_alloca (sizeof (char *) * (i + 3));
+	i = 0;
+
+	if (user_charsets) {
+		while (user_charsets[i]) {
+			/* keep a record of whether or not the user-supplied
+			 * charsets include UTF-8 and/or the default fallback
+			 * charset so that we avoid doubling our efforts for
+			 * these 2 charsets. We could have used a hash table
+			 * to keep track of unique charsets, but we can
+			 * (hopefully) assume that user_charsets is a unique
+			 * list of charsets with no duplicates. */
+			if (!g_ascii_strcasecmp (user_charsets[i], "UTF-8"))
+				included |= USER_CHARSETS_INCLUDE_UTF8;
+
+			if (locale && !g_ascii_strcasecmp (user_charsets[i], locale))
+				included |= USER_CHARSETS_INCLUDE_LOCALE;
+
+			charsets[i] = user_charsets[i];
+			i++;
+		}
+	}
+
+	if (!(included & USER_CHARSETS_INCLUDE_UTF8))
+		charsets[i++] = "UTF-8";
+
+	if (locale && !(included & USER_CHARSETS_INCLUDE_LOCALE))
+		charsets[i++] = locale;
+
+	charsets[i] = NULL;
+
+	min = len;
+	best = charsets[0];
+
+	/* the same output buffer is reused for every attempt */
+	outleft = (len * 2) + 16;
+	out = g_malloc (outleft + 1);
+
+	for (i = 0; charsets[i]; i++) {
+		if ((cd = g_mime_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
+			continue;
+
+		outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
+
+		g_mime_iconv_close (cd);
+
+		/* a flawless conversion wins immediately */
+		if (ninval == 0)
+			return g_realloc (out, outlen + 1);
+
+		if (ninval < min) {
+			best = charsets[i];
+			min = ninval;
+		}
+	}
+
+	/* if we get here, then none of the charsets fit the 8bit text flawlessly...
+	 * try to find the one that fit the best and use that to convert what we can,
+	 * replacing any byte we can't convert with a '?' */
+
+	if ((cd = g_mime_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
+		/* this shouldn't happen... but if we are here, then
+		 * it did... the only thing we can do at this point
+		 * is replace the 8bit garbage and pray */
+		register const char *inptr = text;
+		const char *inend = inptr + len;
+		char *outbuf = out;
+
+		while (inptr < inend) {
+			if (is_ascii (*inptr))
+				*outbuf++ = *inptr;
+			else
+				*outbuf++ = '?';
+
+			/* always consume the input byte; otherwise a single
+			 * 8bit byte would make this loop spin forever while
+			 * writing past the end of the output buffer */
+			inptr++;
+		}
+
+		*outbuf++ = '\0';
+
+		return g_realloc (out, (size_t) (outbuf - out));
+	}
+
+	outlen = charset_convert (cd, text, len, &out, &outleft, &ninval);
+
+	g_mime_iconv_close (cd);
+
+	return g_realloc (out, outlen + 1);
+}
+
+
+/* Decodes rfc2047's flavour of quoted-printable: "=XX" is a byte given
+ * as two hex digits and '_' stands for a space; every other byte is
+ * copied through unchanged.
+ *
+ * Writes at most @len bytes to @out and returns the number of bytes
+ * written, or -1 if an "=XX" escape is cut short by the end of @in. */
+static ssize_t
+quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
+{
+	const unsigned char *src = in;
+	const unsigned char *end = in + len;
+	unsigned char *dst = out;
+	unsigned char hi, lo;
+
+	while (src < end) {
+		unsigned char ch = *src++;
+
+		switch (ch) {
+		case '=':
+			if (end - src < 2) {
+				/* data was truncated */
+				return -1;
+			}
+
+			hi = toupper (src[0]);
+			lo = toupper (src[1]);
+			src += 2;
+
+			/* map each hex digit to its 4-bit value */
+			hi = (hi >= 'A' ? hi - 'A' + 10 : hi - '0') & 0x0f;
+			lo = (lo >= 'A' ? lo - 'A' + 10 : lo - '0') & 0x0f;
+
+			*dst++ = (hi << 4) | lo;
+			break;
+		case '_':
+			/* _'s are an rfc2047 shortcut for encoding spaces */
+			*dst++ = ' ';
+			break;
+		default:
+			*dst++ = ch;
+			break;
+		}
+	}
+
+	return (ssize_t) (dst - out);
+}
+
+#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))
+
+/* Decodes a single rfc2047 encoded-word token of the form
+ * "=?charset[*language]?encoding?encoded-text?=".
+ *
+ * @in/@inlen must describe a token that begins with "=?" and ends
+ * with "?=" (callers check this with is_rfc2047_encoded_word()).
+ *
+ * Returns a newly allocated string with the payload converted to
+ * UTF-8, or NULL if the token is malformed (missing or unknown
+ * encoding, broken encoded-text). If the declared charset is empty or
+ * unknown to iconv, the payload is passed to
+ * g_mime_utils_decode_8bit() instead. */
+static char *
+rfc2047_decode_word (const char *in, size_t inlen)
+{
+	const unsigned char *instart = (const unsigned char *) in;
+	const register unsigned char *inptr = instart + 2;
+	const unsigned char *inend = instart + inlen - 2;
+	unsigned char *decoded;
+	const char *charset;
+	size_t len, ninval;
+	char *charenc, *p;
+	guint32 save = 0;
+	ssize_t declen;
+	int state = 0;
+	iconv_t cd;
+	char *buf;
+
+	/* skip over the charset */
+	if (!(inptr = memchr (inptr, '?', inend - inptr)))
+		return NULL;
+
+	/* "?<encoding>?" must fit in front of the closing "?=": for a
+	 * token such as "=?a?b?=" the '?' after the encoding *is* the
+	 * closing delimiter, which would otherwise leave us with a
+	 * negative (i.e. huge) encoded-text length below */
+	if (inend - inptr < 3 || inptr[2] != '?')
+		return NULL;
+
+	inptr++;
+
+	switch (*inptr) {
+	case 'B':
+	case 'b':
+		inptr += 2;
+		len = (size_t) (inend - inptr);
+		decoded = g_alloca (len);
+		declen = g_mime_encoding_base64_decode_step (inptr, len, decoded, &state, &save);
+
+		if (declen == -1) {
+			d(fprintf (stderr, "encountered broken 'B' encoding\n"));
+			return NULL;
+		}
+		break;
+	case 'Q':
+	case 'q':
+		inptr += 2;
+		len = (size_t) (inend - inptr);
+		decoded = g_alloca (len);
+		declen = quoted_decode (inptr, len, decoded);
+
+		if (declen == -1) {
+			d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
+			return NULL;
+		}
+		break;
+	default:
+		d(fprintf (stderr, "unknown encoding\n"));
+		return NULL;
+	}
+
+	/* inptr now points at the encoded-text, which is preceded by
+	 * "?<encoding>?"; copy out everything between "=?" and that */
+	len = (inptr - 3) - (instart + 2);
+	charenc = g_alloca (len + 1);
+	memcpy (charenc, in + 2, len);
+	charenc[len] = '\0';
+	charset = charenc;
+
+	/* rfc2231 updates rfc2047 encoded words...
+	 * The ABNF given in RFC 2047 for encoded-words is:
+	 *   encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
+	 * This specification changes this ABNF to:
+	 *   encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
+	 */
+
+	/* trim off the 'language' part if it's there... */
+	if ((p = strchr (charset, '*')))
+		*p = '\0';
+
+	/* slight optimization? the payload is returned as-is, without
+	 * being validated as UTF-8 */
+	if (!g_ascii_strcasecmp (charset, "UTF-8"))
+		return g_strndup ((char *) decoded, declen);
+
+	if (!charset[0] || (cd = g_mime_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
+		w(g_warning ("Cannot convert from %s to UTF-8, header display may "
+			     "be corrupt: %s", charset[0] ? charset : "unspecified charset",
+			     g_strerror (errno)));
+
+		return g_mime_utils_decode_8bit ((char *) decoded, declen);
+	}
+
+	/* charset_convert() grows the buffer as needed */
+	len = declen;
+	buf = g_malloc (len + 1);
+
+	charset_convert (cd, (char *) decoded, declen, &buf, &len, &ninval);
+
+	g_mime_iconv_close (cd);
+
+#if w(!)0
+	if (ninval > 0) {
+		/* "%.*s" takes an int precision */
+		g_warning ("Failed to completely convert \"%.*s\" to UTF-8, display may be "
+			   "corrupt: %s", (int) declen, decoded, g_strerror (errno));
+	}
+#endif
+
+	return buf;
+}
+
+
+/**
+ * g_mime_utils_header_decode_text:
+ * @text: header text to decode
+ *
+ * Decodes an rfc2047 encoded 'text' header.
+ *
+ * Note: See g_mime_set_user_charsets() for details on how charset
+ * conversion is handled for unencoded 8bit text and/or wrongly
+ * specified rfc2047 encoded-word tokens.
+ *
+ * Returns: a newly allocated UTF-8 string representing the decoded
+ * header (an empty string if @text is %NULL).
+ **/
+char *
+g_mime_utils_header_decode_text (const char *text)
+{
+	gboolean enable_rfc2047_workarounds = _g_mime_enable_rfc2047_workarounds ();
+	register const char *inptr = text;
+	gboolean encoded = FALSE;
+	const char *lwsp, *word;
+	size_t nlwsp, n;
+	gboolean ascii;
+	char *decoded;
+	GString *out;
+
+	if (text == NULL)
+		return g_strdup ("");
+
+	out = g_string_sized_new (strlen (text) + 1);
+
+	while (*inptr != '\0') {
+		/* collect the whitespace preceding the next word */
+		lwsp = inptr;
+		while (is_lwsp (*inptr))
+			inptr++;
+
+		nlwsp = (size_t) (inptr - lwsp);
+
+		if (*inptr != '\0') {
+			word = inptr;
+			ascii = TRUE;
+
+			if (enable_rfc2047_workarounds) {
+				if (!strncmp (inptr, "=?", 2)) {
+					inptr += 2;
+
+					/* skip past the charset (if one is even declared, sigh) */
+					while (*inptr && *inptr != '?') {
+						ascii = ascii && is_ascii (*inptr);
+						inptr++;
+					}
+
+					/* sanity check encoding type; note that strchr()
+					 * also matches the terminating nul, so that has to
+					 * be ruled out first or we'd read past the end of
+					 * the header */
+					if (inptr[0] != '?' || inptr[1] == '\0' ||
+					    !strchr ("BbQq", inptr[1]) || inptr[2] != '?')
+						goto non_rfc2047;
+
+					inptr += 3;
+
+					/* find the end of the rfc2047 encoded word token */
+					while (*inptr && strncmp (inptr, "?=", 2) != 0) {
+						ascii = ascii && is_ascii (*inptr);
+						inptr++;
+					}
+
+					if (!strncmp (inptr, "?=", 2))
+						inptr += 2;
+				} else {
+				non_rfc2047:
+					/* stop if we encounter a possible rfc2047 encoded
+					 * token even if it's inside another word, sigh. */
+					while (*inptr && !is_lwsp (*inptr) &&
+					       strncmp (inptr, "=?", 2) != 0) {
+						ascii = ascii && is_ascii (*inptr);
+						inptr++;
+					}
+				}
+			} else {
+				while (*inptr && !is_lwsp (*inptr)) {
+					ascii = ascii && is_ascii (*inptr);
+					inptr++;
+				}
+			}
+
+			n = (size_t) (inptr - word);
+			if (is_rfc2047_encoded_word (word, n)) {
+				if ((decoded = rfc2047_decode_word (word, n))) {
+					/* rfc2047 states that you must ignore all
+					 * whitespace between encoded words */
+					if (!encoded)
+						g_string_append_len (out, lwsp, nlwsp);
+
+					g_string_append (out, decoded);
+					g_free (decoded);
+
+					encoded = TRUE;
+				} else {
+					/* append lwsp and invalid rfc2047 encoded-word token */
+					g_string_append_len (out, lwsp, nlwsp + n);
+					encoded = FALSE;
+				}
+			} else {
+				/* append lwsp */
+				g_string_append_len (out, lwsp, nlwsp);
+
+				/* append word token */
+				if (!ascii) {
+					/* *sigh* I hate broken mailers... */
+					decoded = g_mime_utils_decode_8bit (word, n);
+					g_string_append (out, decoded);
+					g_free (decoded);
+				} else {
+					g_string_append_len (out, word, n);
+				}
+
+				encoded = FALSE;
+			}
+		} else {
+			/* appending trailing lwsp */
+			g_string_append_len (out, lwsp, nlwsp);
+			break;
+		}
+	}
+
+	/* hand the character data over to the caller */
+	decoded = out->str;
+	g_string_free (out, FALSE);
+
+	return decoded;
+}
+
+
+/**
+ * g_mime_utils_header_decode_phrase:
+ * @phrase: header to decode
+ *
+ * Decodes an rfc2047 encoded 'phrase' header.
+ *
+ * Note: See g_mime_set_user_charsets() for details on how charset
+ * conversion is handled for unencoded 8bit text and/or wrongly
+ * specified rfc2047 encoded-word tokens.
+ *
+ * Returns: a newly allocated UTF-8 string representing the decoded
+ * header (an empty string if @phrase is %NULL).
+ **/
+char *
+g_mime_utils_header_decode_phrase (const char *phrase)
+{
+	gboolean prev_was_encword = FALSE;
+	const char *cur, *ws, *token;
+	size_t wslen, toklen;
+	gboolean is_7bit;
+	char *result;
+	GString *buf;
+
+	if (phrase == NULL)
+		return g_strdup ("");
+
+	buf = g_string_sized_new (strlen (phrase) + 1);
+
+	for (cur = phrase; *cur != '\0'; ) {
+		/* measure the run of whitespace in front of the token */
+		ws = cur;
+		while (is_lwsp (*cur))
+			cur++;
+
+		wslen = (size_t) (cur - ws);
+		token = cur;
+
+		if (!is_atom (*cur)) {
+			/* not an atom: take everything up to the next
+			 * whitespace, noting whether it is pure ascii */
+			g_string_append_len (buf, ws, wslen);
+
+			is_7bit = TRUE;
+			for ( ; *cur && !is_lwsp (*cur); cur++)
+				is_7bit = is_7bit && is_ascii (*cur);
+
+			toklen = (size_t) (cur - token);
+
+			if (is_7bit) {
+				g_string_append_len (buf, token, toklen);
+			} else {
+				/* *sigh* I hate broken mailers... */
+				result = g_mime_utils_decode_8bit (token, toklen);
+				g_string_append (buf, result);
+				g_free (result);
+			}
+
+			prev_was_encword = FALSE;
+			continue;
+		}
+
+		while (is_atom (*cur))
+			cur++;
+
+		toklen = (size_t) (cur - token);
+
+		result = NULL;
+		if (is_rfc2047_encoded_word (token, toklen))
+			result = rfc2047_decode_word (token, toklen);
+
+		if (result == NULL) {
+			/* a plain atom or an invalid rfc2047 encoded-word
+			 * token: copy it verbatim along with its lwsp */
+			g_string_append_len (buf, ws, wslen + toklen);
+			prev_was_encword = FALSE;
+			continue;
+		}
+
+		/* rfc2047 states that you must ignore all whitespace
+		 * between encoded words */
+		if (!prev_was_encword)
+			g_string_append_len (buf, ws, wslen);
+
+		g_string_append (buf, result);
+		g_free (result);
+
+		prev_was_encword = TRUE;
+	}
+
+	/* release the GString wrapper but keep its character data */
+	return g_string_free (buf, FALSE);
+}
+
+
+/* Encodes @len bytes of @in using rfc2047's version of
+ * quoted-printable: spaces become '_', bytes flagged by @safemask in
+ * gmime_special_table (other than '_' itself) are copied as-is and
+ * everything else is written as "=XX". Returns the number of bytes
+ * written to @out; no nul terminator is added. */
+static size_t
+quoted_encode (const char *in, size_t len, unsigned char *out, gushort safemask)
+{
+	const unsigned char *src = (const unsigned char *) in;
+	unsigned char *dst = out;
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		unsigned char ch = src[i];
+
+		if (ch == ' ') {
+			*dst++ = '_';
+			continue;
+		}
+
+		if (ch == '_' || !(gmime_special_table[ch] & safemask)) {
+			/* a literal '_' has to be escaped since it would
+			 * otherwise decode as a space */
+			*dst++ = '=';
+			*dst++ = tohex[(ch >> 4) & 0xf];
+			*dst++ = tohex[ch & 0xf];
+			continue;
+		}
+
+		*dst++ = ch;
+	}
+
+	return (dst - out);
+}
+
+/* Appends @word (@len bytes of UTF-8) to @string as a single rfc2047
+ * encoded-word, "=?charset?e?text?=".
+ *
+ * The text is first converted from UTF-8 to @charset; if that isn't
+ * possible (or @charset is UTF-8 already) it is emitted as UTF-8.
+ * g_mime_utils_best_encoding() then decides between the 'b' and 'q'
+ * encodings; @safemask selects which characters the 'q' encoding may
+ * leave unescaped. */
+static void
+rfc2047_encode_word (GString *string, const char *word, size_t len,
+		     const char *charset, gushort safemask)
+{
+	GMimeContentEncoding best;
+	unsigned char *encoded;
+	char *converted = NULL;
+	char *src, *dst;
+	guint32 save = 0;
+	int state = 0;
+	char encoding;
+	iconv_t cd;
+	size_t n;
+
+	if (g_ascii_strcasecmp (charset, "UTF-8") != 0 &&
+	    (cd = g_mime_iconv_open (charset, "UTF-8")) != (iconv_t) -1) {
+		converted = g_mime_iconv_strndup (cd, (char *) word, len);
+		g_mime_iconv_close (cd);
+	}
+
+	if (converted != NULL) {
+		word = converted;
+		len = strlen (converted);
+	} else {
+		/* no conversion took place, so the text is still UTF-8 */
+		charset = "UTF-8";
+	}
+
+	best = g_mime_utils_best_encoding ((const unsigned char *) word, len);
+
+	if (best == GMIME_CONTENT_ENCODING_BASE64) {
+		encoded = g_alloca (GMIME_BASE64_ENCODE_LEN (len) + 1);
+		encoding = 'b';
+
+		n = g_mime_encoding_base64_encode_close ((const unsigned char *) word, len, encoded, &state, &save);
+		encoded[n] = '\0';
+
+		/* remove \n chars as headers need to be wrapped differently */
+		for (src = dst = (char *) encoded; *src; src++) {
+			if (*src != '\n')
+				*dst++ = *src;
+		}
+
+		*dst = '\0';
+	} else if (best == GMIME_CONTENT_ENCODING_QUOTEDPRINTABLE) {
+		encoded = g_alloca (GMIME_QP_ENCODE_LEN (len) + 1);
+		encoding = 'q';
+
+		n = quoted_encode (word, len, encoded, safemask);
+		encoded[n] = '\0';
+	} else {
+		encoded = NULL;
+		encoding = '\0';
+		g_assert_not_reached ();
+	}
+
+	g_free (converted);
+
+	g_string_append_printf (string, "=?%s?%c?%s?=", charset, encoding, encoded);
+}
+
+
+/* Classification of a word token. The values are ordered so that
+ * MAX() of two types is the type of the merged word. */
+typedef enum {
+ WORD_ATOM, /* written out verbatim by rfc2047_encode() */
+ WORD_QSTRING, /* written out as a quoted-string */
+ WORD_2047 /* written out as an rfc2047 encoded-word */
+} rfc822_word_t;
+
+/* A node in the singly-linked list of word tokens. Note that 'next'
+ * has to remain the first member: rfc2047_encode_get_rfc822_words()
+ * treats the address of its list head as if it were a node. */
+typedef struct _rfc822_word {
+ struct _rfc822_word *next;
+ const char *start, *end; /* the token spans [start, end) of the input string */
+ rfc822_word_t type;
+ int encoding; /* 0 = us-ascii, 1 = iso-8859-1, 2 = charset picked at encode time */
+} rfc822_word;
+
+/* nodes are allocated and released with the GLib slice allocator */
+#define rfc822_word_free(word) g_slice_free (rfc822_word, word)
+#define rfc822_word_new() g_slice_new (rfc822_word)
+
+/* okay, so 'unstructured text' fields don't actually contain 'word'
+ * tokens, but we can group stuff similarly...
+ *
+ * Splits the UTF-8 string @in into a linked list of whitespace
+ * delimited word tokens, recording for each one its type and the
+ * "encoding level" required by its characters (0 = all < 128,
+ * 1 = some in 128..255, 2 = some >= 256). A run of non-whitespace
+ * that reaches GMIME_FOLD_PREENCODED characters is broken into
+ * several tokens.
+ *
+ * If @phrase is TRUE, ascii characters that are not atom characters
+ * promote the token to (at least) WORD_QSTRING.
+ *
+ * Returns the head of the list (nodes come from rfc822_word_new()),
+ * or NULL if @in contains no words. */
+static rfc822_word *
+rfc2047_encode_get_rfc822_words (const char *in, gboolean phrase)
+{
+ rfc822_word *words, *tail, *word;
+ rfc822_word_t type = WORD_ATOM;
+ const char *inptr, *start, *last;
+ int count = 0, encoding = 0;
+
+ words = NULL;
+ /* pretend that the list head is itself a node so that the first
+ * 'tail->next = word' stores into 'words'; this relies on 'next'
+ * being the first member of rfc822_word */
+ tail = (rfc822_word *) &words;
+
+ last = start = inptr = in;
+ while (inptr && *inptr) {
+ const char *newinptr;
+ gunichar c;
+
+ newinptr = g_utf8_next_char (inptr);
+ c = g_utf8_get_char (inptr);
+ if (newinptr == NULL || !g_unichar_validate (c)) {
+ /* skip a single byte and retry; neither 'count' nor 'last'
+ * is updated for the skipped byte */
+ w(g_warning ("Invalid UTF-8 sequence encountered"));
+ inptr++;
+ continue;
+ }
+
+ inptr = newinptr;
+
+ if (c < 256 && is_lwsp (c)) {
+ /* whitespace terminates the current word (if there is one);
+ * 'last' still points just past the word's final character */
+ if (count > 0) {
+ word = rfc822_word_new ();
+ word->next = NULL;
+ word->start = start;
+ word->end = last;
+ word->type = type;
+ word->encoding = encoding;
+
+ tail->next = word;
+ tail = word;
+ count = 0;
+ }
+
+ /* the next word begins after this whitespace character */
+ start = inptr;
+ type = WORD_ATOM;
+ encoding = 0;
+ } else {
+ /* 'count' is the number of characters (not bytes) in the
+ * current word so far */
+ count++;
+ if (phrase && c < 128) {
+ /* phrases can have qstring words */
+ if (!is_atom (c))
+ type = MAX (type, WORD_QSTRING);
+ } else if (c > 127 && c < 256) {
+ type = WORD_2047;
+ encoding = MAX (encoding, 1);
+ } else if (c >= 256) {
+ type = WORD_2047;
+ encoding = 2;
+ }
+
+ /* break up overly long words */
+ if (count >= GMIME_FOLD_PREENCODED) {
+ word = rfc822_word_new ();
+ word->next = NULL;
+ word->start = start;
+ word->end = inptr;
+ word->type = type;
+ word->encoding = encoding;
+
+ tail->next = word;
+ tail = word;
+ count = 0;
+
+ /* Note: don't reset 'type' as it
+ * needs to be preserved when breaking
+ * long words */
+ start = inptr;
+ encoding = 0;
+ }
+ }
+
+ last = inptr;
+ }
+
+ /* flush the final word if the input didn't end in whitespace */
+ if (count > 0) {
+ word = rfc822_word_new ();
+ word->next = NULL;
+ word->start = start;
+ word->end = last;
+ word->type = type;
+ word->encoding = encoding;
+
+ tail->next = word;
+ tail = word;
+ }
+
+ /* debug dump of the token list; only compiled in if d(x) expands to x */
+#if d(!)0
+ printf ("rfc822 word tokens:\n");
+ word = words;
+ while (word) {
+ printf ("\t'%.*s'; type=%d, encoding=%d\n",
+ word->end - word->start, word->start,
+ word->type, word->encoding);
+
+ word = word->next;
+ }
+#endif
+
+ return words;
+}
+
+#define MERGED_WORD_LT_FOLDLEN(wlen, type) ((type) == WORD_2047 ? (wlen) < GMIME_FOLD_PREENCODED : (wlen) < (GMIME_FOLD_LEN - 8))
+
+/* Decides whether @word should be merged with the word following it
+ * (@next) in rfc2047_encode_merge_rfc822_words()'s second pass.
+ *
+ * Words that need rfc2047 encoding are never merged into atoms or
+ * qstrings; otherwise the words are merged only if the span from the
+ * start of @word to the end of the other word stays below the fold
+ * length that applies to the resulting type. */
+static gboolean
+should_merge_words (rfc822_word *word, rfc822_word *next)
+{
+	int natoms;
+
+	if (word->type == WORD_ATOM) {
+		if (next->type == WORD_2047)
+			return FALSE;
+
+		return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, next->type));
+	}
+
+	if (word->type == WORD_QSTRING) {
+		/* avoid merging with words that need to be rfc2047 encoded */
+		if (next->type == WORD_2047)
+			return FALSE;
+
+		return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_QSTRING));
+	}
+
+	if (word->type != WORD_2047)
+		return FALSE;
+
+	if (next->type == WORD_ATOM) {
+		/* whether we merge or not is dependent upon:
+		 * 1. the number of atoms in a row after 'word'
+		 * 2. if there is another encword after the string of atoms.
+		 */
+		for (natoms = 0; next && next->type == WORD_ATOM; natoms++)
+			next = next->next;
+
+		/* if all the words after the encword are atoms, don't merge */
+		if (!next || natoms > 3)
+			return FALSE;
+
+		/* from here on 'next' is the first non-atom word after
+		 * the run of atoms */
+	}
+
+	/* avoid merging with qstrings */
+	if (next->type == WORD_QSTRING)
+		return FALSE;
+
+	return (MERGED_WORD_LT_FOLDLEN (next->end - word->start, WORD_2047));
+}
+
+/* Coalesces neighbouring word tokens of the list at *@wordsp in place
+ * so that fewer, longer tokens get written out. A merged token spans
+ * from the start of the first word to the end of the second (i.e. it
+ * includes the whitespace between them) and takes the MAX of their
+ * encodings; the absorbed node is freed. */
+static void
+rfc2047_encode_merge_rfc822_words (rfc822_word **wordsp)
+{
+	rfc822_word *head = *wordsp;
+	rfc822_word *cur, *victim;
+
+	/* first pass: merge qstrings with adjacent qstrings and encwords with adjacent encwords */
+	cur = head;
+	while (cur && cur->next) {
+		victim = cur->next;
+
+		if (cur->type == WORD_ATOM || cur->type != victim->type ||
+		    !MERGED_WORD_LT_FOLDLEN (victim->end - cur->start, cur->type)) {
+			/* not mergeable, move on */
+			cur = victim;
+			continue;
+		}
+
+		/* absorb 'victim' into 'cur' and try 'cur' again against
+		 * its new neighbour */
+		cur->encoding = MAX (cur->encoding, victim->encoding);
+		cur->end = victim->end;
+		cur->next = victim->next;
+
+		rfc822_word_free (victim);
+	}
+
+	/* second pass: now merge atoms with the other words */
+	cur = head;
+	while (cur && cur->next) {
+		victim = cur->next;
+
+		if (!should_merge_words (cur, victim)) {
+			cur = victim;
+			continue;
+		}
+
+		/* the resulting word type is the MAX of the 2 types */
+		cur->type = MAX (cur->type, victim->type);
+		cur->encoding = MAX (cur->encoding, victim->encoding);
+		cur->end = victim->end;
+		cur->next = victim->next;
+
+		rfc822_word_free (victim);
+	}
+
+	*wordsp = head;
+}
+
+/* Appends the first @len bytes of @in to @out as a quoted-string:
+ * wrapped in double quotes, with every '"' and '\' backslash-escaped. */
+static void
+g_string_append_len_quoted (GString *out, const char *in, size_t len)
+{
+	size_t i;
+
+	g_string_append_c (out, '"');
+
+	for (i = 0; i < len; i++) {
+		char ch = in[i];
+
+		if (ch == '"' || ch == '\\')
+			g_string_append_c (out, '\\');
+
+		g_string_append_c (out, ch);
+	}
+
+	g_string_append_c (out, '"');
+}
+
+/* Encodes the UTF-8 header value @in according to rfc2047.
+ *
+ * The text is tokenized into words and neighbouring words are merged.
+ * Atoms are then written verbatim, qstrings are quoted (phrases only,
+ * i.e. @safemask includes IS_PSAFE), and everything else is written
+ * as encoded-words. The charset of an encoded-word is us-ascii or
+ * iso-8859-1 where that suffices; failing that, the first user
+ * charset able to represent the text, or else the best charset for
+ * it, is used.
+ *
+ * Returns a newly allocated string; if @in contains no words at all,
+ * a plain copy of @in. */
+static char *
+rfc2047_encode (const char *in, gushort safemask)
+{
+	rfc822_word *list, *cur, *last = NULL;
+	const char **user_charsets;
+	const char *charset, *text;
+	GMimeCharset mask;
+	GString *buf;
+	size_t n;
+	int i;
+
+	list = rfc2047_encode_get_rfc822_words (in, safemask & IS_PSAFE);
+	if (list == NULL)
+		return g_strdup (in);
+
+	rfc2047_encode_merge_rfc822_words (&list);
+
+	user_charsets = g_mime_user_charsets ();
+
+	buf = g_string_new ("");
+
+	/* output words now with spaces between them */
+	for (cur = list; cur != NULL; cur = cur->next) {
+		gboolean both_encoded = last && last->type == WORD_2047 && cur->type == WORD_2047;
+
+		/* append correct number of spaces between words */
+		if (last && !both_encoded) {
+			/* one or both of the words are not encoded so we write the spaces out untouched */
+			n = cur->start - last->end;
+			g_string_append_len (buf, last->end, n);
+		}
+
+		switch (cur->type) {
+		case WORD_ATOM:
+			g_string_append_len (buf, cur->start, (size_t) (cur->end - cur->start));
+			break;
+		case WORD_QSTRING:
+			g_assert (safemask & IS_PSAFE);
+			g_string_append_len_quoted (buf, cur->start, (size_t) (cur->end - cur->start));
+			break;
+		case WORD_2047:
+			if (both_encoded) {
+				/* include the whitespace chars between these 2 words in the
+				   resulting rfc2047 encoded word. */
+				text = last->end;
+				n = cur->end - last->end;
+
+				/* encoded words need to be separated by linear whitespace */
+				g_string_append_c (buf, ' ');
+			} else {
+				text = cur->start;
+				n = cur->end - cur->start;
+			}
+
+			if (cur->encoding == 0) {
+				charset = "us-ascii";
+			} else if (cur->encoding == 1) {
+				charset = "iso-8859-1";
+			} else {
+				/* prefer the first user charset that can
+				 * represent the text */
+				charset = NULL;
+				g_mime_charset_init (&mask);
+				g_mime_charset_step (&mask, text, n);
+
+				for (i = 0; user_charsets && user_charsets[i]; i++) {
+					if (g_mime_charset_can_encode (&mask, user_charsets[i], text, n)) {
+						charset = user_charsets[i];
+						break;
+					}
+				}
+
+				if (!charset)
+					charset = g_mime_charset_best_name (&mask);
+			}
+
+			rfc2047_encode_word (buf, text, n, charset, safemask);
+			break;
+		}
+
+		/* the previous word is no longer needed; 'cur' has to stay
+		 * alive until the next word has been written */
+		rfc822_word_free (last);
+		last = cur;
+	}
+
+	rfc822_word_free (last);
+
+	/* release the GString wrapper but keep its character data */
+	return g_string_free (buf, FALSE);
+}
+
+
+/**
+ * g_mime_utils_header_encode_phrase:
+ * @phrase: phrase to encode
+ *
+ * Encodes a 'phrase' header according to the rules in rfc2047.
+ *
+ * Returns: the newly allocated encoded 'phrase', or %NULL if @phrase
+ * is %NULL. Useful for encoding internet addresses.
+ **/
+char *
+g_mime_utils_header_encode_phrase (const char *phrase)
+{
+	return phrase != NULL ? rfc2047_encode (phrase, IS_PSAFE) : NULL;
+}
+
+
+/**
+ * g_mime_utils_header_encode_text:
+ * @text: text to encode
+ *
+ * Encodes a 'text' header according to the rules in rfc2047.
+ *
+ * Returns: the newly allocated encoded header, or %NULL if @text is
+ * %NULL. Useful for encoding headers like "Subject".
+ **/
+char *
+g_mime_utils_header_encode_text (const char *text)
+{
+	return text != NULL ? rfc2047_encode (text, IS_ESAFE) : NULL;
+}