/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Decoding common Content-Encodings of E-Mail functions.
 *
 * These decoding functions do not copy data: they rewrite the buffer
 * they are given in place.
 */

#include "mod_mbox.h"

/*
 * The char64 macro and `mime_decode_b64' routine are taken from
 * metamail 2.7, which is copyright (c) 1991 Bell Communications
 * Research, Inc. (Bellcore).  The following license applies to all
 * code below this point:
 *
 * Permission to use, copy, modify, and distribute this material
 * for any purpose and without fee is hereby granted, provided
 * that the above copyright notice and this permission notice
 * appear in all copies, and that the name of Bellcore not be
 * used in advertising or publicity pertaining to this
 * material without the specific, prior written permission
 * of an authorized representative of Bellcore.  BELLCORE
 * MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
 * OF THIS MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS",
 * WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
 */

/* Maps a 7-bit ASCII code to its 6-bit base64 value, or -1 when the
 * character is not part of the base64 alphabet.  The element type is
 * explicitly signed so that -1 survives on platforms where plain
 * `char' is unsigned. */
static const signed char index_64[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

#define char64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])

/* Return a human readable name for a Content-Transfer-Encoding. */
const char *mbox_cte_to_char(mbox_cte_e cte)
{
    switch (cte) {
    case CTE_NONE:
        return "None";
    case CTE_7BIT:
        return "7-Bit";
    case CTE_8BIT:
        return "8-Bit";
    case CTE_UUENCODE:
        return "uuencode";
    case CTE_BINARY:
        return "Binary";
    case CTE_QP:
        return "Quoted Printable";
    case CTE_BASE64:
        return "Base64";
    default:
        return "Unknown CTE";
    }
}

/* Escape '<', '>' and '&' in the first `len' bytes of `s' as HTML
 * entities.
 *
 * Unlike the original ap_escape_html, this one is also binary safe:
 * the input is described by an explicit length and may contain NUL
 * bytes.
 *
 * The escaped copy is allocated from pool `p' and stored in `*body';
 * it is always followed by a NUL byte that is not counted in the
 * returned length.  Returns the number of bytes in the escaped copy.
 */
apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
                                apr_size_t len, char **body)
{
    apr_size_t i, out;
    apr_size_t extra = 0;
    char *x;

    /* First, count the number of extra characters */
    for (i = 0; i < len; i++) {
        if ((s[i] == '<') || (s[i] == '>')) {
            extra += 3;         /* "&lt;" / "&gt;" replace one byte */
        }
        else if (s[i] == '&') {
            extra += 4;         /* "&amp;" replaces one byte */
        }
    }

    /* If there is nothing to escape, just copy the body to the new
       string */
    if (extra == 0) {
        *body = apr_pstrmemdup(p, s, len);
        return len;
    }

    /* Otherwise, we have some extra characters to insert: allocate
       enough space for them (plus the trailing NUL), and process the
       data. */
    x = apr_palloc(p, len + extra + 1);

    for (i = 0, out = 0; i < len; i++) {
        switch (s[i]) {
        case '<':
            memcpy(&x[out], "&lt;", 4);
            out += 4;
            break;
        case '>':
            memcpy(&x[out], "&gt;", 4);
            out += 4;
            break;
        case '&':
            memcpy(&x[out], "&amp;", 5);
            out += 5;
            break;
        default:
            x[out++] = s[i];
            break;
        }
    }
    x[out] = '\0';

    *body = x;
    return out;
}

/* Fetch the next significant base64 character from `*src'.
 *
 * Whitespace and any other byte outside the base64 alphabet is
 * skipped (RFC 2045 section 6.8 says such bytes are to be ignored).
 * Returns the character ('=' included), or '\0' once the end of the
 * string is reached.  `*src' is never advanced past the terminating
 * NUL, so the function can safely be called again after it has
 * returned '\0'.
 */
static int b64_next_char(char **src)
{
    while (**src != '\0') {
        int c = (unsigned char) **src;

        (*src)++;
        if (c == '=' || char64(c) >= 0) {
            return c;
        }
    }

    return '\0';
}

/* Decode BASE64 encoded data, in place.
 *
 * `src' must be a NUL terminated string.  The decoded bytes overwrite
 * the beginning of the buffer and are followed by a NUL byte.
 * Decoding stops at the first padding character or at the end of the
 * string; a trailing incomplete group of characters is dropped.
 *
 * Returns the number of decoded bytes (the trailing NUL excluded).
 */
apr_size_t mbox_cte_decode_b64(char *src)
{
    apr_size_t len = 0;
    char *dst = src;

    for (;;) {
        int c1, c2, c3, c4;
        int v1, v2, v3, v4;

        c1 = b64_next_char(&src);
        c2 = b64_next_char(&src);
        c3 = b64_next_char(&src);
        c4 = b64_next_char(&src);

        /* End of data, or premature EOF in the middle of a group. */
        if ((c1 == '\0') || (c2 == '\0') || (c3 == '\0') || (c4 == '\0')) {
            break;
        }

        if (c1 == '=' || c2 == '=') {
            break;
        }

        /* The output never overtakes the input: each group reads at
           least four bytes and writes at most three. */
        v1 = char64(c1);
        v2 = char64(c2);
        *dst++ = (char) (unsigned char) ((v1 << 2) | ((v2 & 0x30) >> 4));
        len++;

        if (c3 == '=') {
            break;
        }

        v3 = char64(c3);
        *dst++ = (char) (unsigned char) (((v2 & 0x0F) << 4)
                                         | ((v3 & 0x3C) >> 2));
        len++;

        if (c4 == '=') {
            break;
        }

        v4 = char64(c4);
        *dst++ = (char) (unsigned char) (((v3 & 0x03) << 6) | v4);
        len++;
    }

    *dst = '\0';

    return len;
}

/* Value of the hexadecimal digit `ch', or -1 if `ch' is not one. */
static int hex2dec_char(char ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    else if (ch >= 'A' && ch <= 'F') {
        return ch - 'A' + 10;
    }
    else if (ch >= 'a' && ch <= 'f') {
        return ch - 'a' + 10;
    }

    return -1;
}

/* Decode quoted-printable to raw text, in place.
 *
 * `p' must be a NUL terminated string.  "=XX" sequences are replaced
 * by the byte they encode, soft line breaks ("=\n" and "=\r\n") are
 * removed, and a '=' that is not followed by two hexadecimal digits
 * is copied through unchanged.  The decoded data is followed by a NUL
 * byte.
 *
 * Returns the number of decoded bytes (the trailing NUL excluded).
 */
apr_size_t mbox_cte_decode_qp(char *p)
{
    apr_size_t len = 0;
    char *src, *dst;

    dst = src = p;

    while (*src != '\0') {
        if (*src == '=') {
            int hi, lo;

            /* Soft line breaks.  src[1] is at worst the terminating
               NUL, and src[2] is only read when src[1] is not NUL. */
            if (src[1] == '\n') {
                src += 2;
                continue;
            }
            if (src[1] == '\r' && src[2] == '\n') {
                src += 3;
                continue;
            }

            hi = hex2dec_char(src[1]);
            lo = (hi < 0) ? -1 : hex2dec_char(src[2]);

            if (lo >= 0) {
                *dst++ = (char) (unsigned char) ((hi * 16) + lo);
                src += 3;
                len++;
                continue;
            }

            /* Malformed escape: fall through and keep the '='. */
        }

        *dst++ = *src++;
        len++;
    }

    *dst = '\0';

    return len;
}

/* Locate the first RFC 2047 encoded-word ("=?charset?mode?data?=")
 * in `src' without modifying it.
 *
 * On success returns 1 and sets:
 *   *start       to the leading "=?",
 *   *charset_end to the '?' that ends the charset,
 *   *mode_end    to the '?' that ends the mode,
 *   *end         to the closing "?=".
 * Returns 0 if `src' holds no complete encoded-word.
 */
static int rfc2047_locate(char *src, char **start, char **charset_end,
                          char **mode_end, char **end)
{
    char *s, *cs_end, *md_end, *e;

    s = strstr(src, "=?");
    if (!s) {
        return 0;
    }

    /* Encoding mode (first '?' after charset) */
    cs_end = strchr(s + 2, '?');
    if (!cs_end) {
        return 0;
    }

    /* Data (first '?' after mode) */
    md_end = strchr(cs_end + 1, '?');
    if (!md_end) {
        return 0;
    }

    /* The end bound must be searched after the data starts: Q-encoded
       data may itself begin with '=' and so form a "?=" together with
       the delimiter in front of it. */
    e = strstr(md_end + 1, "?=");
    if (!e) {
        return 0;
    }

    *start = s;
    *charset_end = cs_end;
    *mode_end = md_end;
    *end = e;

    return 1;
}

/* Decode, in place, the encoded-word described by the pointers that
 * rfc2047_locate() produced.
 *
 * The four delimiters are overwritten with NUL bytes, so the text in
 * front of the word, the charset, the mode and the data all become
 * separate NUL terminated strings.  Returns a pointer to the decoded
 * data.  Data with a mode other than 'q'/'Q' or 'b'/'B' is returned
 * as is.
 */
static char *rfc2047_decode_word(char *start, char *charset_end,
                                 char *mode_end, char *end)
{
    char *mode = charset_end + 1;
    char *data = mode_end + 1;
    apr_size_t data_len;

    *start = '\0';
    *charset_end = '\0';
    *mode_end = '\0';
    *end = '\0';

    /* Quoted-Printable decoding : mode 'q' */
    if ((*mode == 'q') || (*mode == 'Q')) {
        char *c;

        /* In QP header encoding, spaces are encoded either in =20 (as
           in all QP encoding) or in underscores '_' (for header
           encoding).  The first case will be handled by the QP
           decoding, so we must handle the other one. */
        for (c = data; *c != '\0'; c++) {
            if (*c == '_') {
                *c = ' ';
            }
        }

        data_len = mbox_cte_decode_qp(data);
        data[data_len] = 0;
    }
    /* Base64 decoding : mode 'b' */
    else if ((*mode == 'b') || (*mode == 'B')) {
        data_len = mbox_cte_decode_b64(data);
        data[data_len] = 0;
    }

    return data;
}

/* This function performs the decoding of strings like :
 * =?UTF-8?B?QnJhbmtvIMSMaWJlag==?=
 *
 * These strings comply with the following syntax :
 * =?charset?mode?data?= rest
 *
 * The first encoded-word found in `src' is decoded in place and a
 * pointer to the decoded data (inside `src') is returned; `src' is
 * cut at the leading "=?" and whatever follows the closing "?=" is
 * not part of the result.  If `src' holds no complete encoded-word,
 * it is returned untouched.
 *
 * NOTE: no charset conversion is performed; the returned bytes are in
 * the charset named by the encoded-word.  The conversion call was
 * already disabled in the previous revision of this function, which
 * only opened and closed a translation handle.  Anyone enabling it
 * must size the output buffer for the growth a conversion to UTF-8
 * can cause.  `p' is therefore currently unused.
 */
char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src)
{
    char *start, *charset_end, *mode_end, *end;

    (void) p;

    if (!rfc2047_locate(src, &start, &charset_end, &mode_end, &end)) {
        return src;
    }

    return rfc2047_decode_word(start, charset_end, mode_end, end);
}

/* MIME header decoding (see RFC 2047).
 *
 * Returns a string allocated from pool `p' in which every
 * encoded-word of `src' has been replaced by its decoded data; the
 * text around the encoded-words is copied unchanged.  `src' is
 * modified in the process.
 */
char *mbox_cte_decode_header(apr_pool_t *p, char *src)
{
    char *result = "";

    do {
        char *start, *charset_end, *mode_end, *end;
        char *part;

        /* No (more) complete encoded-word: the remaining text is
           plain. */
        if (!rfc2047_locate(src, &start, &charset_end, &mode_end, &end)) {
            return apr_pstrcat(p, result, src, NULL);
        }

        /* Decoding writes a NUL over the leading "=?", which leaves
           `src' holding exactly the plain text in front of the
           encoded-word. */
        part = rfc2047_decode_word(start, charset_end, mode_end, end);
        result = apr_pstrcat(p, result, src, part, NULL);

        /* Go on after the closing "?=". */
        src = end + 2;
    } while (*src);

    return result;
}