NeoMutt  2025-12-11-769-g906513
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
rfc2047.c File Reference

RFC2047 MIME extensions encoding / decoding routines. More...

#include "config.h"
#include <errno.h>
#include <iconv.h>
#include <stdbool.h>
#include <string.h>
#include "mutt/lib.h"
#include "address/lib.h"
#include "config/lib.h"
#include "core/lib.h"
#include "rfc2047.h"
#include "envelope.h"
#include "mime.h"
+ Include dependency graph for rfc2047.c:

Go to the source code of this file.

Macros

#define ENCWORD_LEN_MAX   75
 Maximum length of an RFC2047 encoded word (75 chars per RFC2047)
 
#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */
 Minimum length of an RFC2047 encoded word ("=?.?.?.?=" = 9 chars)
 
#define HSPACE(ch)
 Check if character is horizontal whitespace (space, tab, or null)
 
#define CONTINUATION_BYTE(ch)
 Check if a byte is a UTF-8 continuation byte (10xxxxxx pattern)
 

Typedefs

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)
 

Functions

static size_t b_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Base64 Encode a string - Implements encoder_t -.
 
static size_t q_encoder (char *res, const char *src, size_t srclen, const char *tocode)
 Quoted-printable Encode a string - Implements encoder_t -.
 
static char * parse_encoded_word (char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
 Parse a string and report RFC2047 elements.
 
static size_t try_block (const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Attempt to convert a block of text.
 
static size_t encode_block (char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
 Encode a block of text using an encoder.
 
static size_t choose_block (char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
 Calculate how much data can be converted.
 
static void finalize_chunk (struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
 Perform charset conversion and filtering.
 
static char * decode_word (const char *s, size_t len, enum ContentEncoding enc)
 Decode an RFC2047-encoded string.
 
static int encode (const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
 RFC2047-encode a string.
 
void rfc2047_encode (char **pd, const char *specials, int col, const struct Slist *charsets)
 RFC-2047-encode a string.
 
void rfc2047_decode (char **pd)
 Decode any RFC2047-encoded header fields.
 
void rfc2047_encode_addrlist (struct AddressList *al, const char *tag)
 Encode any RFC2047 headers, where required, in an Address list.
 
void rfc2047_decode_addrlist (struct AddressList *al)
 Decode any RFC2047 headers in an Address list.
 
void rfc2047_decode_envelope (struct Envelope *env)
 Decode the fields of an Envelope.
 
void rfc2047_encode_envelope (struct Envelope *env)
 Encode the fields of an Envelope.
 

Variables

static const char RFC2047Specials [] = "@.,;:<>[]\\\"()?/= \t"
 

Detailed Description

RFC2047 MIME extensions encoding / decoding routines.

Authors
  • Federico Kircheis
  • Pietro Cerutti
  • Richard Russon
  • Anna Figueiredo Gomes
  • наб

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file rfc2047.c.

Macro Definition Documentation

◆ ENCWORD_LEN_MAX

#define ENCWORD_LEN_MAX   75

Maximum length of an RFC2047 encoded word (75 chars per RFC2047)

Definition at line 47 of file rfc2047.c.

◆ ENCWORD_LEN_MIN

#define ENCWORD_LEN_MIN   9 /* strlen ("=?.?.?.?=") */

Minimum length of an RFC2047 encoded word ("=?.?.?.?=" = 9 chars)

Definition at line 49 of file rfc2047.c.

◆ HSPACE

#define HSPACE ( ch)
Value:
(((ch) == '\0') || ((ch) == ' ') || ((ch) == '\t'))

Check if character is horizontal whitespace (space, tab, or null)

Definition at line 52 of file rfc2047.c.

◆ CONTINUATION_BYTE

#define CONTINUATION_BYTE ( ch)
Value:
(((ch) & 0xc0) == 0x80)

Check if a byte is a UTF-8 continuation byte (10xxxxxx pattern)

Definition at line 55 of file rfc2047.c.

Typedef Documentation

◆ encoder_t

typedef size_t(* encoder_t) (char *res, const char *buf, size_t buflen, const char *tocode)

Definition at line 68 of file rfc2047.c.

Function Documentation

◆ parse_encoded_word()

static char * parse_encoded_word ( char * str,
enum ContentEncoding * enc,
char ** charset,
size_t * charsetlen,
char ** text,
size_t * textlen )
static

Parse a string and report RFC2047 elements.

Parameters
[in] str: String to parse
[out] enc: Content encoding found in the first RFC2047 word
[out] charset: Charset found in the first RFC2047 word
[out] charsetlen: Length of the charset string found
[out] text: Start of the first RFC2047 encoded text
[out] textlen: Length of the encoded text found
Return values
ptr: Start of the RFC2047 encoded word
NULL: None was found

Definition at line 155 of file rfc2047.c.

/**
 * Body of parse_encoded_word().
 *
 * Matches the precompiled PREX_RFC2047_ENCODED_WORD regex against str and
 * reports the pieces of the first encoded word through the out-parameters.
 * All returned pointers point into str; nothing is copied.
 * Returns the start of the full match, or NULL when there is no match.
 */
157{
158 regmatch_t *match = mutt_prex_capture(PREX_RFC2047_ENCODED_WORD, str);
159 if (!match)
160 return NULL;
161
162 const regmatch_t *mfull = &match[PREX_RFC2047_ENCODED_WORD_MATCH_FULL];
163 const regmatch_t *mcharset = &match[PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET];
164 const regmatch_t *mencoding = &match[PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING];
165 const regmatch_t *mtext = &match[PREX_RFC2047_ENCODED_WORD_MATCH_TEXT];
166
167 /* Charset */
168 *charset = str + mutt_regmatch_start(mcharset);
169 *charsetlen = mutt_regmatch_len(mcharset);
170
171 /* Encoding: either Q or B */
/* The encoding letter is compared case-insensitively; anything that is not
 * 'q' is treated as Base64. */
172 *enc = (mutt_tolower(str[mutt_regmatch_start(mencoding)]) == 'q') ? ENC_QUOTED_PRINTABLE :
173 ENC_BASE64;
174
175 *text = str + mutt_regmatch_start(mtext);
176 *textlen = mutt_regmatch_len(mtext);
177 return str + mutt_regmatch_start(mfull);
178}
int mutt_tolower(int arg)
Wrapper for tolower(3)
Definition ctype.c:126
@ ENC_BASE64
Base-64 encoded text.
Definition mime.h:52
@ ENC_QUOTED_PRINTABLE
Quoted-printable text.
Definition mime.h:51
regmatch_t * mutt_prex_capture(enum Prex which, const char *str)
Match a precompiled regex against a string.
Definition prex.c:301
@ PREX_RFC2047_ENCODED_WORD_MATCH_ENCODING
=?utf-8?[Q]?=E8=81...?=
Definition prex.h:98
@ PREX_RFC2047_ENCODED_WORD_MATCH_TEXT
=?utf-8?Q?[=E8=81...]?=
Definition prex.h:99
@ PREX_RFC2047_ENCODED_WORD_MATCH_CHARSET
=?[utf-8]?Q?=E8=81...?=
Definition prex.h:97
@ PREX_RFC2047_ENCODED_WORD_MATCH_FULL
[=?utf-8?Q?=E8=81...?=]
Definition prex.h:96
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition prex.h:36
static size_t mutt_regmatch_len(const regmatch_t *match)
Return the length of a match.
Definition regex3.h:76
static regoff_t mutt_regmatch_start(const regmatch_t *match)
Return the start of a match.
Definition regex3.h:56
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ try_block()

static size_t try_block ( const char * d,
size_t dlen,
const char * fromcode,
const char * tocode,
encoder_t * encoder,
size_t * wlen )
static

Attempt to convert a block of text.

Parameters
d: String to convert
dlen: Length of string
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function (set on success)
wlen: Length of the resulting encoded word (set on success)
Return values
0: Success, string converted
>0: Error, number of bytes that could be converted

If the data could be converted using encoder, then set *encoder and *wlen. Otherwise return an upper bound on the maximum length of the data which could be converted.

The data is converted from fromcode (which must be stateless) to tocode, unless fromcode is NULL, in which case the data is assumed to be already in tocode, which should be 8-bit and stateless.

Definition at line 199 of file rfc2047.c.

/**
 * Body of try_block().
 *
 * Converts (d, dlen) into a scratch buffer (via iconv when fromcode is set,
 * by plain copy otherwise), then works out how long the result would be as a
 * Base64 and as a Quoted-printable encoded word. On success the shorter
 * encoder that fits in ENCWORD_LEN_MAX is stored in *encoder, its word length
 * in *wlen, and 0 is returned. Otherwise a non-zero length is returned and
 * the out-parameters are left alone.
 */
201{
202 char buf[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
203 const char *ib = NULL;
204 char *ob = NULL;
205 size_t ibl, obl;
206 int count, len, len_b, len_q;
207
208 if (fromcode)
209 {
210 iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
/* Do not hand an invalid conversion descriptor to iconv() */
211 ASSERT(iconv_t_valid(cd));
212 ib = d;
213 ibl = dlen;
214 ob = buf;
/* The charset name is part of the encoded word, so it eats into the space
 * available for the converted text */
215 obl = sizeof(buf) - strlen(tocode);
/* The second iconv() call, with NULL input, flushes any pending output */
216 if ((iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl) == ICONV_ILLEGAL_SEQ) ||
217 (iconv(cd, NULL, NULL, &ob, &obl) == ICONV_ILLEGAL_SEQ))
218 {
/* The only failure expected here is running out of output space */
219 ASSERT(errno == E2BIG);
220 ASSERT(ib > d);
221 return ((ib - d) == dlen) ? dlen : ib - d + 1;
222 }
223 }
224 else
225 {
/* No source charset: the data is taken as-is, so only the size matters */
226 if (dlen > (sizeof(buf) - strlen(tocode)))
227 return sizeof(buf) - strlen(tocode) + 1;
228 memcpy(buf, d, dlen);
229 ob = buf + dlen;
230 }
231
/* Count the bytes that Q encoding would have to write in escaped form */
232 count = 0;
233 for (char *p = buf; p < ob; p++)
234 {
235 unsigned char c = *p;
236 ASSERT(strchr(RFC2047Specials, '?'));
237 if ((c >= 0x7f) || (c < 0x20) || (*p == '_') ||
238 ((c != ' ') && strchr(RFC2047Specials, *p)))
239 {
240 count++;
241 }
242 }
243
/* len is the fixed overhead of the word; Base64 emits 4 characters per
 * 3 input bytes, Q emits 2 extra characters per escaped byte */
244 len = ENCWORD_LEN_MIN - 2 + strlen(tocode);
245 len_b = len + (((ob - buf) + 2) / 3) * 4;
246 len_q = len + (ob - buf) + 2 * count;
247
248 /* Apparently RFC1468 says to use B encoding for iso-2022-jp. */
249 if (mutt_istr_equal(tocode, "ISO-2022-JP"))
250 len_q = ENCWORD_LEN_MAX + 1;
251
252 if ((len_b < len_q) && (len_b <= ENCWORD_LEN_MAX))
253 {
254 *encoder = b_encoder;
255 *wlen = len_b;
256 return 0;
257 }
258 else if (len_q <= ENCWORD_LEN_MAX)
259 {
260 *encoder = q_encoder;
261 *wlen = len_q;
262 return 0;
263 }
264 else
265 {
266 return dlen;
267 }
268}
static size_t b_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Base64 Encode a string - Implements encoder_t -.
Definition rfc2047.c:77
static size_t q_encoder(char *res, const char *src, size_t srclen, const char *tocode)
Quoted-printable Encode a string - Implements encoder_t -.
Definition rfc2047.c:110
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition charset.c:580
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition charset.h:66
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition charset.h:123
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition string.c:677
#define ENCWORD_LEN_MIN
Minimum length of an RFC2047 encoded word ("=?.?.?.?=" = 9 chars)
Definition rfc2047.c:49
static const char RFC2047Specials[]
Definition rfc2047.c:72
#define ENCWORD_LEN_MAX
Maximum length of an RFC2047 encoded word (75 chars per RFC2047)
Definition rfc2047.c:47
#define ASSERT(COND)
Definition signal2.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode_block()

static size_t encode_block ( char * str,
char * buf,
size_t buflen,
const char * fromcode,
const char * tocode,
encoder_t encoder )
static

Encode a block of text using an encoder.

Parameters
str: Buffer for the encoded result
buf: Data to encode
buflen: Length of the data in buf
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function
Return values
num: Length of the encoded word

Encode the data (buf, buflen) into str using the encoder.

Definition at line 282 of file rfc2047.c.

/**
 * Body of encode_block().
 *
 * Writes the encoded word for (buf, buflen) into str using encoder and
 * returns whatever the encoder returns. When fromcode is NULL the data is
 * passed to the encoder unchanged; otherwise it is first converted from
 * fromcode to tocode through a stack buffer.
 */
284{
285 if (!fromcode)
286 {
287 return (*encoder)(str, buf, buflen, tocode);
288 }
289
290 const iconv_t cd = mutt_ch_iconv_open(tocode, fromcode, MUTT_ICONV_NO_FLAGS);
/* Do not hand an invalid conversion descriptor to iconv() */
291 ASSERT(iconv_t_valid(cd));
292 const char *ib = buf;
293 size_t ibl = buflen;
294 char tmp[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
295 char *ob = tmp;
296 size_t obl = sizeof(tmp) - strlen(tocode);
/* Convert, then flush with a NULL-input call; both are asserted to succeed */
297 const size_t n1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
298 const size_t n2 = iconv(cd, NULL, NULL, &ob, &obl);
299 ASSERT((n1 != ICONV_ILLEGAL_SEQ) && (n2 != ICONV_ILLEGAL_SEQ));
300 return (*encoder)(str, tmp, ob - tmp, tocode);
301}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ choose_block()

static size_t choose_block ( char * d,
size_t dlen,
int col,
const char * fromcode,
const char * tocode,
encoder_t * encoder,
size_t * wlen )
static

Calculate how much data can be converted.

Parameters
d: String to convert
dlen: Length of string
col: Starting column to convert
fromcode: Original encoding
tocode: New encoding
encoder: Encoding function (set by this call)
wlen: Length of the resulting encoded word (set by this call)
Return values
num: Bytes that can be converted

Discover how much of the data (d, dlen) can be converted into a single encoded word. Return how much data can be converted, and set the length *wlen of the encoded word and *encoder. We start in column col, which limits the length of the word.

Definition at line 319 of file rfc2047.c.

/**
 * Body of choose_block().
 *
 * Starts with the whole input (dlen bytes) and keeps shrinking the candidate
 * length n until try_block() accepts it and the resulting word fits on the
 * line when started at column col. Returns the accepted n; *encoder and
 * *wlen hold the values set by the last try_block() call.
 */
321{
322 const bool utf8 = fromcode && mutt_istr_equal(fromcode, "utf-8");
323
324 size_t n = dlen;
325 while (true)
326 {
327 ASSERT(n > 0);
328 const size_t nn = try_block(d, n, fromcode, tocode, encoder, wlen);
/* Accept when the block converted (nn == 0) and either the word fits from
 * column col, or only a single byte is left to try */
329 if ((nn == 0) && (((col + *wlen) <= (ENCWORD_LEN_MAX + 1)) || (n <= 1)))
330 break;
/* On failure try_block() returns a length (nn); retry with one less than
 * that. If it succeeded but the word was too long, shrink n by one. */
331 n = ((nn != 0) ? nn : n) - 1;
332 ASSERT(n > 0);
/* For UTF-8 input, back up while d[n] is a continuation byte so that the
 * cut does not land inside a multibyte sequence */
333 if (utf8)
334 while ((n > 1) && CONTINUATION_BYTE(d[n]))
335 n--;
336 }
337 return n;
338}
#define CONTINUATION_BYTE(ch)
Check if a byte is a UTF-8 continuation byte (10xxxxxx pattern)
Definition rfc2047.c:55
static size_t try_block(const char *d, size_t dlen, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Attempt to convert a block of text.
Definition rfc2047.c:199
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ finalize_chunk()

static void finalize_chunk ( struct Buffer * res,
struct Buffer * buf,
char * charset,
size_t charsetlen )
static

Perform charset conversion and filtering.

Parameters
[out] res: Buffer where the resulting string is appended
[in] buf: Buffer with the input string
[in] charset: Charset to use for the conversion
[in] charsetlen: Length of the charset parameter

The buffer buf is reinitialized at the end of this function.

Definition at line 349 of file rfc2047.c.

/**
 * Body of finalize_chunk().
 *
 * Converts the accumulated text in buf from charset to the configured
 * $charset, filters unprintable characters, appends the result to res, and
 * then frees and re-initialises buf. Does nothing when charset is NULL.
 */
350{
351 if (!charset)
352 return;
/* charset points into a larger string and is not NUL-terminated, so
 * terminate it for the duration of the conversion call, then put the
 * original byte back */
353 char end = charset[charsetlen];
354 charset[charsetlen] = '\0';
355 mutt_ch_convert_string(&buf->data, charset, cc_charset(), MUTT_ICONV_HOOK_FROM);
356 charset[charsetlen] = end;
357 mutt_mb_filter_unprintable(&buf->data);
358 buf_addstr(res, buf->data);
359 FREE(&buf->data);
360 buf_init(buf);
361}
struct Buffer * buf_init(struct Buffer *buf)
Initialise a new Buffer.
Definition buffer.c:61
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
const char * cc_charset(void)
Get the cached value of $charset.
int mutt_mb_filter_unprintable(char **s)
Replace unprintable characters.
Definition mbyte.c:424
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition charset.c:817
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition charset.h:67
char * data
Pointer to data.
Definition buffer.h:37
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decode_word()

static char * decode_word ( const char * s,
size_t len,
enum ContentEncoding enc )
static

Decode an RFC2047-encoded string.

Parameters
s: String to decode
len: Length of the string
enc: Encoding type
Return values
ptr: Decoded string
Note
The input string must be NUL-terminated; the len parameter is an optimization. The caller must free the returned string.

Definition at line 373 of file rfc2047.c.

/**
 * Body of decode_word().
 *
 * Decodes the text part of one encoded word. s must have a NUL at s[len]
 * (asserted below). Returns a newly allocated string, or NULL when Base64
 * decoding fails. Any enc other than ENC_QUOTED_PRINTABLE or ENC_BASE64
 * trips the final assertion.
 */
374{
375 const char *it = s;
376 const char *end = s + len;
377
378 ASSERT(*end == '\0');
379
380 if (enc == ENC_QUOTED_PRINTABLE)
381 {
382 struct Buffer *buf = buf_pool_get();
383 for (; it < end; it++)
384 {
/* In the Q encoding an underscore stands for a space */
385 if (*it == '_')
386 {
387 buf_addch(buf, ' ');
388 }
/* "=XY" with two hex digits is one encoded byte. The (& ~127) tests reject
 * bytes outside 0..127 before they are handed to hexval(). */
389 else if ((it[0] == '=') && (!(it[1] & ~127) && (hexval(it[1]) != -1)) &&
390 (!(it[2] & ~127) && (hexval(it[2]) != -1)))
391 {
392 buf_addch(buf, (hexval(it[1]) << 4) | hexval(it[2]));
393 it += 2;
394 }
/* Anything else, including a malformed '=' escape, is copied through as-is */
395 else
396 {
397 buf_addch(buf, *it);
398 }
399 }
400 char *str = buf_strdup(buf);
401 buf_pool_release(&buf);
402 return str;
403 }
404 else if (enc == ENC_BASE64)
405 {
/* 3 output bytes per 4 input characters, plus one byte for the NUL that is
 * written at out[dlen] below */
406 const int olen = 3 * len / 4 + 1;
407 char *out = MUTT_MEM_MALLOC(olen, char);
408 int dlen = mutt_b64_decode(it, out, olen);
409 if (dlen == -1)
410 {
411 FREE(&out);
412 return NULL;
413 }
414 out[dlen] = '\0';
415 return out;
416 }
417
418 ASSERT(0); /* The enc parameter has an invalid value */
419 return NULL;
420}
int mutt_b64_decode(const char *in, char *out, size_t olen)
Convert NUL-terminated base64 string to raw bytes.
Definition base64.c:180
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition buffer.c:241
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition buffer.c:571
#define MUTT_MEM_MALLOC(n, type)
Definition memory.h:53
#define hexval(ch)
Convert hexadecimal character to its integer value.
Definition mime.h:82
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111
String manipulation buffer.
Definition buffer.h:36
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ encode()

static int encode ( const char * d,
size_t dlen,
int col,
const char * fromcode,
const struct Slist * charsets,
char ** e,
size_t * elen,
const char * specials )
static

RFC2047-encode a string.

Parameters
[in] d: String to convert
[in] dlen: Length of string
[in] col: Starting column to convert
[in] fromcode: Original encoding
[in] charsets: List of allowable encodings (colon separated)
[out] e: Encoded string
[out] elen: Length of encoded string
[in] specials: Special characters to be encoded
Return values
0: Success

Definition at line 434 of file rfc2047.c.

/**
 * Body of encode().
 *
 * Works on a private copy (u) of the input. It picks the region [t0,t1) that
 * has to be encoded, widens it to whitespace boundaries, and then emits that
 * region as one or more encoded words separated by "\n\t". The text before
 * t0 and after t1 is copied through unchanged.
 *
 * The result is returned through *e and *elen. The return value rc is 0
 * unless conversion to UTF-8 failed (1) or no target charset could be chosen
 * (2); in both of those cases icode is cleared and the data is passed to the
 * block helpers without a source charset.
 *
 * NOTE(review): *elen is ulen on the "no encoding required" path but
 * buflen + 1 on the encoded path, so the two paths do not agree on whether
 * the terminating NUL is counted. The caller visible on this page,
 * rfc2047_encode(), ignores elen; confirm before relying on it.
 */
436{
437 int rc = 0;
438 char *buf = NULL;
439 size_t bufpos, buflen;
440 char *t0 = NULL, *t1 = NULL, *t = NULL;
441 char *s0 = NULL, *s1 = NULL;
442 size_t ulen, r, wlen = 0;
443 encoder_t encoder = NULL;
444 char *tocode1 = NULL;
445 const char *tocode = NULL;
446 const char *icode = "utf-8";
447
448 /* Try to convert to UTF-8. */
449 char *u = mutt_strn_dup(d, dlen);
450 if (mutt_ch_convert_string(&u, fromcode, icode, MUTT_ICONV_NO_FLAGS) != 0)
451 {
452 rc = 1;
453 icode = 0;
454 }
455 ulen = mutt_str_len(u);
456
457 /* Find earliest and latest things we must encode. */
/* t0/t1 track bytes with the high bit set and "=?" sequences that start the
 * string or follow whitespace; s0/s1 track characters listed in specials. */
458 s0 = 0;
459 s1 = 0;
460 t0 = 0;
461 t1 = 0;
462 for (t = u; t < (u + ulen); t++)
463 {
464 if ((*t & 0x80) || ((*t == '=') && (t[1] == '?') && ((t == u) || HSPACE(*(t - 1)))))
465 {
466 if (!t0)
467 t0 = t;
468 t1 = t;
469 }
470 else if (specials && *t && strchr(specials, *t))
471 {
472 if (!s0)
473 s0 = t;
474 s1 = t;
475 }
476 }
477
478 /* If we have something to encode, include RFC822 specials */
479 if (t0 && s0 && (s0 < t0))
480 t0 = s0;
481 if (t1 && s1 && (s1 > t1))
482 t1 = s1;
483
484 if (!t0)
485 {
486 /* No encoding is required. */
/* Ownership of u passes to the caller through *e */
487 *e = u;
488 *elen = ulen;
489 return rc;
490 }
491
492 /* Choose target charset. */
493 tocode = fromcode;
494 if (icode)
495 {
496 tocode1 = mutt_ch_choose(icode, charsets, u, ulen, 0, 0);
497 if (tocode1)
498 {
499 tocode = tocode1;
500 }
501 else
502 {
503 rc = 2;
504 icode = 0;
505 }
506 }
507
508 /* Hack to avoid labelling 8-bit data as us-ascii. */
509 if (!icode && mutt_ch_is_us_ascii(tocode))
510 tocode = "unknown-8bit";
511
512 /* Adjust t0 for maximum length of line. */
513 t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
514 if (t < u)
515 t = u;
516 if (t < t0)
517 t0 = t;
518
519 /* Adjust t0 until we can encode a character after a space. */
520 for (; t0 > u; t0--)
521 {
522 if (!HSPACE(*(t0 - 1)))
523 continue;
524 t = t0 + 1;
/* With a known source charset, step over continuation bytes so that the
 * probe handed to try_block() covers a whole character */
525 if (icode)
526 while ((t < (u + ulen)) && CONTINUATION_BYTE(*t))
527 t++;
528 if ((try_block(t0, t - t0, icode, tocode, &encoder, &wlen) == 0) &&
529 ((col + (t0 - u) + wlen) <= (ENCWORD_LEN_MAX + 1)))
530 {
531 break;
532 }
533 }
534
535 /* Adjust t1 until we can encode a character before a space. */
/* HSPACE() also matches NUL, so the byte at the end of the string counts as
 * a boundary */
536 for (; t1 < (u + ulen); t1++)
537 {
538 if (!HSPACE(*t1))
539 continue;
540 t = t1 - 1;
541 if (icode)
542 while (CONTINUATION_BYTE(*t))
543 t--;
544 if ((try_block(t, t1 - t, icode, tocode, &encoder, &wlen) == 0) &&
545 ((1 + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1)))
546 {
547 break;
548 }
549 }
550
551 /* We shall encode the region [t0,t1). */
552
553 /* Initialise the output buffer with the us-ascii prefix. */
554 buflen = 2 * ulen;
555 buf = MUTT_MEM_MALLOC(buflen, char);
556 bufpos = t0 - u;
557 memcpy(buf, u, t0 - u);
558
559 col += t0 - u;
560
561 t = t0;
/* Each pass emits one encoded word followed by a line break. The loop is
 * left, with the final word still to be written, once the rest of the
 * region and the plain suffix fit on the current line. */
562 while (true)
563 {
564 /* Find how much we can encode. */
565 size_t n = choose_block(t, t1 - t, col, icode, tocode, &encoder, &wlen);
566 if (n == (t1 - t))
567 {
568 /* See if we can fit the us-ascii suffix, too. */
569 if ((col + wlen + (u + ulen - t1)) <= (ENCWORD_LEN_MAX + 1))
570 break;
571 n = t1 - t - 1;
572 if (icode)
573 while (CONTINUATION_BYTE(t[n]))
574 n--;
575 if (n == 0)
576 {
577 /* This should only happen in the really stupid case where the
578 * only word that needs encoding is one character long, but
579 * there is too much us-ascii stuff after it to use a single
580 * encoded word. We add the next word to the encoded region
581 * and try again. */
582 ASSERT(t1 < (u + ulen));
583 for (t1++; (t1 < (u + ulen)) && !HSPACE(*t1); t1++)
584 ; // do nothing
585
586 continue;
587 }
588 n = choose_block(t, n, col, icode, tocode, &encoder, &wlen);
589 }
590
591 /* Add to output buffer. */
592 const char *line_break = "\n\t";
593 const int lb_len = 2; /* strlen(line_break) */
594
/* Grow the buffer when this word plus the line break would not fit */
595 if ((bufpos + wlen + lb_len) > buflen)
596 {
597 buflen = bufpos + wlen + lb_len;
598 MUTT_MEM_REALLOC(&buf, buflen, char);
599 }
600 r = encode_block(buf + bufpos, t, n, icode, tocode, encoder);
601 ASSERT(r == wlen);
602 bufpos += wlen;
603 memcpy(buf + bufpos, line_break, lb_len);
604 bufpos += lb_len;
605
/* The continuation line starts with the tab, so the next word begins at
 * column 1 */
606 col = 1;
607
608 t += n;
609 }
610
611 /* Add last encoded word and us-ascii suffix to buffer. */
612 buflen = bufpos + wlen + (u + ulen - t1);
613 MUTT_MEM_REALLOC(&buf, buflen + 1, char);
614 r = encode_block(buf + bufpos, t, t1 - t, icode, tocode, encoder);
615 ASSERT(r == wlen);
616 bufpos += wlen;
617 memcpy(buf + bufpos, t1, u + ulen - t1);
618
619 FREE(&tocode1);
620 FREE(&u);
621
622 buf[buflen] = '\0';
623
624 *e = buf;
625 *elen = buflen + 1;
626 return rc;
627}
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition memory.h:55
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition charset.c:1094
#define mutt_ch_is_us_ascii(str)
Definition charset.h:108
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition string.c:384
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:503
static size_t choose_block(char *d, size_t dlen, int col, const char *fromcode, const char *tocode, encoder_t *encoder, size_t *wlen)
Calculate how much data can be converted.
Definition rfc2047.c:319
size_t(* encoder_t)(char *res, const char *buf, size_t buflen, const char *tocode)
Definition rfc2047.c:68
static size_t encode_block(char *str, char *buf, size_t buflen, const char *fromcode, const char *tocode, encoder_t encoder)
Encode a block of text using an encoder.
Definition rfc2047.c:282
#define HSPACE(ch)
Check if character is horizontal whitespace (space, tab, or null)
Definition rfc2047.c:52
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode()

void rfc2047_encode ( char ** pd,
const char * specials,
int col,
const struct Slist * charsets )

RFC-2047-encode a string.

Parameters
[in,out] pd: String to be encoded, and resulting encoded string
[in] specials: Special characters to be encoded
[in] col: Starting index in string
[in] charsets: List of charsets to choose from

Definition at line 636 of file rfc2047.c.

/**
 * Body of rfc2047_encode().
 *
 * Replaces *pd with the output of encode(), using the configured $charset as
 * the source charset. Returns without touching *pd when pd or *pd is NULL,
 * or when cc_charset() returns NULL. The return code of encode() is not
 * examined.
 */
637{
638 if (!pd || !*pd)
639 return;
640
641 const char *const c_charset = cc_charset();
642 if (!c_charset)
643 return;
644
/* When the caller supplies no charset list, use a temporary one holding
 * just "utf-8"; it is freed again below */
645 struct Slist *fallback = NULL;
646 if (!charsets)
647 {
648 fallback = slist_parse("utf-8", D_SLIST_SEP_COLON);
649 charsets = fallback;
650 }
651
652 char *e = NULL;
653 size_t elen = 0;
654 encode(*pd, strlen(*pd), col, c_charset, charsets, &e, &elen, specials);
655
656 slist_free(&fallback);
/* The old string is freed and the pointer now refers to the encoded copy */
657 FREE(pd);
658 *pd = e;
659}
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition slist.c:177
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition slist.c:124
static int encode(const char *d, size_t dlen, int col, const char *fromcode, const struct Slist *charsets, char **e, size_t *elen, const char *specials)
RFC2047-encode a string.
Definition rfc2047.c:434
String list.
Definition slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition types.h:112
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode()

void rfc2047_decode ( char ** pd)

Decode any RFC2047-encoded header fields.

Parameters
[in,out] pd: String to be decoded, and resulting decoded string

Try to decode anything that looks like a valid RFC2047 encoded header field, ignoring RFC822 parsing rules. If decoding fails, for example due to an invalid base64 string, the original input is left untouched.

Definition at line 669 of file rfc2047.c.

/**
 * Body of rfc2047_decode().
 *
 * Walks the string, alternating between plain text ("holes") and encoded
 * words. Decoded words are collected in prev for as long as consecutive
 * words share a charset, so that a multibyte character split across two
 * words is converted in one piece. finalize_chunk() moves prev into the
 * output buffer. On success *pd is freed and replaced by the decoded string.
 * If a word fails to decode, the function stops and *pd is left exactly as
 * it was passed in.
 */
670{
671 if (!pd || !*pd)
672 return;
673
674 struct Buffer *buf = buf_pool_get(); // Output buffer
675 char *s = *pd; // Read pointer
676 char *beg = NULL; // Begin of encoded word
677 enum ContentEncoding enc = ENC_OTHER; // ENC_BASE64 or ENC_QUOTED_PRINTABLE
678 char *charset = NULL; // Which charset
679 size_t charsetlen = 0; // Length of the charset
680 char *text = NULL; // Encoded text
681 size_t textlen = 0; // Length of encoded text
682
683 /* Keep some state in case the next decoded word is using the same charset
684 * and it happens to be split in the middle of a multibyte character.
685 * See https://github.com/neomutt/neomutt/issues/1015 */
686 struct Buffer *prev = buf_pool_get(); /* Previously decoded word */
687 char *prev_charset = NULL; /* Previously used charset */
688 size_t prev_charsetlen = 0; /* Length of the previously used charset */
689
690 const struct Slist *c_assumed_charset = cc_assumed_charset();
691 const char *c_charset = cc_charset();
692 while (*s)
693 {
694 beg = parse_encoded_word(s, &enc, &charset, &charsetlen, &text, &textlen);
695 if (beg != s)
696 {
697 /* Some non-encoded text was found */
698 size_t holelen = beg ? beg - s : mutt_str_len(s);
699
700 /* Ignore whitespace between encoded words */
701 if (beg && (mutt_str_lws_len(s, holelen) == holelen))
702 {
703 s = beg;
704 continue;
705 }
706
707 /* If we have some previously decoded text, add it now */
708 if (!buf_is_empty(prev))
709 {
710 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
711 }
712
713 /* Add non-encoded part */
714 if (slist_is_empty(c_assumed_charset))
715 {
716 buf_addstr_n(buf, s, holelen);
717 }
718 else
719 {
720 char *conv = mutt_strn_dup(s, holelen);
721 mutt_ch_convert_nonmime_string(c_assumed_charset, c_charset, &conv);
722 buf_addstr(buf, conv);
723 FREE(&conv);
724 }
725 s += holelen;
726 }
727 if (beg)
728 {
729 /* Some encoded text was found */
/* decode_word() requires a NUL at text[textlen]. That byte belongs to the
 * caller's string, so it is saved and put back straight after decoding;
 * otherwise a failed decode would leave the input truncated. */
730 const char saved = text[textlen];
731 text[textlen] = '\0';
732 char *decoded = decode_word(text, textlen, enc);
733 text[textlen] = saved;
734 if (!decoded)
735 goto done;
736 if (!buf_is_empty(prev) && ((prev_charsetlen != charsetlen) ||
737 !mutt_strn_equal(prev_charset, charset, charsetlen)))
738 {
739 /* Different charset, convert the previous chunk and add it to the
740 * final result */
741 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
742 }
743
744 buf_addstr(prev, decoded);
745 FREE(&decoded);
746 prev_charset = charset;
747 prev_charsetlen = charsetlen;
748 s = text + textlen + 2; /* Skip final ?= */
749 }
750 }
751
752 /* Save the last chunk */
753 if (!buf_is_empty(prev))
754 {
755 finalize_chunk(buf, prev, prev_charset, prev_charsetlen);
756 }
757
758 FREE(pd);
759 *pd = buf_strdup(buf);
760
761done:
762 buf_pool_release(&buf);
763 buf_pool_release(&prev);
764}
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition buffer.c:96
bool buf_is_empty(const struct Buffer *buf)
Is the Buffer empty?
Definition buffer.c:291
const struct Slist * cc_assumed_charset(void)
Get the cached value of $assumed_charset.
ContentEncoding
Content-Transfer-Encoding.
Definition mime.h:47
@ ENC_OTHER
Encoding unknown.
Definition mime.h:48
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition charset.c:317
bool slist_is_empty(const struct Slist *list)
Is the slist empty?
Definition slist.c:140
size_t mutt_str_lws_len(const char *s, size_t n)
Measure the linear-white-space at the beginning of a string.
Definition string.c:633
bool mutt_strn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings (to a maximum), safely.
Definition string.c:429
static char * parse_encoded_word(char *str, enum ContentEncoding *enc, char **charset, size_t *charsetlen, char **text, size_t *textlen)
Parse a string and report RFC2047 elements.
Definition rfc2047.c:155
static char * decode_word(const char *s, size_t len, enum ContentEncoding enc)
Decode an RFC2047-encoded string.
Definition rfc2047.c:373
static void finalize_chunk(struct Buffer *res, struct Buffer *buf, char *charset, size_t charsetlen)
Perform charset conversion and filtering.
Definition rfc2047.c:349
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_addrlist()

void rfc2047_encode_addrlist ( struct AddressList * al,
const char * tag )

Encode any RFC2047 headers, where required, in an Address list.

Parameters
al: AddressList
tag: Header tag (used for wrapping calculation)
Note
rfc2047_encode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 774 of file rfc2047.c.

/**
 * Body of rfc2047_encode_addrlist().
 *
 * For every address in al, encodes the personal name if there is one;
 * otherwise, for a group entry with a mailbox, encodes the mailbox. Each
 * field is encoded on a heap copy and then copied back into its Buffer.
 * Does nothing when al is NULL.
 */
775{
776 if (!al)
777 return;
778
/* Starting column: the tag length plus 2, or 32 when no tag is given.
 * NOTE(review): the "+ 2" presumably accounts for the ": " after the header
 * name; confirm. */
779 int col = tag ? strlen(tag) + 2 : 32;
780 struct Address *a = NULL;
781 char *data = NULL;
782 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
783 TAILQ_FOREACH(a, al, entries)
784 {
785 if (a->personal)
786 {
787 data = buf_strdup(a->personal);
788 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
789 buf_strcpy(a->personal, data);
790 FREE(&data);
791 }
792 else if (a->group && a->mailbox)
793 {
794 data = buf_strdup(a->mailbox);
795 rfc2047_encode(&data, AddressSpecials, col, c_send_charset);
796 buf_strcpy(a->mailbox, data);
797 FREE(&data);
798 }
799 }
800}
const char AddressSpecials[]
Characters with special meaning for email addresses.
Definition address.c:45
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition buffer.c:395
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition helpers.c:242
#define TAILQ_FOREACH(var, head, field)
Definition queue.h:782
void rfc2047_encode(char **pd, const char *specials, int col, const struct Slist *charsets)
RFC-2047-encode a string.
Definition rfc2047.c:636
An email address.
Definition address.h:35
struct Buffer * personal
Real name of address.
Definition address.h:36
bool group
Group mailbox?
Definition address.h:38
struct Buffer * mailbox
Mailbox and host address.
Definition address.h:37
Container for Accounts, Notifications.
Definition neomutt.h:41
struct ConfigSubset * sub
Inherited config items.
Definition neomutt.h:49
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_addrlist()

void rfc2047_decode_addrlist ( struct AddressList * al)

Decode any RFC2047 headers in an Address list.

Parameters
al: AddressList
Note
rfc2047_decode() may realloc the data pointer it's given, so work on a copy to avoid breaking the Buffer

Definition at line 809 of file rfc2047.c.

810{
811 if (!al)
812 return;
813
814 const bool assumed = !slist_is_empty(cc_assumed_charset());
815 struct Address *a = NULL;
816 char *data = NULL;
817 TAILQ_FOREACH(a, al, entries)
818 {
819 if (a->personal && ((buf_find_string(a->personal, "=?")) || assumed))
820 {
821 data = buf_strdup(a->personal);
822 rfc2047_decode(&data);
823 buf_strcpy(a->personal, data);
824 FREE(&data);
825 }
826 else if (a->group && a->mailbox && buf_find_string(a->mailbox, "=?"))
827 {
828 data = buf_strdup(a->mailbox);
829 rfc2047_decode(&data);
830 buf_strcpy(a->mailbox, data);
831 FREE(&data);
832 }
833 }
834}
const char * buf_find_string(const struct Buffer *buf, const char *s)
Return a pointer to a substring found in the buffer.
Definition buffer.c:638
void rfc2047_decode(char **pd)
Decode any RFC2047-encoded header fields.
Definition rfc2047.c:669
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_decode_envelope()

void rfc2047_decode_envelope ( struct Envelope * env)

Decode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 840 of file rfc2047.c.

841{
842 if (!env)
843 return;
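844 rfc2047_decode_addrlist(&env->from);
845 rfc2047_decode_addrlist(&env->to);
846 rfc2047_decode_addrlist(&env->cc);
847 rfc2047_decode_addrlist(&env->bcc);
848 rfc2047_decode_addrlist(&env->reply_to);
849 rfc2047_decode_addrlist(&env->mail_followup_to);
850 rfc2047_decode_addrlist(&env->return_path);
851 rfc2047_decode_addrlist(&env->sender);
852 rfc2047_decode(&env->x_label);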
853
854 char *subj = env->subject;
855 *(char **) &env->subject = NULL;
856 rfc2047_decode(&subj);
857 mutt_env_set_subject(env, subj);
858 FREE(&subj);
859}
void mutt_env_set_subject(struct Envelope *env, const char *subj)
Set both subject and real_subj to subj.
Definition envelope.c:68
void rfc2047_decode_addrlist(struct AddressList *al)
Decode any RFC2047 headers in an Address list.
Definition rfc2047.c:809
struct AddressList return_path
Return path for the Email.
Definition envelope.h:58
char *const subject
Email's subject.
Definition envelope.h:70
struct AddressList to
Email's 'To' list.
Definition envelope.h:60
struct AddressList reply_to
Email's 'reply-to'.
Definition envelope.h:64
struct AddressList mail_followup_to
Email's 'mail-followup-to'.
Definition envelope.h:65
struct AddressList cc
Email's 'Cc' list.
Definition envelope.h:61
struct AddressList sender
Email's sender.
Definition envelope.h:63
struct AddressList bcc
Email's 'Bcc' list.
Definition envelope.h:62
char * x_label
X-Label.
Definition envelope.h:76
struct AddressList from
Email's 'From' list.
Definition envelope.h:59
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ rfc2047_encode_envelope()

void rfc2047_encode_envelope ( struct Envelope * env)

Encode the fields of an Envelope.

Parameters
env: Envelope

Definition at line 865 of file rfc2047.c.

866{
867 if (!env)
868 return;
869 rfc2047_encode_addrlist(&env->from, "From");
870 rfc2047_encode_addrlist(&env->to, "To");
871 rfc2047_encode_addrlist(&env->cc, "Cc");
872 rfc2047_encode_addrlist(&env->bcc, "Bcc");
873 rfc2047_encode_addrlist(&env->reply_to, "Reply-To");
874 rfc2047_encode_addrlist(&env->mail_followup_to, "Mail-Followup-To");
875 rfc2047_encode_addrlist(&env->sender, "Sender");
876 const struct Slist *const c_send_charset = cs_subset_slist(NeoMutt->sub, "send_charset");
877 rfc2047_encode(&env->x_label, NULL, sizeof("X-Label:"), c_send_charset);
878
879 char *subj = env->subject;
880 *(char **) &env->subject = NULL;
881 rfc2047_encode(&subj, NULL, sizeof("Subject:"), c_send_charset);
882 mutt_env_set_subject(env, subj);
883 FREE(&subj);
884}
void rfc2047_encode_addrlist(struct AddressList *al, const char *tag)
Encode any RFC2047 headers, where required, in an Address list.
Definition rfc2047.c:774
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ RFC2047Specials

const char RFC2047Specials[] = "@.,;:<>[]\\\"()?/= \t"
static

Definition at line 72 of file rfc2047.c.