NeoMutt  2025-12-11-435-g4ac674
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
charset.c File Reference

Conversion between different character encodings. More...

#include "config.h"
#include <errno.h>
#include <iconv.h>
#include <langinfo.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "charset.h"
#include "buffer.h"
#include "list.h"
#include "logging2.h"
#include "memory.h"
#include "pool.h"
#include "queue.h"
#include "regex3.h"
#include "slist.h"
#include "string2.h"
#include <libintl.h>
+ Include dependency graph for charset.c:

Go to the source code of this file.

Data Structures

struct  IconvCacheEntry
 Cached iconv conversion descriptor. More...
 
struct  MimeNames
 MIME name lookup entry. More...
 

Macros

#define EILSEQ   EINVAL
 
#define ICONV_CACHE_SIZE   16
 Max size of the iconv cache.
 

Functions

static struct Lookup * lookup_new (void)
 Create a new Lookup.
 
static void lookup_free (struct Lookup **ptr)
 Free a Lookup.
 
static const char * lookup_charset (enum LookupType type, const char *cs)
 Look for a preferred character set name.
 
int mutt_ch_convert_nonmime_string (const struct Slist *const assumed_charset, const char *charset, char **ps)
 Try to convert a string using a list of character sets.
 
void mutt_ch_canonical_charset (char *buf, size_t buflen, const char *name)
 Canonicalise the charset of a string.
 
bool mutt_ch_chscmp (const char *cs1, const char *cs2)
 Are the names of two character sets equivalent?
 
const char * mutt_ch_get_default_charset (const struct Slist *const assumed_charset)
 Get the default character set.
 
char * mutt_ch_get_langinfo_charset (void)
 Get the user's choice of character set.
 
bool mutt_ch_lookup_add (enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
 Add a new character set lookup.
 
void mutt_ch_lookup_remove (void)
 Remove all the character set lookups.
 
const char * mutt_ch_charset_lookup (const char *chs)
 Look for a replacement character set.
 
iconv_t mutt_ch_iconv_open (const char *tocode, const char *fromcode, uint8_t flags)
 Set up iconv for conversions.
 
size_t mutt_ch_iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
 Change the encoding of a string.
 
const char * mutt_ch_iconv_lookup (const char *chs)
 Look for a replacement character set.
 
int mutt_ch_check (const char *s, size_t slen, const char *from, const char *to)
 Check whether a string can be converted between encodings.
 
int mutt_ch_convert_string (char **ps, const char *from, const char *to, uint8_t flags)
 Convert a string between encodings.
 
bool mutt_ch_check_charset (const char *cs, bool strict)
 Does iconv understand a character set?
 
struct FgetConv * mutt_ch_fgetconv_open (FILE *fp, const char *from, const char *to, uint8_t flags)
 Prepare a file for charset conversion.
 
void mutt_ch_fgetconv_close (struct FgetConv **ptr)
 Close an fgetconv handle.
 
int mutt_ch_fgetconv (struct FgetConv *fc)
 Convert a file's character set.
 
char * mutt_ch_fgetconvs (char *buf, size_t buflen, struct FgetConv *fc)
 Convert a file's charset into a string buffer.
 
void mutt_ch_set_charset (const char *charset)
 Update the records for a new character set.
 
char * mutt_ch_choose (const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
 Figure the best charset to encode a string.
 
void mutt_ch_cache_cleanup (void)
 Clean up the cached iconv handles and charset strings.
 

Variables

wchar_t ReplacementChar = '?'
 When a Unicode character can't be displayed, use this instead.
 
bool CharsetIsUtf8 = false
 Is the user's current character set utf-8?
 
struct LookupList Lookups = TAILQ_HEAD_INITIALIZER(Lookups)
 Lookup table of preferred character set names.
 
static struct IconvCacheEntry IconvCache [ICONV_CACHE_SIZE]
 Cache of iconv conversion descriptors.
 
static int IconvCacheUsed = 0
 Number of iconv descriptors in the cache.
 
static const struct MimeNames PreferredMimeNames []
 Lookup table of preferred charsets.
 

Detailed Description

Conversion between different character encodings.

Authors
  • Tobias Angele
  • Richard Russon
  • Pietro Cerutti
  • Steinar H Gunderson

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file charset.c.

Macro Definition Documentation

◆ EILSEQ

#define EILSEQ   EINVAL

Definition at line 55 of file charset.c.

◆ ICONV_CACHE_SIZE

#define ICONV_CACHE_SIZE   16

Max size of the iconv cache.

Definition at line 82 of file charset.c.

Function Documentation

◆ lookup_new()

static struct Lookup * lookup_new ( void )
static

Create a new Lookup.

Return values
ptrNew Lookup

Definition at line 255 of file charset.c.

256{
257 return MUTT_MEM_CALLOC(1, struct Lookup);
258}
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
Regex to String lookup table.
Definition charset.h:75
+ Here is the caller graph for this function:

◆ lookup_free()

static void lookup_free ( struct Lookup ** ptr)
static

Free a Lookup.

Parameters
ptrLookup to free

Definition at line 264 of file charset.c.

265{
266 if (!ptr || !*ptr)
267 return;
268
269 struct Lookup *l = *ptr;
270 FREE(&l->replacement);
271 FREE(&l->regex.pattern);
272 if (l->regex.regex)
273 regfree(l->regex.regex);
274 FREE(&l->regex.regex);
275 FREE(&l->regex);
276
277 FREE(ptr);
278}
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
char * replacement
Alternative charset to use.
Definition charset.h:78
struct Regex regex
Regular expression.
Definition charset.h:77
char * pattern
printable version
Definition regex3.h:86
regex_t * regex
compiled expression
Definition regex3.h:87
+ Here is the caller graph for this function:

◆ lookup_charset()

static const char * lookup_charset ( enum LookupType type,
const char * cs )
static

Look for a preferred character set name.

Parameters
typeType, e.g. MUTT_LOOKUP_CHARSET
csCharacter set
Return values
ptrCharset string

If the character set matches one of the regexes, then return the replacement name.

Definition at line 289 of file charset.c.

290{
291 if (!cs)
292 return NULL;
293
294 struct Lookup *l = NULL;
295
296 TAILQ_FOREACH(l, &Lookups, entries)
297 {
298 if (l->type != type)
299 continue;
300 if (mutt_regex_match(&l->regex, cs))
301 return l->replacement;
302 }
303 return NULL;
304}
struct LookupList Lookups
Lookup table of preferred character set names.
Definition charset.c:69
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition regex.c:614
#define TAILQ_FOREACH(var, head, field)
Definition queue.h:782
enum LookupType type
Lookup type.
Definition charset.h:76
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_convert_nonmime_string()

int mutt_ch_convert_nonmime_string ( const struct Slist *const assumed_charset,
const char * charset,
char ** ps )

Try to convert a string using a list of character sets.

Parameters
[in]  assumed_charset  From $assumed_charset
[in]  charset  From $charset
[in,out]  ps  String to be converted
Return values
0  Success
-1  Error

Work through $assumed_charset looking for a character set conversion that works. Failing that, try mutt_ch_get_default_charset().

Definition at line 317 of file charset.c.

319{
320 if (!ps)
321 return -1;
322
323 char *u = *ps;
324 const size_t ulen = mutt_str_len(u);
325 if (ulen == 0)
326 return 0;
327
328 const struct ListNode *np = NULL;
329 STAILQ_FOREACH(np, &assumed_charset->head, entries)
330 {
331 char const *c = np->data;
332 size_t n = mutt_str_len(c);
333 char *fromcode = MUTT_MEM_MALLOC(n + 1, char);
334 mutt_str_copy(fromcode, c, n + 1);
335 char *s = mutt_strn_dup(u, ulen);
336 int m = mutt_ch_convert_string(&s, fromcode, charset, MUTT_ICONV_NO_FLAGS);
337 FREE(&fromcode);
338 if (m == 0)
339 {
340 FREE(ps);
341 *ps = s;
342 return 0;
343 }
344 FREE(&s);
345 }
347 charset, MUTT_ICONV_HOOK_FROM);
348 return -1;
349}
#define MUTT_MEM_MALLOC(n, type)
Definition memory.h:53
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition charset.c:817
const char * mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
Get the default character set.
Definition charset.c:451
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition charset.h:67
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition charset.h:66
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition string.c:384
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:500
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition string.c:583
#define STAILQ_FOREACH(var, head, field)
Definition queue.h:390
A List node for strings.
Definition list.h:37
char * data
String.
Definition list.h:38
struct ListHead head
List containing values.
Definition slist.h:38
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_canonical_charset()

void mutt_ch_canonical_charset ( char * buf,
size_t buflen,
const char * name )

Canonicalise the charset of a string.

Parameters
bufBuffer for canonical character set name
buflenLength of buffer
nameName to be canonicalised

This first cuts off any charset extension such as "//TRANSLIT", canonicalises the charset name, and then re-adds the extension.

Definition at line 360 of file charset.c.

361{
362 if (!buf || !name)
363 return;
364
365 char in[1024] = { 0 };
366 char scratch[1024 + 10] = { 0 };
367 struct Buffer *canon = buf_pool_get();
368
369 mutt_str_copy(in, name, sizeof(in));
370 char *ext = strchr(in, '/');
371 if (ext)
372 *ext++ = '\0';
373
374 if (mutt_istr_equal(in, "utf-8") || mutt_istr_equal(in, "utf8"))
375 {
376 buf_strcpy(canon, "utf-8");
377 goto out;
378 }
379
380 /* catch some common iso-8859-something misspellings */
381 size_t plen;
382 if ((plen = mutt_istr_startswith(in, "8859")) && (in[plen] != '-'))
383 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
384 else if ((plen = mutt_istr_startswith(in, "8859-")))
385 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
386 else if ((plen = mutt_istr_startswith(in, "iso8859")) && (in[plen] != '-'))
387 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
388 else if ((plen = mutt_istr_startswith(in, "iso8859-")))
389 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
390 else
391 mutt_str_copy(scratch, in, sizeof(scratch));
392
393 for (size_t i = 0; PreferredMimeNames[i].key; i++)
394 {
395 if (mutt_istr_equal(scratch, PreferredMimeNames[i].key))
396 {
397 buf_strcpy(canon, PreferredMimeNames[i].pref);
398 goto out;
399 }
400 }
401
402 buf_strcpy(canon, scratch);
403 buf_lower(canon); // for cosmetics' sake
404
405out:
406 if (ext && (*ext != '\0'))
407 {
408 buf_addch(canon, '/');
409 buf_addstr(canon, ext);
410 }
411
412 mutt_str_copy(buf, buf_string(canon), buflen);
413 buf_pool_release(&canon);
414}
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition buffer.c:241
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition buffer.c:395
void buf_lower(struct Buffer *buf)
Sets a buffer to lowercase.
Definition buffer.c:734
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition buffer.h:96
static const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
Definition charset.c:107
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition string.c:674
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
Definition string.c:246
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111
String manipulation buffer.
Definition buffer.h:36
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_chscmp()

bool mutt_ch_chscmp ( const char * cs1,
const char * cs2 )

Are the names of two character sets equivalent?

Parameters
cs1First character set
cs2Second character set
Return values
trueNames are equivalent
falseNames differ

Charsets may have extensions that mutt_ch_canonical_charset() leaves intact. We expect 'cs2' to originate from NeoMutt code, not user input (i.e. 'cs2' does not have any extension), so we simply check whether the shorter string is a prefix of the longer one.

Definition at line 428 of file charset.c.

429{
430 if (!cs1 || !cs2)
431 return false;
432
433 char buf[256] = { 0 };
434
435 mutt_ch_canonical_charset(buf, sizeof(buf), cs1);
436
437 int len1 = mutt_str_len(buf);
438 int len2 = mutt_str_len(cs2);
439
440 return mutt_istrn_equal(((len1 > len2) ? buf : cs2),
441 ((len1 > len2) ? cs2 : buf), MIN(len1, len2));
442}
#define MIN(a, b)
Return the minimum of two values.
Definition memory.h:40
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition charset.c:360
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
Definition string.c:457
+ Here is the call graph for this function:

◆ mutt_ch_get_default_charset()

const char * mutt_ch_get_default_charset ( const struct Slist *const assumed_charset)

Get the default character set.

Parameters
assumed_charsetFrom $assumed_charset
Return values
ptrName of the default character set
Warning
This returns a pointer to a static buffer. Do not free it.

Definition at line 451 of file charset.c.

452{
453 static char fcharset[128];
454 const char *c = NULL;
455
456 if (assumed_charset && (assumed_charset->count > 0))
457 c = STAILQ_FIRST(&assumed_charset->head)->data;
458 else
459 c = "us-ascii";
460
461 mutt_str_copy(fcharset, c, sizeof(fcharset));
462 return fcharset;
463}
#define STAILQ_FIRST(head)
Definition queue.h:388
size_t count
Number of values in list.
Definition slist.h:39
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_get_langinfo_charset()

char * mutt_ch_get_langinfo_charset ( void )

Get the user's choice of character set.

Return values
ptrCharset string

Get the canonical character set used by the user's locale. The caller must free the returned string.

Definition at line 472 of file charset.c.

473{
474 char buf[1024] = { 0 };
475
476 mutt_ch_canonical_charset(buf, sizeof(buf), nl_langinfo(CODESET));
477
478 if (buf[0] != '\0')
479 return mutt_str_dup(buf);
480
481 return mutt_str_dup("iso-8859-1");
482}
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition string.c:257
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_lookup_add()

bool mutt_ch_lookup_add ( enum LookupType type,
const char * pat,
const char * replace,
struct Buffer * err )

Add a new character set lookup.

Parameters
typeType of character set, e.g. MUTT_LOOKUP_CHARSET
patPattern to match
replaceReplacement string
errBuffer for error message
Return values
trueLookup added to list
falseRegex string was invalid

Add a regex for a character set and a replacement name.

Definition at line 495 of file charset.c.

497{
498 if (!pat || !replace)
499 return false;
500
501 regex_t *rx = MUTT_MEM_CALLOC(1, regex_t);
502 int rc = REG_COMP(rx, pat, REG_ICASE);
503 if (rc != 0)
504 {
505 regerror(rc, rx, err->data, err->dsize);
506 FREE(&rx);
507 return false;
508 }
509
510 struct Lookup *l = lookup_new();
511 l->type = type;
512 l->replacement = mutt_str_dup(replace);
513 l->regex.pattern = mutt_str_dup(pat);
514 l->regex.regex = rx;
515 l->regex.pat_not = false;
516
517 TAILQ_INSERT_TAIL(&Lookups, l, entries);
518
519 return true;
520}
static struct Lookup * lookup_new(void)
Create a new Lookup.
Definition charset.c:255
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition queue.h:866
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition regex3.h:49
size_t dsize
Length of data.
Definition buffer.h:39
char * data
Pointer to data.
Definition buffer.h:37
bool pat_not
do not match
Definition regex3.h:88
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_lookup_remove()

void mutt_ch_lookup_remove ( void )

Remove all the character set lookups.

Empty the list of replacement character set names.

Definition at line 527 of file charset.c.

528{
529 struct Lookup *l = NULL;
530 struct Lookup *tmp = NULL;
531
532 TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
533 {
534 TAILQ_REMOVE(&Lookups, l, entries);
535 lookup_free(&l);
536 }
537}
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
Definition charset.c:264
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition queue.h:792
#define TAILQ_REMOVE(head, elm, field)
Definition queue.h:901
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_charset_lookup()

const char * mutt_ch_charset_lookup ( const char * chs)

Look for a replacement character set.

Parameters
chsCharacter set to lookup
Return values
ptrReplacement character set (if a 'charset-hook' matches)
NULLNo matching hook

Look through all the 'charset-hook's. If one matches return the replacement character set.

Definition at line 548 of file charset.c.

549{
551}
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
Definition charset.c:289
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
Definition charset.h:62
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_iconv_open()

iconv_t mutt_ch_iconv_open ( const char * tocode,
const char * fromcode,
uint8_t flags )

Set up iconv for conversions.

Parameters
tocode  Target character set
fromcode  Current character set
flagsFlags, e.g. MUTT_ICONV_HOOK_FROM
Return values
ptriconv handle for the conversion

Like iconv_open, but canonicalises the charsets, applies charset-hooks, recanonicalises, and finally applies iconv-hooks. Parameter flags=0 skips charset-hooks, while MUTT_ICONV_HOOK_FROM applies them to fromcode. Callers should use flags=0 when fromcode can safely be considered true, either some constant, or some value provided by the user; MUTT_ICONV_HOOK_FROM should be used only when fromcode is unsure, taken from a possibly wrong incoming MIME label, or such. Misusing MUTT_ICONV_HOOK_FROM leads to unwanted interactions in some setups.

Since calling iconv_open() repeatedly can be expensive, we keep a cache of the most recently used iconv_t objects, kept in LRU order. This means that you should not call iconv_close() on the object yourself. Any objects still in the cache are cleaned up when main() calls mutt_ch_cache_cleanup().

Note
By design charset-hooks should never be, and are never, applied to tocode.
Despite its name, MUTT_ICONV_HOOK_FROM controls charset-hooks only; it has no effect on iconv-hooks, which are always applied.

Definition at line 580 of file charset.c.

581{
582 char tocode1[128] = { 0 };
583 char fromcode1[128] = { 0 };
584 const char *tocode2 = NULL, *fromcode2 = NULL;
585 const char *tmp = NULL;
586
587 /* transform to MIME preferred charset names */
588 mutt_ch_canonical_charset(tocode1, sizeof(tocode1), tocode);
589 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
590
591 /* maybe apply charset-hooks and recanonicalise fromcode,
592 * but only when caller asked us to sanitize a potentially wrong
593 * charset name incoming from the wild exterior. */
594 if (flags & MUTT_ICONV_HOOK_FROM)
595 {
596 tmp = mutt_ch_charset_lookup(fromcode1);
597 if (tmp)
598 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
599 }
600
601 /* check if we have this pair cached already */
602 for (int i = 0; i < IconvCacheUsed; i++)
603 {
604 if (strcmp(tocode1, IconvCache[i].tocode1) == 0 &&
605 strcmp(fromcode1, IconvCache[i].fromcode1) == 0)
606 {
607 iconv_t cd = IconvCache[i].cd;
608
609 /* make room for this one at the top */
610 struct IconvCacheEntry top = IconvCache[i];
611 for (int j = i - 1; j >= 0; j--)
612 {
613 IconvCache[j + 1] = IconvCache[j];
614 }
615 IconvCache[0] = top;
616
617 if (iconv_t_valid(cd))
618 {
619 /* reset state */
620 iconv(cd, NULL, NULL, NULL, NULL);
621 }
622 return cd;
623 }
624 }
625
626 /* not found in cache */
627 /* always apply iconv-hooks to suit system's iconv tastes */
628 tocode2 = mutt_ch_iconv_lookup(tocode1);
629 tocode2 = tocode2 ? tocode2 : tocode1;
630 fromcode2 = mutt_ch_iconv_lookup(fromcode1);
631 fromcode2 = fromcode2 ? fromcode2 : fromcode1;
632
633 /* call system iconv with names it appreciates */
634 iconv_t cd = iconv_open(tocode2, fromcode2);
635
637 {
638 mutt_debug(LL_DEBUG2, "iconv: dropping %s -> %s from the cache\n",
641 /* get rid of the oldest entry */
645 {
646 iconv_close(IconvCache[IconvCacheUsed - 1].cd);
647 }
649 }
650
651 /* make room for this one at the top */
652 for (int j = IconvCacheUsed - 1; j >= 0; j--)
653 {
654 IconvCache[j + 1] = IconvCache[j];
655 }
656
658
659 mutt_debug(LL_DEBUG2, "iconv: adding %s -> %s to the cache\n", fromcode1, tocode1);
660 IconvCache[0].fromcode1 = strdup(fromcode1);
661 IconvCache[0].tocode1 = strdup(tocode1);
662 IconvCache[0].cd = cd;
663
664 return cd;
665}
#define mutt_debug(LEVEL,...)
Definition logging2.h:91
@ LL_DEBUG2
Log at debug level 2.
Definition logging2.h:46
static int IconvCacheUsed
Number of iconv descriptors in the cache.
Definition charset.c:86
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
Definition charset.c:767
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
Definition charset.c:548
#define ICONV_CACHE_SIZE
Max size of the iconv cache.
Definition charset.c:82
static struct IconvCacheEntry IconvCache[ICONV_CACHE_SIZE]
Cache of iconv conversion descriptors.
Definition charset.c:84
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition charset.h:123
Cached iconv conversion descriptor.
Definition charset.c:75
char * tocode1
Destination character set.
Definition charset.c:77
char * fromcode1
Source character set.
Definition charset.c:76
iconv_t cd
iconv conversion descriptor
Definition charset.c:78
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_iconv()

size_t mutt_ch_iconv ( iconv_t cd,
const char ** inbuf,
size_t * inbytesleft,
char ** outbuf,
size_t * outbytesleft,
const char ** inrepls,
const char * outrepl,
int * iconverrno )

Change the encoding of a string.

Parameters
[in]  cd  Iconv conversion descriptor
[in,out]  inbuf  Buffer to convert
[in,out]  inbytesleft  Length of buffer to convert
[in,out]  outbuf  Buffer for the result
[in,out]  outbytesleft  Length of result buffer
[in]  inrepls  Input replacement characters
[in]  outrepl  Output replacement characters
[out]  iconverrno  Errno if iconv() fails, 0 if it succeeds
Return values
num  Characters converted

Like iconv(), but keeps going even when the input is invalid. If you're supplying inrepls, the source charset should be stateless; if you're supplying an outrepl, the target charset should be.

Definition at line 683 of file charset.c.

686{
687 size_t rc = 0;
688 const char *ib = *inbuf;
689 size_t ibl = *inbytesleft;
690 char *ob = *outbuf;
691 size_t obl = *outbytesleft;
692
693 while (true)
694 {
695 errno = 0;
696 const size_t ret1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
697 if (ret1 != ICONV_ILLEGAL_SEQ)
698 rc += ret1;
699 if (iconverrno)
700 *iconverrno = errno;
701
702 if (ibl && obl && (errno == EILSEQ))
703 {
704 if (inrepls)
705 {
706 /* Try replacing the input */
707 const char **t = NULL;
708 for (t = inrepls; *t; t++)
709 {
710 const char *ib1 = *t;
711 size_t ibl1 = strlen(*t);
712 char *ob1 = ob;
713 size_t obl1 = obl;
714 iconv(cd, (ICONV_CONST char **) &ib1, &ibl1, &ob1, &obl1);
715 if (ibl1 == 0)
716 {
717 ib++;
718 ibl--;
719 ob = ob1;
720 obl = obl1;
721 rc++;
722 break;
723 }
724 }
725 if (*t)
726 continue;
727 }
728 /* Replace the output */
729 if (!outrepl)
730 outrepl = "?";
731 iconv(cd, NULL, NULL, &ob, &obl);
732 if (obl)
733 {
734 int n = strlen(outrepl);
735 if (n > obl)
736 {
737 outrepl = "?";
738 n = 1;
739 }
740 memcpy(ob, outrepl, n);
741 ib++;
742 ibl--;
743 ob += n;
744 obl -= n;
745 rc++;
746 iconv(cd, NULL, NULL, NULL, NULL); /* for good measure */
747 continue;
748 }
749 }
750 *inbuf = ib;
751 *inbytesleft = ibl;
752 *outbuf = ob;
753 *outbytesleft = obl;
754 return rc;
755 }
756}
#define EILSEQ
Definition charset.c:55
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
+ Here is the caller graph for this function:

◆ mutt_ch_iconv_lookup()

const char * mutt_ch_iconv_lookup ( const char * chs)

Look for a replacement character set.

Parameters
chsCharacter set to lookup
Return values
ptrReplacement character set (if a 'iconv-hook' matches)
NULLNo matching hook

Look through all the 'iconv-hook's. If one matches return the replacement character set.

Definition at line 767 of file charset.c.

768{
770}
@ MUTT_LOOKUP_ICONV
Character set conversion.
Definition charset.h:63
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_check()

int mutt_ch_check ( const char * s,
size_t slen,
const char * from,
const char * to )

Check whether a string can be converted between encodings.

Parameters
[in]  s  String to check
[in]  slen  Length of the string to check
[in]  from  Current character set
[in]  to  Target character set
Return values
0  Success
-1  Error in iconv_open()
>0  Errno as set by iconv()

Definition at line 782 of file charset.c.

783{
784 if (!s || !from || !to)
785 return -1;
786
787 int rc = 0;
788 iconv_t cd = mutt_ch_iconv_open(to, from, MUTT_ICONV_NO_FLAGS);
789 if (!iconv_t_valid(cd))
790 return -1;
791
792 size_t outlen = MB_LEN_MAX * slen;
793 char *out = MUTT_MEM_MALLOC(outlen + 1, char);
794 char *saved_out = out;
795
796 const size_t convlen = iconv(cd, (ICONV_CONST char **) &s, &slen, &out, &outlen);
797 if (convlen == ICONV_ILLEGAL_SEQ)
798 rc = errno;
799
800 FREE(&saved_out);
801 return rc;
802}
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition charset.c:580
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_convert_string()

int mutt_ch_convert_string ( char ** ps,
const char * from,
const char * to,
uint8_t flags )

Convert a string between encodings.

Parameters
[in,out]  ps  String to convert
[in]  from  Current character set
[in]  to  Target character set
[in]  flags  Flags, e.g. MUTT_ICONV_HOOK_FROM
Return values
0  Success
-1  Invalid arguments or failure to open an iconv channel
errno  Failure in iconv conversion

Parameter flags is given as-is to mutt_ch_iconv_open(). See there for its meaning and usage policy.

Definition at line 817 of file charset.c.

818{
819 if (!ps)
820 return -1;
821
822 char *s = *ps;
823
824 if (!s || (*s == '\0'))
825 return 0;
826
827 if (!to || !from)
828 return -1;
829
830 const char *repls[] = { "\357\277\275", "?", 0 };
831 int rc = 0;
832
833 iconv_t cd = mutt_ch_iconv_open(to, from, flags);
834 if (!iconv_t_valid(cd))
835 return -1;
836
837 const char **inrepls = NULL;
838 const char *outrepl = NULL;
839
840 if (mutt_ch_is_utf8(to))
841 outrepl = "\357\277\275";
842 else if (mutt_ch_is_utf8(from))
843 inrepls = repls;
844 else
845 outrepl = "?";
846
847 const char *ib = s;
848 size_t ibl = strlen(s);
849 if (ibl >= (SIZE_MAX / MB_LEN_MAX))
850 {
851 return -1;
852 }
853 size_t obl = MB_LEN_MAX * ibl;
854 char *buf = MUTT_MEM_MALLOC(obl + 1, char);
855 char *ob = buf;
856
857 mutt_ch_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl, &rc);
858 iconv(cd, 0, 0, &ob, &obl);
859
860 *ob = '\0';
861
862 FREE(ps);
863 *ps = buf;
864
865 mutt_str_adjust(ps);
866 return rc;
867}
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
Definition charset.c:683
#define mutt_ch_is_utf8(str)
Definition charset.h:107
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
Definition string.c:303
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_check_charset()

bool mutt_ch_check_charset ( const char * cs,
bool strict )

Does iconv understand a character set?

Parameters
csCharacter set to check
strictCheck strictly by using iconv
Return values
trueCharacter set is valid

If strict is false, then finding a matching character set in PreferredMimeNames will be enough. If strict is true, or the charset is not in PreferredMimeNames, then iconv() will be run.

Definition at line 880 of file charset.c.

881{
882 if (!cs)
883 return false;
884
885 if (mutt_ch_is_utf8(cs))
886 return true;
887
888 if (!strict)
889 {
890 for (int i = 0; PreferredMimeNames[i].key; i++)
891 {
892 if (mutt_istr_equal(PreferredMimeNames[i].key, cs) ||
894 {
895 return true;
896 }
897 }
898 }
899
900 iconv_t cd = mutt_ch_iconv_open(cs, cs, MUTT_ICONV_NO_FLAGS);
901 if (iconv_t_valid(cd))
902 {
903 return true;
904 }
905
906 return false;
907}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_fgetconv_open()

struct FgetConv * mutt_ch_fgetconv_open ( FILE * fp,
const char * from,
const char * to,
uint8_t flags )

Prepare a file for charset conversion.

Parameters
fpFILE ptr to prepare
fromCurrent character set
toDestination character set
flagsFlags, e.g. MUTT_ICONV_HOOK_FROM
Return values
ptrfgetconv handle

Parameter flags is given as-is to mutt_ch_iconv_open().

Definition at line 919 of file charset.c.

920{
921 iconv_t cd = ICONV_T_INVALID;
922
923 if (from && to)
924 cd = mutt_ch_iconv_open(to, from, flags);
925
926 struct FgetConv *fc = MUTT_MEM_CALLOC(1, struct FgetConv);
927 fc->fp = fp;
928 fc->cd = cd;
929
930 if (iconv_t_valid(cd))
931 {
932 static const char *repls[] = { "\357\277\275", "?", 0 };
933
934 fc->p = fc->bufo;
935 fc->ob = fc->bufo;
936 fc->ib = fc->bufi;
937 fc->ibl = 0;
938 fc->inrepls = mutt_ch_is_utf8(to) ? repls : repls + 1;
939 }
940
941 return fc;
942}
#define ICONV_T_INVALID
Error value for iconv functions.
Definition charset.h:111
Cursor for converting a file's encoding.
Definition charset.h:45
char bufi[512]
Input buffer.
Definition charset.h:48
iconv_t cd
iconv conversion descriptor
Definition charset.h:47
char bufo[512]
Output buffer.
Definition charset.h:49
size_t ibl
Input buffer length.
Definition charset.h:53
FILE * fp
File to read from.
Definition charset.h:46
char * p
Current position in output buffer.
Definition charset.h:50
const char ** inrepls
Replacement characters.
Definition charset.h:54
char * ib
Current position in input buffer.
Definition charset.h:52
char * ob
End of output buffer.
Definition charset.h:51
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_fgetconv_close()

void mutt_ch_fgetconv_close ( struct FgetConv ** ptr)

Close an fgetconv handle.

Parameters
[out] ptr: fgetconv handle

Definition at line 948 of file charset.c.

/**
 * mutt_ch_fgetconv_close - Close an fgetconv handle
 * @param[out] ptr fgetconv handle
 *
 * Does nothing if `ptr` or `*ptr` is NULL.  Only the handle itself is handed
 * to FREE(); this function calls neither fclose() nor iconv_close() on the
 * members stored in the handle.
 */
void mutt_ch_fgetconv_close(struct FgetConv **ptr)
{
  if (ptr && *ptr)
  {
    FREE(ptr);
  }
}
+ Here is the caller graph for this function:

◆ mutt_ch_fgetconv()

int mutt_ch_fgetconv ( struct FgetConv * fc)

Convert a file's character set.

Parameters
fc: FgetConv handle
Return values
num: Next character in the converted file
EOF: Error, or end of file

A file is read into a buffer and its character set is converted. Each call to this function will return one converted character. The buffer is refilled automatically when empty.

Definition at line 966 of file charset.c.

967{
968 if (!fc)
969 return EOF;
970 if (!iconv_t_valid(fc->cd))
971 return fgetc(fc->fp);
972 if (!fc->p)
973 return EOF;
974 if (fc->p < fc->ob)
975 return (unsigned char) *(fc->p)++;
976
977 /* Try to convert some more */
978 fc->p = fc->bufo;
979 fc->ob = fc->bufo;
980 if (fc->ibl)
981 {
982 size_t obl = sizeof(fc->bufo);
983 iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
984 if (fc->p < fc->ob)
985 return (unsigned char) *(fc->p)++;
986 }
987
988 /* If we trusted iconv a bit more, we would at this point
989 * ask why it had stopped converting ... */
990
991 /* Try to read some more */
992 if ((fc->ibl == sizeof(fc->bufi)) ||
993 (fc->ibl && (fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))))
994 {
995 fc->p = 0;
996 return EOF;
997 }
998 if (fc->ibl)
999 memmove(fc->bufi, fc->ib, fc->ibl);
1000 fc->ib = fc->bufi;
1001 fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->fp);
1002
1003 /* Try harder this time to convert some */
1004 if (fc->ibl)
1005 {
1006 size_t obl = sizeof(fc->bufo);
1007 mutt_ch_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl,
1008 fc->inrepls, 0, NULL);
1009 if (fc->p < fc->ob)
1010 return (unsigned char) *(fc->p)++;
1011 }
1012
1013 /* Either the file has finished or one of the buffers is too small */
1014 fc->p = 0;
1015 return EOF;
1016}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_fgetconvs()

char * mutt_ch_fgetconvs ( char * buf,
size_t buflen,
struct FgetConv * fc )

Convert a file's charset into a string buffer.

Parameters
buf: Buffer for result
buflen: Length of buffer
fc: FgetConv handle
Return values
ptr: Success, result buffer
NULL: Error, or no characters could be read

Read a file into a buffer, converting the character set as it goes.

Definition at line 1028 of file charset.c.

/**
 * mutt_ch_fgetconvs - Convert a file's charset into a string buffer
 * @param buf    Buffer for result
 * @param buflen Length of buffer
 * @param fc     FgetConv handle
 * @retval ptr  Success, result buffer
 * @retval NULL Error, or no characters could be read
 *
 * Read a file into a buffer, converting the character set as it goes.
 *
 * Characters are fetched with mutt_ch_fgetconv() until a newline has been
 * stored, EOF is returned, or only the space for the terminating NUL is
 * left.  The result is always NUL-terminated; a newline, if read, is kept.
 */
char *mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
{
  /* A zero-length buffer has no room even for the terminating NUL, so
   * nothing may be written to it */
  if (!buf || (buflen == 0))
    return NULL;

  size_t r;
  for (r = 0; (r + 1) < buflen;)
  {
    const int c = mutt_ch_fgetconv(fc);
    if (c == EOF)
      break;
    buf[r++] = (char) c;
    if (c == '\n')
      break;
  }
  buf[r] = '\0';

  /* An empty result is reported as failure */
  if (r > 0)
    return buf;

  return NULL;
}
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
Definition charset.c:966
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_set_charset()

void mutt_ch_set_charset ( const char * charset)

Update the records for a new character set.

Parameters
charset: New character set

Check if this character set is utf-8 and pick a suitable replacement character for unprintable characters.

Note
This calls bind_textdomain_codeset() which will affect future message translations.

Definition at line 1061 of file charset.c.

1062{
1063 char buf[256] = { 0 };
1064
1065 mutt_ch_canonical_charset(buf, sizeof(buf), charset);
1066
1067 if (mutt_ch_is_utf8(buf))
1068 {
1069 CharsetIsUtf8 = true;
1070 ReplacementChar = 0xfffd; /* replacement character */
1071 }
1072 else
1073 {
1074 CharsetIsUtf8 = false;
1075 ReplacementChar = '?';
1076 }
1077
1078#if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1079 bind_textdomain_codeset(PACKAGE, buf);
1080#endif
1081}
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition charset.c:66
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition charset.c:61
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_choose()

char * mutt_ch_choose ( const char * fromcode,
const struct Slist * charsets,
const char * u,
size_t ulen,
char ** d,
size_t * dlen )

Figure the best charset to encode a string.

Parameters
[in] fromcode: Original charset of the string
[in] charsets: List of potential charsets to use
[in] u: String to encode
[in] ulen: Length of the string to encode
[out] d: If not NULL, point it to the converted string
[out] dlen: If not NULL, point it to the length of the d string
Return values
ptr: Best performing charset
NULL: None could be found

Definition at line 1094 of file charset.c.

1096{
1097 if (!fromcode || !charsets)
1098 return NULL;
1099
1100 char *e = NULL, *tocode = NULL;
1101 size_t elen = 0, bestn = 0;
1102
1103 const struct ListNode *np = NULL;
1104 STAILQ_FOREACH(np, &charsets->head, entries)
1105 {
1106 char *t = mutt_str_dup(np->data);
1107 if (!t)
1108 continue;
1109
1110 size_t n = mutt_str_len(t);
1111 char *s = mutt_strn_dup(u, ulen);
1112 const int rc = d ? mutt_ch_convert_string(&s, fromcode, t, MUTT_ICONV_NO_FLAGS) :
1113 mutt_ch_check(s, ulen, fromcode, t);
1114 if (rc)
1115 {
1116 FREE(&t);
1117 FREE(&s);
1118 continue;
1119 }
1120 size_t slen = mutt_str_len(s);
1121
1122 if (!tocode || (n < bestn))
1123 {
1124 bestn = n;
1125 FREE(&tocode);
1126 tocode = t;
1127 if (d)
1128 {
1129 FREE(&e);
1130 e = s;
1131 }
1132 else
1133 {
1134 FREE(&s);
1135 }
1136 elen = slen;
1137 }
1138 else
1139 {
1140 FREE(&t);
1141 FREE(&s);
1142 }
1143 }
1144 if (tocode)
1145 {
1146 if (d)
1147 *d = e;
1148 if (dlen)
1149 *dlen = elen;
1150
1151 char canonical_buf[1024] = { 0 };
1152 mutt_ch_canonical_charset(canonical_buf, sizeof(canonical_buf), tocode);
1153 mutt_str_replace(&tocode, canonical_buf);
1154 }
1155 return tocode;
1156}
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
Definition charset.c:782
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
Definition string.c:284
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_ch_cache_cleanup()

void mutt_ch_cache_cleanup ( void )

Clean up the cached iconv handles and charset strings.

Definition at line 1161 of file charset.c.

1162{
1163 for (int i = 0; i < IconvCacheUsed; i++)
1164 {
1165 FREE(&IconvCache[i].fromcode1);
1166 FREE(&IconvCache[i].tocode1);
1167 if (iconv_t_valid(IconvCache[i].cd))
1168 {
1169 iconv_close(IconvCache[i].cd);
1170 }
1171 }
1172 IconvCacheUsed = 0;
1173}
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ ReplacementChar

wchar_t ReplacementChar = '?'

When a Unicode character can't be displayed, use this instead.

Definition at line 61 of file charset.c.

◆ CharsetIsUtf8

bool CharsetIsUtf8 = false

Is the user's current character set utf-8?

Definition at line 66 of file charset.c.

◆ Lookups

struct LookupList Lookups = TAILQ_HEAD_INITIALIZER(Lookups)

Lookup table of preferred character set names.

Definition at line 69 of file charset.c.

◆ IconvCache

struct IconvCacheEntry IconvCache[ICONV_CACHE_SIZE]
static

Cache of iconv conversion descriptors.

Definition at line 84 of file charset.c.

◆ IconvCacheUsed

int IconvCacheUsed = 0
static

Number of iconv descriptors in the cache.

Definition at line 86 of file charset.c.

◆ PreferredMimeNames

const struct MimeNames PreferredMimeNames[]
static

Lookup table of preferred charsets.

The following list has been created manually from the data under: http://www.isi.edu/in-notes/iana/assignments/character-sets (last update: 2000-09-07).

Note
It includes only the subset of character sets for which a preferred MIME name is given.

Definition at line 107 of file charset.c.

107 {
108 // clang-format off
109 { "ansi_x3.4-1968", "us-ascii" },
110 { "iso-ir-6", "us-ascii" },
111 { "iso_646.irv:1991", "us-ascii" },
112 { "ascii", "us-ascii" },
113 { "iso646-us", "us-ascii" },
114 { "us", "us-ascii" },
115 { "ibm367", "us-ascii" },
116 { "cp367", "us-ascii" },
117 { "csASCII", "us-ascii" },
118
119 { "csISO2022KR", "iso-2022-kr" },
120 { "csEUCKR", "euc-kr" },
121 { "csISO2022JP", "iso-2022-jp" },
122 { "csISO2022JP2", "iso-2022-jp-2" },
123
124 { "ISO_8859-1:1987", "iso-8859-1" },
125 { "iso-ir-100", "iso-8859-1" },
126 { "iso_8859-1", "iso-8859-1" },
127 { "latin1", "iso-8859-1" },
128 { "l1", "iso-8859-1" },
129 { "IBM819", "iso-8859-1" },
130 { "CP819", "iso-8859-1" },
131 { "csISOLatin1", "iso-8859-1" },
132
133 { "ISO_8859-2:1987", "iso-8859-2" },
134 { "iso-ir-101", "iso-8859-2" },
135 { "iso_8859-2", "iso-8859-2" },
136 { "latin2", "iso-8859-2" },
137 { "l2", "iso-8859-2" },
138 { "csISOLatin2", "iso-8859-2" },
139
140 { "ISO_8859-3:1988", "iso-8859-3" },
141 { "iso-ir-109", "iso-8859-3" },
142 { "ISO_8859-3", "iso-8859-3" },
143 { "latin3", "iso-8859-3" },
144 { "l3", "iso-8859-3" },
145 { "csISOLatin3", "iso-8859-3" },
146
147 { "ISO_8859-4:1988", "iso-8859-4" },
148 { "iso-ir-110", "iso-8859-4" },
149 { "ISO_8859-4", "iso-8859-4" },
150 { "latin4", "iso-8859-4" },
151 { "l4", "iso-8859-4" },
152 { "csISOLatin4", "iso-8859-4" },
153
154 { "ISO_8859-6:1987", "iso-8859-6" },
155 { "iso-ir-127", "iso-8859-6" },
156 { "iso_8859-6", "iso-8859-6" },
157 { "ECMA-114", "iso-8859-6" },
158 { "ASMO-708", "iso-8859-6" },
159 { "arabic", "iso-8859-6" },
160 { "csISOLatinArabic", "iso-8859-6" },
161
162 { "ISO_8859-7:1987", "iso-8859-7" },
163 { "iso-ir-126", "iso-8859-7" },
164 { "ISO_8859-7", "iso-8859-7" },
165 { "ELOT_928", "iso-8859-7" },
166 { "ECMA-118", "iso-8859-7" },
167 { "greek", "iso-8859-7" },
168 { "greek8", "iso-8859-7" },
169 { "csISOLatinGreek", "iso-8859-7" },
170
171 { "ISO_8859-8:1988", "iso-8859-8" },
172 { "iso-ir-138", "iso-8859-8" },
173 { "ISO_8859-8", "iso-8859-8" },
174 { "hebrew", "iso-8859-8" },
175 { "csISOLatinHebrew", "iso-8859-8" },
176
177 { "ISO_8859-5:1988", "iso-8859-5" },
178 { "iso-ir-144", "iso-8859-5" },
179 { "ISO_8859-5", "iso-8859-5" },
180 { "cyrillic", "iso-8859-5" },
181 { "csISOLatinCyrillic", "iso-8859-5" },
182
183 { "ISO_8859-9:1989", "iso-8859-9" },
184 { "iso-ir-148", "iso-8859-9" },
185 { "ISO_8859-9", "iso-8859-9" },
186 { "latin5", "iso-8859-9" }, /* this is not a bug */
187 { "l5", "iso-8859-9" },
188 { "csISOLatin5", "iso-8859-9" },
189
190 { "ISO_8859-10:1992", "iso-8859-10" },
191 { "iso-ir-157", "iso-8859-10" },
192 { "latin6", "iso-8859-10" }, /* this is not a bug */
193 { "l6", "iso-8859-10" },
194 { "csISOLatin6", "iso-8859-10" },
195
196 { "csKOI8r", "koi8-r" },
197
198 { "MS_Kanji", "Shift_JIS" }, /* Note the underscore! */
199 { "csShiftJis", "Shift_JIS" },
200
201 { "Extended_UNIX_Code_Packed_Format_for_Japanese",
202 "euc-jp" },
203 { "csEUCPkdFmtJapanese", "euc-jp" },
204
205 { "csGB2312", "gb2312" },
206 { "csbig5", "big5" },
207
208 /* End of official brain damage.
209 * What follows has been taken from glibc's localedata files. */
210
211 { "iso_8859-13", "iso-8859-13" },
212 { "iso-ir-179", "iso-8859-13" },
213 { "latin7", "iso-8859-13" }, /* this is not a bug */
214 { "l7", "iso-8859-13" },
215
216 { "iso_8859-14", "iso-8859-14" },
217 { "latin8", "iso-8859-14" }, /* this is not a bug */
218 { "l8", "iso-8859-14" },
219
220 { "iso_8859-15", "iso-8859-15" },
221 { "latin9", "iso-8859-15" }, /* this is not a bug */
222
223 /* Suggested by Ionel Mugurel Ciobica <tgakic@sg10.chem.tue.nl> */
224 { "latin0", "iso-8859-15" }, /* this is not a bug */
225
226 { "iso_8859-16", "iso-8859-16" },
227 { "latin10", "iso-8859-16" }, /* this is not a bug */
228
229 { "646", "us-ascii" },
230
231 /* http://www.sun.com/software/white-papers/wp-unicode/ */
232
233 { "eucJP", "euc-jp" },
234 { "PCK", "Shift_JIS" },
235 { "ko_KR-euc", "euc-kr" },
236 { "zh_TW-big5", "big5" },
237
238 /* seems to be common on some systems */
239
240 { "sjis", "Shift_JIS" },
241 { "euc-jp-ms", "eucJP-ms" },
242
243 /* If you happen to encounter system-specific brain-damage with respect to
244 * character set naming, please add it above this comment, and submit a patch
245 * to <neomutt-devel@neomutt.org> */
246
247 { NULL, NULL },
248 // clang-format on
249};