NeoMutt  2025-12-11-435-g4ac674
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
charset.c
Go to the documentation of this file.
1
25
31
32#include "config.h"
33#include <errno.h>
34#include <iconv.h>
35#include <langinfo.h>
36#include <limits.h>
37#include <stdbool.h>
38#include <stdio.h>
39#include <string.h>
40#include "charset.h"
41#include "buffer.h"
42#include "list.h"
43#include "logging2.h"
44#include "memory.h"
45#include "pool.h"
46#include "queue.h"
47#include "regex3.h"
48#include "slist.h"
49#include "string2.h"
50#ifdef ENABLE_NLS
51#include <libintl.h>
52#endif
53
54#ifndef EILSEQ
55#define EILSEQ EINVAL
56#endif
57
/// When a Unicode character can't be displayed, use this instead
61wchar_t ReplacementChar = '?';
62
/// Is the user's current character set utf-8?
66bool CharsetIsUtf8 = false;
67
70
/**
 * struct IconvCacheEntry - Cached iconv conversion descriptor
 *
 * One slot of the IconvCache array.  The two names are heap strings
 * owned by the cache entry.
 */
struct IconvCacheEntry
{
  char *fromcode1; ///< Source character set
  char *tocode1;   ///< Destination character set
  iconv_t cd;      ///< iconv conversion descriptor
};
80
/// Max size of the iconv cache
82#define ICONV_CACHE_SIZE 16
/// Number of iconv descriptors in the cache
86static int IconvCacheUsed = 0;
87
/**
 * struct MimeNames - MIME name lookup entry
 */
struct MimeNames
{
  const char *key;  ///< Charset alias
  const char *pref; ///< Preferred MIME name
};
96
/**
 * PreferredMimeNames - Lookup table of preferred charsets
 *
 * Each entry maps a charset alias (key) to its preferred MIME name (pref).
 * The table is terminated by a { NULL, NULL } entry; the loops that scan
 * it stop at the first NULL key and compare keys case-insensitively.
 */
107static const struct MimeNames PreferredMimeNames[] = {
108 // clang-format off
109 { "ansi_x3.4-1968", "us-ascii" },
110 { "iso-ir-6", "us-ascii" },
111 { "iso_646.irv:1991", "us-ascii" },
112 { "ascii", "us-ascii" },
113 { "iso646-us", "us-ascii" },
114 { "us", "us-ascii" },
115 { "ibm367", "us-ascii" },
116 { "cp367", "us-ascii" },
117 { "csASCII", "us-ascii" },
118
119 { "csISO2022KR", "iso-2022-kr" },
120 { "csEUCKR", "euc-kr" },
121 { "csISO2022JP", "iso-2022-jp" },
122 { "csISO2022JP2", "iso-2022-jp-2" },
123
124 { "ISO_8859-1:1987", "iso-8859-1" },
125 { "iso-ir-100", "iso-8859-1" },
126 { "iso_8859-1", "iso-8859-1" },
127 { "latin1", "iso-8859-1" },
128 { "l1", "iso-8859-1" },
129 { "IBM819", "iso-8859-1" },
130 { "CP819", "iso-8859-1" },
131 { "csISOLatin1", "iso-8859-1" },
132
133 { "ISO_8859-2:1987", "iso-8859-2" },
134 { "iso-ir-101", "iso-8859-2" },
135 { "iso_8859-2", "iso-8859-2" },
136 { "latin2", "iso-8859-2" },
137 { "l2", "iso-8859-2" },
138 { "csISOLatin2", "iso-8859-2" },
139
140 { "ISO_8859-3:1988", "iso-8859-3" },
141 { "iso-ir-109", "iso-8859-3" },
142 { "ISO_8859-3", "iso-8859-3" },
143 { "latin3", "iso-8859-3" },
144 { "l3", "iso-8859-3" },
145 { "csISOLatin3", "iso-8859-3" },
146
147 { "ISO_8859-4:1988", "iso-8859-4" },
148 { "iso-ir-110", "iso-8859-4" },
149 { "ISO_8859-4", "iso-8859-4" },
150 { "latin4", "iso-8859-4" },
151 { "l4", "iso-8859-4" },
152 { "csISOLatin4", "iso-8859-4" },
153
154 { "ISO_8859-6:1987", "iso-8859-6" },
155 { "iso-ir-127", "iso-8859-6" },
156 { "iso_8859-6", "iso-8859-6" },
157 { "ECMA-114", "iso-8859-6" },
158 { "ASMO-708", "iso-8859-6" },
159 { "arabic", "iso-8859-6" },
160 { "csISOLatinArabic", "iso-8859-6" },
161
162 { "ISO_8859-7:1987", "iso-8859-7" },
163 { "iso-ir-126", "iso-8859-7" },
164 { "ISO_8859-7", "iso-8859-7" },
165 { "ELOT_928", "iso-8859-7" },
166 { "ECMA-118", "iso-8859-7" },
167 { "greek", "iso-8859-7" },
168 { "greek8", "iso-8859-7" },
169 { "csISOLatinGreek", "iso-8859-7" },
170
171 { "ISO_8859-8:1988", "iso-8859-8" },
172 { "iso-ir-138", "iso-8859-8" },
173 { "ISO_8859-8", "iso-8859-8" },
174 { "hebrew", "iso-8859-8" },
175 { "csISOLatinHebrew", "iso-8859-8" },
176
177 { "ISO_8859-5:1988", "iso-8859-5" },
178 { "iso-ir-144", "iso-8859-5" },
179 { "ISO_8859-5", "iso-8859-5" },
180 { "cyrillic", "iso-8859-5" },
181 { "csISOLatinCyrillic", "iso-8859-5" },
182
183 { "ISO_8859-9:1989", "iso-8859-9" },
184 { "iso-ir-148", "iso-8859-9" },
185 { "ISO_8859-9", "iso-8859-9" },
186 { "latin5", "iso-8859-9" }, /* this is not a bug */
187 { "l5", "iso-8859-9" },
188 { "csISOLatin5", "iso-8859-9" },
189
190 { "ISO_8859-10:1992", "iso-8859-10" },
191 { "iso-ir-157", "iso-8859-10" },
192 { "latin6", "iso-8859-10" }, /* this is not a bug */
193 { "l6", "iso-8859-10" },
194 { "csISOLatin6", "iso-8859-10" },
195
196 { "csKOI8r", "koi8-r" },
197
198 { "MS_Kanji", "Shift_JIS" }, /* Note the underscore! */
199 { "csShiftJis", "Shift_JIS" },
200
201 { "Extended_UNIX_Code_Packed_Format_for_Japanese",
202 "euc-jp" },
203 { "csEUCPkdFmtJapanese", "euc-jp" },
204
205 { "csGB2312", "gb2312" },
206 { "csbig5", "big5" },
207
208 /* End of official brain damage.
209 * What follows has been taken from glibc's localedata files. */
210
211 { "iso_8859-13", "iso-8859-13" },
212 { "iso-ir-179", "iso-8859-13" },
213 { "latin7", "iso-8859-13" }, /* this is not a bug */
214 { "l7", "iso-8859-13" },
215
216 { "iso_8859-14", "iso-8859-14" },
217 { "latin8", "iso-8859-14" }, /* this is not a bug */
218 { "l8", "iso-8859-14" },
219
220 { "iso_8859-15", "iso-8859-15" },
221 { "latin9", "iso-8859-15" }, /* this is not a bug */
222
223 /* Suggested by Ionel Mugurel Ciobica <tgakic@sg10.chem.tue.nl> */
224 { "latin0", "iso-8859-15" }, /* this is not a bug */
225
226 { "iso_8859-16", "iso-8859-16" },
227 { "latin10", "iso-8859-16" }, /* this is not a bug */
228
229 { "646", "us-ascii" },
230
231 /* http://www.sun.com/software/white-papers/wp-unicode/ */
232
233 { "eucJP", "euc-jp" },
234 { "PCK", "Shift_JIS" },
235 { "ko_KR-euc", "euc-kr" },
236 { "zh_TW-big5", "big5" },
237
238 /* seems to be common on some systems */
239
240 { "sjis", "Shift_JIS" },
241 { "euc-jp-ms", "eucJP-ms" },
242
243 /* If you happen to encounter system-specific brain-damage with respect to
244 * character set naming, please add it above this comment, and submit a patch
245 * to <neomutt-devel@neomutt.org> */
246
247 { NULL, NULL },
248 // clang-format on
249};
250
255static struct Lookup *lookup_new(void)
256{
257 return MUTT_MEM_CALLOC(1, struct Lookup);
258}
259
264static void lookup_free(struct Lookup **ptr)
265{
266 if (!ptr || !*ptr)
267 return;
268
269 struct Lookup *l = *ptr;
270 FREE(&l->replacement);
271 FREE(&l->regex.pattern);
272 if (l->regex.regex)
273 regfree(l->regex.regex);
274 FREE(&l->regex.regex);
275 FREE(&l->regex);
276
277 FREE(ptr);
278}
279
289static const char *lookup_charset(enum LookupType type, const char *cs)
290{
291 if (!cs)
292 return NULL;
293
294 struct Lookup *l = NULL;
295
296 TAILQ_FOREACH(l, &Lookups, entries)
297 {
298 if (l->type != type)
299 continue;
300 if (mutt_regex_match(&l->regex, cs))
301 return l->replacement;
302 }
303 return NULL;
304}
305
317int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset,
318 const char *charset, char **ps)
319{
320 if (!ps)
321 return -1;
322
323 char *u = *ps;
324 const size_t ulen = mutt_str_len(u);
325 if (ulen == 0)
326 return 0;
327
328 const struct ListNode *np = NULL;
329 STAILQ_FOREACH(np, &assumed_charset->head, entries)
330 {
331 char const *c = np->data;
332 size_t n = mutt_str_len(c);
333 char *fromcode = MUTT_MEM_MALLOC(n + 1, char);
334 mutt_str_copy(fromcode, c, n + 1);
335 char *s = mutt_strn_dup(u, ulen);
336 int m = mutt_ch_convert_string(&s, fromcode, charset, MUTT_ICONV_NO_FLAGS);
337 FREE(&fromcode);
338 if (m == 0)
339 {
340 FREE(ps);
341 *ps = s;
342 return 0;
343 }
344 FREE(&s);
345 }
347 charset, MUTT_ICONV_HOOK_FROM);
348 return -1;
349}
350
360void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
361{
362 if (!buf || !name)
363 return;
364
365 char in[1024] = { 0 };
366 char scratch[1024 + 10] = { 0 };
367 struct Buffer *canon = buf_pool_get();
368
369 mutt_str_copy(in, name, sizeof(in));
370 char *ext = strchr(in, '/');
371 if (ext)
372 *ext++ = '\0';
373
374 if (mutt_istr_equal(in, "utf-8") || mutt_istr_equal(in, "utf8"))
375 {
376 buf_strcpy(canon, "utf-8");
377 goto out;
378 }
379
380 /* catch some common iso-8859-something misspellings */
381 size_t plen;
382 if ((plen = mutt_istr_startswith(in, "8859")) && (in[plen] != '-'))
383 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
384 else if ((plen = mutt_istr_startswith(in, "8859-")))
385 snprintf(scratch, sizeof(scratch), "iso-8859-%s", in + plen);
386 else if ((plen = mutt_istr_startswith(in, "iso8859")) && (in[plen] != '-'))
387 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
388 else if ((plen = mutt_istr_startswith(in, "iso8859-")))
389 snprintf(scratch, sizeof(scratch), "iso_8859-%s", in + plen);
390 else
391 mutt_str_copy(scratch, in, sizeof(scratch));
392
393 for (size_t i = 0; PreferredMimeNames[i].key; i++)
394 {
395 if (mutt_istr_equal(scratch, PreferredMimeNames[i].key))
396 {
397 buf_strcpy(canon, PreferredMimeNames[i].pref);
398 goto out;
399 }
400 }
401
402 buf_strcpy(canon, scratch);
403 buf_lower(canon); // for cosmetics' sake
404
405out:
406 if (ext && (*ext != '\0'))
407 {
408 buf_addch(canon, '/');
409 buf_addstr(canon, ext);
410 }
411
412 mutt_str_copy(buf, buf_string(canon), buflen);
413 buf_pool_release(&canon);
414}
415
/**
 * mutt_ch_chscmp - Are the names of two character sets equivalent?
 * @param cs1 First character set (canonicalised before comparing)
 * @param cs2 Second character set (compared as given)
 * @retval true  The names match over the length of the shorter one
 * @retval false Either name is NULL, or the names differ
 *
 * The comparison ignores case and only covers the first MIN(len1, len2)
 * characters, so one name being a prefix of the other counts as a match.
 */
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
{
  if (!cs1 || !cs2)
    return false;

  char buf[256] = { 0 };
  mutt_ch_canonical_charset(buf, sizeof(buf), cs1);

  /* Keep the lengths as size_t; they are passed on as a size_t count */
  const size_t len1 = mutt_str_len(buf);
  const size_t len2 = mutt_str_len(cs2);

  return mutt_istrn_equal(buf, cs2, MIN(len1, len2));
}
443
451const char *mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
452{
453 static char fcharset[128];
454 const char *c = NULL;
455
456 if (assumed_charset && (assumed_charset->count > 0))
457 c = STAILQ_FIRST(&assumed_charset->head)->data;
458 else
459 c = "us-ascii";
460
461 mutt_str_copy(fcharset, c, sizeof(fcharset));
462 return fcharset;
463}
464
/**
 * mutt_ch_get_langinfo_charset - Get the user's choice of character set
 * @retval ptr Newly allocated charset string; the caller must free it
 *
 * Canonicalises nl_langinfo(CODESET).  If that yields an empty name,
 * "iso-8859-1" is returned instead.
 */
char *mutt_ch_get_langinfo_charset(void)
{
  char buf[1024] = { 0 };

  mutt_ch_canonical_charset(buf, sizeof(buf), nl_langinfo(CODESET));

  if (buf[0] == '\0')
    return mutt_str_dup("iso-8859-1");

  return mutt_str_dup(buf);
}
483
495bool mutt_ch_lookup_add(enum LookupType type, const char *pat,
496 const char *replace, struct Buffer *err)
497{
498 if (!pat || !replace)
499 return false;
500
501 regex_t *rx = MUTT_MEM_CALLOC(1, regex_t);
502 int rc = REG_COMP(rx, pat, REG_ICASE);
503 if (rc != 0)
504 {
505 regerror(rc, rx, err->data, err->dsize);
506 FREE(&rx);
507 return false;
508 }
509
510 struct Lookup *l = lookup_new();
511 l->type = type;
512 l->replacement = mutt_str_dup(replace);
513 l->regex.pattern = mutt_str_dup(pat);
514 l->regex.regex = rx;
515 l->regex.pat_not = false;
516
517 TAILQ_INSERT_TAIL(&Lookups, l, entries);
518
519 return true;
520}
521
528{
529 struct Lookup *l = NULL;
530 struct Lookup *tmp = NULL;
531
532 TAILQ_FOREACH_SAFE(l, &Lookups, entries, tmp)
533 {
534 TAILQ_REMOVE(&Lookups, l, entries);
535 lookup_free(&l);
536 }
537}
538
548const char *mutt_ch_charset_lookup(const char *chs)
549{
551}
552
580iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
581{
582 char tocode1[128] = { 0 };
583 char fromcode1[128] = { 0 };
584 const char *tocode2 = NULL, *fromcode2 = NULL;
585 const char *tmp = NULL;
586
587 /* transform to MIME preferred charset names */
588 mutt_ch_canonical_charset(tocode1, sizeof(tocode1), tocode);
589 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), fromcode);
590
591 /* maybe apply charset-hooks and recanonicalise fromcode,
592 * but only when caller asked us to sanitize a potentially wrong
593 * charset name incoming from the wild exterior. */
594 if (flags & MUTT_ICONV_HOOK_FROM)
595 {
596 tmp = mutt_ch_charset_lookup(fromcode1);
597 if (tmp)
598 mutt_ch_canonical_charset(fromcode1, sizeof(fromcode1), tmp);
599 }
600
601 /* check if we have this pair cached already */
602 for (int i = 0; i < IconvCacheUsed; i++)
603 {
604 if (strcmp(tocode1, IconvCache[i].tocode1) == 0 &&
605 strcmp(fromcode1, IconvCache[i].fromcode1) == 0)
606 {
607 iconv_t cd = IconvCache[i].cd;
608
609 /* make room for this one at the top */
610 struct IconvCacheEntry top = IconvCache[i];
611 for (int j = i - 1; j >= 0; j--)
612 {
613 IconvCache[j + 1] = IconvCache[j];
614 }
615 IconvCache[0] = top;
616
617 if (iconv_t_valid(cd))
618 {
619 /* reset state */
620 iconv(cd, NULL, NULL, NULL, NULL);
621 }
622 return cd;
623 }
624 }
625
626 /* not found in cache */
627 /* always apply iconv-hooks to suit system's iconv tastes */
628 tocode2 = mutt_ch_iconv_lookup(tocode1);
629 tocode2 = tocode2 ? tocode2 : tocode1;
630 fromcode2 = mutt_ch_iconv_lookup(fromcode1);
631 fromcode2 = fromcode2 ? fromcode2 : fromcode1;
632
633 /* call system iconv with names it appreciates */
634 iconv_t cd = iconv_open(tocode2, fromcode2);
635
637 {
638 mutt_debug(LL_DEBUG2, "iconv: dropping %s -> %s from the cache\n",
641 /* get rid of the oldest entry */
645 {
646 iconv_close(IconvCache[IconvCacheUsed - 1].cd);
647 }
649 }
650
651 /* make room for this one at the top */
652 for (int j = IconvCacheUsed - 1; j >= 0; j--)
653 {
654 IconvCache[j + 1] = IconvCache[j];
655 }
656
658
659 mutt_debug(LL_DEBUG2, "iconv: adding %s -> %s to the cache\n", fromcode1, tocode1);
660 IconvCache[0].fromcode1 = strdup(fromcode1);
661 IconvCache[0].tocode1 = strdup(tocode1);
662 IconvCache[0].cd = cd;
663
664 return cd;
665}
666
683size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
684 char **outbuf, size_t *outbytesleft, const char **inrepls,
685 const char *outrepl, int *iconverrno)
686{
687 size_t rc = 0;
688 const char *ib = *inbuf;
689 size_t ibl = *inbytesleft;
690 char *ob = *outbuf;
691 size_t obl = *outbytesleft;
692
693 while (true)
694 {
695 errno = 0;
696 const size_t ret1 = iconv(cd, (ICONV_CONST char **) &ib, &ibl, &ob, &obl);
697 if (ret1 != ICONV_ILLEGAL_SEQ)
698 rc += ret1;
699 if (iconverrno)
700 *iconverrno = errno;
701
702 if (ibl && obl && (errno == EILSEQ))
703 {
704 if (inrepls)
705 {
706 /* Try replacing the input */
707 const char **t = NULL;
708 for (t = inrepls; *t; t++)
709 {
710 const char *ib1 = *t;
711 size_t ibl1 = strlen(*t);
712 char *ob1 = ob;
713 size_t obl1 = obl;
714 iconv(cd, (ICONV_CONST char **) &ib1, &ibl1, &ob1, &obl1);
715 if (ibl1 == 0)
716 {
717 ib++;
718 ibl--;
719 ob = ob1;
720 obl = obl1;
721 rc++;
722 break;
723 }
724 }
725 if (*t)
726 continue;
727 }
728 /* Replace the output */
729 if (!outrepl)
730 outrepl = "?";
731 iconv(cd, NULL, NULL, &ob, &obl);
732 if (obl)
733 {
734 int n = strlen(outrepl);
735 if (n > obl)
736 {
737 outrepl = "?";
738 n = 1;
739 }
740 memcpy(ob, outrepl, n);
741 ib++;
742 ibl--;
743 ob += n;
744 obl -= n;
745 rc++;
746 iconv(cd, NULL, NULL, NULL, NULL); /* for good measure */
747 continue;
748 }
749 }
750 *inbuf = ib;
751 *inbytesleft = ibl;
752 *outbuf = ob;
753 *outbytesleft = obl;
754 return rc;
755 }
756}
757
767const char *mutt_ch_iconv_lookup(const char *chs)
768{
770}
771
782int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
783{
784 if (!s || !from || !to)
785 return -1;
786
787 int rc = 0;
788 iconv_t cd = mutt_ch_iconv_open(to, from, MUTT_ICONV_NO_FLAGS);
789 if (!iconv_t_valid(cd))
790 return -1;
791
792 size_t outlen = MB_LEN_MAX * slen;
793 char *out = MUTT_MEM_MALLOC(outlen + 1, char);
794 char *saved_out = out;
795
796 const size_t convlen = iconv(cd, (ICONV_CONST char **) &s, &slen, &out, &outlen);
797 if (convlen == ICONV_ILLEGAL_SEQ)
798 rc = errno;
799
800 FREE(&saved_out);
801 return rc;
802}
803
817int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
818{
819 if (!ps)
820 return -1;
821
822 char *s = *ps;
823
824 if (!s || (*s == '\0'))
825 return 0;
826
827 if (!to || !from)
828 return -1;
829
830 const char *repls[] = { "\357\277\275", "?", 0 };
831 int rc = 0;
832
833 iconv_t cd = mutt_ch_iconv_open(to, from, flags);
834 if (!iconv_t_valid(cd))
835 return -1;
836
837 const char **inrepls = NULL;
838 const char *outrepl = NULL;
839
840 if (mutt_ch_is_utf8(to))
841 outrepl = "\357\277\275";
842 else if (mutt_ch_is_utf8(from))
843 inrepls = repls;
844 else
845 outrepl = "?";
846
847 const char *ib = s;
848 size_t ibl = strlen(s);
849 if (ibl >= (SIZE_MAX / MB_LEN_MAX))
850 {
851 return -1;
852 }
853 size_t obl = MB_LEN_MAX * ibl;
854 char *buf = MUTT_MEM_MALLOC(obl + 1, char);
855 char *ob = buf;
856
857 mutt_ch_iconv(cd, &ib, &ibl, &ob, &obl, inrepls, outrepl, &rc);
858 iconv(cd, 0, 0, &ob, &obl);
859
860 *ob = '\0';
861
862 FREE(ps);
863 *ps = buf;
864
865 mutt_str_adjust(ps);
866 return rc;
867}
868
880bool mutt_ch_check_charset(const char *cs, bool strict)
881{
882 if (!cs)
883 return false;
884
885 if (mutt_ch_is_utf8(cs))
886 return true;
887
888 if (!strict)
889 {
890 for (int i = 0; PreferredMimeNames[i].key; i++)
891 {
892 if (mutt_istr_equal(PreferredMimeNames[i].key, cs) ||
894 {
895 return true;
896 }
897 }
898 }
899
900 iconv_t cd = mutt_ch_iconv_open(cs, cs, MUTT_ICONV_NO_FLAGS);
901 if (iconv_t_valid(cd))
902 {
903 return true;
904 }
905
906 return false;
907}
908
919struct FgetConv *mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
920{
921 iconv_t cd = ICONV_T_INVALID;
922
923 if (from && to)
924 cd = mutt_ch_iconv_open(to, from, flags);
925
926 struct FgetConv *fc = MUTT_MEM_CALLOC(1, struct FgetConv);
927 fc->fp = fp;
928 fc->cd = cd;
929
930 if (iconv_t_valid(cd))
931 {
932 static const char *repls[] = { "\357\277\275", "?", 0 };
933
934 fc->p = fc->bufo;
935 fc->ob = fc->bufo;
936 fc->ib = fc->bufi;
937 fc->ibl = 0;
938 fc->inrepls = mutt_ch_is_utf8(to) ? repls : repls + 1;
939 }
940
941 return fc;
942}
943
/**
 * mutt_ch_fgetconv_close - Close an fgetconv handle
 * @param[out] ptr fgetconv handle, set to NULL by FREE()
 *
 * Only the FgetConv itself is freed here; this function does not touch
 * the FILE or the iconv descriptor it refers to.
 */
void mutt_ch_fgetconv_close(struct FgetConv **ptr)
{
  if (!ptr || !*ptr)
    return;

  FREE(ptr);
}
955
967{
968 if (!fc)
969 return EOF;
970 if (!iconv_t_valid(fc->cd))
971 return fgetc(fc->fp);
972 if (!fc->p)
973 return EOF;
974 if (fc->p < fc->ob)
975 return (unsigned char) *(fc->p)++;
976
977 /* Try to convert some more */
978 fc->p = fc->bufo;
979 fc->ob = fc->bufo;
980 if (fc->ibl)
981 {
982 size_t obl = sizeof(fc->bufo);
983 iconv(fc->cd, (ICONV_CONST char **) &fc->ib, &fc->ibl, &fc->ob, &obl);
984 if (fc->p < fc->ob)
985 return (unsigned char) *(fc->p)++;
986 }
987
988 /* If we trusted iconv a bit more, we would at this point
989 * ask why it had stopped converting ... */
990
991 /* Try to read some more */
992 if ((fc->ibl == sizeof(fc->bufi)) ||
993 (fc->ibl && (fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))))
994 {
995 fc->p = 0;
996 return EOF;
997 }
998 if (fc->ibl)
999 memmove(fc->bufi, fc->ib, fc->ibl);
1000 fc->ib = fc->bufi;
1001 fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl, fc->fp);
1002
1003 /* Try harder this time to convert some */
1004 if (fc->ibl)
1005 {
1006 size_t obl = sizeof(fc->bufo);
1007 mutt_ch_iconv(fc->cd, (const char **) &fc->ib, &fc->ibl, &fc->ob, &obl,
1008 fc->inrepls, 0, NULL);
1009 if (fc->p < fc->ob)
1010 return (unsigned char) *(fc->p)++;
1011 }
1012
1013 /* Either the file has finished or one of the buffers is too small */
1014 fc->p = 0;
1015 return EOF;
1016}
1017
/**
 * mutt_ch_fgetconvs - Convert a file's charset into a string buffer
 * @param buf    Buffer for the result
 * @param buflen Length of the buffer
 * @param fc     FgetConv handle
 * @retval ptr  buf, holding at least one converted byte
 * @retval NULL buf is NULL, buflen is 0, or nothing could be read
 *
 * Reads converted bytes until a newline (which is stored), EOF, or until
 * buflen - 1 bytes are stored.  The result is always NUL-terminated.
 */
char *mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
{
  /* A zero-length buffer has no room even for the terminating NUL */
  if (!buf || (buflen == 0))
    return NULL;

  size_t r = 0;
  while ((r + 1) < buflen)
  {
    const int c = mutt_ch_fgetconv(fc);
    if (c == EOF)
      break;
    buf[r++] = (char) c;
    if (c == '\n')
      break;
  }
  buf[r] = '\0';

  return (r > 0) ? buf : NULL;
}
1050
1061void mutt_ch_set_charset(const char *charset)
1062{
1063 char buf[256] = { 0 };
1064
1065 mutt_ch_canonical_charset(buf, sizeof(buf), charset);
1066
1067 if (mutt_ch_is_utf8(buf))
1068 {
1069 CharsetIsUtf8 = true;
1070 ReplacementChar = 0xfffd; /* replacement character */
1071 }
1072 else
1073 {
1074 CharsetIsUtf8 = false;
1075 ReplacementChar = '?';
1076 }
1077
1078#if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1079 bind_textdomain_codeset(PACKAGE, buf);
1080#endif
1081}
1082
1094char *mutt_ch_choose(const char *fromcode, const struct Slist *charsets,
1095 const char *u, size_t ulen, char **d, size_t *dlen)
1096{
1097 if (!fromcode || !charsets)
1098 return NULL;
1099
1100 char *e = NULL, *tocode = NULL;
1101 size_t elen = 0, bestn = 0;
1102
1103 const struct ListNode *np = NULL;
1104 STAILQ_FOREACH(np, &charsets->head, entries)
1105 {
1106 char *t = mutt_str_dup(np->data);
1107 if (!t)
1108 continue;
1109
1110 size_t n = mutt_str_len(t);
1111 char *s = mutt_strn_dup(u, ulen);
1112 const int rc = d ? mutt_ch_convert_string(&s, fromcode, t, MUTT_ICONV_NO_FLAGS) :
1113 mutt_ch_check(s, ulen, fromcode, t);
1114 if (rc)
1115 {
1116 FREE(&t);
1117 FREE(&s);
1118 continue;
1119 }
1120 size_t slen = mutt_str_len(s);
1121
1122 if (!tocode || (n < bestn))
1123 {
1124 bestn = n;
1125 FREE(&tocode);
1126 tocode = t;
1127 if (d)
1128 {
1129 FREE(&e);
1130 e = s;
1131 }
1132 else
1133 {
1134 FREE(&s);
1135 }
1136 elen = slen;
1137 }
1138 else
1139 {
1140 FREE(&t);
1141 FREE(&s);
1142 }
1143 }
1144 if (tocode)
1145 {
1146 if (d)
1147 *d = e;
1148 if (dlen)
1149 *dlen = elen;
1150
1151 char canonical_buf[1024] = { 0 };
1152 mutt_ch_canonical_charset(canonical_buf, sizeof(canonical_buf), tocode);
1153 mutt_str_replace(&tocode, canonical_buf);
1154 }
1155 return tocode;
1156}
1157
1162{
1163 for (int i = 0; i < IconvCacheUsed; i++)
1164 {
1165 FREE(&IconvCache[i].fromcode1);
1166 FREE(&IconvCache[i].tocode1);
1167 if (iconv_t_valid(IconvCache[i].cd))
1168 {
1169 iconv_close(IconvCache[i].cd);
1170 }
1171 }
1172 IconvCacheUsed = 0;
1173}
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition buffer.c:241
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition buffer.c:395
void buf_lower(struct Buffer *buf)
Sets a buffer to lowercase.
Definition buffer.c:734
General purpose object for storing and parsing strings.
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition buffer.h:96
#define mutt_debug(LEVEL,...)
Definition logging2.h:91
Singly-linked list type.
Logging Dispatcher.
@ LL_DEBUG2
Log at debug level 2.
Definition logging2.h:46
Memory management wrappers.
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
#define MIN(a, b)
Return the minimum of two values.
Definition memory.h:40
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
#define MUTT_MEM_MALLOC(n, type)
Definition memory.h:53
bool mutt_ch_check_charset(const char *cs, bool strict)
Does iconv understand a character set?
Definition charset.c:880
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
Definition charset.c:683
void mutt_ch_lookup_remove(void)
Remove all the character set lookups.
Definition charset.c:527
static int IconvCacheUsed
Number of iconv descriptors in the cache.
Definition charset.c:86
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
Definition charset.c:1094
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
Definition charset.c:317
struct LookupList Lookups
Lookup table of preferred character set names.
Definition charset.c:69
char * mutt_ch_get_langinfo_charset(void)
Get the user's choice of character set.
Definition charset.c:472
static const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
Definition charset.c:107
bool mutt_ch_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
Add a new character set lookup.
Definition charset.c:495
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition charset.c:360
void mutt_ch_cache_cleanup(void)
Clean up the cached iconv handles and charset strings.
Definition charset.c:1161
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
Definition charset.c:767
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
Definition charset.c:817
void mutt_ch_set_charset(const char *charset)
Update the records for a new character set.
Definition charset.c:1061
bool CharsetIsUtf8
Is the user's current character set utf-8?
Definition charset.c:66
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
Definition charset.c:289
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
Definition charset.c:782
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
Definition charset.c:548
static struct Lookup * lookup_new(void)
Create a new Lookup.
Definition charset.c:255
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
Definition charset.c:966
#define ICONV_CACHE_SIZE
Max size of the iconv cache.
Definition charset.c:82
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
Definition charset.c:264
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
Definition charset.c:61
#define EILSEQ
Definition charset.c:55
struct FgetConv * mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
Prepare a file for charset conversion.
Definition charset.c:919
static struct IconvCacheEntry IconvCache[ICONV_CACHE_SIZE]
Cache of iconv conversion descriptors.
Definition charset.c:84
char * mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
Convert a file's charset into a string buffer.
Definition charset.c:1028
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
Are the names of two character sets equivalent?
Definition charset.c:428
void mutt_ch_fgetconv_close(struct FgetConv **ptr)
Close an fgetconv handle.
Definition charset.c:948
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
Definition charset.c:580
const char * mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
Get the default character set.
Definition charset.c:451
Conversion between different character encodings.
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
Definition charset.h:67
#define ICONV_T_INVALID
Error value for iconv functions.
Definition charset.h:111
#define mutt_ch_is_utf8(str)
Definition charset.h:107
LookupType
Types of character set lookups.
Definition charset.h:61
@ MUTT_LOOKUP_ICONV
Character set conversion.
Definition charset.h:63
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
Definition charset.h:62
#define MUTT_ICONV_NO_FLAGS
No flags are set.
Definition charset.h:66
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
Definition charset.h:123
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition regex.c:614
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
Definition string.c:384
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition string.c:674
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition string.c:257
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
Definition string.c:303
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
Definition string.c:500
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
Definition string.c:583
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
Definition string.c:246
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
Definition string.c:457
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
Definition string.c:284
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:91
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:111
A global pool of Buffers.
#define TAILQ_FOREACH(var, head, field)
Definition queue.h:782
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition queue.h:792
#define STAILQ_FIRST(head)
Definition queue.h:388
#define TAILQ_INSERT_TAIL(head, elm, field)
Definition queue.h:866
#define STAILQ_FOREACH(var, head, field)
Definition queue.h:390
#define TAILQ_REMOVE(head, elm, field)
Definition queue.h:901
#define TAILQ_HEAD_INITIALIZER(head)
Definition queue.h:694
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition regex3.h:49
A separated list of strings.
String manipulation functions.
String manipulation buffer.
Definition buffer.h:36
size_t dsize
Length of data.
Definition buffer.h:39
char * data
Pointer to data.
Definition buffer.h:37
Cursor for converting a file's encoding.
Definition charset.h:45
char bufi[512]
Input buffer.
Definition charset.h:48
iconv_t cd
iconv conversion descriptor
Definition charset.h:47
char bufo[512]
Output buffer.
Definition charset.h:49
size_t ibl
Input buffer length.
Definition charset.h:53
FILE * fp
File to read from.
Definition charset.h:46
char * p
Current position in output buffer.
Definition charset.h:50
const char ** inrepls
Replacement characters.
Definition charset.h:54
char * ib
Current position in input buffer.
Definition charset.h:52
char * ob
End of output buffer.
Definition charset.h:51
Cached iconv conversion descriptor.
Definition charset.c:75
char * tocode1
Destination character set.
Definition charset.c:77
char * fromcode1
Source character set.
Definition charset.c:76
iconv_t cd
iconv conversion descriptor
Definition charset.c:78
A List node for strings.
Definition list.h:37
char * data
String.
Definition list.h:38
Regex to String lookup table.
Definition charset.h:75
char * replacement
Alternative charset to use.
Definition charset.h:78
enum LookupType type
Lookup type.
Definition charset.h:76
struct Regex regex
Regular expression.
Definition charset.h:77
MIME name lookup entry.
Definition charset.c:92
const char * key
Charset alias.
Definition charset.c:93
const char * pref
Preferred MIME name.
Definition charset.c:94
char * pattern
printable version
Definition regex3.h:86
bool pat_not
do not match
Definition regex3.h:88
regex_t * regex
compiled expression
Definition regex3.h:87
String list.
Definition slist.h:37
struct ListHead head
List containing values.
Definition slist.h:38
size_t count
Number of values in list.
Definition slist.h:39