NeoMutt  2025-12-11-694-ga89709
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
content_info.c
Go to the documentation of this file.
1
23
29
30#include "config.h"
31#include <errno.h>
32#include <stdbool.h>
33#include <stdio.h>
34#include <string.h>
35#include <sys/stat.h>
36#include "mutt/lib.h"
37#include "config/lib.h"
38#include "email/lib.h"
39#include "core/lib.h"
40#include "lib.h"
41
49void mutt_update_content_info(struct Content *info, struct ContentState *s,
50 char *buf, size_t buflen)
51{
52 bool from = s->from;
53 int whitespace = s->whitespace;
54 bool dot = s->dot;
55 int linelen = s->linelen;
56 bool was_cr = s->was_cr;
57
58 /* A NULL buffer signals end-of-file; finalize any pending state */
59 if (!buf) /* This signals EOF */
60 {
61 if (was_cr)
62 info->binary = true;
63 if (linelen > info->linemax)
64 info->linemax = linelen;
65
66 return;
67 }
68
69 /* Scan each byte in the buffer and classify it for MIME encoding decisions */
70 for (; buflen; buf++, buflen--)
71 {
72 char ch = *buf;
73
74 /* A CR not followed by LF indicates binary content */
75 if (was_cr)
76 {
77 was_cr = false;
78 if (ch == '\n')
79 {
80 /* CR+LF: complete line ending; record line statistics */
81 if (whitespace)
82 info->space = true;
83 if (dot)
84 info->dot = true;
85 if (linelen > info->linemax)
86 info->linemax = linelen;
87 whitespace = 0;
88 dot = false;
89 linelen = 0;
90 continue;
91 }
92
93 info->binary = true;
94 }
95
96 linelen++;
97 if (ch == '\n')
98 {
99 /* Bare LF line ending */
100 info->crlf++;
101 if (whitespace)
102 info->space = true;
103 if (dot)
104 info->dot = true;
105 if (linelen > info->linemax)
106 info->linemax = linelen;
107 whitespace = 0;
108 linelen = 0;
109 dot = false;
110 }
111 else if (ch == '\r')
112 {
113 info->crlf++;
114 info->cr = true;
115 was_cr = true;
116 continue;
117 }
118 else if (ch & 0x80)
119 {
120 /* High-bit character: needs 8-bit or Base64 encoding */
121 info->hibin++;
122 }
123 else if ((ch == '\t') || (ch == '\f'))
124 {
125 info->ascii++;
126 whitespace++;
127 }
128 else if (ch == 0)
129 {
130 /* NUL byte: forces binary encoding */
131 info->nulbin++;
132 info->lobin++;
133 }
134 else if ((ch < 32) || (ch == 127))
135 {
136 info->lobin++;
137 }
138 else
139 {
140 /* Detect "From " at the start of a line (mbox from-quoting) */
141 if (linelen == 1)
142 {
143 if ((ch == 'F') || (ch == 'f'))
144 from = true;
145 else
146 from = false;
147 if (ch == '.')
148 dot = true;
149 else
150 dot = false;
151 }
152 else if (from)
153 {
154 /* Check chars 2-4 for "rom" to complete "From" detection */
155 if ((linelen == 2) && (ch != 'r'))
156 {
157 from = false;
158 }
159 else if ((linelen == 3) && (ch != 'o'))
160 {
161 from = false;
162 }
163 else if (linelen == 4)
164 {
165 if (ch == 'm')
166 info->from = true;
167 from = false;
168 }
169 }
170 if (ch == ' ')
171 whitespace++;
172 info->ascii++;
173 }
174
175 if (linelen > 1)
176 dot = false;
177 if ((ch != ' ') && (ch != '\t'))
178 whitespace = 0;
179 }
180
181 s->from = from;
182 s->whitespace = whitespace;
183 s->dot = dot;
184 s->linelen = linelen;
185 s->was_cr = was_cr;
186}
187
197struct Content *mutt_get_content_info(const char *fname, struct Body *b,
198 struct ConfigSubset *sub)
199{
200 struct Content *info = NULL;
201 struct ContentState cstate = { 0 };
202 FILE *fp = NULL;
203 char *fromcode = NULL;
204 char *tocode = NULL;
205 char buf[100] = { 0 };
206 size_t r;
207
208 struct stat st = { 0 };
209
210 if (b && !fname)
211 fname = b->filename;
212 if (!fname)
213 return NULL;
214
215 fp = mutt_file_fopen(fname, "r");
216 if (!fp)
217 {
218 mutt_debug(LL_DEBUG1, "%s: %s (errno %d)\n", fname, strerror(errno), errno);
219 return NULL;
220 }
221
222 if (fstat(fileno(fp), &st) == -1)
223 {
224 mutt_error(_("Can't stat %s: %s"), fname, strerror(errno));
225 mutt_file_fclose(&fp);
226 return NULL;
227 }
228
229 if (!S_ISREG(st.st_mode))
230 {
231 mutt_error(_("%s isn't a regular file"), fname);
232 mutt_file_fclose(&fp);
233 return NULL;
234 }
235
236 info = MUTT_MEM_CALLOC(1, struct Content);
237
238 const char *const c_charset = cc_charset();
239 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
240 {
241 const struct Slist *const c_attach_charset = cs_subset_slist(sub, "attach_charset");
242 const struct Slist *const c_send_charset = cs_subset_slist(sub, "send_charset");
243 struct Slist *c_charset_slist = slist_parse(c_charset, D_SLIST_SEP_COLON);
244
245 const struct Slist *fchs = b->use_disp ?
246 (c_attach_charset ? c_attach_charset : c_charset_slist) :
247 c_charset_slist;
248
249 struct Slist *chs = slist_parse(mutt_param_get(&b->parameter, "charset"), D_SLIST_SEP_COLON);
250
251 if (c_charset && (chs || c_send_charset) &&
252 (mutt_convert_file_from_to(fp, fchs, chs ? chs : c_send_charset, &fromcode,
253 &tocode, info) != ICONV_ILLEGAL_SEQ))
254 {
255 if (!chs)
256 {
257 char chsbuf[256] = { 0 };
258 mutt_ch_canonical_charset(chsbuf, sizeof(chsbuf), tocode);
259 mutt_param_set(&b->parameter, "charset", chsbuf);
260 }
261 FREE(&b->charset);
262 b->charset = fromcode;
263 fromcode = NULL;
264 FREE(&tocode);
265 mutt_file_fclose(&fp);
266 slist_free(&c_charset_slist);
267 slist_free(&chs);
268 return info;
269 }
270
271 slist_free(&c_charset_slist);
272 slist_free(&chs);
273 }
274
275 rewind(fp);
276 while ((r = fread(buf, 1, sizeof(buf), fp)))
277 mutt_update_content_info(info, &cstate, buf, r);
278 mutt_update_content_info(info, &cstate, 0, 0);
279
280 mutt_file_fclose(&fp);
281
282 if (b && (b->type == TYPE_TEXT) && (!b->noconv && !b->force_charset))
283 {
284 mutt_param_set(&b->parameter, "charset",
285 (!info->hibin ? "us-ascii" :
286 c_charset && !mutt_ch_is_us_ascii(c_charset) ? c_charset :
287 "unknown-8bit"));
288 }
289
290 return info;
291}
const struct Slist * cs_subset_slist(const struct ConfigSubset *sub, const char *name)
Get a string-list config item by name.
Definition helpers.c:242
Convenience wrapper for the config headers.
const char * cc_charset(void)
Get the cached value of $charset.
void mutt_update_content_info(struct Content *info, struct ContentState *s, char *buf, size_t buflen)
Cache some info about an email.
struct Content * mutt_get_content_info(const char *fname, struct Body *b, struct ConfigSubset *sub)
Analyze file to determine MIME encoding to use.
Conversion between different character encodings.
size_t mutt_convert_file_from_to(FILE *fp, const struct Slist *fromcodes, const struct Slist *tocodes, char **fromcode, char **tocode, struct Content *info)
Convert a file between encodings.
Definition convert.c:215
Convenience wrapper for the core headers.
Structs that make up an email.
#define mutt_file_fclose(FP)
Definition file.h:139
#define mutt_file_fopen(PATH, MODE)
Definition file.h:138
#define mutt_error(...)
Definition logging2.h:94
#define mutt_debug(LEVEL,...)
Definition logging2.h:91
@ LL_DEBUG1
Log at debug level 1.
Definition logging2.h:45
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
@ TYPE_TEXT
Type: 'text/*'.
Definition mime.h:38
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
Definition charset.c:360
#define mutt_ch_is_us_ascii(str)
Definition charset.h:108
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
Definition charset.h:114
Convenience wrapper for the library headers.
#define _(a)
Definition message.h:28
struct Slist * slist_parse(const char *str, uint32_t flags)
Parse a list of strings into a list.
Definition slist.c:177
void slist_free(struct Slist **ptr)
Free an Slist object.
Definition slist.c:124
char * mutt_param_get(const struct ParameterList *pl, const char *s)
Find a matching Parameter.
Definition parameter.c:85
void mutt_param_set(struct ParameterList *pl, const char *attribute, const char *value)
Set a Parameter.
Definition parameter.c:111
The body of an email.
Definition body.h:36
bool noconv
Don't do character set conversion.
Definition body.h:46
char * charset
Send mode: charset of attached file as stored on disk.
Definition body.h:79
struct ParameterList parameter
Parameters of the content-type.
Definition body.h:63
bool use_disp
Content-Disposition uses filename= ?
Definition body.h:47
bool force_charset
Send mode: don't adjust the character set when in send-mode.
Definition body.h:44
unsigned int type
content-type primary type, ContentType
Definition body.h:40
char * filename
When sending a message, this is the file to which this structure refers.
Definition body.h:59
A set of inherited config items.
Definition subset.h:46
Info about the body of an email.
Definition content.h:56
bool was_cr
Was the last character CR?
Definition content.h:61
int whitespace
Number of trailing whitespaces.
Definition content.h:58
bool from
Is the current line a prefix of "From "?
Definition content.h:57
int linelen
Length of the current line.
Definition content.h:60
bool dot
Was the last character a dot?
Definition content.h:59
Info about an attachment.
Definition content.h:35
long crlf
\r and \n characters
Definition content.h:39
long hibin
8-bit characters
Definition content.h:36
bool cr
Has CR, even when in a CRLF pair.
Definition content.h:46
bool space
Whitespace at the end of lines?
Definition content.h:42
long ascii
Number of ascii chars.
Definition content.h:40
bool binary
Long lines, or CR not in CRLF pair.
Definition content.h:43
bool from
Has a line beginning with "From "?
Definition content.h:44
long nulbin
Null characters (0x0)
Definition content.h:38
long linemax
Length of the longest line in the file.
Definition content.h:41
long lobin
Unprintable 7-bit chars (eg., control chars)
Definition content.h:37
bool dot
Has a line consisting of a single dot?
Definition content.h:45
String list.
Definition slist.h:37
#define D_SLIST_SEP_COLON
Slist items are colon-separated.
Definition types.h:112