NeoMutt  2025-09-05-55-g97fc89
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
regex.c
Go to the documentation of this file.
1
26
32
33#include "config.h"
34#include <regex.h>
35#include <stdbool.h>
36#include <stdint.h>
37#include <stdlib.h>
38#include "config/types.h"
39#include "atoi.h"
40#include "buffer.h"
41#include "ctype2.h"
42#include "logging2.h"
43#include "mbyte.h"
44#include "memory.h"
45#include "message.h"
46#include "pool.h"
47#include "queue.h"
48#include "regex3.h"
49#include "string2.h"
50
58struct Regex *mutt_regex_compile(const char *str, uint16_t flags)
59{
60 if (!str || (*str == '\0'))
61 return NULL;
62 struct Regex *rx = MUTT_MEM_CALLOC(1, struct Regex);
63 rx->pattern = mutt_str_dup(str);
64 rx->regex = MUTT_MEM_CALLOC(1, regex_t);
65 if (REG_COMP(rx->regex, str, flags) != 0)
66 mutt_regex_free(&rx);
67
68 return rx;
69}
70
79struct Regex *mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
80{
81 if (!str || (*str == '\0'))
82 return NULL;
83
84 uint16_t rflags = 0;
85 struct Regex *reg = MUTT_MEM_CALLOC(1, struct Regex);
86
87 reg->regex = MUTT_MEM_CALLOC(1, regex_t);
88 reg->pattern = mutt_str_dup(str);
89
90 /* Should we use smart case matching? */
91 if (((flags & D_REGEX_MATCH_CASE) == 0) && mutt_mb_is_lower(str))
92 rflags |= REG_ICASE;
93
94 /* Is a prefix of '!' allowed? */
95 if (((flags & D_REGEX_ALLOW_NOT) != 0) && (str[0] == '!'))
96 {
97 reg->pat_not = true;
98 str++;
99 }
100
101 int rc = REG_COMP(reg->regex, str, rflags);
102 if (rc != 0)
103 {
104 if (err)
105 regerror(rc, reg->regex, err->data, err->dsize);
106 mutt_regex_free(&reg);
107 return NULL;
108 }
109
110 return reg;
111}
112
117void mutt_regex_free(struct Regex **ptr)
118{
119 if (!ptr || !*ptr)
120 return;
121
122 struct Regex *rx = *ptr;
123 FREE(&rx->pattern);
124 if (rx->regex)
125 regfree(rx->regex);
126 FREE(&rx->regex);
127 FREE(ptr);
128}
129
139int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags,
140 struct Buffer *err)
141{
142 if (!rl || !str || (*str == '\0'))
143 return 0;
144
145 struct Regex *rx = mutt_regex_compile(str, flags);
146 if (!rx)
147 {
148 buf_printf(err, "Bad regex: %s\n", str);
149 return -1;
150 }
151
152 /* check to make sure the item is not already on this rl */
153 struct RegexNode *np = NULL;
154 STAILQ_FOREACH(np, rl, entries)
155 {
156 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
157 break; /* already on the rl */
158 }
159
160 if (np)
161 {
162 mutt_regex_free(&rx);
163 }
164 else
165 {
166 np = mutt_regexlist_new();
167 np->regex = rx;
168 STAILQ_INSERT_TAIL(rl, np, entries);
169 }
170
171 return 0;
172}
173
178void mutt_regexlist_free(struct RegexList *rl)
179{
180 if (!rl)
181 return;
182
183 struct RegexNode *np = NULL, *tmp = NULL;
184 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
185 {
186 STAILQ_REMOVE(rl, np, RegexNode, entries);
188 FREE(&np);
189 }
190 STAILQ_INIT(rl);
191}
192
199bool mutt_regexlist_match(struct RegexList *rl, const char *str)
200{
201 if (!rl || !str)
202 return false;
203 struct RegexNode *np = NULL;
204 STAILQ_FOREACH(np, rl, entries)
205 {
206 if (mutt_regex_match(np->regex, str))
207 {
208 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
209 return true;
210 }
211 }
212
213 return false;
214}
215
221{
222 return MUTT_MEM_CALLOC(1, struct RegexNode);
223}
224
234int mutt_regexlist_remove(struct RegexList *rl, const char *str)
235{
236 if (!rl || !str)
237 return -1;
238
239 if (mutt_str_equal("*", str))
240 {
241 mutt_regexlist_free(rl); /* "unCMD *" means delete all current entries */
242 return 0;
243 }
244
245 int rc = -1;
246 struct RegexNode *np = NULL, *tmp = NULL;
247 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
248 {
249 if (mutt_istr_equal(str, np->regex->pattern))
250 {
251 STAILQ_REMOVE(rl, np, RegexNode, entries);
253 FREE(&np);
254 rc = 0;
255 }
256 }
257
258 return rc;
259}
260
270int mutt_replacelist_add(struct ReplaceList *rl, const char *pat,
271 const char *templ, struct Buffer *err)
272{
273 if (!rl || !pat || (*pat == '\0') || !templ)
274 return 0;
275
276 struct Regex *rx = mutt_regex_compile(pat, REG_ICASE);
277 if (!rx)
278 {
279 buf_printf(err, _("Bad regex: %s"), pat);
280 return -1;
281 }
282
283 /* check to make sure the item is not already on this rl */
284 struct Replace *np = NULL;
285 STAILQ_FOREACH(np, rl, entries)
286 {
287 if (mutt_istr_equal(rx->pattern, np->regex->pattern))
288 {
289 /* Already on the rl. Formerly we just skipped this case, but
290 * now we're supporting removals, which means we're supporting
291 * re-adds conceptually. So we probably want this to imply a
292 * removal, then do an add. We can achieve the removal by freeing
293 * the template, and leaving t pointed at the current item. */
294 FREE(&np->templ);
295 break;
296 }
297 }
298
299 /* If np is set, it's pointing into an extant ReplaceList* that we want to
300 * update. Otherwise we want to make a new one to link at the rl's end. */
301 if (np)
302 {
303 mutt_regex_free(&rx);
304 }
305 else
306 {
308 np->regex = rx;
309 rx = NULL;
310 STAILQ_INSERT_TAIL(rl, np, entries);
311 }
312
313 /* Now np is the Replace that we want to modify. It is prepared. */
314 np->templ = mutt_str_dup(templ);
315
316 /* Find highest match number in template string */
317 np->nmatch = 0;
318 for (const char *p = templ; *p;)
319 {
320 if (*p == '%')
321 {
322 int n = 0;
323 const char *end = mutt_str_atoi(++p, &n);
324 if (!end)
325 {
326 // this is not an error, we might have matched %R or %L in subjectrx
327 mutt_debug(LL_DEBUG2, "Invalid match number in replacelist: '%s'\n", p);
328 }
329 if (n > np->nmatch)
330 {
331 np->nmatch = n;
332 }
333 if (end)
334 {
335 p = end;
336 }
337 else
338 {
339 p++;
340 }
341 }
342 else
343 {
344 p++;
345 }
346 }
347
348 if (np->nmatch > np->regex->regex->re_nsub)
349 {
350 if (err)
351 buf_addstr(err, _("Not enough subexpressions for template"));
353 return -1;
354 }
355
356 np->nmatch++; /* match 0 is always the whole expr */
357 return 0;
358}
359
368char *mutt_replacelist_apply(struct ReplaceList *rl, const char *str)
369{
370 if (!rl || !str || (*str == '\0'))
371 return NULL;
372
373 static regmatch_t *pmatch = NULL;
374 static size_t nmatch = 0;
375 char *p = NULL;
376
377 struct Buffer *src = buf_pool_get();
378 struct Buffer *dst = buf_pool_get();
379
380 buf_strcpy(src, str);
381
382 struct Replace *np = NULL;
383 STAILQ_FOREACH(np, rl, entries)
384 {
385 /* If this pattern needs more matches, expand pmatch. */
386 if (np->nmatch > nmatch)
387 {
388 MUTT_MEM_REALLOC(&pmatch, np->nmatch, regmatch_t);
389 nmatch = np->nmatch;
390 }
391
392 if (mutt_regex_capture(np->regex, buf_string(src), np->nmatch, pmatch))
393 {
394 mutt_debug(LL_DEBUG5, "%s matches %s\n", buf_string(src), np->regex->pattern);
395
396 buf_reset(dst);
397 if (np->templ)
398 {
399 for (p = np->templ; *p;)
400 {
401 if (*p == '%')
402 {
403 p++;
404 if (*p == 'L')
405 {
406 p++;
407 buf_addstr_n(dst, buf_string(src), pmatch[0].rm_so);
408 }
409 else if (*p == 'R')
410 {
411 p++;
412 buf_addstr(dst, src->data + pmatch[0].rm_eo);
413 }
414 else
415 {
416 long n = strtoul(p, &p, 10); /* get subst number */
417 if (n < np->nmatch)
418 {
419 buf_addstr_n(dst, src->data + pmatch[n].rm_so,
420 pmatch[n].rm_eo - pmatch[n].rm_so);
421 }
422 while (mutt_isdigit(*p)) /* skip subst token */
423 p++;
424 }
425 }
426 else
427 {
428 buf_addch(dst, *p++);
429 }
430 }
431 }
432
433 buf_strcpy(src, buf_string(dst));
434 mutt_debug(LL_DEBUG5, "subst %s\n", buf_string(dst));
435 }
436 }
437
438 char *result = buf_strdup(src);
439
440 buf_pool_release(&src);
441 buf_pool_release(&dst);
442 return result;
443}
444
449void mutt_replacelist_free(struct ReplaceList *rl)
450{
451 if (!rl)
452 return;
453
454 struct Replace *np = NULL, *tmp = NULL;
455 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
456 {
457 STAILQ_REMOVE(rl, np, Replace, entries);
459 FREE(&np->templ);
460 FREE(&np);
461 }
462}
463
477bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
478{
479 if (!rl || !buf || !str)
480 return false;
481
482 static regmatch_t *pmatch = NULL;
483 static size_t nmatch = 0;
484 int tlen = 0;
485 char *p = NULL;
486
487 struct Replace *np = NULL;
488 STAILQ_FOREACH(np, rl, entries)
489 {
490 /* If this pattern needs more matches, expand pmatch. */
491 if (np->nmatch > nmatch)
492 {
493 MUTT_MEM_REALLOC(&pmatch, np->nmatch, regmatch_t);
494 nmatch = np->nmatch;
495 }
496
497 /* Does this pattern match? */
498 if (mutt_regex_capture(np->regex, str, (size_t) np->nmatch, pmatch))
499 {
500 mutt_debug(LL_DEBUG5, "%s matches %s\n", str, np->regex->pattern);
501 mutt_debug(LL_DEBUG5, "%d subs\n", (int) np->regex->regex->re_nsub);
502
503 /* Copy template into buf, with substitutions. */
504 for (p = np->templ; *p && (tlen < (buflen - 1));)
505 {
506 /* backreference to pattern match substring, eg. %1, %2, etc) */
507 if (*p == '%')
508 {
509 char *e = NULL; /* used as pointer to end of integer backreference in strtol() call */
510
511 p++; /* skip over % char */
512 long n = strtol(p, &e, 10);
513 /* Ensure that the integer conversion succeeded (e!=p) and bounds check. The upper bound check
514 * should not strictly be necessary since add_to_spam_list() finds the largest value, and
515 * the static array above is always large enough based on that value. */
516 if ((e != p) && (n >= 0) && (n < np->nmatch) && (pmatch[n].rm_so != -1))
517 {
518 /* copy as much of the substring match as will fit in the output buffer, saving space for
519 * the terminating nul char */
520 for (int idx = pmatch[n].rm_so;
521 (idx < pmatch[n].rm_eo) && (tlen < (buflen - 1)); idx++)
522 {
523 buf[tlen++] = str[idx];
524 }
525 }
526 p = e; /* skip over the parsed integer */
527 }
528 else
529 {
530 buf[tlen++] = *p++;
531 }
532 }
533 /* tlen should always be less than buflen except when buflen<=0
534 * because the bounds checks in the above code leave room for the
535 * terminal nul char. This should avoid returning an unterminated
536 * string to the caller. When buflen<=0 we make no assumption about
537 * the validity of the buf pointer. */
538 if (tlen < buflen)
539 {
540 buf[tlen] = '\0';
541 mutt_debug(LL_DEBUG5, "\"%s\"\n", buf);
542 }
543 return true;
544 }
545 }
546
547 return false;
548}
549
555{
556 return MUTT_MEM_CALLOC(1, struct Replace);
557}
558
565int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
566{
567 if (!rl || !pat)
568 return 0;
569
570 int nremoved = 0;
571 struct Replace *np = NULL, *tmp = NULL;
572 STAILQ_FOREACH_SAFE(np, rl, entries, tmp)
573 {
574 if (mutt_str_equal(np->regex->pattern, pat))
575 {
576 STAILQ_REMOVE(rl, np, Replace, entries);
578 FREE(&np->templ);
579 FREE(&np);
580 nremoved++;
581 }
582 }
583
584 return nremoved;
585}
586
596bool mutt_regex_capture(const struct Regex *regex, const char *str,
597 size_t nmatch, regmatch_t matches[])
598{
599 if (!regex || !str || !regex->regex)
600 return false;
601
602 int rc = regexec(regex->regex, str, nmatch, matches, 0);
603 return ((rc == 0) ^ regex->pat_not);
604}
605
/**
 * mutt_regex_match - Shorthand to mutt_regex_capture()
 * @param regex Regex to execute
 * @param str   String to test
 * @retval bool Result of mutt_regex_capture() with no capture array
 */
bool mutt_regex_match(const struct Regex *regex, const char *str)
{
  /* No submatches wanted: zero slots and no array */
  const bool rc = mutt_regex_capture(regex, str, 0, NULL);
  return rc;
}
const char * mutt_str_atoi(const char *str, int *dst)
Convert ASCII string to an integer.
Definition atoi.c:191
Parse a number in a string.
int buf_printf(struct Buffer *buf, const char *fmt,...)
Format a string overwriting a Buffer.
Definition buffer.c:161
size_t buf_addstr_n(struct Buffer *buf, const char *s, size_t len)
Add a string to a Buffer, expanding it if necessary.
Definition buffer.c:96
void buf_reset(struct Buffer *buf)
Reset an existing Buffer.
Definition buffer.c:76
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
Definition buffer.c:241
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
Definition buffer.c:226
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
Definition buffer.c:395
char * buf_strdup(const struct Buffer *buf)
Copy a Buffer's string.
Definition buffer.c:571
General purpose object for storing and parsing strings.
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
Definition buffer.h:96
ctype(3) wrapper functions
bool mutt_isdigit(int arg)
Wrapper for isdigit(3)
Definition ctype.c:65
#define mutt_debug(LEVEL,...)
Definition logging2.h:90
Logging Dispatcher.
@ LL_DEBUG5
Log at debug level 5.
Definition logging2.h:48
@ LL_DEBUG2
Log at debug level 2.
Definition logging2.h:45
bool mutt_mb_is_lower(const char *s)
Does a multi-byte string contain only lowercase characters?
Definition mbyte.c:354
Multi-byte String manipulation functions.
Memory management wrappers.
#define FREE(x)
Definition memory.h:62
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:47
#define MUTT_MEM_REALLOC(pptr, n, type)
Definition memory.h:50
Message logging.
#define _(a)
Definition message.h:28
int mutt_replacelist_remove(struct ReplaceList *rl, const char *pat)
Remove a pattern from a list.
Definition regex.c:565
struct Regex * mutt_regex_new(const char *str, uint32_t flags, struct Buffer *err)
Create an Regex from a string.
Definition regex.c:79
struct RegexNode * mutt_regexlist_new(void)
Create a new RegexList.
Definition regex.c:220
struct Regex * mutt_regex_compile(const char *str, uint16_t flags)
Create an Regex from a string.
Definition regex.c:58
void mutt_regexlist_free(struct RegexList *rl)
Free a RegexList object.
Definition regex.c:178
int mutt_regexlist_add(struct RegexList *rl, const char *str, uint16_t flags, struct Buffer *err)
Compile a regex string and add it to a list.
Definition regex.c:139
bool mutt_regex_capture(const struct Regex *regex, const char *str, size_t nmatch, regmatch_t matches[])
Match a regex against a string, with provided options.
Definition regex.c:596
void mutt_replacelist_free(struct ReplaceList *rl)
Free a ReplaceList object.
Definition regex.c:449
int mutt_regexlist_remove(struct RegexList *rl, const char *str)
Remove a Regex from a list.
Definition regex.c:234
bool mutt_replacelist_match(struct ReplaceList *rl, char *buf, size_t buflen, const char *str)
Does a string match a pattern?
Definition regex.c:477
char * mutt_replacelist_apply(struct ReplaceList *rl, const char *str)
Apply replacements to a buffer.
Definition regex.c:368
struct Replace * mutt_replacelist_new(void)
Create a new ReplaceList.
Definition regex.c:554
int mutt_replacelist_add(struct ReplaceList *rl, const char *pat, const char *templ, struct Buffer *err)
Add a pattern and a template to a list.
Definition regex.c:270
bool mutt_regexlist_match(struct RegexList *rl, const char *str)
Does a string match any Regex in the list?
Definition regex.c:199
void mutt_regex_free(struct Regex **ptr)
Free a Regex object.
Definition regex.c:117
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
Definition regex.c:613
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
Definition string.c:672
char * mutt_str_dup(const char *str)
Copy a string, safely.
Definition string.c:255
bool mutt_str_equal(const char *a, const char *b)
Compare two strings.
Definition string.c:660
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
Definition pool.c:82
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
Definition pool.c:96
A global pool of Buffers.
#define STAILQ_REMOVE(head, elm, type, field)
Definition queue.h:441
#define STAILQ_INIT(head)
Definition queue.h:410
#define STAILQ_FOREACH(var, head, field)
Definition queue.h:390
#define STAILQ_INSERT_TAIL(head, elm, field)
Definition queue.h:427
#define STAILQ_FOREACH_SAFE(var, head, field, tvar)
Definition queue.h:400
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
Definition regex3.h:50
String manipulation functions.
String manipulation buffer.
Definition buffer.h:36
size_t dsize
Length of data.
Definition buffer.h:39
char * data
Pointer to data.
Definition buffer.h:37
List of regular expressions.
Definition regex3.h:96
struct Regex * regex
Regex containing a regular expression.
Definition regex3.h:97
Cached regular expression.
Definition regex3.h:86
char * pattern
printable version
Definition regex3.h:87
bool pat_not
do not match
Definition regex3.h:89
regex_t * regex
compiled expression
Definition regex3.h:88
List of regular expressions.
Definition regex3.h:106
char * templ
Template to match.
Definition regex3.h:109
size_t nmatch
Match the 'nth' occurrence (0 means the whole expression)
Definition regex3.h:108
struct Regex * regex
Regex containing a regular expression.
Definition regex3.h:107
Constants for all the config types.
#define D_REGEX_ALLOW_NOT
Regex can begin with '!'.
Definition types.h:106
#define D_REGEX_MATCH_CASE
Case-sensitive matching.
Definition types.h:105