NeoMutt  2025-12-11-694-ga89709
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
prex.c File Reference

Manage precompiled / predefined regular expressions. More...

#include "config.h"
#include <stdbool.h>
#include <stdint.h>
#include "prex.h"
#include "logging2.h"
#include "memory.h"
#include "signal2.h"
+ Include dependency graph for prex.c:

Go to the source code of this file.

Data Structures

struct  PrexStorage
 A predefined / precompiled regex. More...
 

Macros

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
 
#define PREX_MONTH_LAX    "(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)"
 
#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
 
#define PREX_DOW_NOCASE    "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"
 
#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"
 
#define PREX_YEAR   "([[:digit:]]{4})"
 
#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="
 
#define PATH   ":@/ "
 
#define QUERY_PART   "^&="
 
#define FWS   " *"
 
#define C   "(\\(.*\\))?"
 
#define CFWS   FWS C FWS
 

Functions

static struct PrexStorage * prex (enum Prex which)
 Compile on demand and get data for a predefined regex.
 
regmatch_t * mutt_prex_capture (enum Prex which, const char *str)
 Match a precompiled regex against a string.
 
void mutt_prex_cleanup (void)
 Cleanup heap memory allocated by compiled regexes.
 

Detailed Description

Manage precompiled / predefined regular expressions.

Authors
  • Pietro Cerutti
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file prex.c.

Macro Definition Documentation

◆ PREX_MONTH

#define PREX_MONTH   "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"

Definition at line 83 of file prex.c.

◆ PREX_MONTH_LAX

#define PREX_MONTH_LAX    "(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)"

Definition at line 84 of file prex.c.

84#define PREX_MONTH_LAX \
85 "(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)"

◆ PREX_DOW

#define PREX_DOW   "(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"

Definition at line 86 of file prex.c.

◆ PREX_DOW_NOCASE

#define PREX_DOW_NOCASE    "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"

Definition at line 87 of file prex.c.

87#define PREX_DOW_NOCASE \
88 "([Mm][Oo][Nn]|[Tt][Uu][Ee]|[Ww][Ee][Dd]|[Tt][Hh][Uu]|[Ff][Rr][Ii]|[Ss][Aa][Tt]|[Ss][Uu][Nn])"

◆ PREX_TIME

#define PREX_TIME   "([[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2})"

Definition at line 89 of file prex.c.

◆ PREX_YEAR

#define PREX_YEAR   "([[:digit:]]{4})"

Definition at line 90 of file prex.c.

◆ UNR_PCTENC_SUBDEL

#define UNR_PCTENC_SUBDEL   "][[:alnum:]._~%!$&'()*+,;="

◆ PATH

#define PATH   ":@/ "

◆ QUERY_PART

#define QUERY_PART   "^&="

◆ FWS

#define FWS   " *"

◆ C

#define C   "(\\(.*\\))?"

◆ CFWS

#define CFWS   FWS C FWS

Function Documentation

◆ prex()

static struct PrexStorage * prex ( enum Prex which)
static

Compile on demand and get data for a predefined regex.

Parameters
which: Which regex to get
Return values
ptr: Pointer to a PrexStorage struct
Note
Returned pointer is guaranteed not to be NULL. The function asserts on error.

Characters allowed in URL components (unreserved + percent-encoded + sub-delims) - without Unicode

Characters allowed in URL query key/value pairs

Definition at line 100 of file prex.c.

/*
 * Body of prex(): look up, and compile on first use, a predefined regex.
 *
 * A function-local static table of PrexStorage entries is indexed by `which`.
 * The selected entry's pattern is compiled the first time it is requested and
 * the same entry is returned on every later call.
 *
 * NOTE(review): the leading numbers are source-line numbers from the rendered
 * listing.  Gaps in that numbering (e.g. 106, 109, 112, 145-147) mean some
 * source lines are not shown here -- consult prex.c for the complete table
 * initialisers.
 */
101{
102 static struct PrexStorage storage[] = {
103 // clang-format off
104 {
105 PREX_URL,
107 /* Spec: https://tools.ietf.org/html/rfc3986#section-3 */
108#ifdef HAVE_PCRE2
110#define UNR_PCTENC_SUBDEL "][\\p{L}\\p{N}._~%!$&'()*+,;="
111#else
113#define UNR_PCTENC_SUBDEL "][[:alnum:]._~%!$&'()*+,;="
114#endif
115#define PATH ":@/ "
116 "^([[:alpha:]][-+.[:alnum:]]+):" // . scheme
117 "(" // . rest
118 "(" // . . authority + path
119 // . . or path only
120 "(//" // . . . authority + path
121 "(" // . . . . user info
122 "([" UNR_PCTENC_SUBDEL "@-]*)" // . . . . . user name + '@'
123 "(:([" UNR_PCTENC_SUBDEL "-]*))?" // . . . . . password
124 "@)?"
125 "(" // . . . . host
126 "([" UNR_PCTENC_SUBDEL "-]*)" // . . . . . host name
127 "|"
128 "(\\[[[:xdigit:]:.]+\\])" // . . . . . IPv4 or IPv6
129 ")"
130 "(:([[:digit:]]+))?" // . . . . port
131 "(/([" UNR_PCTENC_SUBDEL PATH "-]*))?" // . . . . path
132 ")"
133 "|"
134 "(" // . . . path only
135 "[" UNR_PCTENC_SUBDEL PATH "-]*" // . . . . path
136 ")"
137 ")"
138 // Should be: "(\\?([" UNR_PCTENC_SUBDEL PATH "?-]*))?"
139 "(\\?([^#]*))?" // . . query
140 ")$"
141#undef PATH
142#undef UNR_PCTENC_SUBDEL
143 },
144 {
148#define QUERY_PART "^&=" // Should be: "-[:alnum:]._~%!$'()*+,;:@/"
149 "([" QUERY_PART "]+)=([" QUERY_PART "]+)" // query + ' '
150#undef QUERY_PART
151 },
152 {
155 "=\\?"
156 "([^][()<>@,;:\\\"/?. =]+)" // charset
157 "\\?"
158 "([qQbB])" // encoding
159 "\\?"
160 "([^?]+)" // encoded text - we accept whitespace, see #1189
161 "\\?="
162 },
163 {
166 "^\\#H ([[:alnum:]_\\.-]+) ([[:alnum:]]{4}( [[:alnum:]]{4}){7})[ \t]*$"
167 },
168 {
171 /* Spec: https://tools.ietf.org/html/rfc5322#section-3.3 */
172#define FWS " *"
173#define C "(\\(.*\\))?"
174#define CFWS FWS C FWS
175 "^"
176 CFWS
177 "(([[:alpha:]]+)" CFWS ", *)?" // Day of week (or whatever)
178 CFWS "([[:digit:]]{1,2}) " // Day
179 CFWS PREX_MONTH_LAX // Month
180 CFWS "([[:digit:]]{2,4}) " // Year
181 CFWS "([[:digit:]]{1,2})" // Hour
182 ":" CFWS "([[:digit:]]{1,2})" // Minute
183 CFWS
184 "(:" CFWS "([[:digit:]]{1,2}))?" // Second
185 CFWS
186 "("
187 "([+-][[:digit:]]{4})|" // TZ
188 "([[:alpha:]]+)" // Obsolete TZ
189 ")?"
190#undef CFWS
191#undef C
192#undef FWS
193 },
194 {
197 "( ([[:digit:]])|([[:digit:]]{2}))" // Day
198 "-" PREX_MONTH // Month
199 "-" PREX_YEAR // Year
200 " " PREX_TIME // Time
201 " ([+-][[:digit:]]{4})" // TZ
202 },
203 {
206 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
207 "^From " // From
208 "([^[:space:]]+) +" // Sender
209 PREX_DOW // Day of week
210 " +"
211 PREX_MONTH // Month
212 " ( ([[:digit:]])|([[:digit:]]{2}))" // Day
213 " +"
214 PREX_TIME // Time
215 " +"
216 PREX_YEAR // Year
217 },
218 {
221 /* Spec: http://qmail.omnis.ch/man/man5/mbox.html */
222 "^From " // From
223 "("
224 "[^[:space:]]+" // Sender
225 "( at [^[:space:]]+)?" // Possibly obfuscated, pipermail-style
226 ")?"
227 " *"
228 PREX_DOW_NOCASE // Day of week
229 " +"
230 PREX_MONTH // Month
231 " +"
232 "( " // Day
233 "([[:digit:]])|"
234 "([[:digit:]]{2})"
235 ")"
236 " +"
237 "("
238 PREX_TIME // Time (HH:MM:SS)
239 "|"
240 "([[:digit:]]{2}" // Time (HH:MM)
241 ":[[:digit:]]{2})"
242 ")"
243 " +"
244 "("
245 "([[:alpha:] ]+)|" // Timezone name (which we skip)
246 "([+][[:digit:]]{4} )" // Timezone offset (which we skip)
247 ")?"
248 "("
249 PREX_YEAR // Year (YYYY)
250 "|"
251 "([[:digit:]]{2})" // Year (YY)
252 ")"
253 },
254 {
257 "^([[:alpha:]]+): (.*)$"
258 },
259 {
262 "^(.*)(tags:)([[:alnum:],]*) ?(.*)$"
263 },
264 // clang-format on
265 };
266
// Reject an out-of-range index, then check that the table entry found at
// that index is labelled with the same regex id (guards against the table
// and the enum drifting apart).
267 ASSERT((which < PREX_MAX) && "Invalid 'which' argument");
268 struct PrexStorage *h = &storage[which];
269 ASSERT((which == h->which) && "Fix 'storage' array");
// First request for this entry: h->re is still unset, so compile the pattern
// and allocate the array that will receive the capture offsets.
270 if (!h->re)
271 {
272#ifdef HAVE_PCRE2
// UTF mode is requested only when pcre2_has_unicode() reports support.
273 uint32_t opt = pcre2_has_unicode() ? PCRE2_UTF : 0;
274 int eno = 0;
275 PCRE2_SIZE eoff = 0;
276 h->re = pcre2_compile((PCRE2_SPTR8) h->str, PCRE2_ZERO_TERMINATED, opt,
277 &eno, &eoff, NULL);
278 ASSERT(h->re && "Fix your RE");
279 h->mdata = pcre2_match_data_create_from_pattern(h->re, NULL);
// The pattern's capture-group count plus one (for the whole match) must
// equal the nmatches value recorded in the table entry.
280 uint32_t ccount = 0;
281 pcre2_pattern_info(h->re, PCRE2_INFO_CAPTURECOUNT, &ccount);
282 ASSERT(((ccount + 1) == h->nmatches) && "Number of matches do not match (...)");
283 h->matches = MUTT_MEM_CALLOC(h->nmatches, regmatch_t);
284#else
// Without PCRE2: compile with regcomp() using extended syntax (REG_EXTENDED).
285 h->re = MUTT_MEM_CALLOC(1, regex_t);
286 const int rc = regcomp(h->re, h->str, REG_EXTENDED);
287 ASSERT(rc == 0 && "Fix your RE");
288 h->matches = MUTT_MEM_CALLOC(h->nmatches, regmatch_t);
289#endif
290 }
291 return h;
292}
#define MUTT_MEM_CALLOC(n, type)
Definition memory.h:52
#define PREX_DOW_NOCASE
Definition prex.c:87
#define CFWS
#define PREX_MONTH_LAX
Definition prex.c:84
#define PREX_DOW
Definition prex.c:86
#define PREX_TIME
Definition prex.c:89
#define QUERY_PART
#define PREX_MONTH
Definition prex.c:83
#define PATH
#define PREX_YEAR
Definition prex.c:90
#define UNR_PCTENC_SUBDEL
@ PREX_MBOX_FROM_LAX_MATCH_MAX
Definition prex.h:216
@ PREX_ACCOUNT_CMD_MATCH_MAX
Definition prex.h:227
@ PREX_IMAP_DATE_MATCH_MAX
Definition prex.h:170
@ PREX_MBOX_FROM_MATCH_MAX
Definition prex.h:189
@ PREX_RFC2047_ENCODED_WORD_MATCH_MAX
Definition prex.h:100
@ PREX_URL_QUERY_KEY_VAL_MATCH_MAX
Definition prex.h:86
@ PREX_GNUTLS_CERT_HOST_HASH
[#H foo.com A76D 954B EB79 1F49 5B3A 0A0E 0681 65B1]
Definition prex.h:37
@ PREX_MBOX_FROM_LAX
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition prex.h:41
@ PREX_URL
[imaps://user:pass@example.com/INBOX?foo=bar]
Definition prex.h:34
@ PREX_MBOX_FROM
[From god@heaven.af.mil Sat Jan 3 01:05:34 1996]
Definition prex.h:40
@ PREX_ACCOUNT_CMD
key: value
Definition prex.h:42
@ PREX_ALIAS_TAGS
tags:a,b,c
Definition prex.h:43
@ PREX_IMAP_DATE
[16-MAR-2020 15:09:35 -0700]
Definition prex.h:39
@ PREX_RFC5322_DATE_LAX
[Mon, (Comment) 16 Mar 2020 15:09:35 -0700]
Definition prex.h:38
@ PREX_URL_QUERY_KEY_VAL
https://example.com/?[q=foo]
Definition prex.h:35
@ PREX_MAX
Definition prex.h:44
@ PREX_RFC2047_ENCODED_WORD
[=?utf-8?Q?=E8=81=AA=E6=98=8E=E7=9A=84?=]
Definition prex.h:36
@ PREX_RFC5322_DATE_LAX_MATCH_MAX
Definition prex.h:152
@ PREX_URL_MATCH_MAX
Definition prex.h:73
@ PREX_GNUTLS_CERT_HOST_HASH_MATCH_MAX
Definition prex.h:114
@ PREX_ALIAS_TAGS_MATCH_MAX
Definition prex.h:241
#define ASSERT(COND)
Definition signal2.h:59
A predefined / precompiled regex.
Definition prex.c:70
const char * str
Regex string.
Definition prex.c:73
enum Prex which
Regex type, e.g. PREX_URL.
Definition prex.c:71
size_t nmatches
Number of regex matches.
Definition prex.c:72
regex_t * re
Compiled regex.
Definition prex.c:78
regmatch_t * matches
Resulting matches.
Definition prex.c:80
+ Here is the caller graph for this function:

◆ mutt_prex_capture()

regmatch_t * mutt_prex_capture ( enum Prex which,
const char * str )

Match a precompiled regex against a string.

Parameters
which: Which regex to return
str: String to apply regex on
Return values
ptr: Pointer to an array of matched captures
NULL: Regex didn't match

Definition at line 301 of file prex.c.

302{
303 if (!str)
304 return NULL;
305
306 struct PrexStorage *h = prex(which);
307#ifdef HAVE_PCRE2
308 size_t len = strlen(str);
309 int rc = pcre2_match(h->re, (PCRE2_SPTR8) str, len, 0, 0, h->mdata, NULL);
310 if (rc < 0)
311 {
312 PCRE2_UCHAR errmsg[1024];
313 pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
314 mutt_debug(LL_DEBUG2, "pcre2_match - <%s> -> <%s> = %s\n", h->str, str, errmsg);
315 return NULL;
316 }
317 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(h->mdata);
318 int i = 0;
319 for (; i < rc; i++)
320 {
321 h->matches[i].rm_so = ovector[i * 2];
322 h->matches[i].rm_eo = ovector[i * 2 + 1];
323 }
324 for (; i < h->nmatches; i++)
325 {
326 h->matches[i].rm_so = -1;
327 h->matches[i].rm_eo = -1;
328 }
329#else
330 if (regexec(h->re, str, h->nmatches, h->matches, 0))
331 return NULL;
332
333 ASSERT((h->re->re_nsub == (h->nmatches - 1)) &&
334 "Regular expression and matches enum are out of sync");
335#endif
336 return h->matches;
337}
#define mutt_debug(LEVEL,...)
Definition logging2.h:91
@ LL_DEBUG2
Log at debug level 2.
Definition logging2.h:46
static struct PrexStorage * prex(enum Prex which)
Compile on demand and get data for a predefined regex.
Definition prex.c:100
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ mutt_prex_cleanup()

void mutt_prex_cleanup ( void )

Cleanup heap memory allocated by compiled regexes.

Definition at line 342 of file prex.c.

343{
344 for (enum Prex which = 0; which < PREX_MAX; which++)
345 {
346 struct PrexStorage *h = prex(which);
347#ifdef HAVE_PCRE2
348 pcre2_match_data_free(h->mdata);
349 pcre2_code_free(h->re);
350#else
351 regfree(h->re);
352 FREE(&h->re);
353#endif
354 FREE(&h->matches);
355 }
356}
#define FREE(x)
Free memory and set the pointer to NULL.
Definition memory.h:68
Prex
Predefined list of regular expressions.
Definition prex.h:33
+ Here is the call graph for this function:
+ Here is the caller graph for this function: