NeoMutt  2025-12-11-694-ga89709
Teaching an old dog new tricks
DOXYGEN
Loading...
Searching...
No Matches
fuzzy.h File Reference

Fuzzy matching library - private definitions. More...

+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

int fuzzy_subseq_match (const char *pattern, const char *candidate, const struct FuzzyOptions *opts, struct FuzzyResult *out)
 Perform subsequence fuzzy matching (UTF-8 aware, ASCII case-folding)
 

Detailed Description

Fuzzy matching library - private definitions.

Authors
  • Richard Russon

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

Definition in file fuzzy.h.

Function Documentation

◆ fuzzy_subseq_match()

int fuzzy_subseq_match ( const char * pattern,
const char * candidate,
const struct FuzzyOptions * opts,
struct FuzzyResult * out )

Perform subsequence fuzzy matching (UTF-8 aware, ASCII case-folding)

Parameters
patternPattern to match (UTF-8 byte sequence)
candidateCandidate string to match against (UTF-8 byte sequence)
optsFuzzy matching options
outOutput result structure
Return values
>=0Match score
-1Error or no match

Performs byte-wise subsequence matching with ASCII-only case folding. UTF-8 multi-byte sequences are preserved but matched as raw bytes. Only ASCII A-Z characters are case-folded to a-z.

Definition at line 262 of file subseq.c.

264{
265 if (!pattern || !candidate)
266 return -1;
267
268 size_t plen = strlen(pattern);
269 if (plen == 0)
270 return -1;
271
272 int max_pattern = (opts && opts->max_pattern) ? opts->max_pattern : DEFAULT_MAX_PATTERN;
273 if ((max_pattern <= 0) || (max_pattern > DEFAULT_MAX_PATTERN))
274 max_pattern = DEFAULT_MAX_PATTERN;
275
276 if (plen > (size_t) max_pattern)
277 return -1;
278
279 bool fold = compute_case_mode(pattern, opts);
280
281 int matchpos[DEFAULT_MAX_PATTERN];
282
283 int pi = 0;
284 int ci = 0;
285 int score = 0;
286
287 int first = -1;
288 int last = -1;
289
290 // Forward subsequence scan
291 while (candidate[ci] && pi < (int) plen)
292 {
293 const unsigned char pbyte = (unsigned char) pattern[pi];
294 if (pbyte < 0x80)
295 {
296 int pc = lower_if(pattern[pi], fold);
297 int cc = lower_if(candidate[ci], fold);
298
299 if (pc == cc)
300 {
301 matchpos[pi] = ci;
302
303 if (first < 0)
304 first = ci;
305
306 last = ci;
307 pi++;
308 }
309
310 ci++;
311 continue;
312 }
313
314 // Non-ASCII pattern bytes must match as a whole UTF-8 codepoint.
315 const int pchar_len = utf8_char_len(pattern + pi);
316 const int premaining = (int) plen - pi;
317 if ((pchar_len <= 0) || (pchar_len > premaining))
318 return -1;
319 bool matched = false;
320
321 while (candidate[ci])
322 {
323 const unsigned char cbyte = (unsigned char) candidate[ci];
324 if (utf8_is_continuation(cbyte))
325 {
326 ci++;
327 continue;
328 }
329
330 const int cchar_len = utf8_char_len(candidate + ci);
331 if ((pchar_len == cchar_len) && (memcmp(pattern + pi, candidate + ci, pchar_len) == 0))
332 {
333 for (int k = 0; k < pchar_len; k++)
334 matchpos[pi + k] = ci + k;
335
336 if (first < 0)
337 first = ci;
338
339 last = ci + pchar_len - 1;
340 pi += pchar_len;
341 ci += cchar_len;
342 matched = true;
343 break;
344 }
345
346 ci += cchar_len;
347 }
348
349 if (!matched)
350 break;
351 }
352
353 if (pi != (int) plen)
354 return -1; // not a subsequence
355
356 // Scoring
357
358 // Base score
359 score += plen * 10;
360
361 // Consecutive & gap penalties
362 for (int i = 1; i < pi; i++)
363 {
364 int gap = matchpos[i] - matchpos[i - 1] - 1;
365
366 if (gap == 0)
367 score += 15; // compact match bonus
368 else
369 score -= gap * 2;
370 }
371
372 // Span penalty
373 int span = last - first + 1;
374 score -= span;
375
376 // Prefix bonus
377 if (first == 0 && opts && opts->prefer_prefix)
378 score += 40;
379
380 // Boundary bonus (ASCII-only separators and CamelCase)
381 for (int i = 0; i < pi; i++)
382 {
383 int pos = matchpos[i];
384
385 if (pos == 0)
386 score += 30; // start of string
387 else
388 {
389 unsigned char prev = (unsigned char) candidate[pos - 1];
390 unsigned char curr = (unsigned char) candidate[pos];
391
392 // ASCII separator boundaries
393 if ((prev == '/') || (prev == '.') || (prev == '-') || (prev == '_'))
394 score += 15;
395 // ASCII CamelCase boundary (lowercase followed by uppercase)
396 else if (((prev >= 'a') && (prev <= 'z')) && ((curr >= 'A') && (curr <= 'Z')))
397 score += 10;
398 }
399 }
400
401 // Mild length penalty
402 score -= (int) strlen(candidate) / 4;
403
404 // Ensure valid matches always return non-negative score
405 if (score < 0)
406 score = 0;
407
408 if (out)
409 {
410 out->score = score;
411 out->span = span;
412 out->start = first;
413 out->end = last;
414 }
415
416 return score;
417}
bool candidate(struct CompletionData *cd, char *user, const char *src, char *dest, size_t dlen)
Helper function for completion.
Definition helpers.c:79
int max_pattern
Safety bound (<=0 = default 256, capped at 256)
Definition lib.h:91
bool prefer_prefix
Extra weight for prefix matches.
Definition lib.h:90
int score
Score (<0 = no match)
Definition lib.h:99
int span
Match span.
Definition lib.h:100
int start
First match position.
Definition lib.h:101
int end
Last match position.
Definition lib.h:102
static int utf8_char_len(const char *s)
Get length of a UTF-8 codepoint at a byte offset.
Definition subseq.c:183
static bool utf8_is_continuation(unsigned char c)
Check for UTF-8 continuation byte.
Definition subseq.c:171
static int lower_if(int c, bool fold)
Convert character to lowercase conditionally.
Definition subseq.c:160
static bool compute_case_mode(const char *pattern, const struct FuzzyOptions *opts)
Determine if case folding should be used.
Definition subseq.c:227
#define DEFAULT_MAX_PATTERN
Default maximum pattern length.
Definition subseq.c:133
+ Here is the call graph for this function:
+ Here is the caller graph for this function: