diff options
author | zimoun <zimon.toutoune@gmail.com> | 2021-01-19 22:28:08 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2021-02-03 12:41:28 +0100 |
commit | 9505b54a4f9f0265c9d8be53763f0c59d6f62a44 (patch) | |
tree | ac6e41db124f60baa4ddae929f407cd343fdb8a8 /guix | |
parent | e55f1ac7773841838f82ac10f3bf8cde514ca4dd (diff) |
utils: Add string distance.
* guix/utils.scm (string-distance): New procedure.
(string-closest): New procedure.
* tests/utils.scm ("string-distance", "string-closest"): New tests.
Signed-off-by: Ludovic Courtès <ludo@gnu.org>
Diffstat (limited to 'guix')
-rw-r--r-- | guix/utils.scm | 47 |
1 files changed, 46 insertions, 1 deletions
diff --git a/guix/utils.scm b/guix/utils.scm index a85e2f495c..1625cab19b 100644 --- a/guix/utils.scm +++ b/guix/utils.scm @@ -8,6 +8,7 @@ ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com> ;;; Copyright © 2018, 2020 Marius Bakke <marius@gnu.org> ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il> +;;; Copyright © 2021 Simon Tournier <zimon.toutoune@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -37,6 +38,7 @@ #:use-module (guix memoization) #:use-module ((guix build utils) #:select (dump-port mkdir-p delete-file-recursively)) #:use-module ((guix build syscalls) #:select (mkdtemp! fdatasync)) + #:use-module ((guix combinators) #:select (fold2)) #:use-module (guix diagnostics) ;<location>, &error-location, etc. #:use-module (ice-9 format) #:use-module (ice-9 regex) @@ -115,7 +117,10 @@ call-with-decompressed-port compressed-output-port call-with-compressed-output-port - canonical-newline-port)) + canonical-newline-port + + string-distance + string-closest)) ;;; @@ -880,6 +885,46 @@ be determined." ;; raising an error would upset Geiser users #f)))))) + +;;; +;;; String comparison. +;;; + +(define (string-distance s1 s2) + "Compute the Levenshtein distance between two strings." + ;; Naive implemenation + (define loop + (mlambda (as bt) + (match as + (() (length bt)) + ((a s ...) + (match bt + (() (length as)) + ((b t ...) + (if (char=? a b) + (loop s t) + (1+ (min + (loop as t) + (loop s bt) + (loop s t)))))))))) + + (let ((c1 (string->list s1)) + (c2 (string->list s2))) + (loop c1 c2))) + +(define* (string-closest trial tests #:key (threshold 3)) + "Return the string from TESTS that is the closest from the TRIAL, +according to 'string-distance'. If the TESTS are too far from TRIAL, +according to THRESHOLD, then #f is returned." + (identity ;discard second return value + (fold2 (lambda (test closest minimal) + (let ((dist (string-distance trial test))) + (if (and (< dist minimal) (< dist threshold)) + (values test dist) + (values closest minimal)))) + #f +inf.0 + tests))) + ;;; Local Variables: ;;; eval: (put 'call-with-progress-reporter 'scheme-indent-function 1) ;;; End: |