diff options
author | zimoun <zimon.toutoune@gmail.com> | 2021-01-19 22:28:08 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2021-02-03 12:41:28 +0100 |
commit | 9505b54a4f9f0265c9d8be53763f0c59d6f62a44 (patch) | |
tree | ac6e41db124f60baa4ddae929f407cd343fdb8a8 | |
parent | e55f1ac7773841838f82ac10f3bf8cde514ca4dd (diff) |
utils: Add string distance.
* guix/utils.scm (string-distance): New procedure.
(string-closest): New procedure.
* tests/utils.scm ("string-distance", "string-closest"): New tests.
Signed-off-by: Ludovic Courtès <ludo@gnu.org>
-rw-r--r-- | guix/utils.scm | 47 | ||||
-rw-r--r-- | tests/utils.scm | 18 |
2 files changed, 64 insertions, 1 deletions
diff --git a/guix/utils.scm b/guix/utils.scm index a85e2f495c..1625cab19b 100644 --- a/guix/utils.scm +++ b/guix/utils.scm @@ -8,6 +8,7 @@ ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com> ;;; Copyright © 2018, 2020 Marius Bakke <marius@gnu.org> ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il> +;;; Copyright © 2021 Simon Tournier <zimon.toutoune@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -37,6 +38,7 @@ #:use-module (guix memoization) #:use-module ((guix build utils) #:select (dump-port mkdir-p delete-file-recursively)) #:use-module ((guix build syscalls) #:select (mkdtemp! fdatasync)) + #:use-module ((guix combinators) #:select (fold2)) #:use-module (guix diagnostics) ;<location>, &error-location, etc. #:use-module (ice-9 format) #:use-module (ice-9 regex) @@ -115,7 +117,10 @@ call-with-decompressed-port compressed-output-port call-with-compressed-output-port - canonical-newline-port)) + canonical-newline-port + + string-distance + string-closest)) ;;; @@ -880,6 +885,46 @@ be determined." ;; raising an error would upset Geiser users #f)))))) + +;;; +;;; String comparison. +;;; + +(define (string-distance s1 s2) + "Compute the Levenshtein distance between two strings." + ;; Naive implemenation + (define loop + (mlambda (as bt) + (match as + (() (length bt)) + ((a s ...) + (match bt + (() (length as)) + ((b t ...) + (if (char=? a b) + (loop s t) + (1+ (min + (loop as t) + (loop s bt) + (loop s t)))))))))) + + (let ((c1 (string->list s1)) + (c2 (string->list s2))) + (loop c1 c2))) + +(define* (string-closest trial tests #:key (threshold 3)) + "Return the string from TESTS that is the closest from the TRIAL, +according to 'string-distance'. If the TESTS are too far from TRIAL, +according to THRESHOLD, then #f is returned." + (identity ;discard second return value + (fold2 (lambda (test closest minimal) + (let ((dist (string-distance trial test))) + (if (and (< dist minimal) (< dist threshold)) + (values test dist) + (values closest minimal)))) + #f +inf.0 + tests))) + ;;; Local Variables: ;;; eval: (put 'call-with-progress-reporter 'scheme-indent-function 1) ;;; End: diff --git a/tests/utils.scm b/tests/utils.scm index 62ec7e8b4c..7fcbb25552 100644 --- a/tests/utils.scm +++ b/tests/utils.scm @@ -2,6 +2,7 @@ ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2014 Eric Bavier <bavier@member.fsf.org> ;;; Copyright © 2016 Mathieu Lirzin <mthl@gnu.org> +;;; Copyright © 2021 Simon Tournier <zimon.toutoune@gmail.com> ;;; ;;; This file is part of GNU Guix. ;;; @@ -271,6 +272,23 @@ skip these tests." string-reverse) (call-with-input-file temp-file get-string-all))) +(test-equal "string-distance" + '(0 1 1 5 5) + (list + (string-distance "hello" "hello") + (string-distance "hello" "helo") + (string-distance "helo" "hello") + (string-distance "" "hello") + (string-distance "hello" ""))) + +(test-equal "string-closest" + '("hello" "hello" "helo" #f) + (list + (string-closest "hello" '("hello")) + (string-closest "hello" '("helo" "hello" "halo")) + (string-closest "hello" '("kikoo" "helo" "hihihi" "halo")) + (string-closest "hello" '("aaaaa" "12345" "hellohello" "h")))) + (test-end) (false-if-exception (delete-file temp-file)) |