From 485ac28235bf8775914b905060266b09629c1bac Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Thu, 7 Jan 2021 15:15:24 -0500 Subject: utils: Allow text substitution even in the presence of NUL characters. Fixes . Before this change, the presence of a NUL character on a line meant that the (glibc) regexp engine used by Guile would either 1. stop scanning the string or 2. crash with the error "string contains #\\nul character", depending on the locale used. This change works around this limitation by first replacing the NUL character by an unused Unicode code point, doing the substitution, then reverting the replacement. * guix/build/utils.scm (unused-private-use-code-point) (replace-char): New procedures. (substitute): Make use of the above procedures to work around the NUL character regexp engine limitation. * tests/build-utils.scm: Add tests. Co-authored-by: Maxim Cournoyer Signed-off-by: Maxim Cournoyer --- tests/build-utils.scm | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/build-utils.scm b/tests/build-utils.scm index 654b480ed9..31be7ff80f 100644 --- a/tests/build-utils.scm +++ b/tests/build-utils.scm @@ -1,6 +1,7 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès ;;; Copyright © 2019 Ricardo Wurmus +;;; Copyright © 2021 Maxim Cournoyer ;;; ;;; This file is part of GNU Guix. ;;; @@ -18,7 +19,7 @@ ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. -(define-module (test-build-utils) +(define-module (test build-utils) #:use-module (guix tests) #:use-module (guix build utils) #:use-module ((guix utils) @@ -241,4 +242,25 @@ print('hello world')")) "/some/other/path"))) #f))))) +(test-equal "substitute*, text contains a NUL byte, UTF-8" + "c\0d" + (with-fluids ((%default-port-encoding "UTF-8") + (%default-port-conversion-strategy 'error)) + ;; The GNU libc is locale sensitive. 
Depending on the value of LANG, the + ;; test could fail with "string contains #\\nul character: ~S" or "cannot + ;; convert wide string to output locale". + (setlocale LC_ALL "en_US.UTF-8") + (call-with-temporary-output-file + (lambda (file port) + (format port "a\0b") + (flush-output-port port) + + (substitute* file + (("a") "c") + (("b") "d")) + + (with-input-from-file file + (lambda _ + (get-string-all (current-input-port)))))))) + (test-end) -- cgit v1.2.3