;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2019, 2021 Julien Lepiller <julien@lepiller.eu> ;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2023 Florian Pelz <pelzflorian@pelzflorian.de> ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. (define-module (guix build po) #:use-module (ice-9 match) #:use-module (ice-9 peg) #:use-module (ice-9 regex) #:use-module (ice-9 textual-ports) #:use-module (ice-9 vlist) #:use-module (srfi srfi-1) #:export (read-po-file translate-cross-references)) ;; A small parser for po files (define-peg-pattern po-file body (* (or entry whitespace))) (define-peg-pattern whitespace body (or " " "\t" "\n")) (define-peg-pattern comment-chr body (range #\space #\頋)) (define-peg-pattern comment none (and "#" (* comment-chr) "\n")) (define-peg-pattern flags all (and (ignore "#, ") (* comment-chr) (ignore "\n"))) (define-peg-pattern entry all (and (* (or flags comment (ignore (* whitespace)))) (ignore "msgid ") msgid (ignore (* whitespace)) (ignore "msgstr ") msgstr)) (define-peg-pattern escape body (or "\\\\" "\\\"" "\\n")) (define-peg-pattern str-chr body (or " " "!" (and (ignore "\\") "\"") "\\n" (and (ignore "\\") "\\") (range #\# #\頋))) (define-peg-pattern msgid all content) (define-peg-pattern msgstr all content) (define-peg-pattern content body (and (ignore "\"") (* str-chr) (ignore "\"") (? (and (ignore (* whitespace)) content)))) (define (interpret-newline-escape str) "Replace '\\n' sequences in STR with a newline character." (let loop ((str str) (result '())) (match (string-contains str "\\n") (#f (string-concatenate-reverse (cons str result))) (index (let ((prefix (string-take str index))) (loop (string-drop str (+ 2 index)) (append (list "\n" prefix) result))))))) (define (parse-tree->assoc parse-tree) "Converts a po PARSE-TREE to an association list, where the key is the msgid and the value is the msgstr. The result only contains non fuzzy strings." (define (comments->flags comments) (match comments (('flags flags) (map (lambda (flag) (string->symbol (string-trim-both flag #\space))) (string-split flags #\,))) ((? list? comments) (fold (lambda (comment res) (match comment ((? string? _) res) (flags (append (comments->flags flags) res)))) '() comments)))) (match parse-tree (() '()) ((entry . parse-tree) (match entry ((? string? entry) (parse-tree->assoc parse-tree)) ;; empty msgid (('entry ('msgid ('msgstr msgstr))) (parse-tree->assoc parse-tree)) ;; empty msgstr (('entry ('msgid msgid) 'msgstr) (parse-tree->assoc parse-tree)) (('entry _ ('msgid msgid) 'msgstr) (parse-tree->assoc parse-tree)) (('entry ('msgid msgid) ('msgstr msgstr)) (acons (interpret-newline-escape msgid) (interpret-newline-escape msgstr) (parse-tree->assoc parse-tree))) (('entry ('msgid msgid) ('msgstr msgstr)) (acons (interpret-newline-escape msgid) (interpret-newline-escape msgstr) (parse-tree->assoc parse-tree))) (('entry comments ('msgid msgid) ('msgstr msgstr)) (if (member 'fuzzy (comments->flags comments)) (parse-tree->assoc parse-tree) (acons (interpret-newline-escape msgid) (interpret-newline-escape msgstr) (parse-tree->assoc parse-tree)))))))) (define (read-po-file port) "Read a .po file from PORT and return an alist of msgid and msgstr." (let ((tree (peg:tree (match-pattern po-file (get-string-all port))))) (parse-tree->assoc tree))) (define (canonicalize-whitespace str) "Change whitespace (newlines, etc.) in STR to @code{#\\space}." (string-map (lambda (chr) (if (char-set-contains? char-set:whitespace chr) #\space chr)) str)) (define xref-regexp ;; Texinfo cross-reference regexp. (make-regexp (string-append "@(px|x)?ref\\{([^,}]+)(" "\\}" ;Match xref with one argument "|,[^,}]*\\}" ;or two arguments "|,[^,}]*,[^,}]*\\}" ;or three arguments ;; or with an *empty* fourth argument: "|,[^,}]*,[^,}]*, *," "|,[^,}]*,[^,}]*, *\\}" ")"))) (define (translate-cross-references texi pofile) "Translate the cross-references that appear in @var{texi}, the initial translation of a Texinfo file, using the msgid/msgstr pairs from @var{pofile}." (define translations (call-with-input-file pofile read-po-file)) (define content (call-with-input-file texi get-string-all)) (define matches (list-matches xref-regexp content)) (define translation-map (fold (match-lambda* (((msgid . str) result) (vhash-cons msgid str result))) vlist-null translations)) (define translated ;; Iterate over MATCHES and replace cross-references with their ;; translation found in TRANSLATION-MAP. (We can't use ;; 'substitute*' because matches can span multiple lines.) (let loop ((matches matches) (offset 0) (result '())) (match matches (() (string-concatenate-reverse (cons (string-drop content offset) result))) ((head . tail) (let ((prefix (match:substring head 1)) (ref (canonicalize-whitespace (match:substring head 2))) (rest (match:substring head 3))) (define translated (string-append "@" (or prefix "") "ref{" (match (vhash-assoc ref translation-map) (#f ref) ((_ . str) str)) (or rest ""))) (loop tail (match:end head) (append (list translated (string-take (string-drop content offset) (- (match:start head) offset))) result))))))) (format (current-error-port) "translated ~a cross-references in '~a'~%" (length matches) texi) (call-with-output-file texi (lambda (port) (display translated port))))