summary | refs | log | tree | commit | diff
path: root/gnu/packages/textutils.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/textutils.scm')
-rw-r--r-- gnu/packages/textutils.scm 63
1 files changed, 63 insertions, 0 deletions
diff --git a/gnu/packages/textutils.scm b/gnu/packages/textutils.scm
index 6e07553e95..330e10610d 100644
--- a/gnu/packages/textutils.scm
+++ b/gnu/packages/textutils.scm
@@ -45,6 +45,7 @@
#:use-module (guix build-system python)
#:use-module (gnu packages)
#:use-module (gnu packages autotools)
+ #:use-module (gnu packages base)
#:use-module (gnu packages compression)
#:use-module (gnu packages gettext)
#:use-module (gnu packages java)
@@ -365,6 +366,68 @@ useful when it is desired to reformat numbers.
@end itemize")
(license license:gpl3)))
+;; Command-line tools for inspecting Unicode text, built from the upstream
+;; release tarball with the GNU build system.
+(define-public uniutils
+  (package
+    (name "uniutils")
+    (version "2.27")
+    (source
+     (origin
+       (method url-fetch)
+       ;; NOTE(review): the home page below is served over https; check
+       ;; whether the tarball URL can be switched to https as well.
+       (uri (string-append "http://billposer.org/Software/Downloads/"
+                           "uniutils-" version ".tar.bz2"))
+       (sha256
+        (base32 "19w1510w87gx7n4qy3zsb0m467a4rn5scvh4ajajg7jh6x5xri08"))))
+    (build-system gnu-build-system)
+    (arguments
+     '(#:configure-flags '("--disable-dependency-tracking")
+       #:phases
+       (modify-phases %standard-phases
+         ;; Rewrite the 'utf8lookup' file so that the helper commands it
+         ;; invokes are referred to by absolute file name.  The patterns are
+         ;; anchored with '^', so only a command name at the very start of a
+         ;; line (followed by a space) is rewritten.
+         ;; NOTE(review): confirm against the upstream script that each
+         ;; command really starts a line; an invocation that appears later
+         ;; on a line would be left untouched.
+         (add-after 'build 'fix-paths
+           (lambda* (#:key outputs inputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (a2b (assoc-ref inputs "ascii2binary"))
+                   (iconv (assoc-ref inputs "libiconv")))
+               (substitute* "utf8lookup"
+                 (("^ascii2binary ") (string-append a2b "/bin/ascii2binary "))
+                 ;; 'uniname' is looked up in this package's own "out"
+                 ;; output rather than in an input.
+                 (("^uniname ") (string-append out "/bin/uniname "))
+                 (("^iconv ") (string-append iconv "/bin/iconv ")))
+               ;; The phase ends with an explicit true value.
+               #t))))))
+    ;; Both inputs are looked up by label in the 'fix-paths' phase above.
+    (inputs
+     `(("ascii2binary" ,ascii2binary)
+       ("libiconv" ,libiconv)))
+    (home-page "https://billposer.org/Software/unidesc.html")
+    (synopsis "Find out what is in a Unicode file")
+    (description "This package provides tools that are useful when working
+with Unicode files: when one doesn't know the writing system, doesn't have the
+necessary font, needs to inspect invisible characters, needs to find out
+whether characters have been combined or in what order they occur, or needs
+statistics on which characters occur.
+
+@itemize
+
+@item @command{uniname} defaults to printing the character offset of each
+character, its byte offset, its hex code value, its encoding, the glyph
+itself, and its name.  It may also be used to validate UTF-8 input.
+
+@item @command{unidesc} reports the character ranges to which different
+portions of the text belong.  It can also be used to identify Unicode
+encodings (e.g.@: UTF-16be) flagged by magic numbers.
+
+@item @command{unihist} generates a histogram of the characters in its input.
+
+@item @command{ExplicateUTF8} is intended for debugging or for learning about
+Unicode.  It determines and explains the validity of a sequence of bytes as a
+UTF-8 encoding.
+
+@item @command{utf8lookup} provides a handy way to look up Unicode characters
+from the command line.
+
+@item @command{unireverse} reverses each line of UTF-8 input
+character-by-character.
+
+@end itemize")
+    (license license:gpl3)))
+
(define-public libconfig
(package
(name "libconfig")