summaryrefslogtreecommitdiff
path: root/gnu/packages/language.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/language.scm')
-rw-r--r--gnu/packages/language.scm105
1 files changed, 104 insertions, 1 deletions
diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm
index f7e3ea6cd6..208fab4f5b 100644
--- a/gnu/packages/language.scm
+++ b/gnu/packages/language.scm
@@ -4,7 +4,7 @@
;;; Copyright © 2018 Nikita <nikita@n0.is>
;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2020 Ricardo Wurmus <rekado@elephly.net>
-;;; Copyright © 2020 Julien Lepiller <julien@lepiller.eu>
+;;; Copyright © 2020, 2022 Julien Lepiller <julien@lepiller.eu>
;;; Copyright © 2022 Milran <milranmike@protonmail.com>
;;;
;;; This file is part of GNU Guix.
@@ -28,6 +28,7 @@
#:use-module (gnu packages autotools)
#:use-module (gnu packages audio)
#:use-module (gnu packages base)
+ #:use-module (gnu packages compression)
#:use-module (gnu packages docbook)
#:use-module (gnu packages emacs)
#:use-module (gnu packages freedesktop)
@@ -58,6 +59,7 @@
#:use-module (gnu packages xorg)
#:use-module (guix packages)
#:use-module (guix build-system cmake)
+ #:use-module (guix build-system copy)
#:use-module (guix build-system glib-or-gtk)
#:use-module (guix build-system gnu)
#:use-module (guix build-system perl)
@@ -928,3 +930,104 @@ and manipulation.")
(description
"libskk is a library to deal with Japanese kana-to-kanji conversion method.")
(license license:gpl3+)))
+
+(define-public mecab
+ (package
+ (name "mecab")
+ (version "0.996")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/taku910/mecab")
+ ;; latest commit
+ (commit "046fa78b2ed56fbd4fac312040f6d62fc1bc31e3")))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1hdv7rgn8j0ym9gsbigydwrbxa8cx2fb0qngg1ya15vvbw0lk4aa"))
+ (patches
+ (search-patches
+ "mecab-variable-param.patch"))))
+ (build-system gnu-build-system)
+ (native-search-paths
+ (list (search-path-specification
+ (variable "MECAB_DICDIR")
+ (separator #f)
+ (files '("lib/mecab/dic")))))
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'chdir
+ (lambda _
+ (chdir "mecab")))
+ (add-before 'build 'add-mecab-dicdir-variable
+ (lambda _
+ (substitute* "mecabrc.in"
+ (("dicdir = .*")
+ "dicdir = $MECAB_DICDIR"))
+ (substitute* "mecab-config.in"
+ (("echo @libdir@/mecab/dic")
+ "if [ -z \"$MECAB_DICDIR\" ]; then
+ echo @libdir@/mecab/dic
+else
+ echo \"$MECAB_DICDIR\"
+fi")))))))
+ (inputs (list libiconv))
+ (home-page "https://taku910.github.io/mecab")
+ (synopsis "Morphological analysis engine for texts")
+ (description "Mecab is a morphological analysis engine developped as a
+collaboration between the Kyoto university and Nippon Telegraph and Telephone
+Corporation. The engine is independent of any language, dictionary or corpus.")
+ (license (list license:gpl2+ license:lgpl2.1+ license:bsd-3))))
+
+(define-public mecab-ipadic
+ (package
+ (name "mecab-ipadic")
+ (version "2.7.0")
+ (source (package-source mecab))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:configure-flags
+ (list (string-append "--with-dicdir=" (assoc-ref %outputs "out")
+ "/lib/mecab/dic")
+ "--with-charset=utf8")
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'chdir
+ (lambda _
+ (chdir "mecab-ipadic")))
+ (add-before 'configure 'set-mecab-dir
+ (lambda* (#:key outputs #:allow-other-keys)
+ (setenv "MECAB_DICDIR" (string-append (assoc-ref outputs "out")
+ "/lib/mecab/dic")))))))
+ (native-inputs (list mecab)); for mecab-config
+ (home-page "https://taku910.github.io/mecab")
+ (synopsis "Dictionary data for MeCab")
+ (description "This package contains dictionnary data derived from
+ipadic for use with MeCab.")
+ (license (license:non-copyleft "mecab-ipadic/COPYING"))))
+
+(define-public mecab-unidic
+ (package
+ (name "mecab-unidic")
+ (version "3.1.0")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://clrd.ninjal.ac.jp/unidic_archive/cwj/"
+ version "/unidic-cwj-" version ".zip"))
+ (sha256
+ (base32
+ "1z132p2q3bgchiw529j2d7dari21kn0fhkgrj3vcl0ncg2m521il"))))
+ (build-system copy-build-system)
+ (arguments
+ `(#:install-plan
+ '(("." "lib/mecab/dic"
+ #:include-regexp ("\\.bin$" "\\.def$" "\\.dic$" "dicrc")))))
+ (native-inputs (list unzip))
+ (home-page "https://clrd.ninjal.ac.jp/unidic/en/")
+ (synopsis "Dictionary data for MeCab")
+ (description "UniDic for morphological analysis is a dictionary for
+analysis with the morphological analyser MeCab, where the short units exported
+from the database are used as entries (heading terms).")
+ ;; triple-licensed (at the user’s choice)
+ (license (list license:gpl2+ license:lgpl2.1 license:bsd-3))))