summaryrefslogtreecommitdiff
path: root/gnu
diff options
context:
space:
mode:
author: Julien Lepiller <julien@lepiller.eu>, 2023-04-01 16:13:13 +0200
committer: Julien Lepiller <julien@lepiller.eu>, 2023-04-01 16:37:46 +0200
commit: 3ab24ba216ce91210b93ec61554b3343fbc3aaab (patch)
tree: 4c31577889873df4ef9f274e59029992eeeaa7f1 /gnu
parent: 47ea688fd27d0ce0c8ea5481f1f94d0ebc3e37eb (diff)
gnu: Add mecab.
* gnu/packages/language.scm (mecab): New variable.
* gnu/packages/patches/mecab-variable-param.patch: New file.
* gnu/local.mk (dist_patch_DATA): Add it.
Diffstat (limited to 'gnu')
-rw-r--r--  gnu/local.mk  1
-rw-r--r--  gnu/packages/language.scm  51
-rw-r--r--  gnu/packages/patches/mecab-variable-param.patch  30
3 files changed, 81 insertions, 1 deletions
diff --git a/gnu/local.mk b/gnu/local.mk
index 3a93ab50dd..47fe4daaff 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1540,6 +1540,7 @@ dist_patch_DATA = \
%D%/packages/patches/libmemcached-build-with-gcc7.patch \
%D%/packages/patches/libmhash-hmac-fix-uaf.patch \
%D%/packages/patches/libsigrokdecode-python3.9-fix.patch \
+ %D%/packages/patches/mecab-variable-param.patch \
%D%/packages/patches/memtest86+-build-reproducibly.patch \
%D%/packages/patches/mercurial-hg-extension-path.patch \
%D%/packages/patches/mercurial-openssl-compat.patch \
diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm
index f7e3ea6cd6..5dda0c1eeb 100644
--- a/gnu/packages/language.scm
+++ b/gnu/packages/language.scm
@@ -4,7 +4,7 @@
;;; Copyright © 2018 Nikita <nikita@n0.is>
;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2020 Ricardo Wurmus <rekado@elephly.net>
-;;; Copyright © 2020 Julien Lepiller <julien@lepiller.eu>
+;;; Copyright © 2020, 2022 Julien Lepiller <julien@lepiller.eu>
;;; Copyright © 2022 Milran <milranmike@protonmail.com>
;;;
;;; This file is part of GNU Guix.
@@ -928,3 +928,52 @@ and manipulation.")
(description
"libskk is a library to deal with Japanese kana-to-kanji conversion method.")
(license license:gpl3+)))
+
+(define-public mecab
+  (package
+    (name "mecab")
+    (version "0.996")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/taku910/mecab")
+                    ;; Pinned to the latest upstream commit at packaging time.
+                    (commit "046fa78b2ed56fbd4fac312040f6d62fc1bc31e3")))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "1hdv7rgn8j0ym9gsbigydwrbxa8cx2fb0qngg1ya15vvbw0lk4aa"))
+              ;; Lets a "$NAME" value in mecabrc be read from the environment.
+              (patches (search-patches "mecab-variable-param.patch"))))
+    (build-system gnu-build-system)
+    (native-search-paths
+     (list (search-path-specification
+            (variable "MECAB_DICDIR")
+            (separator #f)              ; #f: holds one directory, not a list
+            (files '("lib/mecab/dic")))))
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; The build files live in the "mecab" subdirectory of the checkout.
+         (add-after 'unpack 'chdir
+           (lambda _ (chdir "mecab")))
+         (add-before 'build 'add-mecab-dicdir-variable
+           (lambda _
+             (substitute* "mecabrc.in"
+               ;; ".*" also swallows the line's newline, so put it back.
+               (("dicdir = .*")
+                "dicdir = $MECAB_DICDIR\n"))
+             (substitute* "mecab-config.in"
+               (("echo @libdir@/mecab/dic") ; fall back to libdir when unset
+                "if [ -z \"$MECAB_DICDIR\" ]; then
+ echo @libdir@/mecab/dic
+else
+ echo \"$MECAB_DICDIR\"
+fi")))))))
+    (inputs (list libiconv))
+    (home-page "https://taku910.github.io/mecab")
+    (synopsis "Morphological analysis engine for texts")
+    (description "Mecab is a morphological analysis engine developed as a
+collaboration between Kyoto University and Nippon Telegraph and Telephone
+Corporation.  The engine is independent of any language, dictionary or corpus.")
+    (license (list license:gpl2+ license:lgpl2.1+ license:bsd-3))))
diff --git a/gnu/packages/patches/mecab-variable-param.patch b/gnu/packages/patches/mecab-variable-param.patch
new file mode 100644
index 0000000000..4457cf3f44
--- /dev/null
+++ b/gnu/packages/patches/mecab-variable-param.patch
@@ -0,0 +1,30 @@
+From 2396e90056706ef897acab3aaa081289c7336483 Mon Sep 17 00:00:00 2001
+From: LEPILLER Julien <julien.lepiller@irisa.fr>
+Date: Fri, 19 Apr 2019 11:48:39 +0200
+Subject: [PATCH] Allow variable parameters
+
+---
+ mecab/src/param.cpp | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/mecab/src/param.cpp b/mecab/src/param.cpp
+index 65328a2..006b1b5 100644
+--- a/mecab/src/param.cpp
++++ b/mecab/src/param.cpp
+@@ -79,8 +79,12 @@ bool Param::load(const char *filename) {
+     size_t s1, s2;
+     for (s1 = pos+1; s1 < line.size() && isspace(line[s1]); s1++);
+     for (s2 = pos-1; static_cast<long>(s2) >= 0 && isspace(line[s2]); s2--);
+-    const std::string value = line.substr(s1, line.size() - s1);
++    std::string value = line.substr(s1, line.size() - s1);
+     const std::string key = line.substr(0, s2 + 1);
++    if (value.find('$') == 0) {  // "$NAME": take the value from environment variable NAME
++      const char *env = std::getenv(value.substr(1).c_str());
++      value = env ? env : "";  // getenv() returns NULL when NAME is unset; never feed that to std::string
++    }
+     set<std::string>(key.c_str(), value, false);
+   }
+ }
+
+--
+2.20.1
+