dev-python/nltk: Bump to 3.4.5, enable tests
author Michał Górny <mgorny@gentoo.org>
Thu, 12 Mar 2020 18:31:09 +0000 (19:31 +0100)
committer Michał Górny <mgorny@gentoo.org>
Fri, 13 Mar 2020 09:55:56 +0000 (10:55 +0100)
Signed-off-by: Michał Górny <mgorny@gentoo.org>
dev-python/nltk/Manifest
dev-python/nltk/files/nltk-3.4.5-corpus-tests.patch [new file with mode: 0644]
dev-python/nltk/nltk-3.4.5.ebuild [new file with mode: 0644]

index 8d0786224485306affa2175f937390fe07e49d95..79d80c3f67c2100b173b063592f52477d00cbd15 100644 (file)
@@ -1,2 +1,3 @@
 DIST nltk-3.2.3.tar.gz 1166282 BLAKE2B 803dc1a8ec80a17538c2164d5ee0eca201be2307bbf3e08093a55cee93e9a539d8b48ea42c8ad84b540922355bf0b151ae04338d8505378a89173159d7464b76 SHA512 2e5c0899c29f61166b2e161e62e81fd35e045bda979fbfa662bf9ad1349be4aae6aabe4b9de34905e2abbc05b06a096480829fd66638efa35cd5c866f76c62e8
 DIST nltk-3.2.5.tar.gz 1188856 BLAKE2B 316e5955d2f90c351673ee9d3f0cc159babbcae08104518db62e796b422dcbf51668001b06b5d5dc399613d4db0ed39ff7dd15ad15d6f6c64e10c73a4d53bc06 SHA512 967b209ce3dde5296dafac16b91bff3894ff1a3f073c76974fd9193912597c82d2d1c50d4419052da77ab66f798f230f725b83f31368a95cacf94d46e45055de
+DIST nltk-3.4.5.tar.gz 2821481 BLAKE2B 9d3d6df9a951b4dfbfec335386ba0fb13c484c74a066141ebdfdb541fbec421dd72163d4028e239abd58f95a73e0eef5861574fa9955e4ee11f2c4598c895f59 SHA512 0ca24cd2154e47a8c50fe8e2426fa01c80904ece4100b07cdb4e5835e423e31968e96f0e750e1936f1e758a608596c560aaaee05b844b334f6179249db05f969
diff --git a/dev-python/nltk/files/nltk-3.4.5-corpus-tests.patch b/dev-python/nltk/files/nltk-3.4.5-corpus-tests.patch
new file mode 100644 (file)
index 0000000..0284aec
--- /dev/null
@@ -0,0 +1,37 @@
+diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest
+index 5509fe2fd..0a34c9522 100644
+--- a/nltk/test/corpus.doctest
++++ b/nltk/test/corpus.doctest
+@@ -94,7 +94,7 @@ If the reader methods are called without any arguments, they will
+ typically load all documents in the corpus.
+     >>> len(inaugural.words())
+-    145735
++    149797
+ If a corpus contains a README file, it can be accessed with a ``readme()`` method:
+@@ -387,8 +387,8 @@ examples illustrate the use of the wordlist corpora:
+     >>> stopwords.fileids() # doctest: +ELLIPSIS
+     ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...]
+-    >>> stopwords.words('portuguese') # doctest: +ELLIPSIS
+-    ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...]
++    >>> sorted(stopwords.words('portuguese')) # doctest: +ELLIPSIS
++    ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', ...]
+     >>> names.fileids()
+     ['female.txt', 'male.txt']
+     >>> names.words('male.txt') # doctest: +ELLIPSIS
+diff --git a/nltk/test/unit/test_wordnet.py b/nltk/test/unit/test_wordnet.py
+index a7b26ac91..a1df3d396 100644
+--- a/nltk/test/unit/test_wordnet.py
++++ b/nltk/test/unit/test_wordnet.py
+@@ -204,7 +204,7 @@ class WordnNetDemo(unittest.TestCase):
+             u'preobrat',
+             u'preobrat_v_mišljenju'
+             ]
+-        self.assertEqual(S('about-face.n.02').lemma_names(lang='slv'), expected)
++        self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), sorted(expected))
+     def test_iterable_type_for_all_lemma_names(self):
+         # Duck-test for iterables.
diff --git a/dev-python/nltk/nltk-3.4.5.ebuild b/dev-python/nltk/nltk-3.4.5.ebuild
new file mode 100644 (file)
index 0000000..b3165b5
--- /dev/null
@@ -0,0 +1,58 @@
+# Copyright 1999-2020 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=7
+
+PYTHON_COMPAT=( python3_6 )
+PYTHON_REQ_USE="sqlite,tk?,xml(+)"
+
+inherit distutils-r1
+
+DESCRIPTION="Natural Language Toolkit"
+HOMEPAGE="https://www.nltk.org/ https://github.com/nltk/nltk/"
+SRC_URI="https://github.com/nltk/nltk/archive/${PV}.tar.gz -> ${P}.tar.gz"  # GitHub archive for ${PV}, stored as ${P}.tar.gz
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS="~amd64 ~x86 ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~x86-macos ~x86-solaris"
+IUSE="tk"
+
+RDEPEND="dev-python/six[${PYTHON_USEDEP}]"
+BDEPEND="
+       test? (
+               dev-python/nltk-data
+               dev-python/numpy[${PYTHON_USEDEP}]
+               dev-python/pyparsing[${PYTHON_USEDEP}]
+               dev-python/twython[${PYTHON_USEDEP}]
+               sci-libs/scikits_learn[${PYTHON_USEDEP}]
+               sci-libs/scipy[${PYTHON_USEDEP}]
+       )"
+PDEPEND="dev-python/nltk-data"
+
+distutils_enable_tests nose  # python_test() is overridden further down to call runtests.py directly
+
+PATCHES=(  # test-expectation fixes: updated inaugural word count, order-independent stopword/lemma comparisons
+       "${FILESDIR}"/nltk-3.4.5-corpus-tests.patch
+)
+
+src_prepare() {  # adjust the test sources with sed, then chain to the distutils-r1 eclass prepare phase
+       # the nltk/tag/crf.py doctests require pycrfsuite, which is unpackaged: append "# doctest: +SKIP" to every line containing ">>>"
+       sed -i -e '/>>>/s@$@ # doctest: +SKIP@' nltk/tag/crf.py || die
+       # data.doctest fetches toy.cfg from raw.githubusercontent.com; replace that with the duplicate nltk:grammars/sample_grammars/toy.cfg URL
+       sed -e 's@https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg@nltk:grammars/sample_grammars/toy.cfg@' \
+               -i nltk/test/data.doctest || die
+       # test_plot requires X and hangs in Xvfb: rename it to _test_plot (presumably so the runner no longer collects it -- confirm)
+       sed -e 's:test_plot:_&:' \
+               -i nltk/test/unit/test_cfd_mutation.py || die
+
+       distutils-r1_src_prepare  # eclass implementation (from "inherit distutils-r1")
+}
+
+src_test() {  # run the eclass test phase from inside nltk/test
+       cd nltk/test || die  # python_test() calls runtests.py by relative path; it is expected to live here
+       distutils-r1_src_test  # eclass test phase; presumably invokes python_test() per implementation -- confirm
+}
+
+python_test() {  # run runtests.py with -v under ${EPYTHON}; die with a message naming the interpreter on failure
+       "${EPYTHON}" runtests.py -v || die "Tests failed with ${EPYTHON}"
+}