Files
hate_crack/PACK/enchant/tests.py
2018-01-27 13:38:56 -05:00

617 lines
23 KiB
Python

# pyenchant
#
# Copyright (C) 2004-2009, Ryan Kelly
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# In addition, as a special exception, you are
# given permission to link the code of this program with
# non-LGPL Spelling Provider libraries (eg: a MSFT Office
# spell checker backend) and distribute linked combinations including
# the two. You must obey the GNU Lesser General Public License in all
# respects for all of the code used other than said providers. If you modify
# this file, you may extend this exception to your version of the
# file, but you are not obligated to do so. If you do not wish to
# do so, delete this exception statement from your version.
#
"""
enchant.tests: testcases for pyenchant
"""
import os
import sys
import unittest
import pickle
try:
import subprocess
except ImportError:
subprocess = None
import enchant
from enchant import *
from enchant import _enchant as _e
from enchant.utils import unicode, raw_unicode, printf, trim_suggestions
def runcmd(cmd):
    """Run a shell command and return its exit status.

    If the command fails, whatever it wrote to its error stream is
    echoed to our own error stream.
    """
    # Fallback for interpreters where the subprocess import failed.
    if subprocess is None:
        return os.system(cmd)
    proc = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    _, err_output = proc.communicate()
    status = proc.returncode
    if status:
        # Captured output is bytes on Python 3 and has to be decoded
        # before it can be written to the text-mode sys.stderr.
        if sys.version_info[0] >= 3:
            err_output = err_output.decode(sys.getdefaultencoding(), "replace")
        sys.stderr.write(err_output)
    return status
class TestBroker(unittest.TestCase):
    """Test cases for the proper functioning of Broker objects.

    These tests assume that there is at least one working provider
    with a dictionary for the "en_US" language.
    """

    def setUp(self):
        self.broker = Broker()

    def tearDown(self):
        del self.broker

    def test_HasENUS(self):
        """Test that the en_US language is available."""
        self.assertTrue(self.broker.dict_exists("en_US"))

    def test_LangsAreAvail(self):
        """Test whether all advertised languages are in fact available."""
        for lang in self.broker.list_languages():
            if not self.broker.dict_exists(lang):
                # self.fail() instead of a bare assert statement, so the
                # check is not stripped when Python runs with -O.
                self.fail("language '" + lang + "' advertised but non-existent")

    def test_ProvsAreAvail(self):
        """Test whether all advertised providers are in fact available."""
        for (lang, prov) in self.broker.list_dicts():
            if not self.broker.dict_exists(lang):
                self.fail("language '" + lang + "' advertised but non-existent")
            if prov not in self.broker.describe():
                self.fail("provider '" + str(prov) + "' advertised but non-existent")

    def test_ProvOrdering(self):
        """Test that provider ordering works correctly."""
        langs = {}
        provs = []
        # Find the providers for each language, and a list of all providers
        for (tag, prov) in self.broker.list_dicts():
            # Skip hyphenation dictionaries installed by OOo
            if tag.startswith("hyph_") and prov.name == "myspell":
                continue
            # Canonicalize separators
            tag = tag.replace("-", "_")
            langs[tag] = []
            # NOTE: we are excluding Zemberek here as it appears to return
            #       a broker for any language, even nonexistent ones
            if prov not in provs and prov.name != "zemberek":
                provs.append(prov)
        # Work out which providers can actually serve each language by
        # asking a fresh broker that prefers just that provider.
        for prov in provs:
            for tag in langs:
                b2 = Broker()
                b2.set_ordering(tag, prov.name)
                try:
                    d = b2.request_dict(tag)
                    if d.provider != prov:
                        raise ValueError()
                    langs[tag].append(prov)
                except Exception:
                    # Best-effort probe: any failure simply means this
                    # provider is not recorded for the language.  Catching
                    # Exception (not everything) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    pass
        # Check availability using a single entry in ordering
        for tag in langs:
            for prov in langs[tag]:
                b2 = Broker()
                b2.set_ordering(tag, prov.name)
                d = b2.request_dict(tag)
                self.assertEqual((d.provider, tag), (prov, tag))
                del d
                del b2
        # Place providers that dont have the language in the ordering
        for tag in langs:
            for prov in langs[tag]:
                order = prov.name
                for prov2 in provs:
                    if prov2 not in langs[tag]:
                        order = prov2.name + "," + order
                b2 = Broker()
                b2.set_ordering(tag, order)
                d = b2.request_dict(tag)
                self.assertEqual((d.provider, tag, order), (prov, tag, order))
                del d
                del b2

    def test_UnicodeTag(self):
        """Test that unicode language tags are accepted"""
        d1 = self.broker._request_dict_data(raw_unicode("en_US"))
        self.assertTrue(d1)
        # The raw dictionary handle was requested directly, so it is
        # released directly as well.
        _e.broker_free_dict(self.broker._this, d1)
        d1 = Dict(raw_unicode("en_US"))
        self.assertTrue(d1)

    def test_GetSetParam(self):
        """Test getting and setting of broker parameters."""
        try:
            self.broker.get_param("pyenchant.unittest")
        except AttributeError:
            # Nothing is checked when get_param raises AttributeError
            # (presumably an enchant build without parameter support --
            # TODO confirm).
            return
        self.assertEqual(self.broker.get_param("pyenchant.unittest"), None)
        self.broker.set_param("pyenchant.unittest", "testing")
        self.assertEqual(self.broker.get_param("pyenchant.unittest"), "testing")
        # A value set on one broker must not be visible on a new broker.
        self.assertEqual(Broker().get_param("pyenchant.unittest"), None)
class TestDict(unittest.TestCase):
    """Exercise the behaviour of Dict objects.

    A working provider offering a dictionary for the "en_US" language
    is required for these tests to pass.
    """

    def setUp(self):
        self.dict = Dict("en_US")

    def tearDown(self):
        del self.dict

    def test_HasENUS(self):
        """Check that the default broker offers the en_US language."""
        self.assertTrue(dict_exists("en_US"))

    def test_check(self):
        """Check that common words are accepted and misspellings rejected."""
        for good in ("hello", "test"):
            self.assertTrue(self.dict.check(good))
        for bad in ("helo", "testt"):
            self.assertFalse(self.dict.check(bad))

    def test_broker(self):
        """Check that the dictionary is attached to the default broker."""
        self.assertTrue(self.dict._broker is enchant._broker)

    def test_tag(self):
        """Check that the dictionary reports the requested language tag."""
        self.assertEqual(self.dict.tag, "en_US")

    def test_suggest(self):
        """Check that a simple misspelling gets the obvious suggestion."""
        self.assertTrue(self.dict.check("hello"))
        suggestions = self.dict.suggest("helo")
        self.assertTrue("hello" in suggestions)

    def test_suggestHang1(self):
        """Check that suggesting does not hang on some inputs (Bug #1404196)"""
        # The length comparison is always true; the point is only that
        # each call to suggest() returns at all.
        for word in ("Thiis", "Thiiis", "Thiiiis"):
            self.assertTrue(len(self.dict.suggest(word)) >= 0)

    def test_unicode1(self):
        """Check that unicode strings can be checked and suggested for."""
        # TODO: find something that actually returns suggestions
        us1 = raw_unicode(r"he\u2149lo")
        self.assertTrue(type(us1) is unicode)
        self.assertFalse(self.dict.check(us1))
        for suggestion in self.dict.suggest(us1):
            self.assertTrue(type(suggestion) is unicode)

    def test_session(self):
        """Check that words can be added to and removed from the session."""
        checker = self.dict
        word = "Lozz"
        # An unknown word becomes valid once added to the session ...
        self.assertFalse(checker.check(word))
        self.assertFalse(checker.is_added(word))
        checker.add_to_session(word)
        self.assertTrue(checker.is_added(word))
        self.assertTrue(checker.check(word))
        # ... and invalid again once removed from it.
        checker.remove_from_session(word)
        self.assertFalse(checker.check(word))
        self.assertFalse(checker.is_added(word))
        # A known word can be excluded for the session as well.
        checker.remove_from_session("hello")
        self.assertFalse(checker.check("hello"))
        self.assertTrue(checker.is_removed("hello"))
        checker.add_to_session("hello")

    def test_AddRemove(self):
        """Check adding to and removing from the default user dictionary."""
        checker = self.dict
        made_up = "kxhjsddsi"
        self.assertFalse(checker.check(made_up))
        checker.add(made_up)
        self.assertTrue(checker.is_added(made_up))
        self.assertTrue(checker.check(made_up))
        checker.remove(made_up)
        self.assertFalse(checker.is_added(made_up))
        self.assertFalse(checker.check(made_up))
        # Removing a real word, then adding it back again.
        real = "pineapple"
        checker.remove(real)
        self.assertFalse(checker.check(real))
        self.assertTrue(checker.is_removed(real))
        self.assertFalse(checker.is_added(real))
        checker.add(real)
        self.assertTrue(checker.check(real))

    def test_DefaultLang(self):
        """Check the behaviour of default language selection."""
        default_tag = utils.get_default_language()
        if default_tag is None:
            # If no default language, shouldnt work
            self.assertRaises(Error, Dict)
            return
        # If there is a default language, should use it
        # Of course, no need for the dict to actually exist
        try:
            d = Dict()
        except DictNotFoundError:
            pass
        else:
            self.assertEqual(d.tag, default_tag)

    def test_pickling(self):
        """Check that pickling doesn't corrupt internal state."""
        d1 = Dict("en")
        self.assertTrue(d1.check("hello"))
        # The pickled bytes are produced right here, never read from an
        # outside source.
        d2 = pickle.loads(pickle.dumps(d1))
        for d in (d1, d2):
            self.assertTrue(d.check("hello"))
        # The copy has to keep working after the first one is released.
        d1._free()
        self.assertTrue(d2.check("hello"))
class TestPWL(unittest.TestCase):
"""Test cases for the proper functioning of PWLs and DictWithPWL objects.
These tests assume that there is at least one working provider
with a dictionary for the "en_US" language.
"""
def setUp(self):
self._tempDir = self._mkdtemp()
self._fileName = "pwl.txt"
def tearDown(self):
import shutil
shutil.rmtree(self._tempDir)
def _mkdtemp(self):
import tempfile
return tempfile.mkdtemp()
def _path(self, nm=None):
if nm is None:
nm = self._fileName
nm = os.path.join(self._tempDir, nm)
if not os.path.exists(nm):
open(nm, 'w').close()
return nm
def setPWLContents(self, contents):
"""Set the contents of the PWL file."""
pwlFile = open(self._path(), "w")
for ln in contents:
pwlFile.write(ln)
pwlFile.write("\n")
pwlFile.flush()
pwlFile.close()
def getPWLContents(self):
"""Retrieve the contents of the PWL file."""
pwlFile = open(self._path(), "r")
contents = pwlFile.readlines()
pwlFile.close()
return [c.strip() for c in contents]
def test_check(self):
"""Test that basic checking works for PWLs."""
self.setPWLContents(["Sazz", "Lozz"])
d = request_pwl_dict(self._path())
self.assertTrue(d.check("Sazz"))
self.assertTrue(d.check("Lozz"))
self.assertFalse(d.check("hello"))
def test_UnicodeFN(self):
"""Test that unicode PWL filenames are accepted."""
d = request_pwl_dict(unicode(self._path()))
self.assertTrue(d)
def test_add(self):
"""Test that adding words to a PWL works correctly."""
d = request_pwl_dict(self._path())
self.assertFalse(d.check("Flagen"))
d.add("Esquilax")
d.add("Esquilam")
self.assertTrue(d.check("Esquilax"))
self.assertTrue("Esquilax" in self.getPWLContents())
self.assertTrue(d.is_added("Esquilax"))
def test_suggestions(self):
"""Test getting suggestions from a PWL."""
self.setPWLContents(["Sazz", "Lozz"])
d = request_pwl_dict(self._path())
self.assertTrue("Sazz" in d.suggest("Saz"))
self.assertTrue("Lozz" in d.suggest("laz"))
self.assertTrue("Sazz" in d.suggest("laz"))
d.add("Flagen")
self.assertTrue("Flagen" in d.suggest("Flags"))
self.assertFalse("sazz" in d.suggest("Flags"))
def test_DWPWL(self):
"""Test functionality of DictWithPWL."""
self.setPWLContents(["Sazz", "Lozz"])
d = DictWithPWL("en_US", self._path(), self._path("pel.txt"))
self.assertTrue(d.check("Sazz"))
self.assertTrue(d.check("Lozz"))
self.assertTrue(d.check("hello"))
self.assertFalse(d.check("helo"))
self.assertFalse(d.check("Flagen"))
d.add("Flagen")
self.assertTrue(d.check("Flagen"))
self.assertTrue("Flagen" in self.getPWLContents())
self.assertTrue("Flagen" in d.suggest("Flagn"))
self.assertTrue("hello" in d.suggest("helo"))
d.remove("hello")
self.assertFalse(d.check("hello"))
self.assertTrue("hello" not in d.suggest("helo"))
d.remove("Lozz")
self.assertFalse(d.check("Lozz"))
def test_DWPWL_empty(self):
"""Test functionality of DictWithPWL using transient dicts."""
d = DictWithPWL("en_US", None, None)
self.assertTrue(d.check("hello"))
self.assertFalse(d.check("helo"))
self.assertFalse(d.check("Flagen"))
d.add("Flagen")
self.assertTrue(d.check("Flagen"))
d.remove("hello")
self.assertFalse(d.check("hello"))
d.add("hello")
self.assertTrue(d.check("hello"))
def test_PyPWL(self):
"""Test our pure-python PWL implementation."""
d = PyPWL()
self.assertTrue(list(d._words) == [])
d.add("hello")
d.add("there")
d.add("duck")
ws = list(d._words)
self.assertTrue(len(ws) == 3)
self.assertTrue("hello" in ws)
self.assertTrue("there" in ws)
self.assertTrue("duck" in ws)
d.remove("duck")
d.remove("notinthere")
ws = list(d._words)
self.assertTrue(len(ws) == 2)
self.assertTrue("hello" in ws)
self.assertTrue("there" in ws)
def test_UnicodeCharsInPath(self):
"""Test that unicode chars in PWL paths are accepted."""
self._fileName = raw_unicode(r"test_\xe5\xe4\xf6_ing")
d = request_pwl_dict(self._path())
self.assertTrue(d)
class TestUtils(unittest.TestCase):
"""Test cases for various utility functions."""
def test_trim_suggestions(self):
word = "gud"
suggs = ["good", "god", "bad+"]
self.assertEquals(trim_suggestions(word, suggs, 40), ["god", "good", "bad+"])
self.assertEquals(trim_suggestions(word, suggs, 4), ["god", "good", "bad+"])
self.assertEquals(trim_suggestions(word, suggs, 3), ["god", "good", "bad+"])
self.assertEquals(trim_suggestions(word, suggs, 2), ["god", "good"])
self.assertEquals(trim_suggestions(word, suggs, 1), ["god"])
self.assertEquals(trim_suggestions(word, suggs, 0), [])
class TestDocStrings(unittest.TestCase):
    """Test the spelling on all docstrings we can find in this module.

    This serves two purposes - to provide a lot of test data for the
    checker routines, and to make sure we don't suffer the embarrassment
    of having spelling errors in a spellchecking package!
    """

    # Lower-case words that are accepted even though the dictionary
    # reports them as misspelled.
    WORDS = ["spellchecking", "utf", "dict", "unicode", "bytestring", "bytestrings",
             "str", "pyenchant", "ascii", "utils", "setup", "distutils", "pkg",
             "filename", "tokenization", "tuple", "tuples", "tokenizer",
             "tokenizers", "testcase", "testcases", "whitespace", "wxpython",
             "spellchecker", "dialog", "urls", "wikiwords", "enchantobject",
             "providerdesc", "spellcheck", "pwl", "aspell", "myspell",
             "docstring", "docstrings", "stopiteration", "pwls", "pypwl",
             "dictwithpwl", "skippable", "dicts", "dict's", "filenames",
             "trie", "api", "ctypes", "wxspellcheckerdialog", "stateful",
             "cmdlinechecker", "spellchecks", "callback", "clunkier", "iterator",
             "ispell", "cor", "backends"]

    def test_docstrings(self):
        """Test that all our docstrings are error-free."""
        # Import every submodule up front so that each one is reachable
        # as an attribute of the package during the walk below.
        import enchant
        import enchant.utils
        import enchant.pypwl
        import enchant.tokenize
        import enchant.tokenize.en
        import enchant.checker
        import enchant.checker.CmdLineChecker
        try:
            import enchant.checker.GtkSpellCheckerDialog
        except ImportError:
            pass
        try:
            import enchant.checker.wxSpellCheckerDialog
        except ImportError:
            pass
        errors = []
        #  Naive recursion here would blow the stack, instead we
        #  simulate it with our own stack
        tocheck = [enchant]
        checked = []
        while tocheck:
            obj = tocheck.pop()
            checked.append(obj)
            newobjs = list(self._check_docstrings(obj, errors))
            tocheck.extend([obj for obj in newobjs if obj not in checked])
        self.assertEqual(len(errors), 0)

    def _check_docstrings(self, obj, errors):
        """Check the spelling of one docstring and find more to check.

        Each spelling error is recorded in the given list and reported
        on the error stream.  This is a generator that yields the child
        objects whose docstrings should be checked next.
        """
        import enchant
        if hasattr(obj, "__doc__"):
            # _DOC_ERRORS lists, in order, the misspellings an object is
            # known to contain; each entry excuses one matching error.
            skip_errors = list(getattr(obj, "_DOC_ERRORS", []))
            chkr = enchant.checker.SpellChecker(
                "en_AU", obj.__doc__, filters=[enchant.tokenize.URLFilter]
            )
            for err in chkr:
                if len(err.word) == 1:
                    continue
                if err.word.lower() in self.WORDS:
                    continue
                if skip_errors and skip_errors[0] == err.word:
                    skip_errors.pop(0)
                    continue
                errors.append((obj, err.word, err.wordpos))
                msg = "\nDOCSTRING SPELLING ERROR: %s %s %d %s\n" % (
                    obj, err.word, err.wordpos, chkr.suggest()
                )
                printf([msg], file=sys.stderr)
        #  Find and yield all child objects that should be checked
        for name in dir(obj):
            if name.startswith("__"):
                continue
            child = getattr(obj, name)
            if hasattr(child, "__file__"):
                # Objects with a __file__ (modules) are only followed
                # when they live under the directory of this file.  This
                # must be a key lookup: hasattr() on the globals() dict
                # is always False, which used to skip every module.
                if "__file__" not in globals():
                    continue
                child_file = child.__file__
                if not child_file:
                    # No usable location to compare against.
                    continue
                if not child_file.startswith(os.path.dirname(__file__)):
                    continue
            else:
                # Everything else is followed only when it (or failing
                # that, its class) is defined in an enchant module.
                cmod = getattr(child, "__module__", None)
                if not cmod:
                    cclass = getattr(child, "__class__", None)
                    cmod = getattr(cclass, "__module__", None)
                if cmod and not cmod.startswith("enchant"):
                    continue
            yield child
class TestInstallEnv(unittest.TestCase):
"""Run all testcases in a variety of install environments."""
def setUp(self):
self._tempDir = self._mkdtemp()
self._insDir = "build"
def tearDown(self):
import shutil
shutil.rmtree(self._tempDir)
def _mkdtemp(self):
import tempfile
return tempfile.mkdtemp()
def install(self):
import os, sys, shutil
insdir = os.path.join(self._tempDir, self._insDir)
os.makedirs(insdir)
shutil.copytree("enchant", os.path.join(insdir, "enchant"))
def runtests(self):
import os, sys
insdir = os.path.join(self._tempDir, self._insDir)
if str is not unicode and isinstance(insdir, unicode):
insdir = insdir.encode(sys.getfilesystemencoding())
os.environ["PYTHONPATH"] = insdir
script = os.path.join(insdir, "enchant", "__init__.py")
res = runcmd("\"%s\" %s" % (sys.executable, script,))
self.assertEqual(res, 0)
def test_basic(self):
"""Test proper functioning of TestInstallEnv suite."""
self.install()
self.runtests()
test_basic._DOC_ERRORS = ["TestInstallEnv"]
def test_UnicodeInstallPath(self):
"""Test installation in a path containing unicode chars."""
self._insDir = raw_unicode(r'test_\xe5\xe4\xf6_ing')
self.install()
self.runtests()
class TestPy2exe(unittest.TestCase):
    """Run all testcases inside a py2exe executable"""

    # Expected misspellings in the docstring above, read by the docstring
    # spelling test in this file.
    _DOC_ERRORS = ["py", "exe"]

    def setUp(self):
        self._tempDir = self._mkdtemp()

    def tearDown(self):
        import shutil
        shutil.rmtree(self._tempDir)

    def test_py2exe(self):
        """Test pyenchant running inside a py2exe executable."""
        try:
            # Imported only to find out whether py2exe is installed; the
            # test silently does nothing when it is not.
            import py2exe
        except ImportError:
            return
        here = os.path.dirname(__file__)
        setup_py = os.path.join(here, "..", "tools", "setup.py2exe.py")
        if not os.path.exists(setup_py):
            return
        # The build runs in a child process that must be able to import
        # this package; PYTHONPATH is restored afterwards so the change
        # does not leak into the rest of the test run.
        saved = os.environ.get("PYTHONPATH")
        os.environ["PYTHONPATH"] = os.path.dirname(here)
        try:
            buildCmd = '%s %s -q py2exe --dist-dir="%s"'
            buildCmd = buildCmd % (sys.executable, setup_py, self._tempDir)
            res = runcmd(buildCmd)
            self.assertEqual(res, 0)
            testCmd = os.path.join(self._tempDir, "test_pyenchant.exe")
            self.assertTrue(os.path.exists(testCmd))
            res = runcmd(testCmd)
            self.assertEqual(res, 0)
        finally:
            if saved is None:
                del os.environ["PYTHONPATH"]
            else:
                os.environ["PYTHONPATH"] = saved
    test_py2exe._DOC_ERRORS = ["py", "exe"]

    def _mkdtemp(self):
        import tempfile
        return tempfile.mkdtemp()
def buildtestsuite(recurse=True):
    """Build a suite holding all the testcases of the package.

    When the flag is true, the suite starts with the testcases that
    install or package the code and run the tests again from there.
    """
    from enchant.checker.tests import TestChecker
    from enchant.tokenize.tests import TestTokenization, TestFilters
    from enchant.tokenize.tests import TestTokenizeEN
    cases = []
    if recurse:
        cases.extend([TestInstallEnv, TestPy2exe])
    cases.extend([
        TestBroker,
        TestDict,
        TestPWL,
        TestUtils,
        TestDocStrings,
        TestChecker,
        TestTokenization,
        TestTokenizeEN,
        TestFilters,
    ])
    # unittest.makeSuite() was removed in Python 3.13; the default loader
    # collects the same test methods.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    for case in cases:
        suite.addTest(load(case))
    return suite
def runtestsuite(recurse=False):
    """Build the test suite, run it quietly and return the result."""
    runner = unittest.TextTestRunner(verbosity=0)
    suite = buildtestsuite(recurse=recurse)
    return runner.run(suite)